1 /* $OpenBSD: parse.y,v 1.11 2024/11/09 18:03:44 op Exp $ */
2
3 /* parse.y - parser for flex input */
4
/* Basic tokens of the flex input language, plus the %option keywords
 * that appear in the "option" production below.
 */
%token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
%token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS OPT_HEADER OPT_EXTRA_TYPE
%token OPT_TABLES

/* POSIX character class expressions; each one is expanded with
 * CCL_EXPR() in the "ccl_expr" production.
 */
%token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
%token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT

/* Negated character class expressions; expanded with CCL_NEG_EXPR(). */
%token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
%token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT

/* Character class set operators, left-associative; see "fullccl". */
%left CCL_OP_DIFF CCL_OP_UNION

/*
 * POSIX and AT&T lex place the precedence of the repeat operator, {},
 * below that of concatenation.  Thus, ab{3} is ababab.  Most other
 * POSIX utilities use an Extended Regular Expression (ERE) precedence
 * that has the repeat operator higher than concatenation.  This causes
 * ab{3} to yield abbb.
 *
 * In order to support the POSIX and AT&T precedence and the flex
 * precedence we define two token sets for the begin and end tokens of
 * the repeat operator, '{' and '}'.  The lexical scanner chooses
 * which tokens to return based on whether posix_compat or lex_compat
 * are specified.  Specifying either posix_compat or lex_compat will
 * cause flex to parse scanner files as per the AT&T and
 * POSIX-mandated behavior.
 */

%token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX
34
35
36 %{
37 /* Copyright (c) 1990 The Regents of the University of California. */
38 /* All rights reserved. */
39
40 /* This code is derived from software contributed to Berkeley by */
41 /* Vern Paxson. */
42
43 /* The United States Government has rights in this work pursuant */
44 /* to contract no. DE-AC03-76SF00098 between the United States */
45 /* Department of Energy and the University of California. */
46
47 /* This file is part of flex. */
48
49 /* Redistribution and use in source and binary forms, with or without */
50 /* modification, are permitted provided that the following conditions */
51 /* are met: */
52
53 /* 1. Redistributions of source code must retain the above copyright */
54 /* notice, this list of conditions and the following disclaimer. */
55 /* 2. Redistributions in binary form must reproduce the above copyright */
56 /* notice, this list of conditions and the following disclaimer in the */
57 /* documentation and/or other materials provided with the distribution. */
58
59 /* Neither the name of the University nor the names of its contributors */
60 /* may be used to endorse or promote products derived from this software */
61 /* without specific prior written permission. */
62
63 /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
64 /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
65 /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
66 /* PURPOSE. */
67
68 #include "flexdef.h"
69 #include "tables.h"
70
/* Parser working state shared by the semantic actions below.
 *
 *   pat                 - machine for the rule just parsed (see flexrule)
 *   scnum               - start condition number returned by sclookup()
 *   eps                 - scratch epsilon state used by the "re '$'" rule
 *   headcnt, trailcnt   - head/trail lengths handed to finish_rule()
 *   rulelen             - running count of characters in the current part
 *                         of the rule; bumped once per CHAR, ccl and '.'
 *   lastchar, cclsorted - track whether the ccl being built is still in
 *                         ascending order
 *   i                   - loop index shared by many actions
 *   trlcontxt           - set once '/' or '$' has been seen in the rule
 *   xcluflg             - true after XSCDECL, false after SCDECL
 *   currccl             - character class currently under construction
 *   varlength           - set by every variable-length operator
 *   variable_trail_rule - rule needs variable trailing context handling
 */
int pat, scnum, eps, headcnt, trailcnt, lastchar, i, rulelen;
int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule;

/* Stack of active start conditions.  Allocated in sect1end with
 * lastsc + 1 slots; entries live in scon_stk[1..scon_stk_ptr].
 */
int *scon_stk;
int scon_stk_ptr;

static int madeany = false;	/* whether we've made the '.' character class */
static int ccldot, cclany;	/* ccls for '.' without and with dot-all */
int previous_continued_action;	/* whether the previous rule's action was '|' */
80
/* format_warn3 - issue a warning built from a format and two arguments.
 * The message is formatted into a local MAXLINE-byte buffer (so it is
 * truncated if longer) and then passed to warn().
 */
#define format_warn3(fmt, a1, a2) \
	do{ \
	char fw3_msg[MAXLINE];\
	snprintf( fw3_msg, MAXLINE,(fmt), (a1), (a2) );\
	warn( fw3_msg );\
	}while(0)
87
/* Expand a POSIX character class expression.
 *
 * Adds to currccl every character c in [0, csize) that is ASCII and for
 * which the <ctype.h>-style predicate func(c) is true.  Note that the
 * macro writes to the global currccl, not to a ccl passed by the caller.
 */
#define CCL_EXPR(func) \
	do{ \
	int c; \
	for ( c = 0; c < csize; ++c ) \
		if ( isascii(c) && func(c) ) \
			ccladd( currccl, c ); \
	}while(0)
96
/* negated class
 *
 * Adds to currccl every character c in [0, csize) for which func(c) is
 * false.  Unlike CCL_EXPR there is no isascii() filter here, so every
 * non-matching value below csize is added.
 */
#define CCL_NEG_EXPR(func) \
	do{ \
	int c; \
	for ( c = 0; c < csize; ++c ) \
		if ( !func(c) ) \
			ccladd( currccl, c ); \
	}while(0)
105
/* On some over-ambitious machines, such as DEC Alpha's, the default
 * token type is "long" instead of "int"; this leads to problems with
 * declaring yylval in flexdef.h.  But so far, all the yacc's I've seen
 * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the
 * following should ensure that the default token type is "int".
 *
 * Every $$ / $n value in the grammar below is therefore a plain int.
 */
#define YYSTYPE int
113
114 %}
115
116 %%
/* goal - the whole input file.
 *
 * Once sections 1 and 2 have been parsed (the trailing initforrule has
 * already opened a fresh rule), append the default rule: a machine that
 * matches the negation of an empty character class.  It is branched onto
 * scset[] for every start condition 1..lastsc, and its action is either a
 * fatal "scanner jammed" error (when spprdflt is set) or ECHO.
 */
goal		:  initlex sect1 sect1end sect2 initforrule
			{ /* add default rule */
			int def_rule;

			pat = cclinit();
			cclnegate( pat );

			def_rule = mkstate( -pat );

			/* Remember the number of the default rule so we
			 * don't generate "can't match" warnings for it.
			 */
			default_rule = num_rules;

			finish_rule( def_rule, false, 0, 0, 0);

			for ( i = 1; i <= lastsc; ++i )
				scset[i] = mkbranch( scset[i], def_rule );

			if ( spprdflt )
				add_action(
				"YY_FATAL_ERROR( \"flex scanner jammed\" )" );
			else
				add_action( "ECHO" );

			add_action( ";\n\tYY_BREAK\n" );
			}
		;
145
/* initlex - empty production reduced first in goal; installs the
 * "INITIAL" start condition with the exclusive flag set to false.
 */
initlex		:
			{ /* initialize for processing rules */

			/* Create default DFA start condition. */
			scinstal( "INITIAL", false );
			}
		;
153
/* sect1 - section 1 of the input: any sequence of start condition
 * declarations and %option lines.  A parse error is reported through
 * synerr() by the error alternative.
 */
sect1		:  sect1 startconddecl namelist1
		|  sect1 options
		|
		|  error
			{ synerr( _("unknown error processing section 1") ); }
		;
160
/* sect1end - end of section 1.
 *
 * Runs check_options() and sets up the start condition stack.  The stack
 * gets lastsc + 1 slots because entries are stored with a pre-increment
 * (scon_stk[++scon_stk_ptr]), i.e. starting at index 1.
 */
sect1end	:  SECTEND
			{
			check_options();
			scon_stk = allocate_integer_array( lastsc + 1 );
			scon_stk_ptr = 0;
			}
		;
168
/* startconddecl - start condition declaration keyword.  Records in
 * xcluflg whether the names that follow (namelist1) are exclusive
 * (XSCDECL) or not (SCDECL).
 */
startconddecl	:  SCDECL
			{ xcluflg = false; }

		|  XSCDECL
			{ xcluflg = true; }
		;
175
/* namelist1 - list of start condition names being declared.  Each NAME
 * (its text is in nmstr) is installed with the exclusivity recorded by
 * startconddecl.
 */
namelist1	:  namelist1 NAME
			{ scinstal( nmstr, xcluflg ); }

		|  NAME
			{ scinstal( nmstr, xcluflg ); }

		|  error
			{ synerr( _("bad start condition list") ); }
		;
185
/* options - an %option line: OPTION_OP followed by zero or more
 * individual "option" entries.
 */
options		:  OPTION_OP optionlist
		;

optionlist	:  optionlist option
		|
		;
192
/* option - a single "keyword = NAME" option.  The NAME text is taken
 * from nmstr and duplicated with copy_string() into the matching global.
 * OPT_OUTFILE additionally sets did_outfilename and OPT_TABLES sets
 * tablesext.
 */
option		:  OPT_OUTFILE '=' NAME
			{
			outfilename = copy_string( nmstr );
			did_outfilename = 1;
			}
		|  OPT_EXTRA_TYPE '=' NAME
			{ extra_type = copy_string( nmstr ); }
		|  OPT_PREFIX '=' NAME
			{ prefix = copy_string( nmstr ); }
		|  OPT_YYCLASS '=' NAME
			{ yyclass = copy_string( nmstr ); }
		|  OPT_HEADER '=' NAME
			{ headerfilename = copy_string( nmstr ); }
		|  OPT_TABLES '=' NAME
			{ tablesext = true; tablesfilename = copy_string( nmstr ); }
		;
209
/* sect2 - section 2: a sequence of rules, each optionally prefixed by a
 * start condition list, or of "<scon>{ ... }" groups containing a nested
 * sect2.
 *
 * The value of scon ($2) is the scon_stk_ptr saved before that scon pushed
 * its start conditions, so assigning it back pops them once the rule or
 * group is finished.
 */
sect2		:  sect2 scon initforrule flexrule '\n'
			{ scon_stk_ptr = $2; }
		|  sect2 scon '{' sect2 '}'
			{ scon_stk_ptr = $2; }
		|
		;
216
/* initforrule - empty production reduced just before each rule (and once
 * more before the default rule in goal).  Resets the per-rule trailing
 * context and length bookkeeping, snapshots continued_action into
 * previous_continued_action, marks that we are inside a rule and calls
 * new_rule().
 */
initforrule	:
			{
			/* Initialize for a parse of one rule. */
			trlcontxt = variable_trail_rule = varlength = false;
			trailcnt = headcnt = rulelen = 0;
			current_state_type = STATE_NORMAL;
			previous_continued_action = continued_action;
			in_rule = true;

			new_rule();
			}
		;
229
/* flexrule - one complete rule.
 *
 *   '^' rule  - beginning-of-line rule; the finished machine is branched
 *               onto scbol[] and bol_needed is set.
 *   rule      - ordinary rule; the finished machine is branched onto
 *               scset[].
 *   EOF_OP    - <<EOF>> rule; handled by build_eof_action().
 *
 * In each case, if the start condition stack is non-empty the rule applies
 * to exactly the stacked conditions; otherwise it applies to every
 * non-exclusive start condition (or, for <<EOF>>, to every start condition
 * that has no EOF action yet).
 */
flexrule	:  '^' rule
			{
			pat = $2;
			finish_rule( pat, variable_trail_rule,
				headcnt, trailcnt , previous_continued_action);

			if ( scon_stk_ptr > 0 )
				{
				for ( i = 1; i <= scon_stk_ptr; ++i )
					scbol[scon_stk[i]] =
						mkbranch( scbol[scon_stk[i]],
								pat );
				}

			else
				{
				/* Add to all non-exclusive start conditions,
				 * including the default (0) start condition.
				 */

				for ( i = 1; i <= lastsc; ++i )
					if ( ! scxclu[i] )
						scbol[i] = mkbranch( scbol[i],
									pat );
				}

			/* Only report the performance hit the first time
			 * a '^' rule is seen.
			 */
			if ( ! bol_needed )
				{
				bol_needed = true;

				if ( performance_report > 1 )
					pinpoint_message(
			"'^' operator results in sub-optimal performance" );
				}
			}

		|  rule
			{
			pat = $1;
			finish_rule( pat, variable_trail_rule,
				headcnt, trailcnt , previous_continued_action);

			if ( scon_stk_ptr > 0 )
				{
				for ( i = 1; i <= scon_stk_ptr; ++i )
					scset[scon_stk[i]] =
						mkbranch( scset[scon_stk[i]],
								pat );
				}

			else
				{
				for ( i = 1; i <= lastsc; ++i )
					if ( ! scxclu[i] )
						scset[i] =
							mkbranch( scset[i],
								pat );
				}
			}

		|  EOF_OP
			{
			if ( scon_stk_ptr > 0 )
				build_eof_action();

			else
				{
				/* This EOF applies to all start conditions
				 * which don't already have EOF actions.
				 */
				for ( i = 1; i <= lastsc; ++i )
					if ( ! sceof[i] )
						scon_stk[++scon_stk_ptr] = i;

				if ( scon_stk_ptr == 0 )
					warn(
			"all start conditions already have <<EOF>> rules" );

				else
					build_eof_action();
				}
			}

		|  error
			{ synerr( _("unrecognized rule") ); }
		;
316
/* scon_stk_ptr - empty marker production (named after the C variable)
 * whose value is the current stack depth.  Used by scon to capture the
 * depth before namelist2 pushes anything.
 */
scon_stk_ptr	:
			{ $$ = scon_stk_ptr; }
		;
320
/* scon - optional start condition prefix of a rule or group.
 *
 * The value is always the stack depth from before this prefix was
 * processed; sect2 assigns it back to scon_stk_ptr to pop the prefix.
 *
 *   <a,b,...>  - names are pushed by sconname.
 *   <*>        - every start condition 1..lastsc that is not already on
 *                the stack is pushed.
 *   (empty)    - nothing is pushed.
 */
scon		:  '<' scon_stk_ptr namelist2 '>'
			{ $$ = $2; }

		|  '<' '*' '>'
			{
			$$ = scon_stk_ptr;

			for ( i = 1; i <= lastsc; ++i )
				{
				int j;

				/* Skip conditions already on the stack. */
				for ( j = 1; j <= scon_stk_ptr; ++j )
					if ( scon_stk[j] == i )
						break;

				if ( j > scon_stk_ptr )
					scon_stk[++scon_stk_ptr] = i;
				}
			}

		|
			{ $$ = scon_stk_ptr; }
		;
344
/* namelist2 - comma-separated list of start condition names inside
 * '<' ... '>'.  The work is done by sconname; only the error alternative
 * has an action here.
 */
namelist2	:  namelist2 ',' sconname

		|  sconname

		|  error
			{ synerr( _("bad start condition list") ); }
		;
352
/* sconname - one start condition name in a rule prefix.
 *
 * Looks the name (nmstr) up with sclookup(); a result of 0 is reported as
 * an undeclared start condition.  Otherwise the condition is pushed on
 * scon_stk unless it is already there, in which case a "specified twice"
 * warning is issued instead.
 */
sconname	:  NAME
			{
			if ( (scnum = sclookup( nmstr )) == 0 )
				format_pinpoint_message(
					"undeclared start condition %s",
					nmstr );
			else
				{
				for ( i = 1; i <= scon_stk_ptr; ++i )
					if ( scon_stk[i] == scnum )
						{
						format_warn(
							"<%s> specified twice",
							scname[scnum] );
						break;
						}

				/* Loop ran to completion: not a duplicate. */
				if ( i > scon_stk_ptr )
					scon_stk[++scon_stk_ptr] = scnum;
				}
			}
		;
375
/* rule - a pattern, with or without trailing context.
 *
 *   re2 re      - "head/trail" trailing context.
 *   re2 re '$'  - error: both '/' and '$' were given.
 *   re '$'      - end-of-line trailing context, built as a trailing
 *                 epsilon state followed by a '\n' state.
 *   re          - plain pattern (trlcontxt can still be set here if an
 *                 earlier reduction recorded trailing context).
 *
 * The actions decide between fixed-length trailing context (trailcnt or
 * headcnt known) and variable trailing context (variable_trail_rule), and
 * force the variable form when the previous rule's action was '|'.
 */
rule		:  re2 re
			{
			if ( transchar[lastst[$2]] != SYM_EPSILON )
				/* Provide final transition \now/ so it
				 * will be marked as a trailing context
				 * state.
				 */
				$2 = link_machines( $2,
						mkstate( SYM_EPSILON ) );

			mark_beginning_as_normal( $2 );
			current_state_type = STATE_NORMAL;

			if ( previous_continued_action )
				{
				/* We need to treat this as variable trailing
				 * context so that the backup does not happen
				 * in the action but before the action switch
				 * statement.  If the backup happens in the
				 * action, then the rules "falling into" this
				 * one's action will *also* do the backup,
				 * erroneously.
				 */
				if ( ! varlength || headcnt != 0 )
					warn(
		"trailing context made variable due to preceding '|' action" );

				/* Mark as variable. */
				varlength = true;
				headcnt = 0;

				}

			if ( lex_compat || (varlength && headcnt == 0) )
				{ /* variable trailing context rule */
				/* Mark the first part of the rule as the
				 * accepting "head" part of a trailing
				 * context rule.
				 *
				 * By the way, we didn't do this at the
				 * beginning of this production because back
				 * then current_state_type was set up for a
				 * trail rule, and add_accept() can create
				 * a new state ...
				 */
				add_accept( $1,
					num_rules | YY_TRAILING_HEAD_MASK );
				variable_trail_rule = true;
				}

			else
				trailcnt = rulelen;

			$$ = link_machines( $1, $2 );
			}

		|  re2 re '$'
			{ synerr( _("trailing context used twice") ); }

		|  re '$'
			{
			/* '$' is a fixed one-character trail ("\n"). */
			headcnt = 0;
			trailcnt = 1;
			rulelen = 1;
			varlength = false;

			current_state_type = STATE_TRAILING_CONTEXT;

			if ( trlcontxt )
				{
				synerr( _("trailing context used twice") );
				/* NOTE(review): this value is overwritten by
				 * the unconditional assignment to $$ at the
				 * end of this action.
				 */
				$$ = mkstate( SYM_EPSILON );
				}

			else if ( previous_continued_action )
				{
				/* See the comment in the rule for "re2 re"
				 * above.
				 */
				warn(
		"trailing context made variable due to preceding '|' action" );

				varlength = true;
				}

			if ( lex_compat || varlength )
				{
				/* Again, see the comment in the rule for
				 * "re2 re" above.
				 */
				add_accept( $1,
					num_rules | YY_TRAILING_HEAD_MASK );
				variable_trail_rule = true;
				}

			trlcontxt = true;

			eps = mkstate( SYM_EPSILON );
			$$ = link_machines( $1,
				link_machines( eps, mkstate( '\n' ) ) );
			}

		|  re
			{
			$$ = $1;

			if ( trlcontxt )
				{
				if ( lex_compat || (varlength && headcnt == 0) )
					/* Both head and trail are
					 * variable-length.
					 */
					variable_trail_rule = true;
				else
					trailcnt = rulelen;
				}
			}
		;
494
495
/* re - regular expression: one or more series joined by '|'.
 * Alternation makes the pattern variable-length (varlength) and the two
 * machines are combined with mkor().
 */
re		:  re '|' series
			{
			varlength = true;
			$$ = mkor( $1, $3 );
			}

		|  series
			{ $$ = $1; }
		;
505
506
/* re2 - the head of a trailing context rule, i.e. "re /".
 *
 * Records that trailing context is in use (a second use is a syntax
 * error).  If the head was variable-length, varlength is cleared so the
 * trail can be measured on its own; otherwise the head length is saved in
 * headcnt.  rulelen is then reset so that it counts only the trail, and
 * states created from here on are typed STATE_TRAILING_CONTEXT.
 */
re2		:  re '/'
			{
			/* This rule is written separately so the
			 * reduction will occur before the trailing
			 * series is parsed.
			 */

			if ( trlcontxt )
				synerr( _("trailing context used twice") );
			else
				trlcontxt = true;

			if ( varlength )
				/* We hope the trailing context is
				 * fixed-length.
				 */
				varlength = false;
			else
				headcnt = rulelen;

			rulelen = 0;

			current_state_type = STATE_TRAILING_CONTEXT;
			$$ = $1;
			}
		;
533
/* series - concatenation of singletons, plus the POSIX-precedence forms
 * of the {} repeat operator, which apply to the whole series so far
 * (see the comment above the BEGIN_REPEAT_* token declarations).
 *
 *   series{m,n}  - m <= n and m >= 0 required; {0,0} is rejected.  A zero
 *                  lower bound is built as an optional mkrep(series, 1, n).
 *   series{m,}   - m must be positive; upper bound is INFINITE_REPEAT.
 *   series{m}    - m must be positive; built as the series followed by
 *                  copysingl(series, m - 1).
 *
 * On a bad bound the series is passed through unchanged after synerr().
 * Every repeat form marks the rule as variable-length.
 */
series		:  series singleton
			{
			/* This is where concatenation of adjacent patterns
			 * gets done.
			 */
			$$ = link_machines( $1, $2 );
			}

		|  singleton
			{ $$ = $1; }

		|  series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX
			{
			varlength = true;

			if ( $3 > $5 || $3 < 0 )
				{
				synerr( _("bad iteration values") );
				$$ = $1;
				}
			else
				{
				if ( $3 == 0 )
					{
					if ( $5 <= 0 )
						{
						synerr(
						_("bad iteration values") );
						$$ = $1;
						}
					else
						$$ = mkopt(
							mkrep( $1, 1, $5 ) );
					}
				else
					$$ = mkrep( $1, $3, $5 );
				}
			}

		|  series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX
			{
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( _("iteration value must be positive") );
				$$ = $1;
				}

			else
				$$ = mkrep( $1, $3, INFINITE_REPEAT );
			}

		|  series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX
			{
			/* The series could be something like "(foo)",
			 * in which case we have no idea what its length
			 * is, so we punt here.
			 */
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( _("iteration value must be positive")
					);
				$$ = $1;
				}

			else
				$$ = link_machines( $1,
						copysingl( $1, $3 - 1 ) );
			}

		;
608
/* singleton - the smallest repeatable unit of a pattern.
 *
 *   singleton '*' / '+' / '?'  - closure, positive closure, optional.
 *   singleton{m,n} {m,} {m}    - flex-precedence repeat; bound checks and
 *                                construction mirror the series forms.
 *   '.'                        - any character; uses cclany when
 *                                sf_dot_all() is true, otherwise ccldot
 *                                (everything except '\n').
 *   fullccl                    - character class, possibly combined with
 *                                set operators.
 *   PREVCCL                    - a character class seen before; $1 is its
 *                                ccl number.
 *   '"' string '"'             - quoted literal.
 *   '(' re ')'                 - grouping.
 *   CHAR                       - single character; in a case-insensitive
 *                                scanner a cased character becomes an
 *                                alternation of both cases.
 *
 * Every operator form sets varlength; every single-character form bumps
 * rulelen by one and records in rule_has_nl[] when it can match newline.
 */
singleton	:  singleton '*'
			{
			varlength = true;

			$$ = mkclos( $1 );
			}

		|  singleton '+'
			{
			varlength = true;
			$$ = mkposcl( $1 );
			}

		|  singleton '?'
			{
			varlength = true;
			$$ = mkopt( $1 );
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX
			{
			varlength = true;

			if ( $3 > $5 || $3 < 0 )
				{
				synerr( _("bad iteration values") );
				$$ = $1;
				}
			else
				{
				if ( $3 == 0 )
					{
					if ( $5 <= 0 )
						{
						synerr(
						_("bad iteration values") );
						$$ = $1;
						}
					else
						$$ = mkopt(
							mkrep( $1, 1, $5 ) );
					}
				else
					$$ = mkrep( $1, $3, $5 );
				}
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX
			{
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( _("iteration value must be positive") );
				$$ = $1;
				}

			else
				$$ = mkrep( $1, $3, INFINITE_REPEAT );
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX
			{
			/* The singleton could be something like "(foo)",
			 * in which case we have no idea what its length
			 * is, so we punt here.
			 */
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( _("iteration value must be positive") );
				$$ = $1;
				}

			else
				$$ = link_machines( $1,
						copysingl( $1, $3 - 1 ) );
			}

		|  '.'
			{
			/* Both '.' classes are built once, on first use. */
			if ( ! madeany )
				{
				/* Create the '.' character class. */
				ccldot = cclinit();
				ccladd( ccldot, '\n' );
				cclnegate( ccldot );

				if ( useecs )
					mkeccl( ccltbl + cclmap[ccldot],
						ccllen[ccldot], nextecm,
						ecgroup, csize, csize );

				/* Create the (?s:'.') character class. */
				cclany = cclinit();
				cclnegate( cclany );

				if ( useecs )
					mkeccl( ccltbl + cclmap[cclany],
						ccllen[cclany], nextecm,
						ecgroup, csize, csize );

				madeany = true;
				}

			++rulelen;

			if (sf_dot_all())
				$$ = mkstate( -cclany );
			else
				$$ = mkstate( -ccldot );
			}

		|  fullccl
			{
				/* Sort characters for fast searching.
				 */
				qsort( ccltbl + cclmap[$1], ccllen[$1], sizeof (*ccltbl), cclcmp );

			if ( useecs )
				mkeccl( ccltbl + cclmap[$1], ccllen[$1],
					nextecm, ecgroup, csize, csize );

			++rulelen;

			if (ccl_has_nl[$1])
				rule_has_nl[num_rules] = true;

			/* A negative symbol denotes a character class. */
			$$ = mkstate( -$1 );
			}

		|  PREVCCL
			{
			++rulelen;

			if (ccl_has_nl[$1])
				rule_has_nl[num_rules] = true;

			$$ = mkstate( -$1 );
			}

		|  '"' string '"'
			{ $$ = $2; }

		|  '(' re ')'
			{ $$ = $2; }

		|  CHAR
			{
			++rulelen;

			if ($1 == nlch)
				rule_has_nl[num_rules] = true;

			if (sf_case_ins() && has_case($1))
				/* create an alternation, as in (a|A) */
				$$ = mkor (mkstate($1), mkstate(reverse_case($1)));
			else
				$$ = mkstate( $1 );
			}
		;
/* fullccl - one bracketed character class, or several combined
 * left-to-right with the set difference and union operators.  The value
 * is the ccl number returned by ccl_set_diff() / ccl_set_union(), or that
 * of the single braceccl (yacc's default $$ = $1).
 */
fullccl:
		fullccl CCL_OP_DIFF  braceccl  { $$ = ccl_set_diff  ($1, $3); }
	|	fullccl CCL_OP_UNION braceccl  { $$ = ccl_set_union ($1, $3); }
	|	braceccl
	;
776
/* braceccl - a bracketed character class, "[...]" or "[^...]".  The
 * negated form calls cclnegate() on the class built by ccl.
 */
braceccl:

		'[' ccl ']' { $$ = $2; }

	|	'[' '^' ccl ']'
			{
			cclnegate( $3 );
			$$ = $3;
			}
	;
787
/* ccl - the contents of a bracketed character class, built left to right.
 *
 *   ccl CHAR '-' CHAR  - range; an inverted range is a syntax error.
 *   ccl CHAR           - single character.
 *   ccl ccl_expr       - POSIX class expression such as [:alpha:].
 *   (empty)            - starts a new class with cclinit() and makes it
 *                        the current one (currccl).
 *
 * cclsorted/lastchar track whether members have been added in ascending
 * order.  In a case-insensitive scanner, cased characters and ranges are
 * added a second time with their case reversed, and ranges whose intent is
 * unclear produce an "ambiguous" warning.
 */
ccl		:  ccl CHAR '-' CHAR
			{

			if (sf_case_ins())
			  {

			    /* If one end of the range has case and the other
			     * does not, or the cases are different, then we're not
			     * sure what range the user is trying to express.
			     * Examples: [@-z] or [S-t]
			     */
			    if (has_case ($2) != has_case ($4)
				     || (has_case ($2) && (b_islower ($2) != b_islower ($4)))
				     || (has_case ($2) && (b_isupper ($2) != b_isupper ($4))))
			      format_warn3 (
			      _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
					    $2, $4);

			    /* If the range spans uppercase characters but not
			     * lowercase (or vice-versa), then should we automatically
			     * include lowercase characters in the range?
			     * Example: [@-_] spans [a-z] but not [A-Z]
			     */
			    else if (!has_case ($2) && !has_case ($4) && !range_covers_case ($2, $4))
			      format_warn3 (
			      _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
					    $2, $4);
			  }

			if ( $2 > $4 )
				synerr( _("negative range in character class") );

			else
				{
				for ( i = $2; i <= $4; ++i )
					ccladd( $1, i );

				/* Keep track if this ccl is staying in
				 * alphabetical order.
				 */
				cclsorted = cclsorted && ($2 > lastchar);
				lastchar = $4;

				/* Do it again for upper/lowercase */
				if (sf_case_ins() && has_case($2) && has_case($4)){
				    $2 = reverse_case ($2);
				    $4 = reverse_case ($4);

				    for ( i = $2; i <= $4; ++i )
					    ccladd( $1, i );

				    cclsorted = cclsorted && ($2 > lastchar);
				    lastchar = $4;
				}

				}

			$$ = $1;
			}

		|  ccl CHAR
			{
			ccladd( $1, $2 );
			cclsorted = cclsorted && ($2 > lastchar);
			lastchar = $2;

			/* Do it again for upper/lowercase */
			if (sf_case_ins() && has_case($2)){
			    $2 = reverse_case ($2);
			    ccladd ($1, $2);

			    cclsorted = cclsorted && ($2 > lastchar);
			    lastchar = $2;
			}

			$$ = $1;
			}

		|  ccl ccl_expr
			{
			/* Too hard to properly maintain cclsorted. */
			cclsorted = false;
			$$ = $1;
			}

		|
			{
			cclsorted = true;
			lastchar = 0;
			currccl = $$ = cclinit();
			}
		;
880
/* ccl_expr - a POSIX character class expression inside brackets.
 *
 * Each token expands into currccl through CCL_EXPR() or CCL_NEG_EXPR()
 * with the matching <ctype.h> predicate.  In a case-insensitive scanner
 * [:lower:] and [:upper:] each add both cases, while the negated forms
 * [:^lower:] and [:^upper:] only warn and add nothing.
 */
ccl_expr:
	   CCE_ALNUM	{ CCL_EXPR(isalnum); }
	|  CCE_ALPHA	{ CCL_EXPR(isalpha); }
	|  CCE_BLANK	{ CCL_EXPR(isblank); }
	|  CCE_CNTRL	{ CCL_EXPR(iscntrl); }
	|  CCE_DIGIT	{ CCL_EXPR(isdigit); }
	|  CCE_GRAPH	{ CCL_EXPR(isgraph); }
	|  CCE_LOWER	{
			  CCL_EXPR(islower);
			  if (sf_case_ins())
			      CCL_EXPR(isupper);
			}
	|  CCE_PRINT	{ CCL_EXPR(isprint); }
	|  CCE_PUNCT	{ CCL_EXPR(ispunct); }
	|  CCE_SPACE	{ CCL_EXPR(isspace); }
	|  CCE_XDIGIT	{ CCL_EXPR(isxdigit); }
	|  CCE_UPPER	{
			  CCL_EXPR(isupper);
			  if (sf_case_ins())
			      CCL_EXPR(islower);
			}

	|  CCE_NEG_ALNUM	{ CCL_NEG_EXPR(isalnum); }
	|  CCE_NEG_ALPHA	{ CCL_NEG_EXPR(isalpha); }
	|  CCE_NEG_BLANK	{ CCL_NEG_EXPR(isblank); }
	|  CCE_NEG_CNTRL	{ CCL_NEG_EXPR(iscntrl); }
	|  CCE_NEG_DIGIT	{ CCL_NEG_EXPR(isdigit); }
	|  CCE_NEG_GRAPH	{ CCL_NEG_EXPR(isgraph); }
	|  CCE_NEG_PRINT	{ CCL_NEG_EXPR(isprint); }
	|  CCE_NEG_PUNCT	{ CCL_NEG_EXPR(ispunct); }
	|  CCE_NEG_SPACE	{ CCL_NEG_EXPR(isspace); }
	|  CCE_NEG_XDIGIT	{ CCL_NEG_EXPR(isxdigit); }
	|  CCE_NEG_LOWER	{
				if ( sf_case_ins() )
					warn(_("[:^lower:] is ambiguous in case insensitive scanner"));
				else
					CCL_NEG_EXPR(islower);
				}
	|  CCE_NEG_UPPER	{
				if ( sf_case_ins() )
					warn(_("[:^upper:] ambiguous in case insensitive scanner"));
				else
					CCL_NEG_EXPR(isupper);
				}
	;
926
/* string - the contents of a quoted literal.
 *
 * Starts as a lone epsilon state and has one state linked on per
 * character.  Each character bumps rulelen; a newline character marks the
 * rule in rule_has_nl[]; in a case-insensitive scanner a cased character
 * becomes an alternation of both cases.
 */
string		:  string CHAR
			{
			if ( $2 == nlch )
				rule_has_nl[num_rules] = true;

			++rulelen;

			if (sf_case_ins() && has_case($2))
				$$ = mkor (mkstate($2), mkstate(reverse_case($2)));
			else
				$$ = mkstate ($2);

			$$ = link_machines( $1, $$);
			}

		|
			{ $$ = mkstate( SYM_EPSILON ); }
		;
945
946 %%
947
948
949 /* build_eof_action - build the "<<EOF>>" action for the active start
950 * conditions
951 */
952
953 void build_eof_action(void)
954 {
955 int i;
956 char action_text[MAXLINE];
957
958 for ( i = 1; i <= scon_stk_ptr; ++i )
959 {
960 if ( sceof[scon_stk[i]] )
961 format_pinpoint_message(
962 "multiple <<EOF>> rules for start condition %s",
963 scname[scon_stk[i]] );
964
965 else
966 {
967 sceof[scon_stk[i]] = true;
968
969 if (previous_continued_action /* && previous action was regular */)
970 add_action("YY_RULE_SETUP\n");
971
972 snprintf( action_text, sizeof(action_text), "case YY_STATE_EOF(%s):\n",
973 scname[scon_stk[i]] );
974 add_action( action_text );
975 }
976 }
977
978 line_directive_out( (FILE *) 0, 1 );
979
980 /* This isn't a normal rule after all - don't count it as
981 * such, so we don't have any holes in the rule numbering
982 * (which make generating "rule can never match" warnings
983 * more difficult.
984 */
985 --num_rules;
986 ++num_eof_rules;
987 }
988
989
990 /* format_synerr - write out formatted syntax error */
991
format_synerr(const char * msg,const char arg[])992 void format_synerr(const char *msg, const char arg[])
993 {
994 char errmsg[MAXLINE];
995
996 (void) snprintf( errmsg, sizeof(errmsg), msg, arg );
997 synerr( errmsg );
998 }
999
1000
1001 /* synerr - report a syntax error */
1002
synerr(const char * str)1003 void synerr(const char *str)
1004 {
1005 syntaxerror = true;
1006 pinpoint_message( str );
1007 }
1008
1009
1010 /* format_warn - write out formatted warning */
1011
format_warn(const char * msg,const char arg[])1012 void format_warn(const char *msg, const char arg[])
1013 {
1014 char warn_msg[MAXLINE];
1015
1016 snprintf( warn_msg, sizeof(warn_msg), msg, arg );
1017 warn( warn_msg );
1018 }
1019
1020
1021 /* warn - report a warning, unless -w was given */
1022
warn(const char * str)1023 void warn(const char *str)
1024 {
1025 line_warning( str, linenum );
1026 }
1027
1028 /* format_pinpoint_message - write out a message formatted with one string,
1029 * pinpointing its location
1030 */
1031
format_pinpoint_message(const char * msg,const char arg[])1032 void format_pinpoint_message(const char *msg, const char arg[])
1033 {
1034 char errmsg[MAXLINE];
1035
1036 snprintf( errmsg, sizeof(errmsg), msg, arg );
1037 pinpoint_message( errmsg );
1038 }
1039
1040
1041 /* pinpoint_message - write out a message, pinpointing its location */
1042
pinpoint_message(const char * str)1043 void pinpoint_message(const char *str)
1044 {
1045 line_pinpoint( str, linenum );
1046 }
1047
1048
1049 /* line_warning - report a warning at a given line, unless -w was given */
1050
line_warning(const char * str,int line)1051 void line_warning(const char *str, int line)
1052 {
1053 char warning[MAXLINE];
1054
1055 if ( ! nowarn )
1056 {
1057 snprintf( warning, sizeof(warning), "warning, %s", str );
1058 line_pinpoint( warning, line );
1059 }
1060 }
1061
1062
1063 /* line_pinpoint - write out a message, pinpointing it at the given line */
1064
line_pinpoint(const char * str,int line)1065 void line_pinpoint(const char *str, int line)
1066 {
1067 fprintf( stderr, "%s:%d: %s\n", infilename, line, str );
1068 }
1069
1070
/* yyerror - eat up an error message from the parser;
 *	     currently, messages are ignored
 *
 * Errors are instead reported by the grammar's own "error" alternatives
 * through synerr().
 */

void yyerror(const char *msg)
{
	/* Intentionally unused. */
	(void) msg;
}
1078