/* $OpenBSD: parse.y,v 1.10 2017/04/12 14:53:27 millert Exp $ */

/* parse.y - parser for flex input */

%token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
%token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS OPT_HEADER OPT_EXTRA_TYPE
%token OPT_TABLES

%token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
%token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT

%token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
%token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT

%left CCL_OP_DIFF CCL_OP_UNION

/*
 * POSIX and AT&T lex place the
 * precedence of the repeat operator, {}, below that of concatenation.
 * Thus, ab{3} is ababab.  Most other POSIX utilities use an Extended
 * Regular Expression (ERE) precedence that has the repeat operator
 * higher than concatenation.  This causes ab{3} to yield abbb.
 *
 * In order to support the POSIX and AT&T precedence and the flex
 * precedence we define two token sets for the begin and end tokens of
 * the repeat operator, '{' and '}'.  The lexical scanner chooses
 * which tokens to return based on whether posix_compat or lex_compat
 * are specified.  Specifying either posix_compat or lex_compat will
 * cause flex to parse scanner files as per the AT&T and
 * POSIX-mandated behavior.
 */

%token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX


%{
/*  Copyright (c) 1990 The Regents of the University of California. */
/*  All rights reserved. */

/*  This code is derived from software contributed to Berkeley by */
/*  Vern Paxson. */

/*  The United States Government has rights in this work pursuant */
/*  to contract no. DE-AC03-76SF00098 between the United States */
/*  Department of Energy and the University of California. */

/*  This file is part of flex. */

/*  Redistribution and use in source and binary forms, with or without */
/*  modification, are permitted provided that the following conditions */
/*  are met: */

/*  1. Redistributions of source code must retain the above copyright */
/*     notice, this list of conditions and the following disclaimer. */
/*  2. Redistributions in binary form must reproduce the above copyright */
/*     notice, this list of conditions and the following disclaimer in the */
/*     documentation and/or other materials provided with the distribution. */

/*  Neither the name of the University nor the names of its contributors */
/*  may be used to endorse or promote products derived from this software */
/*  without specific prior written permission. */

/*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
/*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
/*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/*  PURPOSE. */

#include "flexdef.h"
#include "tables.h"

int pat, scnum, eps, headcnt, trailcnt, lastchar, i, rulelen;
int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule;

int *scon_stk;
int scon_stk_ptr;

static int madeany = false;  /* whether we've made the '.' character class */
static int ccldot, cclany;
int previous_continued_action;	/* whether the previous rule's action was '|' */

/* Format a two-argument warning into a local MAXLINE-sized buffer and
 * hand the result to warn().
 */
#define format_warn3(fmt, a1, a2) \
	do{ \
        char fw3_msg[MAXLINE];\
        snprintf( fw3_msg, sizeof(fw3_msg),(fmt), (a1), (a2) );\
        warn( fw3_msg );\
	}while(0)

/* Expand a POSIX character class expression: add to currccl every
 * character code below csize that is ASCII and satisfies func().
 */
#define CCL_EXPR(func) \
	do{ \
	int c; \
	for ( c = 0; c < csize; ++c ) \
		if ( isascii(c) && func(c) ) \
			ccladd( currccl, c ); \
	}while(0)

/* negated class: add to currccl every character code below csize for
 * which func() is false.  Unlike CCL_EXPR there is no isascii() filter.
 */
#define CCL_NEG_EXPR(func) \
	do{ \
	int c; \
	for ( c = 0; c < csize; ++c ) \
		if ( !func(c) ) \
            ccladd( currccl, c ); \
	}while(0)

/* On some over-ambitious machines, such as DEC Alpha's, the default
 * token type is "long" instead of "int"; this leads to problems with
 * declaring yylval in flexdef.h.  But so far, all the yacc's I've seen
 * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the
 * following should ensure that the default token type is "int".
 */
#define YYSTYPE int

%}

%%
goal		:  initlex sect1 sect1end sect2 initforrule
			{ /* add default rule */
			int def_rule;

			pat = cclinit();
			cclnegate( pat );

			def_rule = mkstate( -pat );

			/* Remember the number of the default rule so we
			 * don't generate "can't match" warnings for it.
			 */
			default_rule = num_rules;

			finish_rule( def_rule, false, 0, 0, 0);

			for ( i = 1; i <= lastsc; ++i )
				scset[i] = mkbranch( scset[i], def_rule );

			if ( spprdflt )
				add_action(
				"YY_FATAL_ERROR( \"flex scanner jammed\" )" );
			else
				add_action( "ECHO" );

			add_action( ";\n\tYY_BREAK\n" );
			}
		;

initlex		:
			{ /* initialize for processing rules */

			/* Create default DFA start condition. */
			scinstal( "INITIAL", false );
			}
		;

sect1		:  sect1 startconddecl namelist1
		|  sect1 options
		|
		|  error
			{ synerr( _("unknown error processing section 1") ); }
		;

/* End of section 1: run check_options() and allocate the start
 * condition stack with lastsc + 1 entries.
 */
sect1end	:  SECTEND
			{
			check_options();
			scon_stk = allocate_integer_array( lastsc + 1 );
			scon_stk_ptr = 0;
			}
		;

/* xcluflg records whether the names that follow are declared with
 * XSCDECL (true) or SCDECL (false); it is passed to scinstal().
 */
startconddecl	:  SCDECL
			{ xcluflg = false; }

		|  XSCDECL
			{ xcluflg = true; }
		;

namelist1	:  namelist1 NAME
			{ scinstal( nmstr, xcluflg ); }

		|  NAME
			{ scinstal( nmstr, xcluflg ); }

		|  error
			{ synerr( _("bad start condition list") ); }
		;

options		:  OPTION_OP optionlist
		;

optionlist	:  optionlist option
		|
		;

/* Each option stores a copy of the NAME text (nmstr) in the matching
 * global.
 */
option		:  OPT_OUTFILE '=' NAME
			{
			outfilename = copy_string( nmstr );
			did_outfilename = 1;
			}
		|  OPT_EXTRA_TYPE '=' NAME
			{ extra_type = copy_string( nmstr ); }
		|  OPT_PREFIX '=' NAME
			{ prefix = copy_string( nmstr ); }
		|  OPT_YYCLASS '=' NAME
			{ yyclass = copy_string( nmstr ); }
		|  OPT_HEADER '=' NAME
			{ headerfilename = copy_string( nmstr ); }
		|  OPT_TABLES '=' NAME
			{ tablesext = true; tablesfilename = copy_string( nmstr ); }
		;

/* After each rule (or braced group of rules) the start condition stack
 * pointer is reset to the value that scon yielded.
 */
sect2		:  sect2 scon initforrule flexrule '\n'
			{ scon_stk_ptr = $2; }
		|  sect2 scon '{' sect2 '}'
			{ scon_stk_ptr = $2; }
		|
		;

initforrule	:
			{
			/* Initialize for a parse of one rule. */
			trlcontxt = variable_trail_rule = varlength = false;
			trailcnt = headcnt = rulelen = 0;
			current_state_type = STATE_NORMAL;
			previous_continued_action = continued_action;
			in_rule = true;

			new_rule();
			}
		;

flexrule	:  '^' rule
			{
			pat = $2;
			finish_rule( pat, variable_trail_rule,
				headcnt, trailcnt , previous_continued_action);

			if ( scon_stk_ptr > 0 )
				{
				for ( i = 1; i <= scon_stk_ptr; ++i )
					scbol[scon_stk[i]] =
						mkbranch( scbol[scon_stk[i]],
								pat );
				}

			else
				{
				/* Add to all non-exclusive start conditions,
				 * including the default (0) start condition.
				 */

				for ( i = 1; i <= lastsc; ++i )
					if ( ! scxclu[i] )
						scbol[i] = mkbranch( scbol[i],
									pat );
				}

			if ( ! bol_needed )
				{
				bol_needed = true;

				if ( performance_report > 1 )
					pinpoint_message(
			"'^' operator results in sub-optimal performance" );
				}
			}

		|  rule
			{
			pat = $1;
			finish_rule( pat, variable_trail_rule,
				headcnt, trailcnt , previous_continued_action);

			if ( scon_stk_ptr > 0 )
				{
				for ( i = 1; i <= scon_stk_ptr; ++i )
					scset[scon_stk[i]] =
						mkbranch( scset[scon_stk[i]],
								pat );
				}

			else
				{
				for ( i = 1; i <= lastsc; ++i )
					if ( ! scxclu[i] )
						scset[i] =
							mkbranch( scset[i],
								pat );
				}
			}

		|  EOF_OP
			{
			if ( scon_stk_ptr > 0 )
				build_eof_action();

			else
				{
				/* This EOF applies to all start conditions
				 * which don't already have EOF actions.
				 */
				for ( i = 1; i <= lastsc; ++i )
					if ( ! sceof[i] )
						scon_stk[++scon_stk_ptr] = i;

				if ( scon_stk_ptr == 0 )
					warn(
			"all start conditions already have <<EOF>> rules" );

				else
					build_eof_action();
				}
			}

		|  error
			{ synerr( _("unrecognized rule") ); }
		;

/* Empty marker rule whose value is the current start condition stack
 * pointer, taken before namelist2 pushes anything.
 */
scon_stk_ptr	:
			{ $$ = scon_stk_ptr; }
		;

scon		:  '<' scon_stk_ptr namelist2 '>'
			{ $$ = $2; }

		|  '<' '*' '>'
			{
			$$ = scon_stk_ptr;

			for ( i = 1; i <= lastsc; ++i )
				{
				int j;

				for ( j = 1; j <= scon_stk_ptr; ++j )
					if ( scon_stk[j] == i )
						break;

				if ( j > scon_stk_ptr )
					scon_stk[++scon_stk_ptr] = i;
				}
			}

		|
			{ $$ = scon_stk_ptr; }
		;

namelist2	:  namelist2 ',' sconname

		|  sconname

		|  error
			{ synerr( _("bad start condition list") ); }
		;

/* Look up the named start condition and push it on the stack unless it
 * is undeclared or already present.
 */
sconname	:  NAME
			{
			if ( (scnum = sclookup( nmstr )) == 0 )
				format_pinpoint_message(
					"undeclared start condition %s",
					nmstr );
			else
				{
				for ( i = 1; i <= scon_stk_ptr; ++i )
					if ( scon_stk[i] == scnum )
						{
						format_warn(
							"<%s> specified twice",
							scname[scnum] );
						break;
						}

				if ( i > scon_stk_ptr )
					scon_stk[++scon_stk_ptr] = scnum;
				}
			}
		;

rule		:  re2 re
			{
			if ( transchar[lastst[$2]] != SYM_EPSILON )
				/* Provide final transition \now/ so it
				 * will be marked as a trailing context
				 * state.
				 */
				$2 = link_machines( $2,
						mkstate( SYM_EPSILON ) );

			mark_beginning_as_normal( $2 );
			current_state_type = STATE_NORMAL;

			if ( previous_continued_action )
				{
				/* We need to treat this as variable trailing
				 * context so that the backup does not happen
				 * in the action but before the action switch
				 * statement.  If the backup happens in the
				 * action, then the rules "falling into" this
				 * one's action will *also* do the backup,
				 * erroneously.
				 */
				if ( ! varlength || headcnt != 0 )
					warn(
		"trailing context made variable due to preceding '|' action" );

				/* Mark as variable. */
				varlength = true;
				headcnt = 0;

				}

			if ( lex_compat || (varlength && headcnt == 0) )
				{ /* variable trailing context rule */
				/* Mark the first part of the rule as the
				 * accepting "head" part of a trailing
				 * context rule.
				 *
				 * By the way, we didn't do this at the
				 * beginning of this production because back
				 * then current_state_type was set up for a
				 * trail rule, and add_accept() can create
				 * a new state ...
				 */
				add_accept( $1,
					num_rules | YY_TRAILING_HEAD_MASK );
				variable_trail_rule = true;
				}

			else
				trailcnt = rulelen;

			$$ = link_machines( $1, $2 );
			}

		|  re2 re '$'
			{ synerr( _("trailing context used twice") ); }

		|  re '$'
			{
			headcnt = 0;
			trailcnt = 1;
			rulelen = 1;
			varlength = false;

			current_state_type = STATE_TRAILING_CONTEXT;

			if ( trlcontxt )
				{
				synerr( _("trailing context used twice") );
				$$ = mkstate( SYM_EPSILON );
				}

			else if ( previous_continued_action )
				{
				/* See the comment in the rule for "re2 re"
				 * above.
				 */
				warn(
		"trailing context made variable due to preceding '|' action" );

				varlength = true;
				}

			if ( lex_compat || varlength )
				{
				/* Again, see the comment in the rule for
				 * "re2 re" above.
				 */
				add_accept( $1,
					num_rules | YY_TRAILING_HEAD_MASK );
				variable_trail_rule = true;
				}

			trlcontxt = true;

			eps = mkstate( SYM_EPSILON );
			$$ = link_machines( $1,
				link_machines( eps, mkstate( '\n' ) ) );
			}

		|  re
			{
			$$ = $1;

			if ( trlcontxt )
				{
				if ( lex_compat || (varlength && headcnt == 0) )
					/* Both head and trail are
					 * variable-length.
					 */
					variable_trail_rule = true;
				else
					trailcnt = rulelen;
				}
			}
		;


re		:  re '|' series
			{
			varlength = true;
			$$ = mkor( $1, $3 );
			}

		|  series
			{ $$ = $1; }
		;


re2		:  re '/'
			{
			/* This rule is written separately so the
			 * reduction will occur before the trailing
			 * series is parsed.
			 */

			if ( trlcontxt )
				synerr( _("trailing context used twice") );
			else
				trlcontxt = true;

			if ( varlength )
				/* We hope the trailing context is
				 * fixed-length.
				 */
				varlength = false;
			else
				headcnt = rulelen;

			rulelen = 0;

			current_state_type = STATE_TRAILING_CONTEXT;
			$$ = $1;
			}
		;

series		:  series singleton
			{
			/* This is where concatenation of adjacent patterns
			 * gets done.
			 */
			$$ = link_machines( $1, $2 );
			}

		|  singleton
			{ $$ = $1; }

		|  series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX
			{
			varlength = true;

			if ( $3 > $5 || $3 < 0 )
				{
				synerr( _("bad iteration values") );
				$$ = $1;
				}
			else
				{
				if ( $3 == 0 )
					{
					if ( $5 <= 0 )
						{
						synerr(
						_("bad iteration values") );
						$$ = $1;
						}
					else
						$$ = mkopt(
							mkrep( $1, 1, $5 ) );
					}
				else
					$$ = mkrep( $1, $3, $5 );
				}
			}

		|  series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX
			{
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( _("iteration value must be positive") );
				$$ = $1;
				}

			else
				$$ = mkrep( $1, $3, INFINITE_REPEAT );
			}

		|  series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX
			{
			/* The series could be something like "(foo)",
			 * in which case we have no idea what its length
			 * is, so we punt here.
			 */
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( _("iteration value must be positive")
					);
				$$ = $1;
				}

			else
				$$ = link_machines( $1,
						copysingl( $1, $3 - 1 ) );
			}

		;

singleton	:  singleton '*'
			{
			varlength = true;

			$$ = mkclos( $1 );
			}

		|  singleton '+'
			{
			varlength = true;
			$$ = mkposcl( $1 );
			}

		|  singleton '?'
			{
			varlength = true;
			$$ = mkopt( $1 );
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX
			{
			varlength = true;

			if ( $3 > $5 || $3 < 0 )
				{
				synerr( _("bad iteration values") );
				$$ = $1;
				}
			else
				{
				if ( $3 == 0 )
					{
					if ( $5 <= 0 )
						{
						synerr(
						_("bad iteration values") );
						$$ = $1;
						}
					else
						$$ = mkopt(
							mkrep( $1, 1, $5 ) );
					}
				else
					$$ = mkrep( $1, $3, $5 );
				}
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX
			{
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( _("iteration value must be positive") );
				$$ = $1;
				}

			else
				$$ = mkrep( $1, $3, INFINITE_REPEAT );
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX
			{
			/* The singleton could be something like "(foo)",
			 * in which case we have no idea what its length
			 * is, so we punt here.
			 */
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( _("iteration value must be positive") );
				$$ = $1;
				}

			else
				$$ = link_machines( $1,
						copysingl( $1, $3 - 1 ) );
			}

		|  '.'
			{
			if ( ! madeany )
				{
				/* Create the '.' character class. */
				ccldot = cclinit();
				ccladd( ccldot, '\n' );
				cclnegate( ccldot );

				if ( useecs )
					mkeccl( ccltbl + cclmap[ccldot],
						ccllen[ccldot], nextecm,
						ecgroup, csize, csize );

				/* Create the (?s:'.') character class. */
				cclany = cclinit();
				cclnegate( cclany );

				if ( useecs )
					mkeccl( ccltbl + cclmap[cclany],
						ccllen[cclany], nextecm,
						ecgroup, csize, csize );

				madeany = true;
				}

			++rulelen;

			if (sf_dot_all())
				$$ = mkstate( -cclany );
			else
				$$ = mkstate( -ccldot );
			}

		|  fullccl
			{
			/* Sort characters for fast searching.
			 */
			qsort( ccltbl + cclmap[$1], ccllen[$1], sizeof (*ccltbl), cclcmp );

			if ( useecs )
				mkeccl( ccltbl + cclmap[$1], ccllen[$1],
					nextecm, ecgroup, csize, csize );

			++rulelen;

			if (ccl_has_nl[$1])
				rule_has_nl[num_rules] = true;

			$$ = mkstate( -$1 );
			}

		|  PREVCCL
			{
			++rulelen;

			if (ccl_has_nl[$1])
				rule_has_nl[num_rules] = true;

			$$ = mkstate( -$1 );
			}

		|  '"' string '"'
			{ $$ = $2; }

		|  '(' re ')'
			{ $$ = $2; }

		|  CHAR
			{
			++rulelen;

			if ($1 == nlch)
				rule_has_nl[num_rules] = true;

			if (sf_case_ins() && has_case($1))
				/* create an alternation, as in (a|A) */
				$$ = mkor (mkstate($1), mkstate(reverse_case($1)));
			else
				$$ = mkstate( $1 );
			}
		;
fullccl:
		fullccl CCL_OP_DIFF  braceccl  { $$ = ccl_set_diff  ($1, $3); }
	|	fullccl CCL_OP_UNION braceccl  { $$ = ccl_set_union ($1, $3); }
	|	braceccl
	;

braceccl:

		'[' ccl ']' { $$ = $2; }

	|	'[' '^' ccl ']'
			{
			cclnegate( $3 );
			$$ = $3;
			}
	;

ccl		:  ccl CHAR '-' CHAR
			{

			if (sf_case_ins())
			  {

			    /* If one end of the range has case and the other
			     * does not, or the cases are different, then we're not
			     * sure what range the user is trying to express.
			     * Examples: [@-z] or [S-t]
			     */
			    if (has_case ($2) != has_case ($4)
				     || (has_case ($2) && (b_islower ($2) != b_islower ($4)))
				     || (has_case ($2) && (b_isupper ($2) != b_isupper ($4))))
			      format_warn3 (
			      _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
					    $2, $4);

			    /* If the range spans uppercase characters but not
			     * lowercase (or vice-versa), then should we automatically
			     * include lowercase characters in the range?
			     * Example: [@-_] spans [a-z] but not [A-Z]
			     */
			    else if (!has_case ($2) && !has_case ($4) && !range_covers_case ($2, $4))
			      format_warn3 (
			      _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
					    $2, $4);
			  }

			if ( $2 > $4 )
				synerr( _("negative range in character class") );

			else
				{
				for ( i = $2; i <= $4; ++i )
					ccladd( $1, i );

				/* Keep track if this ccl is staying in
				 * alphabetical order.
				 */
				cclsorted = cclsorted && ($2 > lastchar);
				lastchar = $4;

				/* Do it again for upper/lowercase */
				if (sf_case_ins() && has_case($2) && has_case($4)){
					$2 = reverse_case ($2);
					$4 = reverse_case ($4);

					for ( i = $2; i <= $4; ++i )
						ccladd( $1, i );

					cclsorted = cclsorted && ($2 > lastchar);
					lastchar = $4;
				}

				}

			$$ = $1;
			}

		|  ccl CHAR
			{
			ccladd( $1, $2 );
			cclsorted = cclsorted && ($2 > lastchar);
			lastchar = $2;

			/* Do it again for upper/lowercase */
			if (sf_case_ins() && has_case($2)){
				$2 = reverse_case ($2);
				ccladd ($1, $2);

				cclsorted = cclsorted && ($2 > lastchar);
				lastchar = $2;
			}

			$$ = $1;
			}

		|  ccl ccl_expr
			{
			/* Too hard to properly maintain cclsorted. */
			cclsorted = false;
			$$ = $1;
			}

		|
			{
			cclsorted = true;
			lastchar = 0;
			currccl = $$ = cclinit();
			}
		;

ccl_expr:
		CCE_ALNUM	{ CCL_EXPR(isalnum); }
	|	CCE_ALPHA	{ CCL_EXPR(isalpha); }
	|	CCE_BLANK	{ CCL_EXPR(isblank); }
	|	CCE_CNTRL	{ CCL_EXPR(iscntrl); }
	|	CCE_DIGIT	{ CCL_EXPR(isdigit); }
	|	CCE_GRAPH	{ CCL_EXPR(isgraph); }
	|	CCE_LOWER	{
				CCL_EXPR(islower);
				if (sf_case_ins())
					CCL_EXPR(isupper);
			}
	|	CCE_PRINT	{ CCL_EXPR(isprint); }
	|	CCE_PUNCT	{ CCL_EXPR(ispunct); }
	|	CCE_SPACE	{ CCL_EXPR(isspace); }
	|	CCE_XDIGIT	{ CCL_EXPR(isxdigit); }
	|	CCE_UPPER	{
				CCL_EXPR(isupper);
				if (sf_case_ins())
					CCL_EXPR(islower);
			}

	|	CCE_NEG_ALNUM	{ CCL_NEG_EXPR(isalnum); }
	|	CCE_NEG_ALPHA	{ CCL_NEG_EXPR(isalpha); }
	|	CCE_NEG_BLANK	{ CCL_NEG_EXPR(isblank); }
	|	CCE_NEG_CNTRL	{ CCL_NEG_EXPR(iscntrl); }
	|	CCE_NEG_DIGIT	{ CCL_NEG_EXPR(isdigit); }
	|	CCE_NEG_GRAPH	{ CCL_NEG_EXPR(isgraph); }
	|	CCE_NEG_PRINT	{ CCL_NEG_EXPR(isprint); }
	|	CCE_NEG_PUNCT	{ CCL_NEG_EXPR(ispunct); }
	|	CCE_NEG_SPACE	{ CCL_NEG_EXPR(isspace); }
	|	CCE_NEG_XDIGIT	{ CCL_NEG_EXPR(isxdigit); }
	|	CCE_NEG_LOWER	{
				if ( sf_case_ins() )
					warn(_("[:^lower:] is ambiguous in case insensitive scanner"));
				else
					CCL_NEG_EXPR(islower);
			}
	|	CCE_NEG_UPPER	{
				if ( sf_case_ins() )
					warn(_("[:^upper:] ambiguous in case insensitive scanner"));
				else
					CCL_NEG_EXPR(isupper);
			}
	;

string		:  string CHAR
			{
			if ( $2 == nlch )
				rule_has_nl[num_rules] = true;

			++rulelen;

			if (sf_case_ins() && has_case($2))
				$$ = mkor (mkstate($2), mkstate(reverse_case($2)));
			else
				$$ = mkstate ($2);

			$$ = link_machines( $1, $$);
			}

		|
			{ $$ = mkstate( SYM_EPSILON ); }
		;

%%


/* build_eof_action - build the "<<EOF>>" action for the active start
 *                    conditions
 *
 * Walks scon_stk[1..scon_stk_ptr].  A start condition that already has
 * its sceof flag set gets a "multiple <<EOF>> rules" message; otherwise
 * the flag is set and a "case YY_STATE_EOF(name):" label is appended to
 * the action text.
 */

void build_eof_action(void)
	{
	int i;
	char action_text[MAXLINE];

	for ( i = 1; i <= scon_stk_ptr; ++i )
		{
		if ( sceof[scon_stk[i]] )
			format_pinpoint_message(
				"multiple <<EOF>> rules for start condition %s",
				scname[scon_stk[i]] );

		else
			{
			sceof[scon_stk[i]] = true;

			if (previous_continued_action /* && previous action was regular */)
				add_action("YY_RULE_SETUP\n");

			snprintf( action_text, sizeof(action_text), "case YY_STATE_EOF(%s):\n",
				scname[scon_stk[i]] );
			add_action( action_text );
			}
		}

	line_directive_out( (FILE *) 0, 1 );

	/* This isn't a normal rule after all - don't count it as
	 * such, so we don't have any holes in the rule numbering
	 * (which make generating "rule can never match" warnings
	 * more difficult).
	 */
	--num_rules;
	++num_eof_rules;
	}


/* format_synerr - write out formatted syntax error
 *
 * msg is used as the snprintf() format and arg as its single argument;
 * the result is truncated to MAXLINE bytes and passed to synerr().
 */

void format_synerr(const char *msg, const char arg[])
	{
	char errmsg[MAXLINE];

	(void) snprintf( errmsg, sizeof(errmsg), msg, arg );
	synerr( errmsg );
	}


/* synerr - report a syntax error
 *
 * Sets the global syntaxerror flag and reports str via pinpoint_message().
 */

void synerr(const char *str)
	{
	syntaxerror = true;
	pinpoint_message( str );
	}


/* format_warn - write out formatted warning
 *
 * msg is used as the snprintf() format and arg as its single argument;
 * the result is truncated to MAXLINE bytes and passed to warn().
 */

void format_warn(const char *msg, const char arg[])
	{
	char warn_msg[MAXLINE];

	snprintf( warn_msg, sizeof(warn_msg), msg, arg );
	warn( warn_msg );
	}


/* warn - report a warning, unless -w was given
 *
 * Forwards str to line_warning() with the current linenum.
 */

void warn(const char *str)
	{
	line_warning( str, linenum );
	}

/* format_pinpoint_message - write out a message formatted with one string,
 *			     pinpointing its location
 */

void format_pinpoint_message(const char *msg, const char arg[])
	{
	char errmsg[MAXLINE];

	snprintf( errmsg, sizeof(errmsg), msg, arg );
	pinpoint_message( errmsg );
	}


/* pinpoint_message - write out a message, pinpointing its location */

void pinpoint_message(const char *str)
	{
	line_pinpoint( str, linenum );
	}


/* line_warning - report a warning at a given line, unless -w was given
 *
 * Nothing is printed when nowarn is set; otherwise str is prefixed with
 * "warning, " and passed to line_pinpoint().
 */

void line_warning(const char *str, int line)
	{
	char warning[MAXLINE];

	if ( ! nowarn )
		{
		snprintf( warning, sizeof(warning), "warning, %s", str );
		line_pinpoint( warning, line );
		}
	}


/* line_pinpoint - write out a message, pinpointing it at the given line
 *
 * Prints "infilename:line: str" followed by a newline on stderr.
 */

void line_pinpoint(const char *str, int line)
	{
	fprintf( stderr, "%s:%d: %s\n", infilename, line, str );
	}


/* yyerror - eat up an error message from the parser;
 *	     currently, messages are ignored
 */

void yyerror(const char *msg)
	{
	(void) msg;	/* intentionally unused */
	}