/* scan.l - scanner for flex input */

%{
/*-
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Vern Paxson.
 *
 * The United States Government has rights in this work pursuant
 * to contract no. DE-AC03-76SF00098 between the United States
 * Department of Energy and the University of California.
 *
 * Redistribution and use in source and binary forms are permitted provided
 * that: (1) source distributions retain this entire copyright notice and
 * comment, and (2) distributions including binaries display the following
 * acknowledgement: ``This product includes software developed by the
 * University of California, Berkeley and its contributors'' in the
 * documentation or other materials provided with the distribution and in
 * all advertising materials mentioning features or use of this software.
 * Neither the name of the University nor the names of its contributors may
 * be used to endorse or promote products derived from this software without
 * specific prior written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 */

/* $Header: scan.l,v 1.2 94/01/04 14:33:09 vern Exp $ */

#include "flexdef.h"
#include "parse.h"

/* Hand the text of the current token to add_action(). */
#define ACTION_ECHO add_action( yytext )
#define MARK_END_OF_PROLOG mark_prolog();

/* The generated scanning routine is named flexscan(). */
#define YY_DECL \
	int flexscan()

/* Return the first character of the token as a CHAR token; its value
 * is passed in yylval.
 */
#define RETURNCHAR \
	yylval = (unsigned char) yytext[0]; \
	return CHAR;

/* Save the token text in nmstr (bounded, see scan_copy_token) and
 * return a NAME token.
 */
#define RETURNNAME \
	scan_copy_token( nmstr, yytext ); \
	return NAME;

/* Push back the characters of str, from the last one down to index
 * start, so that they are scanned again in their original order.
 * Uses the variable i declared at the top of the rules section.
 */
#define PUT_BACK_STRING(str, start) \
	for ( i = strlen( str ) - 1; i >= start; --i ) \
		unput((str)[i])

#define CHECK_REJECT(str) \
	if ( all_upper( str ) ) \
		reject = true;

#define CHECK_YYMORE(str) \
	if ( all_lower( str ) ) \
		yymore_used = true;

/* scan_copy_token - bounded copy of a scanned token
 *
 * Copies at most MAXLINE - 1 characters of src into dst and always
 * terminates dst with a NUL.  When src does not fit, the problem is
 * reported with synerr() and dst is left holding the truncated text.
 *
 * This replaces plain strcpy() calls, which wrote past the end of the
 * destination whenever a token was MAXLINE characters or longer.
 * nmdef is MAXLINE bytes (see the top of the rules section).  nmstr is
 * declared outside this file; it is assumed to hold at least MAXLINE
 * bytes as well -- TODO confirm against its declaration.
 */
static void scan_copy_token( dst, src )
char *dst, *src;
	{
	int n;

	for ( n = 0; n < MAXLINE - 1 && src[n] != '\0'; ++n )
		dst[n] = src[n];

	dst[n] = '\0';

	if ( src[n] != '\0' )
		synerr( "input token too long" );
	}
%}

/* Exclusive start conditions.  Each one selects the group of rules
 * below that is tagged with its name.
 */
%x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE
%x FIRSTCCL CCL ACTION RECOVER BRACEERROR C_COMMENT ACTION_COMMENT
%x ACTION_STRING PERCENT_BRACE_ACTION USED_LIST CODEBLOCK_2

WS		[ \t]+
OPTWS		[ \t]*
NOT_WS		[^ \t\n]

NL		(\n|\r\n|\n\r)

NAME		([a-z_][a-z_0-9-]*)
NOT_NAME	[^a-z_*\n]+

SCNAME		{NAME}

ESCSEQ		(\\([^\n]|[0-9]{1,3}|x[0-9a-f]{1,2}))

FIRST_CCL_CHAR	([^\\\n]|{ESCSEQ})
CCL_CHAR	([^\\\n\]]|{ESCSEQ})

%%
	static int bracelevel, didadef, indented_code, checking_used;

	int doing_codeblock = false;
	int i;
	Char nmdef[MAXLINE], myesc();


^{WS}			indented_code = true; BEGIN(CODEBLOCK);
^"/*"			ACTION_ECHO; BEGIN(C_COMMENT);
^"%s"{NAME}?		return SCDECL;
^"%x"{NAME}?		return XSCDECL;
^"%{".*{NL}		{
			++linenum;
			line_directive_out( (FILE *) 0 );
			indented_code = false;
			BEGIN(CODEBLOCK);
			}

{WS}			return WHITESPACE;

^"%%".*			{
			sectnum = 2;
			bracelevel = 0;
			mark_defs1();
			line_directive_out( (FILE *) 0 );
			BEGIN(SECT2PROLOG);
			return SECTEND;
			}

^"%pointer".*{NL}	{
			if ( lex_compat )
				warn( "%pointer incompatible with -l option" );
			else
				yytext_is_array = false;
			++linenum;
			}
^"%array".*{NL}		{
			if ( C_plus_plus )
				warn( "%array incompatible with -+ option" );
			else
				yytext_is_array = true;
			++linenum;
			}

^"%used"		{
			warn( "%used/%unused have been deprecated" );
			checking_used = REALLY_USED; BEGIN(USED_LIST);
			}
^"%unused"		{
			warn( "%used/%unused have been deprecated" );
			checking_used = REALLY_NOT_USED; BEGIN(USED_LIST);
			}


^"%"[aceknopr]{OPTWS}[0-9]*{OPTWS}{NL}	++linenum;	/* ignore */

^"%"[^sxanpekotcru{}].*	synerr( "unrecognized '%' directive" );

^{NAME}			{
			/* Start of a name definition: remember the name,
			 * then pick up its right-hand side.
			 */
			scan_copy_token( nmstr, yytext );
			didadef = false;
			BEGIN(PICKUPDEF);
			}

{SCNAME}		RETURNNAME;
^{OPTWS}{NL}		++linenum; /* allows blank lines in section 1 */
{OPTWS}{NL}		++linenum; return '\n';


	/* Unindented comment in section 1: passed to add_action() up to
	 * and including its closing delimiter.
	 */
<C_COMMENT>"*/"		ACTION_ECHO; BEGIN(INITIAL);
<C_COMMENT>"*/".*{NL}	++linenum; ACTION_ECHO; BEGIN(INITIAL);
<C_COMMENT>[^*\n]+	ACTION_ECHO;
<C_COMMENT>"*"		ACTION_ECHO;
<C_COMMENT>{NL}		++linenum; ACTION_ECHO;


	/* Code block in section 1.  An indented block ends at the first
	 * newline; a block opened with a percent-brace line ends at the
	 * matching closing line.
	 */
<CODEBLOCK>^"%}".*{NL}	++linenum; BEGIN(INITIAL);
<CODEBLOCK>"reject"	ACTION_ECHO; CHECK_REJECT(yytext);
<CODEBLOCK>"yymore"	ACTION_ECHO; CHECK_YYMORE(yytext);
<CODEBLOCK>{NAME}|{NOT_NAME}|.	ACTION_ECHO;
<CODEBLOCK>{NL}		{
			++linenum;
			ACTION_ECHO;
			if ( indented_code )
				BEGIN(INITIAL);
			}


<PICKUPDEF>{WS}		/* separates name and definition */

<PICKUPDEF>{NOT_WS}.*	{
			/* Bounded copy: the rest of the line can be
			 * longer than nmdef.
			 */
			scan_copy_token( (char *) nmdef, yytext );

			/* Skip trailing whitespace. */
			for ( i = strlen( (char *) nmdef ) - 1;
			      i >= 0 && (nmdef[i] == ' ' || nmdef[i] == '\t');
			      --i )
				;

			nmdef[i + 1] = '\0';

			ndinstal( nmstr, nmdef );
			didadef = true;
			}

<PICKUPDEF>{NL}		{
			if ( ! didadef )
				synerr( "incomplete name definition" );
			BEGIN(INITIAL);
			++linenum;
			}

<RECOVER>.*{NL}		++linenum; BEGIN(INITIAL); RETURNNAME;


	/* Arguments of the deprecated %used and %unused directives. */
<USED_LIST>{NL}		++linenum; BEGIN(INITIAL);
<USED_LIST>{WS}
<USED_LIST>"reject"	{
			if ( all_upper( yytext ) )
				reject_really_used = checking_used;
			else
				synerr(
				"unrecognized %used/%unused construct" );
			}
<USED_LIST>"yymore"	{
			if ( all_lower( yytext ) )
				yymore_really_used = checking_used;
			else
				synerr(
				"unrecognized %used/%unused construct" );
			}
<USED_LIST>{NOT_WS}+	synerr( "unrecognized %used/%unused construct" );


	/* Text between the first section separator and the first rule.
	 * It is passed to add_action() until a non-indented line is seen
	 * while bracelevel is not positive.
	 */
<SECT2PROLOG>^"%{".*	++bracelevel; yyless( 2 );	/* eat only %{ */
<SECT2PROLOG>^"%}".*	--bracelevel; yyless( 2 );	/* eat only %} */

<SECT2PROLOG>^{WS}.*	ACTION_ECHO;	/* indented code in prolog */

<SECT2PROLOG>^{NOT_WS}.*	{	/* non-indented code */
			if ( bracelevel <= 0 )
				{ /* not in %{ ... %} */
				yyless( 0 );	/* put it all back */
				mark_prolog();
				BEGIN(SECT2);
				}
			else
				ACTION_ECHO;
			}

<SECT2PROLOG>.*		ACTION_ECHO;
<SECT2PROLOG>{NL}	++linenum; ACTION_ECHO;

<SECT2PROLOG><<EOF>>	{
			mark_prolog();
			sectnum = 0;
			yyterminate(); /* to stop the parser */
			}

<SECT2>^{OPTWS}{NL}	++linenum; /* allow blank lines in section 2 */

<SECT2>^({WS}|"%{")	{
			indented_code = (yytext[0] != '%');
			doing_codeblock = true;
			bracelevel = 1;

			if ( indented_code )
				ACTION_ECHO;

			BEGIN(CODEBLOCK_2);
			}

<SECT2>^"<"		BEGIN(SC); return '<';
<SECT2>^"^"		return '^';
<SECT2>\"		BEGIN(QUOTE); return '"';
<SECT2>"{"/[0-9]	BEGIN(NUM); return '{';
<SECT2>"{"[^0-9\n][^}\n]*	BEGIN(BRACEERROR);
<SECT2>"$"/([ \t]|{NL})	return '$';

<SECT2>{WS}"%{"		{
			bracelevel = 1;
			BEGIN(PERCENT_BRACE_ACTION);
			return '\n';
			}
<SECT2>{WS}"|".*{NL}	continued_action = true; ++linenum; return '\n';

<SECT2>{WS}		{
			/* This rule is separate from the one below because
			 * otherwise we get variable trailing context, so
			 * we can't build the scanner using -{f,F}.
			 */
			bracelevel = 0;
			continued_action = false;
			BEGIN(ACTION);
			return '\n';
			}

<SECT2>{OPTWS}{NL}	{
			bracelevel = 0;
			continued_action = false;
			BEGIN(ACTION);
			unput( '\n' );	/* so <ACTION> sees it */
			return '\n';
			}

<SECT2>"<<EOF>>"	return EOF_OP;

<SECT2>^"%%".*		{
			sectnum = 3;
			BEGIN(SECT3);
			yyterminate(); /* to stop the parser */
			}

<SECT2>"["{FIRST_CCL_CHAR}{CCL_CHAR}*	{
			int cclval;

			/* Bounded copy: a character class can be longer
			 * than nmstr.
			 */
			scan_copy_token( nmstr, yytext );

			/* Check to see if we've already encountered this
			 * ccl.
			 */
			if ( (cclval = ccllookup( (Char *) nmstr )) )
				{
				if ( input() != ']' )
					synerr( "bad character class" );

				yylval = cclval;
				++cclreuse;
				return PREVCCL;
				}
			else
				{
				/* We fudge a bit.  We know that this ccl will
				 * soon be numbered as lastccl + 1 by cclinit.
				 */
				cclinstal( (Char *) nmstr, lastccl + 1 );

				/* Push back everything but the leading bracket
				 * so the ccl can be rescanned.
				 */
				yyless( 1 );

				BEGIN(FIRSTCCL);
				return '[';
				}
			}

<SECT2>"{"{NAME}"}"	{
			register Char *nmdefptr;
			Char *ndlookup();

			scan_copy_token( nmstr, yytext + 1 );

			/* Chop the trailing brace.  When the name had to
			 * be truncated the brace was never copied, and the
			 * index below would lie outside the copied text.
			 */
			if ( yyleng - 1 < MAXLINE )
				nmstr[yyleng - 2] = '\0';

			if ( ! (nmdefptr = ndlookup( nmstr )) )
				format_synerr( "undefined definition {%s}",
						nmstr );

			else
				{ /* push back name surrounded by ()'s */
				int len = strlen( (char *) nmdefptr );

				if ( lex_compat || nmdefptr[0] == '^' ||
				     (len > 0 && nmdefptr[len - 1] == '$') )
					{ /* don't use ()'s after all */
					PUT_BACK_STRING((char *) nmdefptr, 0);

					if ( nmdefptr[0] == '^' )
						BEGIN(CARETISBOL);
					}

				else
					{
					unput(')');
					PUT_BACK_STRING((char *) nmdefptr, 0);
					unput('(');
					}
				}
			}

<SECT2>[/|*+?.()]	return (unsigned char) yytext[0];
<SECT2>.		RETURNCHAR;


	/* Inside the angle brackets of a start condition list. */
<SC>[,*]		return (unsigned char) yytext[0];
<SC>">"			BEGIN(SECT2); return '>';
<SC>">"/^		BEGIN(CARETISBOL); return '>';
<SC>{SCNAME}		RETURNNAME;
<SC>.			{
			format_synerr( "bad <start condition>: %s", yytext );
			}

<CARETISBOL>"^"		BEGIN(SECT2); return '^';


	/* Inside a double-quoted string in a pattern. */
<QUOTE>[^"\n]		RETURNCHAR;
<QUOTE>\"		BEGIN(SECT2); return '"';

<QUOTE>{NL}		{
			synerr( "missing quote" );
			BEGIN(SECT2);
			++linenum;
			return '"';
			}


	/* Inside a character class; FIRSTCCL covers its first character. */
<FIRSTCCL>"^"/[^-\]\n]	BEGIN(CCL); return '^';
<FIRSTCCL>"^"/("-"|"]")	return '^';
<FIRSTCCL>.		BEGIN(CCL); RETURNCHAR;

<CCL>-/[^\]\n]		return '-';
<CCL>[^\]\n]		RETURNCHAR;
<CCL>"]"		BEGIN(SECT2); return ']';
<CCL>.|{NL}		{
			synerr( "bad character class" );
			BEGIN(SECT2);
			return ']';
			}


	/* Inside the braces of a repeat count. */
<NUM>[0-9]+		{
			yylval = myctoi( yytext );
			return NUMBER;
			}

<NUM>","		return ',';
<NUM>"}"		BEGIN(SECT2); return '}';

<NUM>.			{
			synerr( "bad character inside {}'s" );
			BEGIN(SECT2);
			return '}';
			}

<NUM>{NL}		{
			synerr( "missing }" );
			BEGIN(SECT2);
			++linenum;
			return '}';
			}


<BRACEERROR>"}"		synerr( "bad name in {}'s" ); BEGIN(SECT2);
<BRACEERROR>{NL}	synerr( "missing }" ); ++linenum; BEGIN(SECT2);


<CODEBLOCK_2>"/*"	ACTION_ECHO; BEGIN(ACTION_COMMENT);
<PERCENT_BRACE_ACTION,CODEBLOCK_2>{OPTWS}"%}".*	bracelevel = 0;
<PERCENT_BRACE_ACTION,CODEBLOCK_2,ACTION>"reject"	{
			ACTION_ECHO;
			CHECK_REJECT(yytext);
			}
<PERCENT_BRACE_ACTION,CODEBLOCK_2,ACTION>"yymore"	{
			ACTION_ECHO;
			CHECK_YYMORE(yytext);
			}
<PERCENT_BRACE_ACTION,CODEBLOCK_2>{NAME}|{NOT_NAME}|.	ACTION_ECHO;
<PERCENT_BRACE_ACTION,CODEBLOCK_2>{NL}	{
			++linenum;
			ACTION_ECHO;
			if ( bracelevel == 0 ||
			     (doing_codeblock && indented_code) )
				{
				if ( ! doing_codeblock )
					add_action( "\tYY_BREAK\n" );

				doing_codeblock = false;
				BEGIN(SECT2);
				}
			}


	/* Reject and YYmore() are checked for above, in PERCENT_BRACE_ACTION */
<ACTION>"{"		ACTION_ECHO; ++bracelevel;
<ACTION>"}"		ACTION_ECHO; --bracelevel;
<ACTION>[^a-z_{}"'/\n]+	ACTION_ECHO;
<ACTION>{NAME}		ACTION_ECHO;
<ACTION>"/*"		ACTION_ECHO; BEGIN(ACTION_COMMENT);
<ACTION>"'"([^'\\\n]|\\.)*"'"	ACTION_ECHO; /* character constant */
<ACTION>\"		ACTION_ECHO; BEGIN(ACTION_STRING);
<ACTION>{NL}		{
			++linenum;
			ACTION_ECHO;
			if ( bracelevel == 0 )
				{
				add_action( "\tYY_BREAK\n" );
				BEGIN(SECT2);
				}
			}
<ACTION>.		ACTION_ECHO;

<ACTION_COMMENT>"*/"	{
			ACTION_ECHO;
			if ( doing_codeblock )
				BEGIN(CODEBLOCK_2);
			else
				BEGIN(ACTION);
			}

<ACTION_COMMENT>"*"	ACTION_ECHO;
<ACTION_COMMENT>[^*\n]+	ACTION_ECHO;
<ACTION_COMMENT>[^*\n]*{NL}	++linenum; ACTION_ECHO;

<ACTION_STRING>[^"\\\n]+	ACTION_ECHO;
<ACTION_STRING>\\.	ACTION_ECHO;
<ACTION_STRING>{NL}	++linenum; ACTION_ECHO;
<ACTION_STRING>\"	ACTION_ECHO; BEGIN(ACTION);
<ACTION_STRING>.	ACTION_ECHO;

<ACTION,ACTION_COMMENT,ACTION_STRING><<EOF>>	{
			synerr( "EOF encountered inside an action" );
			yyterminate();
			}


<SECT2,QUOTE,CCL>{ESCSEQ}	{
			yylval = myesc( (Char *) yytext );
			return CHAR;
			}

<FIRSTCCL>{ESCSEQ}	{
			yylval = myesc( (Char *) yytext );
			BEGIN(CCL);
			return CHAR;
			}


<SECT3>.*(\n?)		ECHO;
<SECT3><<EOF>>		sectnum = 0; yyterminate();

<*>.|\n			format_synerr( "bad character: %s", yytext );

%%


/* yywrap - decide what happens when the current input is exhausted
 *
 * If input files remain, switches to the next one with set_input_file()
 * and returns 0; otherwise returns 1.
 */

int yywrap()
	{
	if ( --num_input_files > 0 )
		{
		set_input_file( *++input_files );
		return 0;
		}

	else
		return 1;
	}


/* set_input_file - open the given file (if NULL, stdin) for scanning
 *
 * Sets infilename and yyin.  A file that cannot be opened is reported
 * through lerrsf().
 */

void set_input_file( file )
char *file;
	{
	if ( file )
		{
		infilename = file;
		yyin = fopen( infilename, "r" );

		if ( yyin == NULL )
			lerrsf( "can't open %s", file );
		}

	else
		{
		yyin = stdin;
		infilename = "<stdin>";
		}
	}


/* Wrapper routines for accessing the scanner's malloc routines. */

void *flex_alloc( size )
unsigned int size;
	{
	return yy_flex_alloc( size );
	}

void *flex_realloc( ptr, size )
void *ptr;
unsigned int size;
	{
	return yy_flex_realloc( ptr, size );
	}

void flex_free( ptr )
void *ptr;
	{
	yy_flex_free( ptr );
	}