xref: /openbsd/usr.bin/lex/scan.l (revision 20c29e2b)
1 /*	$OpenBSD: scan.l,v 1.14 2024/11/09 18:03:44 op Exp $	*/
2 
3 /* scan.l - scanner for flex input -*-C-*- */
4 
5 %{
6 /*  Copyright (c) 1990 The Regents of the University of California. */
7 /*  All rights reserved. */
8 
9 /*  This code is derived from software contributed to Berkeley by */
10 /*  Vern Paxson. */
11 
12 /*  The United States Government has rights in this work pursuant */
13 /*  to contract no. DE-AC03-76SF00098 between the United States */
14 /*  Department of Energy and the University of California. */
15 
16 /*  This file is part of flex. */
17 
18 /*  Redistribution and use in source and binary forms, with or without */
19 /*  modification, are permitted provided that the following conditions */
20 /*  are met: */
21 
22 /*  1. Redistributions of source code must retain the above copyright */
23 /*     notice, this list of conditions and the following disclaimer. */
24 /*  2. Redistributions in binary form must reproduce the above copyright */
25 /*     notice, this list of conditions and the following disclaimer in the */
26 /*     documentation and/or other materials provided with the distribution. */
27 
28 /*  Neither the name of the University nor the names of its contributors */
29 /*  may be used to endorse or promote products derived from this software */
30 /*  without specific prior written permission. */
31 
32 /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
33 /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
34 /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
35 /*  PURPOSE. */
36 
37 #include "flexdef.h"
38 #include "parse.h"
39 extern bool tablesverify, tablesext;
40 extern int trlcontxt; /* Set in  parse.y for each rule. */
41 extern const char *escaped_qstart, *escaped_qend;
42 
/* Hand the text of the current match (yytext) to add_action(). */
43 #define ACTION_ECHO add_action( yytext )
/* Call action_define( def, 1 ) when should_define is true.  This expands to
 * a bare { } block rather than a do/while(0) statement.
 */
44 #define ACTION_IFDEF(def, should_define) \
45 	{ \
46 	if ( should_define ) \
47 		action_define( def, 1 ); \
48 	}
49 
/* Hand the escaped_qstart / escaped_qend strings to add_action(); the rules
 * below use these where the input contains the M4QSTART / M4QEND sequences.
 */
50 #define ACTION_ECHO_QSTART add_action (escaped_qstart)
51 #define ACTION_ECHO_QEND   add_action (escaped_qend)
52 
/* Record def in m4defs_buf: defined (with a NULL value) when should_define
 * is true, undefined otherwise.
 */
53 #define ACTION_M4_IFDEF(def, should_define) \
54     do{ \
55         if ( should_define ) \
56             buf_m4_define( &m4defs_buf, def, NULL);\
57         else \
58             buf_m4_undefine( &m4defs_buf, def);\
59     } while(0)
60 
/* Calls mark_prolog(); the trailing ';' is part of the expansion. */
61 #define MARK_END_OF_PROLOG mark_prolog();
62 
/* The generated scanning routine is declared as int flexscan(void). */
63 #define YY_DECL \
64 	int flexscan(void)
65 
/* Store the first byte of yytext (as an unsigned char) in yylval and return
 * CHAR.  Expands to two statements, so it is only safe where a statement
 * list is allowed (not as the body of an unbraced if/else).
 */
66 #define RETURNCHAR \
67 	yylval = (unsigned char) yytext[0]; \
68 	return CHAR;
69 
/* Copy yytext into nmstr and return NAME.  When yyleng is MAXLINE or more,
 * report a syntax error and call FLEX_EXIT(EXIT_FAILURE) instead of copying.
 */
70 #define RETURNNAME \
71 	if(yyleng < MAXLINE) \
72          { \
73 	strlcpy( nmstr, yytext, sizeof nmstr ); \
74 	 } \
75 	else \
76 	 { \
77 	   synerr(_("Input line too long\n")); \
78 	   FLEX_EXIT(EXIT_FAILURE);  \
79 	 }  \
80 	return NAME;
81 
/* unput() the characters of str from its last one down to index start.
 * Uses the variable i of the enclosing scope as the loop index.
 */
82 #define PUT_BACK_STRING(str, start) \
83 	for ( i = strlen( str ) - 1; i >= start; --i ) \
84 		unput((str)[i])
85 
/* Set the reject flag when all_upper( str ) is true.
 * NOTE(review): presumably this limits the flag to the upper-case spelling,
 * since the scanner itself is caseless -- confirm against all_upper().
 */
86 #define CHECK_REJECT(str) \
87 	if ( all_upper( str ) ) \
88 		reject = true;
89 
/* Set the yymore_used flag when all_lower( str ) is true. */
90 #define CHECK_YYMORE(str) \
91 	if ( all_lower( str ) ) \
92 		yymore_used = true;
93 
/* Scanner start-up hook: turn on posix_compat when POSIXLY_CORRECT is
 * present in the environment.
 */
94 #define YY_USER_INIT \
95 	if ( getenv("POSIXLY_CORRECT") ) \
96 		posix_compat = true;
97 
98 %}
99 
/* caseless: the patterns in this file match regardless of letter case.
 * nodefault: no default rule is generated; the <*> rule at the end of the
 * rules section picks up otherwise unmatched input.
 * stack: enables the start-condition stack (yy_push_state / yy_pop_state).
 */
100 %option caseless nodefault stack noyy_top_state
101 %option nostdinit
102 
/* Exclusive start conditions used by the rules below. */
103 %x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE
104 %x FIRSTCCL CCL ACTION RECOVER COMMENT ACTION_STRING PERCENT_BRACE_ACTION
105 %x OPTION LINEDIR CODEBLOCK_MATCH_BRACE
106 %x GROUP_WITH_PARAMS
107 %x GROUP_MINUS_PARAMS
108 %x EXTENDED_COMMENT
109 %x COMMENT_DISCARD
110 
/* Blanks: one or more, zero or more, and any character that is neither a
 * blank nor part of a line ending.
 */
111 WS		[[:blank:]]+
112 OPTWS		[[:blank:]]*
113 NOT_WS		[^[:blank:]\r\n]
114 
/* Line ending, with an optional carriage return before the newline. */
115 NL		\r?\n
116 
/* NAME: a letter or underscore followed by letters, digits, '_' or '-'. */
117 NAME		([[:alpha:]_][[:alnum:]_-]*)
118 NOT_NAME	[^[:alpha:]_*\n]+
119 
/* Start condition names use the same syntax as NAME. */
120 SCNAME		{NAME}
121 
/* A backslash followed by any non-newline character, by one to three octal
 * digits, or by 'x' and one or two hexadecimal digits.
 */
122 ESCSEQ		(\\([^\n]|[0-7]{1,3}|x[[:xdigit:]]{1,2}))
123 
/* Characters allowed first / later inside a bracketed character class, and
 * the [:name:] / [:^name:] class expression syntax.
 */
124 FIRST_CCL_CHAR	([^\\\n]|{ESCSEQ})
125 CCL_CHAR	([^\\\n\]]|{ESCSEQ})
126 CCL_EXPR	("[:"^?[[:alpha:]]+":]")
127 
/* Letters of the one-letter % directives that the INITIAL rules accept
 * and ignore.
 */
128 LEXOPT		[aceknopr]
129 
/* Two-character sequences that the rules below replace with
 * escaped_qstart / escaped_qend when passing text through.
 */
130 M4QSTART    "[["
131 M4QEND      "]]"
132 
133 %%
	/* Declarations used by the rule actions below.  The static ones keep
	 * their values from one call of the scanner to the next; the others
	 * are set up again on every call.
	 */
134 	static int bracelevel, didadef, indented_code;
135 	static int doing_rule_action = false;
136 	static int option_sense;
137 
138 	int doing_codeblock = false;
	/* i is the index variable that PUT_BACK_STRING and the PICKUPDEF
	 * rule rely on; brace_depth and brace_start_line track %top blocks.
	 */
139 	int i, brace_depth=0, brace_start_line=0;
	/* Holds the text of a name definition while it is being trimmed. */
140 	u_char nmdef[MAXLINE];
141 
142 
143 <INITIAL>{
	/* Section 1 of the input: directives, name definitions, code. */
144 	^{WS}		indented_code = true; BEGIN(CODEBLOCK);
145 	^"/*"		ACTION_ECHO; yy_push_state( COMMENT );
146 	^#{OPTWS}line{WS}	yy_push_state( LINEDIR );
147 	^"%s"{NAME}?	return SCDECL;
148 	^"%x"{NAME}?	return XSCDECL;
149 	^"%{".*{NL}	{
			/* start of a %{ ... %} code block */
150 			++linenum;
151 			line_directive_out( (FILE *) 0, 1 );
152 			indented_code = false;
153 			BEGIN(CODEBLOCK);
154 			}
155     ^"%top"[[:blank:]]*"{"[[:blank:]]*{NL}    {
                /* Remember the starting line: CODEBLOCK_MATCH_BRACE
                 * restores it into linenum if the brace is never closed.
                 */
156                 brace_start_line = linenum;
157                 ++linenum;
158                 buf_linedir( &top_buf, infilename?infilename:"<stdin>", linenum);
159                 brace_depth = 1;
160                 yy_push_state(CODEBLOCK_MATCH_BRACE);
161             }
162 
163     ^"%top".*   synerr( _("malformed '%top' directive") );
164 
165 	{WS}		/* discard */
166 
167 	^"%%".*		{
			/* end of section 1: continue with the section 2 prolog */
168 			sectnum = 2;
169 			bracelevel = 0;
170 			mark_defs1();
171 			line_directive_out( (FILE *) 0, 1 );
172 			BEGIN(SECT2PROLOG);
173 			return SECTEND;
174 			}
175 
176 	^"%pointer".*{NL}	yytext_is_array = false; ++linenum;
177 	^"%array".*{NL}		yytext_is_array = true; ++linenum;
178 
179 	^"%option"	BEGIN(OPTION); return OPTION_OP;
180 
181 	^"%"{LEXOPT}{OPTWS}[[:digit:]]*{OPTWS}{NL}	++linenum; /* ignore */
182 	^"%"{LEXOPT}{WS}.*{NL}	++linenum;	/* ignore */
183 
184 	/* xgettext: no-c-format */
185 	^"%"[^sxaceknopr{}].*	synerr( _( "unrecognized '%' directive" ) );
186 
187 	^{NAME}		{
			/* A name at the start of a line begins a definition:
			 * save the name, then let PICKUPDEF collect the value.
			 */
188 			if(yyleng < MAXLINE)
189         		 {
190 			strlcpy( nmstr, yytext, sizeof nmstr );
191 			 }
192 			else
193 			 {
194 			   synerr( _("Definition name too long\n"));
195 			   FLEX_EXIT(EXIT_FAILURE);
196 			 }
197 
198 			didadef = false;
199 			BEGIN(PICKUPDEF);
200 			}
201 
202 	{SCNAME}	RETURNNAME;
203 	^{OPTWS}{NL}	++linenum; /* allows blank lines in section 1 */
204 	{OPTWS}{NL}	ACTION_ECHO; ++linenum; /* maybe end of comment line */
205 }
206 
207 
207 <COMMENT>{
	/* Inside a comment that is passed on through add_action(); the
	 * closing marker returns to whichever state pushed COMMENT.
	 */
208 	"*/"		ACTION_ECHO; yy_pop_state();
209 	"*"		ACTION_ECHO;
210     {M4QSTART}  ACTION_ECHO_QSTART;
211     {M4QEND}    ACTION_ECHO_QEND;
212 	[^*\n]      ACTION_ECHO;
213 	{NL}	    ++linenum; ACTION_ECHO;
214 }
216 
216 <COMMENT_DISCARD>{
217         /* This is the same as COMMENT, but is discarded rather than output. */
218 	"*/"		yy_pop_state();
219     "*"         ;
220 	[^*\n]      ;
221 	{NL}	    ++linenum;	/* still counted so line numbers stay right */
222 }
224 
224 <EXTENDED_COMMENT>{
    /* Body of a "(?#" comment in a pattern: skip everything up to the
     * closing parenthesis, counting lines on the way.
     */
225     ")"         yy_pop_state();
226     [^\n\)]+      ;
227     {NL}        ++linenum;
228 }
230 
230 <LINEDIR>{
	/* Remainder of a "#line" directive: a number and an optional
	 * quoted file name.
	 */
231 	\n		yy_pop_state();
232 	[[:digit:]]+	linenum = myctoi( yytext );
233 
234 	\"[^"\n]*\"	{
			/* Replace infilename with the quoted text: skip the
			 * opening quote when copying, then overwrite the
			 * closing quote with the terminator.
			 */
235 			free( (void *) infilename );
236 			infilename = copy_string( yytext + 1 );
237 			infilename[strlen( infilename ) - 1] = '\0';
238 			}
239 	.		/* ignore spurious characters */
240 }
242 
242 <CODEBLOCK>{
	/* Code from section 1, passed on through add_action(). */
243 	^"%}".*{NL}	++linenum; BEGIN(INITIAL);
244 
245     {M4QSTART}  ACTION_ECHO_QSTART;
246     {M4QEND}    ACTION_ECHO_QEND;
247 	.	        ACTION_ECHO;
248 
249 	{NL}		{
250 			++linenum;
251 			ACTION_ECHO;
			/* an indented code line ends at its newline */
252 			if ( indented_code )
253 				BEGIN(INITIAL);
254 			}
255 }
257 
257 <CODEBLOCK_MATCH_BRACE>{
    /* Body of a %top block: text is appended to top_buf until the brace
     * that opened the block is closed again.
     */
258     "}"     {
                /* The brace that brings the depth back to zero ends the
                 * block and is not appended to top_buf.
                 */
259                 if( --brace_depth == 0){
260                     /* TODO: Matched. */
261                     yy_pop_state();
262                 }else
263                     buf_strnappend(&top_buf, yytext, yyleng);
264             }
265 
266     "{"     {
267                 brace_depth++;
268                 buf_strnappend(&top_buf, yytext, yyleng);
269             }
270 
271     {NL}    {
272                 ++linenum;
273                 buf_strnappend(&top_buf, yytext, yyleng);
274             }
275 
276     {M4QSTART}  buf_strnappend(&top_buf, escaped_qstart, strlen(escaped_qstart));
277     {M4QEND}    buf_strnappend(&top_buf, escaped_qend, strlen(escaped_qend));
278 
279     [^{}\r\n]  {
280                 buf_strnappend(&top_buf, yytext, yyleng);
281                }
282 
283     <<EOF>>     {
                /* report the error at the line where the block began */
284                 linenum = brace_start_line;
285                 synerr(_("Unmatched '{'"));
286                 yyterminate();
287                 }
288 }
290 
291 
291 <PICKUPDEF>{
	/* Value part of a name definition; the name is already in nmstr. */
292 	{WS}		/* separates name and definition */
293 
294 	{NOT_WS}[^\r\n]*	{
295  		        if(yyleng < MAXLINE)
296  		         {
297 			strlcpy( (char *) nmdef, yytext, sizeof nmdef );
298  		         }
299  		        else
300  		         {
301  		           format_synerr( _("Definition value for {%s} too long\n"), nmstr);
302  		           FLEX_EXIT(EXIT_FAILURE);
303 			 }
304 			/* Skip trailing whitespace. */
305 			for ( i = strlen( (char *) nmdef ) - 1;
306 			      i >= 0 && (nmdef[i] == ' ' || nmdef[i] == '\t');
307 			      --i )
308 				;
309 
			/* i now indexes the last character to keep */
310 			nmdef[i + 1] = '\0';
311 
312 			ndinstal( nmstr, nmdef );
313 			didadef = true;
314 			}
315 
316 	{NL}		{
			/* a name that was followed by no value is an error */
317 			if ( ! didadef )
318 				synerr( _( "incomplete name definition" ) );
319 			BEGIN(INITIAL);
320 			++linenum;
321 			}
322 }
324 
325 
325 <OPTION>{
	/* Body of a %option line.  Whitespace resets option_sense to true and
	 * every "no" prefix flips it, so each option name below is applied
	 * with the sense accumulated since the preceding whitespace.
	 */
326 	{NL}		++linenum; BEGIN(INITIAL);
327 	{WS}		option_sense = true;
328 
329 	"="		return '=';
330 
331 	no		option_sense = ! option_sense;
332 
333 	7bit		csize = option_sense ? 128 : 256;
334 	8bit		csize = option_sense ? 256 : 128;
335 
336 	align		long_align = option_sense;
337 	always-interactive	{
338 			ACTION_M4_IFDEF( "M4""_YY_ALWAYS_INTERACTIVE", option_sense );
339             interactive = option_sense;
340 			}
341 	array		yytext_is_array = option_sense;
342     ansi-definitions ansi_func_defs = option_sense;
343     ansi-prototypes  ansi_func_protos = option_sense;
344 	backup		backing_up_report = option_sense;
345 	batch		interactive = ! option_sense;
346     bison-bridge     bison_bridge_lval = option_sense;
347     bison-locations  { if((bison_bridge_lloc = option_sense))
348                             bison_bridge_lval = true;
349                      }
350 	"c++"		C_plus_plus = option_sense;
351 	caseful|case-sensitive		sf_set_case_ins(!option_sense);
352 	caseless|case-insensitive	sf_set_case_ins(option_sense);
353 	debug		ddebug = option_sense;
354 	default		spprdflt = ! option_sense;
355 	ecs		useecs = option_sense;
356 	fast		{
357 			useecs = usemecs = false;
358 			use_read = fullspd = true;
359 			}
360 	full		{
361 			useecs = usemecs = false;
362 			use_read = fulltbl = true;
363 			}
364 	input		ACTION_IFDEF("YY_NO_INPUT", ! option_sense);
365 	interactive	interactive = option_sense;
366 	lex-compat	lex_compat = option_sense;
367 	posix-compat	posix_compat = option_sense;
368 	main		{
369 			ACTION_M4_IFDEF( "M4""_YY_MAIN", option_sense);
370             /* Override yywrap */
371             if( option_sense == true )
372                 do_yywrap = false;
373 			}
374 	meta-ecs	usemecs = option_sense;
375 	never-interactive	{
376 			ACTION_M4_IFDEF( "M4""_YY_NEVER_INTERACTIVE", option_sense );
377             interactive = !option_sense;
378 			}
379 	perf-report	performance_report += option_sense ? 1 : -1;
380 	pointer		yytext_is_array = ! option_sense;
381 	read		use_read = option_sense;
382     reentrant   reentrant = option_sense;
383 	reject		reject_really_used = option_sense;
384 	stack		ACTION_M4_IFDEF( "M4""_YY_STACK_USED", option_sense );
385 	stdinit		do_stdinit = option_sense;
386 	stdout		use_stdout = option_sense;
387     unistd      ACTION_IFDEF("YY_NO_UNISTD_H", ! option_sense);
388 	unput		ACTION_M4_IFDEF("M4""_YY_NO_UNPUT", ! option_sense);
389 	verbose		printstats = option_sense;
390 	warn		nowarn = ! option_sense;
391 	yylineno	do_yylineno = option_sense; ACTION_M4_IFDEF("M4""_YY_USE_LINENO", option_sense);
392 	yymore		yymore_really_used = option_sense;
393 	yywrap      do_yywrap = option_sense;
394 
	/* The options below control M4 "_YY_NO_..." symbols, so each symbol
	 * is defined when its option is negated.
	 */
395 	yy_push_state	ACTION_M4_IFDEF("M4""_YY_NO_PUSH_STATE", ! option_sense);
396 	yy_pop_state	ACTION_M4_IFDEF("M4""_YY_NO_POP_STATE", ! option_sense);
397 	yy_top_state	ACTION_M4_IFDEF("M4""_YY_NO_TOP_STATE", ! option_sense);
398 
399 	yy_scan_buffer	ACTION_M4_IFDEF("M4""_YY_NO_SCAN_BUFFER", ! option_sense);
400 	yy_scan_bytes	ACTION_M4_IFDEF("M4""_YY_NO_SCAN_BYTES", ! option_sense);
401 	yy_scan_string	ACTION_M4_IFDEF("M4""_YY_NO_SCAN_STRING", ! option_sense);
402 
403     yyalloc         ACTION_M4_IFDEF("M4""_YY_NO_FLEX_ALLOC", ! option_sense);
404     yyrealloc       ACTION_M4_IFDEF("M4""_YY_NO_FLEX_REALLOC", ! option_sense);
405     yyfree          ACTION_M4_IFDEF("M4""_YY_NO_FLEX_FREE", ! option_sense);
406 
407     yyget_debug     ACTION_M4_IFDEF("M4""_YY_NO_GET_DEBUG", ! option_sense);
408     yyset_debug     ACTION_M4_IFDEF("M4""_YY_NO_SET_DEBUG", ! option_sense);
409     yyget_extra     ACTION_M4_IFDEF("M4""_YY_NO_GET_EXTRA", ! option_sense);
410     yyset_extra     ACTION_M4_IFDEF("M4""_YY_NO_SET_EXTRA", ! option_sense);
411     yyget_leng      ACTION_M4_IFDEF("M4""_YY_NO_GET_LENG", ! option_sense);
412     yyget_text      ACTION_M4_IFDEF("M4""_YY_NO_GET_TEXT", ! option_sense);
413     yyget_lineno    ACTION_M4_IFDEF("M4""_YY_NO_GET_LINENO", ! option_sense);
414     yyset_lineno    ACTION_M4_IFDEF("M4""_YY_NO_SET_LINENO", ! option_sense);
415     yyget_in        ACTION_M4_IFDEF("M4""_YY_NO_GET_IN", ! option_sense);
416     yyset_in        ACTION_M4_IFDEF("M4""_YY_NO_SET_IN", ! option_sense);
417     yyget_out       ACTION_M4_IFDEF("M4""_YY_NO_GET_OUT", ! option_sense);
418     yyset_out       ACTION_M4_IFDEF("M4""_YY_NO_SET_OUT", ! option_sense);
419     yyget_lval      ACTION_M4_IFDEF("M4""_YY_NO_GET_LVAL", ! option_sense);
420     yyset_lval      ACTION_M4_IFDEF("M4""_YY_NO_SET_LVAL", ! option_sense);
421     yyget_lloc      ACTION_M4_IFDEF("M4""_YY_NO_GET_LLOC", ! option_sense);
422     yyset_lloc      ACTION_M4_IFDEF("M4""_YY_NO_SET_LLOC", ! option_sense);
423 
	/* Options handed to the parser as tokens rather than applied here. */
424 	extra-type	return OPT_EXTRA_TYPE;
425 	outfile		return OPT_OUTFILE;
426 	prefix		return OPT_PREFIX;
427 	yyclass		return OPT_YYCLASS;
428 	header(-file)?      return OPT_HEADER;
429 	tables-file         return OPT_TABLES;
430 	tables-verify   {
431                     tablesverify = option_sense;
432                     if(!tablesext && option_sense)
433                         tablesext = true;
434                     }
435 
436 
437 	\"[^"\n]*\"	{
			/* Quoted option value: copy it without the opening
			 * quote, strip the closing one, return it as a NAME.
			 */
438 			if(yyleng-1 < MAXLINE)
439         		 {
440 			strlcpy( nmstr, yytext + 1, sizeof nmstr );
441 			 }
442 			else
443 			 {
444 			   synerr( _("Option line too long\n"));
445 			   FLEX_EXIT(EXIT_FAILURE);
446 			 }
447 			if (nmstr[strlen( nmstr ) - 1] == '"')
448 				nmstr[strlen( nmstr ) - 1] = '\0';
449 			return NAME;
450 			}
451 
	/* A word that does not begin with "no", or any single character,
	 * that no earlier rule in this state has claimed.
	 */
452 	(([a-mo-z]|n[a-np-z])[[:alpha:]\-+]*)|.	{
453 			format_synerr( _( "unrecognized %%option: %s" ),
454 				yytext );
455 			BEGIN(RECOVER);
456 			}
457 }
459 
459 <RECOVER>.*{NL}		++linenum; BEGIN(INITIAL);	/* skip the rest of the bad line */
461 
462 
462 <SECT2PROLOG>{
	/* Code at the top of section 2, before the first rule.  bracelevel
	 * counts how many %{ are currently open.
	 */
463 	^"%{".*	++bracelevel; yyless( 2 );	/* eat only %{ */
464 	^"%}".*	--bracelevel; yyless( 2 );	/* eat only %} */
465 
466 	^{WS}.*	ACTION_ECHO;	/* indented code in prolog */
467 
468 	^{NOT_WS}.*	{	/* non-indented code */
469 			if ( bracelevel <= 0 )
470 				{ /* not in %{ ... %} */
471 				yyless( 0 );	/* put it all back */
				/* the text put back starts a line, so the
				 * ^ rules of SECT2 must be able to match it
				 */
472 				yy_set_bol( 1 );
473 				mark_prolog();
474 				BEGIN(SECT2);
475 				}
476 			else
477 				ACTION_ECHO;
478 			}
479 
480 	.		ACTION_ECHO;
481 	{NL}	++linenum; ACTION_ECHO;
482 
483 	<<EOF>>		{
484 			mark_prolog();
485 			sectnum = 0;
486 			yyterminate(); /* to stop the parser */
487 			}
488 }
490 
490 <SECT2>{
	/* The rules section of the input: patterns are broken into tokens
	 * for the parser, and the whitespace after a pattern starts its action.
	 */
491 	^{OPTWS}{NL}	++linenum; /* allow blank lines in section 2 */
492 
493 	^{OPTWS}"%{"	{
			/* a %{ code block between rules */
494 			indented_code = false;
495 			doing_codeblock = true;
496 			bracelevel = 1;
497 			BEGIN(PERCENT_BRACE_ACTION);
498 			}
499 
500 	^{OPTWS}"<"	    {
501                         /* Allow "<" to appear in (?x) patterns. */
502                         if (!sf_skip_ws())
503                             BEGIN(SC);
504                         return '<';
505                     }
506 	^{OPTWS}"^"	return '^';
507 	\"		BEGIN(QUOTE); return '"';
508 	"{"/[[:digit:]]	{
			/* a brace followed by a digit opens a repeat count */
509 			BEGIN(NUM);
510 			if ( lex_compat || posix_compat )
511 				return BEGIN_REPEAT_POSIX;
512 			else
513 				return BEGIN_REPEAT_FLEX;
514 			}
515 	"$"/([[:blank:]]|{NL})	return '$';
516 
517 	{WS}"%{"		{
			/* a %{ ... %} action following a pattern */
518 			bracelevel = 1;
519 			BEGIN(PERCENT_BRACE_ACTION);
520 
521 			if ( in_rule )
522 				{
523 				doing_rule_action = true;
524 				in_rule = false;
525 				return '\n';
526 				}
527 			}
528 	{WS}"|".*{NL}	{
529                         if (sf_skip_ws()){
530                             /* We're in the middle of a (?x: ) pattern. */
531                             /* Push back everything starting at the "|" */
532                             size_t amt;
533                             amt = strchr (yytext, '|') - yytext;
534                             yyless(amt);
535                         }
536                         else {
537                             continued_action = true;
538                             ++linenum;
539                             return '\n';
540                         }
541                     }
542 
543 	^{WS}"/*"	{
544 
545                 if (sf_skip_ws()){
546                     /* We're in the middle of a (?x: ) pattern. */
547                     yy_push_state(COMMENT_DISCARD);
548                 }
549                 else{
550                     yyless( yyleng - 2 );	/* put back '/', '*' */
551                     bracelevel = 0;
552                     continued_action = false;
553                     BEGIN(ACTION);
554                 }
555 			}
556 
557 	^{WS}		/* allow indented rules */ ;
558 
559 	{WS}		{
560             if (sf_skip_ws()){
561                 /* We're in the middle of a (?x: ) pattern. */
562             }
563             else{
564                 /* This rule is separate from the one below because
565                  * otherwise we get variable trailing context, so
566                  * we can't build the scanner using -{f,F}.
567                  */
568                 bracelevel = 0;
569                 continued_action = false;
570                 BEGIN(ACTION);
571 
572                 if ( in_rule )
573                     {
574                     doing_rule_action = true;
575                     in_rule = false;
576                     return '\n';
577                     }
578             }
579 			}
580 
581 	{OPTWS}{NL}	{
582             if (sf_skip_ws()){
583                 /* We're in the middle of a (?x: ) pattern. */
584                 ++linenum;
585             }
586             else{
587                 bracelevel = 0;
588                 continued_action = false;
589                 BEGIN(ACTION);
590                 unput( '\n' );	/* so <ACTION> sees it */
591 
592                 if ( in_rule )
593                     {
594                     doing_rule_action = true;
595                     in_rule = false;
596                     return '\n';
597                     }
598             }
599 			}
600 
601 	^{OPTWS}"<<EOF>>"	|
602 	"<<EOF>>"	return EOF_OP;
603 
604 	^"%%".*		{
			/* end of section 2 */
605 			sectnum = 3;
606 			BEGIN(SECT3);
607 			outn("/* Begin user sect3 */");
608 			yyterminate(); /* to stop the parser */
609 			}
610 
611 	"["({FIRST_CCL_CHAR}|{CCL_EXPR})({CCL_CHAR}|{CCL_EXPR})*	{
612 			int cclval;
613 
614 			if(yyleng < MAXLINE)
615         		 {
616 			strlcpy( nmstr, yytext, sizeof nmstr );
617 			 }
618 			else
619 			 {
620 			   synerr( _("Input line too long\n"));
621 			   FLEX_EXIT(EXIT_FAILURE);
622 			 }
623 
624 			/* Check to see if we've already encountered this
625 			 * ccl.
626 			 */
627 			if (0 /* <--- This "0" effectively disables the reuse of a
628                    * character class (purely based on its source text).
629                    * The reason it was disabled is so yacc/bison can parse
630                    * ccl operations, such as ccl difference and union.
631                    */
632                 &&  (cclval = ccllookup( (u_char *) nmstr )) != 0 )
633 				{
634 				if ( input() != ']' )
635 					synerr( _( "bad character class" ) );
636 
637 				yylval = cclval;
638 				++cclreuse;
639 				return PREVCCL;
640 				}
641 			else
642 				{
643 				/* We fudge a bit.  We know that this ccl will
644 				 * soon be numbered as lastccl + 1 by cclinit.
645 				 */
646 				cclinstal( (u_char *) nmstr, lastccl + 1 );
647 
648 				/* Push back everything but the leading bracket
649 				 * so the ccl can be rescanned.
650 				 */
651 				yyless( 1 );
652 
653 				BEGIN(FIRSTCCL);
654 				return '[';
655 				}
656 			}
657     "{-}"       return CCL_OP_DIFF;
658     "{+}"       return CCL_OP_UNION;
659 
660 
661     /* Check for :space: at the end of the rule so we don't
662      * wrap the expanded regex in '(' ')' -- breaking trailing
663      * context.
664      */
665 	"{"{NAME}"}"[[:space:]]?	 {
666 			u_char *nmdefptr;
667             int end_is_ws, end_ch;
668 
            /* The match ends either with the closing brace or with the
             * one optional whitespace character that follows it.
             */
669             end_ch = yytext[yyleng-1];
670             end_is_ws = end_ch != '}' ? 1 : 0;
671 
672  			if(yyleng-1 < MAXLINE)
673          		 {
674 			strlcpy( nmstr, yytext + 1, sizeof nmstr );
675  			 }
676  			else
677  			 {
678  			   synerr( _("Input line too long\n"));
679  			   FLEX_EXIT(EXIT_FAILURE);
680  			 }
681 nmstr[yyleng - 2 - end_is_ws] = '\0';  /* chop trailing brace */
682 
683 			if ( (nmdefptr = ndlookup( nmstr )) == 0 )
684 				format_synerr(
685 					_( "undefined definition {%s}" ),
686 						nmstr );
687 
688 			else
689 				{ /* push back name surrounded by ()'s */
690 				int len = strlen( (char *) nmdefptr );
                /* the whitespace character goes back first so that it
                 * is rescanned after the expanded definition
                 */
691                 if (end_is_ws)
692                     unput(end_ch);
693 
694 				if ( lex_compat || nmdefptr[0] == '^' ||
695 				     (len > 0 && nmdefptr[len - 1] == '$')
696                      || (end_is_ws && trlcontxt && !sf_skip_ws()))
697 					{ /* don't use ()'s after all */
698 					PUT_BACK_STRING((char *) nmdefptr, 0);
699 
700 					if ( nmdefptr[0] == '^' )
701 						BEGIN(CARETISBOL);
702 					}
703 
704 				else
705 					{
706 					unput(')');
707 					PUT_BACK_STRING((char *) nmdefptr, 0);
708 					unput('(');
709 					}
710 				}
711 			}
712 
713     "/*"        {
714                     if (sf_skip_ws())
715                         yy_push_state(COMMENT_DISCARD);
716                     else{
717                         /* Push back the "*" and return "/" as usual. */
718                         yyless(1);
719                         return '/';
720                     }
721                 }
722 
723     "(?#"       {
724                     if (lex_compat || posix_compat){
725                         /* Push back the "?#" and treat it like a normal parens. */
726                         yyless(1);
727                         sf_push();
728                         return '(';
729                     }
730                     else
731                         yy_push_state(EXTENDED_COMMENT);
732                 }
733     "(?"        {
734                     sf_push();
735                     if (lex_compat || posix_compat)
736                         /* Push back the "?" and treat it like a normal parens. */
737                         yyless(1);
738                     else
739                         BEGIN(GROUP_WITH_PARAMS);
740                     return '(';
741                 }
742     "("         sf_push(); return '(';
743     ")"         {
                    /* On an unbalanced ')' no token is returned;
                     * only the error is reported.
                     */
744                     if (_sf_top_ix > 0) {
745                         sf_pop();
746                         return ')';
747                     } else
748                         synerr(_("unbalanced parenthesis"));
749                 }
750 
751 	[/|*+?.(){}]	return (unsigned char) yytext[0];
752 	.		RETURNCHAR;
753 }
755 
756 
756 <SC>{
	/* Inside a <...> start condition list in front of a rule. */
757 	{OPTWS}{NL}{OPTWS}	++linenum;	/* Allow blank lines & continuations */
758 	[,*]		return (unsigned char) yytext[0];
759 	">"		BEGIN(SECT2); return '>';
760 	">"/^		BEGIN(CARETISBOL); return '>';	/* a caret right after the list */
761 	{SCNAME}	RETURNNAME;
762 	.		{
763 			format_synerr( _( "bad <start condition>: %s" ),
764 				yytext );
765 			}
766 }
768 
768 <CARETISBOL>"^"		BEGIN(SECT2); return '^';	/* return the caret as the '^' token, then resume SECT2 */
770 
771 
771 <QUOTE>{
	/* Inside a double-quoted string in a pattern. */
772 	[^"\n]		RETURNCHAR;
773 	\"		BEGIN(SECT2); return '"';
774 
775 	{NL}		{
			/* the line ended first: report it and hand the
			 * parser the closing quote it is waiting for
			 */
776 			synerr( _( "missing quote" ) );
777 			BEGIN(SECT2);
778 			++linenum;
779 			return '"';
780 			}
781 }
783 
783 <GROUP_WITH_PARAMS>{
    /* Flag letters after "(?": each one switches its flag on until ":"
     * ends the list; "-" moves on to the flags to switch off.
     */
784     ":"     BEGIN(SECT2);
785     "-"     BEGIN(GROUP_MINUS_PARAMS);
786     i       sf_set_case_ins(1);
787     s       sf_set_dot_all(1);
788     x       sf_set_skip_ws(1);
789 }
790 <GROUP_MINUS_PARAMS>{
    /* Flag letters after the "-": each one switches its flag off. */
791     ":"     BEGIN(SECT2);
792     i       sf_set_case_ins(0);
793     s       sf_set_dot_all(0);
794     x       sf_set_skip_ws(0);
795 }
797 
797 <FIRSTCCL>{
	/* First character after the opening bracket of a character class.
	 * A caret followed by '-' or ']' keeps this state, so that the next
	 * character is still handled as a first character.
	 */
798 	"^"/[^-\]\n]	BEGIN(CCL); return '^';
799 	"^"/("-"|"]")	return '^';
800 	.		BEGIN(CCL); RETURNCHAR;
801 }
803 
803 <CCL>{
	/* Remaining characters of a character class, up to the closing ']'. */
804 	-/[^\]\n]	return '-';
805 	[^\]\n]		RETURNCHAR;
806 	"]"		BEGIN(SECT2); return ']';
807 	.|{NL}		{
			/* report the error and close the class for the parser */
808 			synerr( _( "bad character class" ) );
809 			BEGIN(SECT2);
810 			return ']';
811 			}
812 }
814 
814 <FIRSTCCL,CCL>{
	/* Named class expressions, plain and negated; each one returns its
	 * own token and continues in CCL.
	 */
815 	"[:alnum:]"	BEGIN(CCL); return CCE_ALNUM;
816 	"[:alpha:]"	BEGIN(CCL); return CCE_ALPHA;
817 	"[:blank:]"	BEGIN(CCL); return CCE_BLANK;
818 	"[:cntrl:]"	BEGIN(CCL); return CCE_CNTRL;
819 	"[:digit:]"	BEGIN(CCL); return CCE_DIGIT;
820 	"[:graph:]"	BEGIN(CCL); return CCE_GRAPH;
821 	"[:lower:]"	BEGIN(CCL); return CCE_LOWER;
822 	"[:print:]"	BEGIN(CCL); return CCE_PRINT;
823 	"[:punct:]"	BEGIN(CCL); return CCE_PUNCT;
824 	"[:space:]"	BEGIN(CCL); return CCE_SPACE;
825 	"[:upper:]"	BEGIN(CCL); return CCE_UPPER;
826 	"[:xdigit:]"	BEGIN(CCL); return CCE_XDIGIT;
827 
828 	"[:^alnum:]"	BEGIN(CCL); return CCE_NEG_ALNUM;
829 	"[:^alpha:]"	BEGIN(CCL); return CCE_NEG_ALPHA;
830 	"[:^blank:]"	BEGIN(CCL); return CCE_NEG_BLANK;
831 	"[:^cntrl:]"	BEGIN(CCL); return CCE_NEG_CNTRL;
832 	"[:^digit:]"	BEGIN(CCL); return CCE_NEG_DIGIT;
833 	"[:^graph:]"	BEGIN(CCL); return CCE_NEG_GRAPH;
834 	"[:^lower:]"	BEGIN(CCL); return CCE_NEG_LOWER;
835 	"[:^print:]"	BEGIN(CCL); return CCE_NEG_PRINT;
836 	"[:^punct:]"	BEGIN(CCL); return CCE_NEG_PUNCT;
837 	"[:^space:]"	BEGIN(CCL); return CCE_NEG_SPACE;
838 	"[:^upper:]"	BEGIN(CCL); return CCE_NEG_UPPER;
839 	"[:^xdigit:]"	BEGIN(CCL); return CCE_NEG_XDIGIT;
840 	{CCL_EXPR}	{
			/* any other name is an error; CCE_ALNUM is still
			 * returned so that the parser receives a token
			 */
841 			format_synerr(
842 				_( "bad character class expression: %s" ),
843 					yytext );
844 			BEGIN(CCL); return CCE_ALNUM;
845 			}
846 }
848 
848 <NUM>{
	/* Inside a repeat count: numbers separated by a comma, up to the
	 * closing brace.
	 */
849 	[[:digit:]]+	{
850 			yylval = myctoi( yytext );
851 			return NUMBER;
852 			}
853 
854 	","		return ',';
855 	"}"		{
856 			BEGIN(SECT2);
857 			if ( lex_compat || posix_compat )
858 				return END_REPEAT_POSIX;
859 			else
860 				return END_REPEAT_FLEX;
861 			}
862 
863 	.		{
			/* report the error and close the count for the parser */
864 			synerr( _( "bad character inside {}'s" ) );
865 			BEGIN(SECT2);
866 			return '}';
867 			}
868 
869 	{NL}		{
870 			synerr( _( "missing }" ) );
871 			BEGIN(SECT2);
872 			++linenum;
873 			return '}';
874 			}
875 }
877 
878 
878 <PERCENT_BRACE_ACTION>{
	/* Text of a %{ ... %} block in section 2.  Some rules in this scope
	 * are shared with ACTION and CODEBLOCK through the nested <...>
	 * prefixes.
	 */
879 	{OPTWS}"%}".*		bracelevel = 0;	/* the next newline ends the block */
880 
881 	<ACTION>"/*"		ACTION_ECHO; yy_push_state( COMMENT );
882 
883 	<CODEBLOCK,ACTION>{
884 		"reject"	{
885 			ACTION_ECHO;
886 			CHECK_REJECT(yytext);
887 			}
888 		"yymore"	{
889 			ACTION_ECHO;
890 			CHECK_YYMORE(yytext);
891 			}
892 	}
893 
894     {M4QSTART}  ACTION_ECHO_QSTART;
895     {M4QEND}    ACTION_ECHO_QEND;
896     .           ACTION_ECHO;
897 	{NL}		{
898 			++linenum;
899 			ACTION_ECHO;
			/* the block is over once %} has been seen, or after
			 * one line when it was entered as indented code
			 */
900 			if ( bracelevel == 0 ||
901 			     (doing_codeblock && indented_code) )
902 				{
903 				if ( doing_rule_action )
904 					add_action( "\tYY_BREAK\n" );
905 
906 				doing_rule_action = doing_codeblock = false;
907 				BEGIN(SECT2);
908 				}
909 			}
910 }
912 
913 
914 	/* The "reject" and "yymore" keywords are checked for above, in the rules shared with PERCENT_BRACE_ACTION */
915 <ACTION>{
	/* Text of a rule action.  bracelevel follows the nesting of braces;
	 * a newline seen at level zero ends the action.
	 */
916 	"{"		ACTION_ECHO; ++bracelevel;
917 	"}"		ACTION_ECHO; --bracelevel;
918     {M4QSTART}  ACTION_ECHO_QSTART;
919     {M4QEND}    ACTION_ECHO_QEND;
920 	[^[:alpha:]_{}"'/\n\[\]]+	ACTION_ECHO;
921     [\[\]]      ACTION_ECHO;
922 	{NAME}		ACTION_ECHO;
923 	"'"([^'\\\n]|\\.)*"'"	ACTION_ECHO; /* character constant */
924 	\"		ACTION_ECHO; BEGIN(ACTION_STRING);
925 	{NL}		{
926 			++linenum;
927 			ACTION_ECHO;
928 			if ( bracelevel == 0 )
929 				{
930 				if ( doing_rule_action )
931 					add_action( "\tYY_BREAK\n" );
932 
933 				doing_rule_action = false;
934 				BEGIN(SECT2);
935 				}
936 			}
937 	.		ACTION_ECHO;
938 }
939 
939 <ACTION_STRING>{
	/* Inside a string literal in an action, where braces must not be
	 * counted.  Both the closing quote and a newline return to ACTION.
	 */
940 	[^"\\\n]+	ACTION_ECHO;
941 	\\.		ACTION_ECHO;
942 	{NL}		++linenum; ACTION_ECHO; BEGIN(ACTION);
943 	\"		ACTION_ECHO; BEGIN(ACTION);
944 	.		ACTION_ECHO;
945 }
947 
947 <COMMENT,COMMENT_DISCARD,ACTION,ACTION_STRING><<EOF>>	{
			/* input ended inside a comment, action or string */
948 			synerr( _( "EOF encountered inside an action" ) );
949 			yyterminate();
950 			}
951 
952 <EXTENDED_COMMENT,GROUP_WITH_PARAMS,GROUP_MINUS_PARAMS><<EOF>>	{
			/* input ended inside a "(?" construct */
953 			synerr( _( "EOF encountered inside pattern" ) );
954 			yyterminate();
955 			}
957 
957 <SECT2,QUOTE,FIRSTCCL,CCL>{ESCSEQ}	{
			/* yylval receives the value myesc() computes
			 * for the escape sequence
			 */
958 			yylval = myesc( (u_char *) yytext );
959 
			/* an escape counts as the first character of a class */
960 			if ( YY_START == FIRSTCCL )
961 				BEGIN(CCL);
962 
963 			return CHAR;
964 			}
966 
967 
967 <SECT3>{
    /* Section 3 is copied to yyout, with the M4QSTART / M4QEND sequences
     * replaced by escaped_qstart / escaped_qend.
     */
968     {M4QSTART}  fwrite (escaped_qstart, 1, strlen(escaped_qstart), yyout);
969     {M4QEND}    fwrite (escaped_qend, 1, strlen(escaped_qend), yyout);
970 	[^\[\]\n]*(\n?) ECHO;
971 	(.|\n)      ECHO;
972 	<<EOF>>		sectnum = 0; yyterminate();
973 }
975 
975 <*>.|\n			format_synerr( _( "bad character: %s" ), yytext );	/* last resort in every start condition */
977 
978 %%
979 
/* yywrap - called at end of input; move on to the next input file, if any */
979 int yywrap(void)
980 	{
981 	/* Returning 1 reports that the list of input files is used up. */
982 	if ( --num_input_files <= 0 )
983 		return 1;
984 
985 	/* Otherwise step to the next file name and let scanning resume. */
986 	++input_files;
987 	set_input_file( *input_files );
988 	return 0;
989 	}
991 
992 
992 /* set_input_file - open the given file (if NULL, stdin) for scanning */
/*
 * file is the path to scan; NULL or "-" selects stdin.  On return yyin is
 * the stream to read, infilename holds a fresh copy of the name
 * ("<stdin>" for standard input) and linenum is reset to 1.  A file that
 * cannot be opened is reported through lerrsf().
 *
 * The name stored by an earlier call is released here, the same way the
 * LINEDIR rule releases it, so switching files from yywrap() does not leak
 * one string per input file.
 * NOTE(review): the stream previously held in yyin is not closed here.
 */
993 
994 void set_input_file(const char *file)
995 	{
	/* Freed only after the new copy exists, in case file points into it. */
	void *previous_name = (void *) infilename;

996 	if ( file && strcmp( file, "-" ) )
997 		{
998 		infilename = copy_string( file );
999 		yyin = fopen( infilename, "r" );
1000 
1001 		if ( yyin == NULL )
1002 			lerrsf( _( "can't open %s" ), file );
1003 		}
1004 
1005 	else
1006 		{
1007 		yyin = stdin;
1008 		infilename = copy_string( "<stdin>" );
1009 		}
1010 
	free( previous_name );

1011 	linenum = 1;
1012 	}
1014 
1015