xref: /386bsd/usr/src/usr.bin/lex/scan.l (revision a2142627)
1 /* scan.l - scanner for flex input */
2 
3 %{
4 /*-
5  * Copyright (c) 1990 The Regents of the University of California.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Vern Paxson.
10  *
11  * The United States Government has rights in this work pursuant
12  * to contract no. DE-AC03-76SF00098 between the United States
13  * Department of Energy and the University of California.
14  *
15  * Redistribution and use in source and binary forms are permitted provided
16  * that: (1) source distributions retain this entire copyright notice and
17  * comment, and (2) distributions including binaries display the following
18  * acknowledgement:  ``This product includes software developed by the
19  * University of California, Berkeley and its contributors'' in the
20  * documentation or other materials provided with the distribution and in
21  * all advertising materials mentioning features or use of this software.
22  * Neither the name of the University nor the names of its contributors may
23  * be used to endorse or promote products derived from this software without
24  * specific prior written permission.
25  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
26  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
27  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
28  */
29 
30 /* $Header: scan.l,v 1.2 94/01/04 14:33:09 vern Exp $ */
31 
32 #include "flexdef.h"
33 #include "parse.h"
34 
/* Append the text of the current token to the action being collected. */
#define ACTION_ECHO add_action( yytext )

/* NOTE: expands with its own trailing semicolon; left unchanged so that any
 * existing use written without one keeps compiling.
 */
#define MARK_END_OF_PROLOG mark_prolog();

/* The generated scanner routine is declared as int flexscan(). */
#define YY_DECL \
	int flexscan()

/* The statement-like macros below are wrapped in do { ... } while ( 0 ) so
 * that each expands to exactly one statement and stays correct inside an
 * unbraced if/else.  Invoke them with a trailing semicolon.
 */

/* Store the first character of the token in yylval and return CHAR. */
#define RETURNCHAR \
	do \
		{ \
		yylval = (unsigned char) yytext[0]; \
		return CHAR; \
		} \
	while ( 0 )

/* Copy the token text into nmstr and return NAME.
 * NOTE(review): the copy is unbounded and nmstr is declared outside this
 * file - confirm that a token can never be longer than that buffer.
 */
#define RETURNNAME \
	do \
		{ \
		strcpy( nmstr, yytext ); \
		return NAME; \
		} \
	while ( 0 )

/* Push the characters str[start] .. end of str back onto the input, last
 * character first, so they are rescanned in their original order.  Uses
 * the int variable `i' of the enclosing scope as its loop index.
 */
#define PUT_BACK_STRING(str, start) \
	do \
		{ \
		for ( i = (int) strlen( (str) ) - 1; i >= (start); --i ) \
			unput((str)[i]); \
		} \
	while ( 0 )

/* Set the `reject' flag if str consists only of upper-case letters. */
#define CHECK_REJECT(str) \
	do \
		{ \
		if ( all_upper( str ) ) \
			reject = true; \
		} \
	while ( 0 )

/* Set the `yymore_used' flag if str consists only of lower-case letters. */
#define CHECK_YYMORE(str) \
	do \
		{ \
		if ( all_lower( str ) ) \
			yymore_used = true; \
		} \
	while ( 0 )
60 %}
61 
/* Exclusive start conditions used by the rules below. */
62 %x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE
63 %x FIRSTCCL CCL ACTION RECOVER BRACEERROR C_COMMENT ACTION_COMMENT
64 %x ACTION_STRING PERCENT_BRACE_ACTION USED_LIST CODEBLOCK_2
65 
/* WS is one or more blanks or tabs, OPTWS zero or more; NOT_WS is any single
 * character other than blank, tab or newline.
 */
66 WS		[ \t]+
67 OPTWS		[ \t]*
68 NOT_WS		[^ \t\n]
69 
/* NL is a newline, alone or paired with a carriage return in either order. */
70 NL		(\n|\r\n|\n\r)
71 
/* NAME is a letter or underscore followed by letters, digits, underscores
 * or hyphens.  NOT_NAME is a run of characters that are not letters,
 * underscore, '*' or newline.
 * NOTE(review): only lower-case letters are listed, yet CHECK_REJECT tests
 * all_upper() on text matched by "reject", so this scanner is presumably
 * built case-insensitively - confirm against the build rules.
 */
72 NAME		([a-z_][a-z_0-9-]*)
73 NOT_NAME	[^a-z_*\n]+
74 
/* Start condition names follow the same syntax as definition names. */
75 SCNAME		{NAME}
76 
/* ESCSEQ is a backslash followed by any non-newline character, by one to
 * three decimal digits, or by 'x' and one or two hex digits.
 */
77 ESCSEQ		(\\([^\n]|[0-9]{1,3}|x[0-9a-f]{1,2}))
78 
/* Members of a character class: the first one may be ']', later ones may
 * not; both exclude a bare backslash and newline but allow an ESCSEQ.
 */
79 FIRST_CCL_CHAR	([^\\\n]|{ESCSEQ})
80 CCL_CHAR	([^\\\n\]]|{ESCSEQ})
81 
82 %%
	/* Declarations local to the scanner routine.  The static ones keep
	 * their values between calls; doing_codeblock is an automatic
	 * variable and so is set to false again on every call.
	 */
83 	static int bracelevel, didadef, indented_code, checking_used;
84 
85 	int doing_codeblock = false;
	/* i is a scratch index, also used by the PUT_BACK_STRING macro. */
86 	int i;
	/* nmdef holds the text of the definition being picked up. */
87 	Char nmdef[MAXLINE], myesc();
88 
89 
	/* Section 1 (start condition INITIAL): directives, name definitions
	 * and code to be copied through.
	 */

	/* Indented line: handled as code by the CODEBLOCK rules, which go
	 * back to INITIAL at the newline when indented_code is set.
	 */
90 ^{WS}			indented_code = true; BEGIN(CODEBLOCK);
	/* Comment opening in column one: echoed via C_COMMENT. */
91 ^"/*"			ACTION_ECHO; BEGIN(C_COMMENT);
	/* Start condition declaration keywords. */
92 ^"%s"{NAME}?		return SCDECL;
93 ^"%x"{NAME}?		return XSCDECL;
	/* "%{" line: start of a non-indented code block. */
94 ^"%{".*{NL}		{
95 			++linenum;
96 			line_directive_out( (FILE *) 0 );
97 			indented_code = false;
98 			BEGIN(CODEBLOCK);
99 			}
100 
101 {WS}			return WHITESPACE;
102 
	/* "%%" ends section 1 and switches to the section 2 prolog. */
103 ^"%%".*			{
104 			sectnum = 2;
105 			bracelevel = 0;
106 			mark_defs1();
107 			line_directive_out( (FILE *) 0 );
108 			BEGIN(SECT2PROLOG);
109 			return SECTEND;
110 			}
111 
	/* %pointer / %array select how yytext is declared; each is refused
	 * with a warning when it conflicts with a command-line option.
	 */
112 ^"%pointer".*{NL}	{
113 			if ( lex_compat )
114 				warn( "%pointer incompatible with -l option" );
115 			else
116 				yytext_is_array = false;
117 			++linenum;
118 			}
119 ^"%array".*{NL}		{
120 			if ( C_plus_plus )
121 				warn( "%array incompatible with -+ option" );
122 			else
123 				yytext_is_array = true;
124 			++linenum;
125 			}
126 
	/* Deprecated directives: warn, remember which one was seen and scan
	 * the rest of the line with the USED_LIST rules.
	 */
127 ^"%used"		{
128 			warn( "%used/%unused have been deprecated" );
129 			checking_used = REALLY_USED; BEGIN(USED_LIST);
130 			}
131 ^"%unused"		{
132 			warn( "%used/%unused have been deprecated" );
133 			checking_used = REALLY_NOT_USED; BEGIN(USED_LIST);
134 			}
135 
136 
	/* A '%', one of the listed letters and an optional number: accepted
	 * and ignored apart from counting the line.
	 */
137 ^"%"[aceknopr]{OPTWS}[0-9]*{OPTWS}{NL}	++linenum;	/* ignore */
138 
139 ^"%"[^sxanpekotcru{}].*	synerr( "unrecognized '%' directive" );
140 
	/* A name in column one begins a definition: save the name in nmstr
	 * and pick up its text with the PICKUPDEF rules.
	 */
141 ^{NAME}			{
142 			strcpy( nmstr, yytext );
143 			didadef = false;
144 			BEGIN(PICKUPDEF);
145 			}
146 
147 {SCNAME}		RETURNNAME;
148 ^{OPTWS}{NL}		++linenum; /* allows blank lines in section 1 */
149 {OPTWS}{NL}		++linenum; return '\n';
150 
151 
	/* Inside a section 1 comment: echo everything, count newlines, and
	 * return to INITIAL once the closing delimiter has been seen.
	 */
152 <C_COMMENT>"*/"		ACTION_ECHO; BEGIN(INITIAL);
153 <C_COMMENT>"*/".*{NL}	++linenum; ACTION_ECHO; BEGIN(INITIAL);
154 <C_COMMENT>[^*\n]+	ACTION_ECHO;
155 <C_COMMENT>"*"		ACTION_ECHO;
156 <C_COMMENT>{NL}		++linenum; ACTION_ECHO;
157 
158 
	/* Section 1 code: a "%}" line ends a block (and is not echoed);
	 * everything else is echoed.  The words "reject" and "yymore" are
	 * also passed to CHECK_REJECT / CHECK_YYMORE, which may set the
	 * corresponding usage flag.
	 */
159 <CODEBLOCK>^"%}".*{NL}	++linenum; BEGIN(INITIAL);
160 <CODEBLOCK>"reject"	ACTION_ECHO; CHECK_REJECT(yytext);
161 <CODEBLOCK>"yymore"	ACTION_ECHO; CHECK_YYMORE(yytext);
162 <CODEBLOCK>{NAME}|{NOT_NAME}|.	ACTION_ECHO;
163 <CODEBLOCK>{NL}		{
164 			++linenum;
165 			ACTION_ECHO;
			/* An indented code line ends at its newline. */
166 			if ( indented_code )
167 				BEGIN(INITIAL);
168 			}
169 
170 
<PICKUPDEF>{WS}		/* separates name and definition */

<PICKUPDEF>{NOT_WS}.*	{
			/* The rest of the line is the text of the definition
			 * whose name was saved in nmstr.  nmdef holds MAXLINE
			 * bytes including the terminating NUL, so refuse
			 * anything longer instead of overrunning it.
			 */
			if ( yyleng >= MAXLINE )
				{
				synerr( "definition too long" );

				/* Already diagnosed; keep the newline rule
				 * from also reporting an incomplete
				 * definition.
				 */
				didadef = true;
				}

			else
				{
				strcpy( (char *) nmdef, yytext );

				/* Skip trailing whitespace. */
				for ( i = strlen( (char *) nmdef ) - 1;
				      i >= 0 &&
				      (nmdef[i] == ' ' || nmdef[i] == '\t');
				      --i )
					;

				nmdef[i + 1] = '\0';

				ndinstal( nmstr, nmdef );
				didadef = true;
				}
			}

<PICKUPDEF>{NL}		{
			/* End of the definition line; a name with no text
			 * after it is an error.
			 */
			if ( ! didadef )
				synerr( "incomplete name definition" );
			BEGIN(INITIAL);
			++linenum;
			}
194 
	/* RECOVER: consume the rest of the line, go back to INITIAL and
	 * return the consumed text as a NAME token.
	 */
195 <RECOVER>.*{NL}		++linenum; BEGIN(INITIAL); RETURNNAME;
196 
197 
	/* Arguments of %used / %unused: blanks are skipped, and only the two
	 * words below are accepted, each in one specific letter case; the
	 * directive seen (checking_used) is recorded for the named feature.
	 * The newline ends the list.
	 */
198 <USED_LIST>{NL}		++linenum; BEGIN(INITIAL);
199 <USED_LIST>{WS}
200 <USED_LIST>"reject"	{
201 			if ( all_upper( yytext ) )
202 				reject_really_used = checking_used;
203 			else
204 				synerr(
205 				"unrecognized %used/%unused construct" );
206 			}
207 <USED_LIST>"yymore"	{
208 			if ( all_lower( yytext ) )
209 				yymore_really_used = checking_used;
210 			else
211 				synerr(
212 				"unrecognized %used/%unused construct" );
213 			}
214 <USED_LIST>{NOT_WS}+	synerr( "unrecognized %used/%unused construct" );
215 
216 
	/* Section 2 prolog: code that precedes the first rule.  bracelevel
	 * tracks whether we are between "%{" and "%}" lines; only the two
	 * delimiter characters are consumed, the rest of the line is
	 * rescanned.
	 */
217 <SECT2PROLOG>^"%{".*	++bracelevel; yyless( 2 );	/* eat only %{ */
218 <SECT2PROLOG>^"%}".*	--bracelevel; yyless( 2 );	/* eat only %} */
219 
220 <SECT2PROLOG>^{WS}.*	ACTION_ECHO;	/* indented code in prolog */
221 
222 <SECT2PROLOG>^{NOT_WS}.*	{	/* non-indented code */
223 			if ( bracelevel <= 0 )
224 				{ /* not in %{ ... %} */
				/* This line is the first rule: the prolog
				 * is over and SECT2 rescans the line.
				 */
225 				yyless( 0 );	/* put it all back */
226 				mark_prolog();
227 				BEGIN(SECT2);
228 				}
229 			else
230 				ACTION_ECHO;
231 			}
232 
233 <SECT2PROLOG>.*		ACTION_ECHO;
234 <SECT2PROLOG>{NL}	++linenum; ACTION_ECHO;
235 
	/* Input ended before any rule was seen. */
236 <SECT2PROLOG><<EOF>>	{
237 			mark_prolog();
238 			sectnum = 0;
239 			yyterminate(); /* to stop the parser */
240 			}
241 
242 <SECT2>^{OPTWS}{NL}	++linenum; /* allow blank lines in section 2 */
243 
	/* A line starting with whitespace or "%{" is code, not a rule.
	 * Leading whitespace is echoed, the "%{" marker is not.
	 */
244 <SECT2>^({WS}|"%{")	{
245 			indented_code = (yytext[0] != '%');
246 			doing_codeblock = true;
247 			bracelevel = 1;
248 
249 			if ( indented_code )
250 				ACTION_ECHO;
251 
252 			BEGIN(CODEBLOCK_2);
253 			}
254 
	/* Pattern operators that need their own start condition or that are
	 * only special in a particular position.
	 */
255 <SECT2>^"<"		BEGIN(SC); return '<';
256 <SECT2>^"^"		return '^';
257 <SECT2>\"		BEGIN(QUOTE); return '"';
258 <SECT2>"{"/[0-9]		BEGIN(NUM); return '{';
259 <SECT2>"{"[^0-9\n][^}\n]*	BEGIN(BRACEERROR);
260 <SECT2>"$"/([ \t]|{NL})	return '$';
261 
	/* Whitespace ends the pattern.  What follows it selects the kind of
	 * action: a "%{" block, a '|' continuation, or ordinary action text.
	 * Each case returns '\n' to the parser.
	 */
262 <SECT2>{WS}"%{"		{
263 			bracelevel = 1;
264 			BEGIN(PERCENT_BRACE_ACTION);
265 			return '\n';
266 			}
267 <SECT2>{WS}"|".*{NL}	continued_action = true; ++linenum; return '\n';
268 
269 <SECT2>{WS}		{
270 			/* This rule is separate from the one below because
271 			 * otherwise we get variable trailing context, so
272 			 * we can't build the scanner using -{f,F}.
273 			 */
274 			bracelevel = 0;
275 			continued_action = false;
276 			BEGIN(ACTION);
277 			return '\n';
278 			}
279 
	/* Pattern with no action text on its line: the newline is pushed
	 * back so that the ACTION rules see it and close the empty action.
	 */
280 <SECT2>{OPTWS}{NL}	{
281 			bracelevel = 0;
282 			continued_action = false;
283 			BEGIN(ACTION);
284 			unput( '\n' );	/* so <ACTION> sees it */
285 			return '\n';
286 			}
287 
288 <SECT2>"<<EOF>>"	return EOF_OP;
289 
	/* "%%" ends the rules; the remainder is scanned by the SECT3 rules. */
290 <SECT2>^"%%".*		{
291 			sectnum = 3;
292 			BEGIN(SECT3);
293 			yyterminate(); /* to stop the parser */
294 			}
295 
	/* Character class.  The pattern matches the opening bracket and the
	 * class members but not the closing bracket, since CCL_CHAR excludes
	 * it.  The matched text serves as the lookup key for classes already
	 * seen.
	 */
296 <SECT2>"["{FIRST_CCL_CHAR}{CCL_CHAR}*	{
297 			int cclval;
298 
			/* NOTE(review): unbounded copy; nmstr is declared
			 * outside this file - confirm the class text always
			 * fits.
			 */
299 			strcpy( nmstr, yytext );
300 
301 			/* Check to see if we've already encountered this
302 			 * ccl.
303 			 */
304 			if ( (cclval = ccllookup( (Char *) nmstr )) )
305 				{
				/* Known class: consume the closing bracket
				 * here and hand back the stored value.
				 */
306 				if ( input() != ']' )
307 					synerr( "bad character class" );
308 
309 				yylval = cclval;
310 				++cclreuse;
311 				return PREVCCL;
312 				}
313 			else
314 				{
315 				/* We fudge a bit.  We know that this ccl will
316 				 * soon be numbered as lastccl + 1 by cclinit.
317 				 */
318 				cclinstal( (Char *) nmstr, lastccl + 1 );
319 
320 				/* Push back everything but the leading bracket
321 				 * so the ccl can be rescanned.
322 				 */
323 				yyless( 1 );
324 
325 				BEGIN(FIRSTCCL);
326 				return '[';
327 				}
328 			}
329 
	/* Reference to a named definition: look the name up and push its
	 * text back onto the input so that it is scanned in place of the
	 * reference.
	 */
330 <SECT2>"{"{NAME}"}"	{
331 			register Char *nmdefptr;
332 			Char *ndlookup();
333 
			/* Copy the name without the opening brace ... */
334 			strcpy( nmstr, yytext + 1 );
335 			nmstr[yyleng - 2] = '\0';  /* chop trailing brace */
336 
337 			if ( ! (nmdefptr = ndlookup( nmstr )) )
338 				format_synerr( "undefined definition {%s}",
339 						nmstr );
340 
341 			else
342 				{ /* push back name surrounded by ()'s */
343 				int len = strlen( (char *) nmdefptr );
344 
				/* No parentheses in lex-compatibility mode
				 * or when the text starts with '^' or ends
				 * with '$'.
				 */
345 				if ( lex_compat || nmdefptr[0] == '^' ||
346 				     (len > 0 && nmdefptr[len - 1] == '$') )
347 					{ /* don't use ()'s after all */
348 					PUT_BACK_STRING((char *) nmdefptr, 0);
349 
350 					if ( nmdefptr[0] == '^' )
351 						BEGIN(CARETISBOL);
352 					}
353 
354 				else
355 					{
					/* Pushed back in reverse, so the
					 * input reads '(' text ')'.
					 */
356 					unput(')');
357 					PUT_BACK_STRING((char *) nmdefptr, 0);
358 					unput('(');
359 					}
360 				}
361 			}
362 
	/* Operator characters are returned as themselves; any other single
	 * character is returned as CHAR.
	 */
363 <SECT2>[/|*+?.()]	return (unsigned char) yytext[0];
364 <SECT2>.		RETURNCHAR;
365 
366 
	/* Inside a <start condition> list.  A '>' directly followed by '^'
	 * goes to CARETISBOL so that the caret is returned as the '^' token.
	 */
367 <SC>[,*]		return (unsigned char) yytext[0];
368 <SC>">"			BEGIN(SECT2); return '>';
369 <SC>">"/^		BEGIN(CARETISBOL); return '>';
370 <SC>{SCNAME}		RETURNNAME;
371 <SC>.			{
372 			format_synerr( "bad <start condition>: %s", yytext );
373 			}
374 
375 <CARETISBOL>"^"		BEGIN(SECT2); return '^';
376 
377 
	/* Inside a quoted string: characters are returned as CHAR until the
	 * closing quote; a newline first is an error and ends the string.
	 */
378 <QUOTE>[^"\n]		RETURNCHAR;
379 <QUOTE>\"		BEGIN(SECT2); return '"';
380 
381 <QUOTE>{NL}		{
382 			synerr( "missing quote" );
383 			BEGIN(SECT2);
384 			++linenum;
385 			return '"';
386 			}
387 
388 
	/* First position of a character class: a '^' here is returned as
	 * the '^' token.  When '-' or ']' follows it we stay in FIRSTCCL, so
	 * that character is taken by the catch-all rule below as CHAR.
	 */
389 <FIRSTCCL>"^"/[^-\]\n]	BEGIN(CCL); return '^';
390 <FIRSTCCL>"^"/("-"|"]")	return '^';
391 <FIRSTCCL>.		BEGIN(CCL); RETURNCHAR;
392 
	/* Rest of a character class: '-' is returned as the '-' token
	 * unless it is the last member; ']' closes the class.
	 */
393 <CCL>-/[^\]\n]		return '-';
394 <CCL>[^\]\n]		RETURNCHAR;
395 <CCL>"]"		BEGIN(SECT2); return ']';
396 <CCL>.|{NL}		{
397 			synerr( "bad character class" );
398 			BEGIN(SECT2);
399 			return ']';
400 			}
401 
402 
	/* Inside a {n,m} repeat count.  On an error the closing brace is
	 * returned anyway and scanning resumes in SECT2.
	 */
403 <NUM>[0-9]+		{
404 			yylval = myctoi( yytext );
405 			return NUMBER;
406 			}
407 
408 <NUM>","		return ',';
409 <NUM>"}"		BEGIN(SECT2); return '}';
410 
411 <NUM>.			{
412 			synerr( "bad character inside {}'s" );
413 			BEGIN(SECT2);
414 			return '}';
415 			}
416 
417 <NUM>{NL}		{
418 			synerr( "missing }" );
419 			BEGIN(SECT2);
420 			++linenum;
421 			return '}';
422 			}
423 
424 
	/* Reached after a '{' that starts neither a number nor a valid
	 * {name} reference: report it at the closing brace or the newline.
	 */
425 <BRACEERROR>"}"		synerr( "bad name in {}'s" ); BEGIN(SECT2);
426 <BRACEERROR>{NL}	synerr( "missing }" ); ++linenum; BEGIN(SECT2);
427 
428 
	/* Code in section 2: either a code block between rules (CODEBLOCK_2,
	 * doing_codeblock set) or a "%{" action (PERCENT_BRACE_ACTION).
	 */
429 <CODEBLOCK_2>"/*"	ACTION_ECHO; BEGIN(ACTION_COMMENT);
	/* "%}" is not echoed; it zeroes bracelevel so that the next newline
	 * ends the block.
	 */
430 <PERCENT_BRACE_ACTION,CODEBLOCK_2>{OPTWS}"%}".*		bracelevel = 0;
431 <PERCENT_BRACE_ACTION,CODEBLOCK_2,ACTION>"reject"	{
432 			ACTION_ECHO;
433 			CHECK_REJECT(yytext);
434 			}
435 <PERCENT_BRACE_ACTION,CODEBLOCK_2,ACTION>"yymore"	{
436 			ACTION_ECHO;
437 			CHECK_YYMORE(yytext);
438 			}
439 <PERCENT_BRACE_ACTION,CODEBLOCK_2>{NAME}|{NOT_NAME}|.	ACTION_ECHO;
440 <PERCENT_BRACE_ACTION,CODEBLOCK_2>{NL}			{
441 			++linenum;
442 			ACTION_ECHO;
			/* The block ends at this newline if "%}" was seen
			 * or if it was a single indented code line.
			 */
443 			if ( bracelevel == 0 ||
444 			     (doing_codeblock && indented_code) )
445 				{
				/* Only a rule's action gets a YY_BREAK. */
446 				if ( ! doing_codeblock )
447 					add_action( "\tYY_BREAK\n" );
448 
449 				doing_codeblock = false;
450 				BEGIN(SECT2);
451 				}
452 			}
453 
454 
	/* Ordinary action text: everything is echoed while bracelevel tracks
	 * the nesting of braces.  Comments, strings and character constants
	 * are matched separately so that braces inside them are not counted.
	 */
455 	/* Reject and YYmore() are checked for above, in PERCENT_BRACE_ACTION */
456 <ACTION>"{"		ACTION_ECHO; ++bracelevel;
457 <ACTION>"}"		ACTION_ECHO; --bracelevel;
458 <ACTION>[^a-z_{}"'/\n]+	ACTION_ECHO;
459 <ACTION>{NAME}		ACTION_ECHO;
460 <ACTION>"/*"		ACTION_ECHO; BEGIN(ACTION_COMMENT);
461 <ACTION>"'"([^'\\\n]|\\.)*"'"	ACTION_ECHO; /* character constant */
462 <ACTION>\"		ACTION_ECHO; BEGIN(ACTION_STRING);
463 <ACTION>{NL}		{
464 			++linenum;
465 			ACTION_ECHO;
			/* A newline outside all braces ends the action. */
466 			if ( bracelevel == 0 )
467 				{
468 				add_action( "\tYY_BREAK\n" );
469 				BEGIN(SECT2);
470 				}
471 			}
472 <ACTION>.		ACTION_ECHO;
473 
	/* End of a comment: return to whichever state it was entered from. */
474 <ACTION_COMMENT>"*/"	{
475 			ACTION_ECHO;
476 			if ( doing_codeblock )
477 				BEGIN(CODEBLOCK_2);
478 			else
479 				BEGIN(ACTION);
480 			}
481 
482 <ACTION_COMMENT>"*"	ACTION_ECHO;
483 <ACTION_COMMENT>[^*\n]+	ACTION_ECHO;
484 <ACTION_COMMENT>[^*\n]*{NL}	++linenum; ACTION_ECHO;
485 
	/* Inside a string literal of an action: a backslash escapes the next
	 * character, an unescaped quote ends the string.
	 */
486 <ACTION_STRING>[^"\\\n]+	ACTION_ECHO;
487 <ACTION_STRING>\\.	ACTION_ECHO;
488 <ACTION_STRING>{NL}	++linenum; ACTION_ECHO;
489 <ACTION_STRING>\"	ACTION_ECHO; BEGIN(ACTION);
490 <ACTION_STRING>.	ACTION_ECHO;
491 
492 <ACTION,ACTION_COMMENT,ACTION_STRING><<EOF>>	{
493 			synerr( "EOF encountered inside an action" );
494 			yyterminate();
495 			}
496 
497 
	/* Escape sequences are converted by myesc() and returned as CHAR. */
498 <SECT2,QUOTE,CCL>{ESCSEQ}	{
499 			yylval = myesc( (Char *) yytext );
500 			return CHAR;
501 			}
502 
	/* Same, as the first member of a character class. */
503 <FIRSTCCL>{ESCSEQ}	{
504 			yylval = myesc( (Char *) yytext );
505 			BEGIN(CCL);
506 			return CHAR;
507 			}
508 
509 
	/* Section 3 is passed through with ECHO, line by line. */
510 <SECT3>.*(\n?)		ECHO;
511 <SECT3><<EOF>>		sectnum = 0; yyterminate();
512 
	/* Anything no other rule matched, in any start condition. */
513 <*>.|\n			format_synerr( "bad character: %s", yytext );
514 
515 %%
516 
517 
518 int yywrap()
519 	{
520 	if ( --num_input_files > 0 )
521 		{
522 		set_input_file( *++input_files );
523 		return 0;
524 		}
525 
526 	else
527 		return 1;
528 	}
529 
530 
531 /* set_input_file - open the given file (if NULL, stdin) for scanning */
532 
533 void set_input_file( file )
534 char *file;
535 	{
536 	if ( file )
537 		{
538 		infilename = file;
539 		yyin = fopen( infilename, "r" );
540 
541 		if ( yyin == NULL )
542 			lerrsf( "can't open %s", file );
543 		}
544 
545 	else
546 		{
547 		yyin = stdin;
548 		infilename = "<stdin>";
549 		}
550 	}
551 
552 
553 /* Wrapper routines for accessing the scanner's malloc routines. */
554 
/* flex_alloc - obtain size bytes from the scanner's allocator */
void *flex_alloc( size )
unsigned int size;
	{
	void *block = yy_flex_alloc( size );

	return block;
	}
560 
/* flex_realloc - resize ptr to size bytes via the scanner's allocator */
void *flex_realloc( ptr, size )
void *ptr;
unsigned int size;
	{
	void *resized = yy_flex_realloc( ptr, size );

	return resized;
	}
567 
/* flex_free - hand ptr back to the scanner's allocator */
void flex_free( ptr )
void *ptr;
	{
	yy_flex_free( ptr );
	return;
	}
573