xref: /illumos-gate/usr/src/cmd/oawk/awk.lx.l (revision 2a8bcb4e)
17c478bd9Sstevel@tonic-gate %{
27c478bd9Sstevel@tonic-gate /*
37c478bd9Sstevel@tonic-gate  * CDDL HEADER START
47c478bd9Sstevel@tonic-gate  *
57c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
67c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
77c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
87c478bd9Sstevel@tonic-gate  * with the License.
97c478bd9Sstevel@tonic-gate  *
107c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
117c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
127c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
137c478bd9Sstevel@tonic-gate  * and limitations under the License.
147c478bd9Sstevel@tonic-gate  *
157c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
167c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
177c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
187c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
197c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
207c478bd9Sstevel@tonic-gate  *
217c478bd9Sstevel@tonic-gate  * CDDL HEADER END
227c478bd9Sstevel@tonic-gate  */
23*dc5a8425Srobbin /*
24*dc5a8425Srobbin  * Copyright 1996 Sun Microsystems, Inc.  All rights reserved.
25*dc5a8425Srobbin  * Use is subject to license terms.
26*dc5a8425Srobbin  */
277c478bd9Sstevel@tonic-gate %}
287c478bd9Sstevel@tonic-gate %{
29*dc5a8425Srobbin /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
30*dc5a8425Srobbin %}
31*dc5a8425Srobbin %{
32*dc5a8425Srobbin /*	  All Rights Reserved  	*/
33*dc5a8425Srobbin %}
347c478bd9Sstevel@tonic-gate 
357c478bd9Sstevel@tonic-gate %Start A str chc sc reg comment
367c478bd9Sstevel@tonic-gate 
377c478bd9Sstevel@tonic-gate %{
/*
 * Start conditions declared by the Start line, as the rules in this file
 * use them:
 *   A       - ordinary program text
 *   str     - inside a double-quoted string constant
 *   chc     - inside a bracketed character class of a regular expression
 *   sc      - a closing brace was just scanned; a semicolon token has been
 *             returned and the brace token itself is returned on the next
 *             call to yylex()
 *   reg     - inside a regular expression (entered through startreg())
 *   comment - inside a comment that follows other text on the same line
 */
387c478bd9Sstevel@tonic-gate #include	"awk.h"
397c478bd9Sstevel@tonic-gate #include	"awk.def"
407c478bd9Sstevel@tonic-gate #undef	input	/* defeat lex */
417c478bd9Sstevel@tonic-gate extern int	yylval;
427c478bd9Sstevel@tonic-gate extern int	mustfld;
437c478bd9Sstevel@tonic-gate 
/* Line counter maintained by the rules: bumped wherever a newline is matched. */
447c478bd9Sstevel@tonic-gate long long	lineno	= 1;
/*
 * RETURN(x) hands token x back to the caller of yylex().  When built with
 * DEBUG and dbg is set, the token is first passed to ptoken().
 */
457c478bd9Sstevel@tonic-gate #ifdef	DEBUG
467c478bd9Sstevel@tonic-gate #	define	RETURN(x)	{if (dbg) ptoken(x); return (x); }
477c478bd9Sstevel@tonic-gate #else
487c478bd9Sstevel@tonic-gate #	define	RETURN(x)	return (x)
497c478bd9Sstevel@tonic-gate #endif
/*
 * CADD appends the first character of the current match (yytext[0]) to cbuf.
 * Once clen reaches CBUFLEN-1 it reports the overflow through yyerror() and
 * switches the scanner back to state A.  CBUFLEN is defined after CADD; that
 * works because the macro is only expanded later, inside the rules.
 */
507c478bd9Sstevel@tonic-gate #define	CADD	{ cbuf[clen++]=yytext[0]; if (clen>=CBUFLEN-1) { yyerror(\
517c478bd9Sstevel@tonic-gate 		"string too long", cbuf); BEGIN A; } }
527c478bd9Sstevel@tonic-gate #define	CBUFLEN	150
/*
 * cbuf collects the text of the string constant or character class being
 * scanned and clen is the number of characters stored so far.  cflag is set
 * to 1 when a character class starts with a caret (negated) and 0 otherwise.
 */
537c478bd9Sstevel@tonic-gate wchar_t cbuf[CBUFLEN];
547c478bd9Sstevel@tonic-gate int	clen, cflag;
557c478bd9Sstevel@tonic-gate %}
567c478bd9Sstevel@tonic-gate 
	/* Table-size directives for lex, followed by the named patterns used by the rules. */
577c478bd9Sstevel@tonic-gate %a	50000
587c478bd9Sstevel@tonic-gate %o	50000
597c478bd9Sstevel@tonic-gate 
607c478bd9Sstevel@tonic-gate A	[a-zA-Z_]
617c478bd9Sstevel@tonic-gate B	[a-zA-Z0-9_]
627c478bd9Sstevel@tonic-gate D	[0-9]
637c478bd9Sstevel@tonic-gate WS	[ \t]
647c478bd9Sstevel@tonic-gate 
657c478bd9Sstevel@tonic-gate %%
	/*
	 * Code placed ahead of the first rule is copied by lex to the top of
	 * yylex(), so this switch is executed on entry to yylex().
	 *
	 * The switch expression presumably recovers the current start
	 * condition number from lex internals (BEGIN is assumed to store
	 * yysvec + 1 + state into yybgin) - TODO confirm against the lex
	 * driver in use.
	 *
	 *   0  - no start condition selected yet: enter state A.
	 *   sc - the previous call matched a closing brace and returned a
	 *        semicolon token; go back to state A and return the brace
	 *        token now, without scanning any input.
	 */
667c478bd9Sstevel@tonic-gate 	switch (yybgin-yysvec-1) {	/* witchcraft */
677c478bd9Sstevel@tonic-gate 	case 0:
687c478bd9Sstevel@tonic-gate 		BEGIN A;
697c478bd9Sstevel@tonic-gate 		break;
707c478bd9Sstevel@tonic-gate 	case sc:
717c478bd9Sstevel@tonic-gate 		BEGIN A;
727c478bd9Sstevel@tonic-gate 		RETURN('}');
737c478bd9Sstevel@tonic-gate 	}
747c478bd9Sstevel@tonic-gate 
	/*
	 * Rules for start state A (ordinary program text), plus the two rules
	 * for state comment and the line-continuation rule for state reg.
	 *
	 * Empty lines and lines holding only a comment are consumed here and
	 * just bump lineno.  A backslash directly followed by a newline is
	 * swallowed as a continuation.  Blanks and tabs are discarded.
	 *
	 * Operator rules store the specific operator code in yylval and return
	 * the token class (RELOP, MATCHOP, ASGNOP, INCR, DECR).  Note that the
	 * append operator is returned as a RELOP whose yylval is APPEND.
	 */
757c478bd9Sstevel@tonic-gate <A>^\n		lineno++;
767c478bd9Sstevel@tonic-gate <A>^{WS}*#.*\n	lineno++;	/* strip comment lines */
777c478bd9Sstevel@tonic-gate <A>{WS}		/* dummy for cstyle */;
787c478bd9Sstevel@tonic-gate <A>"\\"\n	lineno++;
797c478bd9Sstevel@tonic-gate <reg>"\\"\n	lineno++;
807c478bd9Sstevel@tonic-gate <A>"||"		RETURN(BOR);
817c478bd9Sstevel@tonic-gate <A>BEGIN	RETURN(XBEGIN);
827c478bd9Sstevel@tonic-gate <A>END		RETURN(XEND);
837c478bd9Sstevel@tonic-gate <A>PROGEND	RETURN(EOF);
847c478bd9Sstevel@tonic-gate <A>"&&"		RETURN(AND);
857c478bd9Sstevel@tonic-gate <A>"!"		RETURN(NOT);
867c478bd9Sstevel@tonic-gate <A>"!="		{ yylval = NE; RETURN(RELOP); }
877c478bd9Sstevel@tonic-gate <A>"~"		{ yylval = MATCH; RETURN(MATCHOP); }
887c478bd9Sstevel@tonic-gate <A>"!~"		{ yylval = NOTMATCH; RETURN(MATCHOP); }
897c478bd9Sstevel@tonic-gate <A>"<"		{ yylval = LT; RETURN(RELOP); }
907c478bd9Sstevel@tonic-gate <A>"<="		{ yylval = LE; RETURN(RELOP); }
917c478bd9Sstevel@tonic-gate <A>"=="		{ yylval = EQ; RETURN(RELOP); }
927c478bd9Sstevel@tonic-gate <A>">="		{ yylval = GE; RETURN(RELOP); }
937c478bd9Sstevel@tonic-gate <A>">"		{ yylval = GT; RETURN(RELOP); }
947c478bd9Sstevel@tonic-gate <A>">>"		{ yylval = APPEND; RETURN(RELOP); }
957c478bd9Sstevel@tonic-gate <A>"++"		{ yylval = INCR; RETURN(INCR); }
967c478bd9Sstevel@tonic-gate <A>"--"		{ yylval = DECR; RETURN(DECR); }
977c478bd9Sstevel@tonic-gate <A>"+="		{ yylval = ADDEQ; RETURN(ASGNOP); }
987c478bd9Sstevel@tonic-gate <A>"-="		{ yylval = SUBEQ; RETURN(ASGNOP); }
997c478bd9Sstevel@tonic-gate <A>"*="		{ yylval = MULTEQ; RETURN(ASGNOP); }
1007c478bd9Sstevel@tonic-gate <A>"/="		{ yylval = DIVEQ; RETURN(ASGNOP); }
1017c478bd9Sstevel@tonic-gate <A>"%="		{ yylval = MODEQ; RETURN(ASGNOP); }
1027c478bd9Sstevel@tonic-gate <A>"="		{ yylval = ASSIGN; RETURN(ASGNOP); }
1037c478bd9Sstevel@tonic-gate 
	/*
	 * Field references.  A dollar sign followed by digits is converted
	 * with watoi(): value 0 returns STRING for the symbol found by
	 * lookup() under the record name, any other value returns FIELD with
	 * yylval taken from fieldadr().  A dollar sign not followed by digits
	 * returns INDIRECT.  A reference to NF sets mustfld before returning
	 * VAR.  Numeric constants are entered in symtab with their watof()
	 * value and returned as NUMBER.
	 */
1047c478bd9Sstevel@tonic-gate <A>"$"{D}+	{
1057c478bd9Sstevel@tonic-gate 		static wchar_t L_record[] = L"$record";
1067c478bd9Sstevel@tonic-gate 		if (watoi(yytext+1)==0) {
1077c478bd9Sstevel@tonic-gate 				yylval = (int)lookup(L_record, symtab, 0);
1087c478bd9Sstevel@tonic-gate 				RETURN(STRING);
1097c478bd9Sstevel@tonic-gate 			} else {
1107c478bd9Sstevel@tonic-gate 				yylval = fieldadr(watoi(yytext+1));
1117c478bd9Sstevel@tonic-gate 				RETURN(FIELD);
1127c478bd9Sstevel@tonic-gate 			}
1137c478bd9Sstevel@tonic-gate 		}
1147c478bd9Sstevel@tonic-gate <A>"$"{WS}*	{ RETURN(INDIRECT); }
1157c478bd9Sstevel@tonic-gate <A>NF		{ mustfld=1;
1167c478bd9Sstevel@tonic-gate 		yylval = (int)setsymtab(yytext, NULL, 0.0, NUM, symtab);
1177c478bd9Sstevel@tonic-gate 		RETURN(VAR); }
1187c478bd9Sstevel@tonic-gate <A>({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)?	{
1197c478bd9Sstevel@tonic-gate 		yylval = (int)setsymtab(yytext, NULL, watof(yytext),
1207c478bd9Sstevel@tonic-gate 			CON|NUM, symtab); RETURN(NUMBER); }
	/*
	 * A closing brace (optionally followed by blanks and a newline) is
	 * returned as a semicolon token and the scanner enters state sc; the
	 * code at the top of the rules section then returns the brace token on
	 * the next call.  A semicolon followed by a newline is returned as a
	 * single semicolon token; any other newline is returned as NL.
	 */
1217c478bd9Sstevel@tonic-gate <A>"}"{WS}*\n	{ BEGIN sc; lineno++; RETURN(';'); }
1227c478bd9Sstevel@tonic-gate <A>"}"		{ BEGIN sc; RETURN(';'); }
1237c478bd9Sstevel@tonic-gate <A>";"\n		{ lineno++; RETURN(';'); }
1247c478bd9Sstevel@tonic-gate <A>\n		{ lineno++; RETURN(NL); }
	/*
	 * Keywords and built-in function names.  They are listed ahead of the
	 * generic identifier rule because lex takes the longest match and, for
	 * matches of equal length, the rule that appears first.  Any other
	 * identifier is entered in symtab and returned as VAR.
	 */
1257c478bd9Sstevel@tonic-gate <A>while	RETURN(WHILE);
1267c478bd9Sstevel@tonic-gate <A>for		RETURN(FOR);
1277c478bd9Sstevel@tonic-gate <A>if		RETURN(IF);
1287c478bd9Sstevel@tonic-gate <A>else		RETURN(ELSE);
1297c478bd9Sstevel@tonic-gate <A>next		RETURN(NEXT);
1307c478bd9Sstevel@tonic-gate <A>exit		RETURN(EXIT);
1317c478bd9Sstevel@tonic-gate <A>break	RETURN(BREAK);
1327c478bd9Sstevel@tonic-gate <A>continue	RETURN(CONTINUE);
1337c478bd9Sstevel@tonic-gate <A>print	{ yylval = PRINT; RETURN(PRINT); }
1347c478bd9Sstevel@tonic-gate <A>printf	{ yylval = PRINTF; RETURN(PRINTF); }
1357c478bd9Sstevel@tonic-gate <A>sprintf	{ yylval = SPRINTF; RETURN(SPRINTF); }
1367c478bd9Sstevel@tonic-gate <A>split	{ yylval = SPLIT; RETURN(SPLIT); }
1377c478bd9Sstevel@tonic-gate <A>substr	RETURN(SUBSTR);
1387c478bd9Sstevel@tonic-gate <A>index	RETURN(INDEX);
1397c478bd9Sstevel@tonic-gate <A>in		RETURN(IN);
1407c478bd9Sstevel@tonic-gate <A>getline	RETURN(GETLINE);
1417c478bd9Sstevel@tonic-gate <A>length	{ yylval = FLENGTH; RETURN(FNCN); }
1427c478bd9Sstevel@tonic-gate <A>log		{ yylval = FLOG; RETURN(FNCN); }
1437c478bd9Sstevel@tonic-gate <A>int		{ yylval = FINT; RETURN(FNCN); }
1447c478bd9Sstevel@tonic-gate <A>exp		{ yylval = FEXP; RETURN(FNCN); }
1457c478bd9Sstevel@tonic-gate <A>sqrt		{ yylval = FSQRT; RETURN(FNCN); }
1467c478bd9Sstevel@tonic-gate <A>{A}{B}*	{
1477c478bd9Sstevel@tonic-gate 		static wchar_t L_0[] = { 0 };
1487c478bd9Sstevel@tonic-gate 		yylval = (int)setsymtab(yytext, tostring(L_0), 0.0, STR|NUM,
1497c478bd9Sstevel@tonic-gate 			symtab);
1507c478bd9Sstevel@tonic-gate 		RETURN(VAR);
1517c478bd9Sstevel@tonic-gate 		}
1527c478bd9Sstevel@tonic-gate <A>\"		{ BEGIN str; clen=0; }
1537c478bd9Sstevel@tonic-gate 
1547c478bd9Sstevel@tonic-gate <A>#		{ BEGIN comment; }
1557c478bd9Sstevel@tonic-gate <comment>\n	{ BEGIN A; lineno++; RETURN(NL); }
1567c478bd9Sstevel@tonic-gate <comment>.	/* dummy */;
1577c478bd9Sstevel@tonic-gate 
1587c478bd9Sstevel@tonic-gate <A>.		{ yylval = yytext[0]; RETURN(yytext[0]); }
1597c478bd9Sstevel@tonic-gate 
	/*
	 * Rules for start state reg (inside a regular expression).
	 *
	 * An opening bracket starts a character class: the scanner moves to
	 * state chc with clen reset, and cflag records whether the class began
	 * with a caret.  Metacharacters are returned as their own tokens.
	 *
	 * A backslash followed by three digits is read as an octal value into
	 * yylval.  Any other backslash pair maps n, t, b, r and f to the
	 * matching control character and every other character to itself.
	 * Both forms return CHAR.
	 *
	 * A slash ends the regular expression: the scanner returns to state A
	 * and the slash is pushed back with unput() so that it is scanned
	 * again there.  A newline is reported through yyerror() and also
	 * returns the scanner to state A.  Every remaining character is
	 * returned as CHAR with itself in yylval.
	 */
1607c478bd9Sstevel@tonic-gate <reg>"["	{ BEGIN chc; clen=0; cflag=0; }
1617c478bd9Sstevel@tonic-gate <reg>"[^"	{ BEGIN chc; clen=0; cflag=1; }
1627c478bd9Sstevel@tonic-gate 
1637c478bd9Sstevel@tonic-gate <reg>"?"	RETURN(QUEST);
1647c478bd9Sstevel@tonic-gate <reg>"+"	RETURN(PLUS);
1657c478bd9Sstevel@tonic-gate <reg>"*"	RETURN(STAR);
1667c478bd9Sstevel@tonic-gate <reg>"|"	RETURN(OR);
1677c478bd9Sstevel@tonic-gate <reg>"."	RETURN(DOT);
1687c478bd9Sstevel@tonic-gate <reg>"("	RETURN('(');
1697c478bd9Sstevel@tonic-gate <reg>")"	RETURN(')');
1707c478bd9Sstevel@tonic-gate <reg>"^"	RETURN('^');
1717c478bd9Sstevel@tonic-gate <reg>"$"	RETURN('$');
1727c478bd9Sstevel@tonic-gate <reg>\\{D}{D}{D}	{ wsscanf(yytext+1, "%o", &yylval); RETURN(CHAR); }
1737c478bd9Sstevel@tonic-gate <reg>\\.	{	if (yytext[1]=='n') yylval = '\n';
1747c478bd9Sstevel@tonic-gate 			else if (yytext[1] == 't') yylval = '\t';
1757c478bd9Sstevel@tonic-gate 			else if (yytext[1] == 'b') yylval = '\b';
1767c478bd9Sstevel@tonic-gate 			else if (yytext[1] == 'r') yylval = '\r';
1777c478bd9Sstevel@tonic-gate 			else if (yytext[1] == 'f') yylval = '\f';
1787c478bd9Sstevel@tonic-gate 			else yylval = yytext[1];
1797c478bd9Sstevel@tonic-gate 			RETURN(CHAR);
1807c478bd9Sstevel@tonic-gate 		}
1817c478bd9Sstevel@tonic-gate <reg>"/"	{ BEGIN A; unput('/'); }
1827c478bd9Sstevel@tonic-gate <reg>\n		{ yyerror("newline in regular expression"); lineno++; BEGIN A; }
1837c478bd9Sstevel@tonic-gate <reg>.		{ yylval = yytext[0]; RETURN(CHAR); }
1847c478bd9Sstevel@tonic-gate 
	/*
	 * Rules for start states str (string constant) and chc (character
	 * class).  Both collect characters in cbuf, with clen as the count.
	 *
	 * Closing quote in str: cbuf is terminated and duplicated with
	 * tostring(); a blank is then appended to cbuf and that longer text is
	 * the name handed to setsymtab(), with the duplicate as the string
	 * value.  The token returned is STRING.  Presumably the added blank
	 * keeps the name of a string constant distinct from an identifier or
	 * number with the same spelling - TODO confirm.
	 *
	 * Closing bracket in chc: the scanner returns to state reg, yylval is
	 * set to a tostring() copy of cbuf, and CCL or NCCL is returned
	 * according to cflag.
	 *
	 * A raw newline in either state is reported through yyerror() and the
	 * scanner drops back to state A.
	 *
	 * NOTE(review): only CADD compares clen with CBUFLEN.  The escape
	 * rules below store through cbuf[clen++] with no such check, and the
	 * closing quote rule writes cbuf[clen] and cbuf[clen + 1]; nothing in
	 * this file bounds clen on those paths, so a long run of escapes can
	 * run past the end of cbuf.
	 */
1857c478bd9Sstevel@tonic-gate <str>\"		{ wchar_t *s; BEGIN A; cbuf[clen]=0; s = tostring(cbuf);
1867c478bd9Sstevel@tonic-gate 		cbuf[clen] = ' '; cbuf[++clen] = 0;
1877c478bd9Sstevel@tonic-gate 		yylval = (int)setsymtab(cbuf, s, 0.0, CON|STR, symtab);
1887c478bd9Sstevel@tonic-gate 		RETURN(STRING); }
1897c478bd9Sstevel@tonic-gate <str>\n		{ yyerror("newline in string"); lineno++; BEGIN A; }
1907c478bd9Sstevel@tonic-gate <str>"\\\""	{ cbuf[clen++]='"'; }
1917c478bd9Sstevel@tonic-gate <str>"\\"n	{ cbuf[clen++]='\n'; }
1927c478bd9Sstevel@tonic-gate <chc>"\\"n	{ cbuf[clen++]='\n'; }
1937c478bd9Sstevel@tonic-gate <str>"\\"t	{ cbuf[clen++]='\t'; }
1947c478bd9Sstevel@tonic-gate <chc>"\\"t	{ cbuf[clen++]='\t'; }
1957c478bd9Sstevel@tonic-gate <str>"\\"b	{ cbuf[clen++]='\b'; }
1967c478bd9Sstevel@tonic-gate <chc>"\\"b	{ cbuf[clen++]='\b'; }
1977c478bd9Sstevel@tonic-gate <str>"\\"r	{ cbuf[clen++]='\r'; }
1987c478bd9Sstevel@tonic-gate <chc>"\\"r	{ cbuf[clen++]='\r'; }
1997c478bd9Sstevel@tonic-gate <str>"\\"f 	{ cbuf[clen++]='\f'; }
2007c478bd9Sstevel@tonic-gate <chc>"\\"f 	{ cbuf[clen++]='\f'; }
2017c478bd9Sstevel@tonic-gate <str>"\\\\"	{ cbuf[clen++]='\\'; }
2027c478bd9Sstevel@tonic-gate <chc>"\\\\"	{ cbuf[clen++]='\\'; }
2037c478bd9Sstevel@tonic-gate <str>.		{ CADD; }
2047c478bd9Sstevel@tonic-gate 
2057c478bd9Sstevel@tonic-gate <chc>"\\""]"	{ cbuf[clen++]=']'; }
2067c478bd9Sstevel@tonic-gate <chc>"]"	{ BEGIN reg; cbuf[clen]=0; yylval = (int)tostring(cbuf);
2077c478bd9Sstevel@tonic-gate 		if (cflag==0) { RETURN(CCL); }
2087c478bd9Sstevel@tonic-gate 		else { RETURN(NCCL); } }
2097c478bd9Sstevel@tonic-gate <chc>\n		{ yyerror("newline in character class"); lineno++; BEGIN A; }
2107c478bd9Sstevel@tonic-gate <chc>.		{ CADD; }
2117c478bd9Sstevel@tonic-gate 
2127c478bd9Sstevel@tonic-gate %%
2137c478bd9Sstevel@tonic-gate 
/*
 * input - character source for the scanner, used in place of the input
 * macro that lex would otherwise supply (it is undefined near the top of
 * this file).
 *
 * The next character comes from the first of these that applies:
 *   1. the pushback area, when yysptr is above yysbuf;
 *   2. the in-memory program text at lexprog, when yyin is NULL;
 *   3. the stream yyin, read with getwc().
 *
 * A newline bumps yylineno.  A value equal to EOF is turned into 0, so the
 * function returns 0 at end of input.
 *
 * NOTE(review): lexprog is advanced even when the character fetched is 0,
 * so a further call on that path would read beyond that position - confirm
 * that no caller asks for input again after the first 0.
 * NOTE(review): the result of getwc() is compared with EOF rather than
 * WEOF - confirm the two have the same value once stored in an int on the
 * target platform.
 */
214*dc5a8425Srobbin int
2157c478bd9Sstevel@tonic-gate input()
2167c478bd9Sstevel@tonic-gate {
217*dc5a8425Srobbin 	int c;
2187c478bd9Sstevel@tonic-gate 	extern wchar_t *lexprog;
2197c478bd9Sstevel@tonic-gate 
	/* Pushed-back characters take priority over fresh input. */
2207c478bd9Sstevel@tonic-gate 	if (yysptr > yysbuf)
2217c478bd9Sstevel@tonic-gate 		c = U(*--yysptr);
2227c478bd9Sstevel@tonic-gate 	else if (yyin == NULL)
2237c478bd9Sstevel@tonic-gate 		c = *lexprog++;
2247c478bd9Sstevel@tonic-gate 	else
2257c478bd9Sstevel@tonic-gate 		c = getwc(yyin);
2267c478bd9Sstevel@tonic-gate 	if (c == '\n')
2277c478bd9Sstevel@tonic-gate 		yylineno++;
2287c478bd9Sstevel@tonic-gate 	else if (c == EOF)
2297c478bd9Sstevel@tonic-gate 		c = 0;
2307c478bd9Sstevel@tonic-gate 	return (c);
2317c478bd9Sstevel@tonic-gate }
2327c478bd9Sstevel@tonic-gate 
/*
 * startreg - put the scanner into start state reg, so that the input which
 * follows is tokenized by the regular expression rules.  Presumably called
 * from the grammar when a regular expression is about to begin - confirm
 * against the caller.
 */
233*dc5a8425Srobbin void
2347c478bd9Sstevel@tonic-gate startreg()
2357c478bd9Sstevel@tonic-gate {
2367c478bd9Sstevel@tonic-gate 	BEGIN reg;
2377c478bd9Sstevel@tonic-gate }
238