xref: /original-bsd/old/awk/awk.lx.l (revision f737e041)
1 /*-
2  * Copyright (c) 1991 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * %sccs.include.proprietary.c%
6  */
7 
8 %X str chc reg comment
9 
10 %{
11 #ifndef lint
12 static char sccsid[] = "@(#)awk.lx.l	4.6 (Berkeley) 01/28/94";
13 #endif /* not lint */
14 
15 #include	<string.h>
16 #include	"awk.h"
17 #include	"awk.def"
18 extern int	yylval;
19 extern int	mustfld;
20 extern int	ldbg;
21 extern char	*lexprog;
22 
/*
 * YY_INPUT - refill the scanner's input buffer.
 *
 * When lexprog is non-null the program text comes from that string: at
 * most max_size bytes are copied into buf and lexprog is advanced past
 * them, so an exhausted string gives result == 0.  Otherwise the bytes
 * are read from the file descriptor behind yyin.
 *
 * memcpy() is used instead of strcpy()/strncpy() so that exactly
 * `result' bytes are stored.  strcpy() also stores the terminating NUL,
 * which is one byte more than max_size when the remaining text is
 * exactly max_size bytes long.
 */
#undef YY_INPUT
#define YY_INPUT(buf,result,max_size)				\
{								\
	if ( lexprog )						\
		{						\
		result = strlen( lexprog );			\
		if ( result > max_size )			\
			result = max_size;			\
		memcpy( buf, lexprog, result );			\
		lexprog += result;				\
		}						\
	else							\
		result = read( fileno(yyin), buf, max_size );	\
}
41 
int	lineno	= 1;		/* current input line; the rules bump it on each newline */

/* Size of the accumulator used while scanning "..." strings and [...] classes. */
#define	CBUFLEN	150
char	cbuf[CBUFLEN];		/* characters collected so far */
int	clen, cflag;		/* clen: bytes used in cbuf; cflag: 1 after "[^", 0 after "[" */

/*
 * RETURN(x) - return token x from yylex(), reporting it through ptoken()
 * first when the ldbg flag is non-zero.  Wrapped in do/while(0) so the
 * macro is a single statement wherever it is used.
 */
#define	RETURN(x)	do { if (ldbg) ptoken(x); return(x); } while (0)

/*
 * CADD - append the first character of the current match to cbuf.
 * Once CBUFLEN-1 characters are stored the text is rejected with
 * "string too long" and the scanner goes back to the INITIAL state.
 * cbuf is NUL-terminated before it is handed to yyerror(), because
 * nothing else terminates it on this path.  Wrapped in do/while(0) so
 * that `if (c) CADD; else ...' and loop bodies behave as one statement.
 */
#define	CADD	do { \
		cbuf[clen++] = yytext[0]; \
		if (clen >= CBUFLEN-1) { \
			cbuf[CBUFLEN-1] = 0; \
			yyerror("string too long", cbuf); \
			BEGIN INITIAL; \
		} \
	} while (0)
48 %}
49 
/*
 * Pattern abbreviations referenced in the rules as {A}, {B}, {D} and {WS}:
 * A = letter or underscore, B = letter, digit or underscore,
 * D = decimal digit, WS = blank or tab.
 */
50 A	[a-zA-Z_]
51 B	[a-zA-Z0-9_]
52 D	[0-9]
53 WS	[ \t]
54 
55 %%
56 	static int sc_flag = 0;
57 
58 	if ( sc_flag ) {
59 		BEGIN INITIAL;
60 		sc_flag = 0;
61 		RETURN('}');
62 	}
63 
^\n		{ /* empty line: only count it */ lineno++; }
^{WS}*#.*\n	{ /* strip comment lines */ lineno++; }
{WS}		{ /* blanks and tabs are skipped */ }
<INITIAL,reg>"\\"\n	{ /* backslash-newline: count the line, return nothing */ lineno++; }
"||"		{ RETURN(BOR); }
BEGIN		{ RETURN(XBEGIN); }
END		{ RETURN(XEND); }
PROGEND		{ RETURN(EOF); }
"&&"		{ RETURN(AND); }
"!"		{ RETURN(NOT); }
"!="		{ /* RELOP, MATCHOP and ASGNOP carry the operator code in yylval */
		  yylval = NE; RETURN(RELOP); }
"~"		{ yylval = MATCH;    RETURN(MATCHOP); }
"!~"		{ yylval = NOTMATCH; RETURN(MATCHOP); }
"<"		{ yylval = LT;       RETURN(RELOP); }
"<="		{ yylval = LE;       RETURN(RELOP); }
"=="		{ yylval = EQ;       RETURN(RELOP); }
">="		{ yylval = GE;       RETURN(RELOP); }
">"		{ yylval = GT;       RETURN(RELOP); }
">>"		{ yylval = APPEND;   RETURN(RELOP); }
"++"		{ yylval = INCR;     RETURN(INCR); }
"--"		{ yylval = DECR;     RETURN(DECR); }
"+="		{ yylval = ADDEQ;    RETURN(ASGNOP); }
"-="		{ yylval = SUBEQ;    RETURN(ASGNOP); }
"*="		{ yylval = MULTEQ;   RETURN(ASGNOP); }
"/="		{ yylval = DIVEQ;    RETURN(ASGNOP); }
"%="		{ yylval = MODEQ;    RETURN(ASGNOP); }
"="		{ yylval = ASSIGN;   RETURN(ASGNOP); }
91 
"$"{D}+	{
		/* $0 is the symbol "$record"; any other $n is a field. */
		int fldno = atoi(yytext+1);

		if (fldno != 0) {
			yylval = fieldadr(fldno);
			RETURN(FIELD);
		}
		yylval = (hack)lookup("$record", symtab, 0);
		RETURN(STRING);
		}
"$"{WS}*	{ /* "$" not followed by digits */ RETURN(INDIRECT); }
NF		{
		/* referencing NF sets mustfld */
		mustfld = 1;
		yylval = (hack)setsymtab(yytext, EMPTY, 0.0, NUM, symtab);
		RETURN(VAR);
		}
({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)?	{
		/* numeric constant, entered in symtab with its atof() value */
		yylval = (hack)setsymtab(yytext, EMPTY, atof(yytext), CON|NUM, symtab);
		RETURN(NUMBER);
		}
"}"{WS}*\n	{ /* ';' now, '}' on the next call (see sc_flag) */
		  lineno++; sc_flag = 1; RETURN(';'); }
"}"		{ sc_flag = 1; RETURN(';'); }
;\n		{ /* ";" followed by newline is returned as a single ';' */
		  lineno++; RETURN(';'); }
\n		{ lineno++; RETURN(NL); }
while		{ RETURN(WHILE); }
for		{ RETURN(FOR); }
if		{ RETURN(IF); }
else		{ RETURN(ELSE); }
next		{ RETURN(NEXT); }
exit		{ RETURN(EXIT); }
break		{ RETURN(BREAK); }
continue	{ RETURN(CONTINUE); }
print		{ yylval = PRINT;   RETURN(PRINT); }
printf		{ yylval = PRINTF;  RETURN(PRINTF); }
sprintf		{ yylval = SPRINTF; RETURN(SPRINTF); }
split		{ yylval = SPLIT;   RETURN(SPLIT); }
substr		{ RETURN(SUBSTR); }
index		{ RETURN(INDEX); }
in		{ RETURN(IN); }
getline		{ RETURN(GETLINE); }
length		{ /* these five all return FNCN; yylval says which function */
		  yylval = FLENGTH; RETURN(FNCN); }
log		{ yylval = FLOG;  RETURN(FNCN); }
int		{ yylval = FINT;  RETURN(FNCN); }
exp		{ yylval = FEXP;  RETURN(FNCN); }
sqrt		{ yylval = FSQRT; RETURN(FNCN); }
{A}{B}*		{
		/* any other identifier is a variable, entered in symtab */
		yylval = (hack)setsymtab(yytext, tostring(""), 0.0, STR|NUM, symtab);
		RETURN(VAR);
		}
\"		{ /* opening quote: start collecting a string in cbuf */
		  clen = 0; BEGIN str; }
131 
#		{ /* trailing comment: discard up to the newline */ BEGIN comment; }
<comment>\n	{ lineno++; BEGIN INITIAL; RETURN(NL); }
<comment>.	{ /* comment text is ignored */ }

.		{ /* any other character is its own token */
		  yylval = yytext[0]; RETURN(yytext[0]); }
137 
<reg>"["	{ /* start of a character class; cflag 0 = not negated */
		  BEGIN chc; clen = 0; cflag = 0; }
<reg>"[^"	{ /* start of a negated character class */
		  BEGIN chc; clen = 0; cflag = 1; }

<reg>"?"	{ RETURN(QUEST); }
<reg>"+"	{ RETURN(PLUS); }
<reg>"*"	{ RETURN(STAR); }
<reg>"|"	{ RETURN(OR); }
<reg>"."	{ RETURN(DOT); }
<reg>"("	{ RETURN('('); }
<reg>")"	{ RETURN(')'); }
<reg>"^"	{ RETURN('^'); }
<reg>"$"	{ RETURN('$'); }
<reg>\\{D}{D}{D}	{
		/*
		 * Backslash plus three digits: octal character code.
		 * %o stores through an unsigned int pointer, so convert
		 * into a temporary of that type.  It is preset to 0 so a
		 * sequence sscanf() cannot convert (first digit 8 or 9)
		 * gives 0 instead of whatever yylval held before.
		 */
		unsigned int octval = 0;

		sscanf(yytext+1, "%o", &octval);
		yylval = octval;
		RETURN(CHAR);
		}
<reg>\\.	{
		/* \n and \t are translated; any other \c stands for c */
		switch (yytext[1]) {
		case 'n':
			yylval = '\n';
			break;
		case 't':
			yylval = '\t';
			break;
		default:
			yylval = yytext[1];
			break;
		}
		RETURN(CHAR);
		}
<reg>"/"	{ /* closing slash: leave reg state and rescan '/' in INITIAL */
		  BEGIN INITIAL; unput('/'); }
<reg>\n		{ yyerror("newline in regular expression"); lineno++; BEGIN INITIAL; }
<reg>.		{ yylval = yytext[0]; RETURN(CHAR); }
159 
<str>\"		{
		/*
		 * Closing quote.  The collected text becomes the value (s);
		 * the same text with one blank appended is what is passed
		 * as setsymtab()'s first argument.
		 */
		char *s;

		BEGIN INITIAL;
		cbuf[clen] = 0;
		s = tostring(cbuf);
		cbuf[clen] = ' ';
		cbuf[++clen] = 0;
		yylval = (hack)setsymtab(cbuf, s, 0.0, CON|STR, symtab);
		RETURN(STRING);
		}
<str>\n		{ yyerror("newline in string"); lineno++; BEGIN INITIAL; }
<str>"\\\""	{
		/*
		 * Escape sequences used to be stored with a bare
		 * cbuf[clen++] = c, bypassing the length check in CADD, so
		 * enough of them overran cbuf.  CADD appends yytext[0], so
		 * put the decoded character there and let CADD do the
		 * bounds-checked append.
		 */
		yytext[0] = '"'; CADD;
		}
<str,chc>"\\"n	{ yytext[0] = '\n'; CADD; }
<str,chc>"\\"t	{ yytext[0] = '\t'; CADD; }
<str,chc>"\\\\"	{ yytext[0] = '\\'; CADD; }
<str>.		{ CADD; }

<chc>"\\""]"	{ /* escaped ']' is a class member, not the terminator */
		  yytext[0] = ']'; CADD; }
<chc>"]"	{
		/* end of class: return its members as a string */
		BEGIN reg;
		cbuf[clen] = 0;
		yylval = (hack)tostring(cbuf);
		if (cflag == 0) {
			RETURN(CCL);
		} else {
			RETURN(NCCL);
		}
		}
<chc>\n		{ yyerror("newline in character class"); lineno++; BEGIN INITIAL; }
<chc>.		{ CADD; }
176 
177 %%
178 
179 startreg()
180 {
181 	BEGIN reg;
182 }
183 
184 ptoken(n)
185 {
186 	extern struct tok {
187 		char *tnm;
188 		int yval;
189 	} tok[];
190 	extern int yylval;
191 
192 	printf("lex:");
193 	if (n < 128) {
194 		printf(" %c\n",n);
195 		return;
196 	}
197 	if (n <= 256 || n >= LASTTOKEN) {
198 		printf("? %o\n",n);
199 		return;
200 	}
201 	printf(" %s",tok[n-257].tnm);
202 	switch (n) {
203 
204 	case RELOP:
205 	case MATCHOP:
206 	case ASGNOP:
207 	case STRING:
208 	case FIELD:
209 	case VAR:
210 	case NUMBER:
211 	case FNCN:
212 		printf(" (%s)", yytext);
213 		break;
214 
215 	case CHAR:
216 		printf(" (%o)", yylval);
217 		break;
218 	}
219 	putchar('\n');
220 }
221