1 %Start A str sc reg comment
2 
3 %{
4 /****************************************************************
5 Copyright (C) AT&T 1993
6 All Rights Reserved
7 
8 Permission to use, copy, modify, and distribute this software and
9 its documentation for any purpose and without fee is hereby
10 granted, provided that the above copyright notice appear in all
11 copies and that both that the copyright notice and this
12 permission notice and warranty disclaimer appear in supporting
13 documentation, and that the name of AT&T or any of its entities
14 not be used in advertising or publicity pertaining to
15 distribution of the software without specific, written prior
16 permission.
17 
18 AT&T DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
19 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
20 IN NO EVENT SHALL AT&T OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
21 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
22 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
23 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
24 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
25 THIS SOFTWARE.
26 ****************************************************************/
27 
28 #include <stdlib.h>
29 #include <string.h>
30 #include "awk.h"
31 #include "y.tab.h"
32 
33 extern YYSTYPE	yylval;
34 extern int	infunc;
35 
36 int	lineno	= 1;
37 int	bracecnt = 0;
38 int	brackcnt  = 0;
39 int	parencnt = 0;
#define DEBUG
/*
 * RET(x): return token x from the scanner.
 *
 * With DEBUG defined, the token name (via tokname) and the matched text
 * (yytext) are printed first when the run-time flag dbg is non-zero.
 * The argument is evaluated exactly once, so RET(yylval.i = yytext[0])
 * performs a single assignment, and the do { } while (0) wrapper makes
 * "RET(x);" one statement that is safe in an unbraced if/else.
 */
#ifdef	DEBUG
#	define	RET(x)	do { \
				int ret_tok_ = (x); \
				if (dbg) \
					printf("lex %s [%s]\n", tokname(ret_tok_), yytext); \
				return(ret_tok_); \
			} while (0)
#else
#	define	RET(x)	return(x)
#endif
46 
/*
 * CADD: append the first character of the current token (yytext[0]) to
 * cbuf and advance clen.  When clen reaches CBUFLEN-1 the string or
 * regular expression is reported as too long and the scanner is put back
 * into start state A.
 *
 * Wrapped in do { } while (0) so that "CADD;" is a single statement; the
 * previous two-statement expansion broke under an unbraced if/else.
 */
#define	CADD	do { \
			cbuf[clen++] = yytext[0]; \
			if (clen >= CBUFLEN-1) { \
				ERROR "string/reg expr %.30s... too long", cbuf SYNTAX; \
				BEGIN A; \
			} \
		} while (0)
52 
53 uchar	cbuf[CBUFLEN];
54 uchar	*s;
55 int	clen, cflag;
56 
57 /* some of this depends on behavior of lex that
58    may not be preserved in other implementations of lex.
59 */
60 
61 static	int	my_input( YY_CHAR *buf, int max_size );
62 
63 #undef YY_INPUT
64 #define YY_INPUT(buf,result,max_size) result = my_input(buf, max_size);
65 
66 #undef YY_USER_INIT
67 #define YY_USER_INIT init_input_source();
68 %}
69 
70 A	[a-zA-Z_]
71 B	[a-zA-Z0-9_]
72 D	[0-9]
73 O	[0-7]
74 H	[0-9a-fA-F]
75 WS	[ \t]
76 
77 %%
78 	switch ((yy_start - 1) / 2) {	/* witchcraft */
	/* This code sits before the first rule and dispatches on the current
	   start condition.  NOTE(review): (yy_start - 1) / 2 looks like the
	   inverse of flex's BEGIN encoding of the start-condition number --
	   confirm against the flex skeleton this scanner is built with. */
79 	case 0:
	/* no start condition selected yet: enter A, the state in which
	   ordinary awk program text is scanned */
80 		BEGIN A;
81 		break;
82 	case sc:
	/* the "}" rule returned ';' and left the scanner in sc; go back to A
	   and deliver the '}' token itself now */
83 		BEGIN A;
84 		RET('}');
85 	}
86 
87 <A>\n		{ lineno++; RET(NL); }	/* newline is a token; keep lineno current */
88 <A>#.*		{ ; }	/* strip comments */
89 <A>{WS}+	{ ; }	/* blanks and tabs produce no token */
90 <A>;		{ RET(';'); }
91 
92 <A>"\\"\n	{ lineno++; }	/* backslash-newline: counted, but no token is returned */
93 <A>BEGIN	{ RET(XBEGIN); }
94 <A>END		{ RET(XEND); }
95 <A>func(tion)?	{ if (infunc) ERROR "illegal nested function" SYNTAX; RET(FUNC); }	/* "func" or "function" */
96 <A>return	{ if (!infunc) ERROR "return not in function" SYNTAX; RET(RETURN); }
97 <A>"&&"		{ RET(AND); }
98 <A>"||"		{ RET(BOR); }
99 <A>"!"		{ RET(NOT); }
100 <A>"!="		{ yylval.i = NE; RET(NE); }	/* comparison operators also store their token code in yylval.i */
101 <A>"~"		{ yylval.i = MATCH; RET(MATCHOP); }	/* ~ and !~ share the MATCHOP token; yylval.i tells them apart */
102 <A>"!~"		{ yylval.i = NOTMATCH; RET(MATCHOP); }
103 <A>"<"		{ yylval.i = LT; RET(LT); }
104 <A>"<="		{ yylval.i = LE; RET(LE); }
105 <A>"=="		{ yylval.i = EQ; RET(EQ); }
106 <A>">="		{ yylval.i = GE; RET(GE); }
107 <A>">"		{ yylval.i = GT; RET(GT); }
108 <A>">>"		{ yylval.i = APPEND; RET(APPEND); }
109 <A>"++"		{ yylval.i = INCR; RET(INCR); }
110 <A>"--"		{ yylval.i = DECR; RET(DECR); }
111 <A>"+="		{ yylval.i = ADDEQ; RET(ASGNOP); }	/* every assignment operator returns ASGNOP; yylval.i says which one */
112 <A>"-="		{ yylval.i = SUBEQ; RET(ASGNOP); }
113 <A>"*="		{ yylval.i = MULTEQ; RET(ASGNOP); }
114 <A>"/="		{ yylval.i = DIVEQ; RET(ASGNOP); }
115 <A>"%="		{ yylval.i = MODEQ; RET(ASGNOP); }
116 <A>"^="		{ yylval.i = POWEQ; RET(ASGNOP); }
117 <A>"**="	{ yylval.i = POWEQ; RET(ASGNOP); }	/* "**=" is treated exactly like "^=" */
118 <A>"="		{ yylval.i = ASSIGN; RET(ASGNOP); }
119 <A>"**"		{ RET(POWER); }	/* "**" is treated exactly like "^" */
120 <A>"^"		{ RET(POWER); }
121 
122 <A>"$"{D}+	{ yylval.cp = fieldadr(atoi(yytext+1)); RET(FIELD); }	/* "$" plus digits: the field number is known at scan time */
123 <A>"$NF"	{ unputstr("(NF)"); return(INDIRECT); }	/* push back "(NF)" and return INDIRECT for the "$" alone */
124 <A>"$"{A}{B}*	{
		  /* "$" followed by a name.  The token text is copied before the
		     one-character lookahead.  NOTE(review): presumably because
		     input()/unput() may disturb yytext -- confirm for the lex in use. */
125 		  int c;
126 		  char *yytext_copy = strdup(yytext);
127 		  c = input(); unput(c);	/* look for '(' or '[' */
128 		  if (c == '(' || c == '[' ||
129 		      infunc && isarg(yytext_copy+1) >= 0) {
			  /* name is followed by ( or [, or isarg() recognises it while
			     inside a function: push the name (without the "$") back for
			     rescanning and return INDIRECT for the "$" alone */
130 			  unputstr(yytext_copy+1);
131 			  free(yytext_copy);
132 			return(INDIRECT);
133 		  } else {
			  /* otherwise the name is handed to setsymtab() and "$name"
			     is returned as a single IVAR token */
134 			  yylval.cp =
135 				setsymtab(yytext_copy+1,"",0.0,STR|NUM,symtab);
136 			  free(yytext_copy);
137 			RET(IVAR);
138 		  }
139 		}
140 <A>"$"		{ RET(INDIRECT); }	/* "$" before anything else, e.g. an expression */
141 <A>NF		{ yylval.cp = setsymtab(yytext, "", 0.0, NUM, symtab); RET(VARNF); }	/* NF has its own token, VARNF */
142 
143 <A>({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)?	{
		  /* numeric constant: digits with an optional fraction, or a
		     fraction starting with ".", then an optional exponent.
		     The text, a tostring() copy of it, and its atof() value are
		     all passed to setsymtab(); the result goes in yylval.cp. */
144 		  yylval.cp = setsymtab(yytext, tostring(yytext), atof(yytext), CON|NUM, symtab);
145 		/* should this also have STR set? */
146 		  RET(NUMBER); }
147 
148 <A>while	{ RET(WHILE); }	/* keyword rules precede the general {A}{B}* name rule, which matches the same text */
149 <A>for		{ RET(FOR); }
150 <A>do		{ RET(DO); }
151 <A>if		{ RET(IF); }
152 <A>else		{ RET(ELSE); }
153 <A>next		{ RET(NEXT); }
154 <A>exit		{ RET(EXIT); }
155 <A>break	{ RET(BREAK); }
156 <A>continue	{ RET(CONTINUE); }
157 <A>print	{ yylval.i = PRINT; RET(PRINT); }	/* these also store their own token code in yylval.i */
158 <A>printf	{ yylval.i = PRINTF; RET(PRINTF); }
159 <A>sprintf	{ yylval.i = SPRINTF; RET(SPRINTF); }
160 <A>split	{ yylval.i = SPLIT; RET(SPLIT); }
161 <A>substr	{ RET(SUBSTR); }
162 <A>sub		{ yylval.i = SUB; RET(SUB); }
163 <A>gsub		{ yylval.i = GSUB; RET(GSUB); }
164 <A>index	{ RET(INDEX); }
165 <A>match	{ RET(MATCHFCN); }
166 <A>in		{ RET(IN); }
167 <A>getline	{ RET(GETLINE); }
168 <A>close	{ RET(CLOSE); }
169 <A>delete	{ RET(DELETE); }
170 <A>length	{ yylval.i = FLENGTH; RET(BLTIN); }	/* from here on: one shared BLTIN token; yylval.i identifies the function */
171 <A>log		{ yylval.i = FLOG; RET(BLTIN); }
172 <A>int		{ yylval.i = FINT; RET(BLTIN); }
173 <A>exp		{ yylval.i = FEXP; RET(BLTIN); }
174 <A>sqrt		{ yylval.i = FSQRT; RET(BLTIN); }
175 <A>sin		{ yylval.i = FSIN; RET(BLTIN); }
176 <A>cos		{ yylval.i = FCOS; RET(BLTIN); }
177 <A>atan2	{ yylval.i = FATAN; RET(BLTIN); }
178 <A>system	{ yylval.i = FSYSTEM; RET(BLTIN); }
179 <A>rand		{ yylval.i = FRAND; RET(BLTIN); }
180 <A>srand	{ yylval.i = FSRAND; RET(BLTIN); }
181 <A>toupper	{ yylval.i = FTOUPPER; RET(BLTIN); }
182 <A>tolower	{ yylval.i = FTOLOWER; RET(BLTIN); }
183 <A>fflush	{ yylval.i = FFLUSH; RET(BLTIN); }
184 
185 <A>{A}{B}*	{ int n, c;
		  /* any other name: returns ARG, CALL or VAR.  The text is copied
		     before the one-character lookahead.  NOTE(review): presumably
		     because input()/unput() may disturb yytext -- confirm. */
186 		  char *yytext_copy = strdup(yytext);
187 		  c = input(); unput(c);	/* look for '(' */
188 		  if (c != '(' && infunc && (n=isarg(yytext_copy)) >= 0) {
			/* inside a function, not followed by '(', and isarg()
			   recognises the name: yylval.i is the value isarg() returned */
189 			yylval.i = n;
190 			free(yytext_copy);
191 			RET(ARG);
192 		  } else {
			/* otherwise the name goes through setsymtab(); a following
			   '(' makes it a CALL, anything else a VAR */
193 			yylval.cp =
194 				setsymtab(yytext_copy,"",0.0,STR|NUM,symtab);
195 			free(yytext_copy);
196 			if (c == '(') {
197 				RET(CALL);
198 			} else {
199 				RET(VAR);
200 			}
201 		  }
202 		}
203 
204 <A>\"		{ BEGIN str; clen = 0; }	/* opening quote: start collecting a string in cbuf */
205 
206 <A>"}"		{ if (--bracecnt < 0) ERROR "extra }" SYNTAX; BEGIN sc; RET(';'); }	/* returns ';' now; state sc makes the switch at the top of the rules return '}' on the next call */
207 <A>"]"		{ if (--brackcnt < 0) ERROR "extra ]" SYNTAX; RET(']'); }	/* closers decrement their counter; going negative is reported as an extra closer */
208 <A>")"		{ if (--parencnt < 0) ERROR "extra )" SYNTAX; RET(')'); }
209 
210 <A>.		{ if (yytext[0] == '{') bracecnt++;	/* openers increment the matching counter */
211 		  else if (yytext[0] == '[') brackcnt++;
212 		  else if (yytext[0] == '(') parencnt++;
213 		  RET(yylval.i = yytext[0]); /* everything else */ }
214 
<reg>\\.	{ /* Escaped character: copied through together with its backslash.
		     Both stores are in bounds because every action that appends
		     to cbuf either leaves clen below CBUFLEN-1 or leaves this
		     state; the check below keeps that true for this rule too. */
		  cbuf[clen++] = '\\'; cbuf[clen++] = yytext[1];
		  if (clen >= CBUFLEN-1) {
			ERROR "string/reg expr %.30s... too long", cbuf SYNTAX;
			BEGIN A;
		  }
		}
<reg>\n		{ ERROR "newline in regular expression %.10s...", cbuf SYNTAX; lineno++; BEGIN A; }
<reg>"/"	{ /* closing slash: hand the collected text over as REGEXPR and
		     push the '/' back so it is scanned again in state A */
		  BEGIN A;
		  cbuf[clen] = 0;
		  yylval.s = tostring(cbuf);
		  unput('/');
		  RET(REGEXPR); }
<reg>.		{ CADD; }

<str>\"		{ /* closing quote: the value passed to setsymtab() is the
		     collected text, the name is the same text plus one
		     trailing blank */
		  BEGIN A;
		  cbuf[clen] = 0; s = tostring(cbuf);
		  cbuf[clen] = ' '; cbuf[++clen] = 0;
		  yylval.cp = setsymtab(cbuf, s, 0.0, CON|STR, symtab);
		  RET(STRING); }
<str>\n		{ ERROR "newline in string %.10s...", cbuf SYNTAX; lineno++; BEGIN A; }
<str>"\\"({O}{O}{O}|{O}{O}|{O}) { /* \o, \oo, \ooo: octal character code */
		  unsigned int n = 0;	/* %o stores through an unsigned int pointer */
		  sscanf(yytext+1, "%o", &n); cbuf[clen++] = n;
		  if (clen >= CBUFLEN-1) {
			ERROR "string/reg expr %.30s... too long", cbuf SYNTAX;
			BEGIN A;
		  }
		}
<str>"\\"x({H}+) { /* \x followed by hex digits.  ANSI permits any number! */
		  unsigned int n = 0;	/* %x stores through an unsigned int pointer */
		  sscanf(yytext+2, "%x", &n); cbuf[clen++] = n;
		  if (clen >= CBUFLEN-1) {
			ERROR "string/reg expr %.30s... too long", cbuf SYNTAX;
			BEGIN A;
		  }
		}
<str>"\\".	{ /* Every other backslash escape.  This rule must stay after the
		     octal rule above, which matches the same two characters for
		     a single octal digit and has to win that tie. */
		  int c = yytext[1];
		  switch (c) {
		  case 'n': c = '\n'; break;
		  case 't': c = '\t'; break;
		  case 'f': c = '\f'; break;
		  case 'r': c = '\r'; break;
		  case 'b': c = '\b'; break;
		  case 'v': c = '\v'; break;	/* these ANSIisms may not be known by */
		  case 'a': c = '\007'; break;	/* your compiler. hence 007 for bell */
		  default: break;	/* \" \\ and anything else: the character itself */
		  }
		  cbuf[clen++] = c;
		  if (clen >= CBUFLEN-1) {
			ERROR "string/reg expr %.30s... too long", cbuf SYNTAX;
			BEGIN A;
		  }
		}
<str>.		{ CADD; }
245 
246 %%
247 
248 void startreg(void)	/* start parsing a regular expression */
249 {
250 	BEGIN reg;
251 	clen = 0;
252 }
253 
254 static int my_input( YY_CHAR *buf, int max_size )
255 {
256 	extern uchar *lexprog;
257 
258 	if ( lexprog ) {		/* awk '...' */
259 		int num_chars = strlen( lexprog );
260 		if ( num_chars > max_size )
261 			{
262 			num_chars = max_size;
263 			strncpy( buf, lexprog, num_chars );
264 }
265 		else
266 			strcpy( buf, lexprog );
267 		lexprog += num_chars;
268 		return num_chars;
269 
270 	} else {			/* awk -f ... */
271 		int c = pgetc();
272 		if (c == EOF)
273 			return 0;
274 		buf[0] = c;
275 		return 1;
276 }
277 }
278 
/*
 * unputstr: put a string back on input.  The characters of s are handed
 * to unput() one at a time, last character first.
 */
void unputstr(char *s)
{
	int remaining = strlen(s);

	while (remaining > 0) {
		remaining--;		/* kept out of the unput() argument: unput may be a macro */
		unput(s[remaining]);
	}
}
286 
/*
 * lex_input: externally visible wrapper that returns whatever the
 * scanner's input() yields for the next character.
 */
int lex_input(void)
{
	return input();
}
291