1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24
25 #if HAVE_NBTOOL_CONFIG_H
26 #include "nbtool_config.h"
27 #endif
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <ctype.h>
33 #include "awk.h"
34 #include "awkgram.h"
35
extern YYSTYPE yylval;	/* semantic value handed to the parser with each token; defined elsewhere */
extern int infunc;	/* tested by word() and yylex(); presumably nonzero while a function body is being parsed -- confirm in the grammar */

int lineno = 1;		/* current input line: bumped when a newline is consumed, decremented by unput('\n') */
int bracecnt = 0;	/* '{' seen minus '}' seen; going negative reports "extra }" */
int brackcnt = 0;	/* '[' seen minus ']' seen; going negative reports "extra ]" */
int parencnt = 0;	/* '(' seen minus ')' seen; going negative reports "extra )" */
43
/* One entry of the reserved-word table consulted by word(). */
typedef struct Keyword {
	const char *word;	/* spelling of the keyword; the binary-search key */
	int	sub;		/* value word() stores in yylval.i for this keyword */
	int	type;		/* token code word() returns for this keyword */
} Keyword;
49
/*
 * Reserved words and built-in function names recognized by word().
 * Each row is { spelling, value for yylval.i, token code returned }.
 * binsearch() compares with strcmp(), so rows must stay in strcmp()
 * (byte-value) order: upper-case names sort before lower-case ones.
 */
const Keyword keywords[] = {	/* keep sorted: binary searched */
	{ "BEGIN",	XBEGIN,		XBEGIN },
	{ "END",	XEND,		XEND },
	{ "NF",		VARNF,		VARNF },
	{ "and",	FAND,		BLTIN },
	{ "atan2",	FATAN,		BLTIN },
	{ "break",	BREAK,		BREAK },
	{ "close",	CLOSE,		CLOSE },
	{ "compl",	FCOMPL,		BLTIN },
	{ "continue",	CONTINUE,	CONTINUE },
	{ "cos",	FCOS,		BLTIN },
	{ "delete",	DELETE,		DELETE },
	{ "do",		DO,		DO },
	{ "else",	ELSE,		ELSE },
	{ "exit",	EXIT,		EXIT },
	{ "exp",	FEXP,		BLTIN },
	{ "fflush",	FFLUSH,		BLTIN },
	{ "for",	FOR,		FOR },
	{ "func",	FUNC,		FUNC },
	{ "function",	FUNC,		FUNC },
	{ "gensub",	GENSUB,		GENSUB },
	{ "getline",	GETLINE,	GETLINE },
	{ "gsub",	GSUB,		GSUB },
	{ "if",		IF,		IF },
	{ "in",		IN,		IN },
	{ "index",	INDEX,		INDEX },
	{ "int",	FINT,		BLTIN },
	{ "length",	FLENGTH,	BLTIN },
	{ "log",	FLOG,		BLTIN },
	{ "lshift",	FLSHIFT,	BLTIN },
	{ "match",	MATCHFCN,	MATCHFCN },
	{ "next",	NEXT,		NEXT },
	{ "nextfile",	NEXTFILE,	NEXTFILE },
	{ "or",		FFOR,		BLTIN },
	{ "print",	PRINT,		PRINT },
	{ "printf",	PRINTF,		PRINTF },
	{ "rand",	FRAND,		BLTIN },
	{ "return",	RETURN,		RETURN },
	{ "rshift",	FRSHIFT,	BLTIN },
	{ "sin",	FSIN,		BLTIN },
	{ "split",	SPLIT,		SPLIT },
	{ "sprintf",	SPRINTF,	SPRINTF },
	{ "sqrt",	FSQRT,		BLTIN },
	{ "srand",	FSRAND,		BLTIN },
	{ "strftime",	FSTRFTIME,	BLTIN },
	{ "sub",	SUB,		SUB },
	{ "substr",	SUBSTR,		SUBSTR },
	{ "system",	FSYSTEM,	BLTIN },
	{ "systime",	FSYSTIME,	BLTIN },
	{ "tolower",	FTOLOWER,	BLTIN },
	{ "toupper",	FTOUPPER,	BLTIN },
	{ "while",	WHILE,		WHILE },
	{ "xor",	FXOR,		BLTIN },
};
104
/*
 * RET(x): return token x from the enclosing function, first printing
 * its name (via tokname) when dbg is set.
 *
 * The body is wrapped in do { } while (0) so that "RET(x);" is exactly
 * one statement; with a bare { } block, "if (c) RET(a); else ..." would
 * not compile because of the stray ';' before the else.
 * x is expanded twice, so it must not have side effects.
 */
#define	RET(x)	do { if (dbg) printf("lex %s\n", tokname(x)); return (x); } while (0)
106
/* peek: report the next input character while leaving it unread */
static int peek(void)
{
	const int next = input();

	unput(next);
	return next;
}
113
/*
 * gettok: read the next raw token into the caller's growable buffer.
 *
 *   pbuf  in/out: address of the token buffer; updated if it is reallocated
 *   psz   in/out: allocated size of *pbuf; updated likewise
 *
 * Returns
 *   0     at end of input;
 *   'a'   when a name ([A-Za-z_][A-Za-z0-9_]*) was read into the buffer;
 *   '0'   when a numeric constant was read into the buffer;
 *   else  the character itself (also left in the buffer as a 1-char string).
 *
 * The buffer must be at least 2 bytes on entry.  adjbuf() is assumed to
 * grow the buffer to at least the requested length and to relocate bp --
 * confirm against its definition.
 */
static int gettok(char **pbuf, int *psz)	/* get next input token */
{
	int c, retc;
	char *buf = *pbuf;
	int sz = *psz;
	char *bp = buf;

	c = input();
	if (c == 0)
		return 0;
	buf[0] = c;
	buf[1] = 0;
	if (!isalnum(c) && c != '.' && c != '_')
		return c;

	*bp++ = c;
	if (isalpha(c) || c == '_') {	/* it's a varname */
		for ( ; (c = input()) != 0; ) {
			/*
			 * Keep room for this character AND the terminating
			 * NUL; otherwise hitting end of input with a full
			 * buffer makes the final "*bp = 0" write buf[sz].
			 */
			if (bp-buf >= sz-1)
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
					FATAL( "out of space for name %.10s...", buf );
			if (isalnum(c) || c == '_')
				*bp++ = c;
			else {
				*bp = 0;
				unput(c);
				break;
			}
		}
		*bp = 0;
		retc = 'a';	/* alphanumeric */
	} else {	/* maybe it's a number, but could be . */
		char *rem;
		/* read input until can't be a number */
		for ( ; (c = input()) != 0; ) {
			/* same headroom rule as the name loop above */
			if (bp-buf >= sz-1)
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
					FATAL( "out of space for number %.10s...", buf );
			if (isdigit(c) || c == 'e' || c == 'E'
			  || c == '.' || c == '+' || c == '-')
				*bp++ = c;
			else {
				unput(c);
				break;
			}
		}
		*bp = 0;
		strtod(buf, &rem);	/* parse the number */
		if (rem == buf) {	/* it wasn't a valid number at all */
			/*
			 * Push the tail back BEFORE truncating: rem+1 is
			 * buf+1, so cutting the buffer first would turn the
			 * pushback into an empty string and lose the input.
			 */
			unputstr(rem+1);	/* put rest back for later */
			buf[1] = 0;	/* return one character as token */
			retc = buf[0];	/* character is its own type */
		} else {	/* some prefix was a number */
			unputstr(rem);	/* put rest back for later */
			rem[0] = 0;	/* truncate buf after number part */
			retc = '0';	/* type is number */
		}
	}
	*pbuf = buf;
	*psz = sz;
	return retc;
}
176
/* Helpers used by yylex() before their definitions appear below. */
int	word(char *);
int	string(void);
int	regexpr(void);
/* One-shot flags: yylex() tests each on entry and clears it when it acts. */
bool	sc	= false;	/* true => return a } right now */
bool	reg	= false;	/* true => return a REGEXPR now */
182
/*
 * yylex: return the next token to the parser; 0 signals end of input.
 *
 * Token text is collected by gettok() into a static buffer that grows
 * on demand.  Names go to word(), string literals to string(), and a
 * pending startreg() request to regexpr().  For most operator tokens
 * yylval.i is set to the operator's code before returning.
 *
 * A '}' is delivered as two tokens: ';' on this call and, via the sc
 * flag, '}' on the next one.
 */
int yylex(void)
{
	int c;
	static char *buf = NULL;
	static int bufsize = 5; /* BUG: setting this small causes core dump! */

	if (buf == NULL && (buf = malloc(bufsize)) == NULL)
		FATAL( "out of space in yylex" );
	if (sc) {		/* second half of a '}' seen on the previous call */
		sc = false;
		RET('}');
	}
	if (reg) {		/* startreg() asked for a regular expression */
		reg = false;
		return regexpr();
	}
	for (;;) {		/* loops only to skip blanks, comments and continuations */
		c = gettok(&buf, &bufsize);
		if (c == 0)
			return 0;
		if (isalpha(c) || c == '_')
			return word(buf);
		if (isdigit(c)) {
			char *cp = tostring(buf);
			yylval.cp = setsymtab(buf, cp, atof(buf), CON|NUM, symtab);
			free(cp);
			/* should this also have STR set? */
			RET(NUMBER);
		}

		yylval.i = c;
		switch (c) {
		case '\n':	/* {EOL} */
			lineno++;
			RET(NL);
		case '\r':	/* assume \n is coming */
		case ' ':	/* {WS}+ */
		case '\t':
			break;
		case '#':	/* #.* strip comments */
			while ((c = input()) != '\n' && c != 0)
				;
			unput(c);
			/*
			 * Next line is a hack, it compensates for
			 * unput's treatment of \n.
			 */
			lineno++;
			break;
		case ';':
			RET(';');
		case '\\':	/* backslash-newline joins lines; any other backslash is a token */
			if (peek() == '\n') {
				input();
				lineno++;
			} else if (peek() == '\r') {
				input(); input();	/* \n */
				lineno++;
			} else {
				RET(c);
			}
			break;
		case '&':
			if (peek() == '&') {
				input(); RET(AND);
			} else
				RET('&');
		case '|':
			if (peek() == '|') {
				input(); RET(BOR);
			} else
				RET('|');
		case '!':
			if (peek() == '=') {
				input(); yylval.i = NE; RET(NE);
			} else if (peek() == '~') {
				input(); yylval.i = NOTMATCH; RET(MATCHOP);
			} else
				RET(NOT);
		case '~':
			yylval.i = MATCH;
			RET(MATCHOP);
		case '<':
			if (peek() == '=') {
				input(); yylval.i = LE; RET(LE);
			} else {
				yylval.i = LT; RET(LT);
			}
		case '=':
			if (peek() == '=') {
				input(); yylval.i = EQ; RET(EQ);
			} else {
				yylval.i = ASSIGN; RET(ASGNOP);
			}
		case '>':
			if (peek() == '=') {
				input(); yylval.i = GE; RET(GE);
			} else if (peek() == '>') {
				input(); yylval.i = APPEND; RET(APPEND);
			} else {
				yylval.i = GT; RET(GT);
			}
		case '+':
			if (peek() == '+') {
				input(); yylval.i = INCR; RET(INCR);
			} else if (peek() == '=') {
				input(); yylval.i = ADDEQ; RET(ASGNOP);
			} else
				RET('+');
		case '-':
			if (peek() == '-') {
				input(); yylval.i = DECR; RET(DECR);
			} else if (peek() == '=') {
				input(); yylval.i = SUBEQ; RET(ASGNOP);
			} else
				RET('-');
		case '*':
			if (peek() == '=') {	/* *= */
				input(); yylval.i = MULTEQ; RET(ASGNOP);
			} else if (peek() == '*') {	/* ** or **= */
				input();	/* eat 2nd * */
				if (peek() == '=') {
					input(); yylval.i = POWEQ; RET(ASGNOP);
				} else {
					RET(POWER);
				}
			} else
				RET('*');
		case '/':
			RET('/');
		case '%':
			if (peek() == '=') {
				input(); yylval.i = MODEQ; RET(ASGNOP);
			} else
				RET('%');
		case '^':
			if (peek() == '=') {
				input(); yylval.i = POWEQ; RET(ASGNOP);
			} else
				RET(POWER);

		case '$':
			/* BUG: awkward, if not wrong */
			c = gettok(&buf, &bufsize);
			if (isalpha(c)) {	/* gettok returns 'a' for any name */
				if (strcmp(buf, "NF") == 0) {	/* very special */
					unputstr("(NF)");
					RET(INDIRECT);
				}
				c = peek();
				if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
					/* call, subscript or function argument: push the name back and rescan it */
					unputstr(buf);
					RET(INDIRECT);
				}
				yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
				RET(IVAR);
			} else if (c == 0) {	/*  */
				SYNTAX( "unexpected end of input after $" );
				RET(';');
			} else {
				unputstr(buf);
				RET(INDIRECT);
			}

		case '}':
			if (--bracecnt < 0)
				SYNTAX( "extra }" );
			sc = true;	/* the '}' itself is returned by the next call */
			RET(';');
		case ']':
			if (--brackcnt < 0)
				SYNTAX( "extra ]" );
			RET(']');
		case ')':
			if (--parencnt < 0)
				SYNTAX( "extra )" );
			RET(')');
		case '{':
			bracecnt++;
			RET('{');
		case '[':
			brackcnt++;
			RET('[');
		case '(':
			parencnt++;
			RET('(');

		case '"':
			return string();	/* BUG: should be like tran.c ? */

		default:
			RET(c);
		}
	}
}
378
string(void)379 int string(void)
380 {
381 int c, n;
382 char *s, *bp;
383 static char *buf = NULL;
384 static int bufsz = 500;
385
386 if (buf == NULL && (buf = malloc(bufsz)) == NULL)
387 FATAL("out of space for strings");
388 for (bp = buf; (c = input()) != '"'; ) {
389 if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
390 FATAL("out of space for string %.10s...", buf);
391 switch (c) {
392 case '\n':
393 case '\r':
394 case 0:
395 *bp = '\0';
396 SYNTAX( "non-terminated string %.10s...", buf );
397 if (c == 0) /* hopeless */
398 FATAL( "giving up" );
399 lineno++;
400 break;
401 case '\\':
402 c = input();
403 switch (c) {
404 case '\n': break;
405 case '"': *bp++ = '"'; break;
406 case 'n': *bp++ = '\n'; break;
407 case 't': *bp++ = '\t'; break;
408 case 'f': *bp++ = '\f'; break;
409 case 'r': *bp++ = '\r'; break;
410 case 'b': *bp++ = '\b'; break;
411 case 'v': *bp++ = '\v'; break;
412 case 'a': *bp++ = '\a'; break;
413 case '\\': *bp++ = '\\'; break;
414
415 case '0': case '1': case '2': /* octal: \d \dd \ddd */
416 case '3': case '4': case '5': case '6': case '7':
417 n = c - '0';
418 if ((c = peek()) >= '0' && c < '8') {
419 n = 8 * n + input() - '0';
420 if ((c = peek()) >= '0' && c < '8')
421 n = 8 * n + input() - '0';
422 }
423 *bp++ = n;
424 break;
425
426 case 'x': /* hex \x0-9a-fA-F + */
427 { char xbuf[100], *px;
428 for (px = xbuf; (c = input()) != 0 && px-xbuf < 100-2; ) {
429 if (isdigit(c)
430 || (c >= 'a' && c <= 'f')
431 || (c >= 'A' && c <= 'F'))
432 *px++ = c;
433 else
434 break;
435 }
436 *px = 0;
437 unput(c);
438 sscanf(xbuf, "%x", (unsigned int *) &n);
439 *bp++ = n;
440 break;
441 }
442
443 default:
444 *bp++ = c;
445 break;
446 }
447 break;
448 default:
449 *bp++ = c;
450 break;
451 }
452 }
453 *bp = 0;
454 s = tostring(buf);
455 *bp++ = ' '; *bp++ = '\0';
456 yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
457 free(s);
458 RET(STRING);
459 }
460
461
binsearch(char * w,const Keyword * kp,int n)462 static int binsearch(char *w, const Keyword *kp, int n)
463 {
464 int cond, low, mid, high;
465
466 low = 0;
467 high = n - 1;
468 while (low <= high) {
469 mid = (low + high) / 2;
470 if ((cond = strcmp(w, kp[mid].word)) < 0)
471 high = mid - 1;
472 else if (cond > 0)
473 low = mid + 1;
474 else
475 return mid;
476 }
477 return -1;
478 }
479
word(char * w)480 int word(char *w)
481 {
482 const Keyword *kp;
483 int c, n;
484
485 n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
486 if (n != -1) { /* found in table */
487 kp = keywords + n;
488 yylval.i = kp->sub;
489 switch (kp->type) { /* special handling */
490 case BLTIN:
491 if (kp->sub == FSYSTEM && safe)
492 SYNTAX( "system is unsafe" );
493 RET(kp->type);
494 case FUNC:
495 if (infunc)
496 SYNTAX( "illegal nested function" );
497 RET(kp->type);
498 case RETURN:
499 if (!infunc)
500 SYNTAX( "return not in function" );
501 RET(kp->type);
502 case VARNF:
503 yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
504 RET(VARNF);
505 default:
506 RET(kp->type);
507 }
508 }
509 c = peek(); /* look for '(' */
510 if (c != '(' && infunc && (n=isarg(w)) >= 0) {
511 yylval.i = n;
512 RET(ARG);
513 } else {
514 yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
515 if (c == '(') {
516 RET(CALL);
517 } else {
518 RET(VAR);
519 }
520 }
521 }
522
startreg(void)523 void startreg(void) /* next call to yylex will return a regular expression */
524 {
525 reg = true;
526 }
527
regexpr(void)528 int regexpr(void)
529 {
530 int c;
531 static char *buf = NULL;
532 static int bufsz = 500;
533 char *bp;
534
535 if (buf == NULL && (buf = malloc(bufsz)) == NULL)
536 FATAL("out of space for rex expr");
537 bp = buf;
538 for ( ; (c = input()) != '/' && c != 0; ) {
539 if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
540 FATAL("out of space for reg expr %.10s...", buf);
541 if (c == '\n') {
542 *bp = '\0';
543 SYNTAX( "newline in regular expression %.10s...", buf );
544 unput('\n');
545 break;
546 } else if (c == '\\') {
547 *bp++ = '\\';
548 *bp++ = input();
549 } else {
550 *bp++ = c;
551 }
552 }
553 *bp = 0;
554 if (c == 0)
555 SYNTAX("non-terminated regular expression %.10s...", buf);
556 yylval.s = tostring(buf);
557 unput('/');
558 RET(REGEXPR);
559 }
560
561 /* low-level lexical stuff, sort of inherited from lex */
562
char	ebuf[300];		/* circular record of recently read characters, written by input() */
char	*ep = ebuf;		/* next write position in ebuf; stepped back by unput() */
char	yysbuf[100];	/* pushback buffer */
char	*yysptr = yysbuf;	/* one past the last pushed-back character; input() pops from here */
FILE	*yyin = NULL;		/* not referenced in this part of the file */
568
input(void)569 int input(void) /* get next lexical input character */
570 {
571 int c;
572 extern char *lexprog;
573
574 if (yysptr > yysbuf)
575 c = (uschar)*--yysptr;
576 else if (lexprog != NULL) { /* awk '...' */
577 if ((c = (uschar)*lexprog) != 0)
578 lexprog++;
579 } else /* awk -f ... */
580 c = pgetc();
581 if (c == EOF)
582 c = 0;
583 if (ep >= ebuf + sizeof ebuf)
584 ep = ebuf;
585 *ep = c;
586 if (c != 0) {
587 ep++;
588 }
589 return (c);
590 }
591
unput(int c)592 void unput(int c) /* put lexical character back on input */
593 {
594 if (c == '\n')
595 lineno--;
596 if (yysptr >= yysbuf + sizeof(yysbuf))
597 FATAL("pushed back too much: %.20s...", yysbuf);
598 *yysptr++ = c;
599 if (--ep < ebuf)
600 ep = ebuf + sizeof(ebuf) - 1;
601 }
602
/*
 * unputstr: push the whole string s back onto the input.  Characters
 * are pushed last-to-first so that input() returns them in their
 * original order.
 */
void unputstr(const char *s)	/* put a string back on input */
{
	const char *p = s + strlen(s);

	while (p > s)
		unput(*--p);
}
610