%Start A str sc reg comment

%{
/****************************************************************
Copyright (C) AT&T 1993
All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that the copyright notice and this
permission notice and warranty disclaimer appear in supporting
documentation, and that the name of AT&T or any of its entities
not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior
permission.

AT&T DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL AT&T OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/

/*
 * Lexical analyzer for awk programs.
 *
 * Start conditions used by the rules below:
 *	A	ordinary program text
 *	str	inside a "..." string literal (entered on an opening quote)
 *	reg	inside a /.../ regular expression (entered via startreg())
 *	sc	set after a '}' has been reported as ';'; the next call to
 *		the scanner then reports the '}' itself (see the switch at
 *		the top of the rules section)
 *	comment	declared but not used by any rule here
 *
 * Characters of the string or regular expression being collected are
 * accumulated in cbuf[0..clen-1].
 */

#include <stdlib.h>
#include <string.h>
#include "awk.h"
#include "y.tab.h"

extern YYSTYPE	yylval;
extern int	infunc;

int	lineno	= 1;
int	bracecnt = 0;
int	brackcnt  = 0;
int	parencnt = 0;
#define DEBUG
#ifdef	DEBUG
#	define	RET(x)	{if(dbg)printf("lex %s [%s]\n", tokname(x), yytext); return(x); }
#else
#	define	RET(x)	return(x)
#endif

/*
 * CPUT(ch): append one character to cbuf with a bounds check.
 * One slot is always kept free past the NUL terminator because the
 * closing-quote rule appends an extra ' ' before terminating.  On
 * overflow a syntax error is reported and scanning resumes in state A.
 */
#define	CPUT(ch) do { \
		cbuf[clen++] = (ch); \
		if (clen >= CBUFLEN-1) { \
			ERROR "string/reg expr %.30s... too long", cbuf SYNTAX; \
			BEGIN A; \
		} \
	} while (0)

/* CADD: append the current single-character token to cbuf. */
#define	CADD	CPUT(yytext[0])

uchar	cbuf[CBUFLEN];
uchar	*s;
int	clen, cflag;

/* some of this depends on behavior of lex that
   may not be preserved in other implementations of lex.
*/

static int	my_input( YY_CHAR *buf, int max_size );
static char	*save_token( void );

#undef YY_INPUT
#define YY_INPUT(buf,result,max_size) result = my_input(buf, max_size);

#undef YY_USER_INIT
#define YY_USER_INIT init_input_source();
%}

A	[a-zA-Z_]
B	[a-zA-Z0-9_]
D	[0-9]
O	[0-7]
H	[0-9a-fA-F]
WS	[ \t]

%%
	/* Runs on every entry to the scanner.  (yy_start - 1) / 2 is
	 * presumably flex's encoding of the current start condition:
	 * 0 is the initial state, which is switched to A; sc means the
	 * previous call returned ';' for a '}', so deliver the '}' now.
	 */
	switch ((yy_start - 1) / 2) {	/* witchcraft */
	case 0:
		BEGIN A;
		break;
	case sc:
		BEGIN A;
		RET('}');
	}

<A>\n		{ lineno++; RET(NL); }
<A>#.*		{ ; }	/* strip comments */
<A>{WS}+	{ ; }
<A>;		{ RET(';'); }

<A>"\\"\n	{ lineno++; }
<A>BEGIN	{ RET(XBEGIN); }
<A>END		{ RET(XEND); }
<A>func(tion)?	{ if (infunc) ERROR "illegal nested function" SYNTAX; RET(FUNC); }
<A>return	{ if (!infunc) ERROR "return not in function" SYNTAX; RET(RETURN); }
<A>"&&"		{ RET(AND); }
<A>"||"		{ RET(BOR); }
<A>"!"		{ RET(NOT); }
<A>"!="		{ yylval.i = NE; RET(NE); }
<A>"~"		{ yylval.i = MATCH; RET(MATCHOP); }
<A>"!~"		{ yylval.i = NOTMATCH; RET(MATCHOP); }
<A>"<"		{ yylval.i = LT; RET(LT); }
<A>"<="		{ yylval.i = LE; RET(LE); }
<A>"=="		{ yylval.i = EQ; RET(EQ); }
<A>">="		{ yylval.i = GE; RET(GE); }
<A>">"		{ yylval.i = GT; RET(GT); }
<A>">>"		{ yylval.i = APPEND; RET(APPEND); }
<A>"++"		{ yylval.i = INCR; RET(INCR); }
<A>"--"		{ yylval.i = DECR; RET(DECR); }
<A>"+="		{ yylval.i = ADDEQ; RET(ASGNOP); }
<A>"-="		{ yylval.i = SUBEQ; RET(ASGNOP); }
<A>"*="		{ yylval.i = MULTEQ; RET(ASGNOP); }
<A>"/="		{ yylval.i = DIVEQ; RET(ASGNOP); }
<A>"%="		{ yylval.i = MODEQ; RET(ASGNOP); }
<A>"^="		{ yylval.i = POWEQ; RET(ASGNOP); }
<A>"**="	{ yylval.i = POWEQ; RET(ASGNOP); }
<A>"="		{ yylval.i = ASSIGN; RET(ASGNOP); }
<A>"**"		{ RET(POWER); }
<A>"^"		{ RET(POWER); }

<A>"$"{D}+	{ yylval.cp = fieldadr(atoi(yytext+1)); RET(FIELD); }
<A>"$NF"	{ /* push "(NF)" back so that $NF is scanned as $(NF);
		   * plain return because RET would print yytext, which
		   * unput has presumably disturbed */
		  unputstr("(NF)"); return(INDIRECT); }
<A>"$"{A}{B}*	{
		int c;
		/* private copy: the lookahead below may disturb yytext */
		char *yytext_copy = save_token();
		c = input(); unput(c);	/* look for '(' or '[' */
		if (c == '(' || c == '[' || (infunc && isarg(yytext_copy+1) >= 0)) {
			/* rescan the name by itself, after the '$' */
			unputstr(yytext_copy+1);
			free(yytext_copy);
			return(INDIRECT);
		} else {
			yylval.cp = setsymtab(yytext_copy+1,"",0.0,STR|NUM,symtab);
			free(yytext_copy);
			RET(IVAR);
		}
	}
<A>"$"		{ RET(INDIRECT); }
<A>NF		{ yylval.cp = setsymtab(yytext, "", 0.0, NUM, symtab); RET(VARNF); }

<A>({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)?	{
		yylval.cp = setsymtab(yytext, tostring(yytext), atof(yytext), CON|NUM, symtab);
		/* should this also have STR set? */
		RET(NUMBER); }

<A>while	{ RET(WHILE); }
<A>for		{ RET(FOR); }
<A>do		{ RET(DO); }
<A>if		{ RET(IF); }
<A>else		{ RET(ELSE); }
<A>next		{ RET(NEXT); }
<A>exit		{ RET(EXIT); }
<A>break	{ RET(BREAK); }
<A>continue	{ RET(CONTINUE); }
<A>print	{ yylval.i = PRINT; RET(PRINT); }
<A>printf	{ yylval.i = PRINTF; RET(PRINTF); }
<A>sprintf	{ yylval.i = SPRINTF; RET(SPRINTF); }
<A>split	{ yylval.i = SPLIT; RET(SPLIT); }
<A>substr	{ RET(SUBSTR); }
<A>sub		{ yylval.i = SUB; RET(SUB); }
<A>gsub		{ yylval.i = GSUB; RET(GSUB); }
<A>index	{ RET(INDEX); }
<A>match	{ RET(MATCHFCN); }
<A>in		{ RET(IN); }
<A>getline	{ RET(GETLINE); }
<A>close	{ RET(CLOSE); }
<A>delete	{ RET(DELETE); }
<A>length	{ yylval.i = FLENGTH; RET(BLTIN); }
<A>log		{ yylval.i = FLOG; RET(BLTIN); }
<A>int		{ yylval.i = FINT; RET(BLTIN); }
<A>exp		{ yylval.i = FEXP; RET(BLTIN); }
<A>sqrt		{ yylval.i = FSQRT; RET(BLTIN); }
<A>sin		{ yylval.i = FSIN; RET(BLTIN); }
<A>cos		{ yylval.i = FCOS; RET(BLTIN); }
<A>atan2	{ yylval.i = FATAN; RET(BLTIN); }
<A>system	{ yylval.i = FSYSTEM; RET(BLTIN); }
<A>rand		{ yylval.i = FRAND; RET(BLTIN); }
<A>srand	{ yylval.i = FSRAND; RET(BLTIN); }
<A>toupper	{ yylval.i = FTOUPPER; RET(BLTIN); }
<A>tolower	{ yylval.i = FTOLOWER; RET(BLTIN); }
<A>fflush	{ yylval.i = FFLUSH; RET(BLTIN); }

<A>{A}{B}*	{
		int n, c;
		/* private copy: the lookahead below may disturb yytext */
		char *yytext_copy = save_token();
		c = input(); unput(c);	/* look for '(' */
		if (c != '(' && infunc && (n=isarg(yytext_copy)) >= 0) {
			yylval.i = n;
			free(yytext_copy);
			RET(ARG);
		} else {
			yylval.cp = setsymtab(yytext_copy,"",0.0,STR|NUM,symtab);
			free(yytext_copy);
			if (c == '(') {
				RET(CALL);
			} else {
				RET(VAR);
			}
		}
	}
<A>\"		{ BEGIN str; clen = 0; }

<A>"}"		{ /* report ';' now; state sc makes the next call report '}' */
		  if (--bracecnt < 0) ERROR "extra }" SYNTAX; BEGIN sc; RET(';'); }
<A>"]"		{ if (--brackcnt < 0) ERROR "extra ]" SYNTAX; RET(']'); }
<A>")"		{ if (--parencnt < 0) ERROR "extra )" SYNTAX; RET(')'); }

<A>.		{ if (yytext[0] == '{') bracecnt++;
		  else if (yytext[0] == '[') brackcnt++;
		  else if (yytext[0] == '(') parencnt++;
		  yylval.i = yytext[0];
		  RET(yylval.i);	/* everything else */
		}

<reg>\\.	{ /* keep the backslash and the escaped character; skip the
		   * second append if the first one already overflowed */
		  CPUT('\\');
		  if (clen < CBUFLEN-1)
			CPUT(yytext[1]);
		}
<reg>\n		{ ERROR "newline in regular expression %.10s...", cbuf SYNTAX; lineno++; BEGIN A; }
<reg>"/"	{ BEGIN A;
		  cbuf[clen] = 0;
		  yylval.s = tostring(cbuf);
		  unput('/');	/* the closing '/' is rescanned in state A */
		  RET(REGEXPR); }
<reg>.		{ CADD; }

<str>\"		{ BEGIN A;
		  cbuf[clen] = 0; s = tostring(cbuf);
		  /* the symbol-table name is the string text plus a
		   * trailing blank; CPUT always leaves room for it */
		  cbuf[clen] = ' '; cbuf[++clen] = 0;
		  yylval.cp = setsymtab(cbuf, s, 0.0, CON|STR, symtab);
		  RET(STRING); }
<str>\n		{ ERROR "newline in string %.10s...", cbuf SYNTAX; lineno++; BEGIN A; }
<str>"\\\""	{ CPUT('"'); }
<str>"\\"n	{ CPUT('\n'); }
<str>"\\"t	{ CPUT('\t'); }
<str>"\\"f	{ CPUT('\f'); }
<str>"\\"r	{ CPUT('\r'); }
<str>"\\"b	{ CPUT('\b'); }
<str>"\\"v	{ CPUT('\v'); }	/* these ANSIisms may not be known by */
<str>"\\"a	{ CPUT('\007'); }	/* your compiler. hence 007 for bell */
<str>"\\\\"	{ CPUT('\\'); }
<str>"\\"({O}{O}{O}|{O}{O}|{O})	{ /* octal escape: 1 to 3 digits after the backslash */
		  CPUT((uchar) strtoul(yytext+1, NULL, 8)); }
<str>"\\"x({H}+)	{ /* hex escape.  ANSI permits any number of digits!
		   * strtoul saturates instead of overflowing; only the
		   * low-order bits that fit in a uchar are kept */
		  CPUT((uchar) strtoul(yytext+2, NULL, 16)); }
<str>"\\".	{ CPUT(yytext[1]); }
<str>.		{ CADD; }

%%

void startreg(void)	/* start parsing a regular expression */
{
	BEGIN reg;
	clen = 0;
}

/*
 * Return a freshly allocated copy of the current token text.
 * The caller must free() it.  On allocation failure an error is
 * reported and the program exits, since the scanner cannot continue
 * without the copy.
 */
static char *save_token( void )
{
	char *copy = strdup(yytext);

	if (copy == NULL) {
		ERROR "out of space saving token %.30s...", yytext SYNTAX;
		exit(2);
	}
	return copy;
}

/*
 * Input hook installed through YY_INPUT: store up to max_size
 * characters in buf and return how many were stored (0 at end of
 * input).  No NUL terminator is written into buf.
 */
static int my_input( YY_CHAR *buf, int max_size )
{
	extern uchar *lexprog;

	if ( lexprog ) {		/* awk '...' */
		size_t avail = strlen( (const char *) lexprog );
		int num_chars;

		if ( avail > (size_t) max_size )
			num_chars = max_size;
		else
			num_chars = (int) avail;

		/* copy exactly num_chars bytes: a terminating NUL would
		 * land one past max_size when avail == max_size */
		memcpy( buf, lexprog, (size_t) num_chars );
		lexprog += num_chars;
		return num_chars;
	} else {			/* awk -f ... */
		int c = pgetc();

		if (c == EOF)
			return 0;
		buf[0] = c;
		return 1;
	}
}

void unputstr(char *s)	/* put a string back on input */
{
	int i;

	/* push back last character first so the string is reread in order */
	for (i = (int) strlen(s) - 1; i >= 0; i--)
		unput(s[i]);
}

int lex_input(void)	/* read one character through the scanner's input() */
{
	return input();
}