%Start A str sc reg comment

%{
/****************************************************************
Copyright (C) AT&T 1993
All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that the copyright notice and this
permission notice and warranty disclaimer appear in supporting
documentation, and that the name of AT&T or any of its entities
not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior
permission.

AT&T DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL AT&T OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/

#include <stdlib.h>
#include <string.h>
#include "awk.h"
#include "y.tab.h"

extern YYSTYPE	yylval;
extern int	infunc;

int	lineno	= 1;
int	bracecnt = 0;
int	brackcnt = 0;
int	parencnt = 0;

/*
 * RET(x) returns token x from yylex(); when DEBUG is defined and the
 * run-time flag dbg is set it first traces the token and its text.
 * Both forms are single statements, so RET is safe in if/else.
 */
#define DEBUG
#ifdef	DEBUG
#	define	RET(x)	do { if(dbg)printf("lex %s [%s]\n", tokname(x), yytext); return(x); } while (0)
#else
#	define	RET(x)	return(x)
#endif

/*
 * Helpers defined after the rules section.
 *   cput     - bounded append of one byte to cbuf; 0 on overflow.
 *   savetext - private heap copy of yytext; never returns NULL.
 *   peekc    - look at the next input character without consuming it.
 */
static int	cput(int c);
static char	*savetext(void);
static int	peekc(void);

/* append the single character just matched to cbuf, with bounds check */
#define	CADD	(void) cput(yytext[0])

uchar	cbuf[CBUFLEN];	/* text of the string or regular expression being collected */
uchar	*s;
int	clen, cflag;	/* clen: number of bytes currently held in cbuf */

/* some of this depends on behavior of lex that
   may not be preserved in other implementations of lex.
*/

static int my_input( YY_CHAR *buf, int max_size );

#undef YY_INPUT
#define YY_INPUT(buf,result,max_size) result = my_input(buf, max_size);

#undef YY_USER_INIT
#define YY_USER_INIT init_input_source();
%}

A	[a-zA-Z_]
B	[a-zA-Z0-9_]
D	[0-9]
O	[0-7]
H	[0-9a-fA-F]
WS	[ \t]

%%
	/*
	 * Executed on every entry to yylex().  The expression is the
	 * current start condition: from the initial one (0) switch to A;
	 * from sc (set by the "}" rule, which returned ';') switch back
	 * to A and deliver the '}' itself.
	 */
	switch ((yy_start - 1) / 2) {	/* witchcraft */
	case 0:
		BEGIN A;
		break;
	case sc:
		BEGIN A;
		RET('}');
	}

<A>\n		{ lineno++; RET(NL); }
<A>#.*		{ ; }	/* strip comments */
<A>{WS}+	{ ; }
<A>;		{ RET(';'); }

<A>"\\"\n	{ lineno++; }
<A>BEGIN	{ RET(XBEGIN); }
<A>END		{ RET(XEND); }
<A>func(tion)?	{ if (infunc) ERROR "illegal nested function" SYNTAX; RET(FUNC); }
<A>return	{ if (!infunc) ERROR "return not in function" SYNTAX; RET(RETURN); }
<A>"&&"		{ RET(AND); }
<A>"||"		{ RET(BOR); }
<A>"!"		{ RET(NOT); }
<A>"!="		{ yylval.i = NE; RET(NE); }
<A>"~"		{ yylval.i = MATCH; RET(MATCHOP); }
<A>"!~"		{ yylval.i = NOTMATCH; RET(MATCHOP); }
<A>"<"		{ yylval.i = LT; RET(LT); }
<A>"<="		{ yylval.i = LE; RET(LE); }
<A>"=="		{ yylval.i = EQ; RET(EQ); }
<A>">="		{ yylval.i = GE; RET(GE); }
<A>">"		{ yylval.i = GT; RET(GT); }
<A>">>"		{ yylval.i = APPEND; RET(APPEND); }
<A>"++"		{ yylval.i = INCR; RET(INCR); }
<A>"--"		{ yylval.i = DECR; RET(DECR); }
<A>"+="		{ yylval.i = ADDEQ; RET(ASGNOP); }
<A>"-="		{ yylval.i = SUBEQ; RET(ASGNOP); }
<A>"*="		{ yylval.i = MULTEQ; RET(ASGNOP); }
<A>"/="		{ yylval.i = DIVEQ; RET(ASGNOP); }
<A>"%="		{ yylval.i = MODEQ; RET(ASGNOP); }
<A>"^="		{ yylval.i = POWEQ; RET(ASGNOP); }
<A>"**="	{ yylval.i = POWEQ; RET(ASGNOP); }
<A>"="		{ yylval.i = ASSIGN; RET(ASGNOP); }
<A>"**"		{ RET(POWER); }
<A>"^"		{ RET(POWER); }

<A>"$"{D}+	{ yylval.cp = fieldadr(atoi(yytext+1)); RET(FIELD); }
<A>"$NF"	{ /* rewrite $NF as $(NF); plain return because RET's
		     trace would print yytext after it was pushed over */
		  unputstr("(NF)"); return(INDIRECT); }
<A>"$"{A}{B}*	{
		  /* copy the token before peeking: the original code did
		     the same, presumably because input()/unput() may
		     overwrite yytext */
		  char *name = savetext();
		  int c = peekc();	/* look for '(' or '[' */
		  if (c == '(' || c == '[' ||
		      (infunc && isarg(name+1) >= 0)) {
			/* $f(...), $a[...] or $arg: emit '$' alone and
			   rescan the name as an ordinary expression */
			unputstr(name+1);
			free(name);
			return(INDIRECT);
		  } else {
			yylval.cp =
				setsymtab(name+1,"",0.0,STR|NUM,symtab);
			free(name);
			RET(IVAR);
		  }
		}
<A>"$"		{ RET(INDIRECT); }
<A>NF		{ yylval.cp = setsymtab(yytext, "", 0.0, NUM, symtab); RET(VARNF); }

<A>({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)?	{
		  yylval.cp = setsymtab(yytext, tostring(yytext), atof(yytext), CON|NUM, symtab);
		  /* should this also have STR set? */
		  RET(NUMBER); }

<A>while	{ RET(WHILE); }
<A>for		{ RET(FOR); }
<A>do		{ RET(DO); }
<A>if		{ RET(IF); }
<A>else		{ RET(ELSE); }
<A>next		{ RET(NEXT); }
<A>exit		{ RET(EXIT); }
<A>break	{ RET(BREAK); }
<A>continue	{ RET(CONTINUE); }
<A>print	{ yylval.i = PRINT; RET(PRINT); }
<A>printf	{ yylval.i = PRINTF; RET(PRINTF); }
<A>sprintf	{ yylval.i = SPRINTF; RET(SPRINTF); }
<A>split	{ yylval.i = SPLIT; RET(SPLIT); }
<A>substr	{ RET(SUBSTR); }
<A>sub		{ yylval.i = SUB; RET(SUB); }
<A>gsub		{ yylval.i = GSUB; RET(GSUB); }
<A>index	{ RET(INDEX); }
<A>match	{ RET(MATCHFCN); }
<A>in		{ RET(IN); }
<A>getline	{ RET(GETLINE); }
<A>close	{ RET(CLOSE); }
<A>delete	{ RET(DELETE); }
<A>length	{ yylval.i = FLENGTH; RET(BLTIN); }
<A>log		{ yylval.i = FLOG; RET(BLTIN); }
<A>int		{ yylval.i = FINT; RET(BLTIN); }
<A>exp		{ yylval.i = FEXP; RET(BLTIN); }
<A>sqrt		{ yylval.i = FSQRT; RET(BLTIN); }
<A>sin		{ yylval.i = FSIN; RET(BLTIN); }
<A>cos		{ yylval.i = FCOS; RET(BLTIN); }
<A>atan2	{ yylval.i = FATAN; RET(BLTIN); }
<A>system	{ yylval.i = FSYSTEM; RET(BLTIN); }
<A>rand		{ yylval.i = FRAND; RET(BLTIN); }
<A>srand	{ yylval.i = FSRAND; RET(BLTIN); }
<A>toupper	{ yylval.i = FTOUPPER; RET(BLTIN); }
<A>tolower	{ yylval.i = FTOLOWER; RET(BLTIN); }
<A>fflush	{ yylval.i = FFLUSH; RET(BLTIN); }

<A>{A}{B}*	{ int n;
		  char *name = savetext();	/* copy before peeking, as above */
		  int c = peekc();		/* look for '(' */
		  if (c != '(' && infunc && (n=isarg(name)) >= 0) {
			/* a parameter of the function being defined */
			yylval.i = n;
			free(name);
			RET(ARG);
		  } else {
			yylval.cp =
				setsymtab(name,"",0.0,STR|NUM,symtab);
			free(name);
			if (c == '(') {
				RET(CALL);
			} else {
				RET(VAR);
			}
		  }
		}

<A>\"		{ BEGIN str; clen = 0; }

<A>"}"		{ /* returns ';' now; the '}' itself is delivered on the
		     next yylex() call by the switch at the top */
		  if (--bracecnt < 0) ERROR "extra }" SYNTAX; BEGIN sc; RET(';'); }
<A>"]"		{ if (--brackcnt < 0) ERROR "extra ]" SYNTAX; RET(']'); }
<A>")"		{ if (--parencnt < 0) ERROR "extra )" SYNTAX; RET(')'); }

<A>.		{ if (yytext[0] == '{') bracecnt++;
		  else if (yytext[0] == '[') brackcnt++;
		  else if (yytext[0] == '(') parencnt++;
		  RET(yylval.i = yytext[0]); /* everything else */ }

<reg>\\.	{ /* keep the backslash: the escape is left in the regex text */
		  if (cput('\\')) (void) cput(yytext[1]); }
<reg>\n		{ cbuf[clen] = 0;	/* terminate before printing */
		  ERROR "newline in regular expression %.10s...", cbuf SYNTAX; lineno++; BEGIN A; }
<reg>"/"	{ BEGIN A;
		  cbuf[clen] = 0;
		  yylval.s = tostring(cbuf);
		  unput('/');
		  RET(REGEXPR); }
<reg>.		{ CADD; }

<str>\"		{ BEGIN A;
		  cbuf[clen] = 0; s = tostring(cbuf);
		  /* the symbol-table name is the string text plus a
		     trailing blank; cput() always leaves room for it */
		  cbuf[clen] = ' '; cbuf[++clen] = 0;
		  yylval.cp = setsymtab(cbuf, s, 0.0, CON|STR, symtab);
		  RET(STRING); }
<str>\n		{ cbuf[clen] = 0;	/* terminate before printing */
		  ERROR "newline in string %.10s...", cbuf SYNTAX; lineno++; BEGIN A; }
<str>"\\\""	{ (void) cput('"'); }
<str>"\\"n	{ (void) cput('\n'); }
<str>"\\"t	{ (void) cput('\t'); }
<str>"\\"f	{ (void) cput('\f'); }
<str>"\\"r	{ (void) cput('\r'); }
<str>"\\"b	{ (void) cput('\b'); }
<str>"\\"v	{ (void) cput('\v'); /* these ANSIisms may not be known by */ }
<str>"\\"a	{ (void) cput('\007'); /* your compiler. hence 007 for bell */ }
<str>"\\\\"	{ (void) cput('\\'); }
<str>"\\"({O}{O}{O}|{O}{O}|{O})	{
		  /* octal escape; only the low 8 bits are stored */
		  (void) cput((int) (strtoul((const char *) yytext + 1, NULL, 8) & 0xFF)); }
<str>"\\"x({H}+)	{
		  /* ANSI permits any number!  Only the low 8 bits are stored;
		     strtoul keeps over-long digit runs well defined. */
		  (void) cput((int) (strtoul((const char *) yytext + 2, NULL, 16) & 0xFF)); }
<str>"\\".	{ (void) cput(yytext[1]); }
<str>.		{ CADD; }

%%

/*
 * cput: append byte c to cbuf.
 * Two bytes are always kept free at the end of cbuf so that the
 * closing-quote rule can add its trailing blank and the terminator.
 * On overflow the error is reported, the scanner goes back to start
 * condition A, nothing is stored, and 0 is returned; otherwise 1.
 */
static int cput(int c)
{
	if (clen >= CBUFLEN-2) {
		cbuf[clen] = 0;		/* so the message prints only this token */
		ERROR "string/reg expr %.30s... too long", cbuf SYNTAX;
		BEGIN A;
		return 0;
	}
	cbuf[clen++] = c;
	return 1;
}

/*
 * savetext: return a malloc'ed copy of the current token text.
 * The caller must free() the result.  If memory is exhausted the
 * error is reported and the program exits with a non-zero status,
 * so the result is never NULL.
 */
static char *savetext(void)
{
	char *copy = strdup((const char *) yytext);

	if (copy == NULL) {
		ERROR "out of memory saving token %.30s", yytext SYNTAX;
		exit(2);
	}
	return copy;
}

/*
 * peekc: return the next input character without consuming it.
 * At end of input EOF is returned as-is and is NOT pushed back,
 * since unput(EOF) would insert a bogus byte into the input.
 */
static int peekc(void)
{
	int c = input();

	if (c != EOF)
		unput(c);
	return c;
}

void startreg(void)	/* start parsing a regular expression */
{
	BEGIN reg;
	clen = 0;
}

/*
 * my_input: YY_INPUT replacement.  Stores up to max_size characters
 * in buf and returns how many were stored; 0 means end of input.
 * When lexprog is set (awk '...') characters are taken from that
 * string and lexprog is advanced past them; otherwise (awk -f ...)
 * one character at a time is fetched with pgetc().
 */
static int my_input( YY_CHAR *buf, int max_size )
{
	extern uchar *lexprog;

	if ( lexprog ) {		/* awk '...' */
		size_t room = max_size > 0 ? (size_t) max_size : 0;
		size_t num_chars = strlen( (const char *) lexprog );

		if ( num_chars > room )
			num_chars = room;
		/* exactly num_chars bytes, no terminator: buf holds only max_size */
		memcpy( buf, lexprog, num_chars );
		lexprog += num_chars;
		return (int) num_chars;

	} else {			/* awk -f ... */
		int c = pgetc();
		if (c == EOF)
			return 0;
		buf[0] = c;
		return 1;
	}
}

void unputstr(char *s)	/* put a string back on input */
{
	size_t i;

	/* push back last character first so s is re-read in order */
	for (i = strlen(s); i > 0; i--)
		unput(s[i-1]);
}

int lex_input(void)	/* read one character through the scanner's input() */
{
	return input();
}