1 /* 2 * Copyright (c) 1983, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Edward Wang at The University of California, Berkeley. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #ifndef lint 12 static char sccsid[] = "@(#)scanner.c 8.1 (Berkeley) 06/06/93"; 13 #endif /* not lint */ 14 15 #include "value.h" 16 #include "token.h" 17 #include "context.h" 18 #include "string.h" 19 20 s_getc() 21 { 22 register c; 23 24 switch (cx.x_type) { 25 case X_FILE: 26 c = getc(cx.x_fp); 27 if (cx.x_bol && c != EOF) { 28 cx.x_bol = 0; 29 cx.x_lineno++; 30 } 31 if (c == '\n') 32 cx.x_bol = 1; 33 return c; 34 case X_BUF: 35 if (*cx.x_bufp != 0) 36 return *cx.x_bufp++ & 0xff; 37 else 38 return EOF; 39 } 40 /*NOTREACHED*/ 41 } 42 43 s_ungetc(c) 44 { 45 if (c == EOF) 46 return EOF; 47 switch (cx.x_type) { 48 case X_FILE: 49 cx.x_bol = 0; 50 return ungetc(c, cx.x_fp); 51 case X_BUF: 52 if (cx.x_bufp > cx.x_buf) 53 return *--cx.x_bufp = c; 54 else 55 return EOF; 56 } 57 /*NOTREACHED*/ 58 } 59 60 s_gettok() 61 { 62 char buf[100]; 63 register char *p = buf; 64 register c; 65 register state = 0; 66 67 loop: 68 c = s_getc(); 69 switch (state) { 70 case 0: 71 switch (c) { 72 case ' ': 73 case '\t': 74 break; 75 case '\n': 76 case ';': 77 cx.x_token = T_EOL; 78 state = -1; 79 break; 80 case '#': 81 state = 1; 82 break; 83 case EOF: 84 cx.x_token = T_EOF; 85 state = -1; 86 break; 87 case 'a': case 'b': case 'c': case 'd': case 'e': 88 case 'f': case 'g': case 'h': case 'i': case 'j': 89 case 'k': case 'l': case 'm': case 'n': case 'o': 90 case 'p': case 'q': case 'r': case 's': case 't': 91 case 'u': case 'v': case 'w': case 'x': case 'y': 92 case 'z': 93 case 'A': case 'B': case 'C': case 'D': case 'E': 94 case 'F': case 'G': case 'H': case 'I': case 'J': 95 case 'K': case 'L': case 'M': case 'N': case 'O': 96 case 'P': case 'Q': case 'R': case 'S': case 'T': 97 case 'U': case 'V': case 'W': case 'X': case 'Y': 98 case 'Z': 99 case '_': case '.': 100 *p++ = c; 101 state = 2; 102 break; 103 case '"': 104 state = 3; 105 break; 106 case '\'': 107 state = 4; 108 break; 109 case '\\': 110 switch (c = s_gettok1()) { 111 case -1: 112 break; 113 case -2: 114 state = 0; 115 break; 116 default: 117 *p++ = c; 118 state = 2; 119 } 120 break; 121 case '0': 122 cx.x_val.v_num = 0; 123 state = 10; 124 break; 125 case '1': case '2': case '3': case '4': 126 case '5': case '6': case '7': case '8': case '9': 127 cx.x_val.v_num = c - '0'; 128 state = 11; 129 break; 130 case '>': 131 state = 20; 132 break; 133 case '<': 134 state = 21; 135 break; 136 case '=': 137 state = 22; 138 break; 139 case '!': 140 state = 23; 141 break; 142 case '&': 143 state = 24; 144 break; 145 case '|': 146 state = 25; 147 break; 148 case '$': 149 state = 26; 150 break; 151 case '~': 152 cx.x_token = T_COMP; 153 state = -1; 154 break; 155 case '+': 156 cx.x_token = T_PLUS; 157 state = -1; 158 break; 159 case '-': 160 cx.x_token = T_MINUS; 161 state = -1; 162 break; 163 case '*': 164 cx.x_token = T_MUL; 165 state = -1; 166 break; 167 case '/': 168 cx.x_token = T_DIV; 169 state = -1; 170 break; 171 case '%': 172 cx.x_token = T_MOD; 173 state = -1; 174 break; 175 case '^': 176 cx.x_token = T_XOR; 177 state = -1; 178 break; 179 case '(': 180 cx.x_token = T_LP; 181 state = -1; 182 break; 183 case ')': 184 cx.x_token = T_RP; 185 state = -1; 186 break; 187 case ',': 188 cx.x_token = T_COMMA; 189 state = -1; 190 break; 191 case '?': 192 cx.x_token = T_QUEST; 193 state = -1; 194 break; 195 case ':': 196 cx.x_token = T_COLON; 197 state = -1; 198 break; 199 case '[': 200 cx.x_token = T_LB; 201 state = -1; 202 break; 203 case ']': 204 cx.x_token = T_RB; 205 state = -1; 206 break; 207 default: 208 cx.x_val.v_num = c; 209 cx.x_token = T_CHAR; 210 state = -1; 211 break; 212 } 213 break; 214 case 1: /* got # */ 215 if (c == '\n' || c == EOF) { 216 (void) s_ungetc(c); 217 state = 0; 218 } 219 break; 220 case 2: /* unquoted string */ 221 switch (c) { 222 case 'a': case 'b': case 'c': case 'd': case 'e': 223 case 'f': case 'g': case 'h': case 'i': case 'j': 224 case 'k': case 'l': case 'm': case 'n': case 'o': 225 case 'p': case 'q': case 'r': case 's': case 't': 226 case 'u': case 'v': case 'w': case 'x': case 'y': 227 case 'z': 228 case 'A': case 'B': case 'C': case 'D': case 'E': 229 case 'F': case 'G': case 'H': case 'I': case 'J': 230 case 'K': case 'L': case 'M': case 'N': case 'O': 231 case 'P': case 'Q': case 'R': case 'S': case 'T': 232 case 'U': case 'V': case 'W': case 'X': case 'Y': 233 case 'Z': 234 case '_': case '.': 235 case '0': case '1': case '2': case '3': case '4': 236 case '5': case '6': case '7': case '8': case '9': 237 if (p < buf + sizeof buf - 1) 238 *p++ = c; 239 break; 240 case '"': 241 state = 3; 242 break; 243 case '\'': 244 state = 4; 245 break; 246 case '\\': 247 switch (c = s_gettok1()) { 248 case -2: 249 (void) s_ungetc(' '); 250 case -1: 251 break; 252 default: 253 if (p < buf + sizeof buf - 1) 254 *p++ = c; 255 } 256 break; 257 default: 258 (void) s_ungetc(c); 259 case EOF: 260 *p = 0; 261 cx.x_token = T_STR; 262 switch (*buf) { 263 case 'i': 264 if (buf[1] == 'f' && buf[2] == 0) 265 cx.x_token = T_IF; 266 break; 267 case 't': 268 if (buf[1] == 'h' && buf[2] == 'e' 269 && buf[3] == 'n' && buf[4] == 0) 270 cx.x_token = T_THEN; 271 break; 272 case 'e': 273 if (buf[1] == 'n' && buf[2] == 'd' 274 && buf[3] == 'i' && buf[4] == 'f' 275 && buf[5] == 0) 276 cx.x_token = T_ENDIF; 277 else if (buf[1] == 'l' && buf[2] == 's') 278 if (buf[3] == 'i' && buf[4] == 'f' 279 && buf[5] == 0) 280 cx.x_token = T_ELSIF; 281 else if (buf[3] == 'e' && buf[4] == 0) 282 cx.x_token = T_ELSE; 283 break; 284 } 285 if (cx.x_token == T_STR 286 && (cx.x_val.v_str = str_cpy(buf)) == 0) { 287 p_memerror(); 288 cx.x_token = T_EOF; 289 } 290 state = -1; 291 break; 292 } 293 break; 294 case 3: /* " quoted string */ 295 switch (c) { 296 case '\n': 297 (void) s_ungetc(c); 298 case EOF: 299 case '"': 300 state = 2; 301 break; 302 case '\\': 303 switch (c = s_gettok1()) { 304 case -1: 305 case -2: /* newlines are invisible */ 306 break; 307 default: 308 if (p < buf + sizeof buf - 1) 309 *p++ = c; 310 } 311 break; 312 default: 313 if (p < buf + sizeof buf - 1) 314 *p++ = c; 315 break; 316 } 317 break; 318 case 4: /* ' quoted string */ 319 switch (c) { 320 case '\n': 321 (void) s_ungetc(c); 322 case EOF: 323 case '\'': 324 state = 2; 325 break; 326 case '\\': 327 switch (c = s_gettok1()) { 328 case -1: 329 case -2: /* newlines are invisible */ 330 break; 331 default: 332 if (p < buf + sizeof buf - 1) 333 *p++ = c; 334 } 335 break; 336 default: 337 if (p < buf + sizeof buf - 1) 338 *p++ = c; 339 break; 340 } 341 break; 342 case 10: /* got 0 */ 343 switch (c) { 344 case 'x': 345 case 'X': 346 cx.x_val.v_num = 0; 347 state = 12; 348 break; 349 case '0': case '1': case '2': case '3': case '4': 350 case '5': case '6': case '7': 351 cx.x_val.v_num = c - '0'; 352 state = 13; 353 break; 354 case '8': case '9': 355 cx.x_val.v_num = c - '0'; 356 state = 11; 357 break; 358 default: 359 (void) s_ungetc(c); 360 state = -1; 361 cx.x_token = T_NUM; 362 } 363 break; 364 case 11: /* decimal number */ 365 switch (c) { 366 case '0': case '1': case '2': case '3': case '4': 367 case '5': case '6': case '7': case '8': case '9': 368 cx.x_val.v_num = cx.x_val.v_num * 10 + c - '0'; 369 break; 370 default: 371 (void) s_ungetc(c); 372 state = -1; 373 cx.x_token = T_NUM; 374 } 375 break; 376 case 12: /* hex number */ 377 switch (c) { 378 case '0': case '1': case '2': case '3': case '4': 379 case '5': case '6': case '7': case '8': case '9': 380 cx.x_val.v_num = cx.x_val.v_num * 16 + c - '0'; 381 break; 382 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 383 cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'a' + 10; 384 break; 385 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 386 cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'A' + 10; 387 break; 388 default: 389 (void) s_ungetc(c); 390 state = -1; 391 cx.x_token = T_NUM; 392 } 393 break; 394 case 13: /* octal number */ 395 switch (c) { 396 case '0': case '1': case '2': case '3': case '4': 397 case '5': case '6': case '7': 398 cx.x_val.v_num = cx.x_val.v_num * 8 + c - '0'; 399 break; 400 default: 401 (void) s_ungetc(c); 402 state = -1; 403 cx.x_token = T_NUM; 404 } 405 break; 406 case 20: /* got > */ 407 switch (c) { 408 case '=': 409 cx.x_token = T_GE; 410 state = -1; 411 break; 412 case '>': 413 cx.x_token = T_RS; 414 state = -1; 415 break; 416 default: 417 (void) s_ungetc(c); 418 cx.x_token = T_GT; 419 state = -1; 420 } 421 break; 422 case 21: /* got < */ 423 switch (c) { 424 case '=': 425 cx.x_token = T_LE; 426 state = -1; 427 break; 428 case '<': 429 cx.x_token = T_LS; 430 state = -1; 431 break; 432 default: 433 (void) s_ungetc(c); 434 cx.x_token = T_LT; 435 state = -1; 436 } 437 break; 438 case 22: /* got = */ 439 switch (c) { 440 case '=': 441 cx.x_token = T_EQ; 442 state = -1; 443 break; 444 default: 445 (void) s_ungetc(c); 446 cx.x_token = T_ASSIGN; 447 state = -1; 448 } 449 break; 450 case 23: /* got ! */ 451 switch (c) { 452 case '=': 453 cx.x_token = T_NE; 454 state = -1; 455 break; 456 default: 457 (void) s_ungetc(c); 458 cx.x_token = T_NOT; 459 state = -1; 460 } 461 break; 462 case 24: /* got & */ 463 switch (c) { 464 case '&': 465 cx.x_token = T_ANDAND; 466 state = -1; 467 break; 468 default: 469 (void) s_ungetc(c); 470 cx.x_token = T_AND; 471 state = -1; 472 } 473 break; 474 case 25: /* got | */ 475 switch (c) { 476 case '|': 477 cx.x_token = T_OROR; 478 state = -1; 479 break; 480 default: 481 (void) s_ungetc(c); 482 cx.x_token = T_OR; 483 state = -1; 484 } 485 break; 486 case 26: /* got $ */ 487 switch (c) { 488 case '?': 489 cx.x_token = T_DQ; 490 state = -1; 491 break; 492 default: 493 (void) s_ungetc(c); 494 cx.x_token = T_DOLLAR; 495 state = -1; 496 } 497 break; 498 default: 499 abort(); 500 } 501 if (state >= 0) 502 goto loop; 503 return cx.x_token; 504 } 505 506 s_gettok1() 507 { 508 register c; 509 register n; 510 511 c = s_getc(); /* got \ */ 512 switch (c) { 513 case EOF: 514 return -1; 515 case '\n': 516 return -2; 517 case 'b': 518 return '\b'; 519 case 'f': 520 return '\f'; 521 case 'n': 522 return '\n'; 523 case 'r': 524 return '\r'; 525 case 't': 526 return '\t'; 527 default: 528 return c; 529 case '0': case '1': case '2': case '3': case '4': 530 case '5': case '6': case '7': 531 break; 532 } 533 n = c - '0'; 534 c = s_getc(); /* got \[0-7] */ 535 if (c < '0' || c > '7') { 536 (void) s_ungetc(c); 537 return n; 538 } 539 n = n * 8 + c - '0'; 540 c = s_getc(); /* got \[0-7][0-7] */ 541 if (c < '0' || c > '7') { 542 (void) s_ungetc(c); 543 return n; 544 } 545 return n * 8 + c - '0'; 546 } 547