1 /* @(#)scanner.c 8.1 (Berkeley) 6/6/93 */ 2 /* $NetBSD: scanner.c,v 1.9 2003/08/07 11:17:29 agc Exp $ */ 3 4 /* 5 * Copyright (c) 1983, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Edward Wang at The University of California, Berkeley. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #include <stdlib.h> 37 38 #include "defs.h" 39 #include "token.h" 40 #include "context.h" 41 #include "window_string.h" 42 43 int s_getc(void); 44 int s_gettok1(void); 45 int s_ungetc(int); 46 47 int 48 s_getc(void) 49 { 50 int c; 51 52 switch (cx.x_type) { 53 case X_FILE: 54 c = getc(cx.x_fp); 55 if (cx.x_bol && c != EOF) { 56 cx.x_bol = 0; 57 cx.x_lineno++; 58 } 59 if (c == '\n') 60 cx.x_bol = 1; 61 return c; 62 case X_BUF: 63 if (*cx.x_bufp != 0) 64 return *cx.x_bufp++ & 0xff; 65 else 66 return EOF; 67 } 68 /*NOTREACHED*/ 69 return(0); /* XXX: placate gcc */ 70 } 71 72 int 73 s_ungetc(int c) 74 { 75 if (c == EOF) 76 return EOF; 77 switch (cx.x_type) { 78 case X_FILE: 79 cx.x_bol = 0; 80 return ungetc(c, cx.x_fp); 81 case X_BUF: 82 if (cx.x_bufp > cx.x_buf) 83 return *--cx.x_bufp = c; 84 else 85 return EOF; 86 } 87 /*NOTREACHED*/ 88 return(0); /* XXX: placate gcc */ 89 } 90 91 int 92 s_gettok(void) 93 { 94 char buf[100]; 95 char *p = buf; 96 int c; 97 int state = 0; 98 99 loop: 100 c = s_getc(); 101 switch (state) { 102 case 0: 103 switch (c) { 104 case ' ': 105 case '\t': 106 break; 107 case '\n': 108 case ';': 109 cx.x_token = T_EOL; 110 state = -1; 111 break; 112 case '#': 113 state = 1; 114 break; 115 case EOF: 116 cx.x_token = T_EOF; 117 state = -1; 118 break; 119 case 'a': case 'b': case 'c': case 'd': case 'e': 120 case 'f': case 'g': case 'h': case 'i': case 'j': 121 case 'k': case 'l': case 'm': case 'n': case 'o': 122 case 'p': case 'q': case 'r': case 's': case 't': 123 case 'u': case 'v': case 'w': case 'x': case 'y': 124 case 'z': 125 case 'A': case 'B': case 'C': case 'D': case 'E': 126 case 'F': case 'G': case 'H': case 'I': case 'J': 127 case 'K': case 'L': case 'M': case 'N': case 'O': 128 case 'P': case 'Q': case 'R': case 'S': case 'T': 129 case 'U': case 'V': case 'W': case 'X': case 'Y': 130 case 'Z': 131 case '_': case '.': 132 *p++ = c; 133 state = 2; 134 break; 135 case '"': 136 state = 3; 137 break; 138 case '\'': 139 state = 4; 140 break; 141 case '\\': 142 switch (c = s_gettok1()) { 143 case -1: 144 break; 145 case -2: 146 state = 0; 147 break; 148 default: 149 *p++ = c; 150 state = 2; 151 } 152 break; 153 case '0': 154 cx.x_val.v_num = 0; 155 state = 10; 156 break; 157 case '1': case '2': case '3': case '4': 158 case '5': case '6': case '7': case '8': case '9': 159 cx.x_val.v_num = c - '0'; 160 state = 11; 161 break; 162 case '>': 163 state = 20; 164 break; 165 case '<': 166 state = 21; 167 break; 168 case '=': 169 state = 22; 170 break; 171 case '!': 172 state = 23; 173 break; 174 case '&': 175 state = 24; 176 break; 177 case '|': 178 state = 25; 179 break; 180 case '$': 181 state = 26; 182 break; 183 case '~': 184 cx.x_token = T_COMP; 185 state = -1; 186 break; 187 case '+': 188 cx.x_token = T_PLUS; 189 state = -1; 190 break; 191 case '-': 192 cx.x_token = T_MINUS; 193 state = -1; 194 break; 195 case '*': 196 cx.x_token = T_MUL; 197 state = -1; 198 break; 199 case '/': 200 cx.x_token = T_DIV; 201 state = -1; 202 break; 203 case '%': 204 cx.x_token = T_MOD; 205 state = -1; 206 break; 207 case '^': 208 cx.x_token = T_XOR; 209 state = -1; 210 break; 211 case '(': 212 cx.x_token = T_LP; 213 state = -1; 214 break; 215 case ')': 216 cx.x_token = T_RP; 217 state = -1; 218 break; 219 case ',': 220 cx.x_token = T_COMMA; 221 state = -1; 222 break; 223 case '?': 224 cx.x_token = T_QUEST; 225 state = -1; 226 break; 227 case ':': 228 cx.x_token = T_COLON; 229 state = -1; 230 break; 231 case '[': 232 cx.x_token = T_LB; 233 state = -1; 234 break; 235 case ']': 236 cx.x_token = T_RB; 237 state = -1; 238 break; 239 default: 240 cx.x_val.v_num = c; 241 cx.x_token = T_CHAR; 242 state = -1; 243 break; 244 } 245 break; 246 case 1: /* got # */ 247 if (c == '\n' || c == EOF) { 248 (void) s_ungetc(c); 249 state = 0; 250 } 251 break; 252 case 2: /* unquoted string */ 253 switch (c) { 254 case 'a': case 'b': case 'c': case 'd': case 'e': 255 case 'f': case 'g': case 'h': case 'i': case 'j': 256 case 'k': case 'l': case 'm': case 'n': case 'o': 257 case 'p': case 'q': case 'r': case 's': case 't': 258 case 'u': case 'v': case 'w': case 'x': case 'y': 259 case 'z': 260 case 'A': case 'B': case 'C': case 'D': case 'E': 261 case 'F': case 'G': case 'H': case 'I': case 'J': 262 case 'K': case 'L': case 'M': case 'N': case 'O': 263 case 'P': case 'Q': case 'R': case 'S': case 'T': 264 case 'U': case 'V': case 'W': case 'X': case 'Y': 265 case 'Z': 266 case '_': case '.': 267 case '0': case '1': case '2': case '3': case '4': 268 case '5': case '6': case '7': case '8': case '9': 269 if (p < buf + sizeof buf - 1) 270 *p++ = c; 271 break; 272 case '"': 273 state = 3; 274 break; 275 case '\'': 276 state = 4; 277 break; 278 case '\\': 279 switch (c = s_gettok1()) { 280 case -2: 281 (void) s_ungetc(' '); 282 case -1: 283 break; 284 default: 285 if (p < buf + sizeof buf - 1) 286 *p++ = c; 287 } 288 break; 289 default: 290 (void) s_ungetc(c); 291 /* FALLTHROUGH */ 292 case EOF: 293 *p = 0; 294 cx.x_token = T_STR; 295 switch (*buf) { 296 case 'i': 297 if (buf[1] == 'f' && buf[2] == 0) 298 cx.x_token = T_IF; 299 break; 300 case 't': 301 if (buf[1] == 'h' && buf[2] == 'e' 302 && buf[3] == 'n' && buf[4] == 0) 303 cx.x_token = T_THEN; 304 break; 305 case 'e': 306 if (buf[1] == 'n' && buf[2] == 'd' 307 && buf[3] == 'i' && buf[4] == 'f' 308 && buf[5] == 0) 309 cx.x_token = T_ENDIF; 310 else { 311 if (buf[1] == 'l' && buf[2] == 's') { 312 if (buf[3] == 'i' 313 && buf[4] == 'f' 314 && buf[5] == 0) 315 cx.x_token = T_ELSIF; 316 else { 317 if (buf[3] == 'e' 318 && buf[4] == 0) 319 cx.x_token = 320 T_ELSE; 321 } 322 } 323 } 324 break; 325 } 326 if (cx.x_token == T_STR 327 && (cx.x_val.v_str = str_cpy(buf)) == 0) { 328 p_memerror(); 329 cx.x_token = T_EOF; 330 } 331 state = -1; 332 break; 333 } 334 break; 335 case 3: /* " quoted string */ 336 switch (c) { 337 case '\n': 338 (void) s_ungetc(c); 339 /* FALLTHROUGH */ 340 case EOF: 341 case '"': 342 state = 2; 343 break; 344 case '\\': 345 switch (c = s_gettok1()) { 346 case -1: 347 case -2: /* newlines are invisible */ 348 break; 349 default: 350 if (p < buf + sizeof buf - 1) 351 *p++ = c; 352 } 353 break; 354 default: 355 if (p < buf + sizeof buf - 1) 356 *p++ = c; 357 break; 358 } 359 break; 360 case 4: /* ' quoted string */ 361 switch (c) { 362 case '\n': 363 (void) s_ungetc(c); 364 /* FALLTHROUGH */ 365 case EOF: 366 case '\'': 367 state = 2; 368 break; 369 case '\\': 370 switch (c = s_gettok1()) { 371 case -1: 372 case -2: /* newlines are invisible */ 373 break; 374 default: 375 if (p < buf + sizeof buf - 1) 376 *p++ = c; 377 } 378 break; 379 default: 380 if (p < buf + sizeof buf - 1) 381 *p++ = c; 382 break; 383 } 384 break; 385 case 10: /* got 0 */ 386 switch (c) { 387 case 'x': 388 case 'X': 389 cx.x_val.v_num = 0; 390 state = 12; 391 break; 392 case '0': case '1': case '2': case '3': case '4': 393 case '5': case '6': case '7': 394 cx.x_val.v_num = c - '0'; 395 state = 13; 396 break; 397 case '8': case '9': 398 cx.x_val.v_num = c - '0'; 399 state = 11; 400 break; 401 default: 402 (void) s_ungetc(c); 403 state = -1; 404 cx.x_token = T_NUM; 405 } 406 break; 407 case 11: /* decimal number */ 408 switch (c) { 409 case '0': case '1': case '2': case '3': case '4': 410 case '5': case '6': case '7': case '8': case '9': 411 cx.x_val.v_num = cx.x_val.v_num * 10 + c - '0'; 412 break; 413 default: 414 (void) s_ungetc(c); 415 state = -1; 416 cx.x_token = T_NUM; 417 } 418 break; 419 case 12: /* hex number */ 420 switch (c) { 421 case '0': case '1': case '2': case '3': case '4': 422 case '5': case '6': case '7': case '8': case '9': 423 cx.x_val.v_num = cx.x_val.v_num * 16 + c - '0'; 424 break; 425 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 426 cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'a' + 10; 427 break; 428 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 429 cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'A' + 10; 430 break; 431 default: 432 (void) s_ungetc(c); 433 state = -1; 434 cx.x_token = T_NUM; 435 } 436 break; 437 case 13: /* octal number */ 438 switch (c) { 439 case '0': case '1': case '2': case '3': case '4': 440 case '5': case '6': case '7': 441 cx.x_val.v_num = cx.x_val.v_num * 8 + c - '0'; 442 break; 443 default: 444 (void) s_ungetc(c); 445 state = -1; 446 cx.x_token = T_NUM; 447 } 448 break; 449 case 20: /* got > */ 450 switch (c) { 451 case '=': 452 cx.x_token = T_GE; 453 state = -1; 454 break; 455 case '>': 456 cx.x_token = T_RS; 457 state = -1; 458 break; 459 default: 460 (void) s_ungetc(c); 461 cx.x_token = T_GT; 462 state = -1; 463 } 464 break; 465 case 21: /* got < */ 466 switch (c) { 467 case '=': 468 cx.x_token = T_LE; 469 state = -1; 470 break; 471 case '<': 472 cx.x_token = T_LS; 473 state = -1; 474 break; 475 default: 476 (void) s_ungetc(c); 477 cx.x_token = T_LT; 478 state = -1; 479 } 480 break; 481 case 22: /* got = */ 482 switch (c) { 483 case '=': 484 cx.x_token = T_EQ; 485 state = -1; 486 break; 487 default: 488 (void) s_ungetc(c); 489 cx.x_token = T_ASSIGN; 490 state = -1; 491 } 492 break; 493 case 23: /* got ! */ 494 switch (c) { 495 case '=': 496 cx.x_token = T_NE; 497 state = -1; 498 break; 499 default: 500 (void) s_ungetc(c); 501 cx.x_token = T_NOT; 502 state = -1; 503 } 504 break; 505 case 24: /* got & */ 506 switch (c) { 507 case '&': 508 cx.x_token = T_ANDAND; 509 state = -1; 510 break; 511 default: 512 (void) s_ungetc(c); 513 cx.x_token = T_AND; 514 state = -1; 515 } 516 break; 517 case 25: /* got | */ 518 switch (c) { 519 case '|': 520 cx.x_token = T_OROR; 521 state = -1; 522 break; 523 default: 524 (void) s_ungetc(c); 525 cx.x_token = T_OR; 526 state = -1; 527 } 528 break; 529 case 26: /* got $ */ 530 switch (c) { 531 case '?': 532 cx.x_token = T_DQ; 533 state = -1; 534 break; 535 default: 536 (void) s_ungetc(c); 537 cx.x_token = T_DOLLAR; 538 state = -1; 539 } 540 break; 541 default: 542 abort(); 543 } 544 if (state >= 0) 545 goto loop; 546 return cx.x_token; 547 } 548 549 int 550 s_gettok1(void) 551 { 552 int c; 553 int n; 554 555 c = s_getc(); /* got \ */ 556 switch (c) { 557 case EOF: 558 return -1; 559 case '\n': 560 return -2; 561 case 'b': 562 return '\b'; 563 case 'f': 564 return '\f'; 565 case 'n': 566 return '\n'; 567 case 'r': 568 return '\r'; 569 case 't': 570 return '\t'; 571 default: 572 return c; 573 case '0': case '1': case '2': case '3': case '4': 574 case '5': case '6': case '7': 575 break; 576 } 577 n = c - '0'; 578 c = s_getc(); /* got \[0-7] */ 579 if (c < '0' || c > '7') { 580 (void) s_ungetc(c); 581 return n; 582 } 583 n = n * 8 + c - '0'; 584 c = s_getc(); /* got \[0-7][0-7] */ 585 if (c < '0' || c > '7') { 586 (void) s_ungetc(c); 587 return n; 588 } 589 return n * 8 + c - '0'; 590 } 591