1 /* @(#)scanner.c 8.1 (Berkeley) 6/6/93 */ 2 /* $NetBSD: scanner.c,v 1.9 2003/08/07 11:17:29 agc Exp $ */ 3 4 /* 5 * Copyright (c) 1983, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Edward Wang at The University of California, Berkeley. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #include <stdlib.h> 37 38 #include "defs.h" 39 #include "token.h" 40 #include "context.h" 41 #include "window_string.h" 42 43 int s_getc(void); 44 int s_gettok1(void); 45 int s_ungetc(int); 46 47 int 48 s_getc(void) 49 { 50 int c; 51 52 switch (cx.x_type) { 53 case X_FILE: 54 c = getc(cx.x_fp); 55 if (cx.x_bol && c != EOF) { 56 cx.x_bol = 0; 57 cx.x_lineno++; 58 } 59 if (c == '\n') 60 cx.x_bol = 1; 61 return c; 62 case X_BUF: 63 if (*cx.x_bufp != 0) 64 return *cx.x_bufp++ & 0xff; 65 else 66 return EOF; 67 } 68 /*NOTREACHED*/ 69 return(0); /* XXX: placate gcc */ 70 } 71 72 int 73 s_ungetc(int c) 74 { 75 if (c == EOF) 76 return EOF; 77 switch (cx.x_type) { 78 case X_FILE: 79 cx.x_bol = 0; 80 return ungetc(c, cx.x_fp); 81 case X_BUF: 82 if (cx.x_bufp > cx.x_buf) 83 return *--cx.x_bufp = c; 84 else 85 return EOF; 86 } 87 /*NOTREACHED*/ 88 return(0); /* XXX: placate gcc */ 89 } 90 91 int 92 s_gettok(void) 93 { 94 char buf[100]; 95 char *p = buf; 96 int c; 97 int state = 0; 98 99 loop: 100 c = s_getc(); 101 switch (state) { 102 case 0: 103 switch (c) { 104 case ' ': 105 case '\t': 106 break; 107 case '\n': 108 case ';': 109 cx.x_token = T_EOL; 110 state = -1; 111 break; 112 case '#': 113 state = 1; 114 break; 115 case EOF: 116 cx.x_token = T_EOF; 117 state = -1; 118 break; 119 case 'a': case 'b': case 'c': case 'd': case 'e': 120 case 'f': case 'g': case 'h': case 'i': case 'j': 121 case 'k': case 'l': case 'm': case 'n': case 'o': 122 case 'p': case 'q': case 'r': case 's': case 't': 123 case 'u': case 'v': case 'w': case 'x': case 'y': 124 case 'z': 125 case 'A': case 'B': case 'C': case 'D': case 'E': 126 case 'F': case 'G': case 'H': case 'I': case 'J': 127 case 'K': case 'L': case 'M': case 'N': case 'O': 128 case 'P': case 'Q': case 'R': case 'S': case 'T': 129 case 'U': case 'V': case 'W': case 'X': case 'Y': 130 case 'Z': 131 case '_': case '.': 132 *p++ = c; 133 state = 2; 134 break; 135 case '"': 136 state = 3; 137 break; 138 case '\'': 139 state = 4; 140 break; 141 case '\\': 142 switch (c = s_gettok1()) { 143 case -1: 144 break; 145 case -2: 146 state = 0; 147 break; 148 default: 149 *p++ = c; 150 state = 2; 151 } 152 break; 153 case '0': 154 cx.x_val.v_num = 0; 155 state = 10; 156 break; 157 case '1': case '2': case '3': case '4': 158 case '5': case '6': case '7': case '8': case '9': 159 cx.x_val.v_num = c - '0'; 160 state = 11; 161 break; 162 case '>': 163 state = 20; 164 break; 165 case '<': 166 state = 21; 167 break; 168 case '=': 169 state = 22; 170 break; 171 case '!': 172 state = 23; 173 break; 174 case '&': 175 state = 24; 176 break; 177 case '|': 178 state = 25; 179 break; 180 case '$': 181 state = 26; 182 break; 183 case '~': 184 cx.x_token = T_COMP; 185 state = -1; 186 break; 187 case '+': 188 cx.x_token = T_PLUS; 189 state = -1; 190 break; 191 case '-': 192 cx.x_token = T_MINUS; 193 state = -1; 194 break; 195 case '*': 196 cx.x_token = T_MUL; 197 state = -1; 198 break; 199 case '/': 200 cx.x_token = T_DIV; 201 state = -1; 202 break; 203 case '%': 204 cx.x_token = T_MOD; 205 state = -1; 206 break; 207 case '^': 208 cx.x_token = T_XOR; 209 state = -1; 210 break; 211 case '(': 212 cx.x_token = T_LP; 213 state = -1; 214 break; 215 case ')': 216 cx.x_token = T_RP; 217 state = -1; 218 break; 219 case ',': 220 cx.x_token = T_COMMA; 221 state = -1; 222 break; 223 case '?': 224 cx.x_token = T_QUEST; 225 state = -1; 226 break; 227 case ':': 228 cx.x_token = T_COLON; 229 state = -1; 230 break; 231 case '[': 232 cx.x_token = T_LB; 233 state = -1; 234 break; 235 case ']': 236 cx.x_token = T_RB; 237 state = -1; 238 break; 239 default: 240 cx.x_val.v_num = c; 241 cx.x_token = T_CHAR; 242 state = -1; 243 break; 244 } 245 break; 246 case 1: /* got # */ 247 if (c == '\n' || c == EOF) { 248 (void) s_ungetc(c); 249 state = 0; 250 } 251 break; 252 case 2: /* unquoted string */ 253 switch (c) { 254 case 'a': case 'b': case 'c': case 'd': case 'e': 255 case 'f': case 'g': case 'h': case 'i': case 'j': 256 case 'k': case 'l': case 'm': case 'n': case 'o': 257 case 'p': case 'q': case 'r': case 's': case 't': 258 case 'u': case 'v': case 'w': case 'x': case 'y': 259 case 'z': 260 case 'A': case 'B': case 'C': case 'D': case 'E': 261 case 'F': case 'G': case 'H': case 'I': case 'J': 262 case 'K': case 'L': case 'M': case 'N': case 'O': 263 case 'P': case 'Q': case 'R': case 'S': case 'T': 264 case 'U': case 'V': case 'W': case 'X': case 'Y': 265 case 'Z': 266 case '_': case '.': 267 case '0': case '1': case '2': case '3': case '4': 268 case '5': case '6': case '7': case '8': case '9': 269 if (p < buf + sizeof buf - 1) 270 *p++ = c; 271 break; 272 case '"': 273 state = 3; 274 break; 275 case '\'': 276 state = 4; 277 break; 278 case '\\': 279 switch (c = s_gettok1()) { 280 case -2: 281 (void) s_ungetc(' '); 282 case -1: 283 break; 284 default: 285 if (p < buf + sizeof buf - 1) 286 *p++ = c; 287 } 288 break; 289 default: 290 (void) s_ungetc(c); 291 case EOF: 292 *p = 0; 293 cx.x_token = T_STR; 294 switch (*buf) { 295 case 'i': 296 if (buf[1] == 'f' && buf[2] == 0) 297 cx.x_token = T_IF; 298 break; 299 case 't': 300 if (buf[1] == 'h' && buf[2] == 'e' 301 && buf[3] == 'n' && buf[4] == 0) 302 cx.x_token = T_THEN; 303 break; 304 case 'e': 305 if (buf[1] == 'n' && buf[2] == 'd' 306 && buf[3] == 'i' && buf[4] == 'f' 307 && buf[5] == 0) 308 cx.x_token = T_ENDIF; 309 else { 310 if (buf[1] == 'l' && buf[2] == 's') { 311 if (buf[3] == 'i' 312 && buf[4] == 'f' 313 && buf[5] == 0) 314 cx.x_token = T_ELSIF; 315 else { 316 if (buf[3] == 'e' 317 && buf[4] == 0) 318 cx.x_token = 319 T_ELSE; 320 } 321 } 322 } 323 break; 324 } 325 if (cx.x_token == T_STR 326 && (cx.x_val.v_str = str_cpy(buf)) == 0) { 327 p_memerror(); 328 cx.x_token = T_EOF; 329 } 330 state = -1; 331 break; 332 } 333 break; 334 case 3: /* " quoted string */ 335 switch (c) { 336 case '\n': 337 (void) s_ungetc(c); 338 case EOF: 339 case '"': 340 state = 2; 341 break; 342 case '\\': 343 switch (c = s_gettok1()) { 344 case -1: 345 case -2: /* newlines are invisible */ 346 break; 347 default: 348 if (p < buf + sizeof buf - 1) 349 *p++ = c; 350 } 351 break; 352 default: 353 if (p < buf + sizeof buf - 1) 354 *p++ = c; 355 break; 356 } 357 break; 358 case 4: /* ' quoted string */ 359 switch (c) { 360 case '\n': 361 (void) s_ungetc(c); 362 case EOF: 363 case '\'': 364 state = 2; 365 break; 366 case '\\': 367 switch (c = s_gettok1()) { 368 case -1: 369 case -2: /* newlines are invisible */ 370 break; 371 default: 372 if (p < buf + sizeof buf - 1) 373 *p++ = c; 374 } 375 break; 376 default: 377 if (p < buf + sizeof buf - 1) 378 *p++ = c; 379 break; 380 } 381 break; 382 case 10: /* got 0 */ 383 switch (c) { 384 case 'x': 385 case 'X': 386 cx.x_val.v_num = 0; 387 state = 12; 388 break; 389 case '0': case '1': case '2': case '3': case '4': 390 case '5': case '6': case '7': 391 cx.x_val.v_num = c - '0'; 392 state = 13; 393 break; 394 case '8': case '9': 395 cx.x_val.v_num = c - '0'; 396 state = 11; 397 break; 398 default: 399 (void) s_ungetc(c); 400 state = -1; 401 cx.x_token = T_NUM; 402 } 403 break; 404 case 11: /* decimal number */ 405 switch (c) { 406 case '0': case '1': case '2': case '3': case '4': 407 case '5': case '6': case '7': case '8': case '9': 408 cx.x_val.v_num = cx.x_val.v_num * 10 + c - '0'; 409 break; 410 default: 411 (void) s_ungetc(c); 412 state = -1; 413 cx.x_token = T_NUM; 414 } 415 break; 416 case 12: /* hex number */ 417 switch (c) { 418 case '0': case '1': case '2': case '3': case '4': 419 case '5': case '6': case '7': case '8': case '9': 420 cx.x_val.v_num = cx.x_val.v_num * 16 + c - '0'; 421 break; 422 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 423 cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'a' + 10; 424 break; 425 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 426 cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'A' + 10; 427 break; 428 default: 429 (void) s_ungetc(c); 430 state = -1; 431 cx.x_token = T_NUM; 432 } 433 break; 434 case 13: /* octal number */ 435 switch (c) { 436 case '0': case '1': case '2': case '3': case '4': 437 case '5': case '6': case '7': 438 cx.x_val.v_num = cx.x_val.v_num * 8 + c - '0'; 439 break; 440 default: 441 (void) s_ungetc(c); 442 state = -1; 443 cx.x_token = T_NUM; 444 } 445 break; 446 case 20: /* got > */ 447 switch (c) { 448 case '=': 449 cx.x_token = T_GE; 450 state = -1; 451 break; 452 case '>': 453 cx.x_token = T_RS; 454 state = -1; 455 break; 456 default: 457 (void) s_ungetc(c); 458 cx.x_token = T_GT; 459 state = -1; 460 } 461 break; 462 case 21: /* got < */ 463 switch (c) { 464 case '=': 465 cx.x_token = T_LE; 466 state = -1; 467 break; 468 case '<': 469 cx.x_token = T_LS; 470 state = -1; 471 break; 472 default: 473 (void) s_ungetc(c); 474 cx.x_token = T_LT; 475 state = -1; 476 } 477 break; 478 case 22: /* got = */ 479 switch (c) { 480 case '=': 481 cx.x_token = T_EQ; 482 state = -1; 483 break; 484 default: 485 (void) s_ungetc(c); 486 cx.x_token = T_ASSIGN; 487 state = -1; 488 } 489 break; 490 case 23: /* got ! */ 491 switch (c) { 492 case '=': 493 cx.x_token = T_NE; 494 state = -1; 495 break; 496 default: 497 (void) s_ungetc(c); 498 cx.x_token = T_NOT; 499 state = -1; 500 } 501 break; 502 case 24: /* got & */ 503 switch (c) { 504 case '&': 505 cx.x_token = T_ANDAND; 506 state = -1; 507 break; 508 default: 509 (void) s_ungetc(c); 510 cx.x_token = T_AND; 511 state = -1; 512 } 513 break; 514 case 25: /* got | */ 515 switch (c) { 516 case '|': 517 cx.x_token = T_OROR; 518 state = -1; 519 break; 520 default: 521 (void) s_ungetc(c); 522 cx.x_token = T_OR; 523 state = -1; 524 } 525 break; 526 case 26: /* got $ */ 527 switch (c) { 528 case '?': 529 cx.x_token = T_DQ; 530 state = -1; 531 break; 532 default: 533 (void) s_ungetc(c); 534 cx.x_token = T_DOLLAR; 535 state = -1; 536 } 537 break; 538 default: 539 abort(); 540 } 541 if (state >= 0) 542 goto loop; 543 return cx.x_token; 544 } 545 546 int 547 s_gettok1(void) 548 { 549 int c; 550 int n; 551 552 c = s_getc(); /* got \ */ 553 switch (c) { 554 case EOF: 555 return -1; 556 case '\n': 557 return -2; 558 case 'b': 559 return '\b'; 560 case 'f': 561 return '\f'; 562 case 'n': 563 return '\n'; 564 case 'r': 565 return '\r'; 566 case 't': 567 return '\t'; 568 default: 569 return c; 570 case '0': case '1': case '2': case '3': case '4': 571 case '5': case '6': case '7': 572 break; 573 } 574 n = c - '0'; 575 c = s_getc(); /* got \[0-7] */ 576 if (c < '0' || c > '7') { 577 (void) s_ungetc(c); 578 return n; 579 } 580 n = n * 8 + c - '0'; 581 c = s_getc(); /* got \[0-7][0-7] */ 582 if (c < '0' || c > '7') { 583 (void) s_ungetc(c); 584 return n; 585 } 586 return n * 8 + c - '0'; 587 } 588