1 /* $NetBSD: scanner.c,v 1.9 2003/08/07 11:17:29 agc Exp $ */ 2 3 /* 4 * Copyright (c) 1983, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Edward Wang at The University of California, Berkeley. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include <sys/cdefs.h> 36 #ifndef lint 37 #if 0 38 static char sccsid[] = "@(#)scanner.c 8.1 (Berkeley) 6/6/93"; 39 #else 40 __RCSID("$NetBSD: scanner.c,v 1.9 2003/08/07 11:17:29 agc Exp $"); 41 #endif 42 #endif /* not lint */ 43 44 #include <stdlib.h> 45 46 #include "defs.h" 47 #include "token.h" 48 #include "context.h" 49 #include "window_string.h" 50 51 int s_getc(void); 52 int s_gettok1(void); 53 int s_ungetc(int); 54 55 int 56 s_getc(void) 57 { 58 int c; 59 60 switch (cx.x_type) { 61 case X_FILE: 62 c = getc(cx.x_fp); 63 if (cx.x_bol && c != EOF) { 64 cx.x_bol = 0; 65 cx.x_lineno++; 66 } 67 if (c == '\n') 68 cx.x_bol = 1; 69 return c; 70 case X_BUF: 71 if (*cx.x_bufp != 0) 72 return *cx.x_bufp++ & 0xff; 73 else 74 return EOF; 75 } 76 /*NOTREACHED*/ 77 return(0); /* XXX: placate gcc */ 78 } 79 80 int 81 s_ungetc(int c) 82 { 83 if (c == EOF) 84 return EOF; 85 switch (cx.x_type) { 86 case X_FILE: 87 cx.x_bol = 0; 88 return ungetc(c, cx.x_fp); 89 case X_BUF: 90 if (cx.x_bufp > cx.x_buf) 91 return *--cx.x_bufp = c; 92 else 93 return EOF; 94 } 95 /*NOTREACHED*/ 96 return(0); /* XXX: placate gcc */ 97 } 98 99 int 100 s_gettok(void) 101 { 102 char buf[100]; 103 char *p = buf; 104 int c; 105 int state = 0; 106 107 loop: 108 c = s_getc(); 109 switch (state) { 110 case 0: 111 switch (c) { 112 case ' ': 113 case '\t': 114 break; 115 case '\n': 116 case ';': 117 cx.x_token = T_EOL; 118 state = -1; 119 break; 120 case '#': 121 state = 1; 122 break; 123 case EOF: 124 cx.x_token = T_EOF; 125 state = -1; 126 break; 127 case 'a': case 'b': case 'c': case 'd': case 'e': 128 case 'f': case 'g': case 'h': case 'i': case 'j': 129 case 'k': case 'l': case 'm': case 'n': case 'o': 130 case 'p': case 'q': case 'r': case 's': case 't': 131 case 'u': case 'v': case 'w': case 'x': case 'y': 132 case 'z': 133 case 'A': case 'B': case 'C': case 'D': case 'E': 134 case 'F': case 'G': case 'H': case 'I': case 'J': 135 case 'K': case 'L': case 'M': case 'N': case 'O': 136 case 'P': case 'Q': case 'R': case 'S': case 'T': 137 case 'U': case 'V': case 'W': case 'X': case 'Y': 138 case 'Z': 139 case '_': case '.': 140 *p++ = c; 141 state = 2; 142 break; 143 case '"': 144 state = 3; 145 break; 146 case '\'': 147 state = 4; 148 break; 149 case '\\': 150 switch (c = s_gettok1()) { 151 case -1: 152 break; 153 case -2: 154 state = 0; 155 break; 156 default: 157 *p++ = c; 158 state = 2; 159 } 160 break; 161 case '0': 162 cx.x_val.v_num = 0; 163 state = 10; 164 break; 165 case '1': case '2': case '3': case '4': 166 case '5': case '6': case '7': case '8': case '9': 167 cx.x_val.v_num = c - '0'; 168 state = 11; 169 break; 170 case '>': 171 state = 20; 172 break; 173 case '<': 174 state = 21; 175 break; 176 case '=': 177 state = 22; 178 break; 179 case '!': 180 state = 23; 181 break; 182 case '&': 183 state = 24; 184 break; 185 case '|': 186 state = 25; 187 break; 188 case '$': 189 state = 26; 190 break; 191 case '~': 192 cx.x_token = T_COMP; 193 state = -1; 194 break; 195 case '+': 196 cx.x_token = T_PLUS; 197 state = -1; 198 break; 199 case '-': 200 cx.x_token = T_MINUS; 201 state = -1; 202 break; 203 case '*': 204 cx.x_token = T_MUL; 205 state = -1; 206 break; 207 case '/': 208 cx.x_token = T_DIV; 209 state = -1; 210 break; 211 case '%': 212 cx.x_token = T_MOD; 213 state = -1; 214 break; 215 case '^': 216 cx.x_token = T_XOR; 217 state = -1; 218 break; 219 case '(': 220 cx.x_token = T_LP; 221 state = -1; 222 break; 223 case ')': 224 cx.x_token = T_RP; 225 state = -1; 226 break; 227 case ',': 228 cx.x_token = T_COMMA; 229 state = -1; 230 break; 231 case '?': 232 cx.x_token = T_QUEST; 233 state = -1; 234 break; 235 case ':': 236 cx.x_token = T_COLON; 237 state = -1; 238 break; 239 case '[': 240 cx.x_token = T_LB; 241 state = -1; 242 break; 243 case ']': 244 cx.x_token = T_RB; 245 state = -1; 246 break; 247 default: 248 cx.x_val.v_num = c; 249 cx.x_token = T_CHAR; 250 state = -1; 251 break; 252 } 253 break; 254 case 1: /* got # */ 255 if (c == '\n' || c == EOF) { 256 (void) s_ungetc(c); 257 state = 0; 258 } 259 break; 260 case 2: /* unquoted string */ 261 switch (c) { 262 case 'a': case 'b': case 'c': case 'd': case 'e': 263 case 'f': case 'g': case 'h': case 'i': case 'j': 264 case 'k': case 'l': case 'm': case 'n': case 'o': 265 case 'p': case 'q': case 'r': case 's': case 't': 266 case 'u': case 'v': case 'w': case 'x': case 'y': 267 case 'z': 268 case 'A': case 'B': case 'C': case 'D': case 'E': 269 case 'F': case 'G': case 'H': case 'I': case 'J': 270 case 'K': case 'L': case 'M': case 'N': case 'O': 271 case 'P': case 'Q': case 'R': case 'S': case 'T': 272 case 'U': case 'V': case 'W': case 'X': case 'Y': 273 case 'Z': 274 case '_': case '.': 275 case '0': case '1': case '2': case '3': case '4': 276 case '5': case '6': case '7': case '8': case '9': 277 if (p < buf + sizeof buf - 1) 278 *p++ = c; 279 break; 280 case '"': 281 state = 3; 282 break; 283 case '\'': 284 state = 4; 285 break; 286 case '\\': 287 switch (c = s_gettok1()) { 288 case -2: 289 (void) s_ungetc(' '); 290 case -1: 291 break; 292 default: 293 if (p < buf + sizeof buf - 1) 294 *p++ = c; 295 } 296 break; 297 default: 298 (void) s_ungetc(c); 299 case EOF: 300 *p = 0; 301 cx.x_token = T_STR; 302 switch (*buf) { 303 case 'i': 304 if (buf[1] == 'f' && buf[2] == 0) 305 cx.x_token = T_IF; 306 break; 307 case 't': 308 if (buf[1] == 'h' && buf[2] == 'e' 309 && buf[3] == 'n' && buf[4] == 0) 310 cx.x_token = T_THEN; 311 break; 312 case 'e': 313 if (buf[1] == 'n' && buf[2] == 'd' 314 && buf[3] == 'i' && buf[4] == 'f' 315 && buf[5] == 0) 316 cx.x_token = T_ENDIF; 317 else { 318 if (buf[1] == 'l' && buf[2] == 's') { 319 if (buf[3] == 'i' 320 && buf[4] == 'f' 321 && buf[5] == 0) 322 cx.x_token = T_ELSIF; 323 else { 324 if (buf[3] == 'e' 325 && buf[4] == 0) 326 cx.x_token = 327 T_ELSE; 328 } 329 } 330 } 331 break; 332 } 333 if (cx.x_token == T_STR 334 && (cx.x_val.v_str = str_cpy(buf)) == 0) { 335 p_memerror(); 336 cx.x_token = T_EOF; 337 } 338 state = -1; 339 break; 340 } 341 break; 342 case 3: /* " quoted string */ 343 switch (c) { 344 case '\n': 345 (void) s_ungetc(c); 346 case EOF: 347 case '"': 348 state = 2; 349 break; 350 case '\\': 351 switch (c = s_gettok1()) { 352 case -1: 353 case -2: /* newlines are invisible */ 354 break; 355 default: 356 if (p < buf + sizeof buf - 1) 357 *p++ = c; 358 } 359 break; 360 default: 361 if (p < buf + sizeof buf - 1) 362 *p++ = c; 363 break; 364 } 365 break; 366 case 4: /* ' quoted string */ 367 switch (c) { 368 case '\n': 369 (void) s_ungetc(c); 370 case EOF: 371 case '\'': 372 state = 2; 373 break; 374 case '\\': 375 switch (c = s_gettok1()) { 376 case -1: 377 case -2: /* newlines are invisible */ 378 break; 379 default: 380 if (p < buf + sizeof buf - 1) 381 *p++ = c; 382 } 383 break; 384 default: 385 if (p < buf + sizeof buf - 1) 386 *p++ = c; 387 break; 388 } 389 break; 390 case 10: /* got 0 */ 391 switch (c) { 392 case 'x': 393 case 'X': 394 cx.x_val.v_num = 0; 395 state = 12; 396 break; 397 case '0': case '1': case '2': case '3': case '4': 398 case '5': case '6': case '7': 399 cx.x_val.v_num = c - '0'; 400 state = 13; 401 break; 402 case '8': case '9': 403 cx.x_val.v_num = c - '0'; 404 state = 11; 405 break; 406 default: 407 (void) s_ungetc(c); 408 state = -1; 409 cx.x_token = T_NUM; 410 } 411 break; 412 case 11: /* decimal number */ 413 switch (c) { 414 case '0': case '1': case '2': case '3': case '4': 415 case '5': case '6': case '7': case '8': case '9': 416 cx.x_val.v_num = cx.x_val.v_num * 10 + c - '0'; 417 break; 418 default: 419 (void) s_ungetc(c); 420 state = -1; 421 cx.x_token = T_NUM; 422 } 423 break; 424 case 12: /* hex number */ 425 switch (c) { 426 case '0': case '1': case '2': case '3': case '4': 427 case '5': case '6': case '7': case '8': case '9': 428 cx.x_val.v_num = cx.x_val.v_num * 16 + c - '0'; 429 break; 430 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 431 cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'a' + 10; 432 break; 433 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 434 cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'A' + 10; 435 break; 436 default: 437 (void) s_ungetc(c); 438 state = -1; 439 cx.x_token = T_NUM; 440 } 441 break; 442 case 13: /* octal number */ 443 switch (c) { 444 case '0': case '1': case '2': case '3': case '4': 445 case '5': case '6': case '7': 446 cx.x_val.v_num = cx.x_val.v_num * 8 + c - '0'; 447 break; 448 default: 449 (void) s_ungetc(c); 450 state = -1; 451 cx.x_token = T_NUM; 452 } 453 break; 454 case 20: /* got > */ 455 switch (c) { 456 case '=': 457 cx.x_token = T_GE; 458 state = -1; 459 break; 460 case '>': 461 cx.x_token = T_RS; 462 state = -1; 463 break; 464 default: 465 (void) s_ungetc(c); 466 cx.x_token = T_GT; 467 state = -1; 468 } 469 break; 470 case 21: /* got < */ 471 switch (c) { 472 case '=': 473 cx.x_token = T_LE; 474 state = -1; 475 break; 476 case '<': 477 cx.x_token = T_LS; 478 state = -1; 479 break; 480 default: 481 (void) s_ungetc(c); 482 cx.x_token = T_LT; 483 state = -1; 484 } 485 break; 486 case 22: /* got = */ 487 switch (c) { 488 case '=': 489 cx.x_token = T_EQ; 490 state = -1; 491 break; 492 default: 493 (void) s_ungetc(c); 494 cx.x_token = T_ASSIGN; 495 state = -1; 496 } 497 break; 498 case 23: /* got ! */ 499 switch (c) { 500 case '=': 501 cx.x_token = T_NE; 502 state = -1; 503 break; 504 default: 505 (void) s_ungetc(c); 506 cx.x_token = T_NOT; 507 state = -1; 508 } 509 break; 510 case 24: /* got & */ 511 switch (c) { 512 case '&': 513 cx.x_token = T_ANDAND; 514 state = -1; 515 break; 516 default: 517 (void) s_ungetc(c); 518 cx.x_token = T_AND; 519 state = -1; 520 } 521 break; 522 case 25: /* got | */ 523 switch (c) { 524 case '|': 525 cx.x_token = T_OROR; 526 state = -1; 527 break; 528 default: 529 (void) s_ungetc(c); 530 cx.x_token = T_OR; 531 state = -1; 532 } 533 break; 534 case 26: /* got $ */ 535 switch (c) { 536 case '?': 537 cx.x_token = T_DQ; 538 state = -1; 539 break; 540 default: 541 (void) s_ungetc(c); 542 cx.x_token = T_DOLLAR; 543 state = -1; 544 } 545 break; 546 default: 547 abort(); 548 } 549 if (state >= 0) 550 goto loop; 551 return cx.x_token; 552 } 553 554 int 555 s_gettok1(void) 556 { 557 int c; 558 int n; 559 560 c = s_getc(); /* got \ */ 561 switch (c) { 562 case EOF: 563 return -1; 564 case '\n': 565 return -2; 566 case 'b': 567 return '\b'; 568 case 'f': 569 return '\f'; 570 case 'n': 571 return '\n'; 572 case 'r': 573 return '\r'; 574 case 't': 575 return '\t'; 576 default: 577 return c; 578 case '0': case '1': case '2': case '3': case '4': 579 case '5': case '6': case '7': 580 break; 581 } 582 n = c - '0'; 583 c = s_getc(); /* got \[0-7] */ 584 if (c < '0' || c > '7') { 585 (void) s_ungetc(c); 586 return n; 587 } 588 n = n * 8 + c - '0'; 589 c = s_getc(); /* got \[0-7][0-7] */ 590 if (c < '0' || c > '7') { 591 (void) s_ungetc(c); 592 return n; 593 } 594 return n * 8 + c - '0'; 595 } 596