1 /* 2 * Copyright (c) 1983, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Edward Wang at The University of California, Berkeley. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)scanner.c 8.1 (Berkeley) 6/6/93 37 * $FreeBSD: src/usr.bin/window/scanner.c,v 1.2.14.2 2001/05/17 09:46:29 obrien Exp $ 38 * $DragonFly: src/usr.bin/window/scanner.c,v 1.2 2003/06/17 04:29:34 dillon Exp $ 39 */ 40 41 #include <ctype.h> 42 #include "value.h" 43 #include "token.h" 44 #include "context.h" 45 #include "string.h" 46 #include "mystring.h" 47 48 s_getc() 49 { 50 register c; 51 52 switch (cx.x_type) { 53 case X_FILE: 54 c = getc(cx.x_fp); 55 if (cx.x_bol && c != EOF) { 56 cx.x_bol = 0; 57 cx.x_lineno++; 58 } 59 if (c == '\n') 60 cx.x_bol = 1; 61 return c; 62 case X_BUF: 63 if (*cx.x_bufp != 0) 64 return *cx.x_bufp++ & 0xff; 65 else 66 return EOF; 67 } 68 /*NOTREACHED*/ 69 } 70 71 s_ungetc(c) 72 { 73 if (c == EOF) 74 return EOF; 75 switch (cx.x_type) { 76 case X_FILE: 77 cx.x_bol = 0; 78 return ungetc(c, cx.x_fp); 79 case X_BUF: 80 if (cx.x_bufp > cx.x_buf) 81 return *--cx.x_bufp = c; 82 else 83 return EOF; 84 } 85 /*NOTREACHED*/ 86 } 87 88 s_gettok() 89 { 90 char buf[100]; 91 register char *p = buf; 92 register c; 93 register state = 0; 94 95 loop: 96 c = s_getc(); 97 switch (state) { 98 case 0: 99 switch (c) { 100 case ' ': 101 case '\t': 102 break; 103 case '\n': 104 case ';': 105 cx.x_token = T_EOL; 106 state = -1; 107 break; 108 case '#': 109 state = 1; 110 break; 111 case EOF: 112 cx.x_token = T_EOF; 113 state = -1; 114 break; 115 case '"': 116 state = 3; 117 break; 118 case '\'': 119 state = 4; 120 break; 121 case '\\': 122 switch (c = s_gettok1()) { 123 case -1: 124 break; 125 case -2: 126 state = 0; 127 break; 128 default: 129 *p++ = c; 130 state = 2; 131 } 132 break; 133 case '0': 134 cx.x_val.v_num = 0; 135 state = 10; 136 break; 137 case '1': case '2': case '3': case '4': 138 case '5': case '6': case '7': case '8': case '9': 139 cx.x_val.v_num = c - '0'; 140 state = 11; 141 break; 142 case '>': 143 state = 20; 144 break; 145 case '<': 146 state = 21; 147 break; 148 case '=': 149 state = 22; 150 break; 151 case '!': 152 state = 23; 153 break; 154 case '&': 155 state = 24; 156 break; 157 case '|': 158 state = 25; 159 break; 160 case '$': 161 state = 26; 162 break; 163 case '~': 164 cx.x_token = T_COMP; 165 state = -1; 166 break; 167 case '+': 168 cx.x_token = T_PLUS; 169 state = -1; 170 break; 171 case '-': 172 cx.x_token = T_MINUS; 173 state = -1; 174 break; 175 case '*': 176 cx.x_token = T_MUL; 177 state = -1; 178 break; 179 case '/': 180 cx.x_token = T_DIV; 181 state = -1; 182 break; 183 case '%': 184 cx.x_token = T_MOD; 185 state = -1; 186 break; 187 case '^': 188 cx.x_token = T_XOR; 189 state = -1; 190 break; 191 case '(': 192 cx.x_token = T_LP; 193 state = -1; 194 break; 195 case ')': 196 cx.x_token = T_RP; 197 state = -1; 198 break; 199 case ',': 200 cx.x_token = T_COMMA; 201 state = -1; 202 break; 203 case '?': 204 cx.x_token = T_QUEST; 205 state = -1; 206 break; 207 case ':': 208 cx.x_token = T_COLON; 209 state = -1; 210 break; 211 case '[': 212 cx.x_token = T_LB; 213 state = -1; 214 break; 215 case ']': 216 cx.x_token = T_RB; 217 state = -1; 218 break; 219 default: 220 if (isalpha(c) || c == '_' || c == '.') { 221 *p++ = c; 222 state = 2; 223 break; 224 } 225 cx.x_val.v_num = c; 226 cx.x_token = T_CHAR; 227 state = -1; 228 break; 229 } 230 break; 231 case 1: /* got # */ 232 if (c == '\n' || c == EOF) { 233 (void) s_ungetc(c); 234 state = 0; 235 } 236 break; 237 case 2: /* unquoted string */ 238 switch (c) { 239 case '"': 240 state = 3; 241 break; 242 case '\'': 243 state = 4; 244 break; 245 case '\\': 246 switch (c = s_gettok1()) { 247 case -2: 248 (void) s_ungetc(' '); 249 case -1: 250 break; 251 default: 252 if (p < buf + sizeof buf - 1) 253 *p++ = c; 254 } 255 break; 256 default: 257 if (isalnum(c) || c == '_' || c == '.') { 258 if (p < buf + sizeof buf - 1) 259 *p++ = c; 260 break; 261 } 262 (void) s_ungetc(c); 263 case EOF: 264 *p = 0; 265 cx.x_token = T_STR; 266 switch (*buf) { 267 case 'i': 268 if (buf[1] == 'f' && buf[2] == 0) 269 cx.x_token = T_IF; 270 break; 271 case 't': 272 if (buf[1] == 'h' && buf[2] == 'e' 273 && buf[3] == 'n' && buf[4] == 0) 274 cx.x_token = T_THEN; 275 break; 276 case 'e': 277 if (buf[1] == 'n' && buf[2] == 'd' 278 && buf[3] == 'i' && buf[4] == 'f' 279 && buf[5] == 0) 280 cx.x_token = T_ENDIF; 281 else if (buf[1] == 'l' && buf[2] == 's') 282 if (buf[3] == 'i' && buf[4] == 'f' 283 && buf[5] == 0) 284 cx.x_token = T_ELSIF; 285 else if (buf[3] == 'e' && buf[4] == 0) 286 cx.x_token = T_ELSE; 287 break; 288 } 289 if (cx.x_token == T_STR 290 && (cx.x_val.v_str = str_cpy(buf)) == 0) { 291 p_memerror(); 292 cx.x_token = T_EOF; 293 } 294 state = -1; 295 break; 296 } 297 break; 298 case 3: /* " quoted string */ 299 switch (c) { 300 case '\n': 301 (void) s_ungetc(c); 302 case EOF: 303 case '"': 304 state = 2; 305 break; 306 case '\\': 307 switch (c = s_gettok1()) { 308 case -1: 309 case -2: /* newlines are invisible */ 310 break; 311 default: 312 if (p < buf + sizeof buf - 1) 313 *p++ = c; 314 } 315 break; 316 default: 317 if (p < buf + sizeof buf - 1) 318 *p++ = c; 319 break; 320 } 321 break; 322 case 4: /* ' quoted string */ 323 switch (c) { 324 case '\n': 325 (void) s_ungetc(c); 326 case EOF: 327 case '\'': 328 state = 2; 329 break; 330 case '\\': 331 switch (c = s_gettok1()) { 332 case -1: 333 case -2: /* newlines are invisible */ 334 break; 335 default: 336 if (p < buf + sizeof buf - 1) 337 *p++ = c; 338 } 339 break; 340 default: 341 if (p < buf + sizeof buf - 1) 342 *p++ = c; 343 break; 344 } 345 break; 346 case 10: /* got 0 */ 347 switch (c) { 348 case 'x': 349 case 'X': 350 cx.x_val.v_num = 0; 351 state = 12; 352 break; 353 case '0': case '1': case '2': case '3': case '4': 354 case '5': case '6': case '7': 355 cx.x_val.v_num = c - '0'; 356 state = 13; 357 break; 358 case '8': case '9': 359 cx.x_val.v_num = c - '0'; 360 state = 11; 361 break; 362 default: 363 (void) s_ungetc(c); 364 state = -1; 365 cx.x_token = T_NUM; 366 } 367 break; 368 case 11: /* decimal number */ 369 switch (c) { 370 case '0': case '1': case '2': case '3': case '4': 371 case '5': case '6': case '7': case '8': case '9': 372 cx.x_val.v_num = cx.x_val.v_num * 10 + c - '0'; 373 break; 374 default: 375 (void) s_ungetc(c); 376 state = -1; 377 cx.x_token = T_NUM; 378 } 379 break; 380 case 12: /* hex number */ 381 switch (c) { 382 case '0': case '1': case '2': case '3': case '4': 383 case '5': case '6': case '7': case '8': case '9': 384 cx.x_val.v_num = cx.x_val.v_num * 16 + c - '0'; 385 break; 386 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 387 cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'a' + 10; 388 break; 389 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 390 cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'A' + 10; 391 break; 392 default: 393 (void) s_ungetc(c); 394 state = -1; 395 cx.x_token = T_NUM; 396 } 397 break; 398 case 13: /* octal number */ 399 switch (c) { 400 case '0': case '1': case '2': case '3': case '4': 401 case '5': case '6': case '7': 402 cx.x_val.v_num = cx.x_val.v_num * 8 + c - '0'; 403 break; 404 default: 405 (void) s_ungetc(c); 406 state = -1; 407 cx.x_token = T_NUM; 408 } 409 break; 410 case 20: /* got > */ 411 switch (c) { 412 case '=': 413 cx.x_token = T_GE; 414 state = -1; 415 break; 416 case '>': 417 cx.x_token = T_RS; 418 state = -1; 419 break; 420 default: 421 (void) s_ungetc(c); 422 cx.x_token = T_GT; 423 state = -1; 424 } 425 break; 426 case 21: /* got < */ 427 switch (c) { 428 case '=': 429 cx.x_token = T_LE; 430 state = -1; 431 break; 432 case '<': 433 cx.x_token = T_LS; 434 state = -1; 435 break; 436 default: 437 (void) s_ungetc(c); 438 cx.x_token = T_LT; 439 state = -1; 440 } 441 break; 442 case 22: /* got = */ 443 switch (c) { 444 case '=': 445 cx.x_token = T_EQ; 446 state = -1; 447 break; 448 default: 449 (void) s_ungetc(c); 450 cx.x_token = T_ASSIGN; 451 state = -1; 452 } 453 break; 454 case 23: /* got ! */ 455 switch (c) { 456 case '=': 457 cx.x_token = T_NE; 458 state = -1; 459 break; 460 default: 461 (void) s_ungetc(c); 462 cx.x_token = T_NOT; 463 state = -1; 464 } 465 break; 466 case 24: /* got & */ 467 switch (c) { 468 case '&': 469 cx.x_token = T_ANDAND; 470 state = -1; 471 break; 472 default: 473 (void) s_ungetc(c); 474 cx.x_token = T_AND; 475 state = -1; 476 } 477 break; 478 case 25: /* got | */ 479 switch (c) { 480 case '|': 481 cx.x_token = T_OROR; 482 state = -1; 483 break; 484 default: 485 (void) s_ungetc(c); 486 cx.x_token = T_OR; 487 state = -1; 488 } 489 break; 490 case 26: /* got $ */ 491 switch (c) { 492 case '?': 493 cx.x_token = T_DQ; 494 state = -1; 495 break; 496 default: 497 (void) s_ungetc(c); 498 cx.x_token = T_DOLLAR; 499 state = -1; 500 } 501 break; 502 default: 503 abort(); 504 } 505 if (state >= 0) 506 goto loop; 507 return cx.x_token; 508 } 509 510 s_gettok1() 511 { 512 register c; 513 register n; 514 515 c = s_getc(); /* got \ */ 516 switch (c) { 517 case EOF: 518 return -1; 519 case '\n': 520 return -2; 521 case 'b': 522 return '\b'; 523 case 'f': 524 return '\f'; 525 case 'n': 526 return '\n'; 527 case 'r': 528 return '\r'; 529 case 't': 530 return '\t'; 531 default: 532 return c; 533 case '0': case '1': case '2': case '3': case '4': 534 case '5': case '6': case '7': 535 break; 536 } 537 n = c - '0'; 538 c = s_getc(); /* got \[0-7] */ 539 if (c < '0' || c > '7') { 540 (void) s_ungetc(c); 541 return n; 542 } 543 n = n * 8 + c - '0'; 544 c = s_getc(); /* got \[0-7][0-7] */ 545 if (c < '0' || c > '7') { 546 (void) s_ungetc(c); 547 return n; 548 } 549 return n * 8 + c - '0'; 550 } 551