1 /* 2 * Copyright (c) 1983, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Edward Wang at The University of California, Berkeley. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)scanner.c 8.1 (Berkeley) 6/6/93 37 * $FreeBSD: src/usr.bin/window/scanner.c,v 1.2.14.2 2001/05/17 09:46:29 obrien Exp $ 38 * $DragonFly: src/usr.bin/window/scanner.c,v 1.2 2003/06/17 04:29:34 dillon Exp $ 39 */ 40 41 #include <ctype.h> 42 #include <stdlib.h> 43 44 #include "value.h" 45 #include "token.h" 46 #include "context.h" 47 #include "string.h" 48 #include "mystring.h" 49 50 s_getc() 51 { 52 register c; 53 54 switch (cx.x_type) { 55 case X_FILE: 56 c = getc(cx.x_fp); 57 if (cx.x_bol && c != EOF) { 58 cx.x_bol = 0; 59 cx.x_lineno++; 60 } 61 if (c == '\n') 62 cx.x_bol = 1; 63 return c; 64 case X_BUF: 65 if (*cx.x_bufp != 0) 66 return *cx.x_bufp++ & 0xff; 67 else 68 return EOF; 69 } 70 /*NOTREACHED*/ 71 } 72 73 s_ungetc(c) 74 { 75 if (c == EOF) 76 return EOF; 77 switch (cx.x_type) { 78 case X_FILE: 79 cx.x_bol = 0; 80 return ungetc(c, cx.x_fp); 81 case X_BUF: 82 if (cx.x_bufp > cx.x_buf) 83 return *--cx.x_bufp = c; 84 else 85 return EOF; 86 } 87 /*NOTREACHED*/ 88 } 89 90 s_gettok() 91 { 92 char buf[100]; 93 register char *p = buf; 94 register c; 95 register state = 0; 96 97 loop: 98 c = s_getc(); 99 switch (state) { 100 case 0: 101 switch (c) { 102 case ' ': 103 case '\t': 104 break; 105 case '\n': 106 case ';': 107 cx.x_token = T_EOL; 108 state = -1; 109 break; 110 case '#': 111 state = 1; 112 break; 113 case EOF: 114 cx.x_token = T_EOF; 115 state = -1; 116 break; 117 case '"': 118 state = 3; 119 break; 120 case '\'': 121 state = 4; 122 break; 123 case '\\': 124 switch (c = s_gettok1()) { 125 case -1: 126 break; 127 case -2: 128 state = 0; 129 break; 130 default: 131 *p++ = c; 132 state = 2; 133 } 134 break; 135 case '0': 136 cx.x_val.v_num = 0; 137 state = 10; 138 break; 139 case '1': case '2': case '3': case '4': 140 case '5': case '6': case '7': case '8': case '9': 141 cx.x_val.v_num = c - '0'; 142 state = 11; 143 break; 144 case '>': 145 state = 20; 146 break; 147 case '<': 148 state = 21; 149 break; 150 case '=': 151 state = 22; 152 break; 153 case '!': 154 state = 23; 155 break; 156 case '&': 157 state = 24; 158 break; 159 case '|': 160 state = 25; 161 break; 162 case '$': 163 state = 26; 164 break; 165 case '~': 166 cx.x_token = T_COMP; 167 state = -1; 168 break; 169 case '+': 170 cx.x_token = T_PLUS; 171 state = -1; 172 break; 173 case '-': 174 cx.x_token = T_MINUS; 175 state = -1; 176 break; 177 case '*': 178 cx.x_token = T_MUL; 179 state = -1; 180 break; 181 case '/': 182 cx.x_token = T_DIV; 183 state = -1; 184 break; 185 case '%': 186 cx.x_token = T_MOD; 187 state = -1; 188 break; 189 case '^': 190 cx.x_token = T_XOR; 191 state = -1; 192 break; 193 case '(': 194 cx.x_token = T_LP; 195 state = -1; 196 break; 197 case ')': 198 cx.x_token = T_RP; 199 state = -1; 200 break; 201 case ',': 202 cx.x_token = T_COMMA; 203 state = -1; 204 break; 205 case '?': 206 cx.x_token = T_QUEST; 207 state = -1; 208 break; 209 case ':': 210 cx.x_token = T_COLON; 211 state = -1; 212 break; 213 case '[': 214 cx.x_token = T_LB; 215 state = -1; 216 break; 217 case ']': 218 cx.x_token = T_RB; 219 state = -1; 220 break; 221 default: 222 if (isalpha(c) || c == '_' || c == '.') { 223 *p++ = c; 224 state = 2; 225 break; 226 } 227 cx.x_val.v_num = c; 228 cx.x_token = T_CHAR; 229 state = -1; 230 break; 231 } 232 break; 233 case 1: /* got # */ 234 if (c == '\n' || c == EOF) { 235 (void) s_ungetc(c); 236 state = 0; 237 } 238 break; 239 case 2: /* unquoted string */ 240 switch (c) { 241 case '"': 242 state = 3; 243 break; 244 case '\'': 245 state = 4; 246 break; 247 case '\\': 248 switch (c = s_gettok1()) { 249 case -2: 250 (void) s_ungetc(' '); 251 case -1: 252 break; 253 default: 254 if (p < buf + sizeof buf - 1) 255 *p++ = c; 256 } 257 break; 258 default: 259 if (isalnum(c) || c == '_' || c == '.') { 260 if (p < buf + sizeof buf - 1) 261 *p++ = c; 262 break; 263 } 264 (void) s_ungetc(c); 265 case EOF: 266 *p = 0; 267 cx.x_token = T_STR; 268 switch (*buf) { 269 case 'i': 270 if (buf[1] == 'f' && buf[2] == 0) 271 cx.x_token = T_IF; 272 break; 273 case 't': 274 if (buf[1] == 'h' && buf[2] == 'e' 275 && buf[3] == 'n' && buf[4] == 0) 276 cx.x_token = T_THEN; 277 break; 278 case 'e': 279 if (buf[1] == 'n' && buf[2] == 'd' 280 && buf[3] == 'i' && buf[4] == 'f' 281 && buf[5] == 0) 282 cx.x_token = T_ENDIF; 283 else if (buf[1] == 'l' && buf[2] == 's') 284 if (buf[3] == 'i' && buf[4] == 'f' 285 && buf[5] == 0) 286 cx.x_token = T_ELSIF; 287 else if (buf[3] == 'e' && buf[4] == 0) 288 cx.x_token = T_ELSE; 289 break; 290 } 291 if (cx.x_token == T_STR 292 && (cx.x_val.v_str = str_cpy(buf)) == 0) { 293 p_memerror(); 294 cx.x_token = T_EOF; 295 } 296 state = -1; 297 break; 298 } 299 break; 300 case 3: /* " quoted string */ 301 switch (c) { 302 case '\n': 303 (void) s_ungetc(c); 304 case EOF: 305 case '"': 306 state = 2; 307 break; 308 case '\\': 309 switch (c = s_gettok1()) { 310 case -1: 311 case -2: /* newlines are invisible */ 312 break; 313 default: 314 if (p < buf + sizeof buf - 1) 315 *p++ = c; 316 } 317 break; 318 default: 319 if (p < buf + sizeof buf - 1) 320 *p++ = c; 321 break; 322 } 323 break; 324 case 4: /* ' quoted string */ 325 switch (c) { 326 case '\n': 327 (void) s_ungetc(c); 328 case EOF: 329 case '\'': 330 state = 2; 331 break; 332 case '\\': 333 switch (c = s_gettok1()) { 334 case -1: 335 case -2: /* newlines are invisible */ 336 break; 337 default: 338 if (p < buf + sizeof buf - 1) 339 *p++ = c; 340 } 341 break; 342 default: 343 if (p < buf + sizeof buf - 1) 344 *p++ = c; 345 break; 346 } 347 break; 348 case 10: /* got 0 */ 349 switch (c) { 350 case 'x': 351 case 'X': 352 cx.x_val.v_num = 0; 353 state = 12; 354 break; 355 case '0': case '1': case '2': case '3': case '4': 356 case '5': case '6': case '7': 357 cx.x_val.v_num = c - '0'; 358 state = 13; 359 break; 360 case '8': case '9': 361 cx.x_val.v_num = c - '0'; 362 state = 11; 363 break; 364 default: 365 (void) s_ungetc(c); 366 state = -1; 367 cx.x_token = T_NUM; 368 } 369 break; 370 case 11: /* decimal number */ 371 switch (c) { 372 case '0': case '1': case '2': case '3': case '4': 373 case '5': case '6': case '7': case '8': case '9': 374 cx.x_val.v_num = cx.x_val.v_num * 10 + c - '0'; 375 break; 376 default: 377 (void) s_ungetc(c); 378 state = -1; 379 cx.x_token = T_NUM; 380 } 381 break; 382 case 12: /* hex number */ 383 switch (c) { 384 case '0': case '1': case '2': case '3': case '4': 385 case '5': case '6': case '7': case '8': case '9': 386 cx.x_val.v_num = cx.x_val.v_num * 16 + c - '0'; 387 break; 388 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 389 cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'a' + 10; 390 break; 391 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 392 cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'A' + 10; 393 break; 394 default: 395 (void) s_ungetc(c); 396 state = -1; 397 cx.x_token = T_NUM; 398 } 399 break; 400 case 13: /* octal number */ 401 switch (c) { 402 case '0': case '1': case '2': case '3': case '4': 403 case '5': case '6': case '7': 404 cx.x_val.v_num = cx.x_val.v_num * 8 + c - '0'; 405 break; 406 default: 407 (void) s_ungetc(c); 408 state = -1; 409 cx.x_token = T_NUM; 410 } 411 break; 412 case 20: /* got > */ 413 switch (c) { 414 case '=': 415 cx.x_token = T_GE; 416 state = -1; 417 break; 418 case '>': 419 cx.x_token = T_RS; 420 state = -1; 421 break; 422 default: 423 (void) s_ungetc(c); 424 cx.x_token = T_GT; 425 state = -1; 426 } 427 break; 428 case 21: /* got < */ 429 switch (c) { 430 case '=': 431 cx.x_token = T_LE; 432 state = -1; 433 break; 434 case '<': 435 cx.x_token = T_LS; 436 state = -1; 437 break; 438 default: 439 (void) s_ungetc(c); 440 cx.x_token = T_LT; 441 state = -1; 442 } 443 break; 444 case 22: /* got = */ 445 switch (c) { 446 case '=': 447 cx.x_token = T_EQ; 448 state = -1; 449 break; 450 default: 451 (void) s_ungetc(c); 452 cx.x_token = T_ASSIGN; 453 state = -1; 454 } 455 break; 456 case 23: /* got ! */ 457 switch (c) { 458 case '=': 459 cx.x_token = T_NE; 460 state = -1; 461 break; 462 default: 463 (void) s_ungetc(c); 464 cx.x_token = T_NOT; 465 state = -1; 466 } 467 break; 468 case 24: /* got & */ 469 switch (c) { 470 case '&': 471 cx.x_token = T_ANDAND; 472 state = -1; 473 break; 474 default: 475 (void) s_ungetc(c); 476 cx.x_token = T_AND; 477 state = -1; 478 } 479 break; 480 case 25: /* got | */ 481 switch (c) { 482 case '|': 483 cx.x_token = T_OROR; 484 state = -1; 485 break; 486 default: 487 (void) s_ungetc(c); 488 cx.x_token = T_OR; 489 state = -1; 490 } 491 break; 492 case 26: /* got $ */ 493 switch (c) { 494 case '?': 495 cx.x_token = T_DQ; 496 state = -1; 497 break; 498 default: 499 (void) s_ungetc(c); 500 cx.x_token = T_DOLLAR; 501 state = -1; 502 } 503 break; 504 default: 505 abort(); 506 } 507 if (state >= 0) 508 goto loop; 509 return cx.x_token; 510 } 511 512 s_gettok1() 513 { 514 register c; 515 register n; 516 517 c = s_getc(); /* got \ */ 518 switch (c) { 519 case EOF: 520 return -1; 521 case '\n': 522 return -2; 523 case 'b': 524 return '\b'; 525 case 'f': 526 return '\f'; 527 case 'n': 528 return '\n'; 529 case 'r': 530 return '\r'; 531 case 't': 532 return '\t'; 533 default: 534 return c; 535 case '0': case '1': case '2': case '3': case '4': 536 case '5': case '6': case '7': 537 break; 538 } 539 n = c - '0'; 540 c = s_getc(); /* got \[0-7] */ 541 if (c < '0' || c > '7') { 542 (void) s_ungetc(c); 543 return n; 544 } 545 n = n * 8 + c - '0'; 546 c = s_getc(); /* got \[0-7][0-7] */ 547 if (c < '0' || c > '7') { 548 (void) s_ungetc(c); 549 return n; 550 } 551 return n * 8 + c - '0'; 552 } 553