1 /* 2 * Copyright (c) 1983 Regents of the University of California. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms are permitted 6 * provided that the above copyright notice and this paragraph are 7 * duplicated in all such forms and that any documentation, 8 * advertising materials, and other materials related to such 9 * distribution and use acknowledge that the software was developed 10 * by the University of California, Berkeley. The name of the 11 * University may not be used to endorse or promote products derived 12 * from this software without specific prior written permission. 13 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 14 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 15 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 16 */ 17 18 #ifndef lint 19 static char sccsid[] = "@(#)scanner.c 3.12 (Berkeley) 10/13/89"; 20 #endif /* not lint */ 21 22 #include "value.h" 23 #include "token.h" 24 #include "context.h" 25 #include "string.h" 26 27 s_getc() 28 { 29 register c; 30 31 switch (cx.x_type) { 32 case X_FILE: 33 c = getc(cx.x_fp); 34 if (cx.x_bol && c != EOF) { 35 cx.x_bol = 0; 36 cx.x_lineno++; 37 } 38 if (c == '\n') 39 cx.x_bol = 1; 40 return c; 41 case X_BUF: 42 if (*cx.x_bufp != 0) 43 return *cx.x_bufp++ & 0xff; 44 else 45 return EOF; 46 } 47 /*NOTREACHED*/ 48 } 49 50 s_ungetc(c) 51 { 52 if (c == EOF) 53 return EOF; 54 switch (cx.x_type) { 55 case X_FILE: 56 cx.x_bol = 0; 57 return ungetc(c, cx.x_fp); 58 case X_BUF: 59 if (cx.x_bufp > cx.x_buf) 60 return *--cx.x_bufp = c; 61 else 62 return EOF; 63 } 64 /*NOTREACHED*/ 65 } 66 67 s_gettok() 68 { 69 char buf[100]; 70 register char *p = buf; 71 register c; 72 register state = 0; 73 74 loop: 75 c = s_getc(); 76 switch (state) { 77 case 0: 78 switch (c) { 79 case ' ': 80 case '\t': 81 break; 82 case '\n': 83 case ';': 84 cx.x_token = T_EOL; 85 state = -1; 86 break; 87 case '#': 88 state = 1; 89 break; 90 case EOF: 91 cx.x_token = T_EOF; 92 state = -1; 93 break; 94 case 'a': case 'b': case 'c': case 'd': case 'e': 95 case 'f': case 'g': case 'h': case 'i': case 'j': 96 case 'k': case 'l': case 'm': case 'n': case 'o': 97 case 'p': case 'q': case 'r': case 's': case 't': 98 case 'u': case 'v': case 'w': case 'x': case 'y': 99 case 'z': 100 case 'A': case 'B': case 'C': case 'D': case 'E': 101 case 'F': case 'G': case 'H': case 'I': case 'J': 102 case 'K': case 'L': case 'M': case 'N': case 'O': 103 case 'P': case 'Q': case 'R': case 'S': case 'T': 104 case 'U': case 'V': case 'W': case 'X': case 'Y': 105 case 'Z': 106 case '_': case '.': 107 *p++ = c; 108 state = 2; 109 break; 110 case '"': 111 state = 3; 112 break; 113 case '\'': 114 state = 4; 115 break; 116 case '\\': 117 switch (c = s_gettok1()) { 118 case -1: 119 break; 120 case -2: 121 state = 0; 122 break; 123 default: 124 *p++ = c; 125 state = 2; 126 } 127 break; 128 case '0': 129 cx.x_val.v_num = 0; 130 state = 10; 131 break; 132 case '1': case '2': case '3': case '4': 133 case '5': case '6': case '7': case '8': case '9': 134 cx.x_val.v_num = c - '0'; 135 state = 11; 136 break; 137 case '>': 138 state = 20; 139 break; 140 case '<': 141 state = 21; 142 break; 143 case '=': 144 state = 22; 145 break; 146 case '!': 147 state = 23; 148 break; 149 case '&': 150 state = 24; 151 break; 152 case '|': 153 state = 25; 154 break; 155 case '$': 156 state = 26; 157 break; 158 case '~': 159 cx.x_token = T_COMP; 160 state = -1; 161 break; 162 case '+': 163 cx.x_token = T_PLUS; 164 state = -1; 165 break; 166 case '-': 167 cx.x_token = T_MINUS; 168 state = -1; 169 break; 170 case '*': 171 cx.x_token = T_MUL; 172 state = -1; 173 break; 174 case '/': 175 cx.x_token = T_DIV; 176 state = -1; 177 break; 178 case '%': 179 cx.x_token = T_MOD; 180 state = -1; 181 break; 182 case '^': 183 cx.x_token = T_XOR; 184 state = -1; 185 break; 186 case '(': 187 cx.x_token = T_LP; 188 state = -1; 189 break; 190 case ')': 191 cx.x_token = T_RP; 192 state = -1; 193 break; 194 case ',': 195 cx.x_token = T_COMMA; 196 state = -1; 197 break; 198 case '?': 199 cx.x_token = T_QUEST; 200 state = -1; 201 break; 202 case ':': 203 cx.x_token = T_COLON; 204 state = -1; 205 break; 206 case '[': 207 cx.x_token = T_LB; 208 state = -1; 209 break; 210 case ']': 211 cx.x_token = T_RB; 212 state = -1; 213 break; 214 default: 215 cx.x_val.v_num = c; 216 cx.x_token = T_CHAR; 217 state = -1; 218 break; 219 } 220 break; 221 case 1: /* got # */ 222 if (c == '\n' || c == EOF) { 223 (void) s_ungetc(c); 224 state = 0; 225 } 226 break; 227 case 2: /* unquoted string */ 228 switch (c) { 229 case 'a': case 'b': case 'c': case 'd': case 'e': 230 case 'f': case 'g': case 'h': case 'i': case 'j': 231 case 'k': case 'l': case 'm': case 'n': case 'o': 232 case 'p': case 'q': case 'r': case 's': case 't': 233 case 'u': case 'v': case 'w': case 'x': case 'y': 234 case 'z': 235 case 'A': case 'B': case 'C': case 'D': case 'E': 236 case 'F': case 'G': case 'H': case 'I': case 'J': 237 case 'K': case 'L': case 'M': case 'N': case 'O': 238 case 'P': case 'Q': case 'R': case 'S': case 'T': 239 case 'U': case 'V': case 'W': case 'X': case 'Y': 240 case 'Z': 241 case '_': case '.': 242 case '0': case '1': case '2': case '3': case '4': 243 case '5': case '6': case '7': case '8': case '9': 244 if (p < buf + sizeof buf - 1) 245 *p++ = c; 246 break; 247 case '"': 248 state = 3; 249 break; 250 case '\'': 251 state = 4; 252 break; 253 case '\\': 254 switch (c = s_gettok1()) { 255 case -2: 256 (void) s_ungetc(' '); 257 case -1: 258 break; 259 default: 260 if (p < buf + sizeof buf - 1) 261 *p++ = c; 262 } 263 break; 264 default: 265 (void) s_ungetc(c); 266 case EOF: 267 *p = 0; 268 cx.x_token = T_STR; 269 switch (*buf) { 270 case 'i': 271 if (buf[1] == 'f' && buf[2] == 0) 272 cx.x_token = T_IF; 273 break; 274 case 't': 275 if (buf[1] == 'h' && buf[2] == 'e' 276 && buf[3] == 'n' && buf[4] == 0) 277 cx.x_token = T_THEN; 278 break; 279 case 'e': 280 if (buf[1] == 'n' && buf[2] == 'd' 281 && buf[3] == 'i' && buf[4] == 'f' 282 && buf[5] == 0) 283 cx.x_token = T_ENDIF; 284 else if (buf[1] == 'l' && buf[2] == 's') 285 if (buf[3] == 'i' && buf[4] == 'f' 286 && buf[5] == 0) 287 cx.x_token = T_ELSIF; 288 else if (buf[3] == 'e' && buf[4] == 0) 289 cx.x_token = T_ELSE; 290 break; 291 } 292 if (cx.x_token == T_STR 293 && (cx.x_val.v_str = str_cpy(buf)) == 0) { 294 p_memerror(); 295 cx.x_token = T_EOF; 296 } 297 state = -1; 298 break; 299 } 300 break; 301 case 3: /* " quoted string */ 302 switch (c) { 303 case '\n': 304 (void) s_ungetc(c); 305 case EOF: 306 case '"': 307 state = 2; 308 break; 309 case '\\': 310 switch (c = s_gettok1()) { 311 case -1: 312 case -2: /* newlines are invisible */ 313 break; 314 default: 315 if (p < buf + sizeof buf - 1) 316 *p++ = c; 317 } 318 break; 319 default: 320 if (p < buf + sizeof buf - 1) 321 *p++ = c; 322 break; 323 } 324 break; 325 case 4: /* ' quoted string */ 326 switch (c) { 327 case '\n': 328 (void) s_ungetc(c); 329 case EOF: 330 case '\'': 331 state = 2; 332 break; 333 case '\\': 334 switch (c = s_gettok1()) { 335 case -1: 336 case -2: /* newlines are invisible */ 337 break; 338 default: 339 if (p < buf + sizeof buf - 1) 340 *p++ = c; 341 } 342 break; 343 default: 344 if (p < buf + sizeof buf - 1) 345 *p++ = c; 346 break; 347 } 348 break; 349 case 10: /* got 0 */ 350 switch (c) { 351 case 'x': 352 case 'X': 353 cx.x_val.v_num = 0; 354 state = 12; 355 break; 356 case '0': case '1': case '2': case '3': case '4': 357 case '5': case '6': case '7': 358 cx.x_val.v_num = c - '0'; 359 state = 13; 360 break; 361 case '8': case '9': 362 cx.x_val.v_num = c - '0'; 363 state = 11; 364 break; 365 default: 366 (void) s_ungetc(c); 367 state = -1; 368 cx.x_token = T_NUM; 369 } 370 break; 371 case 11: /* decimal number */ 372 switch (c) { 373 case '0': case '1': case '2': case '3': case '4': 374 case '5': case '6': case '7': case '8': case '9': 375 cx.x_val.v_num = cx.x_val.v_num * 10 + c - '0'; 376 break; 377 default: 378 (void) s_ungetc(c); 379 state = -1; 380 cx.x_token = T_NUM; 381 } 382 break; 383 case 12: /* hex number */ 384 switch (c) { 385 case '0': case '1': case '2': case '3': case '4': 386 case '5': case '6': case '7': case '8': case '9': 387 cx.x_val.v_num = cx.x_val.v_num * 16 + c - '0'; 388 break; 389 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 390 cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'a' + 10; 391 break; 392 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 393 cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'A' + 10; 394 break; 395 default: 396 (void) s_ungetc(c); 397 state = -1; 398 cx.x_token = T_NUM; 399 } 400 break; 401 case 13: /* octal number */ 402 switch (c) { 403 case '0': case '1': case '2': case '3': case '4': 404 case '5': case '6': case '7': 405 cx.x_val.v_num = cx.x_val.v_num * 8 + c - '0'; 406 break; 407 default: 408 (void) s_ungetc(c); 409 state = -1; 410 cx.x_token = T_NUM; 411 } 412 break; 413 case 20: /* got > */ 414 switch (c) { 415 case '=': 416 cx.x_token = T_GE; 417 state = -1; 418 break; 419 case '>': 420 cx.x_token = T_RS; 421 state = -1; 422 break; 423 default: 424 (void) s_ungetc(c); 425 cx.x_token = T_GT; 426 state = -1; 427 } 428 break; 429 case 21: /* got < */ 430 switch (c) { 431 case '=': 432 cx.x_token = T_LE; 433 state = -1; 434 break; 435 case '<': 436 cx.x_token = T_LS; 437 state = -1; 438 break; 439 default: 440 (void) s_ungetc(c); 441 cx.x_token = T_LT; 442 state = -1; 443 } 444 break; 445 case 22: /* got = */ 446 switch (c) { 447 case '=': 448 cx.x_token = T_EQ; 449 state = -1; 450 break; 451 default: 452 (void) s_ungetc(c); 453 cx.x_token = T_ASSIGN; 454 state = -1; 455 } 456 break; 457 case 23: /* got ! */ 458 switch (c) { 459 case '=': 460 cx.x_token = T_NE; 461 state = -1; 462 break; 463 default: 464 (void) s_ungetc(c); 465 cx.x_token = T_NOT; 466 state = -1; 467 } 468 break; 469 case 24: /* got & */ 470 switch (c) { 471 case '&': 472 cx.x_token = T_ANDAND; 473 state = -1; 474 break; 475 default: 476 (void) s_ungetc(c); 477 cx.x_token = T_AND; 478 state = -1; 479 } 480 break; 481 case 25: /* got | */ 482 switch (c) { 483 case '|': 484 cx.x_token = T_OROR; 485 state = -1; 486 break; 487 default: 488 (void) s_ungetc(c); 489 cx.x_token = T_OR; 490 state = -1; 491 } 492 break; 493 case 26: /* got $ */ 494 switch (c) { 495 case '?': 496 cx.x_token = T_DQ; 497 state = -1; 498 break; 499 default: 500 (void) s_ungetc(c); 501 cx.x_token = T_DOLLAR; 502 state = -1; 503 } 504 break; 505 default: 506 abort(); 507 } 508 if (state >= 0) 509 goto loop; 510 return cx.x_token; 511 } 512 513 s_gettok1() 514 { 515 register c; 516 register n; 517 518 c = s_getc(); /* got \ */ 519 switch (c) { 520 case EOF: 521 return -1; 522 case '\n': 523 return -2; 524 case 'b': 525 return '\b'; 526 case 'f': 527 return '\f'; 528 case 'n': 529 return '\n'; 530 case 'r': 531 return '\r'; 532 case 't': 533 return '\t'; 534 default: 535 return c; 536 case '0': case '1': case '2': case '3': case '4': 537 case '5': case '6': case '7': 538 break; 539 } 540 n = c - '0'; 541 c = s_getc(); /* got \[0-7] */ 542 if (c < '0' || c > '7') { 543 (void) s_ungetc(c); 544 return n; 545 } 546 n = n * 8 + c - '0'; 547 c = s_getc(); /* got \[0-7][0-7] */ 548 if (c < '0' || c > '7') { 549 (void) s_ungetc(c); 550 return n; 551 } 552 return n * 8 + c - '0'; 553 } 554