1 /* 2 * Copyright (c) 1983 Regents of the University of California. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms are permitted 6 * provided that the above copyright notice and this paragraph are 7 * duplicated in all such forms and that any documentation, 8 * advertising materials, and other materials related to such 9 * distribution and use acknowledge that the software was developed 10 * by the University of California, Berkeley. The name of the 11 * University may not be used to endorse or promote products derived 12 * from this software without specific prior written permission. 13 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 14 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 15 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 16 */ 17 18 #ifndef lint 19 static char sccsid[] = "@(#)scanner.c 3.11 (Berkeley) 06/29/88"; 20 #endif /* not lint */ 21 22 #include <stdio.h> 23 #include "value.h" 24 #include "token.h" 25 #include "context.h" 26 #include "string.h" 27 28 s_getc() 29 { 30 register c; 31 32 switch (cx.x_type) { 33 case X_FILE: 34 c = getc(cx.x_fp); 35 if (cx.x_bol && c != EOF) { 36 cx.x_bol = 0; 37 cx.x_lineno++; 38 } 39 if (c == '\n') 40 cx.x_bol = 1; 41 return c; 42 case X_BUF: 43 if (*cx.x_bufp != 0) 44 return *cx.x_bufp++ & 0xff; 45 else 46 return EOF; 47 } 48 /*NOTREACHED*/ 49 } 50 51 s_ungetc(c) 52 { 53 if (c == EOF) 54 return EOF; 55 switch (cx.x_type) { 56 case X_FILE: 57 cx.x_bol = 0; 58 return ungetc(c, cx.x_fp); 59 case X_BUF: 60 if (cx.x_bufp > cx.x_buf) 61 return *--cx.x_bufp = c; 62 else 63 return EOF; 64 } 65 /*NOTREACHED*/ 66 } 67 68 s_gettok() 69 { 70 char buf[100]; 71 register char *p = buf; 72 register c; 73 register state = 0; 74 75 loop: 76 c = s_getc(); 77 switch (state) { 78 case 0: 79 switch (c) { 80 case ' ': 81 case '\t': 82 break; 83 case '\n': 84 case ';': 85 cx.x_token = T_EOL; 86 state = -1; 87 break; 88 case '#': 89 state = 1; 90 break; 91 case EOF: 92 cx.x_token = T_EOF; 93 state = -1; 94 break; 95 case 'a': case 'b': case 'c': case 'd': case 'e': 96 case 'f': case 'g': case 'h': case 'i': case 'j': 97 case 'k': case 'l': case 'm': case 'n': case 'o': 98 case 'p': case 'q': case 'r': case 's': case 't': 99 case 'u': case 'v': case 'w': case 'x': case 'y': 100 case 'z': 101 case 'A': case 'B': case 'C': case 'D': case 'E': 102 case 'F': case 'G': case 'H': case 'I': case 'J': 103 case 'K': case 'L': case 'M': case 'N': case 'O': 104 case 'P': case 'Q': case 'R': case 'S': case 'T': 105 case 'U': case 'V': case 'W': case 'X': case 'Y': 106 case 'Z': 107 case '_': case '.': 108 *p++ = c; 109 state = 2; 110 break; 111 case '"': 112 state = 3; 113 break; 114 case '\'': 115 state = 4; 116 break; 117 case '\\': 118 switch (c = s_gettok1()) { 119 case -1: 120 break; 121 case -2: 122 state = 0; 123 break; 124 default: 125 *p++ = c; 126 state = 2; 127 } 128 break; 129 case '0': 130 cx.x_val.v_num = 0; 131 state = 10; 132 break; 133 case '1': case '2': case '3': case '4': 134 case '5': case '6': case '7': case '8': case '9': 135 cx.x_val.v_num = c - '0'; 136 state = 11; 137 break; 138 case '>': 139 state = 20; 140 break; 141 case '<': 142 state = 21; 143 break; 144 case '=': 145 state = 22; 146 break; 147 case '!': 148 state = 23; 149 break; 150 case '&': 151 state = 24; 152 break; 153 case '|': 154 state = 25; 155 break; 156 case '$': 157 state = 26; 158 break; 159 case '~': 160 cx.x_token = T_COMP; 161 state = -1; 162 break; 163 case '+': 164 cx.x_token = T_PLUS; 165 state = -1; 166 break; 167 case '-': 168 cx.x_token = T_MINUS; 169 state = -1; 170 break; 171 case '*': 172 cx.x_token = T_MUL; 173 state = -1; 174 break; 175 case '/': 176 cx.x_token = T_DIV; 177 state = -1; 178 break; 179 case '%': 180 cx.x_token = T_MOD; 181 state = -1; 182 break; 183 case '^': 184 cx.x_token = T_XOR; 185 state = -1; 186 break; 187 case '(': 188 cx.x_token = T_LP; 189 state = -1; 190 break; 191 case ')': 192 cx.x_token = T_RP; 193 state = -1; 194 break; 195 case ',': 196 cx.x_token = T_COMMA; 197 state = -1; 198 break; 199 case '?': 200 cx.x_token = T_QUEST; 201 state = -1; 202 break; 203 case ':': 204 cx.x_token = T_COLON; 205 state = -1; 206 break; 207 case '[': 208 cx.x_token = T_LB; 209 state = -1; 210 break; 211 case ']': 212 cx.x_token = T_RB; 213 state = -1; 214 break; 215 default: 216 cx.x_val.v_num = c; 217 cx.x_token = T_CHAR; 218 state = -1; 219 break; 220 } 221 break; 222 case 1: /* got # */ 223 if (c == '\n' || c == EOF) { 224 (void) s_ungetc(c); 225 state = 0; 226 } 227 break; 228 case 2: /* unquoted string */ 229 switch (c) { 230 case 'a': case 'b': case 'c': case 'd': case 'e': 231 case 'f': case 'g': case 'h': case 'i': case 'j': 232 case 'k': case 'l': case 'm': case 'n': case 'o': 233 case 'p': case 'q': case 'r': case 's': case 't': 234 case 'u': case 'v': case 'w': case 'x': case 'y': 235 case 'z': 236 case 'A': case 'B': case 'C': case 'D': case 'E': 237 case 'F': case 'G': case 'H': case 'I': case 'J': 238 case 'K': case 'L': case 'M': case 'N': case 'O': 239 case 'P': case 'Q': case 'R': case 'S': case 'T': 240 case 'U': case 'V': case 'W': case 'X': case 'Y': 241 case 'Z': 242 case '_': case '.': 243 case '0': case '1': case '2': case '3': case '4': 244 case '5': case '6': case '7': case '8': case '9': 245 if (p < buf + sizeof buf - 1) 246 *p++ = c; 247 break; 248 case '"': 249 state = 3; 250 break; 251 case '\'': 252 state = 4; 253 break; 254 case '\\': 255 switch (c = s_gettok1()) { 256 case -2: 257 (void) s_ungetc(' '); 258 case -1: 259 break; 260 default: 261 if (p < buf + sizeof buf - 1) 262 *p++ = c; 263 } 264 break; 265 default: 266 (void) s_ungetc(c); 267 case EOF: 268 *p = 0; 269 cx.x_token = T_STR; 270 switch (*buf) { 271 case 'i': 272 if (buf[1] == 'f' && buf[2] == 0) 273 cx.x_token = T_IF; 274 break; 275 case 't': 276 if (buf[1] == 'h' && buf[2] == 'e' 277 && buf[3] == 'n' && buf[4] == 0) 278 cx.x_token = T_THEN; 279 break; 280 case 'e': 281 if (buf[1] == 'n' && buf[2] == 'd' 282 && buf[3] == 'i' && buf[4] == 'f' 283 && buf[5] == 0) 284 cx.x_token = T_ENDIF; 285 else if (buf[1] == 'l' && buf[2] == 's') 286 if (buf[3] == 'i' && buf[4] == 'f' 287 && buf[5] == 0) 288 cx.x_token = T_ELSIF; 289 else if (buf[3] == 'e' && buf[4] == 0) 290 cx.x_token = T_ELSE; 291 break; 292 } 293 if (cx.x_token == T_STR 294 && (cx.x_val.v_str = str_cpy(buf)) == 0) { 295 p_memerror(); 296 cx.x_token = T_EOF; 297 } 298 state = -1; 299 break; 300 } 301 break; 302 case 3: /* " quoted string */ 303 switch (c) { 304 case '\n': 305 (void) s_ungetc(c); 306 case EOF: 307 case '"': 308 state = 2; 309 break; 310 case '\\': 311 switch (c = s_gettok1()) { 312 case -1: 313 case -2: /* newlines are invisible */ 314 break; 315 default: 316 if (p < buf + sizeof buf - 1) 317 *p++ = c; 318 } 319 break; 320 default: 321 if (p < buf + sizeof buf - 1) 322 *p++ = c; 323 break; 324 } 325 break; 326 case 4: /* ' quoted string */ 327 switch (c) { 328 case '\n': 329 (void) s_ungetc(c); 330 case EOF: 331 case '\'': 332 state = 2; 333 break; 334 case '\\': 335 switch (c = s_gettok1()) { 336 case -1: 337 case -2: /* newlines are invisible */ 338 break; 339 default: 340 if (p < buf + sizeof buf - 1) 341 *p++ = c; 342 } 343 break; 344 default: 345 if (p < buf + sizeof buf - 1) 346 *p++ = c; 347 break; 348 } 349 break; 350 case 10: /* got 0 */ 351 switch (c) { 352 case 'x': 353 case 'X': 354 cx.x_val.v_num = 0; 355 state = 12; 356 break; 357 case '0': case '1': case '2': case '3': case '4': 358 case '5': case '6': case '7': 359 cx.x_val.v_num = c - '0'; 360 state = 13; 361 break; 362 case '8': case '9': 363 cx.x_val.v_num = c - '0'; 364 state = 11; 365 break; 366 default: 367 (void) s_ungetc(c); 368 state = -1; 369 cx.x_token = T_NUM; 370 } 371 break; 372 case 11: /* decimal number */ 373 switch (c) { 374 case '0': case '1': case '2': case '3': case '4': 375 case '5': case '6': case '7': case '8': case '9': 376 cx.x_val.v_num = cx.x_val.v_num * 10 + c - '0'; 377 break; 378 default: 379 (void) s_ungetc(c); 380 state = -1; 381 cx.x_token = T_NUM; 382 } 383 break; 384 case 12: /* hex number */ 385 switch (c) { 386 case '0': case '1': case '2': case '3': case '4': 387 case '5': case '6': case '7': case '8': case '9': 388 cx.x_val.v_num = cx.x_val.v_num * 16 + c - '0'; 389 break; 390 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 391 cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'a' + 10; 392 break; 393 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 394 cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'A' + 10; 395 break; 396 default: 397 (void) s_ungetc(c); 398 state = -1; 399 cx.x_token = T_NUM; 400 } 401 break; 402 case 13: /* octal number */ 403 switch (c) { 404 case '0': case '1': case '2': case '3': case '4': 405 case '5': case '6': case '7': 406 cx.x_val.v_num = cx.x_val.v_num * 8 + c - '0'; 407 break; 408 default: 409 (void) s_ungetc(c); 410 state = -1; 411 cx.x_token = T_NUM; 412 } 413 break; 414 case 20: /* got > */ 415 switch (c) { 416 case '=': 417 cx.x_token = T_GE; 418 state = -1; 419 break; 420 case '>': 421 cx.x_token = T_RS; 422 state = -1; 423 break; 424 default: 425 (void) s_ungetc(c); 426 cx.x_token = T_GT; 427 state = -1; 428 } 429 break; 430 case 21: /* got < */ 431 switch (c) { 432 case '=': 433 cx.x_token = T_LE; 434 state = -1; 435 break; 436 case '<': 437 cx.x_token = T_LS; 438 state = -1; 439 break; 440 default: 441 (void) s_ungetc(c); 442 cx.x_token = T_LT; 443 state = -1; 444 } 445 break; 446 case 22: /* got = */ 447 switch (c) { 448 case '=': 449 cx.x_token = T_EQ; 450 state = -1; 451 break; 452 default: 453 (void) s_ungetc(c); 454 cx.x_token = T_ASSIGN; 455 state = -1; 456 } 457 break; 458 case 23: /* got ! */ 459 switch (c) { 460 case '=': 461 cx.x_token = T_NE; 462 state = -1; 463 break; 464 default: 465 (void) s_ungetc(c); 466 cx.x_token = T_NOT; 467 state = -1; 468 } 469 break; 470 case 24: /* got & */ 471 switch (c) { 472 case '&': 473 cx.x_token = T_ANDAND; 474 state = -1; 475 break; 476 default: 477 (void) s_ungetc(c); 478 cx.x_token = T_AND; 479 state = -1; 480 } 481 break; 482 case 25: /* got | */ 483 switch (c) { 484 case '|': 485 cx.x_token = T_OROR; 486 state = -1; 487 break; 488 default: 489 (void) s_ungetc(c); 490 cx.x_token = T_OR; 491 state = -1; 492 } 493 break; 494 case 26: /* got $ */ 495 switch (c) { 496 case '?': 497 cx.x_token = T_DQ; 498 state = -1; 499 break; 500 default: 501 (void) s_ungetc(c); 502 cx.x_token = T_DOLLAR; 503 state = -1; 504 } 505 break; 506 default: 507 abort(); 508 } 509 if (state >= 0) 510 goto loop; 511 return cx.x_token; 512 } 513 514 s_gettok1() 515 { 516 register c; 517 register n; 518 519 c = s_getc(); /* got \ */ 520 switch (c) { 521 case EOF: 522 return -1; 523 case '\n': 524 return -2; 525 case 'b': 526 return '\b'; 527 case 'f': 528 return '\f'; 529 case 'n': 530 return '\n'; 531 case 'r': 532 return '\r'; 533 case 't': 534 return '\t'; 535 default: 536 return c; 537 case '0': case '1': case '2': case '3': case '4': 538 case '5': case '6': case '7': 539 break; 540 } 541 n = c - '0'; 542 c = s_getc(); /* got \[0-7] */ 543 if (c < '0' || c > '7') { 544 (void) s_ungetc(c); 545 return n; 546 } 547 n = n * 8 + c - '0'; 548 c = s_getc(); /* got \[0-7][0-7] */ 549 if (c < '0' || c > '7') { 550 (void) s_ungetc(c); 551 return n; 552 } 553 return n * 8 + c - '0'; 554 } 555