1 /* 2 * Copyright (c) 1994, 2004, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package sun.tools.java; 27 28 import java.io.IOException; 29 import java.io.InputStream; 30 import java.util.Hashtable; 31 32 /** 33 * A Scanner for Java tokens. Errors are reported 34 * to the environment object.<p> 35 * 36 * The scanner keeps track of the current token, 37 * the value of the current token (if any), and the start 38 * position of the current token.<p> 39 * 40 * The scan() method advances the scanner to the next 41 * token in the input.<p> 42 * 43 * The match() method is used to quickly match opening 44 * brackets (ie: '(', '{', or '[') with their closing 45 * counter part. This is useful during error recovery.<p> 46 * 47 * An position consists of: ((linenr << WHEREOFFSETBITS) | offset) 48 * this means that both the line number and the exact offset into 49 * the file are encoded in each position value.<p> 50 * 51 * The compiler treats either "\n", "\r" or "\r\n" as the 52 * end of a line.<p> 53 * 54 * WARNING: The contents of this source file are not part of any 55 * supported API. Code that depends on them does so at its own risk: 56 * they are subject to change or removal without notice. 57 * 58 * @author Arthur van Hoff 59 */ 60 61 public 62 class Scanner implements Constants { 63 /** 64 * The increment for each character. 65 */ 66 public static final long OFFSETINC = 1; 67 68 /** 69 * The increment for each line. 70 */ 71 public static final long LINEINC = 1L << WHEREOFFSETBITS; 72 73 /** 74 * End of input 75 */ 76 public static final int EOF = -1; 77 78 /** 79 * Where errors are reported 80 */ 81 public Environment env; 82 83 /** 84 * Input reader 85 */ 86 protected ScannerInputReader in; 87 88 /** 89 * If true, present all comments as tokens. 90 * Contents are not saved, but positions are recorded accurately, 91 * so the comment can be recovered from the text. 92 * Line terminations are also returned as comment tokens, 93 * and may be distinguished by their start and end positions, 94 * which are equal (meaning, these tokens contain no chars). 95 */ 96 public boolean scanComments = false; 97 98 /** 99 * Current token 100 */ 101 public int token; 102 103 /** 104 * The position of the current token 105 */ 106 public long pos; 107 108 /** 109 * The position of the previous token 110 */ 111 public long prevPos; 112 113 /** 114 * The current character 115 */ 116 protected int ch; 117 118 /* 119 * Token values. 120 */ 121 public char charValue; 122 public int intValue; 123 public long longValue; 124 public float floatValue; 125 public double doubleValue; 126 public String stringValue; 127 public Identifier idValue; 128 public int radix; // Radix, when reading int or long 129 130 /* 131 * A doc comment preceding the most recent token 132 */ 133 public String docComment; 134 135 /* 136 * A growable character buffer. 137 */ 138 private int count; 139 private char buffer[] = new char[1024]; growBuffer()140 private void growBuffer() { 141 char newBuffer[] = new char[buffer.length * 2]; 142 System.arraycopy(buffer, 0, newBuffer, 0, buffer.length); 143 buffer = newBuffer; 144 } 145 146 // The following two methods have been hand-inlined in 147 // scanDocComment. If you make changes here, you should 148 // check to see if scanDocComment also needs modification. putc(int ch)149 private void putc(int ch) { 150 if (count == buffer.length) { 151 growBuffer(); 152 } 153 buffer[count++] = (char)ch; 154 } 155 bufferString()156 private String bufferString() { 157 return new String(buffer, 0, count); 158 } 159 160 /** 161 * Create a scanner to scan an input stream. 162 */ Scanner(Environment env, InputStream in)163 public Scanner(Environment env, InputStream in) throws IOException { 164 this.env = env; 165 useInputStream(in); 166 } 167 168 /** 169 * Setup input from the given input stream, 170 * and scan the first token from it. 171 */ useInputStream(InputStream in)172 protected void useInputStream(InputStream in) throws IOException { 173 try { 174 this.in = new ScannerInputReader(env, in); 175 } catch (Exception e) { 176 env.setCharacterEncoding(null); 177 this.in = new ScannerInputReader(env, in); 178 } 179 180 ch = this.in.read(); 181 prevPos = this.in.pos; 182 183 scan(); 184 } 185 186 /** 187 * Create a scanner to scan an input stream. 188 */ Scanner(Environment env)189 protected Scanner(Environment env) { 190 this.env = env; 191 // Expect the subclass to call useInputStream at the right time. 192 } 193 194 /** 195 * Define a keyword. 196 */ defineKeyword(int val)197 private static void defineKeyword(int val) { 198 Identifier.lookup(opNames[val]).setType(val); 199 } 200 201 /** 202 * Initialized keyword and token Hashtables 203 */ 204 static { 205 // Statement keywords 206 defineKeyword(FOR); 207 defineKeyword(IF); 208 defineKeyword(ELSE); 209 defineKeyword(WHILE); 210 defineKeyword(DO); 211 defineKeyword(SWITCH); 212 defineKeyword(CASE); 213 defineKeyword(DEFAULT); 214 defineKeyword(BREAK); 215 defineKeyword(CONTINUE); 216 defineKeyword(RETURN); 217 defineKeyword(TRY); 218 defineKeyword(CATCH); 219 defineKeyword(FINALLY); 220 defineKeyword(THROW); 221 222 // Type defineKeywords 223 defineKeyword(BYTE); 224 defineKeyword(CHAR); 225 defineKeyword(SHORT); 226 defineKeyword(INT); 227 defineKeyword(LONG); 228 defineKeyword(FLOAT); 229 defineKeyword(DOUBLE); 230 defineKeyword(VOID); 231 defineKeyword(BOOLEAN); 232 233 // Expression keywords 234 defineKeyword(INSTANCEOF); 235 defineKeyword(TRUE); 236 defineKeyword(FALSE); 237 defineKeyword(NEW); 238 defineKeyword(THIS); 239 defineKeyword(SUPER); 240 defineKeyword(NULL); 241 242 // Declaration keywords 243 defineKeyword(IMPORT); 244 defineKeyword(CLASS); 245 defineKeyword(EXTENDS); 246 defineKeyword(IMPLEMENTS); 247 defineKeyword(INTERFACE); 248 defineKeyword(PACKAGE); 249 defineKeyword(THROWS); 250 251 // Modifier keywords 252 defineKeyword(PRIVATE); 253 defineKeyword(PUBLIC); 254 defineKeyword(PROTECTED); 255 defineKeyword(STATIC); 256 defineKeyword(TRANSIENT); 257 defineKeyword(SYNCHRONIZED); 258 defineKeyword(NATIVE); 259 defineKeyword(ABSTRACT); 260 defineKeyword(VOLATILE); 261 defineKeyword(FINAL); 262 defineKeyword(STRICTFP); 263 264 // reserved keywords 265 defineKeyword(CONST); 266 defineKeyword(GOTO); 267 } 268 269 /** 270 * Scan a comment. This method should be 271 * called once the initial /, * and the next 272 * character have been read. 273 */ skipComment()274 private void skipComment() throws IOException { 275 while (true) { 276 switch (ch) { 277 case EOF: 278 env.error(pos, "eof.in.comment"); 279 return; 280 281 case '*': 282 if ((ch = in.read()) == '/') { 283 ch = in.read(); 284 return; 285 } 286 break; 287 288 default: 289 ch = in.read(); 290 break; 291 } 292 } 293 } 294 295 /** 296 * Scan a doc comment. This method should be called 297 * once the initial /, * and * have been read. It gathers 298 * the content of the comment (witout leading spaces and '*'s) 299 * in the string buffer. 300 */ scanDocComment()301 private String scanDocComment() throws IOException { 302 // Note: this method has been hand-optimized to yield 303 // better performance. This was done after it was noted 304 // that javadoc spent a great deal of its time here. 305 // This should also help the performance of the compiler 306 // as well -- it scans the doc comments to find 307 // @deprecated tags. 308 // 309 // The logic of the method has been completely rewritten 310 // to avoid the use of flags that need to be looked at 311 // for every character read. Members that are accessed 312 // more than once have been stored in local variables. 313 // The methods putc() and bufferString() have been 314 // inlined by hand. Extra cases have been added to 315 // switch statements to trick the compiler into generating 316 // a tableswitch instead of a lookupswitch. 317 // 318 // This implementation aims to preserve the previous 319 // behavior of this method. 320 321 int c; 322 323 // Put `in' in a local variable. 324 final ScannerInputReader in = this.in; 325 326 // We maintain the buffer locally rather than calling putc(). 327 char[] buffer = this.buffer; 328 int count = 0; 329 330 // We are called pointing at the second star of the doc 331 // comment: 332 // 333 // Input: /** the rest of the comment ... */ 334 // ^ 335 // 336 // We rely on this in the code below. 337 338 // Consume any number of stars. 339 while ((c = in.read()) == '*') 340 ; 341 342 // Is the comment of the form /**/, /***/, /****/, etc.? 343 if (c == '/') { 344 // Set ch and return 345 ch = in.read(); 346 return ""; 347 } 348 349 // Skip a newline on the first line of the comment. 350 if (c == '\n') { 351 c = in.read(); 352 } 353 354 outerLoop: 355 // The outerLoop processes the doc comment, looping once 356 // for each line. For each line, it first strips off 357 // whitespace, then it consumes any stars, then it 358 // puts the rest of the line into our buffer. 359 while (true) { 360 361 // The wsLoop consumes whitespace from the beginning 362 // of each line. 363 wsLoop: 364 while (true) { 365 switch (c) { 366 case ' ': 367 case '\t': 368 // We could check for other forms of whitespace 369 // as well, but this is left as is for minimum 370 // disturbance of functionality. 371 // 372 // Just skip whitespace. 373 c = in.read(); 374 break; 375 376 // We have added extra cases here to trick the 377 // compiler into using a tableswitch instead of 378 // a lookupswitch. They can be removed without 379 // a change in meaning. 380 case 10: case 11: case 12: case 13: case 14: case 15: 381 case 16: case 17: case 18: case 19: case 20: case 21: 382 case 22: case 23: case 24: case 25: case 26: case 27: 383 case 28: case 29: case 30: case 31: 384 default: 385 // We've seen something that isn't whitespace, 386 // jump out. 387 break wsLoop; 388 } 389 } // end wsLoop. 390 391 // Are there stars here? If so, consume them all 392 // and check for the end of comment. 393 if (c == '*') { 394 // Skip all of the stars... 395 do { 396 c = in.read(); 397 } while (c == '*'); 398 399 // ...then check for the closing slash. 400 if (c == '/') { 401 // We're done with the doc comment. 402 // Set ch and break out. 403 ch = in.read(); 404 break outerLoop; 405 } 406 } 407 408 // The textLoop processes the rest of the characters 409 // on the line, adding them to our buffer. 410 textLoop: 411 while (true) { 412 switch (c) { 413 case EOF: 414 // We've seen a premature EOF. Break out 415 // of the loop. 416 env.error(pos, "eof.in.comment"); 417 ch = EOF; 418 break outerLoop; 419 420 case '*': 421 // Is this just a star? Or is this the 422 // end of a comment? 423 c = in.read(); 424 if (c == '/') { 425 // This is the end of the comment, 426 // set ch and return our buffer. 427 ch = in.read(); 428 break outerLoop; 429 } 430 // This is just an ordinary star. Add it to 431 // the buffer. 432 if (count == buffer.length) { 433 growBuffer(); 434 buffer = this.buffer; 435 } 436 buffer[count++] = '*'; 437 break; 438 439 case '\n': 440 // We've seen a newline. Add it to our 441 // buffer and break out of this loop, 442 // starting fresh on a new line. 443 if (count == buffer.length) { 444 growBuffer(); 445 buffer = this.buffer; 446 } 447 buffer[count++] = '\n'; 448 c = in.read(); 449 break textLoop; 450 451 // Again, the extra cases here are a trick 452 // to get the compiler to generate a tableswitch. 453 case 0: case 1: case 2: case 3: case 4: case 5: 454 case 6: case 7: case 8: case 11: case 12: case 13: 455 case 14: case 15: case 16: case 17: case 18: case 19: 456 case 20: case 21: case 22: case 23: case 24: case 25: 457 case 26: case 27: case 28: case 29: case 30: case 31: 458 case 32: case 33: case 34: case 35: case 36: case 37: 459 case 38: case 39: case 40: 460 default: 461 // Add the character to our buffer. 462 if (count == buffer.length) { 463 growBuffer(); 464 buffer = this.buffer; 465 } 466 buffer[count++] = (char)c; 467 c = in.read(); 468 break; 469 } 470 } // end textLoop 471 } // end outerLoop 472 473 // We have scanned our doc comment. It is stored in 474 // buffer. The previous implementation of scanDocComment 475 // stripped off all trailing spaces and stars from the comment. 476 // We will do this as well, so as to cause a minimum of 477 // disturbance. Is this what we want? 478 if (count > 0) { 479 int i = count - 1; 480 trailLoop: 481 while (i > -1) { 482 switch (buffer[i]) { 483 case ' ': 484 case '\t': 485 case '*': 486 i--; 487 break; 488 // And again, the extra cases here are a trick 489 // to get the compiler to generate a tableswitch. 490 case 0: case 1: case 2: case 3: case 4: case 5: 491 case 6: case 7: case 8: case 10: case 11: case 12: 492 case 13: case 14: case 15: case 16: case 17: case 18: 493 case 19: case 20: case 21: case 22: case 23: case 24: 494 case 25: case 26: case 27: case 28: case 29: case 30: 495 case 31: case 33: case 34: case 35: case 36: case 37: 496 case 38: case 39: case 40: 497 default: 498 break trailLoop; 499 } 500 } 501 count = i + 1; 502 503 // Return the text of the doc comment. 504 return new String(buffer, 0, count); 505 } else { 506 return ""; 507 } 508 } 509 510 /** 511 * Scan a number. The first digit of the number should be the current 512 * character. We may be scanning hex, decimal, or octal at this point 513 */ scanNumber()514 private void scanNumber() throws IOException { 515 boolean seenNonOctal = false; 516 boolean overflow = false; 517 boolean seenDigit = false; // used to detect invalid hex number 0xL 518 radix = (ch == '0' ? 8 : 10); 519 long value = ch - '0'; 520 count = 0; 521 putc(ch); // save character in buffer 522 numberLoop: 523 for (;;) { 524 switch (ch = in.read()) { 525 case '.': 526 if (radix == 16) 527 break numberLoop; // an illegal character 528 scanReal(); 529 return; 530 531 case '8': case '9': 532 // We can't yet throw an error if reading an octal. We might 533 // discover we're really reading a real. 534 seenNonOctal = true; 535 case '0': case '1': case '2': case '3': 536 case '4': case '5': case '6': case '7': 537 seenDigit = true; 538 putc(ch); 539 if (radix == 10) { 540 overflow = overflow || (value * 10)/10 != value; 541 value = (value * 10) + (ch - '0'); 542 overflow = overflow || (value - 1 < -1); 543 } else if (radix == 8) { 544 overflow = overflow || (value >>> 61) != 0; 545 value = (value << 3) + (ch - '0'); 546 } else { 547 overflow = overflow || (value >>> 60) != 0; 548 value = (value << 4) + (ch - '0'); 549 } 550 break; 551 552 case 'd': case 'D': case 'e': case 'E': case 'f': case 'F': 553 if (radix != 16) { 554 scanReal(); 555 return; 556 } 557 // fall through 558 case 'a': case 'A': case 'b': case 'B': case 'c': case 'C': 559 seenDigit = true; 560 putc(ch); 561 if (radix != 16) 562 break numberLoop; // an illegal character 563 overflow = overflow || (value >>> 60) != 0; 564 value = (value << 4) + 10 + 565 Character.toLowerCase((char)ch) - 'a'; 566 break; 567 568 case 'l': case 'L': 569 ch = in.read(); // skip over 'l' 570 longValue = value; 571 token = LONGVAL; 572 break numberLoop; 573 574 case 'x': case 'X': 575 // if the first character is a '0' and this is the second 576 // letter, then read in a hexadecimal number. Otherwise, error. 577 if (count == 1 && radix == 8) { 578 radix = 16; 579 seenDigit = false; 580 break; 581 } else { 582 // we'll get an illegal character error 583 break numberLoop; 584 } 585 586 default: 587 intValue = (int)value; 588 token = INTVAL; 589 break numberLoop; 590 } 591 } // while true 592 593 // We have just finished reading the number. The next thing better 594 // not be a letter or digit. 595 // Note: There will be deprecation warnings against these uses 596 // of Character.isJavaLetterOrDigit and Character.isJavaLetter. 597 // Do not fix them yet; allow the compiler to run on pre-JDK1.1 VMs. 598 if (Character.isJavaLetterOrDigit((char)ch) || ch == '.') { 599 env.error(in.pos, "invalid.number"); 600 do { ch = in.read(); } 601 while (Character.isJavaLetterOrDigit((char)ch) || ch == '.'); 602 intValue = 0; 603 token = INTVAL; 604 } else if (radix == 8 && seenNonOctal) { 605 // A bogus octal literal. 606 intValue = 0; 607 token = INTVAL; 608 env.error(pos, "invalid.octal.number"); 609 } else if (radix == 16 && seenDigit == false) { 610 // A hex literal with no digits, 0xL, for example. 611 intValue = 0; 612 token = INTVAL; 613 env.error(pos, "invalid.hex.number"); 614 } else { 615 if (token == INTVAL) { 616 // Check for overflow. Note that base 10 literals 617 // have different rules than base 8 and 16. 618 overflow = overflow || 619 (value & 0xFFFFFFFF00000000L) != 0 || 620 (radix == 10 && value > 2147483648L); 621 622 if (overflow) { 623 intValue = 0; 624 625 // Give a specific error message which tells 626 // the user the range. 627 switch (radix) { 628 case 8: 629 env.error(pos, "overflow.int.oct"); 630 break; 631 case 10: 632 env.error(pos, "overflow.int.dec"); 633 break; 634 case 16: 635 env.error(pos, "overflow.int.hex"); 636 break; 637 default: 638 throw new CompilerError("invalid radix"); 639 } 640 } 641 } else { 642 if (overflow) { 643 longValue = 0; 644 645 // Give a specific error message which tells 646 // the user the range. 647 switch (radix) { 648 case 8: 649 env.error(pos, "overflow.long.oct"); 650 break; 651 case 10: 652 env.error(pos, "overflow.long.dec"); 653 break; 654 case 16: 655 env.error(pos, "overflow.long.hex"); 656 break; 657 default: 658 throw new CompilerError("invalid radix"); 659 } 660 } 661 } 662 } 663 } 664 665 /** 666 * Scan a float. We are either looking at the decimal, or we have already 667 * seen it and put it into the buffer. We haven't seen an exponent. 668 * Scan a float. Should be called with the current character is either 669 * the 'e', 'E' or '.' 670 */ scanReal()671 private void scanReal() throws IOException { 672 boolean seenExponent = false; 673 boolean isSingleFloat = false; 674 char lastChar; 675 if (ch == '.') { 676 putc(ch); 677 ch = in.read(); 678 } 679 680 numberLoop: 681 for ( ; ; ch = in.read()) { 682 switch (ch) { 683 case '0': case '1': case '2': case '3': case '4': 684 case '5': case '6': case '7': case '8': case '9': 685 putc(ch); 686 break; 687 688 case 'e': case 'E': 689 if (seenExponent) 690 break numberLoop; // we'll get a format error 691 putc(ch); 692 seenExponent = true; 693 break; 694 695 case '+': case '-': 696 lastChar = buffer[count - 1]; 697 if (lastChar != 'e' && lastChar != 'E') 698 break numberLoop; // this isn't an error, though! 699 putc(ch); 700 break; 701 702 case 'f': case 'F': 703 ch = in.read(); // skip over 'f' 704 isSingleFloat = true; 705 break numberLoop; 706 707 case 'd': case 'D': 708 ch = in.read(); // skip over 'd' 709 // fall through 710 default: 711 break numberLoop; 712 } // sswitch 713 } // loop 714 715 // we have just finished reading the number. The next thing better 716 // not be a letter or digit. 717 if (Character.isJavaLetterOrDigit((char)ch) || ch == '.') { 718 env.error(in.pos, "invalid.number"); 719 do { ch = in.read(); } 720 while (Character.isJavaLetterOrDigit((char)ch) || ch == '.'); 721 doubleValue = 0; 722 token = DOUBLEVAL; 723 } else { 724 token = isSingleFloat ? FLOATVAL : DOUBLEVAL; 725 try { 726 lastChar = buffer[count - 1]; 727 if (lastChar == 'e' || lastChar == 'E' 728 || lastChar == '+' || lastChar == '-') { 729 env.error(in.pos -1, "float.format"); 730 } else if (isSingleFloat) { 731 String string = bufferString(); 732 floatValue = Float.valueOf(string).floatValue(); 733 if (Float.isInfinite(floatValue)) { 734 env.error(pos, "overflow.float"); 735 } else if (floatValue == 0 && !looksLikeZero(string)) { 736 env.error(pos, "underflow.float"); 737 } 738 } else { 739 String string = bufferString(); 740 doubleValue = Double.valueOf(string).doubleValue(); 741 if (Double.isInfinite(doubleValue)) { 742 env.error(pos, "overflow.double"); 743 } else if (doubleValue == 0 && !looksLikeZero(string)) { 744 env.error(pos, "underflow.double"); 745 } 746 } 747 } catch (NumberFormatException ee) { 748 env.error(pos, "float.format"); 749 doubleValue = 0; 750 floatValue = 0; 751 } 752 } 753 return; 754 } 755 756 // We have a token that parses as a number. Is this token possibly zero? 757 // i.e. does it have a non-zero value in the mantissa? looksLikeZero(String token)758 private static boolean looksLikeZero(String token) { 759 int length = token.length(); 760 for (int i = 0; i < length; i++) { 761 switch (token.charAt(i)) { 762 case 0: case '.': 763 continue; 764 case '1': case '2': case '3': case '4': case '5': 765 case '6': case '7': case '8': case '9': 766 return false; 767 case 'e': case 'E': case 'f': case 'F': 768 return true; 769 } 770 } 771 return true; 772 } 773 774 /** 775 * Scan an escape character. 776 * @return the character or -1 if it escaped an 777 * end-of-line. 778 */ scanEscapeChar()779 private int scanEscapeChar() throws IOException { 780 long p = in.pos; 781 782 switch (ch = in.read()) { 783 case '0': case '1': case '2': case '3': 784 case '4': case '5': case '6': case '7': { 785 int n = ch - '0'; 786 for (int i = 2 ; i > 0 ; i--) { 787 switch (ch = in.read()) { 788 case '0': case '1': case '2': case '3': 789 case '4': case '5': case '6': case '7': 790 n = (n << 3) + ch - '0'; 791 break; 792 793 default: 794 if (n > 0xFF) { 795 env.error(p, "invalid.escape.char"); 796 } 797 return n; 798 } 799 } 800 ch = in.read(); 801 if (n > 0xFF) { 802 env.error(p, "invalid.escape.char"); 803 } 804 return n; 805 } 806 807 case 'r': ch = in.read(); return '\r'; 808 case 'n': ch = in.read(); return '\n'; 809 case 'f': ch = in.read(); return '\f'; 810 case 'b': ch = in.read(); return '\b'; 811 case 't': ch = in.read(); return '\t'; 812 case '\\': ch = in.read(); return '\\'; 813 case '\"': ch = in.read(); return '\"'; 814 case '\'': ch = in.read(); return '\''; 815 } 816 817 env.error(p, "invalid.escape.char"); 818 ch = in.read(); 819 return -1; 820 } 821 822 /** 823 * Scan a string. The current character 824 * should be the opening " of the string. 825 */ scanString()826 private void scanString() throws IOException { 827 token = STRINGVAL; 828 count = 0; 829 ch = in.read(); 830 831 // Scan a String 832 while (true) { 833 switch (ch) { 834 case EOF: 835 env.error(pos, "eof.in.string"); 836 stringValue = bufferString(); 837 return; 838 839 case '\r': 840 case '\n': 841 ch = in.read(); 842 env.error(pos, "newline.in.string"); 843 stringValue = bufferString(); 844 return; 845 846 case '"': 847 ch = in.read(); 848 stringValue = bufferString(); 849 return; 850 851 case '\\': { 852 int c = scanEscapeChar(); 853 if (c >= 0) { 854 putc((char)c); 855 } 856 break; 857 } 858 859 default: 860 putc(ch); 861 ch = in.read(); 862 break; 863 } 864 } 865 } 866 867 /** 868 * Scan a character. The current character should be 869 * the opening ' of the character constant. 870 */ scanCharacter()871 private void scanCharacter() throws IOException { 872 token = CHARVAL; 873 874 switch (ch = in.read()) { 875 case '\\': 876 int c = scanEscapeChar(); 877 charValue = (char)((c >= 0) ? c : 0); 878 break; 879 880 case '\'': 881 // There are two standard problems this case deals with. One 882 // is the malformed single quote constant (i.e. the programmer 883 // uses ''' instead of '\'') and the other is the empty 884 // character constant (i.e. ''). Just consume any number of 885 // single quotes and emit an error message. 886 charValue = 0; 887 env.error(pos, "invalid.char.constant"); 888 ch = in.read(); 889 while (ch == '\'') { 890 ch = in.read(); 891 } 892 return; 893 894 case '\r': 895 case '\n': 896 charValue = 0; 897 env.error(pos, "invalid.char.constant"); 898 return; 899 900 default: 901 charValue = (char)ch; 902 ch = in.read(); 903 break; 904 } 905 906 if (ch == '\'') { 907 ch = in.read(); 908 } else { 909 env.error(pos, "invalid.char.constant"); 910 while (true) { 911 switch (ch) { 912 case '\'': 913 ch = in.read(); 914 return; 915 case ';': 916 case '\n': 917 case EOF: 918 return; 919 default: 920 ch = in.read(); 921 } 922 } 923 } 924 } 925 926 /** 927 * Scan an Identifier. The current character should 928 * be the first character of the identifier. 929 */ scanIdentifier()930 private void scanIdentifier() throws IOException { 931 count = 0; 932 933 while (true) { 934 putc(ch); 935 switch (ch = in.read()) { 936 case 'a': case 'b': case 'c': case 'd': case 'e': 937 case 'f': case 'g': case 'h': case 'i': case 'j': 938 case 'k': case 'l': case 'm': case 'n': case 'o': 939 case 'p': case 'q': case 'r': case 's': case 't': 940 case 'u': case 'v': case 'w': case 'x': case 'y': 941 case 'z': 942 case 'A': case 'B': case 'C': case 'D': case 'E': 943 case 'F': case 'G': case 'H': case 'I': case 'J': 944 case 'K': case 'L': case 'M': case 'N': case 'O': 945 case 'P': case 'Q': case 'R': case 'S': case 'T': 946 case 'U': case 'V': case 'W': case 'X': case 'Y': 947 case 'Z': 948 case '0': case '1': case '2': case '3': case '4': 949 case '5': case '6': case '7': case '8': case '9': 950 case '$': case '_': 951 break; 952 953 default: 954 if (!Character.isJavaLetterOrDigit((char)ch)) { 955 idValue = Identifier.lookup(bufferString()); 956 token = idValue.getType(); 957 return; 958 } 959 } 960 } 961 } 962 963 /** 964 * The ending position of the current token 965 */ 966 // Note: This should be part of the pos itself. getEndPos()967 public long getEndPos() { 968 return in.pos; 969 } 970 971 /** 972 * If the current token is IDENT, return the identifier occurrence. 973 * It will be freshly allocated. 974 */ getIdToken()975 public IdentifierToken getIdToken() { 976 return (token != IDENT) ? null : new IdentifierToken(pos, idValue); 977 } 978 979 /** 980 * Scan the next token. 981 * @return the position of the previous token. 982 */ scan()983 public long scan() throws IOException { 984 return xscan(); 985 } 986 xscan()987 protected long xscan() throws IOException { 988 final ScannerInputReader in = this.in; 989 long retPos = pos; 990 prevPos = in.pos; 991 docComment = null; 992 while (true) { 993 pos = in.pos; 994 995 switch (ch) { 996 case EOF: 997 token = EOF; 998 return retPos; 999 1000 case '\n': 1001 if (scanComments) { 1002 ch = ' '; 1003 // Avoid this path the next time around. 1004 // Do not just call in.read; we want to present 1005 // a null token (and also avoid read-ahead). 1006 token = COMMENT; 1007 return retPos; 1008 } 1009 case ' ': 1010 case '\t': 1011 case '\f': 1012 ch = in.read(); 1013 break; 1014 1015 case '/': 1016 switch (ch = in.read()) { 1017 case '/': 1018 // Parse a // comment 1019 while (((ch = in.read()) != EOF) && (ch != '\n')); 1020 if (scanComments) { 1021 token = COMMENT; 1022 return retPos; 1023 } 1024 break; 1025 1026 case '*': 1027 ch = in.read(); 1028 if (ch == '*') { 1029 docComment = scanDocComment(); 1030 } else { 1031 skipComment(); 1032 } 1033 if (scanComments) { 1034 return retPos; 1035 } 1036 break; 1037 1038 case '=': 1039 ch = in.read(); 1040 token = ASGDIV; 1041 return retPos; 1042 1043 default: 1044 token = DIV; 1045 return retPos; 1046 } 1047 break; 1048 1049 case '"': 1050 scanString(); 1051 return retPos; 1052 1053 case '\'': 1054 scanCharacter(); 1055 return retPos; 1056 1057 case '0': case '1': case '2': case '3': case '4': 1058 case '5': case '6': case '7': case '8': case '9': 1059 scanNumber(); 1060 return retPos; 1061 1062 case '.': 1063 switch (ch = in.read()) { 1064 case '0': case '1': case '2': case '3': case '4': 1065 case '5': case '6': case '7': case '8': case '9': 1066 count = 0; 1067 putc('.'); 1068 scanReal(); 1069 break; 1070 default: 1071 token = FIELD; 1072 } 1073 return retPos; 1074 1075 case '{': 1076 ch = in.read(); 1077 token = LBRACE; 1078 return retPos; 1079 1080 case '}': 1081 ch = in.read(); 1082 token = RBRACE; 1083 return retPos; 1084 1085 case '(': 1086 ch = in.read(); 1087 token = LPAREN; 1088 return retPos; 1089 1090 case ')': 1091 ch = in.read(); 1092 token = RPAREN; 1093 return retPos; 1094 1095 case '[': 1096 ch = in.read(); 1097 token = LSQBRACKET; 1098 return retPos; 1099 1100 case ']': 1101 ch = in.read(); 1102 token = RSQBRACKET; 1103 return retPos; 1104 1105 case ',': 1106 ch = in.read(); 1107 token = COMMA; 1108 return retPos; 1109 1110 case ';': 1111 ch = in.read(); 1112 token = SEMICOLON; 1113 return retPos; 1114 1115 case '?': 1116 ch = in.read(); 1117 token = QUESTIONMARK; 1118 return retPos; 1119 1120 case '~': 1121 ch = in.read(); 1122 token = BITNOT; 1123 return retPos; 1124 1125 case ':': 1126 ch = in.read(); 1127 token = COLON; 1128 return retPos; 1129 1130 case '-': 1131 switch (ch = in.read()) { 1132 case '-': 1133 ch = in.read(); 1134 token = DEC; 1135 return retPos; 1136 1137 case '=': 1138 ch = in.read(); 1139 token = ASGSUB; 1140 return retPos; 1141 } 1142 token = SUB; 1143 return retPos; 1144 1145 case '+': 1146 switch (ch = in.read()) { 1147 case '+': 1148 ch = in.read(); 1149 token = INC; 1150 return retPos; 1151 1152 case '=': 1153 ch = in.read(); 1154 token = ASGADD; 1155 return retPos; 1156 } 1157 token = ADD; 1158 return retPos; 1159 1160 case '<': 1161 switch (ch = in.read()) { 1162 case '<': 1163 if ((ch = in.read()) == '=') { 1164 ch = in.read(); 1165 token = ASGLSHIFT; 1166 return retPos; 1167 } 1168 token = LSHIFT; 1169 return retPos; 1170 1171 case '=': 1172 ch = in.read(); 1173 token = LE; 1174 return retPos; 1175 } 1176 token = LT; 1177 return retPos; 1178 1179 case '>': 1180 switch (ch = in.read()) { 1181 case '>': 1182 switch (ch = in.read()) { 1183 case '=': 1184 ch = in.read(); 1185 token = ASGRSHIFT; 1186 return retPos; 1187 1188 case '>': 1189 if ((ch = in.read()) == '=') { 1190 ch = in.read(); 1191 token = ASGURSHIFT; 1192 return retPos; 1193 } 1194 token = URSHIFT; 1195 return retPos; 1196 } 1197 token = RSHIFT; 1198 return retPos; 1199 1200 case '=': 1201 ch = in.read(); 1202 token = GE; 1203 return retPos; 1204 } 1205 token = GT; 1206 return retPos; 1207 1208 case '|': 1209 switch (ch = in.read()) { 1210 case '|': 1211 ch = in.read(); 1212 token = OR; 1213 return retPos; 1214 1215 case '=': 1216 ch = in.read(); 1217 token = ASGBITOR; 1218 return retPos; 1219 } 1220 token = BITOR; 1221 return retPos; 1222 1223 case '&': 1224 switch (ch = in.read()) { 1225 case '&': 1226 ch = in.read(); 1227 token = AND; 1228 return retPos; 1229 1230 case '=': 1231 ch = in.read(); 1232 token = ASGBITAND; 1233 return retPos; 1234 } 1235 token = BITAND; 1236 return retPos; 1237 1238 case '=': 1239 if ((ch = in.read()) == '=') { 1240 ch = in.read(); 1241 token = EQ; 1242 return retPos; 1243 } 1244 token = ASSIGN; 1245 return retPos; 1246 1247 case '%': 1248 if ((ch = in.read()) == '=') { 1249 ch = in.read(); 1250 token = ASGREM; 1251 return retPos; 1252 } 1253 token = REM; 1254 return retPos; 1255 1256 case '^': 1257 if ((ch = in.read()) == '=') { 1258 ch = in.read(); 1259 token = ASGBITXOR; 1260 return retPos; 1261 } 1262 token = BITXOR; 1263 return retPos; 1264 1265 case '!': 1266 if ((ch = in.read()) == '=') { 1267 ch = in.read(); 1268 token = NE; 1269 return retPos; 1270 } 1271 token = NOT; 1272 return retPos; 1273 1274 case '*': 1275 if ((ch = in.read()) == '=') { 1276 ch = in.read(); 1277 token = ASGMUL; 1278 return retPos; 1279 } 1280 token = MUL; 1281 return retPos; 1282 1283 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 1284 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': 1285 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': 1286 case 's': case 't': case 'u': case 'v': case 'w': case 'x': 1287 case 'y': case 'z': 1288 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 1289 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': 1290 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': 1291 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': 1292 case 'Y': case 'Z': 1293 case '$': case '_': 1294 scanIdentifier(); 1295 return retPos; 1296 1297 case '\u001a': 1298 // Our one concession to DOS. 1299 if ((ch = in.read()) == EOF) { 1300 token = EOF; 1301 return retPos; 1302 } 1303 env.error(pos, "funny.char"); 1304 ch = in.read(); 1305 break; 1306 1307 1308 default: 1309 if (Character.isJavaLetter((char)ch)) { 1310 scanIdentifier(); 1311 return retPos; 1312 } 1313 env.error(pos, "funny.char"); 1314 ch = in.read(); 1315 break; 1316 } 1317 } 1318 } 1319 1320 /** 1321 * Scan to a matching '}', ']' or ')'. The current token must be 1322 * a '{', '[' or '('; 1323 */ match(int open, int close)1324 public void match(int open, int close) throws IOException { 1325 int depth = 1; 1326 1327 while (true) { 1328 scan(); 1329 if (token == open) { 1330 depth++; 1331 } else if (token == close) { 1332 if (--depth == 0) { 1333 return; 1334 } 1335 } else if (token == EOF) { 1336 env.error(pos, "unbalanced.paren"); 1337 return; 1338 } 1339 } 1340 } 1341 } 1342