1 /* 2 * Copyright (c) 1994, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package sun.tools.java; 27 28 import java.io.IOException; 29 import java.io.InputStream; 30 import java.util.Hashtable; 31 32 /** 33 * A Scanner for Java tokens. Errors are reported 34 * to the environment object.<p> 35 * 36 * The scanner keeps track of the current token, 37 * the value of the current token (if any), and the start 38 * position of the current token.<p> 39 * 40 * The scan() method advances the scanner to the next 41 * token in the input.<p> 42 * 43 * The match() method is used to quickly match opening 44 * brackets (ie: '(', '{', or '[') with their closing 45 * counter part. This is useful during error recovery.<p> 46 * 47 * An position consists of: ((linenr << WHEREOFFSETBITS) | offset) 48 * this means that both the line number and the exact offset into 49 * the file are encoded in each position value.<p> 50 * 51 * The compiler treats either "\n", "\r" or "\r\n" as the 52 * end of a line.<p> 53 * 54 * WARNING: The contents of this source file are not part of any 55 * supported API. Code that depends on them does so at its own risk: 56 * they are subject to change or removal without notice. 57 * 58 * @author Arthur van Hoff 59 */ 60 61 @SuppressWarnings("deprecation") 62 public 63 class Scanner implements Constants { 64 /** 65 * The increment for each character. 66 */ 67 public static final long OFFSETINC = 1; 68 69 /** 70 * The increment for each line. 71 */ 72 public static final long LINEINC = 1L << WHEREOFFSETBITS; 73 74 /** 75 * End of input 76 */ 77 public static final int EOF = -1; 78 79 /** 80 * Where errors are reported 81 */ 82 public Environment env; 83 84 /** 85 * Input reader 86 */ 87 protected ScannerInputReader in; 88 89 /** 90 * If true, present all comments as tokens. 91 * Contents are not saved, but positions are recorded accurately, 92 * so the comment can be recovered from the text. 93 * Line terminations are also returned as comment tokens, 94 * and may be distinguished by their start and end positions, 95 * which are equal (meaning, these tokens contain no chars). 96 */ 97 public boolean scanComments = false; 98 99 /** 100 * Current token 101 */ 102 public int token; 103 104 /** 105 * The position of the current token 106 */ 107 public long pos; 108 109 /** 110 * The position of the previous token 111 */ 112 public long prevPos; 113 114 /** 115 * The current character 116 */ 117 protected int ch; 118 119 /* 120 * Token values. 121 */ 122 public char charValue; 123 public int intValue; 124 public long longValue; 125 public float floatValue; 126 public double doubleValue; 127 public String stringValue; 128 public Identifier idValue; 129 public int radix; // Radix, when reading int or long 130 131 /* 132 * A doc comment preceding the most recent token 133 */ 134 public String docComment; 135 136 /* 137 * A growable character buffer. 138 */ 139 private int count; 140 private char buffer[] = new char[1024]; growBuffer()141 private void growBuffer() { 142 char newBuffer[] = new char[buffer.length * 2]; 143 System.arraycopy(buffer, 0, newBuffer, 0, buffer.length); 144 buffer = newBuffer; 145 } 146 147 // The following two methods have been hand-inlined in 148 // scanDocComment. If you make changes here, you should 149 // check to see if scanDocComment also needs modification. putc(int ch)150 private void putc(int ch) { 151 if (count == buffer.length) { 152 growBuffer(); 153 } 154 buffer[count++] = (char)ch; 155 } 156 bufferString()157 private String bufferString() { 158 return new String(buffer, 0, count); 159 } 160 161 /** 162 * Create a scanner to scan an input stream. 163 */ Scanner(Environment env, InputStream in)164 public Scanner(Environment env, InputStream in) throws IOException { 165 this.env = env; 166 useInputStream(in); 167 } 168 169 /** 170 * Setup input from the given input stream, 171 * and scan the first token from it. 172 */ useInputStream(InputStream in)173 protected void useInputStream(InputStream in) throws IOException { 174 try { 175 this.in = new ScannerInputReader(env, in); 176 } catch (Exception e) { 177 env.setCharacterEncoding(null); 178 this.in = new ScannerInputReader(env, in); 179 } 180 181 ch = this.in.read(); 182 prevPos = this.in.pos; 183 184 scan(); 185 } 186 187 /** 188 * Create a scanner to scan an input stream. 189 */ Scanner(Environment env)190 protected Scanner(Environment env) { 191 this.env = env; 192 // Expect the subclass to call useInputStream at the right time. 193 } 194 195 /** 196 * Define a keyword. 197 */ defineKeyword(int val)198 private static void defineKeyword(int val) { 199 Identifier.lookup(opNames[val]).setType(val); 200 } 201 202 /** 203 * Initialized keyword and token Hashtables 204 */ 205 static { 206 // Statement keywords 207 defineKeyword(FOR); 208 defineKeyword(IF); 209 defineKeyword(ELSE); 210 defineKeyword(WHILE); 211 defineKeyword(DO); 212 defineKeyword(SWITCH); 213 defineKeyword(CASE); 214 defineKeyword(DEFAULT); 215 defineKeyword(BREAK); 216 defineKeyword(CONTINUE); 217 defineKeyword(RETURN); 218 defineKeyword(TRY); 219 defineKeyword(CATCH); 220 defineKeyword(FINALLY); 221 defineKeyword(THROW); 222 223 // Type defineKeywords 224 defineKeyword(BYTE); 225 defineKeyword(CHAR); 226 defineKeyword(SHORT); 227 defineKeyword(INT); 228 defineKeyword(LONG); 229 defineKeyword(FLOAT); 230 defineKeyword(DOUBLE); 231 defineKeyword(VOID); 232 defineKeyword(BOOLEAN); 233 234 // Expression keywords 235 defineKeyword(INSTANCEOF); 236 defineKeyword(TRUE); 237 defineKeyword(FALSE); 238 defineKeyword(NEW); 239 defineKeyword(THIS); 240 defineKeyword(SUPER); 241 defineKeyword(NULL); 242 243 // Declaration keywords 244 defineKeyword(IMPORT); 245 defineKeyword(CLASS); 246 defineKeyword(EXTENDS); 247 defineKeyword(IMPLEMENTS); 248 defineKeyword(INTERFACE); 249 defineKeyword(PACKAGE); 250 defineKeyword(THROWS); 251 252 // Modifier keywords 253 defineKeyword(PRIVATE); 254 defineKeyword(PUBLIC); 255 defineKeyword(PROTECTED); 256 defineKeyword(STATIC); 257 defineKeyword(TRANSIENT); 258 defineKeyword(SYNCHRONIZED); 259 defineKeyword(NATIVE); 260 defineKeyword(ABSTRACT); 261 defineKeyword(VOLATILE); 262 defineKeyword(FINAL); 263 defineKeyword(STRICTFP); 264 265 // reserved keywords 266 defineKeyword(CONST); 267 defineKeyword(GOTO); 268 } 269 270 /** 271 * Scan a comment. This method should be 272 * called once the initial /, * and the next 273 * character have been read. 274 */ skipComment()275 private void skipComment() throws IOException { 276 while (true) { 277 switch (ch) { 278 case EOF: 279 env.error(pos, "eof.in.comment"); 280 return; 281 282 case '*': 283 if ((ch = in.read()) == '/') { 284 ch = in.read(); 285 return; 286 } 287 break; 288 289 default: 290 ch = in.read(); 291 break; 292 } 293 } 294 } 295 296 /** 297 * Scan a doc comment. This method should be called 298 * once the initial /, * and * have been read. It gathers 299 * the content of the comment (witout leading spaces and '*'s) 300 * in the string buffer. 301 */ scanDocComment()302 private String scanDocComment() throws IOException { 303 // Note: this method has been hand-optimized to yield 304 // better performance. This was done after it was noted 305 // that javadoc spent a great deal of its time here. 306 // This should also help the performance of the compiler 307 // as well -- it scans the doc comments to find 308 // @deprecated tags. 309 // 310 // The logic of the method has been completely rewritten 311 // to avoid the use of flags that need to be looked at 312 // for every character read. Members that are accessed 313 // more than once have been stored in local variables. 314 // The methods putc() and bufferString() have been 315 // inlined by hand. Extra cases have been added to 316 // switch statements to trick the compiler into generating 317 // a tableswitch instead of a lookupswitch. 318 // 319 // This implementation aims to preserve the previous 320 // behavior of this method. 321 322 int c; 323 324 // Put `in' in a local variable. 325 final ScannerInputReader in = this.in; 326 327 // We maintain the buffer locally rather than calling putc(). 328 char[] buffer = this.buffer; 329 int count = 0; 330 331 // We are called pointing at the second star of the doc 332 // comment: 333 // 334 // Input: /** the rest of the comment ... */ 335 // ^ 336 // 337 // We rely on this in the code below. 338 339 // Consume any number of stars. 340 while ((c = in.read()) == '*') 341 ; 342 343 // Is the comment of the form /**/, /***/, /****/, etc.? 344 if (c == '/') { 345 // Set ch and return 346 ch = in.read(); 347 return ""; 348 } 349 350 // Skip a newline on the first line of the comment. 351 if (c == '\n') { 352 c = in.read(); 353 } 354 355 outerLoop: 356 // The outerLoop processes the doc comment, looping once 357 // for each line. For each line, it first strips off 358 // whitespace, then it consumes any stars, then it 359 // puts the rest of the line into our buffer. 360 while (true) { 361 362 // The wsLoop consumes whitespace from the beginning 363 // of each line. 364 wsLoop: 365 while (true) { 366 switch (c) { 367 case ' ': 368 case '\t': 369 // We could check for other forms of whitespace 370 // as well, but this is left as is for minimum 371 // disturbance of functionality. 372 // 373 // Just skip whitespace. 374 c = in.read(); 375 break; 376 377 // We have added extra cases here to trick the 378 // compiler into using a tableswitch instead of 379 // a lookupswitch. They can be removed without 380 // a change in meaning. 381 case 10: case 11: case 12: case 13: case 14: case 15: 382 case 16: case 17: case 18: case 19: case 20: case 21: 383 case 22: case 23: case 24: case 25: case 26: case 27: 384 case 28: case 29: case 30: case 31: 385 default: 386 // We've seen something that isn't whitespace, 387 // jump out. 388 break wsLoop; 389 } 390 } // end wsLoop. 391 392 // Are there stars here? If so, consume them all 393 // and check for the end of comment. 394 if (c == '*') { 395 // Skip all of the stars... 396 do { 397 c = in.read(); 398 } while (c == '*'); 399 400 // ...then check for the closing slash. 401 if (c == '/') { 402 // We're done with the doc comment. 403 // Set ch and break out. 404 ch = in.read(); 405 break outerLoop; 406 } 407 } 408 409 // The textLoop processes the rest of the characters 410 // on the line, adding them to our buffer. 411 textLoop: 412 while (true) { 413 switch (c) { 414 case EOF: 415 // We've seen a premature EOF. Break out 416 // of the loop. 417 env.error(pos, "eof.in.comment"); 418 ch = EOF; 419 break outerLoop; 420 421 case '*': 422 // Is this just a star? Or is this the 423 // end of a comment? 424 c = in.read(); 425 if (c == '/') { 426 // This is the end of the comment, 427 // set ch and return our buffer. 428 ch = in.read(); 429 break outerLoop; 430 } 431 // This is just an ordinary star. Add it to 432 // the buffer. 433 if (count == buffer.length) { 434 growBuffer(); 435 buffer = this.buffer; 436 } 437 buffer[count++] = '*'; 438 break; 439 440 case '\n': 441 // We've seen a newline. Add it to our 442 // buffer and break out of this loop, 443 // starting fresh on a new line. 444 if (count == buffer.length) { 445 growBuffer(); 446 buffer = this.buffer; 447 } 448 buffer[count++] = '\n'; 449 c = in.read(); 450 break textLoop; 451 452 // Again, the extra cases here are a trick 453 // to get the compiler to generate a tableswitch. 454 case 0: case 1: case 2: case 3: case 4: case 5: 455 case 6: case 7: case 8: case 11: case 12: case 13: 456 case 14: case 15: case 16: case 17: case 18: case 19: 457 case 20: case 21: case 22: case 23: case 24: case 25: 458 case 26: case 27: case 28: case 29: case 30: case 31: 459 case 32: case 33: case 34: case 35: case 36: case 37: 460 case 38: case 39: case 40: 461 default: 462 // Add the character to our buffer. 463 if (count == buffer.length) { 464 growBuffer(); 465 buffer = this.buffer; 466 } 467 buffer[count++] = (char)c; 468 c = in.read(); 469 break; 470 } 471 } // end textLoop 472 } // end outerLoop 473 474 // We have scanned our doc comment. It is stored in 475 // buffer. The previous implementation of scanDocComment 476 // stripped off all trailing spaces and stars from the comment. 477 // We will do this as well, so as to cause a minimum of 478 // disturbance. Is this what we want? 479 if (count > 0) { 480 int i = count - 1; 481 trailLoop: 482 while (i > -1) { 483 switch (buffer[i]) { 484 case ' ': 485 case '\t': 486 case '*': 487 i--; 488 break; 489 // And again, the extra cases here are a trick 490 // to get the compiler to generate a tableswitch. 491 case 0: case 1: case 2: case 3: case 4: case 5: 492 case 6: case 7: case 8: case 10: case 11: case 12: 493 case 13: case 14: case 15: case 16: case 17: case 18: 494 case 19: case 20: case 21: case 22: case 23: case 24: 495 case 25: case 26: case 27: case 28: case 29: case 30: 496 case 31: case 33: case 34: case 35: case 36: case 37: 497 case 38: case 39: case 40: 498 default: 499 break trailLoop; 500 } 501 } 502 count = i + 1; 503 504 // Return the text of the doc comment. 505 return new String(buffer, 0, count); 506 } else { 507 return ""; 508 } 509 } 510 511 /** 512 * Scan a number. The first digit of the number should be the current 513 * character. We may be scanning hex, decimal, or octal at this point 514 */ 515 @SuppressWarnings("fallthrough") scanNumber()516 private void scanNumber() throws IOException { 517 boolean seenNonOctal = false; 518 boolean overflow = false; 519 boolean seenDigit = false; // used to detect invalid hex number 0xL 520 radix = (ch == '0' ? 8 : 10); 521 long value = ch - '0'; 522 count = 0; 523 putc(ch); // save character in buffer 524 numberLoop: 525 for (;;) { 526 switch (ch = in.read()) { 527 case '.': 528 if (radix == 16) 529 break numberLoop; // an illegal character 530 scanReal(); 531 return; 532 533 case '8': case '9': 534 // We can't yet throw an error if reading an octal. We might 535 // discover we're really reading a real. 536 seenNonOctal = true; 537 // Fall through 538 case '0': case '1': case '2': case '3': 539 case '4': case '5': case '6': case '7': 540 seenDigit = true; 541 putc(ch); 542 if (radix == 10) { 543 overflow = overflow || (value * 10)/10 != value; 544 value = (value * 10) + (ch - '0'); 545 overflow = overflow || (value - 1 < -1); 546 } else if (radix == 8) { 547 overflow = overflow || (value >>> 61) != 0; 548 value = (value << 3) + (ch - '0'); 549 } else { 550 overflow = overflow || (value >>> 60) != 0; 551 value = (value << 4) + (ch - '0'); 552 } 553 break; 554 555 case 'd': case 'D': case 'e': case 'E': case 'f': case 'F': 556 if (radix != 16) { 557 scanReal(); 558 return; 559 } 560 // fall through 561 case 'a': case 'A': case 'b': case 'B': case 'c': case 'C': 562 seenDigit = true; 563 putc(ch); 564 if (radix != 16) 565 break numberLoop; // an illegal character 566 overflow = overflow || (value >>> 60) != 0; 567 value = (value << 4) + 10 + 568 Character.toLowerCase((char)ch) - 'a'; 569 break; 570 571 case 'l': case 'L': 572 ch = in.read(); // skip over 'l' 573 longValue = value; 574 token = LONGVAL; 575 break numberLoop; 576 577 case 'x': case 'X': 578 // if the first character is a '0' and this is the second 579 // letter, then read in a hexadecimal number. Otherwise, error. 580 if (count == 1 && radix == 8) { 581 radix = 16; 582 seenDigit = false; 583 break; 584 } else { 585 // we'll get an illegal character error 586 break numberLoop; 587 } 588 589 default: 590 intValue = (int)value; 591 token = INTVAL; 592 break numberLoop; 593 } 594 } // while true 595 596 // We have just finished reading the number. The next thing better 597 // not be a letter or digit. 598 // Note: There will be deprecation warnings against these uses 599 // of Character.isJavaLetterOrDigit and Character.isJavaLetter. 600 // Do not fix them yet; allow the compiler to run on pre-JDK1.1 VMs. 601 if (Character.isJavaLetterOrDigit((char)ch) || ch == '.') { 602 env.error(in.pos, "invalid.number"); 603 do { ch = in.read(); } 604 while (Character.isJavaLetterOrDigit((char)ch) || ch == '.'); 605 intValue = 0; 606 token = INTVAL; 607 } else if (radix == 8 && seenNonOctal) { 608 // A bogus octal literal. 609 intValue = 0; 610 token = INTVAL; 611 env.error(pos, "invalid.octal.number"); 612 } else if (radix == 16 && seenDigit == false) { 613 // A hex literal with no digits, 0xL, for example. 614 intValue = 0; 615 token = INTVAL; 616 env.error(pos, "invalid.hex.number"); 617 } else { 618 if (token == INTVAL) { 619 // Check for overflow. Note that base 10 literals 620 // have different rules than base 8 and 16. 621 overflow = overflow || 622 (value & 0xFFFFFFFF00000000L) != 0 || 623 (radix == 10 && value > 2147483648L); 624 625 if (overflow) { 626 intValue = 0; 627 628 // Give a specific error message which tells 629 // the user the range. 630 switch (radix) { 631 case 8: 632 env.error(pos, "overflow.int.oct"); 633 break; 634 case 10: 635 env.error(pos, "overflow.int.dec"); 636 break; 637 case 16: 638 env.error(pos, "overflow.int.hex"); 639 break; 640 default: 641 throw new CompilerError("invalid radix"); 642 } 643 } 644 } else { 645 if (overflow) { 646 longValue = 0; 647 648 // Give a specific error message which tells 649 // the user the range. 650 switch (radix) { 651 case 8: 652 env.error(pos, "overflow.long.oct"); 653 break; 654 case 10: 655 env.error(pos, "overflow.long.dec"); 656 break; 657 case 16: 658 env.error(pos, "overflow.long.hex"); 659 break; 660 default: 661 throw new CompilerError("invalid radix"); 662 } 663 } 664 } 665 } 666 } 667 668 /** 669 * Scan a float. We are either looking at the decimal, or we have already 670 * seen it and put it into the buffer. We haven't seen an exponent. 671 * Scan a float. Should be called with the current character is either 672 * the 'e', 'E' or '.' 673 */ 674 @SuppressWarnings("fallthrough") scanReal()675 private void scanReal() throws IOException { 676 boolean seenExponent = false; 677 boolean isSingleFloat = false; 678 char lastChar; 679 if (ch == '.') { 680 putc(ch); 681 ch = in.read(); 682 } 683 684 numberLoop: 685 for ( ; ; ch = in.read()) { 686 switch (ch) { 687 case '0': case '1': case '2': case '3': case '4': 688 case '5': case '6': case '7': case '8': case '9': 689 putc(ch); 690 break; 691 692 case 'e': case 'E': 693 if (seenExponent) 694 break numberLoop; // we'll get a format error 695 putc(ch); 696 seenExponent = true; 697 break; 698 699 case '+': case '-': 700 lastChar = buffer[count - 1]; 701 if (lastChar != 'e' && lastChar != 'E') 702 break numberLoop; // this isn't an error, though! 703 putc(ch); 704 break; 705 706 case 'f': case 'F': 707 ch = in.read(); // skip over 'f' 708 isSingleFloat = true; 709 break numberLoop; 710 711 case 'd': case 'D': 712 ch = in.read(); // skip over 'd' 713 // fall through 714 default: 715 break numberLoop; 716 } // sswitch 717 } // loop 718 719 // we have just finished reading the number. The next thing better 720 // not be a letter or digit. 721 if (Character.isJavaLetterOrDigit((char)ch) || ch == '.') { 722 env.error(in.pos, "invalid.number"); 723 do { ch = in.read(); } 724 while (Character.isJavaLetterOrDigit((char)ch) || ch == '.'); 725 doubleValue = 0; 726 token = DOUBLEVAL; 727 } else { 728 token = isSingleFloat ? FLOATVAL : DOUBLEVAL; 729 try { 730 lastChar = buffer[count - 1]; 731 if (lastChar == 'e' || lastChar == 'E' 732 || lastChar == '+' || lastChar == '-') { 733 env.error(in.pos -1, "float.format"); 734 } else if (isSingleFloat) { 735 String string = bufferString(); 736 floatValue = Float.valueOf(string).floatValue(); 737 if (Float.isInfinite(floatValue)) { 738 env.error(pos, "overflow.float"); 739 } else if (floatValue == 0 && !looksLikeZero(string)) { 740 env.error(pos, "underflow.float"); 741 } 742 } else { 743 String string = bufferString(); 744 doubleValue = Double.valueOf(string).doubleValue(); 745 if (Double.isInfinite(doubleValue)) { 746 env.error(pos, "overflow.double"); 747 } else if (doubleValue == 0 && !looksLikeZero(string)) { 748 env.error(pos, "underflow.double"); 749 } 750 } 751 } catch (NumberFormatException ee) { 752 env.error(pos, "float.format"); 753 doubleValue = 0; 754 floatValue = 0; 755 } 756 } 757 return; 758 } 759 760 // We have a token that parses as a number. Is this token possibly zero? 761 // i.e. does it have a non-zero value in the mantissa? looksLikeZero(String token)762 private static boolean looksLikeZero(String token) { 763 int length = token.length(); 764 for (int i = 0; i < length; i++) { 765 switch (token.charAt(i)) { 766 case 0: case '.': 767 continue; 768 case '1': case '2': case '3': case '4': case '5': 769 case '6': case '7': case '8': case '9': 770 return false; 771 case 'e': case 'E': case 'f': case 'F': 772 return true; 773 } 774 } 775 return true; 776 } 777 778 /** 779 * Scan an escape character. 780 * @return the character or -1 if it escaped an 781 * end-of-line. 782 */ scanEscapeChar()783 private int scanEscapeChar() throws IOException { 784 long p = in.pos; 785 786 switch (ch = in.read()) { 787 case '0': case '1': case '2': case '3': 788 case '4': case '5': case '6': case '7': { 789 int n = ch - '0'; 790 for (int i = 2 ; i > 0 ; i--) { 791 switch (ch = in.read()) { 792 case '0': case '1': case '2': case '3': 793 case '4': case '5': case '6': case '7': 794 n = (n << 3) + ch - '0'; 795 break; 796 797 default: 798 if (n > 0xFF) { 799 env.error(p, "invalid.escape.char"); 800 } 801 return n; 802 } 803 } 804 ch = in.read(); 805 if (n > 0xFF) { 806 env.error(p, "invalid.escape.char"); 807 } 808 return n; 809 } 810 811 case 'r': ch = in.read(); return '\r'; 812 case 'n': ch = in.read(); return '\n'; 813 case 'f': ch = in.read(); return '\f'; 814 case 'b': ch = in.read(); return '\b'; 815 case 't': ch = in.read(); return '\t'; 816 case '\\': ch = in.read(); return '\\'; 817 case '\"': ch = in.read(); return '\"'; 818 case '\'': ch = in.read(); return '\''; 819 } 820 821 env.error(p, "invalid.escape.char"); 822 ch = in.read(); 823 return -1; 824 } 825 826 /** 827 * Scan a string. The current character 828 * should be the opening " of the string. 829 */ scanString()830 private void scanString() throws IOException { 831 token = STRINGVAL; 832 count = 0; 833 ch = in.read(); 834 835 // Scan a String 836 while (true) { 837 switch (ch) { 838 case EOF: 839 env.error(pos, "eof.in.string"); 840 stringValue = bufferString(); 841 return; 842 843 case '\r': 844 case '\n': 845 ch = in.read(); 846 env.error(pos, "newline.in.string"); 847 stringValue = bufferString(); 848 return; 849 850 case '"': 851 ch = in.read(); 852 stringValue = bufferString(); 853 return; 854 855 case '\\': { 856 int c = scanEscapeChar(); 857 if (c >= 0) { 858 putc((char)c); 859 } 860 break; 861 } 862 863 default: 864 putc(ch); 865 ch = in.read(); 866 break; 867 } 868 } 869 } 870 871 /** 872 * Scan a character. The current character should be 873 * the opening ' of the character constant. 874 */ scanCharacter()875 private void scanCharacter() throws IOException { 876 token = CHARVAL; 877 878 switch (ch = in.read()) { 879 case '\\': 880 int c = scanEscapeChar(); 881 charValue = (char)((c >= 0) ? c : 0); 882 break; 883 884 case '\'': 885 // There are two standard problems this case deals with. One 886 // is the malformed single quote constant (i.e. the programmer 887 // uses ''' instead of '\'') and the other is the empty 888 // character constant (i.e. ''). Just consume any number of 889 // single quotes and emit an error message. 890 charValue = 0; 891 env.error(pos, "invalid.char.constant"); 892 ch = in.read(); 893 while (ch == '\'') { 894 ch = in.read(); 895 } 896 return; 897 898 case '\r': 899 case '\n': 900 charValue = 0; 901 env.error(pos, "invalid.char.constant"); 902 return; 903 904 default: 905 charValue = (char)ch; 906 ch = in.read(); 907 break; 908 } 909 910 if (ch == '\'') { 911 ch = in.read(); 912 } else { 913 env.error(pos, "invalid.char.constant"); 914 while (true) { 915 switch (ch) { 916 case '\'': 917 ch = in.read(); 918 return; 919 case ';': 920 case '\n': 921 case EOF: 922 return; 923 default: 924 ch = in.read(); 925 } 926 } 927 } 928 } 929 930 /** 931 * Scan an Identifier. The current character should 932 * be the first character of the identifier. 933 */ scanIdentifier()934 private void scanIdentifier() throws IOException { 935 count = 0; 936 937 while (true) { 938 putc(ch); 939 switch (ch = in.read()) { 940 case 'a': case 'b': case 'c': case 'd': case 'e': 941 case 'f': case 'g': case 'h': case 'i': case 'j': 942 case 'k': case 'l': case 'm': case 'n': case 'o': 943 case 'p': case 'q': case 'r': case 's': case 't': 944 case 'u': case 'v': case 'w': case 'x': case 'y': 945 case 'z': 946 case 'A': case 'B': case 'C': case 'D': case 'E': 947 case 'F': case 'G': case 'H': case 'I': case 'J': 948 case 'K': case 'L': case 'M': case 'N': case 'O': 949 case 'P': case 'Q': case 'R': case 'S': case 'T': 950 case 'U': case 'V': case 'W': case 'X': case 'Y': 951 case 'Z': 952 case '0': case '1': case '2': case '3': case '4': 953 case '5': case '6': case '7': case '8': case '9': 954 case '$': case '_': 955 break; 956 957 default: 958 if (!Character.isJavaLetterOrDigit((char)ch)) { 959 idValue = Identifier.lookup(bufferString()); 960 token = idValue.getType(); 961 return; 962 } 963 } 964 } 965 } 966 967 /** 968 * The ending position of the current token 969 */ 970 // Note: This should be part of the pos itself. getEndPos()971 public long getEndPos() { 972 return in.pos; 973 } 974 975 /** 976 * If the current token is IDENT, return the identifier occurrence. 977 * It will be freshly allocated. 978 */ getIdToken()979 public IdentifierToken getIdToken() { 980 return (token != IDENT) ? null : new IdentifierToken(pos, idValue); 981 } 982 983 /** 984 * Scan the next token. 985 * @return the position of the previous token. 986 */ scan()987 public long scan() throws IOException { 988 return xscan(); 989 } 990 991 @SuppressWarnings("fallthrough") xscan()992 protected long xscan() throws IOException { 993 final ScannerInputReader in = this.in; 994 long retPos = pos; 995 prevPos = in.pos; 996 docComment = null; 997 while (true) { 998 pos = in.pos; 999 1000 switch (ch) { 1001 case EOF: 1002 token = EOF; 1003 return retPos; 1004 1005 case '\n': 1006 if (scanComments) { 1007 ch = ' '; 1008 // Avoid this path the next time around. 1009 // Do not just call in.read; we want to present 1010 // a null token (and also avoid read-ahead). 1011 token = COMMENT; 1012 return retPos; 1013 } 1014 // Fall through 1015 case ' ': 1016 case '\t': 1017 case '\f': 1018 ch = in.read(); 1019 break; 1020 1021 case '/': 1022 switch (ch = in.read()) { 1023 case '/': 1024 // Parse a // comment 1025 while (((ch = in.read()) != EOF) && (ch != '\n')); 1026 if (scanComments) { 1027 token = COMMENT; 1028 return retPos; 1029 } 1030 break; 1031 1032 case '*': 1033 ch = in.read(); 1034 if (ch == '*') { 1035 docComment = scanDocComment(); 1036 } else { 1037 skipComment(); 1038 } 1039 if (scanComments) { 1040 return retPos; 1041 } 1042 break; 1043 1044 case '=': 1045 ch = in.read(); 1046 token = ASGDIV; 1047 return retPos; 1048 1049 default: 1050 token = DIV; 1051 return retPos; 1052 } 1053 break; 1054 1055 case '"': 1056 scanString(); 1057 return retPos; 1058 1059 case '\'': 1060 scanCharacter(); 1061 return retPos; 1062 1063 case '0': case '1': case '2': case '3': case '4': 1064 case '5': case '6': case '7': case '8': case '9': 1065 scanNumber(); 1066 return retPos; 1067 1068 case '.': 1069 switch (ch = in.read()) { 1070 case '0': case '1': case '2': case '3': case '4': 1071 case '5': case '6': case '7': case '8': case '9': 1072 count = 0; 1073 putc('.'); 1074 scanReal(); 1075 break; 1076 default: 1077 token = FIELD; 1078 } 1079 return retPos; 1080 1081 case '{': 1082 ch = in.read(); 1083 token = LBRACE; 1084 return retPos; 1085 1086 case '}': 1087 ch = in.read(); 1088 token = RBRACE; 1089 return retPos; 1090 1091 case '(': 1092 ch = in.read(); 1093 token = LPAREN; 1094 return retPos; 1095 1096 case ')': 1097 ch = in.read(); 1098 token = RPAREN; 1099 return retPos; 1100 1101 case '[': 1102 ch = in.read(); 1103 token = LSQBRACKET; 1104 return retPos; 1105 1106 case ']': 1107 ch = in.read(); 1108 token = RSQBRACKET; 1109 return retPos; 1110 1111 case ',': 1112 ch = in.read(); 1113 token = COMMA; 1114 return retPos; 1115 1116 case ';': 1117 ch = in.read(); 1118 token = SEMICOLON; 1119 return retPos; 1120 1121 case '?': 1122 ch = in.read(); 1123 token = QUESTIONMARK; 1124 return retPos; 1125 1126 case '~': 1127 ch = in.read(); 1128 token = BITNOT; 1129 return retPos; 1130 1131 case ':': 1132 ch = in.read(); 1133 token = COLON; 1134 return retPos; 1135 1136 case '-': 1137 switch (ch = in.read()) { 1138 case '-': 1139 ch = in.read(); 1140 token = DEC; 1141 return retPos; 1142 1143 case '=': 1144 ch = in.read(); 1145 token = ASGSUB; 1146 return retPos; 1147 } 1148 token = SUB; 1149 return retPos; 1150 1151 case '+': 1152 switch (ch = in.read()) { 1153 case '+': 1154 ch = in.read(); 1155 token = INC; 1156 return retPos; 1157 1158 case '=': 1159 ch = in.read(); 1160 token = ASGADD; 1161 return retPos; 1162 } 1163 token = ADD; 1164 return retPos; 1165 1166 case '<': 1167 switch (ch = in.read()) { 1168 case '<': 1169 if ((ch = in.read()) == '=') { 1170 ch = in.read(); 1171 token = ASGLSHIFT; 1172 return retPos; 1173 } 1174 token = LSHIFT; 1175 return retPos; 1176 1177 case '=': 1178 ch = in.read(); 1179 token = LE; 1180 return retPos; 1181 } 1182 token = LT; 1183 return retPos; 1184 1185 case '>': 1186 switch (ch = in.read()) { 1187 case '>': 1188 switch (ch = in.read()) { 1189 case '=': 1190 ch = in.read(); 1191 token = ASGRSHIFT; 1192 return retPos; 1193 1194 case '>': 1195 if ((ch = in.read()) == '=') { 1196 ch = in.read(); 1197 token = ASGURSHIFT; 1198 return retPos; 1199 } 1200 token = URSHIFT; 1201 return retPos; 1202 } 1203 token = RSHIFT; 1204 return retPos; 1205 1206 case '=': 1207 ch = in.read(); 1208 token = GE; 1209 return retPos; 1210 } 1211 token = GT; 1212 return retPos; 1213 1214 case '|': 1215 switch (ch = in.read()) { 1216 case '|': 1217 ch = in.read(); 1218 token = OR; 1219 return retPos; 1220 1221 case '=': 1222 ch = in.read(); 1223 token = ASGBITOR; 1224 return retPos; 1225 } 1226 token = BITOR; 1227 return retPos; 1228 1229 case '&': 1230 switch (ch = in.read()) { 1231 case '&': 1232 ch = in.read(); 1233 token = AND; 1234 return retPos; 1235 1236 case '=': 1237 ch = in.read(); 1238 token = ASGBITAND; 1239 return retPos; 1240 } 1241 token = BITAND; 1242 return retPos; 1243 1244 case '=': 1245 if ((ch = in.read()) == '=') { 1246 ch = in.read(); 1247 token = EQ; 1248 return retPos; 1249 } 1250 token = ASSIGN; 1251 return retPos; 1252 1253 case '%': 1254 if ((ch = in.read()) == '=') { 1255 ch = in.read(); 1256 token = ASGREM; 1257 return retPos; 1258 } 1259 token = REM; 1260 return retPos; 1261 1262 case '^': 1263 if ((ch = in.read()) == '=') { 1264 ch = in.read(); 1265 token = ASGBITXOR; 1266 return retPos; 1267 } 1268 token = BITXOR; 1269 return retPos; 1270 1271 case '!': 1272 if ((ch = in.read()) == '=') { 1273 ch = in.read(); 1274 token = NE; 1275 return retPos; 1276 } 1277 token = NOT; 1278 return retPos; 1279 1280 case '*': 1281 if ((ch = in.read()) == '=') { 1282 ch = in.read(); 1283 token = ASGMUL; 1284 return retPos; 1285 } 1286 token = MUL; 1287 return retPos; 1288 1289 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 1290 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': 1291 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': 1292 case 's': case 't': case 'u': case 'v': case 'w': case 'x': 1293 case 'y': case 'z': 1294 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 1295 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': 1296 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': 1297 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': 1298 case 'Y': case 'Z': 1299 case '$': case '_': 1300 scanIdentifier(); 1301 return retPos; 1302 1303 case '\u001a': 1304 // Our one concession to DOS. 1305 if ((ch = in.read()) == EOF) { 1306 token = EOF; 1307 return retPos; 1308 } 1309 env.error(pos, "funny.char"); 1310 ch = in.read(); 1311 break; 1312 1313 1314 default: 1315 if (Character.isJavaLetter((char)ch)) { 1316 scanIdentifier(); 1317 return retPos; 1318 } 1319 env.error(pos, "funny.char"); 1320 ch = in.read(); 1321 break; 1322 } 1323 } 1324 } 1325 1326 /** 1327 * Scan to a matching '}', ']' or ')'. The current token must be 1328 * a '{', '[' or '('; 1329 */ match(int open, int close)1330 public void match(int open, int close) throws IOException { 1331 int depth = 1; 1332 1333 while (true) { 1334 scan(); 1335 if (token == open) { 1336 depth++; 1337 } else if (token == close) { 1338 if (--depth == 0) { 1339 return; 1340 } 1341 } else if (token == EOF) { 1342 env.error(pos, "unbalanced.paren"); 1343 return; 1344 } 1345 } 1346 } 1347 } 1348