1 /* 2 * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package com.sun.tools.javac.parser; 27 28 import com.sun.tools.javac.code.Source; 29 import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle; 30 import com.sun.tools.javac.util.*; 31 32 import java.nio.CharBuffer; 33 34 import static com.sun.tools.javac.parser.Tokens.*; 35 import static com.sun.tools.javac.util.LayoutCharacters.*; 36 37 /** The lexical analyzer maps an input stream consisting of 38 * ASCII characters and Unicode escapes into a token sequence. 39 * 40 * <p><b>This is NOT part of any supported API. 41 * If you write code that depends on this, you do so at your own risk. 
42 * This code and its internal interfaces are subject to change or 43 * deletion without notice.</b> 44 */ 45 public class JavaTokenizer { 46 47 private static final boolean scannerDebug = false; 48 49 /** Allow hex floating-point literals. 50 */ 51 private boolean allowHexFloats; 52 53 /** Allow binary literals. 54 */ 55 private boolean allowBinaryLiterals; 56 57 /** Allow underscores in literals. 58 */ 59 private boolean allowUnderscoresInLiterals; 60 61 /** The source language setting. 62 */ 63 private Source source; 64 65 /** The log to be used for error reporting. 66 */ 67 private final Log log; 68 69 /** The token factory. */ 70 private final Tokens tokens; 71 72 /** The token kind, set by nextToken(). 73 */ 74 protected TokenKind tk; 75 76 /** The token's radix, set by nextToken(). 77 */ 78 protected int radix; 79 80 /** The token's name, set by nextToken(). 81 */ 82 protected Name name; 83 84 /** The position where a lexical error occurred; 85 */ 86 protected int errPos = Position.NOPOS; 87 88 /** The Unicode reader (low-level stream reader). 89 */ 90 protected UnicodeReader reader; 91 92 protected ScannerFactory fac; 93 94 private static final boolean hexFloatsWork = hexFloatsWork(); hexFloatsWork()95 private static boolean hexFloatsWork() { 96 try { 97 Float.valueOf("0x1.0p1"); 98 return true; 99 } catch (NumberFormatException ex) { 100 return false; 101 } 102 } 103 104 /** 105 * Create a scanner from the input array. This method might 106 * modify the array. To avoid copying the input array, ensure 107 * that {@code inputLength < input.length} or 108 * {@code input[input.length -1]} is a white space character. 109 * 110 * @param fac the factory which created this Scanner 111 * @param buf the input, might be modified 112 * Must be positive and less than or equal to input.length. 
113 */ JavaTokenizer(ScannerFactory fac, CharBuffer buf)114 protected JavaTokenizer(ScannerFactory fac, CharBuffer buf) { 115 this(fac, new UnicodeReader(fac, buf)); 116 } 117 JavaTokenizer(ScannerFactory fac, char[] buf, int inputLength)118 protected JavaTokenizer(ScannerFactory fac, char[] buf, int inputLength) { 119 this(fac, new UnicodeReader(fac, buf, inputLength)); 120 } 121 JavaTokenizer(ScannerFactory fac, UnicodeReader reader)122 protected JavaTokenizer(ScannerFactory fac, UnicodeReader reader) { 123 this.fac = fac; 124 this.log = fac.log; 125 this.tokens = fac.tokens; 126 this.source = fac.source; 127 this.reader = reader; 128 this.allowBinaryLiterals = source.allowBinaryLiterals(); 129 this.allowHexFloats = source.allowHexFloats(); 130 this.allowUnderscoresInLiterals = source.allowUnderscoresInLiterals(); 131 } 132 133 /** Report an error at the given position using the provided arguments. 134 */ lexError(int pos, String key, Object... args)135 protected void lexError(int pos, String key, Object... args) { 136 log.error(pos, key, args); 137 tk = TokenKind.ERROR; 138 errPos = pos; 139 } 140 141 /** Read next character in character or string literal and copy into sbuf. 
142 */ scanLitChar(int pos)143 private void scanLitChar(int pos) { 144 if (reader.ch == '\\') { 145 if (reader.peekChar() == '\\' && !reader.isUnicode()) { 146 reader.skipChar(); 147 reader.putChar('\\', true); 148 } else { 149 reader.scanChar(); 150 switch (reader.ch) { 151 case '0': case '1': case '2': case '3': 152 case '4': case '5': case '6': case '7': 153 char leadch = reader.ch; 154 int oct = reader.digit(pos, 8); 155 reader.scanChar(); 156 if ('0' <= reader.ch && reader.ch <= '7') { 157 oct = oct * 8 + reader.digit(pos, 8); 158 reader.scanChar(); 159 if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') { 160 oct = oct * 8 + reader.digit(pos, 8); 161 reader.scanChar(); 162 } 163 } 164 reader.putChar((char)oct); 165 break; 166 case 'b': 167 reader.putChar('\b', true); break; 168 case 't': 169 reader.putChar('\t', true); break; 170 case 'n': 171 reader.putChar('\n', true); break; 172 case 'f': 173 reader.putChar('\f', true); break; 174 case 'r': 175 reader.putChar('\r', true); break; 176 case '\'': 177 reader.putChar('\'', true); break; 178 case '\"': 179 reader.putChar('\"', true); break; 180 case '\\': 181 reader.putChar('\\', true); break; 182 default: 183 lexError(reader.bp, "illegal.esc.char"); 184 } 185 } 186 } else if (reader.bp != reader.buflen) { 187 reader.putChar(true); 188 } 189 } 190 scanDigits(int pos, int digitRadix)191 private void scanDigits(int pos, int digitRadix) { 192 char saveCh; 193 int savePos; 194 do { 195 if (reader.ch != '_') { 196 reader.putChar(false); 197 } else { 198 if (!allowUnderscoresInLiterals) { 199 lexError(pos, "unsupported.underscore.lit", source.name); 200 allowUnderscoresInLiterals = true; 201 } 202 } 203 saveCh = reader.ch; 204 savePos = reader.bp; 205 reader.scanChar(); 206 } while (reader.digit(pos, digitRadix) >= 0 || reader.ch == '_'); 207 if (saveCh == '_') 208 lexError(savePos, "illegal.underscore"); 209 } 210 211 /** Read fractional part of hexadecimal floating point number. 
212 */ scanHexExponentAndSuffix(int pos)213 private void scanHexExponentAndSuffix(int pos) { 214 if (reader.ch == 'p' || reader.ch == 'P') { 215 reader.putChar(true); 216 skipIllegalUnderscores(); 217 if (reader.ch == '+' || reader.ch == '-') { 218 reader.putChar(true); 219 } 220 skipIllegalUnderscores(); 221 if ('0' <= reader.ch && reader.ch <= '9') { 222 scanDigits(pos, 10); 223 if (!allowHexFloats) { 224 lexError(pos, "unsupported.fp.lit", source.name); 225 allowHexFloats = true; 226 } 227 else if (!hexFloatsWork) 228 lexError(pos, "unsupported.cross.fp.lit"); 229 } else 230 lexError(pos, "malformed.fp.lit"); 231 } else { 232 lexError(pos, "malformed.fp.lit"); 233 } 234 if (reader.ch == 'f' || reader.ch == 'F') { 235 reader.putChar(true); 236 tk = TokenKind.FLOATLITERAL; 237 radix = 16; 238 } else { 239 if (reader.ch == 'd' || reader.ch == 'D') { 240 reader.putChar(true); 241 } 242 tk = TokenKind.DOUBLELITERAL; 243 radix = 16; 244 } 245 } 246 247 /** Read fractional part of floating point number. 248 */ scanFraction(int pos)249 private void scanFraction(int pos) { 250 skipIllegalUnderscores(); 251 if ('0' <= reader.ch && reader.ch <= '9') { 252 scanDigits(pos, 10); 253 } 254 int sp1 = reader.sp; 255 if (reader.ch == 'e' || reader.ch == 'E') { 256 reader.putChar(true); 257 skipIllegalUnderscores(); 258 if (reader.ch == '+' || reader.ch == '-') { 259 reader.putChar(true); 260 } 261 skipIllegalUnderscores(); 262 if ('0' <= reader.ch && reader.ch <= '9') { 263 scanDigits(pos, 10); 264 return; 265 } 266 lexError(pos, "malformed.fp.lit"); 267 reader.sp = sp1; 268 } 269 } 270 271 /** Read fractional part and 'd' or 'f' suffix of floating point number. 
272 */ scanFractionAndSuffix(int pos)273 private void scanFractionAndSuffix(int pos) { 274 radix = 10; 275 scanFraction(pos); 276 if (reader.ch == 'f' || reader.ch == 'F') { 277 reader.putChar(true); 278 tk = TokenKind.FLOATLITERAL; 279 } else { 280 if (reader.ch == 'd' || reader.ch == 'D') { 281 reader.putChar(true); 282 } 283 tk = TokenKind.DOUBLELITERAL; 284 } 285 } 286 287 /** Read fractional part and 'd' or 'f' suffix of floating point number. 288 */ scanHexFractionAndSuffix(int pos, boolean seendigit)289 private void scanHexFractionAndSuffix(int pos, boolean seendigit) { 290 radix = 16; 291 Assert.check(reader.ch == '.'); 292 reader.putChar(true); 293 skipIllegalUnderscores(); 294 if (reader.digit(pos, 16) >= 0) { 295 seendigit = true; 296 scanDigits(pos, 16); 297 } 298 if (!seendigit) 299 lexError(pos, "invalid.hex.number"); 300 else 301 scanHexExponentAndSuffix(pos); 302 } 303 skipIllegalUnderscores()304 private void skipIllegalUnderscores() { 305 if (reader.ch == '_') { 306 lexError(reader.bp, "illegal.underscore"); 307 while (reader.ch == '_') 308 reader.scanChar(); 309 } 310 } 311 312 /** Read a number. 313 * @param radix The radix of the number; one of 2, j8, 10, 16. 314 */ scanNumber(int pos, int radix)315 private void scanNumber(int pos, int radix) { 316 // for octal, allow base-10 digit in case it's a float literal 317 this.radix = radix; 318 int digitRadix = (radix == 8 ? 
10 : radix); 319 boolean seendigit = false; 320 if (reader.digit(pos, digitRadix) >= 0) { 321 seendigit = true; 322 scanDigits(pos, digitRadix); 323 } 324 if (radix == 16 && reader.ch == '.') { 325 scanHexFractionAndSuffix(pos, seendigit); 326 } else if (seendigit && radix == 16 && (reader.ch == 'p' || reader.ch == 'P')) { 327 scanHexExponentAndSuffix(pos); 328 } else if (digitRadix == 10 && reader.ch == '.') { 329 reader.putChar(true); 330 scanFractionAndSuffix(pos); 331 } else if (digitRadix == 10 && 332 (reader.ch == 'e' || reader.ch == 'E' || 333 reader.ch == 'f' || reader.ch == 'F' || 334 reader.ch == 'd' || reader.ch == 'D')) { 335 scanFractionAndSuffix(pos); 336 } else { 337 if (reader.ch == 'l' || reader.ch == 'L') { 338 reader.scanChar(); 339 tk = TokenKind.LONGLITERAL; 340 } else { 341 tk = TokenKind.INTLITERAL; 342 } 343 } 344 } 345 346 /** Read an identifier. 347 */ scanIdent()348 private void scanIdent() { 349 boolean isJavaIdentifierPart; 350 char high; 351 reader.putChar(true); 352 do { 353 switch (reader.ch) { 354 case 'A': case 'B': case 'C': case 'D': case 'E': 355 case 'F': case 'G': case 'H': case 'I': case 'J': 356 case 'K': case 'L': case 'M': case 'N': case 'O': 357 case 'P': case 'Q': case 'R': case 'S': case 'T': 358 case 'U': case 'V': case 'W': case 'X': case 'Y': 359 case 'Z': 360 case 'a': case 'b': case 'c': case 'd': case 'e': 361 case 'f': case 'g': case 'h': case 'i': case 'j': 362 case 'k': case 'l': case 'm': case 'n': case 'o': 363 case 'p': case 'q': case 'r': case 's': case 't': 364 case 'u': case 'v': case 'w': case 'x': case 'y': 365 case 'z': 366 case '$': case '_': 367 case '0': case '1': case '2': case '3': case '4': 368 case '5': case '6': case '7': case '8': case '9': 369 break; 370 case '\u0000': case '\u0001': case '\u0002': case '\u0003': 371 case '\u0004': case '\u0005': case '\u0006': case '\u0007': 372 case '\u0008': case '\u000E': case '\u000F': case '\u0010': 373 case '\u0011': case '\u0012': case '\u0013': case 
'\u0014': 374 case '\u0015': case '\u0016': case '\u0017': 375 case '\u0018': case '\u0019': case '\u001B': 376 case '\u007F': 377 reader.scanChar(); 378 continue; 379 case '\u001A': // EOI is also a legal identifier part 380 if (reader.bp >= reader.buflen) { 381 name = reader.name(); 382 tk = tokens.lookupKind(name); 383 return; 384 } 385 reader.scanChar(); 386 continue; 387 default: 388 if (reader.ch < '\u0080') { 389 // all ASCII range chars already handled, above 390 isJavaIdentifierPart = false; 391 } else { 392 if (Character.isIdentifierIgnorable(reader.ch)) { 393 reader.scanChar(); 394 continue; 395 } else { 396 high = reader.scanSurrogates(); 397 if (high != 0) { 398 reader.putChar(high); 399 isJavaIdentifierPart = Character.isJavaIdentifierPart( 400 Character.toCodePoint(high, reader.ch)); 401 } else { 402 isJavaIdentifierPart = Character.isJavaIdentifierPart(reader.ch); 403 } 404 } 405 } 406 if (!isJavaIdentifierPart) { 407 name = reader.name(); 408 tk = tokens.lookupKind(name); 409 return; 410 } 411 } 412 reader.putChar(true); 413 } while (true); 414 } 415 416 /** Return true if reader.ch can be part of an operator. 417 */ isSpecial(char ch)418 private boolean isSpecial(char ch) { 419 switch (ch) { 420 case '!': case '%': case '&': case '*': case '?': 421 case '+': case '-': case ':': case '<': case '=': 422 case '>': case '^': case '|': case '~': 423 case '@': 424 return true; 425 default: 426 return false; 427 } 428 } 429 430 /** Read longest possible sequence of special characters and convert 431 * to token. 432 */ scanOperator()433 private void scanOperator() { 434 while (true) { 435 reader.putChar(false); 436 Name newname = reader.name(); 437 TokenKind tk1 = tokens.lookupKind(newname); 438 if (tk1 == TokenKind.IDENTIFIER) { 439 reader.sp--; 440 break; 441 } 442 tk = tk1; 443 reader.scanChar(); 444 if (!isSpecial(reader.ch)) break; 445 } 446 } 447 448 /** Read token. 
449 */ readToken()450 public Token readToken() { 451 452 reader.sp = 0; 453 name = null; 454 radix = 0; 455 456 int pos = 0; 457 int endPos = 0; 458 List<Comment> comments = null; 459 460 try { 461 loop: while (true) { 462 pos = reader.bp; 463 switch (reader.ch) { 464 case ' ': // (Spec 3.6) 465 case '\t': // (Spec 3.6) 466 case FF: // (Spec 3.6) 467 do { 468 reader.scanChar(); 469 } while (reader.ch == ' ' || reader.ch == '\t' || reader.ch == FF); 470 processWhiteSpace(pos, reader.bp); 471 break; 472 case LF: // (Spec 3.4) 473 reader.scanChar(); 474 processLineTerminator(pos, reader.bp); 475 break; 476 case CR: // (Spec 3.4) 477 reader.scanChar(); 478 if (reader.ch == LF) { 479 reader.scanChar(); 480 } 481 processLineTerminator(pos, reader.bp); 482 break; 483 case 'A': case 'B': case 'C': case 'D': case 'E': 484 case 'F': case 'G': case 'H': case 'I': case 'J': 485 case 'K': case 'L': case 'M': case 'N': case 'O': 486 case 'P': case 'Q': case 'R': case 'S': case 'T': 487 case 'U': case 'V': case 'W': case 'X': case 'Y': 488 case 'Z': 489 case 'a': case 'b': case 'c': case 'd': case 'e': 490 case 'f': case 'g': case 'h': case 'i': case 'j': 491 case 'k': case 'l': case 'm': case 'n': case 'o': 492 case 'p': case 'q': case 'r': case 's': case 't': 493 case 'u': case 'v': case 'w': case 'x': case 'y': 494 case 'z': 495 case '$': case '_': 496 scanIdent(); 497 break loop; 498 case '0': 499 reader.scanChar(); 500 if (reader.ch == 'x' || reader.ch == 'X') { 501 reader.scanChar(); 502 skipIllegalUnderscores(); 503 if (reader.ch == '.') { 504 scanHexFractionAndSuffix(pos, false); 505 } else if (reader.digit(pos, 16) < 0) { 506 lexError(pos, "invalid.hex.number"); 507 } else { 508 scanNumber(pos, 16); 509 } 510 } else if (reader.ch == 'b' || reader.ch == 'B') { 511 if (!allowBinaryLiterals) { 512 lexError(pos, "unsupported.binary.lit", source.name); 513 allowBinaryLiterals = true; 514 } 515 reader.scanChar(); 516 skipIllegalUnderscores(); 517 if (reader.digit(pos, 2) < 0) 
{ 518 lexError(pos, "invalid.binary.number"); 519 } else { 520 scanNumber(pos, 2); 521 } 522 } else { 523 reader.putChar('0'); 524 if (reader.ch == '_') { 525 int savePos = reader.bp; 526 do { 527 reader.scanChar(); 528 } while (reader.ch == '_'); 529 if (reader.digit(pos, 10) < 0) { 530 lexError(savePos, "illegal.underscore"); 531 } 532 } 533 scanNumber(pos, 8); 534 } 535 break loop; 536 case '1': case '2': case '3': case '4': 537 case '5': case '6': case '7': case '8': case '9': 538 scanNumber(pos, 10); 539 break loop; 540 case '.': 541 reader.scanChar(); 542 if ('0' <= reader.ch && reader.ch <= '9') { 543 reader.putChar('.'); 544 scanFractionAndSuffix(pos); 545 } else if (reader.ch == '.') { 546 int savePos = reader.bp; 547 reader.putChar('.'); reader.putChar('.', true); 548 if (reader.ch == '.') { 549 reader.scanChar(); 550 reader.putChar('.'); 551 tk = TokenKind.ELLIPSIS; 552 } else { 553 lexError(savePos, "illegal.dot"); 554 } 555 } else { 556 tk = TokenKind.DOT; 557 } 558 break loop; 559 case ',': 560 reader.scanChar(); tk = TokenKind.COMMA; break loop; 561 case ';': 562 reader.scanChar(); tk = TokenKind.SEMI; break loop; 563 case '(': 564 reader.scanChar(); tk = TokenKind.LPAREN; break loop; 565 case ')': 566 reader.scanChar(); tk = TokenKind.RPAREN; break loop; 567 case '[': 568 reader.scanChar(); tk = TokenKind.LBRACKET; break loop; 569 case ']': 570 reader.scanChar(); tk = TokenKind.RBRACKET; break loop; 571 case '{': 572 reader.scanChar(); tk = TokenKind.LBRACE; break loop; 573 case '}': 574 reader.scanChar(); tk = TokenKind.RBRACE; break loop; 575 case '/': 576 reader.scanChar(); 577 if (reader.ch == '/') { 578 do { 579 reader.scanCommentChar(); 580 } while (reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen); 581 if (reader.bp < reader.buflen) { 582 comments = addComment(comments, processComment(pos, reader.bp, CommentStyle.LINE)); 583 } 584 break; 585 } else if (reader.ch == '*') { 586 boolean isEmpty = false; 587 reader.scanChar(); 588 
CommentStyle style; 589 if (reader.ch == '*') { 590 style = CommentStyle.JAVADOC; 591 reader.scanCommentChar(); 592 if (reader.ch == '/') { 593 isEmpty = true; 594 } 595 } else { 596 style = CommentStyle.BLOCK; 597 } 598 while (!isEmpty && reader.bp < reader.buflen) { 599 if (reader.ch == '*') { 600 reader.scanChar(); 601 if (reader.ch == '/') break; 602 } else { 603 reader.scanCommentChar(); 604 } 605 } 606 if (reader.ch == '/') { 607 reader.scanChar(); 608 comments = addComment(comments, processComment(pos, reader.bp, style)); 609 break; 610 } else { 611 lexError(pos, "unclosed.comment"); 612 break loop; 613 } 614 } else if (reader.ch == '=') { 615 tk = TokenKind.SLASHEQ; 616 reader.scanChar(); 617 } else { 618 tk = TokenKind.SLASH; 619 } 620 break loop; 621 case '\'': 622 reader.scanChar(); 623 if (reader.ch == '\'') { 624 lexError(pos, "empty.char.lit"); 625 } else { 626 if (reader.ch == CR || reader.ch == LF) 627 lexError(pos, "illegal.line.end.in.char.lit"); 628 scanLitChar(pos); 629 char ch2 = reader.ch; 630 if (reader.ch == '\'') { 631 reader.scanChar(); 632 tk = TokenKind.CHARLITERAL; 633 } else { 634 lexError(pos, "unclosed.char.lit"); 635 } 636 } 637 break loop; 638 case '\"': 639 reader.scanChar(); 640 while (reader.ch != '\"' && reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen) 641 scanLitChar(pos); 642 if (reader.ch == '\"') { 643 tk = TokenKind.STRINGLITERAL; 644 reader.scanChar(); 645 } else { 646 lexError(pos, "unclosed.str.lit"); 647 } 648 break loop; 649 default: 650 if (isSpecial(reader.ch)) { 651 scanOperator(); 652 } else { 653 boolean isJavaIdentifierStart; 654 if (reader.ch < '\u0080') { 655 // all ASCII range chars already handled, above 656 isJavaIdentifierStart = false; 657 } else { 658 char high = reader.scanSurrogates(); 659 if (high != 0) { 660 reader.putChar(high); 661 662 isJavaIdentifierStart = Character.isJavaIdentifierStart( 663 Character.toCodePoint(high, reader.ch)); 664 } else { 665 isJavaIdentifierStart = 
Character.isJavaIdentifierStart(reader.ch); 666 } 667 } 668 if (isJavaIdentifierStart) { 669 scanIdent(); 670 } else if (reader.bp == reader.buflen || reader.ch == EOI && reader.bp + 1 == reader.buflen) { // JLS 3.5 671 tk = TokenKind.EOF; 672 pos = reader.buflen; 673 } else { 674 String arg = (32 < reader.ch && reader.ch < 127) ? 675 String.format("%s", reader.ch) : 676 String.format("\\u%04x", (int)reader.ch); 677 lexError(pos, "illegal.char", arg); 678 reader.scanChar(); 679 } 680 } 681 break loop; 682 } 683 } 684 endPos = reader.bp; 685 switch (tk.tag) { 686 case DEFAULT: return new Token(tk, pos, endPos, comments); 687 case NAMED: return new NamedToken(tk, pos, endPos, name, comments); 688 case STRING: return new StringToken(tk, pos, endPos, reader.chars(), comments); 689 case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments); 690 default: throw new AssertionError(); 691 } 692 } 693 finally { 694 if (scannerDebug) { 695 System.out.println("nextToken(" + pos 696 + "," + endPos + ")=|" + 697 new String(reader.getRawCharacters(pos, endPos)) 698 + "|"); 699 } 700 } 701 } 702 //where addComment(List<Comment> comments, Comment comment)703 List<Comment> addComment(List<Comment> comments, Comment comment) { 704 return comments == null ? 705 List.of(comment) : 706 comments.prepend(comment); 707 } 708 709 /** Return the position where a lexical error occurred; 710 */ errPos()711 public int errPos() { 712 return errPos; 713 } 714 715 /** Set the position where a lexical error occurred; 716 */ errPos(int pos)717 public void errPos(int pos) { 718 errPos = pos; 719 } 720 721 /** 722 * Called when a complete comment has been scanned. pos and endPos 723 * will mark the comment boundary. 
724 */ processComment(int pos, int endPos, CommentStyle style)725 protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) { 726 if (scannerDebug) 727 System.out.println("processComment(" + pos 728 + "," + endPos + "," + style + ")=|" 729 + new String(reader.getRawCharacters(pos, endPos)) 730 + "|"); 731 char[] buf = reader.getRawCharacters(pos, endPos); 732 return new BasicComment<UnicodeReader>(new UnicodeReader(fac, buf, buf.length), style); 733 } 734 735 /** 736 * Called when a complete whitespace run has been scanned. pos and endPos 737 * will mark the whitespace boundary. 738 */ processWhiteSpace(int pos, int endPos)739 protected void processWhiteSpace(int pos, int endPos) { 740 if (scannerDebug) 741 System.out.println("processWhitespace(" + pos 742 + "," + endPos + ")=|" + 743 new String(reader.getRawCharacters(pos, endPos)) 744 + "|"); 745 } 746 747 /** 748 * Called when a line terminator has been processed. 749 */ processLineTerminator(int pos, int endPos)750 protected void processLineTerminator(int pos, int endPos) { 751 if (scannerDebug) 752 System.out.println("processTerminator(" + pos 753 + "," + endPos + ")=|" + 754 new String(reader.getRawCharacters(pos, endPos)) 755 + "|"); 756 } 757 758 /** Build a map for translating between line numbers and 759 * positions in the input. 760 * 761 * @return a LineMap */ getLineMap()762 public Position.LineMap getLineMap() { 763 return Position.makeLineMap(reader.getRawCharacters(), reader.buflen, false); 764 } 765 766 767 /** 768 * Scan a documentation comment; determine if a deprecated tag is present. 769 * Called once the initial /, * have been skipped, positioned at the second * 770 * (which is treated as the beginning of the first line). 771 * Stops positioned at the closing '/'. 
*/
    protected static class BasicComment<U extends UnicodeReader> implements Comment {

        /** The style of this comment. */
        CommentStyle cs;

        /** The reader over the characters of this comment. */
        U comment_reader;

        /** True once the deprecation tag has been found. */
        protected boolean deprecatedFlag = false;

        /** True once the comment has been scanned. */
        protected boolean scanned = false;

        /** Create a comment of the given style on top of the given reader. */
        protected BasicComment(U comment_reader, CommentStyle cs) {
            this.comment_reader = comment_reader;
            this.cs = cs;
        }

        /** This basic implementation provides no text; always returns null. */
        public String getText() {
            return null;
        }

        /** This basic implementation provides no source positions; always returns -1. */
        public int getSourcePos(int pos) {
            return -1;
        }

        /** Return the style this comment was created with. */
        public CommentStyle getStyle() {
            return cs;
        }

        /** Return whether the deprecation tag was found, scanning the
         *  comment first if it is a documentation comment that has not
         *  been scanned yet.
         */
        public boolean isDeprecated() {
            boolean needsScan = cs == CommentStyle.JAVADOC && !scanned;
            if (needsScan) {
                scanDocComment();
            }
            return deprecatedFlag;
        }

        /** Scan a documentation comment line by line and record whether
         *  the deprecation tag is present at the beginning of a line, i.e.
         *  after optional white space, stars and more white space. The
         *  comment is marked as scanned however the scan ends.
         */
        protected void scanDocComment() {
            final U in = comment_reader;
            try {
                in.bp += 3; // '/**'
                in.ch = in.buf[in.bp];

                forEachLine:
                while (in.bp < in.buflen) {

                    // Skip optional WhiteSpace at beginning of line
                    skipHorizontalWhitespace();

                    // Skip optional consecutive Stars; a star followed by
                    // a slash ends the comment.
                    while (in.bp < in.buflen && in.ch == '*') {
                        in.scanCommentChar();
                        if (in.ch == '/') {
                            return;
                        }
                    }

                    // Skip optional WhiteSpace after Stars
                    skipHorizontalWhitespace();

                    // At beginning of line in the JavaDoc sense: try to
                    // match the tag, one character at a time.
                    boolean sawTag = false;
                    if (!deprecatedFlag) {
                        final String tag = "@deprecated";
                        int matched = 0;
                        while (in.bp < in.buflen && in.ch == tag.charAt(matched)) {
                            in.scanCommentChar();
                            if (++matched == tag.length()) {
                                sawTag = true;
                                break;
                            }
                        }
                    }

                    // The tag only counts when it is followed by white
                    // space or by the end of the comment.
                    if (sawTag && in.bp < in.buflen) {
                        if (Character.isWhitespace(in.ch)) {
                            deprecatedFlag = true;
                        } else if (in.ch == '*') {
                            in.scanCommentChar();
                            if (in.ch == '/') {
                                deprecatedFlag = true;
                                return;
                            }
                        }
                    }

                    // Skip rest of line
                    while (in.bp < in.buflen) {
                        final char current = in.ch;
                        if (current == '*') {
                            in.scanCommentChar();
                            if (in.ch == '/') {
                                return;
                            }
                        } else if (current == CR) { // (Spec 3.4)
                            in.scanCommentChar();
                            if (in.ch == LF) {
                                // CR LF counts as one line terminator.
                                in.scanCommentChar();
                            }
                            continue forEachLine;
                        } else if (current == LF) { // (Spec 3.4)
                            in.scanCommentChar();
                            continue forEachLine;
                        } else {
                            in.scanCommentChar();
                        }
                    } // rest of line
                } // forEachLine
            } finally {
                scanned = true;
            }
        }

        /** Advance the comment reader past spaces, tabs and form feeds. */
        private void skipHorizontalWhitespace() {
            while (comment_reader.bp < comment_reader.buflen
                    && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
                comment_reader.scanCommentChar();
            }
        }
    }
}