1 /*
2  * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 package com.sun.tools.javac.parser;
27 
28 import com.sun.tools.javac.code.Source;
29 import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
30 import com.sun.tools.javac.util.*;
31 
32 import java.nio.CharBuffer;
33 
34 import static com.sun.tools.javac.parser.Tokens.*;
35 import static com.sun.tools.javac.util.LayoutCharacters.*;
36 
37 /** The lexical analyzer maps an input stream consisting of
38  *  ASCII characters and Unicode escapes into a token sequence.
39  *
40  *  <p><b>This is NOT part of any supported API.
41  *  If you write code that depends on this, you do so at your own risk.
42  *  This code and its internal interfaces are subject to change or
43  *  deletion without notice.</b>
44  */
45 public class JavaTokenizer {
46 
    /** Compile-time debug switch: when true, the spans handed to
     *  processComment, processWhiteSpace and processLineTerminator, and the
     *  span of every token returned by readToken(), are echoed to System.out.
     */
    private static final boolean scannerDebug = false;

    /** Allow hex floating-point literals.
     *  Initialized from the source level; flipped to true after the first
     *  "unsupported" error so that the error is reported only once.
     */
    private boolean allowHexFloats;

    /** Allow binary literals.
     *  Initialized from the source level; flipped to true after the first
     *  "unsupported" error so that the error is reported only once.
     */
    private boolean allowBinaryLiterals;

    /** Allow underscores in literals.
     *  Initialized from the source level; flipped to true after the first
     *  "unsupported" error so that the error is reported only once.
     */
    private boolean allowUnderscoresInLiterals;

    /** The source language setting.
     */
    private Source source;

    /** The log to be used for error reporting.
     */
    private final Log log;

    /** The token factory. */
    private final Tokens tokens;

    /** The token kind, set by readToken().
     */
    protected TokenKind tk;

    /** The token's radix, set by readToken().
     */
    protected int radix;

    /** The token's name, set by readToken().
     */
    protected Name name;

    /** The position where a lexical error occurred.
     */
    protected int errPos = Position.NOPOS;

    /** The Unicode reader (low-level stream reader).
     */
    protected UnicodeReader reader;

    /** The factory which created this tokenizer; also passed to the
     *  readers created for scanned comments.
     */
    protected ScannerFactory fac;
93 
94     private static final boolean hexFloatsWork = hexFloatsWork();
hexFloatsWork()95     private static boolean hexFloatsWork() {
96         try {
97             Float.valueOf("0x1.0p1");
98             return true;
99         } catch (NumberFormatException ex) {
100             return false;
101         }
102     }
103 
    /**
     * Create a tokenizer over the given character buffer by wrapping it in
     * a new {@link UnicodeReader} and delegating to the reader-based
     * constructor.  The input might be modified.
     *
     * NOTE(review): the previous wording of this comment advised, in order
     * to avoid copying the input, ensuring that
     * {@code inputLength < input.length} or that
     * {@code input[input.length - 1]} is a white space character.  Those
     * names match the {@code char[]} overload rather than this one; confirm
     * against UnicodeReader how that advice applies to a CharBuffer.
     *
     * @param fac the factory which created this Scanner
     * @param buf the input, might be modified
     */
    protected JavaTokenizer(ScannerFactory fac, CharBuffer buf) {
        this(fac, new UnicodeReader(fac, buf));
    }
117 
    /**
     * Create a tokenizer over a character array by wrapping it in a new
     * {@link UnicodeReader} and delegating to the reader-based constructor.
     *
     * @param fac the factory which created this tokenizer
     * @param buf the input characters
     * @param inputLength forwarded unchanged to the UnicodeReader;
     *        presumably the number of valid characters in {@code buf} --
     *        TODO confirm against UnicodeReader
     */
    protected JavaTokenizer(ScannerFactory fac, char[] buf, int inputLength) {
        this(fac, new UnicodeReader(fac, buf, inputLength));
    }
121 
JavaTokenizer(ScannerFactory fac, UnicodeReader reader)122     protected JavaTokenizer(ScannerFactory fac, UnicodeReader reader) {
123         this.fac = fac;
124         this.log = fac.log;
125         this.tokens = fac.tokens;
126         this.source = fac.source;
127         this.reader = reader;
128         this.allowBinaryLiterals = source.allowBinaryLiterals();
129         this.allowHexFloats = source.allowHexFloats();
130         this.allowUnderscoresInLiterals = source.allowUnderscoresInLiterals();
131     }
132 
133     /** Report an error at the given position using the provided arguments.
134      */
lexError(int pos, String key, Object... args)135     protected void lexError(int pos, String key, Object... args) {
136         log.error(pos, key, args);
137         tk = TokenKind.ERROR;
138         errPos = pos;
139     }
140 
141     /** Read next character in character or string literal and copy into sbuf.
142      */
scanLitChar(int pos)143     private void scanLitChar(int pos) {
144         if (reader.ch == '\\') {
145             if (reader.peekChar() == '\\' && !reader.isUnicode()) {
146                 reader.skipChar();
147                 reader.putChar('\\', true);
148             } else {
149                 reader.scanChar();
150                 switch (reader.ch) {
151                 case '0': case '1': case '2': case '3':
152                 case '4': case '5': case '6': case '7':
153                     char leadch = reader.ch;
154                     int oct = reader.digit(pos, 8);
155                     reader.scanChar();
156                     if ('0' <= reader.ch && reader.ch <= '7') {
157                         oct = oct * 8 + reader.digit(pos, 8);
158                         reader.scanChar();
159                         if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') {
160                             oct = oct * 8 + reader.digit(pos, 8);
161                             reader.scanChar();
162                         }
163                     }
164                     reader.putChar((char)oct);
165                     break;
166                 case 'b':
167                     reader.putChar('\b', true); break;
168                 case 't':
169                     reader.putChar('\t', true); break;
170                 case 'n':
171                     reader.putChar('\n', true); break;
172                 case 'f':
173                     reader.putChar('\f', true); break;
174                 case 'r':
175                     reader.putChar('\r', true); break;
176                 case '\'':
177                     reader.putChar('\'', true); break;
178                 case '\"':
179                     reader.putChar('\"', true); break;
180                 case '\\':
181                     reader.putChar('\\', true); break;
182                 default:
183                     lexError(reader.bp, "illegal.esc.char");
184                 }
185             }
186         } else if (reader.bp != reader.buflen) {
187             reader.putChar(true);
188         }
189     }
190 
scanDigits(int pos, int digitRadix)191     private void scanDigits(int pos, int digitRadix) {
192         char saveCh;
193         int savePos;
194         do {
195             if (reader.ch != '_') {
196                 reader.putChar(false);
197             } else {
198                 if (!allowUnderscoresInLiterals) {
199                     lexError(pos, "unsupported.underscore.lit", source.name);
200                     allowUnderscoresInLiterals = true;
201                 }
202             }
203             saveCh = reader.ch;
204             savePos = reader.bp;
205             reader.scanChar();
206         } while (reader.digit(pos, digitRadix) >= 0 || reader.ch == '_');
207         if (saveCh == '_')
208             lexError(savePos, "illegal.underscore");
209     }
210 
211     /** Read fractional part of hexadecimal floating point number.
212      */
scanHexExponentAndSuffix(int pos)213     private void scanHexExponentAndSuffix(int pos) {
214         if (reader.ch == 'p' || reader.ch == 'P') {
215             reader.putChar(true);
216             skipIllegalUnderscores();
217             if (reader.ch == '+' || reader.ch == '-') {
218                 reader.putChar(true);
219             }
220             skipIllegalUnderscores();
221             if ('0' <= reader.ch && reader.ch <= '9') {
222                 scanDigits(pos, 10);
223                 if (!allowHexFloats) {
224                     lexError(pos, "unsupported.fp.lit", source.name);
225                     allowHexFloats = true;
226                 }
227                 else if (!hexFloatsWork)
228                     lexError(pos, "unsupported.cross.fp.lit");
229             } else
230                 lexError(pos, "malformed.fp.lit");
231         } else {
232             lexError(pos, "malformed.fp.lit");
233         }
234         if (reader.ch == 'f' || reader.ch == 'F') {
235             reader.putChar(true);
236             tk = TokenKind.FLOATLITERAL;
237             radix = 16;
238         } else {
239             if (reader.ch == 'd' || reader.ch == 'D') {
240                 reader.putChar(true);
241             }
242             tk = TokenKind.DOUBLELITERAL;
243             radix = 16;
244         }
245     }
246 
247     /** Read fractional part of floating point number.
248      */
scanFraction(int pos)249     private void scanFraction(int pos) {
250         skipIllegalUnderscores();
251         if ('0' <= reader.ch && reader.ch <= '9') {
252             scanDigits(pos, 10);
253         }
254         int sp1 = reader.sp;
255         if (reader.ch == 'e' || reader.ch == 'E') {
256             reader.putChar(true);
257             skipIllegalUnderscores();
258             if (reader.ch == '+' || reader.ch == '-') {
259                 reader.putChar(true);
260             }
261             skipIllegalUnderscores();
262             if ('0' <= reader.ch && reader.ch <= '9') {
263                 scanDigits(pos, 10);
264                 return;
265             }
266             lexError(pos, "malformed.fp.lit");
267             reader.sp = sp1;
268         }
269     }
270 
271     /** Read fractional part and 'd' or 'f' suffix of floating point number.
272      */
scanFractionAndSuffix(int pos)273     private void scanFractionAndSuffix(int pos) {
274         radix = 10;
275         scanFraction(pos);
276         if (reader.ch == 'f' || reader.ch == 'F') {
277             reader.putChar(true);
278             tk = TokenKind.FLOATLITERAL;
279         } else {
280             if (reader.ch == 'd' || reader.ch == 'D') {
281                 reader.putChar(true);
282             }
283             tk = TokenKind.DOUBLELITERAL;
284         }
285     }
286 
287     /** Read fractional part and 'd' or 'f' suffix of floating point number.
288      */
scanHexFractionAndSuffix(int pos, boolean seendigit)289     private void scanHexFractionAndSuffix(int pos, boolean seendigit) {
290         radix = 16;
291         Assert.check(reader.ch == '.');
292         reader.putChar(true);
293         skipIllegalUnderscores();
294         if (reader.digit(pos, 16) >= 0) {
295             seendigit = true;
296             scanDigits(pos, 16);
297         }
298         if (!seendigit)
299             lexError(pos, "invalid.hex.number");
300         else
301             scanHexExponentAndSuffix(pos);
302     }
303 
skipIllegalUnderscores()304     private void skipIllegalUnderscores() {
305         if (reader.ch == '_') {
306             lexError(reader.bp, "illegal.underscore");
307             while (reader.ch == '_')
308                 reader.scanChar();
309         }
310     }
311 
312     /** Read a number.
313      *  @param radix  The radix of the number; one of 2, j8, 10, 16.
314      */
scanNumber(int pos, int radix)315     private void scanNumber(int pos, int radix) {
316         // for octal, allow base-10 digit in case it's a float literal
317         this.radix = radix;
318         int digitRadix = (radix == 8 ? 10 : radix);
319         boolean seendigit = false;
320         if (reader.digit(pos, digitRadix) >= 0) {
321             seendigit = true;
322             scanDigits(pos, digitRadix);
323         }
324         if (radix == 16 && reader.ch == '.') {
325             scanHexFractionAndSuffix(pos, seendigit);
326         } else if (seendigit && radix == 16 && (reader.ch == 'p' || reader.ch == 'P')) {
327             scanHexExponentAndSuffix(pos);
328         } else if (digitRadix == 10 && reader.ch == '.') {
329             reader.putChar(true);
330             scanFractionAndSuffix(pos);
331         } else if (digitRadix == 10 &&
332                    (reader.ch == 'e' || reader.ch == 'E' ||
333                     reader.ch == 'f' || reader.ch == 'F' ||
334                     reader.ch == 'd' || reader.ch == 'D')) {
335             scanFractionAndSuffix(pos);
336         } else {
337             if (reader.ch == 'l' || reader.ch == 'L') {
338                 reader.scanChar();
339                 tk = TokenKind.LONGLITERAL;
340             } else {
341                 tk = TokenKind.INTLITERAL;
342             }
343         }
344     }
345 
346     /** Read an identifier.
347      */
scanIdent()348     private void scanIdent() {
349         boolean isJavaIdentifierPart;
350         char high;
351         reader.putChar(true);
352         do {
353             switch (reader.ch) {
354             case 'A': case 'B': case 'C': case 'D': case 'E':
355             case 'F': case 'G': case 'H': case 'I': case 'J':
356             case 'K': case 'L': case 'M': case 'N': case 'O':
357             case 'P': case 'Q': case 'R': case 'S': case 'T':
358             case 'U': case 'V': case 'W': case 'X': case 'Y':
359             case 'Z':
360             case 'a': case 'b': case 'c': case 'd': case 'e':
361             case 'f': case 'g': case 'h': case 'i': case 'j':
362             case 'k': case 'l': case 'm': case 'n': case 'o':
363             case 'p': case 'q': case 'r': case 's': case 't':
364             case 'u': case 'v': case 'w': case 'x': case 'y':
365             case 'z':
366             case '$': case '_':
367             case '0': case '1': case '2': case '3': case '4':
368             case '5': case '6': case '7': case '8': case '9':
369                 break;
370             case '\u0000': case '\u0001': case '\u0002': case '\u0003':
371             case '\u0004': case '\u0005': case '\u0006': case '\u0007':
372             case '\u0008': case '\u000E': case '\u000F': case '\u0010':
373             case '\u0011': case '\u0012': case '\u0013': case '\u0014':
374             case '\u0015': case '\u0016': case '\u0017':
375             case '\u0018': case '\u0019': case '\u001B':
376             case '\u007F':
377                 reader.scanChar();
378                 continue;
379             case '\u001A': // EOI is also a legal identifier part
380                 if (reader.bp >= reader.buflen) {
381                     name = reader.name();
382                     tk = tokens.lookupKind(name);
383                     return;
384                 }
385                 reader.scanChar();
386                 continue;
387             default:
388                 if (reader.ch < '\u0080') {
389                     // all ASCII range chars already handled, above
390                     isJavaIdentifierPart = false;
391                 } else {
392                     if (Character.isIdentifierIgnorable(reader.ch)) {
393                         reader.scanChar();
394                         continue;
395                     } else {
396                         high = reader.scanSurrogates();
397                         if (high != 0) {
398                             reader.putChar(high);
399                             isJavaIdentifierPart = Character.isJavaIdentifierPart(
400                                 Character.toCodePoint(high, reader.ch));
401                         } else {
402                             isJavaIdentifierPart = Character.isJavaIdentifierPart(reader.ch);
403                         }
404                     }
405                 }
406                 if (!isJavaIdentifierPart) {
407                     name = reader.name();
408                     tk = tokens.lookupKind(name);
409                     return;
410                 }
411             }
412             reader.putChar(true);
413         } while (true);
414     }
415 
416     /** Return true if reader.ch can be part of an operator.
417      */
isSpecial(char ch)418     private boolean isSpecial(char ch) {
419         switch (ch) {
420         case '!': case '%': case '&': case '*': case '?':
421         case '+': case '-': case ':': case '<': case '=':
422         case '>': case '^': case '|': case '~':
423         case '@':
424             return true;
425         default:
426             return false;
427         }
428     }
429 
430     /** Read longest possible sequence of special characters and convert
431      *  to token.
432      */
scanOperator()433     private void scanOperator() {
434         while (true) {
435             reader.putChar(false);
436             Name newname = reader.name();
437             TokenKind tk1 = tokens.lookupKind(newname);
438             if (tk1 == TokenKind.IDENTIFIER) {
439                 reader.sp--;
440                 break;
441             }
442             tk = tk1;
443             reader.scanChar();
444             if (!isSpecial(reader.ch)) break;
445         }
446     }
447 
448     /** Read token.
449      */
readToken()450     public Token readToken() {
451 
452         reader.sp = 0;
453         name = null;
454         radix = 0;
455 
456         int pos = 0;
457         int endPos = 0;
458         List<Comment> comments = null;
459 
460         try {
461             loop: while (true) {
462                 pos = reader.bp;
463                 switch (reader.ch) {
464                 case ' ': // (Spec 3.6)
465                 case '\t': // (Spec 3.6)
466                 case FF: // (Spec 3.6)
467                     do {
468                         reader.scanChar();
469                     } while (reader.ch == ' ' || reader.ch == '\t' || reader.ch == FF);
470                     processWhiteSpace(pos, reader.bp);
471                     break;
472                 case LF: // (Spec 3.4)
473                     reader.scanChar();
474                     processLineTerminator(pos, reader.bp);
475                     break;
476                 case CR: // (Spec 3.4)
477                     reader.scanChar();
478                     if (reader.ch == LF) {
479                         reader.scanChar();
480                     }
481                     processLineTerminator(pos, reader.bp);
482                     break;
483                 case 'A': case 'B': case 'C': case 'D': case 'E':
484                 case 'F': case 'G': case 'H': case 'I': case 'J':
485                 case 'K': case 'L': case 'M': case 'N': case 'O':
486                 case 'P': case 'Q': case 'R': case 'S': case 'T':
487                 case 'U': case 'V': case 'W': case 'X': case 'Y':
488                 case 'Z':
489                 case 'a': case 'b': case 'c': case 'd': case 'e':
490                 case 'f': case 'g': case 'h': case 'i': case 'j':
491                 case 'k': case 'l': case 'm': case 'n': case 'o':
492                 case 'p': case 'q': case 'r': case 's': case 't':
493                 case 'u': case 'v': case 'w': case 'x': case 'y':
494                 case 'z':
495                 case '$': case '_':
496                     scanIdent();
497                     break loop;
498                 case '0':
499                     reader.scanChar();
500                     if (reader.ch == 'x' || reader.ch == 'X') {
501                         reader.scanChar();
502                         skipIllegalUnderscores();
503                         if (reader.ch == '.') {
504                             scanHexFractionAndSuffix(pos, false);
505                         } else if (reader.digit(pos, 16) < 0) {
506                             lexError(pos, "invalid.hex.number");
507                         } else {
508                             scanNumber(pos, 16);
509                         }
510                     } else if (reader.ch == 'b' || reader.ch == 'B') {
511                         if (!allowBinaryLiterals) {
512                             lexError(pos, "unsupported.binary.lit", source.name);
513                             allowBinaryLiterals = true;
514                         }
515                         reader.scanChar();
516                         skipIllegalUnderscores();
517                         if (reader.digit(pos, 2) < 0) {
518                             lexError(pos, "invalid.binary.number");
519                         } else {
520                             scanNumber(pos, 2);
521                         }
522                     } else {
523                         reader.putChar('0');
524                         if (reader.ch == '_') {
525                             int savePos = reader.bp;
526                             do {
527                                 reader.scanChar();
528                             } while (reader.ch == '_');
529                             if (reader.digit(pos, 10) < 0) {
530                                 lexError(savePos, "illegal.underscore");
531                             }
532                         }
533                         scanNumber(pos, 8);
534                     }
535                     break loop;
536                 case '1': case '2': case '3': case '4':
537                 case '5': case '6': case '7': case '8': case '9':
538                     scanNumber(pos, 10);
539                     break loop;
540                 case '.':
541                     reader.scanChar();
542                     if ('0' <= reader.ch && reader.ch <= '9') {
543                         reader.putChar('.');
544                         scanFractionAndSuffix(pos);
545                     } else if (reader.ch == '.') {
546                         int savePos = reader.bp;
547                         reader.putChar('.'); reader.putChar('.', true);
548                         if (reader.ch == '.') {
549                             reader.scanChar();
550                             reader.putChar('.');
551                             tk = TokenKind.ELLIPSIS;
552                         } else {
553                             lexError(savePos, "illegal.dot");
554                         }
555                     } else {
556                         tk = TokenKind.DOT;
557                     }
558                     break loop;
559                 case ',':
560                     reader.scanChar(); tk = TokenKind.COMMA; break loop;
561                 case ';':
562                     reader.scanChar(); tk = TokenKind.SEMI; break loop;
563                 case '(':
564                     reader.scanChar(); tk = TokenKind.LPAREN; break loop;
565                 case ')':
566                     reader.scanChar(); tk = TokenKind.RPAREN; break loop;
567                 case '[':
568                     reader.scanChar(); tk = TokenKind.LBRACKET; break loop;
569                 case ']':
570                     reader.scanChar(); tk = TokenKind.RBRACKET; break loop;
571                 case '{':
572                     reader.scanChar(); tk = TokenKind.LBRACE; break loop;
573                 case '}':
574                     reader.scanChar(); tk = TokenKind.RBRACE; break loop;
575                 case '/':
576                     reader.scanChar();
577                     if (reader.ch == '/') {
578                         do {
579                             reader.scanCommentChar();
580                         } while (reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen);
581                         if (reader.bp < reader.buflen) {
582                             comments = addComment(comments, processComment(pos, reader.bp, CommentStyle.LINE));
583                         }
584                         break;
585                     } else if (reader.ch == '*') {
586                         boolean isEmpty = false;
587                         reader.scanChar();
588                         CommentStyle style;
589                         if (reader.ch == '*') {
590                             style = CommentStyle.JAVADOC;
591                             reader.scanCommentChar();
592                             if (reader.ch == '/') {
593                                 isEmpty = true;
594                             }
595                         } else {
596                             style = CommentStyle.BLOCK;
597                         }
598                         while (!isEmpty && reader.bp < reader.buflen) {
599                             if (reader.ch == '*') {
600                                 reader.scanChar();
601                                 if (reader.ch == '/') break;
602                             } else {
603                                 reader.scanCommentChar();
604                             }
605                         }
606                         if (reader.ch == '/') {
607                             reader.scanChar();
608                             comments = addComment(comments, processComment(pos, reader.bp, style));
609                             break;
610                         } else {
611                             lexError(pos, "unclosed.comment");
612                             break loop;
613                         }
614                     } else if (reader.ch == '=') {
615                         tk = TokenKind.SLASHEQ;
616                         reader.scanChar();
617                     } else {
618                         tk = TokenKind.SLASH;
619                     }
620                     break loop;
621                 case '\'':
622                     reader.scanChar();
623                     if (reader.ch == '\'') {
624                         lexError(pos, "empty.char.lit");
625                     } else {
626                         if (reader.ch == CR || reader.ch == LF)
627                             lexError(pos, "illegal.line.end.in.char.lit");
628                         scanLitChar(pos);
629                         char ch2 = reader.ch;
630                         if (reader.ch == '\'') {
631                             reader.scanChar();
632                             tk = TokenKind.CHARLITERAL;
633                         } else {
634                             lexError(pos, "unclosed.char.lit");
635                         }
636                     }
637                     break loop;
638                 case '\"':
639                     reader.scanChar();
640                     while (reader.ch != '\"' && reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen)
641                         scanLitChar(pos);
642                     if (reader.ch == '\"') {
643                         tk = TokenKind.STRINGLITERAL;
644                         reader.scanChar();
645                     } else {
646                         lexError(pos, "unclosed.str.lit");
647                     }
648                     break loop;
649                 default:
650                     if (isSpecial(reader.ch)) {
651                         scanOperator();
652                     } else {
653                         boolean isJavaIdentifierStart;
654                         if (reader.ch < '\u0080') {
655                             // all ASCII range chars already handled, above
656                             isJavaIdentifierStart = false;
657                         } else {
658                             char high = reader.scanSurrogates();
659                             if (high != 0) {
660                                 reader.putChar(high);
661 
662                                 isJavaIdentifierStart = Character.isJavaIdentifierStart(
663                                     Character.toCodePoint(high, reader.ch));
664                             } else {
665                                 isJavaIdentifierStart = Character.isJavaIdentifierStart(reader.ch);
666                             }
667                         }
668                         if (isJavaIdentifierStart) {
669                             scanIdent();
670                         } else if (reader.bp == reader.buflen || reader.ch == EOI && reader.bp + 1 == reader.buflen) { // JLS 3.5
671                             tk = TokenKind.EOF;
672                             pos = reader.buflen;
673                         } else {
674                             String arg = (32 < reader.ch && reader.ch < 127) ?
675                                             String.format("%s", reader.ch) :
676                                             String.format("\\u%04x", (int)reader.ch);
677                             lexError(pos, "illegal.char", arg);
678                             reader.scanChar();
679                         }
680                     }
681                     break loop;
682                 }
683             }
684             endPos = reader.bp;
685             switch (tk.tag) {
686                 case DEFAULT: return new Token(tk, pos, endPos, comments);
687                 case NAMED: return new NamedToken(tk, pos, endPos, name, comments);
688                 case STRING: return new StringToken(tk, pos, endPos, reader.chars(), comments);
689                 case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments);
690                 default: throw new AssertionError();
691             }
692         }
693         finally {
694             if (scannerDebug) {
695                     System.out.println("nextToken(" + pos
696                                        + "," + endPos + ")=|" +
697                                        new String(reader.getRawCharacters(pos, endPos))
698                                        + "|");
699             }
700         }
701     }
702     //where
addComment(List<Comment> comments, Comment comment)703         List<Comment> addComment(List<Comment> comments, Comment comment) {
704             return comments == null ?
705                     List.of(comment) :
706                     comments.prepend(comment);
707         }
708 
709     /** Return the position where a lexical error occurred;
710      */
errPos()711     public int errPos() {
712         return errPos;
713     }
714 
715     /** Set the position where a lexical error occurred;
716      */
errPos(int pos)717     public void errPos(int pos) {
718         errPos = pos;
719     }
720 
721     /**
722      * Called when a complete comment has been scanned. pos and endPos
723      * will mark the comment boundary.
724      */
processComment(int pos, int endPos, CommentStyle style)725     protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) {
726         if (scannerDebug)
727             System.out.println("processComment(" + pos
728                                + "," + endPos + "," + style + ")=|"
729                                + new String(reader.getRawCharacters(pos, endPos))
730                                + "|");
731         char[] buf = reader.getRawCharacters(pos, endPos);
732         return new BasicComment<UnicodeReader>(new UnicodeReader(fac, buf, buf.length), style);
733     }
734 
735     /**
736      * Called when a complete whitespace run has been scanned. pos and endPos
737      * will mark the whitespace boundary.
738      */
processWhiteSpace(int pos, int endPos)739     protected void processWhiteSpace(int pos, int endPos) {
740         if (scannerDebug)
741             System.out.println("processWhitespace(" + pos
742                                + "," + endPos + ")=|" +
743                                new String(reader.getRawCharacters(pos, endPos))
744                                + "|");
745     }
746 
747     /**
748      * Called when a line terminator has been processed.
749      */
processLineTerminator(int pos, int endPos)750     protected void processLineTerminator(int pos, int endPos) {
751         if (scannerDebug)
752             System.out.println("processTerminator(" + pos
753                                + "," + endPos + ")=|" +
754                                new String(reader.getRawCharacters(pos, endPos))
755                                + "|");
756     }
757 
758     /** Build a map for translating between line numbers and
759      * positions in the input.
760      *
761      * @return a LineMap */
getLineMap()762     public Position.LineMap getLineMap() {
763         return Position.makeLineMap(reader.getRawCharacters(), reader.buflen, false);
764     }
765 
766 
767     /**
768     * Scan a documentation comment; determine if a deprecated tag is present.
769     * Called once the initial /, * have been skipped, positioned at the second *
770     * (which is treated as the beginning of the first line).
771     * Stops positioned at the closing '/'.
772     */
773     protected static class BasicComment<U extends UnicodeReader> implements Comment {
774 
775         CommentStyle cs;
776         U comment_reader;
777 
778         protected boolean deprecatedFlag = false;
779         protected boolean scanned = false;
780 
BasicComment(U comment_reader, CommentStyle cs)781         protected BasicComment(U comment_reader, CommentStyle cs) {
782             this.comment_reader = comment_reader;
783             this.cs = cs;
784         }
785 
getText()786         public String getText() {
787             return null;
788         }
789 
getSourcePos(int pos)790         public int getSourcePos(int pos) {
791             return -1;
792         }
793 
getStyle()794         public CommentStyle getStyle() {
795             return cs;
796         }
797 
isDeprecated()798         public boolean isDeprecated() {
799             if (!scanned && cs == CommentStyle.JAVADOC) {
800                 scanDocComment();
801             }
802             return deprecatedFlag;
803         }
804 
805         @SuppressWarnings("fallthrough")
scanDocComment()806         protected void scanDocComment() {
807             try {
808                 boolean deprecatedPrefix = false;
809 
810                 comment_reader.bp += 3; // '/**'
811                 comment_reader.ch = comment_reader.buf[comment_reader.bp];
812 
813                 forEachLine:
814                 while (comment_reader.bp < comment_reader.buflen) {
815 
816                     // Skip optional WhiteSpace at beginning of line
817                     while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
818                         comment_reader.scanCommentChar();
819                     }
820 
821                     // Skip optional consecutive Stars
822                     while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') {
823                         comment_reader.scanCommentChar();
824                         if (comment_reader.ch == '/') {
825                             return;
826                         }
827                     }
828 
829                     // Skip optional WhiteSpace after Stars
830                     while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
831                         comment_reader.scanCommentChar();
832                     }
833 
834                     deprecatedPrefix = false;
835                     // At beginning of line in the JavaDoc sense.
836                     if (!deprecatedFlag) {
837                         String deprecated = "@deprecated";
838                         int i = 0;
839                         while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == deprecated.charAt(i)) {
840                             comment_reader.scanCommentChar();
841                             i++;
842                             if (i == deprecated.length()) {
843                                 deprecatedPrefix = true;
844                                 break;
845                             }
846                         }
847                     }
848 
849                     if (deprecatedPrefix && comment_reader.bp < comment_reader.buflen) {
850                         if (Character.isWhitespace(comment_reader.ch)) {
851                             deprecatedFlag = true;
852                         } else if (comment_reader.ch == '*') {
853                             comment_reader.scanCommentChar();
854                             if (comment_reader.ch == '/') {
855                                 deprecatedFlag = true;
856                                 return;
857                             }
858                         }
859                     }
860 
861                     // Skip rest of line
862                     while (comment_reader.bp < comment_reader.buflen) {
863                         switch (comment_reader.ch) {
864                             case '*':
865                                 comment_reader.scanCommentChar();
866                                 if (comment_reader.ch == '/') {
867                                     return;
868                                 }
869                                 break;
870                             case CR: // (Spec 3.4)
871                                 comment_reader.scanCommentChar();
872                                 if (comment_reader.ch != LF) {
873                                     continue forEachLine;
874                                 }
875                             /* fall through to LF case */
876                             case LF: // (Spec 3.4)
877                                 comment_reader.scanCommentChar();
878                                 continue forEachLine;
879                             default:
880                                 comment_reader.scanCommentChar();
881                         }
882                     } // rest of line
883                 } // forEachLine
884                 return;
885             } finally {
886                 scanned = true;
887             }
888         }
889     }
890 }
891