1 /*
2  * Copyright (c) 1994, 2015, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 package sun.tools.java;
27 
28 import java.io.IOException;
29 import java.io.InputStream;
30 import java.util.Hashtable;
31 
32 /**
33  * A Scanner for Java tokens. Errors are reported
34  * to the environment object.<p>
35  *
36  * The scanner keeps track of the current token,
37  * the value of the current token (if any), and the start
38  * position of the current token.<p>
39  *
40  * The scan() method advances the scanner to the next
41  * token in the input.<p>
42  *
43  * The match() method is used to quickly match opening
44  * brackets (ie: '(', '{', or '[') with their closing
45  * counter part. This is useful during error recovery.<p>
46  *
 * A position consists of: ((linenr << WHEREOFFSETBITS) | offset)
48  * this means that both the line number and the exact offset into
49  * the file are encoded in each position value.<p>
50  *
51  * The compiler treats either "\n", "\r" or "\r\n" as the
52  * end of a line.<p>
53  *
54  * WARNING: The contents of this source file are not part of any
55  * supported API.  Code that depends on them does so at its own risk:
56  * they are subject to change or removal without notice.
57  *
58  * @author      Arthur van Hoff
59  */
60 
61 @SuppressWarnings("deprecation")
62 public
63 class Scanner implements Constants {
64     /**
65      * The increment for each character.
66      */
67     public static final long OFFSETINC = 1;
68 
69     /**
70      * The increment for each line.
71      */
72     public static final long LINEINC = 1L << WHEREOFFSETBITS;
73 
74     /**
75      * End of input
76      */
77     public static final int EOF = -1;
78 
79     /**
80      * Where errors are reported
81      */
82     public Environment env;
83 
84     /**
85      * Input reader
86      */
87     protected ScannerInputReader in;
88 
89     /**
90      * If true, present all comments as tokens.
91      * Contents are not saved, but positions are recorded accurately,
92      * so the comment can be recovered from the text.
93      * Line terminations are also returned as comment tokens,
94      * and may be distinguished by their start and end positions,
95      * which are equal (meaning, these tokens contain no chars).
96      */
97    public boolean scanComments = false;
98 
99     /**
100      * Current token
101      */
102     public int token;
103 
104     /**
105      * The position of the current token
106      */
107     public long pos;
108 
109     /**
110      * The position of the previous token
111      */
112     public long prevPos;
113 
114     /**
115      * The current character
116      */
117     protected int ch;
118 
119     /*
120      * Token values.
121      */
122     public char charValue;
123     public int intValue;
124     public long longValue;
125     public float floatValue;
126     public double doubleValue;
127     public String stringValue;
128     public Identifier idValue;
129     public int radix;   // Radix, when reading int or long
130 
131     /*
132      * A doc comment preceding the most recent token
133      */
134     public String docComment;
135 
136     /*
137      * A growable character buffer.
138      */
139     private int count;
140     private char buffer[] = new char[1024];
growBuffer()141     private void growBuffer() {
142         char newBuffer[] = new char[buffer.length * 2];
143         System.arraycopy(buffer, 0, newBuffer, 0, buffer.length);
144         buffer = newBuffer;
145     }
146 
147     // The following two methods have been hand-inlined in
148     // scanDocComment.  If you make changes here, you should
149     // check to see if scanDocComment also needs modification.
putc(int ch)150     private void putc(int ch) {
151         if (count == buffer.length) {
152             growBuffer();
153         }
154         buffer[count++] = (char)ch;
155     }
156 
bufferString()157     private String bufferString() {
158         return new String(buffer, 0, count);
159     }
160 
161     /**
162      * Create a scanner to scan an input stream.
163      */
Scanner(Environment env, InputStream in)164     public Scanner(Environment env, InputStream in) throws IOException {
165         this.env = env;
166         useInputStream(in);
167     }
168 
169     /**
170      * Setup input from the given input stream,
171      * and scan the first token from it.
172      */
useInputStream(InputStream in)173     protected void useInputStream(InputStream in) throws IOException {
174         try {
175             this.in = new ScannerInputReader(env, in);
176         } catch (Exception e) {
177             env.setCharacterEncoding(null);
178             this.in = new ScannerInputReader(env, in);
179         }
180 
181         ch = this.in.read();
182         prevPos = this.in.pos;
183 
184         scan();
185     }
186 
187     /**
     * Create a scanner that has no input stream yet.  A subclass is
     * expected to call useInputStream before scanning.
189      */
    protected Scanner(Environment env) {
        // Only the error-reporting environment is recorded here; no input
        // is attached and no token is scanned by this constructor.
        this.env = env;
        // Expect the subclass to call useInputStream at the right time.
    }
194 
195     /**
196      * Define a keyword.
197      */
defineKeyword(int val)198     private static void defineKeyword(int val) {
199         Identifier.lookup(opNames[val]).setType(val);
200     }
201 
202     /**
     * Register every keyword by setting the token type on its Identifier.
204      */
205     static {
206         // Statement keywords
207         defineKeyword(FOR);
208         defineKeyword(IF);
209         defineKeyword(ELSE);
210         defineKeyword(WHILE);
211         defineKeyword(DO);
212         defineKeyword(SWITCH);
213         defineKeyword(CASE);
214         defineKeyword(DEFAULT);
215         defineKeyword(BREAK);
216         defineKeyword(CONTINUE);
217         defineKeyword(RETURN);
218         defineKeyword(TRY);
219         defineKeyword(CATCH);
220         defineKeyword(FINALLY);
221         defineKeyword(THROW);
222 
223         // Type defineKeywords
224         defineKeyword(BYTE);
225         defineKeyword(CHAR);
226         defineKeyword(SHORT);
227         defineKeyword(INT);
228         defineKeyword(LONG);
229         defineKeyword(FLOAT);
230         defineKeyword(DOUBLE);
231         defineKeyword(VOID);
232         defineKeyword(BOOLEAN);
233 
234         // Expression keywords
235         defineKeyword(INSTANCEOF);
236         defineKeyword(TRUE);
237         defineKeyword(FALSE);
238         defineKeyword(NEW);
239         defineKeyword(THIS);
240         defineKeyword(SUPER);
241         defineKeyword(NULL);
242 
243         // Declaration keywords
244         defineKeyword(IMPORT);
245         defineKeyword(CLASS);
246         defineKeyword(EXTENDS);
247         defineKeyword(IMPLEMENTS);
248         defineKeyword(INTERFACE);
249         defineKeyword(PACKAGE);
250         defineKeyword(THROWS);
251 
252         // Modifier keywords
253         defineKeyword(PRIVATE);
254         defineKeyword(PUBLIC);
255         defineKeyword(PROTECTED);
256         defineKeyword(STATIC);
257         defineKeyword(TRANSIENT);
258         defineKeyword(SYNCHRONIZED);
259         defineKeyword(NATIVE);
260         defineKeyword(ABSTRACT);
261         defineKeyword(VOLATILE);
262         defineKeyword(FINAL);
263         defineKeyword(STRICTFP);
264 
265         // reserved keywords
266         defineKeyword(CONST);
267         defineKeyword(GOTO);
268     }
269 
270     /**
271      * Scan a comment. This method should be
272      * called once the initial /, * and the next
273      * character have been read.
274      */
skipComment()275     private void skipComment() throws IOException {
276         while (true) {
277             switch (ch) {
278               case EOF:
279                 env.error(pos, "eof.in.comment");
280                 return;
281 
282               case '*':
283                 if ((ch = in.read()) == '/')  {
284                     ch = in.read();
285                     return;
286                 }
287                 break;
288 
289               default:
290                 ch = in.read();
291                 break;
292             }
293         }
294     }
295 
296     /**
297      * Scan a doc comment. This method should be called
298      * once the initial /, * and * have been read. It gathers
     * the content of the comment (without leading spaces and '*'s)
300      * in the string buffer.
301      */
    private String scanDocComment() throws IOException {
        // Summary: reads the remainder of a doc comment and returns its
        // text.  On each line the leading spaces/tabs and the run of '*'
        // that follows them are dropped; newlines are kept; trailing
        // spaces, tabs and stars of the whole comment are trimmed.  On
        // return, the field `ch' holds the first character after the
        // closing "*/" (or EOF if the comment was unterminated, in which
        // case "eof.in.comment" is reported).
        //
        // Note: this method has been hand-optimized to yield
        // better performance.  This was done after it was noted
        // that javadoc spent a great deal of its time here.
        // This should also help the performance of the compiler
        // as well -- it scans the doc comments to find
        // @deprecated tags.
        //
        // The logic of the method has been completely rewritten
        // to avoid the use of flags that need to be looked at
        // for every character read.  Members that are accessed
        // more than once have been stored in local variables.
        // The methods putc() and bufferString() have been
        // inlined by hand.  Extra cases have been added to
        // switch statements to trick the compiler into generating
        // a tableswitch instead of a lookupswitch.
        //
        // This implementation aims to preserve the previous
        // behavior of this method.

        // The character currently being examined (local stand-in for `ch').
        int c;

        // Put `in' in a local variable.
        final ScannerInputReader in = this.in;

        // We maintain the buffer locally rather than calling putc().
        // Note that this local `count' shadows the field of the same
        // name; the field is neither read nor written by this method.
        char[] buffer = this.buffer;
        int count = 0;

        // We are called pointing at the second star of the doc
        // comment:
        //
        // Input: /** the rest of the comment ... */
        //          ^
        //
        // We rely on this in the code below.

        // Consume any number of stars.
        while ((c = in.read()) == '*')
            ;

        // Is the comment of the form /**/, /***/, /****/, etc.?
        if (c == '/') {
            // Set ch and return
            ch = in.read();
            return "";
        }

        // Skip a newline on the first line of the comment.
        // Only a single newline is skipped; any further blank lines
        // end up in the buffer via the textLoop below.
        if (c == '\n') {
            c = in.read();
        }

    outerLoop:
        // The outerLoop processes the doc comment, looping once
        // for each line.  For each line, it first strips off
        // whitespace, then it consumes any stars, then it
        // puts the rest of the line into our buffer.
        while (true) {

            // The wsLoop consumes whitespace from the beginning
            // of each line.
        wsLoop:
            while (true) {
                switch (c) {
                case ' ':
                case '\t':
                    // We could check for other forms of whitespace
                    // as well, but this is left as is for minimum
                    // disturbance of functionality.
                    //
                    // Just skip whitespace.
                    c = in.read();
                    break;

                // We have added extra cases here to trick the
                // compiler into using a tableswitch instead of
                // a lookupswitch.  They can be removed without
                // a change in meaning.
                case 10: case 11: case 12: case 13: case 14: case 15:
                case 16: case 17: case 18: case 19: case 20: case 21:
                case 22: case 23: case 24: case 25: case 26: case 27:
                case 28: case 29: case 30: case 31:
                default:
                    // We've seen something that isn't whitespace,
                    // jump out.  (EOF also lands here; it is dealt
                    // with by the textLoop below.)
                    break wsLoop;
                }
            } // end wsLoop.

            // Are there stars here?  If so, consume them all
            // and check for the end of comment.
            if (c == '*') {
                // Skip all of the stars...
                do {
                    c = in.read();
                } while (c == '*');

                // ...then check for the closing slash.
                if (c == '/') {
                    // We're done with the doc comment.
                    // Set ch and break out.
                    ch = in.read();
                    break outerLoop;
                }
                // Otherwise the stars were line decoration: they are
                // dropped and c is the first character after them.
            }

            // The textLoop processes the rest of the characters
            // on the line, adding them to our buffer.
        textLoop:
            while (true) {
                switch (c) {
                case EOF:
                    // We've seen a premature EOF.  Break out
                    // of the loop.  Whatever was gathered so far is
                    // still returned after trimming below.
                    env.error(pos, "eof.in.comment");
                    ch = EOF;
                    break outerLoop;

                case '*':
                    // Is this just a star?  Or is this the
                    // end of a comment?
                    c = in.read();
                    if (c == '/') {
                        // This is the end of the comment,
                        // set ch and return our buffer.
                        ch = in.read();
                        break outerLoop;
                    }
                    // This is just an ordinary star.  Add it to
                    // the buffer.  c already holds the character
                    // after the star, so no further read happens here.
                    if (count == buffer.length) {
                        // growBuffer() replaces this.buffer, so the
                        // local alias must be refreshed afterwards.
                        growBuffer();
                        buffer = this.buffer;
                    }
                    buffer[count++] = '*';
                    break;

                case '\n':
                    // We've seen a newline.  Add it to our
                    // buffer and break out of this loop,
                    // starting fresh on a new line.
                    if (count == buffer.length) {
                        growBuffer();
                        buffer = this.buffer;
                    }
                    buffer[count++] = '\n';
                    c = in.read();
                    break textLoop;

                // Again, the extra cases here are a trick
                // to get the compiler to generate a tableswitch.
                case 0: case 1: case 2: case 3: case 4: case 5:
                case 6: case 7: case 8: case 11: case 12: case 13:
                case 14: case 15: case 16: case 17: case 18: case 19:
                case 20: case 21: case 22: case 23: case 24: case 25:
                case 26: case 27: case 28: case 29: case 30: case 31:
                case 32: case 33: case 34: case 35: case 36: case 37:
                case 38: case 39: case 40:
                default:
                    // Add the character to our buffer.
                    if (count == buffer.length) {
                        growBuffer();
                        buffer = this.buffer;
                    }
                    buffer[count++] = (char)c;
                    c = in.read();
                    break;
                }
            } // end textLoop
        } // end outerLoop

        // We have scanned our doc comment.  It is stored in
        // buffer.  The previous implementation of scanDocComment
        // stripped off all trailing spaces and stars from the comment.
        // We will do this as well, so as to cause a minimum of
        // disturbance.  Is this what we want?
        if (count > 0) {
            // Walk backwards from the last stored character until one
            // is found that is not a space, tab or star.  Newlines stop
            // the walk, so a trailing newline is preserved.
            int i = count - 1;
        trailLoop:
            while (i > -1) {
                switch (buffer[i]) {
                case ' ':
                case '\t':
                case '*':
                    i--;
                    break;
                // And again, the extra cases here are a trick
                // to get the compiler to generate a tableswitch.
                case 0: case 1: case 2: case 3: case 4: case 5:
                case 6: case 7: case 8: case 10: case 11: case 12:
                case 13: case 14: case 15: case 16: case 17: case 18:
                case 19: case 20: case 21: case 22: case 23: case 24:
                case 25: case 26: case 27: case 28: case 29: case 30:
                case 31: case 33: case 34: case 35: case 36: case 37:
                case 38: case 39: case 40:
                default:
                    break trailLoop;
                }
            }
            // i is the index of the last character to keep (or -1).
            count = i + 1;

            // Return the text of the doc comment.
            return new String(buffer, 0, count);
        } else {
            return "";
        }
    }
510 
511     /**
512      * Scan a number. The first digit of the number should be the current
513      * character.  We may be scanning hex, decimal, or octal at this point
514      */
515     @SuppressWarnings("fallthrough")
    private void scanNumber() throws IOException {
        // Summary: scans an integer literal starting at the digit in `ch'
        // and sets `token' to INTVAL or LONGVAL with the value in
        // intValue/longValue.  If the text turns out to be a real literal,
        // control is handed to scanReal(), which sets the token instead.
        // Malformed or out-of-range literals are reported through env and
        // yield a value of 0.
        boolean seenNonOctal = false;
        boolean overflow = false;
        boolean seenDigit = false; // used to detect invalid hex number 0xL
        // A leading '0' is tentatively octal; this may change to 16 when
        // an 'x' follows, or the literal may turn out to be a real.
        radix = (ch == '0' ? 8 : 10);
        long value = ch - '0';
        // Start the shared buffer afresh; scanReal() parses from it.
        count = 0;
        putc(ch);               // save character in buffer
    numberLoop:
        for (;;) {
            switch (ch = in.read()) {
              case '.':
                if (radix == 16)
                    break numberLoop; // an illegal character
                scanReal();
                return;

              case '8': case '9':
                // We can't yet throw an error if reading an octal.  We might
                // discover we're really reading a real.
                seenNonOctal = true;
                // Fall through
              case '0': case '1': case '2': case '3':
              case '4': case '5': case '6': case '7':
                seenDigit = true;
                putc(ch);
                if (radix == 10) {
                    // The multiply-then-divide round trip fails exactly
                    // when value * 10 wrapped around.
                    overflow = overflow || (value * 10)/10 != value;
                    value = (value * 10) + (ch - '0');
                    // Flags any negative result except Long.MIN_VALUE
                    // (for which value - 1 wraps to a positive number).
                    // NOTE(review): presumably so that the magnitude of
                    // the most negative long can be written -- confirm.
                    overflow = overflow || (value - 1 < -1);
                } else if (radix == 8) {
                    // The top three bits must be clear before shifting
                    // in another octal digit.
                    overflow = overflow || (value >>> 61) != 0;
                    value = (value << 3) + (ch - '0');
                } else {
                    // Hex: the top four bits must be clear before the shift.
                    overflow = overflow || (value >>> 60) != 0;
                    value = (value << 4) + (ch - '0');
                }
                break;

              case 'd': case 'D': case 'e': case 'E': case 'f': case 'F':
                // Outside of hex these letters belong to a real literal
                // (exponent or type suffix); in hex they are digits.
                if (radix != 16) {
                    scanReal();
                    return;
                }
                // fall through
              case 'a': case 'A': case 'b': case 'B': case 'c': case 'C':
                seenDigit = true;
                putc(ch);
                // For a non-hex literal ch is left on the letter, so the
                // check after the loop reports "invalid.number".
                if (radix != 16)
                    break numberLoop; // an illegal character
                overflow = overflow || (value >>> 60) != 0;
                value = (value << 4) + 10 +
                         Character.toLowerCase((char)ch) - 'a';
                break;

              case 'l': case 'L':
                ch = in.read(); // skip over 'l'
                longValue = value;
                token = LONGVAL;
                break numberLoop;

              case 'x': case 'X':
                // if the first character is a '0' and this is the second
                // letter, then read in a hexadecimal number.  Otherwise, error.
                if (count == 1 && radix == 8) {
                    radix = 16;
                    // The leading '0' does not count as a hex digit.
                    seenDigit = false;
                    break;
                } else {
                    // we'll get an illegal character error
                    break numberLoop;
                }

              default:
                // Any other character ends the literal; it stays in ch.
                intValue = (int)value;
                token = INTVAL;
                break numberLoop;
            }
        } // while true

        // We have just finished reading the number.  The next thing better
        // not be a letter or digit.
        // Note:  There will be deprecation warnings against these uses
        // of Character.isJavaLetterOrDigit and Character.isJavaLetter.
        // Do not fix them yet; allow the compiler to run on pre-JDK1.1 VMs.
        if (Character.isJavaLetterOrDigit((char)ch) || ch == '.') {
            env.error(in.pos, "invalid.number");
            // Swallow the rest of the malformed token so that scanning
            // resumes after it.
            do { ch = in.read(); }
            while (Character.isJavaLetterOrDigit((char)ch) || ch == '.');
            intValue = 0;
            token = INTVAL;
        } else if (radix == 8 && seenNonOctal) {
            // A bogus octal literal.
            intValue = 0;
            token = INTVAL;
            env.error(pos, "invalid.octal.number");
        } else if (radix == 16 && seenDigit == false) {
            // A hex literal with no digits, 0xL, for example.
            intValue = 0;
            token = INTVAL;
            env.error(pos, "invalid.hex.number");
        } else {
            if (token == INTVAL) {
                // Check for overflow.  Note that base 10 literals
                // have different rules than base 8 and 16.
                // Octal and hex may use all 32 bits; decimal may be at
                // most 2147483648 (one more than Integer.MAX_VALUE).
                overflow = overflow ||
                    (value & 0xFFFFFFFF00000000L) != 0 ||
                    (radix == 10 && value > 2147483648L);

                if (overflow) {
                    intValue = 0;

                    // Give a specific error message which tells
                    // the user the range.
                    switch (radix) {
                    case 8:
                        env.error(pos, "overflow.int.oct");
                        break;
                    case 10:
                        env.error(pos, "overflow.int.dec");
                        break;
                    case 16:
                        env.error(pos, "overflow.int.hex");
                        break;
                    default:
                        throw new CompilerError("invalid radix");
                    }
                }
            } else {
                // LONGVAL: only the overflow tracked while accumulating
                // digits applies.
                if (overflow) {
                    longValue = 0;

                    // Give a specific error message which tells
                    // the user the range.
                    switch (radix) {
                    case 8:
                        env.error(pos, "overflow.long.oct");
                        break;
                    case 10:
                        env.error(pos, "overflow.long.dec");
                        break;
                    case 16:
                        env.error(pos, "overflow.long.hex");
                        break;
                    default:
                        throw new CompilerError("invalid radix");
                    }
                }
            }
        }
    }
667 
668     /**
     * Scan a float.  We are either looking at the decimal point, or we have
     * already seen it and put it into the buffer.  We haven't seen an
     * exponent.  Should be called when the current character is either
     * the 'e', 'E' or '.'
673      */
674     @SuppressWarnings("fallthrough")
scanReal()675     private void scanReal() throws IOException {
676         boolean seenExponent = false;
677         boolean isSingleFloat = false;
678         char lastChar;
679         if (ch == '.') {
680             putc(ch);
681             ch = in.read();
682         }
683 
684     numberLoop:
685         for ( ; ; ch = in.read()) {
686             switch (ch) {
687                 case '0': case '1': case '2': case '3': case '4':
688                 case '5': case '6': case '7': case '8': case '9':
689                     putc(ch);
690                     break;
691 
692                 case 'e': case 'E':
693                     if (seenExponent)
694                         break numberLoop; // we'll get a format error
695                     putc(ch);
696                     seenExponent = true;
697                     break;
698 
699                 case '+': case '-':
700                     lastChar = buffer[count - 1];
701                     if (lastChar != 'e' && lastChar != 'E')
702                         break numberLoop; // this isn't an error, though!
703                     putc(ch);
704                     break;
705 
706                 case 'f': case 'F':
707                     ch = in.read(); // skip over 'f'
708                     isSingleFloat = true;
709                     break numberLoop;
710 
711                 case 'd': case 'D':
712                     ch = in.read(); // skip over 'd'
713                     // fall through
714                 default:
715                     break numberLoop;
716             } // sswitch
717         } // loop
718 
719         // we have just finished reading the number.  The next thing better
720         // not be a letter or digit.
721         if (Character.isJavaLetterOrDigit((char)ch) || ch == '.') {
722             env.error(in.pos, "invalid.number");
723             do { ch = in.read(); }
724             while (Character.isJavaLetterOrDigit((char)ch) || ch == '.');
725             doubleValue = 0;
726             token = DOUBLEVAL;
727         } else {
728             token = isSingleFloat ? FLOATVAL : DOUBLEVAL;
729             try {
730                 lastChar = buffer[count - 1];
731                 if (lastChar == 'e' || lastChar == 'E'
732                        || lastChar == '+' || lastChar == '-') {
733                     env.error(in.pos -1, "float.format");
734                 } else if (isSingleFloat) {
735                     String string = bufferString();
736                     floatValue = Float.valueOf(string).floatValue();
737                     if (Float.isInfinite(floatValue)) {
738                         env.error(pos, "overflow.float");
739                     } else if (floatValue == 0 && !looksLikeZero(string)) {
740                         env.error(pos, "underflow.float");
741                     }
742                 } else {
743                     String string = bufferString();
744                     doubleValue = Double.valueOf(string).doubleValue();
745                     if (Double.isInfinite(doubleValue)) {
746                         env.error(pos, "overflow.double");
747                     } else if (doubleValue == 0 && !looksLikeZero(string)) {
748                         env.error(pos, "underflow.double");
749                     }
750                 }
751             } catch (NumberFormatException ee) {
752                 env.error(pos, "float.format");
753                 doubleValue = 0;
754                 floatValue = 0;
755             }
756         }
757         return;
758     }
759 
    // We have a token that parses as a number.  Is this token possibly zero?
    // i.e. does its mantissa contain no non-zero digit?
looksLikeZero(String token)762     private static boolean looksLikeZero(String token) {
763         int length = token.length();
764         for (int i = 0; i < length; i++) {
765             switch (token.charAt(i)) {
766                 case 0: case '.':
767                     continue;
768                 case '1': case '2': case '3': case '4': case '5':
769                 case '6': case '7': case '8': case '9':
770                     return false;
771                 case 'e': case 'E': case 'f': case 'F':
772                     return true;
773             }
774         }
775         return true;
776     }
777 
778     /**
779      * Scan an escape character.
     * @return the character, or -1 if the escape sequence is not
     * recognized (for example, when it escaped an end-of-line); an
     * error is reported in that case.
782      */
scanEscapeChar()783     private int scanEscapeChar() throws IOException {
784         long p = in.pos;
785 
786         switch (ch = in.read()) {
787           case '0': case '1': case '2': case '3':
788           case '4': case '5': case '6': case '7': {
789             int n = ch - '0';
790             for (int i = 2 ; i > 0 ; i--) {
791                 switch (ch = in.read()) {
792                   case '0': case '1': case '2': case '3':
793                   case '4': case '5': case '6': case '7':
794                     n = (n << 3) + ch - '0';
795                     break;
796 
797                   default:
798                     if (n > 0xFF) {
799                         env.error(p, "invalid.escape.char");
800                     }
801                     return n;
802                 }
803             }
804             ch = in.read();
805             if (n > 0xFF) {
806                 env.error(p, "invalid.escape.char");
807             }
808             return n;
809           }
810 
811           case 'r':  ch = in.read(); return '\r';
812           case 'n':  ch = in.read(); return '\n';
813           case 'f':  ch = in.read(); return '\f';
814           case 'b':  ch = in.read(); return '\b';
815           case 't':  ch = in.read(); return '\t';
816           case '\\': ch = in.read(); return '\\';
817           case '\"': ch = in.read(); return '\"';
818           case '\'': ch = in.read(); return '\'';
819         }
820 
821         env.error(p, "invalid.escape.char");
822         ch = in.read();
823         return -1;
824     }
825 
826     /**
827      * Scan a string. The current character
828      * should be the opening " of the string.
829      */
scanString()830     private void scanString() throws IOException {
831         token = STRINGVAL;
832         count = 0;
833         ch = in.read();
834 
835         // Scan a String
836         while (true) {
837             switch (ch) {
838               case EOF:
839                 env.error(pos, "eof.in.string");
840                 stringValue = bufferString();
841                 return;
842 
843               case '\r':
844               case '\n':
845                 ch = in.read();
846                 env.error(pos, "newline.in.string");
847                 stringValue = bufferString();
848                 return;
849 
850               case '"':
851                 ch = in.read();
852                 stringValue = bufferString();
853                 return;
854 
855               case '\\': {
856                 int c = scanEscapeChar();
857                 if (c >= 0) {
858                     putc((char)c);
859                 }
860                 break;
861               }
862 
863               default:
864                 putc(ch);
865                 ch = in.read();
866                 break;
867             }
868         }
869     }
870 
871     /**
872      * Scan a character. The current character should be
873      * the opening ' of the character constant.
874      */
scanCharacter()875     private void scanCharacter() throws IOException {
876         token = CHARVAL;
877 
878         switch (ch = in.read()) {
879           case '\\':
880             int c = scanEscapeChar();
881             charValue = (char)((c >= 0) ? c : 0);
882             break;
883 
884         case '\'':
885             // There are two standard problems this case deals with.  One
886             // is the malformed single quote constant (i.e. the programmer
887             // uses ''' instead of '\'') and the other is the empty
888             // character constant (i.e. '').  Just consume any number of
889             // single quotes and emit an error message.
890             charValue = 0;
891             env.error(pos, "invalid.char.constant");
892             ch = in.read();
893             while (ch == '\'') {
894                 ch = in.read();
895             }
896             return;
897 
898           case '\r':
899           case '\n':
900             charValue = 0;
901             env.error(pos, "invalid.char.constant");
902             return;
903 
904           default:
905             charValue = (char)ch;
906             ch = in.read();
907             break;
908         }
909 
910         if (ch == '\'') {
911             ch = in.read();
912         } else {
913             env.error(pos, "invalid.char.constant");
914             while (true) {
915                 switch (ch) {
916                   case '\'':
917                     ch = in.read();
918                     return;
919                   case ';':
920                   case '\n':
921                   case EOF:
922                     return;
923                   default:
924                     ch = in.read();
925                 }
926             }
927         }
928     }
929 
930     /**
931      * Scan an Identifier. The current character should
932      * be the first character of the identifier.
933      */
scanIdentifier()934     private void scanIdentifier() throws IOException {
935         count = 0;
936 
937         while (true) {
938             putc(ch);
939             switch (ch = in.read()) {
940               case 'a': case 'b': case 'c': case 'd': case 'e':
941               case 'f': case 'g': case 'h': case 'i': case 'j':
942               case 'k': case 'l': case 'm': case 'n': case 'o':
943               case 'p': case 'q': case 'r': case 's': case 't':
944               case 'u': case 'v': case 'w': case 'x': case 'y':
945               case 'z':
946               case 'A': case 'B': case 'C': case 'D': case 'E':
947               case 'F': case 'G': case 'H': case 'I': case 'J':
948               case 'K': case 'L': case 'M': case 'N': case 'O':
949               case 'P': case 'Q': case 'R': case 'S': case 'T':
950               case 'U': case 'V': case 'W': case 'X': case 'Y':
951               case 'Z':
952               case '0': case '1': case '2': case '3': case '4':
953               case '5': case '6': case '7': case '8': case '9':
954               case '$': case '_':
955                 break;
956 
957               default:
958                 if (!Character.isJavaLetterOrDigit((char)ch)) {
959                     idValue = Identifier.lookup(bufferString());
960                     token = idValue.getType();
961                     return;
962                 }
963             }
964         }
965     }
966 
967     /**
968      * The ending position of the current token
969      */
970     // Note: This should be part of the pos itself.
getEndPos()971     public long getEndPos() {
972         return in.pos;
973     }
974 
975     /**
976      * If the current token is IDENT, return the identifier occurrence.
977      * It will be freshly allocated.
978      */
getIdToken()979     public IdentifierToken getIdToken() {
980         return (token != IDENT) ? null : new IdentifierToken(pos, idValue);
981     }
982 
983     /**
984      * Scan the next token.
985      * @return the position of the previous token.
986      */
scan()987    public long scan() throws IOException {
988        return xscan();
989    }
990 
991     @SuppressWarnings("fallthrough")
xscan()992     protected long xscan() throws IOException {
993         final ScannerInputReader in = this.in;
994         long retPos = pos;
995         prevPos = in.pos;
996         docComment = null;
997         while (true) {
998             pos = in.pos;
999 
1000             switch (ch) {
1001               case EOF:
1002                 token = EOF;
1003                 return retPos;
1004 
1005               case '\n':
1006                 if (scanComments) {
1007                     ch = ' ';
1008                     // Avoid this path the next time around.
1009                     // Do not just call in.read; we want to present
1010                     // a null token (and also avoid read-ahead).
1011                     token = COMMENT;
1012                     return retPos;
1013                 }
1014                 // Fall through
1015               case ' ':
1016               case '\t':
1017               case '\f':
1018                 ch = in.read();
1019                 break;
1020 
1021               case '/':
1022                 switch (ch = in.read()) {
1023                   case '/':
1024                     // Parse a // comment
1025                     while (((ch = in.read()) != EOF) && (ch != '\n'));
1026                     if (scanComments) {
1027                         token = COMMENT;
1028                         return retPos;
1029                     }
1030                     break;
1031 
1032                   case '*':
1033                     ch = in.read();
1034                     if (ch == '*') {
1035                         docComment = scanDocComment();
1036                     } else {
1037                         skipComment();
1038                     }
1039                     if (scanComments) {
1040                         return retPos;
1041                     }
1042                     break;
1043 
1044                   case '=':
1045                     ch = in.read();
1046                     token = ASGDIV;
1047                     return retPos;
1048 
1049                   default:
1050                     token = DIV;
1051                     return retPos;
1052                 }
1053                 break;
1054 
1055               case '"':
1056                 scanString();
1057                 return retPos;
1058 
1059               case '\'':
1060                 scanCharacter();
1061                 return retPos;
1062 
1063               case '0': case '1': case '2': case '3': case '4':
1064               case '5': case '6': case '7': case '8': case '9':
1065                 scanNumber();
1066                 return retPos;
1067 
1068               case '.':
1069                 switch (ch = in.read()) {
1070                   case '0': case '1': case '2': case '3': case '4':
1071                   case '5': case '6': case '7': case '8': case '9':
1072                     count = 0;
1073                     putc('.');
1074                     scanReal();
1075                     break;
1076                   default:
1077                     token = FIELD;
1078                 }
1079                 return retPos;
1080 
1081               case '{':
1082                 ch = in.read();
1083                 token = LBRACE;
1084                 return retPos;
1085 
1086               case '}':
1087                 ch = in.read();
1088                 token = RBRACE;
1089                 return retPos;
1090 
1091               case '(':
1092                 ch = in.read();
1093                 token = LPAREN;
1094                 return retPos;
1095 
1096               case ')':
1097                 ch = in.read();
1098                 token = RPAREN;
1099                 return retPos;
1100 
1101               case '[':
1102                 ch = in.read();
1103                 token = LSQBRACKET;
1104                 return retPos;
1105 
1106               case ']':
1107                 ch = in.read();
1108                 token = RSQBRACKET;
1109                 return retPos;
1110 
1111               case ',':
1112                 ch = in.read();
1113                 token = COMMA;
1114                 return retPos;
1115 
1116               case ';':
1117                 ch = in.read();
1118                 token = SEMICOLON;
1119                 return retPos;
1120 
1121               case '?':
1122                 ch = in.read();
1123                 token = QUESTIONMARK;
1124                 return retPos;
1125 
1126               case '~':
1127                 ch = in.read();
1128                 token = BITNOT;
1129                 return retPos;
1130 
1131               case ':':
1132                 ch = in.read();
1133                 token = COLON;
1134                 return retPos;
1135 
1136               case '-':
1137                 switch (ch = in.read()) {
1138                   case '-':
1139                     ch = in.read();
1140                     token = DEC;
1141                     return retPos;
1142 
1143                   case '=':
1144                     ch = in.read();
1145                     token = ASGSUB;
1146                     return retPos;
1147                 }
1148                 token = SUB;
1149                 return retPos;
1150 
1151               case '+':
1152                 switch (ch = in.read()) {
1153                   case '+':
1154                     ch = in.read();
1155                     token = INC;
1156                     return retPos;
1157 
1158                   case '=':
1159                     ch = in.read();
1160                     token = ASGADD;
1161                     return retPos;
1162                 }
1163                 token = ADD;
1164                 return retPos;
1165 
1166               case '<':
1167                 switch (ch = in.read()) {
1168                   case '<':
1169                     if ((ch = in.read()) == '=') {
1170                         ch = in.read();
1171                         token = ASGLSHIFT;
1172                         return retPos;
1173                     }
1174                     token = LSHIFT;
1175                     return retPos;
1176 
1177                   case '=':
1178                     ch = in.read();
1179                     token = LE;
1180                     return retPos;
1181                 }
1182                 token = LT;
1183                 return retPos;
1184 
1185               case '>':
1186                 switch (ch = in.read()) {
1187                   case '>':
1188                     switch (ch = in.read()) {
1189                       case '=':
1190                         ch = in.read();
1191                         token = ASGRSHIFT;
1192                         return retPos;
1193 
1194                       case '>':
1195                         if ((ch = in.read()) == '=') {
1196                             ch = in.read();
1197                             token = ASGURSHIFT;
1198                             return retPos;
1199                         }
1200                         token = URSHIFT;
1201                         return retPos;
1202                     }
1203                     token = RSHIFT;
1204                     return retPos;
1205 
1206                   case '=':
1207                     ch = in.read();
1208                     token = GE;
1209                     return retPos;
1210                 }
1211                 token = GT;
1212                 return retPos;
1213 
1214               case '|':
1215                 switch (ch = in.read()) {
1216                   case '|':
1217                     ch = in.read();
1218                     token = OR;
1219                     return retPos;
1220 
1221                   case '=':
1222                     ch = in.read();
1223                     token = ASGBITOR;
1224                     return retPos;
1225                 }
1226                 token = BITOR;
1227                 return retPos;
1228 
1229               case '&':
1230                 switch (ch = in.read()) {
1231                   case '&':
1232                     ch = in.read();
1233                     token = AND;
1234                     return retPos;
1235 
1236                   case '=':
1237                     ch = in.read();
1238                     token = ASGBITAND;
1239                     return retPos;
1240                 }
1241                 token = BITAND;
1242                 return retPos;
1243 
1244               case '=':
1245                 if ((ch = in.read()) == '=') {
1246                     ch = in.read();
1247                     token = EQ;
1248                     return retPos;
1249                 }
1250                 token = ASSIGN;
1251                 return retPos;
1252 
1253               case '%':
1254                 if ((ch = in.read()) == '=') {
1255                     ch = in.read();
1256                     token = ASGREM;
1257                     return retPos;
1258                 }
1259                 token = REM;
1260                 return retPos;
1261 
1262               case '^':
1263                 if ((ch = in.read()) == '=') {
1264                     ch = in.read();
1265                     token = ASGBITXOR;
1266                     return retPos;
1267                 }
1268                 token = BITXOR;
1269                 return retPos;
1270 
1271               case '!':
1272                 if ((ch = in.read()) == '=') {
1273                     ch = in.read();
1274                     token = NE;
1275                     return retPos;
1276                 }
1277                 token = NOT;
1278                 return retPos;
1279 
1280               case '*':
1281                 if ((ch = in.read()) == '=') {
1282                     ch = in.read();
1283                     token = ASGMUL;
1284                     return retPos;
1285                 }
1286                 token = MUL;
1287                 return retPos;
1288 
1289               case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1290               case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1291               case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1292               case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1293               case 'y': case 'z':
1294               case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1295               case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1296               case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1297               case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1298               case 'Y': case 'Z':
1299               case '$': case '_':
1300                 scanIdentifier();
1301                 return retPos;
1302 
1303               case '\u001a':
1304                 // Our one concession to DOS.
1305                 if ((ch = in.read()) == EOF) {
1306                     token = EOF;
1307                     return retPos;
1308                 }
1309                 env.error(pos, "funny.char");
1310                 ch = in.read();
1311                 break;
1312 
1313 
1314               default:
1315                 if (Character.isJavaLetter((char)ch)) {
1316                     scanIdentifier();
1317                     return retPos;
1318                 }
1319                 env.error(pos, "funny.char");
1320                 ch = in.read();
1321                 break;
1322             }
1323         }
1324     }
1325 
1326     /**
1327      * Scan to a matching '}', ']' or ')'. The current token must be
1328      * a '{', '[' or '(';
1329      */
match(int open, int close)1330     public void match(int open, int close) throws IOException {
1331         int depth = 1;
1332 
1333         while (true) {
1334             scan();
1335             if (token == open) {
1336                 depth++;
1337             } else if (token == close) {
1338                 if (--depth == 0) {
1339                     return;
1340                 }
1341             } else if (token == EOF) {
1342                 env.error(pos, "unbalanced.paren");
1343                 return;
1344             }
1345         }
1346     }
1347 }
1348