1 /*
2  * Copyright (c) 1994, 2004, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 package sun.tools.java;
27 
28 import java.io.IOException;
29 import java.io.InputStream;
30 import java.util.Hashtable;
31 
32 /**
33  * A Scanner for Java tokens. Errors are reported
34  * to the environment object.<p>
35  *
36  * The scanner keeps track of the current token,
37  * the value of the current token (if any), and the start
38  * position of the current token.<p>
39  *
40  * The scan() method advances the scanner to the next
41  * token in the input.<p>
42  *
43  * The match() method is used to quickly match opening
44  * brackets (ie: '(', '{', or '[') with their closing
45  * counter part. This is useful during error recovery.<p>
46  *
 * A position consists of: ((linenr << WHEREOFFSETBITS) | offset).
 * This means that both the line number and the exact offset into
 * the file are encoded in each position value.<p>
50  *
51  * The compiler treats either "\n", "\r" or "\r\n" as the
52  * end of a line.<p>
53  *
54  * WARNING: The contents of this source file are not part of any
55  * supported API.  Code that depends on them does so at its own risk:
56  * they are subject to change or removal without notice.
57  *
58  * @author      Arthur van Hoff
59  */
60 
61 public
62 class Scanner implements Constants {
    /**
     * The increment for each character: adding this to a position
     * advances its offset part by one.
     */
    public static final long OFFSETINC = 1;

    /**
     * The increment for each line: adding this to a position
     * advances its line-number part (the bits above WHEREOFFSETBITS)
     * by one.
     */
    public static final long LINEINC = 1L << WHEREOFFSETBITS;

    /**
     * End of input
     */
    public static final int EOF = -1;

    /**
     * Where errors are reported
     */
    public Environment env;

    /**
     * Input reader
     */
    protected ScannerInputReader in;

    /**
     * If true, present all comments as tokens.
     * Contents are not saved, but positions are recorded accurately,
     * so the comment can be recovered from the text.
     * Line terminations are also returned as comment tokens,
     * and may be distinguished by their start and end positions,
     * which are equal (meaning, these tokens contain no chars).
     */
   public boolean scanComments = false;

    /**
     * Current token
     */
    public int token;

    /**
     * The position of the current token
     */
    public long pos;

    /**
     * The position of the previous token
     */
    public long prevPos;

    /**
     * The current character, or EOF at the end of the input
     */
    protected int ch;

    /*
     * Token values.  Which of these is meaningful depends on the
     * kind of the current token.
     */
    public char charValue;
    public int intValue;
    public long longValue;
    public float floatValue;
    public double doubleValue;
    public String stringValue;
    public Identifier idValue;
    public int radix;   // Radix, when reading int or long

    /*
     * A doc comment preceding the most recent token
     */
    public String docComment;

    /*
     * A growable character buffer.  count is the number of characters
     * currently stored in buffer; the buffer's capacity is doubled by
     * growBuffer() whenever it fills up.
     */
    private int count;
    private char buffer[] = new char[1024];
growBuffer()140     private void growBuffer() {
141         char newBuffer[] = new char[buffer.length * 2];
142         System.arraycopy(buffer, 0, newBuffer, 0, buffer.length);
143         buffer = newBuffer;
144     }
145 
146     // The following two methods have been hand-inlined in
147     // scanDocComment.  If you make changes here, you should
148     // check to see if scanDocComment also needs modification.
putc(int ch)149     private void putc(int ch) {
150         if (count == buffer.length) {
151             growBuffer();
152         }
153         buffer[count++] = (char)ch;
154     }
155 
bufferString()156     private String bufferString() {
157         return new String(buffer, 0, count);
158     }
159 
160     /**
161      * Create a scanner to scan an input stream.
162      */
Scanner(Environment env, InputStream in)163     public Scanner(Environment env, InputStream in) throws IOException {
164         this.env = env;
165         useInputStream(in);
166     }
167 
168     /**
169      * Setup input from the given input stream,
170      * and scan the first token from it.
171      */
useInputStream(InputStream in)172     protected void useInputStream(InputStream in) throws IOException {
173         try {
174             this.in = new ScannerInputReader(env, in);
175         } catch (Exception e) {
176             env.setCharacterEncoding(null);
177             this.in = new ScannerInputReader(env, in);
178         }
179 
180         ch = this.in.read();
181         prevPos = this.in.pos;
182 
183         scan();
184     }
185 
    /**
     * Create a scanner that is not yet attached to any input.
     * Only the environment used for error reporting is recorded;
     * no input is read and no token is scanned here.
     */
    protected Scanner(Environment env) {
        this.env = env;
        // Expect the subclass to call useInputStream at the right time.
    }
193 
194     /**
195      * Define a keyword.
196      */
defineKeyword(int val)197     private static void defineKeyword(int val) {
198         Identifier.lookup(opNames[val]).setType(val);
199     }
200 
201     /**
202      * Initialized keyword and token Hashtables
203      */
204     static {
205         // Statement keywords
206         defineKeyword(FOR);
207         defineKeyword(IF);
208         defineKeyword(ELSE);
209         defineKeyword(WHILE);
210         defineKeyword(DO);
211         defineKeyword(SWITCH);
212         defineKeyword(CASE);
213         defineKeyword(DEFAULT);
214         defineKeyword(BREAK);
215         defineKeyword(CONTINUE);
216         defineKeyword(RETURN);
217         defineKeyword(TRY);
218         defineKeyword(CATCH);
219         defineKeyword(FINALLY);
220         defineKeyword(THROW);
221 
222         // Type defineKeywords
223         defineKeyword(BYTE);
224         defineKeyword(CHAR);
225         defineKeyword(SHORT);
226         defineKeyword(INT);
227         defineKeyword(LONG);
228         defineKeyword(FLOAT);
229         defineKeyword(DOUBLE);
230         defineKeyword(VOID);
231         defineKeyword(BOOLEAN);
232 
233         // Expression keywords
234         defineKeyword(INSTANCEOF);
235         defineKeyword(TRUE);
236         defineKeyword(FALSE);
237         defineKeyword(NEW);
238         defineKeyword(THIS);
239         defineKeyword(SUPER);
240         defineKeyword(NULL);
241 
242         // Declaration keywords
243         defineKeyword(IMPORT);
244         defineKeyword(CLASS);
245         defineKeyword(EXTENDS);
246         defineKeyword(IMPLEMENTS);
247         defineKeyword(INTERFACE);
248         defineKeyword(PACKAGE);
249         defineKeyword(THROWS);
250 
251         // Modifier keywords
252         defineKeyword(PRIVATE);
253         defineKeyword(PUBLIC);
254         defineKeyword(PROTECTED);
255         defineKeyword(STATIC);
256         defineKeyword(TRANSIENT);
257         defineKeyword(SYNCHRONIZED);
258         defineKeyword(NATIVE);
259         defineKeyword(ABSTRACT);
260         defineKeyword(VOLATILE);
261         defineKeyword(FINAL);
262         defineKeyword(STRICTFP);
263 
264         // reserved keywords
265         defineKeyword(CONST);
266         defineKeyword(GOTO);
267     }
268 
269     /**
270      * Scan a comment. This method should be
271      * called once the initial /, * and the next
272      * character have been read.
273      */
skipComment()274     private void skipComment() throws IOException {
275         while (true) {
276             switch (ch) {
277               case EOF:
278                 env.error(pos, "eof.in.comment");
279                 return;
280 
281               case '*':
282                 if ((ch = in.read()) == '/')  {
283                     ch = in.read();
284                     return;
285                 }
286                 break;
287 
288               default:
289                 ch = in.read();
290                 break;
291             }
292         }
293     }
294 
    /**
     * Scan a doc comment. This method should be called
     * once the initial /, * and * have been read. It gathers
     * the content of the comment (without leading spaces and '*'s)
     * in the string buffer.
     * <p>
     * On return, ch holds the character that follows the closing
     * star-slash, or EOF if the input ended inside the comment (in
     * which case "eof.in.comment" is reported).  The field count is
     * not modified; a local counter is used instead.
     *
     * @return the text of the comment with trailing spaces, tabs and
     *         stars removed, or the empty string if no text was gathered
     */
    private String scanDocComment() throws IOException {
        // Note: this method has been hand-optimized to yield
        // better performance.  This was done after it was noted
        // that javadoc spent a great deal of its time here.
        // This should also help the performance of the compiler
        // as well -- it scans the doc comments to find
        // @deprecated tags.
        //
        // The logic of the method has been completely rewritten
        // to avoid the use of flags that need to be looked at
        // for every character read.  Members that are accessed
        // more than once have been stored in local variables.
        // The methods putc() and bufferString() have been
        // inlined by hand.  Extra cases have been added to
        // switch statements to trick the compiler into generating
        // a tableswitch instead of a lookupswitch.
        //
        // This implementation aims to preserve the previous
        // behavior of this method.

        int c;

        // Put `in' in a local variable.
        final ScannerInputReader in = this.in;

        // We maintain the buffer locally rather than calling putc().
        // Note that this local `count' shadows the field of the
        // same name.
        char[] buffer = this.buffer;
        int count = 0;

        // We are called pointing at the second star of the doc
        // comment:
        //
        // Input: /** the rest of the comment ... */
        //          ^
        //
        // We rely on this in the code below.

        // Consume any number of stars.
        while ((c = in.read()) == '*')
            ;

        // Is the comment of the form /**/, /***/, /****/, etc.?
        if (c == '/') {
            // Set ch and return
            ch = in.read();
            return "";
        }

        // Skip a newline on the first line of the comment.
        if (c == '\n') {
            c = in.read();
        }

    outerLoop:
        // The outerLoop processes the doc comment, looping once
        // for each line.  For each line, it first strips off
        // whitespace, then it consumes any stars, then it
        // puts the rest of the line into our buffer.
        while (true) {

            // The wsLoop consumes whitespace from the beginning
            // of each line.
        wsLoop:
            while (true) {
                switch (c) {
                case ' ':
                case '\t':
                    // We could check for other forms of whitespace
                    // as well, but this is left as is for minimum
                    // disturbance of functionality.
                    //
                    // Just skip whitespace.
                    c = in.read();
                    break;

                // We have added extra cases here to trick the
                // compiler into using a tableswitch instead of
                // a lookupswitch.  They can be removed without
                // a change in meaning.
                case 10: case 11: case 12: case 13: case 14: case 15:
                case 16: case 17: case 18: case 19: case 20: case 21:
                case 22: case 23: case 24: case 25: case 26: case 27:
                case 28: case 29: case 30: case 31:
                default:
                    // We've seen something that isn't whitespace,
                    // jump out.
                    break wsLoop;
                }
            } // end wsLoop.

            // Are there stars here?  If so, consume them all
            // and check for the end of comment.
            if (c == '*') {
                // Skip all of the stars...
                do {
                    c = in.read();
                } while (c == '*');

                // ...then check for the closing slash.
                if (c == '/') {
                    // We're done with the doc comment.
                    // Set ch and break out.
                    ch = in.read();
                    break outerLoop;
                }
            }

            // The textLoop processes the rest of the characters
            // on the line, adding them to our buffer.
        textLoop:
            while (true) {
                switch (c) {
                case EOF:
                    // We've seen a premature EOF.  Break out
                    // of the loop.
                    env.error(pos, "eof.in.comment");
                    ch = EOF;
                    break outerLoop;

                case '*':
                    // Is this just a star?  Or is this the
                    // end of a comment?
                    c = in.read();
                    if (c == '/') {
                        // This is the end of the comment,
                        // set ch and return our buffer.
                        ch = in.read();
                        break outerLoop;
                    }
                    // This is just an ordinary star.  Add it to
                    // the buffer.  The character read after it is
                    // left in c and is handled by the next pass.
                    if (count == buffer.length) {
                        // growBuffer() replaces this.buffer, so the
                        // local alias has to be refreshed afterwards.
                        growBuffer();
                        buffer = this.buffer;
                    }
                    buffer[count++] = '*';
                    break;

                case '\n':
                    // We've seen a newline.  Add it to our
                    // buffer and break out of this loop,
                    // starting fresh on a new line.
                    if (count == buffer.length) {
                        growBuffer();
                        buffer = this.buffer;
                    }
                    buffer[count++] = '\n';
                    c = in.read();
                    break textLoop;

                // Again, the extra cases here are a trick
                // to get the compiler to generate a tableswitch.
                case 0: case 1: case 2: case 3: case 4: case 5:
                case 6: case 7: case 8: case 11: case 12: case 13:
                case 14: case 15: case 16: case 17: case 18: case 19:
                case 20: case 21: case 22: case 23: case 24: case 25:
                case 26: case 27: case 28: case 29: case 30: case 31:
                case 32: case 33: case 34: case 35: case 36: case 37:
                case 38: case 39: case 40:
                default:
                    // Add the character to our buffer.
                    if (count == buffer.length) {
                        growBuffer();
                        buffer = this.buffer;
                    }
                    buffer[count++] = (char)c;
                    c = in.read();
                    break;
                }
            } // end textLoop
        } // end outerLoop

        // We have scanned our doc comment.  It is stored in
        // buffer.  The previous implementation of scanDocComment
        // stripped off all trailing spaces and stars from the comment.
        // We will do this as well, so as to cause a minimum of
        // disturbance.  Is this what we want?
        if (count > 0) {
            // Walk backwards from the last gathered character until
            // something other than a space, tab or star is found.
            int i = count - 1;
        trailLoop:
            while (i > -1) {
                switch (buffer[i]) {
                case ' ':
                case '\t':
                case '*':
                    i--;
                    break;
                // And again, the extra cases here are a trick
                // to get the compiler to generate a tableswitch.
                case 0: case 1: case 2: case 3: case 4: case 5:
                case 6: case 7: case 8: case 10: case 11: case 12:
                case 13: case 14: case 15: case 16: case 17: case 18:
                case 19: case 20: case 21: case 22: case 23: case 24:
                case 25: case 26: case 27: case 28: case 29: case 30:
                case 31: case 33: case 34: case 35: case 36: case 37:
                case 38: case 39: case 40:
                default:
                    break trailLoop;
                }
            }
            // i is the index of the last character to keep (or -1).
            count = i + 1;

            // Return the text of the doc comment.
            return new String(buffer, 0, count);
        } else {
            return "";
        }
    }
509 
    /**
     * Scan a number. The first digit of the number should be the current
     * character.  We may be scanning hex, decimal, or octal at this point.
     * <p>
     * For an integer literal this sets token to INTVAL or LONGVAL, stores
     * the value in intValue or longValue, and leaves the base in radix.
     * If the text turns out to be a floating-point literal, the rest of
     * the work is handed to scanReal().  Malformed or out-of-range
     * literals are reported to env and yield a value of zero.
     */
    private void scanNumber() throws IOException {
        boolean seenNonOctal = false;
        boolean overflow = false;
        boolean seenDigit = false; // used to detect invalid hex number 0xL
        // A leading '0' provisionally means octal; an 'x' right after it
        // switches to hex below.
        radix = (ch == '0' ? 8 : 10);
        long value = ch - '0';
        count = 0;
        putc(ch);               // save character in buffer
    numberLoop:
        for (;;) {
            switch (ch = in.read()) {
              case '.':
                if (radix == 16)
                    break numberLoop; // an illegal character
                scanReal();
                return;

              case '8': case '9':
                // We can't yet throw an error if reading an octal.  We might
                // discover we're really reading a real.
                seenNonOctal = true;
                // fall through
              case '0': case '1': case '2': case '3':
              case '4': case '5': case '6': case '7':
                seenDigit = true;
                putc(ch);
                if (radix == 10) {
                    // Multiplying by ten and dividing again gives a
                    // different result if the multiplication wrapped.
                    overflow = overflow || (value * 10)/10 != value;
                    value = (value * 10) + (ch - '0');
                    // (value - 1 < -1) holds for every negative value
                    // except Long.MIN_VALUE.
                    // NOTE(review): presumably 9223372036854775808 is let
                    // through so that it can be negated -- confirm.
                    overflow = overflow || (value - 1 < -1);
                } else if (radix == 8) {
                    // The top three bits must be clear before shifting.
                    overflow = overflow || (value >>> 61) != 0;
                    value = (value << 3) + (ch - '0');
                } else {
                    // The top four bits must be clear before shifting.
                    overflow = overflow || (value >>> 60) != 0;
                    value = (value << 4) + (ch - '0');
                }
                break;

              case 'd': case 'D': case 'e': case 'E': case 'f': case 'F':
                // Outside of hex these letters mark a floating-point
                // literal (suffix or exponent).
                if (radix != 16) {
                    scanReal();
                    return;
                }
                // fall through
              case 'a': case 'A': case 'b': case 'B': case 'c': case 'C':
                seenDigit = true;
                putc(ch);
                if (radix != 16)
                    break numberLoop; // an illegal character
                overflow = overflow || (value >>> 60) != 0;
                value = (value << 4) + 10 +
                         Character.toLowerCase((char)ch) - 'a';
                break;

              case 'l': case 'L':
                ch = in.read(); // skip over 'l'
                longValue = value;
                token = LONGVAL;
                break numberLoop;

              case 'x': case 'X':
                // if the first character is a '0' and this is the second
                // letter, then read in a hexadecimal number.  Otherwise, error.
                if (count == 1 && radix == 8) {
                    radix = 16;
                    seenDigit = false;
                    break;
                } else {
                    // we'll get an illegal character error
                    break numberLoop;
                }

              default:
                // Any other character ends the literal.
                intValue = (int)value;
                token = INTVAL;
                break numberLoop;
            }
        } // end numberLoop

        // We have just finished reading the number.  The next thing better
        // not be a letter or digit.
        // Note:  There will be deprecation warnings against these uses
        // of Character.isJavaLetterOrDigit and Character.isJavaLetter.
        // Do not fix them yet; allow the compiler to run on pre-JDK1.1 VMs.
        if (Character.isJavaLetterOrDigit((char)ch) || ch == '.') {
            env.error(in.pos, "invalid.number");
            // Swallow the rest of the malformed literal.
            do { ch = in.read(); }
            while (Character.isJavaLetterOrDigit((char)ch) || ch == '.');
            intValue = 0;
            token = INTVAL;
        } else if (radix == 8 && seenNonOctal) {
            // A bogus octal literal.
            intValue = 0;
            token = INTVAL;
            env.error(pos, "invalid.octal.number");
        } else if (radix == 16 && seenDigit == false) {
            // A hex literal with no digits, 0xL, for example.
            intValue = 0;
            token = INTVAL;
            env.error(pos, "invalid.hex.number");
        } else {
            if (token == INTVAL) {
                // Check for overflow.  Note that base 10 literals
                // have different rules than base 8 and 16: octal and
                // hex may use all 32 bits, while a decimal literal may
                // not exceed 2147483648.
                overflow = overflow ||
                    (value & 0xFFFFFFFF00000000L) != 0 ||
                    (radix == 10 && value > 2147483648L);

                if (overflow) {
                    intValue = 0;

                    // Give a specific error message which tells
                    // the user the range.
                    switch (radix) {
                    case 8:
                        env.error(pos, "overflow.int.oct");
                        break;
                    case 10:
                        env.error(pos, "overflow.int.dec");
                        break;
                    case 16:
                        env.error(pos, "overflow.int.hex");
                        break;
                    default:
                        throw new CompilerError("invalid radix");
                    }
                }
            } else {
                // A long literal: only the overflow detected while
                // accumulating the digits matters.
                if (overflow) {
                    longValue = 0;

                    // Give a specific error message which tells
                    // the user the range.
                    switch (radix) {
                    case 8:
                        env.error(pos, "overflow.long.oct");
                        break;
                    case 10:
                        env.error(pos, "overflow.long.dec");
                        break;
                    case 16:
                        env.error(pos, "overflow.long.hex");
                        break;
                    default:
                        throw new CompilerError("invalid radix");
                    }
                }
            }
        }
    }
664 
665     /**
666      * Scan a float.  We are either looking at the decimal, or we have already
667      * seen it and put it into the buffer.  We haven't seen an exponent.
668      * Scan a float.  Should be called with the current character is either
669      * the 'e', 'E' or '.'
670      */
scanReal()671     private void scanReal() throws IOException {
672         boolean seenExponent = false;
673         boolean isSingleFloat = false;
674         char lastChar;
675         if (ch == '.') {
676             putc(ch);
677             ch = in.read();
678         }
679 
680     numberLoop:
681         for ( ; ; ch = in.read()) {
682             switch (ch) {
683                 case '0': case '1': case '2': case '3': case '4':
684                 case '5': case '6': case '7': case '8': case '9':
685                     putc(ch);
686                     break;
687 
688                 case 'e': case 'E':
689                     if (seenExponent)
690                         break numberLoop; // we'll get a format error
691                     putc(ch);
692                     seenExponent = true;
693                     break;
694 
695                 case '+': case '-':
696                     lastChar = buffer[count - 1];
697                     if (lastChar != 'e' && lastChar != 'E')
698                         break numberLoop; // this isn't an error, though!
699                     putc(ch);
700                     break;
701 
702                 case 'f': case 'F':
703                     ch = in.read(); // skip over 'f'
704                     isSingleFloat = true;
705                     break numberLoop;
706 
707                 case 'd': case 'D':
708                     ch = in.read(); // skip over 'd'
709                     // fall through
710                 default:
711                     break numberLoop;
712             } // sswitch
713         } // loop
714 
715         // we have just finished reading the number.  The next thing better
716         // not be a letter or digit.
717         if (Character.isJavaLetterOrDigit((char)ch) || ch == '.') {
718             env.error(in.pos, "invalid.number");
719             do { ch = in.read(); }
720             while (Character.isJavaLetterOrDigit((char)ch) || ch == '.');
721             doubleValue = 0;
722             token = DOUBLEVAL;
723         } else {
724             token = isSingleFloat ? FLOATVAL : DOUBLEVAL;
725             try {
726                 lastChar = buffer[count - 1];
727                 if (lastChar == 'e' || lastChar == 'E'
728                        || lastChar == '+' || lastChar == '-') {
729                     env.error(in.pos -1, "float.format");
730                 } else if (isSingleFloat) {
731                     String string = bufferString();
732                     floatValue = Float.valueOf(string).floatValue();
733                     if (Float.isInfinite(floatValue)) {
734                         env.error(pos, "overflow.float");
735                     } else if (floatValue == 0 && !looksLikeZero(string)) {
736                         env.error(pos, "underflow.float");
737                     }
738                 } else {
739                     String string = bufferString();
740                     doubleValue = Double.valueOf(string).doubleValue();
741                     if (Double.isInfinite(doubleValue)) {
742                         env.error(pos, "overflow.double");
743                     } else if (doubleValue == 0 && !looksLikeZero(string)) {
744                         env.error(pos, "underflow.double");
745                     }
746                 }
747             } catch (NumberFormatException ee) {
748                 env.error(pos, "float.format");
749                 doubleValue = 0;
750                 floatValue = 0;
751             }
752         }
753         return;
754     }
755 
756     // We have a token that parses as a number.  Is this token possibly zero?
757     // i.e. does it have a non-zero value in the mantissa?
looksLikeZero(String token)758     private static boolean looksLikeZero(String token) {
759         int length = token.length();
760         for (int i = 0; i < length; i++) {
761             switch (token.charAt(i)) {
762                 case 0: case '.':
763                     continue;
764                 case '1': case '2': case '3': case '4': case '5':
765                 case '6': case '7': case '8': case '9':
766                     return false;
767                 case 'e': case 'E': case 'f': case 'F':
768                     return true;
769             }
770         }
771         return true;
772     }
773 
774     /**
775      * Scan an escape character.
776      * @return the character or -1 if it escaped an
777      * end-of-line.
778      */
scanEscapeChar()779     private int scanEscapeChar() throws IOException {
780         long p = in.pos;
781 
782         switch (ch = in.read()) {
783           case '0': case '1': case '2': case '3':
784           case '4': case '5': case '6': case '7': {
785             int n = ch - '0';
786             for (int i = 2 ; i > 0 ; i--) {
787                 switch (ch = in.read()) {
788                   case '0': case '1': case '2': case '3':
789                   case '4': case '5': case '6': case '7':
790                     n = (n << 3) + ch - '0';
791                     break;
792 
793                   default:
794                     if (n > 0xFF) {
795                         env.error(p, "invalid.escape.char");
796                     }
797                     return n;
798                 }
799             }
800             ch = in.read();
801             if (n > 0xFF) {
802                 env.error(p, "invalid.escape.char");
803             }
804             return n;
805           }
806 
807           case 'r':  ch = in.read(); return '\r';
808           case 'n':  ch = in.read(); return '\n';
809           case 'f':  ch = in.read(); return '\f';
810           case 'b':  ch = in.read(); return '\b';
811           case 't':  ch = in.read(); return '\t';
812           case '\\': ch = in.read(); return '\\';
813           case '\"': ch = in.read(); return '\"';
814           case '\'': ch = in.read(); return '\'';
815         }
816 
817         env.error(p, "invalid.escape.char");
818         ch = in.read();
819         return -1;
820     }
821 
822     /**
823      * Scan a string. The current character
824      * should be the opening " of the string.
825      */
scanString()826     private void scanString() throws IOException {
827         token = STRINGVAL;
828         count = 0;
829         ch = in.read();
830 
831         // Scan a String
832         while (true) {
833             switch (ch) {
834               case EOF:
835                 env.error(pos, "eof.in.string");
836                 stringValue = bufferString();
837                 return;
838 
839               case '\r':
840               case '\n':
841                 ch = in.read();
842                 env.error(pos, "newline.in.string");
843                 stringValue = bufferString();
844                 return;
845 
846               case '"':
847                 ch = in.read();
848                 stringValue = bufferString();
849                 return;
850 
851               case '\\': {
852                 int c = scanEscapeChar();
853                 if (c >= 0) {
854                     putc((char)c);
855                 }
856                 break;
857               }
858 
859               default:
860                 putc(ch);
861                 ch = in.read();
862                 break;
863             }
864         }
865     }
866 
867     /**
868      * Scan a character. The current character should be
869      * the opening ' of the character constant.
870      */
scanCharacter()871     private void scanCharacter() throws IOException {
872         token = CHARVAL;
873 
874         switch (ch = in.read()) {
875           case '\\':
876             int c = scanEscapeChar();
877             charValue = (char)((c >= 0) ? c : 0);
878             break;
879 
880         case '\'':
881             // There are two standard problems this case deals with.  One
882             // is the malformed single quote constant (i.e. the programmer
883             // uses ''' instead of '\'') and the other is the empty
884             // character constant (i.e. '').  Just consume any number of
885             // single quotes and emit an error message.
886             charValue = 0;
887             env.error(pos, "invalid.char.constant");
888             ch = in.read();
889             while (ch == '\'') {
890                 ch = in.read();
891             }
892             return;
893 
894           case '\r':
895           case '\n':
896             charValue = 0;
897             env.error(pos, "invalid.char.constant");
898             return;
899 
900           default:
901             charValue = (char)ch;
902             ch = in.read();
903             break;
904         }
905 
906         if (ch == '\'') {
907             ch = in.read();
908         } else {
909             env.error(pos, "invalid.char.constant");
910             while (true) {
911                 switch (ch) {
912                   case '\'':
913                     ch = in.read();
914                     return;
915                   case ';':
916                   case '\n':
917                   case EOF:
918                     return;
919                   default:
920                     ch = in.read();
921                 }
922             }
923         }
924     }
925 
926     /**
927      * Scan an Identifier. The current character should
928      * be the first character of the identifier.
929      */
scanIdentifier()930     private void scanIdentifier() throws IOException {
931         count = 0;
932 
933         while (true) {
934             putc(ch);
935             switch (ch = in.read()) {
936               case 'a': case 'b': case 'c': case 'd': case 'e':
937               case 'f': case 'g': case 'h': case 'i': case 'j':
938               case 'k': case 'l': case 'm': case 'n': case 'o':
939               case 'p': case 'q': case 'r': case 's': case 't':
940               case 'u': case 'v': case 'w': case 'x': case 'y':
941               case 'z':
942               case 'A': case 'B': case 'C': case 'D': case 'E':
943               case 'F': case 'G': case 'H': case 'I': case 'J':
944               case 'K': case 'L': case 'M': case 'N': case 'O':
945               case 'P': case 'Q': case 'R': case 'S': case 'T':
946               case 'U': case 'V': case 'W': case 'X': case 'Y':
947               case 'Z':
948               case '0': case '1': case '2': case '3': case '4':
949               case '5': case '6': case '7': case '8': case '9':
950               case '$': case '_':
951                 break;
952 
953               default:
954                 if (!Character.isJavaLetterOrDigit((char)ch)) {
955                     idValue = Identifier.lookup(bufferString());
956                     token = idValue.getType();
957                     return;
958                 }
959             }
960         }
961     }
962 
963     /**
964      * The ending position of the current token
965      */
966     // Note: This should be part of the pos itself.
getEndPos()967     public long getEndPos() {
968         return in.pos;
969     }
970 
971     /**
972      * If the current token is IDENT, return the identifier occurrence.
973      * It will be freshly allocated.
974      */
getIdToken()975     public IdentifierToken getIdToken() {
976         return (token != IDENT) ? null : new IdentifierToken(pos, idValue);
977     }
978 
979     /**
980      * Scan the next token.
981      * @return the position of the previous token.
982      */
scan()983    public long scan() throws IOException {
984        return xscan();
985    }
986 
xscan()987     protected long xscan() throws IOException {
988         final ScannerInputReader in = this.in;
989         long retPos = pos;
990         prevPos = in.pos;
991         docComment = null;
992         while (true) {
993             pos = in.pos;
994 
995             switch (ch) {
996               case EOF:
997                 token = EOF;
998                 return retPos;
999 
1000               case '\n':
1001                 if (scanComments) {
1002                     ch = ' ';
1003                     // Avoid this path the next time around.
1004                     // Do not just call in.read; we want to present
1005                     // a null token (and also avoid read-ahead).
1006                     token = COMMENT;
1007                     return retPos;
1008                 }
1009               case ' ':
1010               case '\t':
1011               case '\f':
1012                 ch = in.read();
1013                 break;
1014 
1015               case '/':
1016                 switch (ch = in.read()) {
1017                   case '/':
1018                     // Parse a // comment
1019                     while (((ch = in.read()) != EOF) && (ch != '\n'));
1020                     if (scanComments) {
1021                         token = COMMENT;
1022                         return retPos;
1023                     }
1024                     break;
1025 
1026                   case '*':
1027                     ch = in.read();
1028                     if (ch == '*') {
1029                         docComment = scanDocComment();
1030                     } else {
1031                         skipComment();
1032                     }
1033                     if (scanComments) {
1034                         return retPos;
1035                     }
1036                     break;
1037 
1038                   case '=':
1039                     ch = in.read();
1040                     token = ASGDIV;
1041                     return retPos;
1042 
1043                   default:
1044                     token = DIV;
1045                     return retPos;
1046                 }
1047                 break;
1048 
1049               case '"':
1050                 scanString();
1051                 return retPos;
1052 
1053               case '\'':
1054                 scanCharacter();
1055                 return retPos;
1056 
1057               case '0': case '1': case '2': case '3': case '4':
1058               case '5': case '6': case '7': case '8': case '9':
1059                 scanNumber();
1060                 return retPos;
1061 
1062               case '.':
1063                 switch (ch = in.read()) {
1064                   case '0': case '1': case '2': case '3': case '4':
1065                   case '5': case '6': case '7': case '8': case '9':
1066                     count = 0;
1067                     putc('.');
1068                     scanReal();
1069                     break;
1070                   default:
1071                     token = FIELD;
1072                 }
1073                 return retPos;
1074 
1075               case '{':
1076                 ch = in.read();
1077                 token = LBRACE;
1078                 return retPos;
1079 
1080               case '}':
1081                 ch = in.read();
1082                 token = RBRACE;
1083                 return retPos;
1084 
1085               case '(':
1086                 ch = in.read();
1087                 token = LPAREN;
1088                 return retPos;
1089 
1090               case ')':
1091                 ch = in.read();
1092                 token = RPAREN;
1093                 return retPos;
1094 
1095               case '[':
1096                 ch = in.read();
1097                 token = LSQBRACKET;
1098                 return retPos;
1099 
1100               case ']':
1101                 ch = in.read();
1102                 token = RSQBRACKET;
1103                 return retPos;
1104 
1105               case ',':
1106                 ch = in.read();
1107                 token = COMMA;
1108                 return retPos;
1109 
1110               case ';':
1111                 ch = in.read();
1112                 token = SEMICOLON;
1113                 return retPos;
1114 
1115               case '?':
1116                 ch = in.read();
1117                 token = QUESTIONMARK;
1118                 return retPos;
1119 
1120               case '~':
1121                 ch = in.read();
1122                 token = BITNOT;
1123                 return retPos;
1124 
1125               case ':':
1126                 ch = in.read();
1127                 token = COLON;
1128                 return retPos;
1129 
1130               case '-':
1131                 switch (ch = in.read()) {
1132                   case '-':
1133                     ch = in.read();
1134                     token = DEC;
1135                     return retPos;
1136 
1137                   case '=':
1138                     ch = in.read();
1139                     token = ASGSUB;
1140                     return retPos;
1141                 }
1142                 token = SUB;
1143                 return retPos;
1144 
1145               case '+':
1146                 switch (ch = in.read()) {
1147                   case '+':
1148                     ch = in.read();
1149                     token = INC;
1150                     return retPos;
1151 
1152                   case '=':
1153                     ch = in.read();
1154                     token = ASGADD;
1155                     return retPos;
1156                 }
1157                 token = ADD;
1158                 return retPos;
1159 
1160               case '<':
1161                 switch (ch = in.read()) {
1162                   case '<':
1163                     if ((ch = in.read()) == '=') {
1164                         ch = in.read();
1165                         token = ASGLSHIFT;
1166                         return retPos;
1167                     }
1168                     token = LSHIFT;
1169                     return retPos;
1170 
1171                   case '=':
1172                     ch = in.read();
1173                     token = LE;
1174                     return retPos;
1175                 }
1176                 token = LT;
1177                 return retPos;
1178 
1179               case '>':
1180                 switch (ch = in.read()) {
1181                   case '>':
1182                     switch (ch = in.read()) {
1183                       case '=':
1184                         ch = in.read();
1185                         token = ASGRSHIFT;
1186                         return retPos;
1187 
1188                       case '>':
1189                         if ((ch = in.read()) == '=') {
1190                             ch = in.read();
1191                             token = ASGURSHIFT;
1192                             return retPos;
1193                         }
1194                         token = URSHIFT;
1195                         return retPos;
1196                     }
1197                     token = RSHIFT;
1198                     return retPos;
1199 
1200                   case '=':
1201                     ch = in.read();
1202                     token = GE;
1203                     return retPos;
1204                 }
1205                 token = GT;
1206                 return retPos;
1207 
1208               case '|':
1209                 switch (ch = in.read()) {
1210                   case '|':
1211                     ch = in.read();
1212                     token = OR;
1213                     return retPos;
1214 
1215                   case '=':
1216                     ch = in.read();
1217                     token = ASGBITOR;
1218                     return retPos;
1219                 }
1220                 token = BITOR;
1221                 return retPos;
1222 
1223               case '&':
1224                 switch (ch = in.read()) {
1225                   case '&':
1226                     ch = in.read();
1227                     token = AND;
1228                     return retPos;
1229 
1230                   case '=':
1231                     ch = in.read();
1232                     token = ASGBITAND;
1233                     return retPos;
1234                 }
1235                 token = BITAND;
1236                 return retPos;
1237 
1238               case '=':
1239                 if ((ch = in.read()) == '=') {
1240                     ch = in.read();
1241                     token = EQ;
1242                     return retPos;
1243                 }
1244                 token = ASSIGN;
1245                 return retPos;
1246 
1247               case '%':
1248                 if ((ch = in.read()) == '=') {
1249                     ch = in.read();
1250                     token = ASGREM;
1251                     return retPos;
1252                 }
1253                 token = REM;
1254                 return retPos;
1255 
1256               case '^':
1257                 if ((ch = in.read()) == '=') {
1258                     ch = in.read();
1259                     token = ASGBITXOR;
1260                     return retPos;
1261                 }
1262                 token = BITXOR;
1263                 return retPos;
1264 
1265               case '!':
1266                 if ((ch = in.read()) == '=') {
1267                     ch = in.read();
1268                     token = NE;
1269                     return retPos;
1270                 }
1271                 token = NOT;
1272                 return retPos;
1273 
1274               case '*':
1275                 if ((ch = in.read()) == '=') {
1276                     ch = in.read();
1277                     token = ASGMUL;
1278                     return retPos;
1279                 }
1280                 token = MUL;
1281                 return retPos;
1282 
1283               case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1284               case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1285               case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1286               case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1287               case 'y': case 'z':
1288               case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1289               case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1290               case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1291               case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1292               case 'Y': case 'Z':
1293               case '$': case '_':
1294                 scanIdentifier();
1295                 return retPos;
1296 
1297               case '\u001a':
1298                 // Our one concession to DOS.
1299                 if ((ch = in.read()) == EOF) {
1300                     token = EOF;
1301                     return retPos;
1302                 }
1303                 env.error(pos, "funny.char");
1304                 ch = in.read();
1305                 break;
1306 
1307 
1308               default:
1309                 if (Character.isJavaLetter((char)ch)) {
1310                     scanIdentifier();
1311                     return retPos;
1312                 }
1313                 env.error(pos, "funny.char");
1314                 ch = in.read();
1315                 break;
1316             }
1317         }
1318     }
1319 
1320     /**
1321      * Scan to a matching '}', ']' or ')'. The current token must be
1322      * a '{', '[' or '(';
1323      */
match(int open, int close)1324     public void match(int open, int close) throws IOException {
1325         int depth = 1;
1326 
1327         while (true) {
1328             scan();
1329             if (token == open) {
1330                 depth++;
1331             } else if (token == close) {
1332                 if (--depth == 0) {
1333                     return;
1334                 }
1335             } else if (token == EOF) {
1336                 env.error(pos, "unbalanced.paren");
1337                 return;
1338             }
1339         }
1340     }
1341 }
1342