tools/java/Scanner.java

/*
 * Copyright (c) 1994, 2004, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package sun.tools.java;

import java.io.IOException;
import java.io.InputStream;
import java.util.Hashtable;

/**
 * A Scanner for Java tokens. Errors are reported
 * to the environment object.<p>
 *
 * The scanner keeps track of the current token,
 * the value of the current token (if any), and the start
 * position of the current token.<p>
 *
 * The scan() method advances the scanner to the next
 * token in the input.<p>
 *
 * The match() method is used to quickly match opening
 * brackets (ie: '(', '{', or '[') with their closing
 * counter part. This is useful during error recovery.<p>
 *
 * A position consists of: ((linenr << WHEREOFFSETBITS) | offset)
 * this means that both the line number and the exact offset into
 * the file are encoded in each position value.<p>
 *
 * The compiler treats either "\n", "\r" or "\r\n" as the
 * end of a line.<p>
 *
 * WARNING: The contents of this source file are not part of any
 * supported API.  Code that depends on them does so at its own risk:
 * they are subject to change or removal without notice.
 *
 * @author      Arthur van Hoff
 */

public
class Scanner implements Constants {
    /**
     * The increment for each character.
     */
    public static final long OFFSETINC = 1;

    /**
     * The increment for each line.
     */
    public static final long LINEINC = 1L << WHEREOFFSETBITS;

    /**
     * End of input
     */
    public static final int EOF = -1;

    /**
     * Where errors are reported
     */
    public Environment env;

    /**
     * Input reader
     */
    protected ScannerInputReader in;

    /**
     * If true, present all comments as tokens.
     * Contents are not saved, but positions are recorded accurately,
     * so the comment can be recovered from the text.
     * Line terminations are also returned as comment tokens,
     * and may be distinguished by their start and end positions,
     * which are equal (meaning, these tokens contain no chars).
     */
   public boolean scanComments = false;

    /**
     * Current token
     */
    public int token;

    /**
     * The position of the current token
     */
    public long pos;

    /**
     * The position of the previous token
     */
    public long prevPos;

    /**
     * The current character
     */
    protected int ch;

    /*
     * Token values.  Each scan routine stores the value of the token it
     * recognised in the field matching the token's kind; the other fields
     * keep whatever an earlier token left in them.
     */
    public char charValue;      // set by scanCharacter (CHARVAL)
    public int intValue;        // set by scanNumber (INTVAL)
    public long longValue;      // set by scanNumber (LONGVAL)
    public float floatValue;    // set by scanReal (FLOATVAL)
    public double doubleValue;  // set by scanReal (DOUBLEVAL)
    public String stringValue;  // set by scanString (STRINGVAL)
    public Identifier idValue;  // set by scanIdentifier
    public int radix;   // Radix, when reading int or long

    /*
     * A doc comment preceding the most recent token.
     * xscan resets this to null before it looks for the next token.
     */
    public String docComment;

    /*
     * A growable character buffer.
     */
    private int count;
    private char buffer[] = new char[1024];
    /**
     * Double the capacity of the character buffer, keeping
     * everything that has been stored in it so far.
     */
    private void growBuffer() {
        final char[] old = buffer;
        final char[] grown = new char[old.length * 2];
        System.arraycopy(old, 0, grown, 0, old.length);
        buffer = grown;
    }

    // The following two methods have been hand-inlined in
    // scanDocComment.  If you make changes here, you should
    // check to see if scanDocComment also needs modification.
    /**
     * Append one character to the buffer, growing the buffer
     * first when it is already full.
     */
    private void putc(int ch) {
        final int slot = count;
        if (slot == buffer.length) {
            growBuffer();
        }
        buffer[slot] = (char)ch;
        count = slot + 1;
    }

    /**
     * Return the characters gathered in the buffer so far
     * (the first <code>count</code> of them) as a String.
     */
    private String bufferString() {
        final int length = count;
        return new String(buffer, 0, length);
    }

    /**
     * Create a scanner to scan an input stream.
     * Construction hands the stream to useInputStream, which reads the
     * first character and scans the first token, so a freshly built
     * scanner already has a current token.
     *
     * @param env the environment that errors are reported to
     * @param in  the stream to read the source text from
     * @exception IOException if reading from the stream fails
     */
    public Scanner(Environment env, InputStream in) throws IOException {
        this.env = env;
        // NOTE(review): useInputStream is protected and calls the public
        // scan(), so subclass overrides run from inside this constructor.
        useInputStream(in);
    }

    /**
     * Attach the scanner to the given input stream, read the
     * first character and scan the first token.
     *
     * If the reader cannot be constructed, the environment's
     * character encoding is reset to null and construction is
     * retried once (presumably to fall back to a default encoding
     * when the configured one is unusable -- confirm against
     * ScannerInputReader).  A failure of the retry propagates.
     */
    protected void useInputStream(InputStream in) throws IOException {
        ScannerInputReader reader;
        try {
            reader = new ScannerInputReader(env, in);
        } catch (Exception e) {
            // Deliberate best-effort retry, see above.
            env.setCharacterEncoding(null);
            reader = new ScannerInputReader(env, in);
        }
        this.in = reader;

        // Prime the one-character lookahead and remember where we start.
        ch = reader.read();
        prevPos = reader.pos;

        scan();
    }

    /**
     * Create a scanner that has no input yet.
     * Only the environment is recorded; nothing is read and no token
     * is scanned until useInputStream is called.
     *
     * @param env the environment that errors are reported to
     */
    protected Scanner(Environment env) {
        this.env = env;
        // Expect the subclass to call useInputStream at the right time.
    }

    /**
     * Define a keyword: look up the identifier spelled like the
     * operator name of the given token and give it that token type.
     */
    private static void defineKeyword(int val) {
        final String spelling = opNames[val];
        final Identifier keyword = Identifier.lookup(spelling);
        keyword.setType(val);
    }

    /**
     * Initialized keyword and token Hashtables
     */
    static {
        // Statement keywords
        defineKeyword(FOR);
        defineKeyword(IF);
        defineKeyword(ELSE);
        defineKeyword(WHILE);
        defineKeyword(DO);
        defineKeyword(SWITCH);
        defineKeyword(CASE);
        defineKeyword(DEFAULT);
        defineKeyword(BREAK);
        defineKeyword(CONTINUE);
        defineKeyword(RETURN);
        defineKeyword(TRY);
        defineKeyword(CATCH);
        defineKeyword(FINALLY);
        defineKeyword(THROW);

        // Type defineKeywords
        defineKeyword(BYTE);
        defineKeyword(CHAR);
        defineKeyword(SHORT);
        defineKeyword(INT);
        defineKeyword(LONG);
        defineKeyword(FLOAT);
        defineKeyword(DOUBLE);
        defineKeyword(VOID);
        defineKeyword(BOOLEAN);

        // Expression keywords
        defineKeyword(INSTANCEOF);
        defineKeyword(TRUE);
        defineKeyword(FALSE);
        defineKeyword(NEW);
        defineKeyword(THIS);
        defineKeyword(SUPER);
        defineKeyword(NULL);

        // Declaration keywords
        defineKeyword(IMPORT);
        defineKeyword(CLASS);
        defineKeyword(EXTENDS);
        defineKeyword(IMPLEMENTS);
        defineKeyword(INTERFACE);
        defineKeyword(PACKAGE);
        defineKeyword(THROWS);

        // Modifier keywords
        defineKeyword(PRIVATE);
        defineKeyword(PUBLIC);
        defineKeyword(PROTECTED);
        defineKeyword(STATIC);
        defineKeyword(TRANSIENT);
        defineKeyword(SYNCHRONIZED);
        defineKeyword(NATIVE);
        defineKeyword(ABSTRACT);
        defineKeyword(VOLATILE);
        defineKeyword(FINAL);
        defineKeyword(STRICTFP);

        // reserved keywords
        defineKeyword(CONST);
        defineKeyword(GOTO);
    }

    /**
     * Skip the remainder of a (non-doc) comment.  Call this once
     * the opening /, the * and the character after them have been
     * read.  On return the current character is the one following
     * the closing slash, or EOF when the comment was never closed
     * (in which case "eof.in.comment" is reported).
     */
    private void skipComment() throws IOException {
        for (;;) {
            if (ch == EOF) {
                env.error(pos, "eof.in.comment");
                return;
            }
            if (ch != '*') {
                // Ordinary comment text.
                ch = in.read();
                continue;
            }
            // Saw a star: the comment ends only if a slash follows.
            // Otherwise re-examine the character just read, since it
            // may itself be a star or EOF.
            ch = in.read();
            if (ch == '/') {
                ch = in.read();
                return;
            }
        }
    }

    /**
     * Scan a doc comment. This method should be called
     * once the initial /, * and * have been read. It gathers
     * the content of the comment (without the leading spaces, tabs
     * and '*'s of each line, and without trailing spaces, tabs and
     * '*'s at the very end) and returns it.
     *
     * The text is accumulated in the shared character buffer, but the
     * character count is kept in a local variable, so the
     * <code>count</code> field is left untouched.  On return
     * <code>ch</code> holds the character following the closing slash,
     * or EOF when the comment was not terminated (in which case
     * "eof.in.comment" is reported and the text gathered so far is
     * returned).
     *
     * @return the text of the doc comment, possibly empty
     */
    private String scanDocComment() throws IOException {
        // Note: this method has been hand-optimized to yield
        // better performance.  This was done after it was noted
        // that javadoc spent a great deal of its time here.
        // This should also help the performance of the compiler
        // as well -- it scans the doc comments to find
        // @deprecated tags.
        //
        // The logic of the method has been completely rewritten
        // to avoid the use of flags that need to be looked at
        // for every character read.  Members that are accessed
        // more than once have been stored in local variables.
        // The methods putc() and bufferString() have been
        // inlined by hand.  Extra cases have been added to
        // switch statements to trick the compiler into generating
        // a tableswitch instead of a lookupswitch.
        //
        // This implementation aims to preserve the previous
        // behavior of this method.

        int c;

        // Put `in' in a local variable.
        final ScannerInputReader in = this.in;

        // We maintain the buffer locally rather than calling putc().
        // The local reference must be refreshed after every growBuffer()
        // call, because growBuffer() replaces this.buffer.
        char[] buffer = this.buffer;
        int count = 0;

        // We are called pointing at the second star of the doc
        // comment:
        //
        // Input: /** the rest of the comment ... */
        //          ^
        //
        // We rely on this in the code below.

        // Consume any number of stars.
        while ((c = in.read()) == '*')
            ;

        // Is the comment of the form /**/, /***/, /****/, etc.?
        if (c == '/') {
            // Set ch and return
            ch = in.read();
            return "";
        }

        // Skip a newline on the first line of the comment.
        if (c == '\n') {
            c = in.read();
        }

    outerLoop:
        // The outerLoop processes the doc comment, looping once
        // for each line.  For each line, it first strips off
        // whitespace, then it consumes any stars, then it
        // puts the rest of the line into our buffer.
        while (true) {

            // The wsLoop consumes whitespace from the beginning
            // of each line.
        wsLoop:
            while (true) {
                switch (c) {
                case ' ':
                case '\t':
                    // We could check for other forms of whitespace
                    // as well, but this is left as is for minimum
                    // disturbance of functionality.
                    //
                    // Just skip whitespace.
                    c = in.read();
                    break;

                // We have added extra cases here to trick the
                // compiler into using a tableswitch instead of
                // a lookupswitch.  They can be removed without
                // a change in meaning.
                case 10: case 11: case 12: case 13: case 14: case 15:
                case 16: case 17: case 18: case 19: case 20: case 21:
                case 22: case 23: case 24: case 25: case 26: case 27:
                case 28: case 29: case 30: case 31:
                default:
                    // We've seen something that isn't whitespace,
                    // jump out.
                    break wsLoop;
                }
            } // end wsLoop.

            // Are there stars here?  If so, consume them all
            // and check for the end of comment.
            if (c == '*') {
                // Skip all of the stars...
                do {
                    c = in.read();
                } while (c == '*');

                // ...then check for the closing slash.
                if (c == '/') {
                    // We're done with the doc comment.
                    // Set ch and break out.
                    ch = in.read();
                    break outerLoop;
                }
            }

            // The textLoop processes the rest of the characters
            // on the line, adding them to our buffer.
        textLoop:
            while (true) {
                switch (c) {
                case EOF:
                    // We've seen a premature EOF.  Break out
                    // of the loop.
                    env.error(pos, "eof.in.comment");
                    ch = EOF;
                    break outerLoop;

                case '*':
                    // Is this just a star?  Or is this the
                    // end of a comment?
                    c = in.read();
                    if (c == '/') {
                        // This is the end of the comment,
                        // set ch and return our buffer.
                        ch = in.read();
                        break outerLoop;
                    }
                    // This is just an ordinary star.  Add it to
                    // the buffer.  The character read after it stays
                    // in c and is examined on the next iteration.
                    if (count == buffer.length) {
                        growBuffer();
                        buffer = this.buffer;
                    }
                    buffer[count++] = '*';
                    break;

                case '\n':
                    // We've seen a newline.  Add it to our
                    // buffer and break out of this loop,
                    // starting fresh on a new line.
                    if (count == buffer.length) {
                        growBuffer();
                        buffer = this.buffer;
                    }
                    buffer[count++] = '\n';
                    c = in.read();
                    break textLoop;

                // Again, the extra cases here are a trick
                // to get the compiler to generate a tableswitch.
                case 0: case 1: case 2: case 3: case 4: case 5:
                case 6: case 7: case 8: case 11: case 12: case 13:
                case 14: case 15: case 16: case 17: case 18: case 19:
                case 20: case 21: case 22: case 23: case 24: case 25:
                case 26: case 27: case 28: case 29: case 30: case 31:
                case 32: case 33: case 34: case 35: case 36: case 37:
                case 38: case 39: case 40:
                default:
                    // Add the character to our buffer.
                    if (count == buffer.length) {
                        growBuffer();
                        buffer = this.buffer;
                    }
                    buffer[count++] = (char)c;
                    c = in.read();
                    break;
                }
            } // end textLoop
        } // end outerLoop

        // We have scanned our doc comment.  It is stored in
        // buffer.  The previous implementation of scanDocComment
        // stripped off all trailing spaces and stars from the comment.
        // We will do this as well, so as to cause a minimum of
        // disturbance.  Is this what we want?
        // Note that newlines are not stripped: the scan backwards
        // stops at the first character that is not ' ', '\t' or '*'.
        if (count > 0) {
            int i = count - 1;
        trailLoop:
            while (i > -1) {
                switch (buffer[i]) {
                case ' ':
                case '\t':
                case '*':
                    i--;
                    break;
                // And again, the extra cases here are a trick
                // to get the compiler to generate a tableswitch.
                case 0: case 1: case 2: case 3: case 4: case 5:
                case 6: case 7: case 8: case 10: case 11: case 12:
                case 13: case 14: case 15: case 16: case 17: case 18:
                case 19: case 20: case 21: case 22: case 23: case 24:
                case 25: case 26: case 27: case 28: case 29: case 30:
                case 31: case 33: case 34: case 35: case 36: case 37:
                case 38: case 39: case 40:
                default:
                    break trailLoop;
                }
            }
            count = i + 1;

            // Return the text of the doc comment.
            return new String(buffer, 0, count);
        } else {
            return "";
        }
    }

    /**
     * Scan a number. The first digit of the number should be the current
     * character.  We may be scanning hex, decimal, or octal at this point.
     *
     * A leading '0' selects radix 8, which switches to 16 when an 'x' or
     * 'X' follows immediately.  A '.', or one of d/D/e/E/f/F outside of
     * a hex literal, hands the rest of the literal to scanReal (the
     * digits read so far are already in the buffer).  Otherwise the
     * result is an INTVAL in intValue or, with an l/L suffix, a LONGVAL
     * in longValue.  Malformed and out-of-range literals are reported
     * to the environment and yield the value 0.
     */
    private void scanNumber() throws IOException {
        boolean seenNonOctal = false;
        boolean overflow = false;
        boolean seenDigit = false; // used to detect invalid hex number 0xL
        radix = (ch == '0' ? 8 : 10);
        long value = ch - '0';
        count = 0;
        putc(ch);               // save character in buffer
    numberLoop:
        for (;;) {
            switch (ch = in.read()) {
              case '.':
                if (radix == 16)
                    break numberLoop; // an illegal character
                scanReal();
                return;

              case '8': case '9':
                // We can't yet throw an error if reading an octal.  We might
                // discover we're really reading a real.
                seenNonOctal = true;
                // fall through
              case '0': case '1': case '2': case '3':
              case '4': case '5': case '6': case '7':
                seenDigit = true;
                putc(ch);
                if (radix == 10) {
                    // Multiplying by 10 overflowed if dividing the
                    // product by 10 does not give the value back.
                    overflow = overflow || (value * 10)/10 != value;
                    value = (value * 10) + (ch - '0');
                    // The sum overflowed if it went negative.  The test
                    // is written so that Long.MIN_VALUE (i.e. 2^63 after
                    // wrap-around) does not count: MIN_VALUE - 1 wraps to
                    // a positive number.  Presumably this lets a negated
                    // literal of that magnitude be accepted later on.
                    overflow = overflow || (value - 1 < -1);
                } else if (radix == 8) {
                    // Any of the top three bits would be shifted out.
                    overflow = overflow || (value >>> 61) != 0;
                    value = (value << 3) + (ch - '0');
                } else {
                    // Any of the top four bits would be shifted out.
                    overflow = overflow || (value >>> 60) != 0;
                    value = (value << 4) + (ch - '0');
                }
                break;

              case 'd': case 'D': case 'e': case 'E': case 'f': case 'F':
                if (radix != 16) {
                    scanReal();
                    return;
                }
                // fall through
              case 'a': case 'A': case 'b': case 'B': case 'c': case 'C':
                seenDigit = true;
                putc(ch);
                if (radix != 16)
                    break numberLoop; // an illegal character
                overflow = overflow || (value >>> 60) != 0;
                value = (value << 4) + 10 +
                         Character.toLowerCase((char)ch) - 'a';
                break;

              case 'l': case 'L':
                ch = in.read(); // skip over 'l'
                longValue = value;
                token = LONGVAL;
                break numberLoop;

              case 'x': case 'X':
                // if the first character is a '0' and this is the second
                // letter, then read in a hexadecimal number.  Otherwise, error.
                if (count == 1 && radix == 8) {
                    radix = 16;
                    seenDigit = false;
                    break;
                } else {
                    // we'll get an illegal character error
                    break numberLoop;
                }

              default:
                intValue = (int)value;
                token = INTVAL;
                break numberLoop;
            }
        } // while true

        // We have just finished reading the number.  The next thing better
        // not be a letter or digit.
        // Note:  There will be deprecation warnings against these uses
        // of Character.isJavaLetterOrDigit and Character.isJavaLetter.
        // Do not fix them yet; allow the compiler to run on pre-JDK1.1 VMs.
        if (Character.isJavaLetterOrDigit((char)ch) || ch == '.') {
            // This is also where the "illegal character" exits of the
            // loop above end up, since they leave the offending letter
            // or '.' in ch.  Skip the rest of the bogus literal.
            env.error(in.pos, "invalid.number");
            do { ch = in.read(); }
            while (Character.isJavaLetterOrDigit((char)ch) || ch == '.');
            intValue = 0;
            token = INTVAL;
        } else if (radix == 8 && seenNonOctal) {
            // A bogus octal literal.
            intValue = 0;
            token = INTVAL;
            env.error(pos, "invalid.octal.number");
        } else if (radix == 16 && seenDigit == false) {
            // A hex literal with no digits, 0xL, for example.
            intValue = 0;
            token = INTVAL;
            env.error(pos, "invalid.hex.number");
        } else {
            if (token == INTVAL) {
                // Check for overflow.  Note that base 10 literals
                // have different rules than base 8 and 16.
                // Base 8 and 16 may use all 32 bits; base 10 may be
                // at most 2147483648 (2^31), one more than the largest
                // int -- presumably so that the literal can be negated.
                overflow = overflow ||
                    (value & 0xFFFFFFFF00000000L) != 0 ||
                    (radix == 10 && value > 2147483648L);

                if (overflow) {
                    intValue = 0;

                    // Give a specific error message which tells
                    // the user the range.
                    switch (radix) {
                    case 8:
                        env.error(pos, "overflow.int.oct");
                        break;
                    case 10:
                        env.error(pos, "overflow.int.dec");
                        break;
                    case 16:
                        env.error(pos, "overflow.int.hex");
                        break;
                    default:
                        throw new CompilerError("invalid radix");
                    }
                }
            } else {
                // A LONGVAL: only the overflow noticed while
                // accumulating the digits matters.
                if (overflow) {
                    longValue = 0;

                    // Give a specific error message which tells
                    // the user the range.
                    switch (radix) {
                    case 8:
                        env.error(pos, "overflow.long.oct");
                        break;
                    case 10:
                        env.error(pos, "overflow.long.dec");
                        break;
                    case 16:
                        env.error(pos, "overflow.long.hex");
                        break;
                    default:
                        throw new CompilerError("invalid radix");
                    }
                }
            }
        }
    }

    /**
     * Scan a floating-point literal.  Should be called when the current
     * character is the '.', or a character that follows the digits
     * scanned so far (for instance 'e' or 'E', or a type suffix).
     * Whatever precedes the current character is already in the buffer,
     * and no exponent has been seen yet.
     *
     * On return the token is FLOATVAL (with the value in floatValue) when
     * the literal ends in 'f' or 'F', and DOUBLEVAL (with the value in
     * doubleValue) otherwise.  Malformed literals, overflow to infinity
     * and underflow to zero are reported to the environment.
     */
    private void scanReal() throws IOException {
        boolean seenExponent = false;
        boolean isSingleFloat = false;
        char lastChar;
        if (ch == '.') {
            putc(ch);
            ch = in.read();
        }

    numberLoop:
        // The next character is read by the loop's update clause, so a
        // plain `break' out of the switch advances, whereas
        // `break numberLoop' leaves ch on the current character.
        for ( ; ; ch = in.read()) {
            switch (ch) {
                case '0': case '1': case '2': case '3': case '4':
                case '5': case '6': case '7': case '8': case '9':
                    putc(ch);
                    break;

                case 'e': case 'E':
                    if (seenExponent)
                        break numberLoop; // we'll get a format error
                    putc(ch);
                    seenExponent = true;
                    break;

                case '+': case '-':
                    // A sign belongs to the literal only directly
                    // after the exponent marker.
                    lastChar = buffer[count - 1];
                    if (lastChar != 'e' && lastChar != 'E')
                        break numberLoop; // this isn't an error, though!
                    putc(ch);
                    break;

                case 'f': case 'F':
                    ch = in.read(); // skip over 'f'
                    isSingleFloat = true;
                    break numberLoop;

                case 'd': case 'D':
                    ch = in.read(); // skip over 'd'
                    // fall through
                default:
                    break numberLoop;
            } // sswitch
        } // loop

        // we have just finished reading the number.  The next thing better
        // not be a letter or digit.
        if (Character.isJavaLetterOrDigit((char)ch) || ch == '.') {
            env.error(in.pos, "invalid.number");
            do { ch = in.read(); }
            while (Character.isJavaLetterOrDigit((char)ch) || ch == '.');
            doubleValue = 0;
            token = DOUBLEVAL;
        } else {
            token = isSingleFloat ? FLOATVAL : DOUBLEVAL;
            try {
                lastChar = buffer[count - 1];
                if (lastChar == 'e' || lastChar == 'E'
                       || lastChar == '+' || lastChar == '-') {
                    // The exponent has no digits.  Note that
                    // floatValue/doubleValue are left unchanged here.
                    env.error(in.pos -1, "float.format");
                } else if (isSingleFloat) {
                    String string = bufferString();
                    floatValue = Float.valueOf(string).floatValue();
                    if (Float.isInfinite(floatValue)) {
                        env.error(pos, "overflow.float");
                    } else if (floatValue == 0 && !looksLikeZero(string)) {
                        // The text has a non-zero digit, yet the
                        // value came out as zero.
                        env.error(pos, "underflow.float");
                    }
                } else {
                    String string = bufferString();
                    doubleValue = Double.valueOf(string).doubleValue();
                    if (Double.isInfinite(doubleValue)) {
                        env.error(pos, "overflow.double");
                    } else if (doubleValue == 0 && !looksLikeZero(string)) {
                        // The text has a non-zero digit, yet the
                        // value came out as zero.
                        env.error(pos, "underflow.double");
                    }
                }
            } catch (NumberFormatException ee) {
                env.error(pos, "float.format");
                doubleValue = 0;
                floatValue = 0;
            }
        }
        return;
    }

    /**
     * Tell whether the text of a floating-point literal spells zero,
     * that is, whether its mantissa contains no non-zero digit.
     * Only the mantissa is examined: scanning stops at the exponent
     * marker ('e'/'E') or at an 'f'/'F', so "0e5" looks like zero while
     * "1e-400" does not.  An empty string looks like zero.
     *
     * @param token the text of a literal that parses as a number
     * @return false if a digit 1-9 occurs before any 'e', 'E', 'f' or
     *         'F'; true otherwise
     */
    private static boolean looksLikeZero(String token) {
        int length = token.length();
        for (int i = 0; i < length; i++) {
            switch (token.charAt(i)) {
                // This used to read `case 0:' (the NUL character); the
                // digit zero was only skipped because characters that
                // match no case are skipped as well.
                case '0': case '.':
                    continue;
                case '1': case '2': case '3': case '4': case '5':
                case '6': case '7': case '8': case '9':
                    return false;
                case 'e': case 'E': case 'f': case 'F':
                    return true;
            }
        }
        return true;
    }

    /**
     * Scan an escape sequence.  The current character is the
     * backslash; on return the current character is the one
     * following the escape sequence.
     *
     * An octal escape consists of one to three octal digits; a value
     * above 0xFF is reported as "invalid.escape.char" but still
     * returned.  Any other unrecognised escape is reported, the
     * offending character is skipped, and -1 is returned.
     *
     * @return the escaped character, or -1 for an unrecognised escape
     * (which includes an escaped end-of-line).
     */
    private int scanEscapeChar() throws IOException {
        final long start = in.pos;

        ch = in.read();

        // Octal escape: up to two further octal digits may follow.
        if (ch >= '0' && ch <= '7') {
            int value = ch - '0';
            int remaining = 2;
            while (remaining > 0) {
                ch = in.read();
                if (ch < '0' || ch > '7') {
                    // Not an octal digit; it stays the current character.
                    break;
                }
                value = (value << 3) + ch - '0';
                remaining--;
            }
            if (remaining == 0) {
                // All three digits were consumed, move past the last one.
                ch = in.read();
            }
            if (value > 0xFF) {
                env.error(start, "invalid.escape.char");
            }
            return value;
        }

        // Single-character escapes.
        int decoded;
        switch (ch) {
          case 'r':  decoded = '\r'; break;
          case 'n':  decoded = '\n'; break;
          case 'f':  decoded = '\f'; break;
          case 'b':  decoded = '\b'; break;
          case 't':  decoded = '\t'; break;
          case '\\': decoded = '\\'; break;
          case '\"': decoded = '\"'; break;
          case '\'': decoded = '\''; break;
          default:
            env.error(start, "invalid.escape.char");
            ch = in.read();
            return -1;
        }
        ch = in.read();
        return decoded;
    }

    /**
     * Scan a string literal.  The current character should be the
     * opening " of the string.  The token becomes STRINGVAL and the
     * characters gathered go to stringValue, also when the literal is
     * cut short by a line end ("newline.in.string") or by the end of
     * the input ("eof.in.string").
     */
    private void scanString() throws IOException {
        token = STRINGVAL;
        count = 0;
        ch = in.read();

        for (;;) {
            if (ch == '\\') {
                // scanEscapeChar leaves ch after the escape sequence.
                int escaped = scanEscapeChar();
                if (escaped >= 0) {
                    putc((char)escaped);
                }
                continue;
            }
            if (ch == EOF) {
                env.error(pos, "eof.in.string");
                break;
            }
            if (ch == '\r' || ch == '\n') {
                ch = in.read();
                env.error(pos, "newline.in.string");
                break;
            }
            if (ch == '"') {
                // Closing quote: step past it.
                ch = in.read();
                break;
            }
            putc(ch);
            ch = in.read();
        }

        stringValue = bufferString();
    }

    /**
     * Scan a character constant.  The current character should be
     * the opening ' of the constant.  The token becomes CHARVAL and
     * the value goes to charValue; malformed constants are reported
     * as "invalid.char.constant".
     */
    private void scanCharacter() throws IOException {
        token = CHARVAL;
        ch = in.read();

        if (ch == '\'') {
            // Either an empty constant ('') or an unescaped quote (''').
            // Report it once and swallow every quote that follows.
            charValue = 0;
            env.error(pos, "invalid.char.constant");
            do {
                ch = in.read();
            } while (ch == '\'');
            return;
        }

        if (ch == '\r' || ch == '\n') {
            // The line ended right after the opening quote.
            charValue = 0;
            env.error(pos, "invalid.char.constant");
            return;
        }

        if (ch == '\\') {
            int escaped = scanEscapeChar();
            charValue = (escaped >= 0) ? (char)escaped : (char)0;
        } else {
            charValue = (char)ch;
            ch = in.read();
        }

        if (ch == '\'') {
            // Properly closed.
            ch = in.read();
            return;
        }

        // Missing closing quote: skip ahead to a closing quote (which
        // is consumed), or stop at a ';', a newline or the end of input.
        env.error(pos, "invalid.char.constant");
        while (ch != '\'' && ch != ';' && ch != '\n' && ch != EOF) {
            ch = in.read();
        }
        if (ch == '\'') {
            ch = in.read();
        }
    }

    /**
     * Scan an identifier (or keyword).  The current character should
     * be the first character of the identifier.  The identifier is
     * looked up and stored in idValue, and the token is set to the
     * type recorded for it.
     */
    private void scanIdentifier() throws IOException {
        count = 0;

        boolean partOfIdentifier;
        do {
            putc(ch);
            ch = in.read();
            // Test the common ASCII characters first and consult
            // Character.isJavaLetterOrDigit only for the rest.
            partOfIdentifier =
                   (ch >= 'a' && ch <= 'z')
                || (ch >= 'A' && ch <= 'Z')
                || (ch >= '0' && ch <= '9')
                || ch == '$' || ch == '_'
                || Character.isJavaLetterOrDigit((char)ch);
        } while (partOfIdentifier);

        idValue = Identifier.lookup(bufferString());
        token = idValue.getType();
    }

    /**
     * The ending position of the current token, as given by the
     * current position of the input reader.
     */
    // Note: This should be part of the pos itself.
    public long getEndPos() {
        final long endPos = in.pos;
        return endPos;
    }

    /**
     * Return the identifier occurrence for the current token.
     * A new IdentifierToken is allocated on every call.
     * @return a fresh IdentifierToken built from the current position
     *         and identifier value if the current token is IDENT;
     *         null otherwise.
     */
    public IdentifierToken getIdToken() {
        if (token == IDENT) {
            return new IdentifierToken(pos, idValue);
        }
        return null;
    }

    /**
     * Scan the next token. The work is delegated to xscan().
     * @return the position of the previous token.
     */
    public long scan() throws IOException {
        final long previousTokenPos = xscan();
        return previousTokenPos;
    }

    /**
     * Scan the next token; scan() delegates to this method.<p>
     *
     * On entry ch holds the first character that has not yet been
     * turned into a token. White space is skipped. Comments are
     * skipped too unless scanComments is set, in which case a newline
     * or a comment ends the scan early. On return, token holds the
     * kind of token that was found and pos the position at which it
     * started. docComment is reset to null on entry and set again if
     * a documentation comment is scanned.<p>
     *
     * Illegal characters are reported to the environment as
     * "funny.char" and skipped.
     *
     * @return the value pos had when this method was called, i.e. the
     *         position of the token that was current before the scan.
     */
    protected long xscan() throws IOException {
        // Local copy of the reader field, used for every read below.
        final ScannerInputReader in = this.in;
        long retPos = pos;
        prevPos = in.pos;
        docComment = null;
        while (true) {
            // Each trip round the loop is a fresh attempt at a token,
            // so the start position is re-taken every time.
            pos = in.pos;

            switch (ch) {
              case EOF:
                token = EOF;
                return retPos;

              case '\n':
                if (scanComments) {
                    ch = ' ';
                    // Avoid this path the next time around.
                    // Do not just call in.read; we want to present
                    // a null token (and also avoid read-ahead).
                    token = COMMENT;
                    return retPos;
                }
                // fall through: otherwise a newline is plain white space
              case ' ':
              case '\t':
              case '\f':
                ch = in.read();
                break;

              case '/':
                switch (ch = in.read()) {
                  case '/':
                    // Parse a // comment: skip up to, but not
                    // including, the end of the line (or EOF).
                    while (((ch = in.read()) != EOF) && (ch != '\n'));
                    if (scanComments) {
                        token = COMMENT;
                        return retPos;
                    }
                    break;

                  case '*':
                    ch = in.read();
                    if (ch == '*') {
                        docComment = scanDocComment();
                    } else {
                        skipComment();
                    }
                    // NOTE(review): token is not assigned on this
                    // path; presumably the comment helpers set it
                    // when scanComments is on -- confirm.
                    if (scanComments) {
                        return retPos;
                    }
                    break;

                  case '=':
                    ch = in.read();
                    token = ASGDIV;
                    return retPos;

                  default:
                    token = DIV;
                    return retPos;
                }
                break;

              case '"':
                scanString();
                return retPos;

              case '\'':
                scanCharacter();
                return retPos;

              case '0': case '1': case '2': case '3': case '4':
              case '5': case '6': case '7': case '8': case '9':
                scanNumber();
                return retPos;

              case '.':
                switch (ch = in.read()) {
                  case '0': case '1': case '2': case '3': case '4':
                  case '5': case '6': case '7': case '8': case '9':
                    // A '.' followed by a digit starts a real number;
                    // the '.' has been consumed already, so it is
                    // buffered by hand before scanReal() takes over.
                    count = 0;
                    putc('.');
                    scanReal();
                    break;
                  default:
                    token = FIELD;
                }
                return retPos;

              case '{':
                ch = in.read();
                token = LBRACE;
                return retPos;

              case '}':
                ch = in.read();
                token = RBRACE;
                return retPos;

              case '(':
                ch = in.read();
                token = LPAREN;
                return retPos;

              case ')':
                ch = in.read();
                token = RPAREN;
                return retPos;

              case '[':
                ch = in.read();
                token = LSQBRACKET;
                return retPos;

              case ']':
                ch = in.read();
                token = RSQBRACKET;
                return retPos;

              case ',':
                ch = in.read();
                token = COMMA;
                return retPos;

              case ';':
                ch = in.read();
                token = SEMICOLON;
                return retPos;

              case '?':
                ch = in.read();
                token = QUESTIONMARK;
                return retPos;

              case '~':
                ch = in.read();
                token = BITNOT;
                return retPos;

              case ':':
                ch = in.read();
                token = COLON;
                return retPos;

              // For the operators below, one character of look-ahead
              // is read into ch; it is consumed only if it extends
              // the operator.

              case '-':
                switch (ch = in.read()) {
                  case '-':
                    ch = in.read();
                    token = DEC;
                    return retPos;

                  case '=':
                    ch = in.read();
                    token = ASGSUB;
                    return retPos;
                }
                token = SUB;
                return retPos;

              case '+':
                switch (ch = in.read()) {
                  case '+':
                    ch = in.read();
                    token = INC;
                    return retPos;

                  case '=':
                    ch = in.read();
                    token = ASGADD;
                    return retPos;
                }
                token = ADD;
                return retPos;

              case '<':
                switch (ch = in.read()) {
                  case '<':
                    if ((ch = in.read()) == '=') {
                        ch = in.read();
                        token = ASGLSHIFT;
                        return retPos;
                    }
                    token = LSHIFT;
                    return retPos;

                  case '=':
                    ch = in.read();
                    token = LE;
                    return retPos;
                }
                token = LT;
                return retPos;

              case '>':
                switch (ch = in.read()) {
                  case '>':
                    switch (ch = in.read()) {
                      case '=':
                        ch = in.read();
                        token = ASGRSHIFT;
                        return retPos;

                      case '>':
                        if ((ch = in.read()) == '=') {
                            ch = in.read();
                            token = ASGURSHIFT;
                            return retPos;
                        }
                        token = URSHIFT;
                        return retPos;
                    }
                    token = RSHIFT;
                    return retPos;

                  case '=':
                    ch = in.read();
                    token = GE;
                    return retPos;
                }
                token = GT;
                return retPos;

              case '|':
                switch (ch = in.read()) {
                  case '|':
                    ch = in.read();
                    token = OR;
                    return retPos;

                  case '=':
                    ch = in.read();
                    token = ASGBITOR;
                    return retPos;
                }
                token = BITOR;
                return retPos;

              case '&':
                switch (ch = in.read()) {
                  case '&':
                    ch = in.read();
                    token = AND;
                    return retPos;

                  case '=':
                    ch = in.read();
                    token = ASGBITAND;
                    return retPos;
                }
                token = BITAND;
                return retPos;

              case '=':
                if ((ch = in.read()) == '=') {
                    ch = in.read();
                    token = EQ;
                    return retPos;
                }
                token = ASSIGN;
                return retPos;

              case '%':
                if ((ch = in.read()) == '=') {
                    ch = in.read();
                    token = ASGREM;
                    return retPos;
                }
                token = REM;
                return retPos;

              case '^':
                if ((ch = in.read()) == '=') {
                    ch = in.read();
                    token = ASGBITXOR;
                    return retPos;
                }
                token = BITXOR;
                return retPos;

              case '!':
                if ((ch = in.read()) == '=') {
                    ch = in.read();
                    token = NE;
                    return retPos;
                }
                token = NOT;
                return retPos;

              case '*':
                if ((ch = in.read()) == '=') {
                    ch = in.read();
                    token = ASGMUL;
                    return retPos;
                }
                token = MUL;
                return retPos;

              case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
              case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
              case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
              case 's': case 't': case 'u': case 'v': case 'w': case 'x':
              case 'y': case 'z':
              case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
              case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
              case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
              case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
              case 'Y': case 'Z':
              case '$': case '_':
                scanIdentifier();
                return retPos;

              case '\u001a':
                // Our one concession to DOS: a control-Z that is the
                // last character of the input counts as end of file.
                if ((ch = in.read()) == EOF) {
                    token = EOF;
                    return retPos;
                }
                // A control-Z anywhere else is an illegal character.
                // The read above has already consumed it and left the
                // character that follows in ch, so no further read is
                // done here; reading again would silently drop that
                // following character.
                env.error(pos, "funny.char");
                break;


              default:
                // Not an ASCII token start: it may still be a Unicode
                // letter that begins an identifier.
                // Character.isJavaIdentifierStart is the documented
                // replacement for the deprecated isJavaLetter.
                if (Character.isJavaIdentifierStart((char)ch)) {
                    scanIdentifier();
                    return retPos;
                }
                // Report the illegal character, skip it and retry.
                env.error(pos, "funny.char");
                ch = in.read();
                break;
            }
        }
    }

    /**
     * Scan to a matching '}', ']' or ')'. The current token must be
     * a '{', '[' or '('.<p>
     *
     * Tokens are scanned one at a time while a nesting count, which
     * starts at one for the bracket already seen, is kept: every
     * <code>open</code> token raises it and every <code>close</code>
     * token lowers it. The method returns once the count reaches zero.
     * If the end of the input is reached first, "unbalanced.paren" is
     * reported to the environment and the method returns.
     *
     * @param open  the token type of the opening bracket
     * @param close the token type of the closing bracket
     */
    public void match(int open, int close) throws IOException {
        int nesting = 1;

        do {
            scan();
            if (token == open) {
                nesting++;
                continue;
            }
            if (token == close) {
                nesting--;
                continue;
            }
            if (token == EOF) {
                env.error(pos, "unbalanced.paren");
                return;
            }
        } while (nesting != 0);
    }
}