1 /*
2  * Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 package jdk.nashorn.internal.parser;
27 
28 import static jdk.nashorn.internal.parser.TokenType.ADD;
29 import static jdk.nashorn.internal.parser.TokenType.BINARY_NUMBER;
30 import static jdk.nashorn.internal.parser.TokenType.COMMENT;
31 import static jdk.nashorn.internal.parser.TokenType.DECIMAL;
32 import static jdk.nashorn.internal.parser.TokenType.DIRECTIVE_COMMENT;
33 import static jdk.nashorn.internal.parser.TokenType.EOF;
34 import static jdk.nashorn.internal.parser.TokenType.EOL;
35 import static jdk.nashorn.internal.parser.TokenType.ERROR;
36 import static jdk.nashorn.internal.parser.TokenType.ESCSTRING;
37 import static jdk.nashorn.internal.parser.TokenType.EXECSTRING;
38 import static jdk.nashorn.internal.parser.TokenType.FLOATING;
39 import static jdk.nashorn.internal.parser.TokenType.FUNCTION;
40 import static jdk.nashorn.internal.parser.TokenType.HEXADECIMAL;
41 import static jdk.nashorn.internal.parser.TokenType.LBRACE;
42 import static jdk.nashorn.internal.parser.TokenType.LPAREN;
43 import static jdk.nashorn.internal.parser.TokenType.OCTAL;
44 import static jdk.nashorn.internal.parser.TokenType.OCTAL_LEGACY;
45 import static jdk.nashorn.internal.parser.TokenType.RBRACE;
46 import static jdk.nashorn.internal.parser.TokenType.REGEX;
47 import static jdk.nashorn.internal.parser.TokenType.RPAREN;
48 import static jdk.nashorn.internal.parser.TokenType.STRING;
49 import static jdk.nashorn.internal.parser.TokenType.TEMPLATE;
50 import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_HEAD;
51 import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_MIDDLE;
52 import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_TAIL;
53 import static jdk.nashorn.internal.parser.TokenType.XML;
54 
55 import java.io.Serializable;
56 
57 import jdk.nashorn.internal.runtime.ECMAErrors;
58 import jdk.nashorn.internal.runtime.ErrorManager;
59 import jdk.nashorn.internal.runtime.JSErrorType;
60 import jdk.nashorn.internal.runtime.JSType;
61 import jdk.nashorn.internal.runtime.ParserException;
62 import jdk.nashorn.internal.runtime.Source;
63 import jdk.nashorn.internal.runtime.options.Options;
64 
65 /**
66  * Responsible for converting source content into a stream of tokens.
67  *
68  */
69 @SuppressWarnings("fallthrough")
70 public class Lexer extends Scanner {
    /** {@link Integer#MIN_VALUE} widened to a long. */
    private static final long MIN_INT_L = Integer.MIN_VALUE;
    /** {@link Integer#MAX_VALUE} widened to a long. */
    private static final long MAX_INT_L = Integer.MAX_VALUE;

    /**
     * Value of the boolean option "nashorn.lexer.xmlliterals". When set, a token starting
     * with '<' is treated as a possible literal start even outside scripting mode.
     */
    private static final boolean XML_LITERALS = Options.getBooleanProperty("nashorn.lexer.xmlliterals");

    /** Content source. */
    private final Source source;

    /** Buffered stream for tokens. */
    private final TokenStream stream;

    /** True if in scripting mode (here and edit strings are supported). */
    private final boolean scripting;

    /** True if parsing in ECMAScript 6 mode. */
    private final boolean es6;

    /** True if a nested scan. (scan to completion, no EOF.) */
    private final boolean nested;

    /** Line number carried by the pending (not yet emitted) EOL token, or -1 when no EOL is pending. */
    int pendingLine;

    /** Position of last EOL + 1. */
    private int linePosition;

    /** Type of last token added. */
    private TokenType last;

    /** True if the lexer should pause when it encounters a function body (set by the constructor). */
    private final boolean pauseOnFunctionBody;
    /** Set once a 'function' keyword has been scanned while {@link #pauseOnFunctionBody} is on. */
    private boolean pauseOnNextLeftBrace;

    // NOTE(review): presumably counts unclosed '{' inside a template literal expression;
    // its uses are outside this part of the file - confirm.
    private int templateExpressionOpenBraces;
104 
    /**
     * JavaScript whitespace and line terminator characters at or above code point 160.
     * Searched by {@code isOtherJSWhitespace}; the ASCII whitespace characters are
     * tested directly in {@code isJSWhitespace} and are therefore not listed here.
     */
    private static final String JAVASCRIPT_OTHER_WHITESPACE =
        "\u2028" + // line separator
        "\u2029" + // paragraph separator
        "\u00a0" + // Latin-1 no-break space
        "\u1680" + // Ogham space mark
        "\u180e" + // Mongolian vowel separator
        "\u2000" + // en quad
        "\u2001" + // em quad
        "\u2002" + // en space
        "\u2003" + // em space
        "\u2004" + // three-per-em space
        "\u2005" + // four-per-em space
        "\u2006" + // six-per-em space
        "\u2007" + // figure space
        "\u2008" + // punctuation space
        "\u2009" + // thin space
        "\u200a" + // hair space
        "\u202f" + // narrow no-break space
        "\u205f" + // medium mathematical space
        "\u3000" + // ideographic space
        "\ufeff"   // byte order mark
        ;

    /**
     * The JavaScript whitespace and line terminator characters, each spelled out as the
     * text of a six character escape (backslash, 'u', four hex digits) rather than as the
     * character itself. Returned by {@code getWhitespaceRegExp()}.
     */
    private static final String JAVASCRIPT_WHITESPACE_IN_REGEXP =
        "\\u000a" + // line feed
        "\\u000d" + // carriage return (ctrl-m)
        "\\u2028" + // line separator
        "\\u2029" + // paragraph separator
        "\\u0009" + // tab
        "\\u0020" + // ASCII space
        "\\u000b" + // line tabulation (vertical tab)
        "\\u000c" + // form feed (ctrl-l)
        "\\u00a0" + // Latin-1 no-break space
        "\\u1680" + // Ogham space mark
        "\\u180e" + // Mongolian vowel separator
        "\\u2000" + // en quad
        "\\u2001" + // em quad
        "\\u2002" + // en space
        "\\u2003" + // em space
        "\\u2004" + // three-per-em space
        "\\u2005" + // four-per-em space
        "\\u2006" + // six-per-em space
        "\\u2007" + // figure space
        "\\u2008" + // punctuation space
        "\\u2009" + // thin space
        "\\u200a" + // hair space
        "\\u202f" + // narrow no-break space
        "\\u205f" + // medium mathematical space
        "\\u3000" + // ideographic space
        "\\ufeff"   // byte order mark
        ;
156 
unicodeEscape(final char ch)157     static String unicodeEscape(final char ch) {
158         final StringBuilder sb = new StringBuilder();
159 
160         sb.append("\\u");
161 
162         final String hex = Integer.toHexString(ch);
163         for (int i = hex.length(); i < 4; i++) {
164             sb.append('0');
165         }
166         sb.append(hex);
167 
168         return sb.toString();
169     }
170 
    /**
     * Constructor. Delegates to the four argument constructor with both the
     * scripting and the ECMAScript 6 flags switched off.
     *
     * @param source    the source
     * @param stream    the token stream to lex
     */
    public Lexer(final Source source, final TokenStream stream) {
        this(source, stream, false, false);
    }
180 
    /**
     * Constructor. Lexes the whole source: the scan starts at position 0, covers
     * {@code source.getLength()} characters and does not pause on function bodies.
     *
     * @param source    the source
     * @param stream    the token stream to lex
     * @param scripting are we in scripting mode
     * @param es6       are we in ECMAScript 6 mode
     */
    public Lexer(final Source source, final TokenStream stream, final boolean scripting, final boolean es6) {
        this(source, 0, source.getLength(), stream, scripting, es6, false);
    }
192 
193     /**
194      * Constructor
195      *
196      * @param source    the source
197      * @param start     start position in source from which to start lexing
198      * @param len       length of source segment to lex
199      * @param stream    token stream to lex
200      * @param scripting are we in scripting mode
201      * @param es6       are we in ECMAScript 6 mode
202      * @param pauseOnFunctionBody if true, lexer will return from {@link #lexify()} when it encounters a
203      * function body. This is used with the feature where the parser is skipping nested function bodies to
204      * avoid reading ahead unnecessarily when we skip the function bodies.
205      */
Lexer(final Source source, final int start, final int len, final TokenStream stream, final boolean scripting, final boolean es6, final boolean pauseOnFunctionBody)206     public Lexer(final Source source, final int start, final int len, final TokenStream stream, final boolean scripting, final boolean es6, final boolean pauseOnFunctionBody) {
207         super(source.getContent(), 1, start, len);
208         this.source      = source;
209         this.stream      = stream;
210         this.scripting   = scripting;
211         this.es6         = es6;
212         this.nested      = false;
213         this.pendingLine = 1;
214         this.last        = EOL;
215 
216         this.pauseOnFunctionBody = pauseOnFunctionBody;
217     }
218 
Lexer(final Lexer lexer, final State state)219     private Lexer(final Lexer lexer, final State state) {
220         super(lexer, state);
221 
222         source = lexer.source;
223         stream = lexer.stream;
224         scripting = lexer.scripting;
225         es6 = lexer.es6;
226         nested = true;
227 
228         pendingLine = state.pendingLine;
229         linePosition = state.linePosition;
230         last = EOL;
231         pauseOnFunctionBody = false;
232     }
233 
234     static class State extends Scanner.State {
235         /** Pending new line number and position. */
236         public final int pendingLine;
237 
238         /** Position of last EOL + 1. */
239         public final int linePosition;
240 
241         /** Type of last token added. */
242         public final TokenType last;
243 
244         /*
245          * Constructor.
246          */
247 
State(final int position, final int limit, final int line, final int pendingLine, final int linePosition, final TokenType last)248         State(final int position, final int limit, final int line, final int pendingLine, final int linePosition, final TokenType last) {
249             super(position, limit, line);
250 
251             this.pendingLine = pendingLine;
252             this.linePosition = linePosition;
253             this.last = last;
254         }
255     }
256 
257     /**
258      * Save the state of the scan.
259      *
260      * @return Captured state.
261      */
262     @Override
saveState()263     State saveState() {
264         return new State(position, limit, line, pendingLine, linePosition, last);
265     }
266 
267     /**
268      * Restore the state of the scan.
269      *
270      * @param state
271      *            Captured state.
272      */
restoreState(final State state)273     void restoreState(final State state) {
274         super.restoreState(state);
275 
276         pendingLine = state.pendingLine;
277         linePosition = state.linePosition;
278         last = state.last;
279     }
280 
281     /**
282      * Add a new token to the stream.
283      *
284      * @param type
285      *            Token type.
286      * @param start
287      *            Start position.
288      * @param end
289      *            End position.
290      */
add(final TokenType type, final int start, final int end)291     protected void add(final TokenType type, final int start, final int end) {
292         // Record last token.
293         last = type;
294 
295         // Only emit the last EOL in a cluster.
296         if (type == EOL) {
297             pendingLine = end;
298             linePosition = start;
299         } else {
300             // Write any pending EOL to stream.
301             if (pendingLine != -1) {
302                 stream.put(Token.toDesc(EOL, linePosition, pendingLine));
303                 pendingLine = -1;
304             }
305 
306             // Write token to stream.
307             stream.put(Token.toDesc(type, start, end - start));
308         }
309     }
310 
311     /**
312      * Add a new token to the stream.
313      *
314      * @param type
315      *            Token type.
316      * @param start
317      *            Start position.
318      */
add(final TokenType type, final int start)319     protected void add(final TokenType type, final int start) {
320         add(type, start, position);
321     }
322 
323     /**
324      * Return the String of valid whitespace characters for regular
325      * expressions in JavaScript
326      * @return regexp whitespace string
327      */
getWhitespaceRegExp()328     public static String getWhitespaceRegExp() {
329         return JAVASCRIPT_WHITESPACE_IN_REGEXP;
330     }
331 
332     /**
333      * Skip end of line.
334      *
335      * @param addEOL true if EOL token should be recorded.
336      */
skipEOL(final boolean addEOL)337     private void skipEOL(final boolean addEOL) {
338 
339         if (ch0 == '\r') { // detect \r\n pattern
340             skip(1);
341             if (ch0 == '\n') {
342                 skip(1);
343             }
344         } else { // all other space, ch0 is guaranteed to be EOL or \0
345             skip(1);
346         }
347 
348         // bump up line count
349         line++;
350 
351         if (addEOL) {
352             // Add an EOL token.
353             add(EOL, position, line);
354         }
355     }
356 
357     /**
358      * Skip over rest of line including end of line.
359      *
360      * @param addEOL true if EOL token should be recorded.
361      */
skipLine(final boolean addEOL)362     private void skipLine(final boolean addEOL) {
363         // Ignore characters.
364         while (!isEOL(ch0) && !atEOF()) {
365             skip(1);
366         }
367         // Skip over end of line.
368         skipEOL(addEOL);
369     }
370 
371     /**
372      * Test whether a char is valid JavaScript whitespace
373      * @param ch a char
374      * @return true if valid JavaScript whitespace
375      */
isJSWhitespace(final char ch)376     public static boolean isJSWhitespace(final char ch) {
377         return ch == ' '                  // space
378             || ch >= '\t' && ch <= '\r'   // 0x09..0x0d: tab, line feed, tabulation line, ff, carriage return
379             || ch >= 160 && isOtherJSWhitespace(ch);
380     }
381 
isOtherJSWhitespace(final char ch)382     private static boolean isOtherJSWhitespace(final char ch) {
383         return JAVASCRIPT_OTHER_WHITESPACE.indexOf(ch) != -1;
384     }
385 
386     /**
387      * Test whether a char is valid JavaScript end of line
388      * @param ch a char
389      * @return true if valid JavaScript end of line
390      */
isJSEOL(final char ch)391     public static boolean isJSEOL(final char ch) {
392         return ch == '\n'      // line feed
393             || ch == '\r'      // carriage return (ctrl-m)
394             || ch == '\u2028'  // line separator
395             || ch == '\u2029'; // paragraph separator
396     }
397 
398     /**
399      * Test if char is a string delimiter, e.g. '\' or '"'.
400      * @param ch a char
401      * @return true if string delimiter
402      */
isStringDelimiter(final char ch)403     protected boolean isStringDelimiter(final char ch) {
404         return ch == '\'' || ch == '"';
405     }
406 
407     /**
408      * Test if char is a template literal delimiter ('`').
409      */
isTemplateDelimiter(final char ch)410     private static boolean isTemplateDelimiter(final char ch) {
411         return ch == '`';
412     }
413 
414     /**
415      * Test whether a char is valid JavaScript whitespace
416      * @param ch a char
417      * @return true if valid JavaScript whitespace
418      */
isWhitespace(final char ch)419     protected boolean isWhitespace(final char ch) {
420         return Lexer.isJSWhitespace(ch);
421     }
422 
423     /**
424      * Test whether a char is valid JavaScript end of line
425      * @param ch a char
426      * @return true if valid JavaScript end of line
427      */
isEOL(final char ch)428     protected boolean isEOL(final char ch) {
429         return Lexer.isJSEOL(ch);
430     }
431 
432     /**
433      * Skip over whitespace and detect end of line, adding EOL tokens if
434      * encountered.
435      *
436      * @param addEOL true if EOL tokens should be recorded.
437      */
skipWhitespace(final boolean addEOL)438     private void skipWhitespace(final boolean addEOL) {
439         while (isWhitespace(ch0)) {
440             if (isEOL(ch0)) {
441                 skipEOL(addEOL);
442             } else {
443                 skip(1);
444             }
445         }
446     }
447 
448     /**
449      * Skip over comments.
450      *
451      * @return True if a comment.
452      */
skipComments()453     protected boolean skipComments() {
454         // Save the current position.
455         final int start = position;
456 
457         if (ch0 == '/') {
458             // Is it a // comment.
459             if (ch1 == '/') {
460                 // Skip over //.
461                 skip(2);
462 
463                 boolean directiveComment = false;
464                 if ((ch0 == '#' || ch0 == '@') && (ch1 == ' ')) {
465                     directiveComment = true;
466                 }
467 
468                 // Scan for EOL.
469                 while (!atEOF() && !isEOL(ch0)) {
470                     skip(1);
471                 }
472                 // Did detect a comment.
473                 add(directiveComment? DIRECTIVE_COMMENT : COMMENT, start);
474                 return true;
475             } else if (ch1 == '*') {
476                 // Skip over /*.
477                 skip(2);
478                 // Scan for */.
479                 while (!atEOF() && !(ch0 == '*' && ch1 == '/')) {
480                     // If end of line handle else skip character.
481                     if (isEOL(ch0)) {
482                         skipEOL(true);
483                     } else {
484                         skip(1);
485                     }
486                 }
487 
488                 if (atEOF()) {
489                     // TODO - Report closing */ missing in parser.
490                     add(ERROR, start);
491                 } else {
492                     // Skip */.
493                     skip(2);
494                 }
495 
496                 // Did detect a comment.
497                 add(COMMENT, start);
498                 return true;
499             }
500         } else if (ch0 == '#') {
501             assert scripting;
502             // shell style comment
503             // Skip over #.
504             skip(1);
505             // Scan for EOL.
506             while (!atEOF() && !isEOL(ch0)) {
507                 skip(1);
508             }
509             // Did detect a comment.
510             add(COMMENT, start);
511             return true;
512         }
513 
514         // Not a comment.
515         return false;
516     }
517 
518     /**
519      * Convert a regex token to a token object.
520      *
521      * @param start  Position in source content.
522      * @param length Length of regex token.
523      * @return Regex token object.
524      */
valueOfPattern(final int start, final int length)525     public RegexToken valueOfPattern(final int start, final int length) {
526         // Save the current position.
527         final int savePosition = position;
528         // Reset to beginning of content.
529         reset(start);
530         // Buffer for recording characters.
531         final StringBuilder sb = new StringBuilder(length);
532 
533         // Skip /.
534         skip(1);
535         boolean inBrackets = false;
536         // Scan for closing /, stopping at end of line.
537         while (!atEOF() && ch0 != '/' && !isEOL(ch0) || inBrackets) {
538             // Skip over escaped character.
539             if (ch0 == '\\') {
540                 sb.append(ch0);
541                 sb.append(ch1);
542                 skip(2);
543             } else {
544                 if (ch0 == '[') {
545                     inBrackets = true;
546                 } else if (ch0 == ']') {
547                     inBrackets = false;
548                 }
549 
550                 // Skip literal character.
551                 sb.append(ch0);
552                 skip(1);
553             }
554         }
555 
556         // Get pattern as string.
557         final String regex = sb.toString();
558 
559         // Skip /.
560         skip(1);
561 
562         // Options as string.
563         final String options = source.getString(position, scanIdentifier());
564 
565         reset(savePosition);
566 
567         // Compile the pattern.
568         return new RegexToken(regex, options);
569     }
570 
571     /**
572      * Return true if the given token can be the beginning of a literal.
573      *
574      * @param token a token
575      * @return true if token can start a literal.
576      */
canStartLiteral(final TokenType token)577     public boolean canStartLiteral(final TokenType token) {
578         return token.startsWith('/') || ((scripting || XML_LITERALS) && token.startsWith('<'));
579     }
580 
581     /**
582      * interface to receive line information for multi-line literals.
583      */
584     protected interface LineInfoReceiver {
585         /**
586          * Receives line information
587          * @param line last line number
588          * @param linePosition position of last line
589          */
lineInfo(int line, int linePosition)590         public void lineInfo(int line, int linePosition);
591     }
592 
593     /**
594      * Check whether the given token represents the beginning of a literal. If so scan
595      * the literal and return <code>true</code>, otherwise return false.
596      *
597      * @param token the token.
598      * @param startTokenType the token type.
599      * @param lir LineInfoReceiver that receives line info for multi-line string literals.
600      * @return True if a literal beginning with startToken was found and scanned.
601      */
scanLiteral(final long token, final TokenType startTokenType, final LineInfoReceiver lir)602     protected boolean scanLiteral(final long token, final TokenType startTokenType, final LineInfoReceiver lir) {
603         // Check if it can be a literal.
604         if (!canStartLiteral(startTokenType)) {
605             return false;
606         }
607         // We break on ambiguous tokens so if we already moved on it can't be a literal.
608         if (stream.get(stream.last()) != token) {
609             return false;
610         }
611 
612         // Record current position in case multiple heredocs start on this line - see JDK-8073653
613         final State state = saveState();
614         // Rewind to token start position
615         reset(Token.descPosition(token));
616 
617         if (ch0 == '/') {
618             return scanRegEx();
619         } else if (ch0 == '<') {
620             if (ch1 == '<') {
621                 return scanHereString(lir, state);
622             } else if (Character.isJavaIdentifierStart(ch1)) {
623                 return scanXMLLiteral();
624             }
625         }
626 
627         return false;
628     }
629 
630     /**
631      * Scan over regex literal.
632      *
633      * @return True if a regex literal.
634      */
scanRegEx()635     private boolean scanRegEx() {
636         assert ch0 == '/';
637         // Make sure it's not a comment.
638         if (ch1 != '/' && ch1 != '*') {
639             // Record beginning of literal.
640             final int start = position;
641             // Skip /.
642             skip(1);
643             boolean inBrackets = false;
644 
645             // Scan for closing /, stopping at end of line.
646             while (!atEOF() && (ch0 != '/' || inBrackets) && !isEOL(ch0)) {
647                 // Skip over escaped character.
648                 if (ch0 == '\\') {
649                     skip(1);
650                     if (isEOL(ch0)) {
651                         reset(start);
652                         return false;
653                     }
654                     skip(1);
655                 } else {
656                     if (ch0 == '[') {
657                         inBrackets = true;
658                     } else if (ch0 == ']') {
659                         inBrackets = false;
660                     }
661 
662                     // Skip literal character.
663                     skip(1);
664                 }
665             }
666 
667             // If regex literal.
668             if (ch0 == '/') {
669                 // Skip /.
670                 skip(1);
671 
672                 // Skip over options.
673                 while (!atEOF() && Character.isJavaIdentifierPart(ch0) || ch0 == '\\' && ch1 == 'u') {
674                     skip(1);
675                 }
676 
677                 // Add regex token.
678                 add(REGEX, start);
679                 // Regex literal detected.
680                 return true;
681             }
682 
683             // False start try again.
684             reset(start);
685         }
686 
687         // Regex literal not detected.
688         return false;
689     }
690 
691     /**
692      * Convert a digit to a integer.  Can't use Character.digit since we are
693      * restricted to ASCII by the spec.
694      *
695      * @param ch   Character to convert.
696      * @param base Numeric base.
697      *
698      * @return The converted digit or -1 if invalid.
699      */
convertDigit(final char ch, final int base)700     protected static int convertDigit(final char ch, final int base) {
701         int digit;
702 
703         if ('0' <= ch && ch <= '9') {
704             digit = ch - '0';
705         } else if ('A' <= ch && ch <= 'Z') {
706             digit = ch - 'A' + 10;
707         } else if ('a' <= ch && ch <= 'z') {
708             digit = ch - 'a' + 10;
709         } else {
710             return -1;
711         }
712 
713         return digit < base ? digit : -1;
714     }
715 
716 
717     /**
718      * Get the value of a hexadecimal numeric sequence.
719      *
720      * @param length Number of digits.
721      * @param type   Type of token to report against.
722      * @return Value of sequence or < 0 if no digits.
723      */
hexSequence(final int length, final TokenType type)724     private int hexSequence(final int length, final TokenType type) {
725         int value = 0;
726 
727         for (int i = 0; i < length; i++) {
728             final int digit = convertDigit(ch0, 16);
729 
730             if (digit == -1) {
731                 error(Lexer.message("invalid.hex"), type, position, limit);
732                 return i == 0 ? -1 : value;
733             }
734 
735             value = digit | value << 4;
736             skip(1);
737         }
738 
739         return value;
740     }
741 
742     /**
743      * Get the value of an octal numeric sequence. This parses up to 3 digits with a maximum value of 255.
744      *
745      * @return Value of sequence.
746      */
octalSequence()747     private int octalSequence() {
748         int value = 0;
749 
750         for (int i = 0; i < 3; i++) {
751             final int digit = convertDigit(ch0, 8);
752 
753             if (digit == -1) {
754                 break;
755             }
756             value = digit | value << 3;
757             skip(1);
758 
759             if (i == 1 && value >= 32) {
760                 break;
761             }
762         }
763         return value;
764     }
765 
766     /**
767      * Convert a string to a JavaScript identifier.
768      *
769      * @param start  Position in source content.
770      * @param length Length of token.
771      * @return Ident string or null if an error.
772      */
valueOfIdent(final int start, final int length)773     private String valueOfIdent(final int start, final int length) throws RuntimeException {
774         // Save the current position.
775         final int savePosition = position;
776         // End of scan.
777         final int end = start + length;
778         // Reset to beginning of content.
779         reset(start);
780         // Buffer for recording characters.
781         final StringBuilder sb = new StringBuilder(length);
782 
783         // Scan until end of line or end of file.
784         while (!atEOF() && position < end && !isEOL(ch0)) {
785             // If escape character.
786             if (ch0 == '\\' && ch1 == 'u') {
787                 skip(2);
788                 final int ch = hexSequence(4, TokenType.IDENT);
789                 assert ! isWhitespace((char)ch);
790                 assert ch >= 0;
791                 sb.append((char)ch);
792             } else {
793                 // Add regular character.
794                 sb.append(ch0);
795                 skip(1);
796             }
797         }
798 
799         // Restore position.
800         reset(savePosition);
801 
802         return sb.toString();
803     }
804 
805     /**
806      * Scan over and identifier or keyword. Handles identifiers containing
807      * encoded Unicode chars.
808      *
809      * Example:
810      *
811      * var \u0042 = 44;
812      */
scanIdentifierOrKeyword()813     private void scanIdentifierOrKeyword() {
814         // Record beginning of identifier.
815         final int start = position;
816         // Scan identifier.
817         final int length = scanIdentifier();
818         // Check to see if it is a keyword.
819         final TokenType type = TokenLookup.lookupKeyword(content, start, length);
820         if (type == FUNCTION && pauseOnFunctionBody) {
821             pauseOnNextLeftBrace = true;
822         }
823         // Add keyword or identifier token.
824         add(type, start);
825     }
826 
827     /**
828      * Convert a string to a JavaScript string object.
829      *
830      * @param start  Position in source content.
831      * @param length Length of token.
832      * @return JavaScript string object.
833      */
valueOfString(final int start, final int length, final boolean strict)834     private String valueOfString(final int start, final int length, final boolean strict) throws RuntimeException {
835         // Save the current position.
836         final int savePosition = position;
837         // Calculate the end position.
838         final int end = start + length;
839         // Reset to beginning of string.
840         reset(start);
841 
842         // Buffer for recording characters.
843         final StringBuilder sb = new StringBuilder(length);
844 
845         // Scan until end of string.
846         while (position < end) {
847             // If escape character.
848             if (ch0 == '\\') {
849                 skip(1);
850 
851                 final char next = ch0;
852                 final int afterSlash = position;
853 
854                 skip(1);
855 
856                 // Special characters.
857                 switch (next) {
858                 case '0':
859                 case '1':
860                 case '2':
861                 case '3':
862                 case '4':
863                 case '5':
864                 case '6':
865                 case '7': {
866                     if (strict) {
867                         // "\0" itself is allowed in strict mode. Only other 'real'
868                         // octal escape sequences are not allowed (eg. "\02", "\31").
869                         // See section 7.8.4 String literals production EscapeSequence
870                         if (next != '0' || (ch0 >= '0' && ch0 <= '9')) {
871                             error(Lexer.message("strict.no.octal"), STRING, position, limit);
872                         }
873                     }
874                     reset(afterSlash);
875                     // Octal sequence.
876                     final int ch = octalSequence();
877 
878                     if (ch < 0) {
879                         sb.append('\\');
880                         sb.append('x');
881                     } else {
882                         sb.append((char)ch);
883                     }
884                     break;
885                 }
886                 case 'n':
887                     sb.append('\n');
888                     break;
889                 case 't':
890                     sb.append('\t');
891                     break;
892                 case 'b':
893                     sb.append('\b');
894                     break;
895                 case 'f':
896                     sb.append('\f');
897                     break;
898                 case 'r':
899                     sb.append('\r');
900                     break;
901                 case '\'':
902                     sb.append('\'');
903                     break;
904                 case '\"':
905                     sb.append('\"');
906                     break;
907                 case '\\':
908                     sb.append('\\');
909                     break;
910                 case '\r': // CR | CRLF
911                     if (ch0 == '\n') {
912                         skip(1);
913                     }
914                     // fall through
915                 case '\n': // LF
916                 case '\u2028': // LS
917                 case '\u2029': // PS
918                     // continue on the next line, slash-return continues string
919                     // literal
920                     break;
921                 case 'x': {
922                     // Hex sequence.
923                     final int ch = hexSequence(2, STRING);
924 
925                     if (ch < 0) {
926                         sb.append('\\');
927                         sb.append('x');
928                     } else {
929                         sb.append((char)ch);
930                     }
931                 }
932                     break;
933                 case 'u': {
934                     // Unicode sequence.
935                     final int ch = hexSequence(4, STRING);
936 
937                     if (ch < 0) {
938                         sb.append('\\');
939                         sb.append('u');
940                     } else {
941                         sb.append((char)ch);
942                     }
943                 }
944                     break;
945                 case 'v':
946                     sb.append('\u000B');
947                     break;
948                 // All other characters.
949                 default:
950                     sb.append(next);
951                     break;
952                 }
953             } else if (ch0 == '\r') {
954                 // Convert CR-LF or CR to LF line terminator.
955                 sb.append('\n');
956                 skip(ch1 == '\n' ? 2 : 1);
957             } else {
958                 // Add regular character.
959                 sb.append(ch0);
960                 skip(1);
961             }
962         }
963 
964         // Restore position.
965         reset(savePosition);
966 
967         return sb.toString();
968     }
969 
970     /**
971      * Scan over a string literal.
972      * @param add true if we are not just scanning but should actually modify the token stream
973      */
scanString(final boolean add)974     protected void scanString(final boolean add) {
975         // Type of string.
976         TokenType type = STRING;
977         // Record starting quote.
978         final char quote = ch0;
979         // Skip over quote.
980         skip(1);
981 
982         // Record beginning of string content.
983         final State stringState = saveState();
984 
985         // Scan until close quote or end of line.
986         while (!atEOF() && ch0 != quote && !isEOL(ch0)) {
987             // Skip over escaped character.
988             if (ch0 == '\\') {
989                 type = ESCSTRING;
990                 skip(1);
991                 if (isEOL(ch0)) {
992                     // Multiline string literal
993                     skipEOL(false);
994                     continue;
995                 }
996             }
997             // Skip literal character.
998             skip(1);
999         }
1000 
1001         // If close quote.
1002         if (ch0 == quote) {
1003             // Skip close quote.
1004             skip(1);
1005         } else {
1006             error(Lexer.message("missing.close.quote"), STRING, position, limit);
1007         }
1008 
1009         // If not just scanning.
1010         if (add) {
1011             // Record end of string.
1012             stringState.setLimit(position - 1);
1013 
1014             if (scripting && !stringState.isEmpty()) {
1015                 switch (quote) {
1016                 case '`':
1017                     // Mark the beginning of an exec string.
1018                     add(EXECSTRING, stringState.position, stringState.limit);
1019                     // Frame edit string with left brace.
1020                     add(LBRACE, stringState.position, stringState.position);
1021                     // Process edit string.
1022                     editString(type, stringState);
1023                     // Frame edit string with right brace.
1024                     add(RBRACE, stringState.limit, stringState.limit);
1025                     break;
1026                 case '"':
1027                     // Only edit double quoted strings.
1028                     editString(type, stringState);
1029                     break;
1030                 case '\'':
1031                     // Add string token without editing.
1032                     add(type, stringState.position, stringState.limit);
1033                     break;
1034                 default:
1035                     break;
1036                 }
1037             } else {
1038                 /// Add string token without editing.
1039                 add(type, stringState.position, stringState.limit);
1040             }
1041         }
1042     }
1043 
1044     /**
1045      * Scan over a template string literal.
1046      */
scanTemplate()1047     private void scanTemplate() {
1048         assert ch0 == '`';
1049         TokenType type = TEMPLATE;
1050 
1051         // Skip over quote and record beginning of string content.
1052         skip(1);
1053         State stringState = saveState();
1054 
1055         // Scan until close quote
1056         while (!atEOF()) {
1057             // Skip over escaped character.
1058             if (ch0 == '`') {
1059                 skip(1);
1060                 // Record end of string.
1061                 stringState.setLimit(position - 1);
1062                 add(type == TEMPLATE ? type : TEMPLATE_TAIL, stringState.position, stringState.limit);
1063                 return;
1064             } else if (ch0 == '$' && ch1 == '{') {
1065                 skip(2);
1066                 stringState.setLimit(position - 2);
1067                 add(type == TEMPLATE ? TEMPLATE_HEAD : type, stringState.position, stringState.limit);
1068 
1069                 // scan to RBRACE
1070                 final Lexer expressionLexer = new Lexer(this, saveState());
1071                 expressionLexer.templateExpressionOpenBraces = 1;
1072                 expressionLexer.lexify();
1073                 restoreState(expressionLexer.saveState());
1074 
1075                 // scan next middle or tail of the template literal
1076                 assert ch0 == '}';
1077                 type = TEMPLATE_MIDDLE;
1078 
1079                 // Skip over rbrace and record beginning of string content.
1080                 skip(1);
1081                 stringState = saveState();
1082 
1083                 continue;
1084             } else if (ch0 == '\\') {
1085                 skip(1);
1086                 // EscapeSequence
1087                 if (isEOL(ch0)) {
1088                     // LineContinuation
1089                     skipEOL(false);
1090                     continue;
1091                 }
1092             }  else if (isEOL(ch0)) {
1093                 // LineTerminatorSequence
1094                 skipEOL(false);
1095                 continue;
1096             }
1097 
1098             // Skip literal character.
1099             skip(1);
1100         }
1101 
1102         error(Lexer.message("missing.close.quote"), TEMPLATE, position, limit);
1103     }
1104 
1105     /**
1106      * Convert string to number.
1107      *
1108      * @param valueString  String to convert.
1109      * @param radix        Numeric base.
1110      * @return Converted number.
1111      */
valueOf(final String valueString, final int radix)1112     private static Number valueOf(final String valueString, final int radix) throws NumberFormatException {
1113         try {
1114             return Integer.parseInt(valueString, radix);
1115         } catch (final NumberFormatException e) {
1116             if (radix == 10) {
1117                 return Double.valueOf(valueString);
1118             }
1119 
1120             double value = 0.0;
1121 
1122             for (int i = 0; i < valueString.length(); i++) {
1123                 final char ch = valueString.charAt(i);
1124                 // Preverified, should always be a valid digit.
1125                 final int digit = convertDigit(ch, radix);
1126                 value *= radix;
1127                 value += digit;
1128             }
1129 
1130             return value;
1131         }
1132     }
1133 
1134     /**
1135      * Scan a number.
1136      */
scanNumber()1137     protected void scanNumber() {
1138         // Record beginning of number.
1139         final int start = position;
1140         // Assume value is a decimal.
1141         TokenType type = DECIMAL;
1142 
1143         // First digit of number.
1144         int digit = convertDigit(ch0, 10);
1145 
1146         // If number begins with 0x.
1147         if (digit == 0 && (ch1 == 'x' || ch1 == 'X') && convertDigit(ch2, 16) != -1) {
1148             // Skip over 0xN.
1149             skip(3);
1150             // Skip over remaining digits.
1151             while (convertDigit(ch0, 16) != -1) {
1152                 skip(1);
1153             }
1154 
1155             type = HEXADECIMAL;
1156         } else if (digit == 0 && es6 && (ch1 == 'o' || ch1 == 'O') && convertDigit(ch2, 8) != -1) {
1157             // Skip over 0oN.
1158             skip(3);
1159             // Skip over remaining digits.
1160             while (convertDigit(ch0, 8) != -1) {
1161                 skip(1);
1162             }
1163 
1164             type = OCTAL;
1165         } else if (digit == 0 && es6 && (ch1 == 'b' || ch1 == 'B') && convertDigit(ch2, 2) != -1) {
1166             // Skip over 0bN.
1167             skip(3);
1168             // Skip over remaining digits.
1169             while (convertDigit(ch0, 2) != -1) {
1170                 skip(1);
1171             }
1172 
1173             type = BINARY_NUMBER;
1174         } else {
1175             // Check for possible octal constant.
1176             boolean octal = digit == 0;
1177             // Skip first digit if not leading '.'.
1178             if (digit != -1) {
1179                 skip(1);
1180             }
1181 
1182             // Skip remaining digits.
1183             while ((digit = convertDigit(ch0, 10)) != -1) {
1184                 // Check octal only digits.
1185                 octal = octal && digit < 8;
1186                 // Skip digit.
1187                 skip(1);
1188             }
1189 
1190             if (octal && position - start > 1) {
1191                 type = OCTAL_LEGACY;
1192             } else if (ch0 == '.' || ch0 == 'E' || ch0 == 'e') {
1193                 // Must be a double.
1194                 if (ch0 == '.') {
1195                     // Skip period.
1196                     skip(1);
1197                     // Skip mantissa.
1198                     while (convertDigit(ch0, 10) != -1) {
1199                         skip(1);
1200                     }
1201                 }
1202 
1203                 // Detect exponent.
1204                 if (ch0 == 'E' || ch0 == 'e') {
1205                     // Skip E.
1206                     skip(1);
1207                     // Detect and skip exponent sign.
1208                     if (ch0 == '+' || ch0 == '-') {
1209                         skip(1);
1210                     }
1211                     // Skip exponent.
1212                     while (convertDigit(ch0, 10) != -1) {
1213                         skip(1);
1214                     }
1215                 }
1216 
1217                 type = FLOATING;
1218             }
1219         }
1220 
1221         if (Character.isJavaIdentifierStart(ch0)) {
1222             error(Lexer.message("missing.space.after.number"), type, position, 1);
1223         }
1224 
1225         // Add number token.
1226         add(type, start);
1227     }
1228 
1229     /**
1230      * Convert a regex token to a token object.
1231      *
1232      * @param start  Position in source content.
1233      * @param length Length of regex token.
1234      * @return Regex token object.
1235      */
valueOfXML(final int start, final int length)1236     XMLToken valueOfXML(final int start, final int length) {
1237         return new XMLToken(source.getString(start, length));
1238     }
1239 
1240     /**
1241      * Scan over a XML token.
1242      *
1243      * @return TRUE if is an XML literal.
1244      */
scanXMLLiteral()1245     private boolean scanXMLLiteral() {
1246         assert ch0 == '<' && Character.isJavaIdentifierStart(ch1);
1247         if (XML_LITERALS) {
1248             // Record beginning of xml expression.
1249             final int start = position;
1250 
1251             int openCount = 0;
1252 
1253             do {
1254                 if (ch0 == '<') {
1255                     if (ch1 == '/' && Character.isJavaIdentifierStart(ch2)) {
1256                         skip(3);
1257                         openCount--;
1258                     } else if (Character.isJavaIdentifierStart(ch1)) {
1259                         skip(2);
1260                         openCount++;
1261                     } else if (ch1 == '?') {
1262                         skip(2);
1263                     } else if (ch1 == '!' && ch2 == '-' && ch3 == '-') {
1264                         skip(4);
1265                     } else {
1266                         reset(start);
1267                         return false;
1268                     }
1269 
1270                     while (!atEOF() && ch0 != '>') {
1271                         if (ch0 == '/' && ch1 == '>') {
1272                             openCount--;
1273                             skip(1);
1274                             break;
1275                         } else if (ch0 == '\"' || ch0 == '\'') {
1276                             scanString(false);
1277                         } else {
1278                             skip(1);
1279                         }
1280                     }
1281 
1282                     if (ch0 != '>') {
1283                         reset(start);
1284                         return false;
1285                     }
1286 
1287                     skip(1);
1288                 } else if (atEOF()) {
1289                     reset(start);
1290                     return false;
1291                 } else {
1292                     skip(1);
1293                 }
1294             } while (openCount > 0);
1295 
1296             add(XML, start);
1297             return true;
1298         }
1299 
1300         return false;
1301     }
1302 
1303     /**
1304      * Scan over identifier characters.
1305      *
1306      * @return Length of identifier or zero if none found.
1307      */
scanIdentifier()1308     private int scanIdentifier() {
1309         final int start = position;
1310 
1311         // Make sure first character is valid start character.
1312         if (ch0 == '\\' && ch1 == 'u') {
1313             skip(2);
1314             final int ch = hexSequence(4, TokenType.IDENT);
1315 
1316             if (!Character.isJavaIdentifierStart(ch)) {
1317                 error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
1318             }
1319         } else if (!Character.isJavaIdentifierStart(ch0)) {
1320             // Not an identifier.
1321             return 0;
1322         }
1323 
1324         // Make sure remaining characters are valid part characters.
1325         while (!atEOF()) {
1326             if (ch0 == '\\' && ch1 == 'u') {
1327                 skip(2);
1328                 final int ch = hexSequence(4, TokenType.IDENT);
1329 
1330                 if (!Character.isJavaIdentifierPart(ch)) {
1331                     error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
1332                 }
1333             } else if (Character.isJavaIdentifierPart(ch0)) {
1334                 skip(1);
1335             } else {
1336                 break;
1337             }
1338         }
1339 
1340         // Length of identifier sequence.
1341         return position - start;
1342     }
1343 
1344     /**
1345      * Compare two identifiers (in content) for equality.
1346      *
1347      * @param aStart  Start of first identifier.
1348      * @param aLength Length of first identifier.
1349      * @param bStart  Start of second identifier.
1350      * @param bLength Length of second identifier.
1351      * @return True if equal.
1352      */
identifierEqual(final int aStart, final int aLength, final int bStart, final int bLength)1353     private boolean identifierEqual(final int aStart, final int aLength, final int bStart, final int bLength) {
1354         if (aLength == bLength) {
1355             for (int i = 0; i < aLength; i++) {
1356                 if (content[aStart + i] != content[bStart + i]) {
1357                     return false;
1358                 }
1359             }
1360 
1361             return true;
1362         }
1363 
1364         return false;
1365     }
1366 
1367     /**
1368      * Detect if a line starts with a marker identifier.
1369      *
1370      * @param identStart  Start of identifier.
1371      * @param identLength Length of identifier.
1372      * @return True if detected.
1373      */
hasHereMarker(final int identStart, final int identLength)1374     private boolean hasHereMarker(final int identStart, final int identLength) {
1375         // Skip any whitespace.
1376         skipWhitespace(false);
1377 
1378         return identifierEqual(identStart, identLength, position, scanIdentifier());
1379     }
1380 
1381     /**
1382      * Lexer to service edit strings.
1383      */
1384     private static class EditStringLexer extends Lexer {
1385         /** Type of string literals to emit. */
1386         final TokenType stringType;
1387 
1388         /*
1389          * Constructor.
1390          */
1391 
EditStringLexer(final Lexer lexer, final TokenType stringType, final State stringState)1392         EditStringLexer(final Lexer lexer, final TokenType stringType, final State stringState) {
1393             super(lexer, stringState);
1394 
1395             this.stringType = stringType;
1396         }
1397 
1398         /**
1399          * Lexify the contents of the string.
1400          */
1401         @Override
lexify()1402         public void lexify() {
1403             // Record start of string position.
1404             int stringStart = position;
1405             // Indicate that the priming first string has not been emitted.
1406             boolean primed = false;
1407 
1408             while (true) {
1409                 // Detect end of content.
1410                 if (atEOF()) {
1411                     break;
1412                 }
1413 
1414                 // Honour escapes (should be well formed.)
1415                 if (ch0 == '\\' && stringType == ESCSTRING) {
1416                     skip(2);
1417 
1418                     continue;
1419                 }
1420 
1421                 // If start of expression.
1422                 if (ch0 == '$' && ch1 == '{') {
1423                     if (!primed || stringStart != position) {
1424                         if (primed) {
1425                             add(ADD, stringStart, stringStart + 1);
1426                         }
1427 
1428                         add(stringType, stringStart, position);
1429                         primed = true;
1430                     }
1431 
1432                     // Skip ${
1433                     skip(2);
1434 
1435                     // Save expression state.
1436                     final State expressionState = saveState();
1437 
1438                     // Start with one open brace.
1439                     int braceCount = 1;
1440 
1441                     // Scan for the rest of the string.
1442                     while (!atEOF()) {
1443                         // If closing brace.
1444                         if (ch0 == '}') {
1445                             // Break only only if matching brace.
1446                             if (--braceCount == 0) {
1447                                 break;
1448                             }
1449                         } else if (ch0 == '{') {
1450                             // Bump up the brace count.
1451                             braceCount++;
1452                         }
1453 
1454                         // Skip to next character.
1455                         skip(1);
1456                     }
1457 
1458                     // If braces don't match then report an error.
1459                     if (braceCount != 0) {
1460                         error(Lexer.message("edit.string.missing.brace"), LBRACE, expressionState.position - 1, 1);
1461                     }
1462 
1463                     // Mark end of expression.
1464                     expressionState.setLimit(position);
1465                     // Skip closing brace.
1466                     skip(1);
1467 
1468                     // Start next string.
1469                     stringStart = position;
1470 
1471                     // Concatenate expression.
1472                     add(ADD, expressionState.position, expressionState.position + 1);
1473                     add(LPAREN, expressionState.position, expressionState.position + 1);
1474 
1475                     // Scan expression.
1476                     final Lexer lexer = new Lexer(this, expressionState);
1477                     lexer.lexify();
1478 
1479                     // Close out expression parenthesis.
1480                     add(RPAREN, position - 1, position);
1481 
1482                     continue;
1483                 }
1484 
1485                 // Next character in string.
1486                 skip(1);
1487             }
1488 
1489             // If there is any unemitted string portion.
1490             if (stringStart != limit) {
1491                 // Concatenate remaining string.
1492                 if (primed) {
1493                     add(ADD, stringStart, 1);
1494                 }
1495 
1496                 add(stringType, stringStart, limit);
1497             }
1498         }
1499 
1500     }
1501 
1502     /**
1503      * Edit string for nested expressions.
1504      *
1505      * @param stringType  Type of string literals to emit.
1506      * @param stringState State of lexer at start of string.
1507      */
editString(final TokenType stringType, final State stringState)1508     private void editString(final TokenType stringType, final State stringState) {
1509         // Use special lexer to scan string.
1510         final EditStringLexer lexer = new EditStringLexer(this, stringType, stringState);
1511         lexer.lexify();
1512 
1513         // Need to keep lexer informed.
1514         last = stringType;
1515     }
1516 
1517     /**
1518      * Scan over a here string.
1519      *
1520      * @return TRUE if is a here string.
1521      */
scanHereString(final LineInfoReceiver lir, final State oldState)1522     private boolean scanHereString(final LineInfoReceiver lir, final State oldState) {
1523         assert ch0 == '<' && ch1 == '<';
1524         if (scripting) {
1525             // Record beginning of here string.
1526             final State saved = saveState();
1527 
1528             // << or <<<
1529             final boolean excludeLastEOL = ch2 != '<';
1530 
1531             if (excludeLastEOL) {
1532                 skip(2);
1533             } else {
1534                 skip(3);
1535             }
1536 
1537             // Scan identifier. It might be quoted, indicating that no string editing should take place.
1538             final char quoteChar = ch0;
1539             final boolean noStringEditing = quoteChar == '"' || quoteChar == '\'';
1540             if (noStringEditing) {
1541                 skip(1);
1542             }
1543             final int identStart = position;
1544             final int identLength = scanIdentifier();
1545             if (noStringEditing) {
1546                 if (ch0 != quoteChar) {
1547                     error(Lexer.message("here.non.matching.delimiter"), last, position, position);
1548                     restoreState(saved);
1549                     return false;
1550                 }
1551                 skip(1);
1552             }
1553 
1554             // Check for identifier.
1555             if (identLength == 0) {
1556                 // Treat as shift.
1557                 restoreState(saved);
1558 
1559                 return false;
1560             }
1561 
1562             // Record rest of line.
1563             final State restState = saveState();
1564             // keep line number updated
1565             int lastLine = line;
1566 
1567             skipLine(false);
1568             lastLine++;
1569             int lastLinePosition = position;
1570             restState.setLimit(position);
1571 
1572             if (oldState.position > position) {
1573                 restoreState(oldState);
1574                 skipLine(false);
1575             }
1576 
1577             // Record beginning of string.
1578             final State stringState = saveState();
1579             int stringEnd = position;
1580 
1581             // Hunt down marker.
1582             while (!atEOF()) {
1583                 // Skip any whitespace.
1584                 skipWhitespace(false);
1585 
1586                 //handle trailing blank lines
1587                 lastLinePosition = position;
1588                 stringEnd = position;
1589 
1590                 if (hasHereMarker(identStart, identLength)) {
1591                     break;
1592                 }
1593 
1594                 skipLine(false);
1595                 lastLine++;
1596                 lastLinePosition = position;
1597                 stringEnd = position;
1598             }
1599 
1600             // notify last line information
1601             lir.lineInfo(lastLine, lastLinePosition);
1602 
1603             // Record end of string.
1604             stringState.setLimit(stringEnd);
1605 
1606             // If marker is missing.
1607             if (stringState.isEmpty() || atEOF()) {
1608                 error(Lexer.message("here.missing.end.marker", source.getString(identStart, identLength)), last, position, position);
1609                 restoreState(saved);
1610 
1611                 return false;
1612             }
1613 
1614             // Remove last end of line if specified.
1615             if (excludeLastEOL) {
1616                 // Handles \n.
1617                 if (content[stringEnd - 1] == '\n') {
1618                     stringEnd--;
1619                 }
1620 
1621                 // Handles \r and \r\n.
1622                 if (content[stringEnd - 1] == '\r') {
1623                     stringEnd--;
1624                 }
1625 
1626                 // Update end of string.
1627                 stringState.setLimit(stringEnd);
1628             }
1629 
1630             // Edit string if appropriate.
1631             if (!noStringEditing && !stringState.isEmpty()) {
1632                 editString(STRING, stringState);
1633             } else {
1634                 // Add here string.
1635                 add(STRING, stringState.position, stringState.limit);
1636             }
1637 
1638             // Scan rest of original line.
1639             final Lexer restLexer = new Lexer(this, restState);
1640 
1641             restLexer.lexify();
1642 
1643             return true;
1644         }
1645 
1646         return false;
1647     }
1648 
1649     /**
1650      * Breaks source content down into lex units, adding tokens to the token
1651      * stream. The routine scans until the stream buffer is full. Can be called
1652      * repeatedly until EOF is detected.
1653      */
lexify()1654     public void lexify() {
1655         while (!stream.isFull() || nested) {
1656             // Skip over whitespace.
1657             skipWhitespace(true);
1658 
1659             // Detect end of file.
1660             if (atEOF()) {
1661                 if (!nested) {
1662                     // Add an EOF token at the end.
1663                     add(EOF, position);
1664                 }
1665 
1666                 break;
1667             }
1668 
1669             // Check for comments. Note that we don't scan for regexp and other literals here as
1670             // we may not have enough context to distinguish them from similar looking operators.
1671             // Instead we break on ambiguous operators below and let the parser decide.
1672             if (ch0 == '/' && skipComments()) {
1673                 continue;
1674             }
1675 
1676             if (scripting && ch0 == '#' && skipComments()) {
1677                 continue;
1678             }
1679 
1680             // TokenType for lookup of delimiter or operator.
1681             TokenType type;
1682 
1683             if (ch0 == '.' && convertDigit(ch1, 10) != -1) {
1684                 // '.' followed by digit.
1685                 // Scan and add a number.
1686                 scanNumber();
1687             } else if ((type = TokenLookup.lookupOperator(ch0, ch1, ch2, ch3)) != null) {
1688                 if (templateExpressionOpenBraces > 0) {
1689                     if (type == LBRACE) {
1690                         templateExpressionOpenBraces++;
1691                     } else if (type == RBRACE) {
1692                         if (--templateExpressionOpenBraces == 0) {
1693                             break;
1694                         }
1695                     }
1696                 }
1697 
1698                 // Get the number of characters in the token.
1699                 final int typeLength = type.getLength();
1700                 // Skip that many characters.
1701                 skip(typeLength);
1702                 // Add operator token.
1703                 add(type, position - typeLength);
1704                 // Some operator tokens also mark the beginning of regexp, XML, or here string literals.
1705                 // We break to let the parser decide what it is.
1706                 if (canStartLiteral(type)) {
1707                     break;
1708                 } else if (type == LBRACE && pauseOnNextLeftBrace) {
1709                     pauseOnNextLeftBrace = false;
1710                     break;
1711                 }
1712             } else if (Character.isJavaIdentifierStart(ch0) || ch0 == '\\' && ch1 == 'u') {
1713                 // Scan and add identifier or keyword.
1714                 scanIdentifierOrKeyword();
1715             } else if (isStringDelimiter(ch0)) {
1716                 // Scan and add a string.
1717                 scanString(true);
1718             } else if (Character.isDigit(ch0)) {
1719                 // Scan and add a number.
1720                 scanNumber();
1721             } else if (isTemplateDelimiter(ch0) && es6) {
1722                 // Scan and add template in ES6 mode.
1723                 scanTemplate();
1724             } else if (isTemplateDelimiter(ch0) && scripting) {
1725                 // Scan and add an exec string ('`') in scripting mode.
1726                 scanString(true);
1727             } else {
1728                 // Don't recognize this character.
1729                 skip(1);
1730                 add(ERROR, position - 1);
1731             }
1732         }
1733     }
1734 
1735     /**
1736      * Return value of token given its token descriptor.
1737      *
1738      * @param token  Token descriptor.
1739      * @return JavaScript value.
1740      */
getValueOf(final long token, final boolean strict)1741     Object getValueOf(final long token, final boolean strict) {
1742         final int start = Token.descPosition(token);
1743         final int len   = Token.descLength(token);
1744 
1745         switch (Token.descType(token)) {
1746         case DECIMAL:
1747             return Lexer.valueOf(source.getString(start, len), 10); // number
1748         case HEXADECIMAL:
1749             return Lexer.valueOf(source.getString(start + 2, len - 2), 16); // number
1750         case OCTAL_LEGACY:
1751             return Lexer.valueOf(source.getString(start, len), 8); // number
1752         case OCTAL:
1753             return Lexer.valueOf(source.getString(start + 2, len - 2), 8); // number
1754         case BINARY_NUMBER:
1755             return Lexer.valueOf(source.getString(start + 2, len - 2), 2); // number
1756         case FLOATING:
1757             final String str   = source.getString(start, len);
1758             final double value = Double.valueOf(str);
1759             if (str.indexOf('.') != -1) {
1760                 return value; //number
1761             }
1762             //anything without an explicit decimal point is still subject to a
1763             //"representable as int or long" check. Then the programmer does not
1764             //explicitly code something as a double. For example new Color(int, int, int)
1765             //and new Color(float, float, float) will get ambiguous for cases like
1766             //new Color(1.0, 1.5, 1.5) if we don't respect the decimal point.
1767             //yet we don't want e.g. 1e6 to be a double unnecessarily
1768             if (JSType.isStrictlyRepresentableAsInt(value)) {
1769                 return (int)value;
1770             }
1771             return value;
1772         case STRING:
1773             return source.getString(start, len); // String
1774         case ESCSTRING:
1775             return valueOfString(start, len, strict); // String
1776         case IDENT:
1777             return valueOfIdent(start, len); // String
1778         case REGEX:
1779             return valueOfPattern(start, len); // RegexToken::LexerToken
1780         case TEMPLATE:
1781         case TEMPLATE_HEAD:
1782         case TEMPLATE_MIDDLE:
1783         case TEMPLATE_TAIL:
1784             return valueOfString(start, len, true); // String
1785         case XML:
1786             return valueOfXML(start, len); // XMLToken::LexerToken
1787         case DIRECTIVE_COMMENT:
1788             return source.getString(start, len);
1789         default:
1790             break;
1791         }
1792 
1793         return null;
1794     }
1795 
1796     /**
1797      * Get the raw string value of a template literal string part.
1798      *
1799      * @param token template string token
1800      * @return raw string
1801      */
valueOfRawString(final long token)1802     public String valueOfRawString(final long token) {
1803         final int start  = Token.descPosition(token);
1804         final int length = Token.descLength(token);
1805 
1806         // Save the current position.
1807         final int savePosition = position;
1808         // Calculate the end position.
1809         final int end = start + length;
1810         // Reset to beginning of string.
1811         reset(start);
1812 
1813         // Buffer for recording characters.
1814         final StringBuilder sb = new StringBuilder(length);
1815 
1816         // Scan until end of string.
1817         while (position < end) {
1818             if (ch0 == '\r') {
1819                 // Convert CR-LF or CR to LF line terminator.
1820                 sb.append('\n');
1821                 skip(ch1 == '\n' ? 2 : 1);
1822             } else {
1823                 // Add regular character.
1824                 sb.append(ch0);
1825                 skip(1);
1826             }
1827         }
1828 
1829         // Restore position.
1830         reset(savePosition);
1831 
1832         return sb.toString();
1833     }
1834 
1835     /**
1836      * Get the correctly localized error message for a given message id format arguments
1837      * @param msgId message id
1838      * @param args  format arguments
1839      * @return message
1840      */
message(final String msgId, final String... args)1841     protected static String message(final String msgId, final String... args) {
1842         return ECMAErrors.getMessage("lexer.error." + msgId, args);
1843     }
1844 
1845     /**
1846      * Generate a runtime exception
1847      *
1848      * @param message       error message
1849      * @param type          token type
1850      * @param start         start position of lexed error
1851      * @param length        length of lexed error
1852      * @throws ParserException  unconditionally
1853      */
error(final String message, final TokenType type, final int start, final int length)1854     protected void error(final String message, final TokenType type, final int start, final int length) throws ParserException {
1855         final long token     = Token.toDesc(type, start, length);
1856         final int  pos       = Token.descPosition(token);
1857         final int  lineNum   = source.getLine(pos);
1858         final int  columnNum = source.getColumn(pos);
1859         final String formatted = ErrorManager.format(message, source, lineNum, columnNum, token);
1860         throw new ParserException(JSErrorType.SYNTAX_ERROR, formatted, source, lineNum, columnNum, token);
1861     }
1862 
1863     /**
1864      * Helper class for Lexer tokens, e.g XML or RegExp tokens.
1865      * This is the abstract superclass
1866      */
1867     public static abstract class LexerToken implements Serializable {
1868         private static final long serialVersionUID = 1L;
1869 
1870         private final String expression;
1871 
1872         /**
1873          * Constructor
1874          * @param expression token expression
1875          */
LexerToken(final String expression)1876         protected LexerToken(final String expression) {
1877             this.expression = expression;
1878         }
1879 
1880         /**
1881          * Get the expression
1882          * @return expression
1883          */
getExpression()1884         public String getExpression() {
1885             return expression;
1886         }
1887     }
1888 
1889     /**
1890      * Temporary container for regular expressions.
1891      */
1892     public static class RegexToken extends LexerToken {
1893         private static final long serialVersionUID = 1L;
1894 
1895         /** Options. */
1896         private final String options;
1897 
1898         /**
1899          * Constructor.
1900          *
1901          * @param expression  regexp expression
1902          * @param options     regexp options
1903          */
RegexToken(final String expression, final String options)1904         public RegexToken(final String expression, final String options) {
1905             super(expression);
1906             this.options = options;
1907         }
1908 
1909         /**
1910          * Get regexp options
1911          * @return options
1912          */
getOptions()1913         public String getOptions() {
1914             return options;
1915         }
1916 
1917         @Override
toString()1918         public String toString() {
1919             return '/' + getExpression() + '/' + options;
1920         }
1921     }
1922 
1923     /**
1924      * Temporary container for XML expression.
1925      */
1926     public static class XMLToken extends LexerToken {
1927         private static final long serialVersionUID = 1L;
1928 
1929         /**
1930          * Constructor.
1931          *
1932          * @param expression  XML expression
1933          */
XMLToken(final String expression)1934         public XMLToken(final String expression) {
1935             super(expression);
1936         }
1937     }
1938 }
1939