/* * Copyright (c) 2003 * David Leonard. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of David Leonard nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #if HAVE_CONFIG_H # include #endif #if STDC_HEADERS # include # include #endif #if HAVE_STRING_H # include #endif #include #include #include #include #include #include #include #include #include #include #include #include "tokens.h" #include "lex.h" #include "stringdefs.h" #include "unicode.h" #include "dtoa.h" #include "dprint.h" #include "nmath.h" #ifndef NDEBUG int SEE_lex_debug = 0; #endif /* * Lexical analyser. * * This is a lexical analyser for ECMAScript. It uses a 6-character * lookahead input 'filter' (to detect '\u####') and provides an * interface that reveals if the returned token was immediately * preceeded by a line terminator. * * The lexical analyser's behaviour when deciding a slash '/' as * a division or the start of a regular expression is determined by * a flag. The parser is exepected to set it. * * NOTE: Although all strings generated for ECMAscript are UTF-16, * this lexer requires UCS-32 input. */ /* Macros that assume local variable lex */ #define NEXT lex->input->lookahead #define SKIP do { SEE_INPUT_NEXT(lex->input); \ } while (!ATEOF && is_FormatControl(NEXT)) #define UNGET(c) do { lex->la[++lex->lalen]=(c); } while (0) #define ATEOF (lex->input->eof) #define LOOKAHEAD(buf, len) SEE_input_lookahead_copy(lex->input, buf, len) #define CONSUME(ch) \ do { \ if (ATEOF) \ SYNTAX_ERROR(STR(unexpected_eof)); \ if (NEXT != (ch)) \ SYNTAX_ERROR(SEE_string_sprintf( \ lex->input->interpreter, "expected '%c'", (ch))); \ SKIP; \ } while (0) #define SYNTAX_ERROR(s) \ SEE_error_throw_string(lex->input->interpreter, \ lex->input->interpreter->SyntaxError, \ prefix_msg(s, lex)) /* Sign constants */ #define NEGATIVE (-1) #define POSITIVE (1) /* Prototypes */ static struct SEE_string *prefix_msg(struct SEE_string *s, struct lex *lex); static int is_FormatControl(SEE_unicode_t c); static int is_WhiteSpace(SEE_unicode_t c); static int is_LineTerminator(SEE_unicode_t c); static int is_HexDigit(SEE_unicode_t c); static int HexValue(SEE_unicode_t c); static int is_HexEscape(struct lex *lex); static int is_UnicodeEscape(struct lex *lex); static int is_IdentifierStart(struct lex *lex); static int is_IdentifierPart(struct lex *lex); static SEE_unicode_t HexEscape(struct lex *lex); static SEE_unicode_t UnicodeEscape(struct lex *lex); static int DivPunctuator(struct lex *lex); static int LineTerminator(struct lex *lex); static int SkipToEndOfLine(struct lex *lex); static int SGMLComment(struct lex *lex); static int SGMLCommentEnd(struct lex *lex); static int Punctuator(struct lex *lex); static int StringLiteral(struct lex *lex); static int RegularExpressionLiteral(struct lex *lex, int prev); static int NumericLiteral(struct lex *lex); static int CommentDiv(struct lex *lex); static int Token(struct lex *lex); static int lex0(struct lex *lex); /* Returns ("line " + next_lineno + ": " + s) */ static struct SEE_string * prefix_msg(s, lex) struct SEE_string *s; struct lex *lex; { struct SEE_string *t; struct SEE_interpreter *interp = lex->input->interpreter; t = SEE_string_sprintf(interp, "line %d: ", lex->next_lineno); SEE_string_append(t, s); return t; } static int is_FormatControl(c) SEE_unicode_t c; /* 7.1 */ { return UNICODE_IS_Cf(c); /* category Cf or L or R */ } static int is_WhiteSpace(c) SEE_unicode_t c; /* 7.2 */ { return (c == 0x0009 || c == 0x000B || c == 0x000C || c == 0x0020 || c == 0x00A0 || UNICODE_IS_Zs(c)); } static int is_LineTerminator(c) SEE_unicode_t c; /* 7.3 */ { return (c == 0x000A || c == 0x000D || c == 0x2028 || c == 0x2029); } static int is_HexDigit(c) SEE_unicode_t c; { return ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f')); } /* Returns the hexadecimal value of a character. Assumes char is a hex digit */ static int HexValue(c) SEE_unicode_t c; { if (c >= '0' && c <= '9') return c - '0'; else if (c >= 'a' && c <= 'f') return c - 'a' + 10; else /* (c >= 'A' && c <= 'F') */ return c - 'A' + 10; } static int is_HexEscape(lex) struct lex *lex; /* 7.6 */ { SEE_unicode_t lookahead[4]; int lookahead_len; lookahead_len = LOOKAHEAD(lookahead, 4); return (lookahead_len >= 4 && lookahead[0] == '\\' && lookahead[1] == 'x' && is_HexDigit(lookahead[2]) && is_HexDigit(lookahead[3])); } static int is_UnicodeEscape(lex) struct lex *lex; /* 7.6 */ { SEE_unicode_t lookahead[6]; int lookahead_len; lookahead_len = LOOKAHEAD(lookahead, 6); return (lookahead_len >= 6 && lookahead[0] == '\\' && lookahead[1] == 'u' && is_HexDigit(lookahead[2]) && is_HexDigit(lookahead[3]) && is_HexDigit(lookahead[4]) && is_HexDigit(lookahead[5])); } static int is_IdentifierStart(lex) struct lex *lex; /* 7.6 */ { SEE_unicode_t c; if (ATEOF) return 0; if (is_UnicodeEscape(lex)) return 1; c = NEXT; return UNICODE_IS_IS(c); } static int is_IdentifierPart(lex) struct lex *lex; /* 7.6 */ { SEE_unicode_t c; if (ATEOF) return 0; if (is_UnicodeEscape(lex)) return 1; c = NEXT; return UNICODE_IS_IP(c); } static SEE_unicode_t HexEscape(lex) struct lex *lex; /* 7.6 la \x */ { int i; SEE_unicode_t r = 0; CONSUME('\\'); CONSUME('x'); for (i = 0; i < 2; i++) { if (ATEOF) SYNTAX_ERROR(STR(unexpected_eof)); r = (r << 4) | HexValue(NEXT); SKIP; } return r; } static SEE_unicode_t UnicodeEscape(lex) struct lex *lex; /* 7.6 la \u */ { int i; SEE_unicode_t r = 0; CONSUME('\\'); CONSUME('u'); for (i = 0; i < 4; i++) { if (ATEOF) SYNTAX_ERROR(STR(unexpected_eof)); r = (r << 4) | HexValue(NEXT); SKIP; } return r; /* * XXX NOTE: the \uxxxx escape can only encode characters * up to 0xffff. To express unicode characters above this * codepoint, you would have to use a UTF-16 surrogate, but * this is problematic. Better would be to augment ECMA-262 * with a \Uxxxxxxxx escape, such as Python provides. * (spec bug?) */ } static int DivPunctuator(lex) struct lex *lex; /* 7.7 la / */ { CONSUME('/'); if (!ATEOF && NEXT == '=') { SKIP; return tDIVEQ; } return tDIV; } static int LineTerminator(lex) struct lex *lex; /* line terminator */ { SEE_unicode_t lookahead[2]; int lookahead_len; lookahead_len = LOOKAHEAD(lookahead, 2); SEE_ASSERT(lex->input->interpreter, is_LineTerminator(lookahead[0])); SKIP; if (lookahead_len == 2 && lookahead[0] == '\r' && lookahead[1] == '\n') {} /* Don't count the \r in a CRLF pair */ else lex->next_lineno++; return tLINETERMINATOR; } /* Skips all characters up to and including a line terminator (or EOF) */ static int SkipToEndOfLine(lex) struct lex *lex; { while (!ATEOF && !is_LineTerminator(NEXT)) SKIP; if (ATEOF) return tEND; return LineTerminator(lex); } static int SGMLComment(lex) struct lex *lex; /* la */ { /* * The closing '-->' is supposed to be protected by an * actual '//' comment leader. (Refer to Chapter 9 of * 'Client-Side JavaScript Guide', by Netscape) but * we treat it as '//' for compatibility. */ return SkipToEndOfLine(lex); } static int Punctuator(lex) struct lex *lex; /* 7.7 */ { SEE_unicode_t op[4]; /* ">>>=" is the longest punctuator */ struct token *t; int j, len, oplen; struct SEE_interpreter *interp = lex->input->interpreter; if (ATEOF) return tEND; oplen = LOOKAHEAD(op, 4); len = SEE_tok_noperators - 1; if (len > oplen) len = oplen; for (; len > 0; len--) for (t = SEE_tok_operators[len]; t->token; t++) { for (j = 0; j < len; j++) if (t->identifier[j] != op[j]) goto out; if (t->token == tSGMLCOMMENT) { if (interp->compatibility & SEE_COMPAT_SGMLCOM) return SGMLComment(lex); else goto out; } if (t->token == tSGMLCOMMENTEND && lex->next_at_bol) { if (interp->compatibility & SEE_COMPAT_SGMLCOM) return SGMLCommentEnd(lex); else goto out; } for (j = 0; j < len; j++) SKIP; return t->token; out: /* continue */ ; } /* * Throw a descriptive error message */ if (op[0] == SEE_INPUT_BADCHAR) SYNTAX_ERROR(SEE_string_sprintf(interp, "malformed unicode input")); else if (op[0] >= ' ' && op[0] <= '~') SYNTAX_ERROR(SEE_string_sprintf(interp, "unexpected character '%c'", op[0])); else SYNTAX_ERROR(SEE_string_sprintf(interp, "unexpected character '\\u%04x'", op[0])); /* NOTREACHED */ } static int StringLiteral(lex) struct lex *lex; /* 7.8.4 la ' " */ { SEE_unicode_t quote; SEE_unicode_t c = 0; struct SEE_string *s; struct SEE_interpreter *interp = lex->input->interpreter; s = SEE_string_new(interp, 0); quote = NEXT; SKIP; while (!ATEOF && NEXT != quote) { if (is_LineTerminator(NEXT)) SYNTAX_ERROR(STR(broken_literal)); else if (is_UnicodeEscape(lex)) c = UnicodeEscape(lex); else if (is_HexEscape(lex)) c = HexEscape(lex); else if (NEXT == '\\') { SKIP; if (is_LineTerminator(NEXT)) { if (SEE_GET_JS_COMPAT(interp)) { /* Ignore escaped LineTerminator */ SKIP; continue; } SYNTAX_ERROR(STR(escaped_lit_nl)); } else if (ATEOF) SYNTAX_ERROR(STR(escaped_lit_nl)); switch (NEXT) { case 'b': c = 0x0008; SKIP; break; case 't': c = 0x0009; SKIP; break; case 'n': c = 0x000a; SKIP; break; case 'v': c = 0x000b; SKIP; break; case 'f': c = 0x000c; SKIP; break; case 'r': c = 0x000d; SKIP; break; case '0': case '1': case '2': case '3': c = NEXT - '0'; SKIP; if (!ATEOF && NEXT >= '0' && NEXT <= '7') { c = (c << 3) | (NEXT - '0'); SKIP; } if (!ATEOF && NEXT >= '0' && NEXT <= '7') { c = (c << 3) | (NEXT - '0'); SKIP; } break; case '4': case '5': case '6': case '7': c = NEXT - '0'; SKIP; if (!ATEOF && NEXT >= '0' && NEXT <= '7') { c = (c << 3) | (NEXT - '0'); SKIP; } break; case 'x': case 'u': if (SEE_GET_JS_COMPAT(interp)) goto literal; /* Strict ECMA: */ if (NEXT == 'x') SYNTAX_ERROR(STR(invalid_esc_x)); else SYNTAX_ERROR(STR(invalid_esc_u)); /* NOTREACHED */ default: literal: c = NEXT; SKIP; break; } } else { c = NEXT; SKIP; } SEE_string_append_unicode(s, c); } CONSUME(quote); SEE_SET_STRING(&lex->value, s); return tSTRING; } /* * 7.8.5 Scans for a regular expression token. * Assumes prev (immediately previous token) is either tDIV or tDIVEQ. * Returns tREGEX on success or throws an exception on failure. * The string in lex->value is of the form "/regex/flags" */ static int RegularExpressionLiteral(lex, prev) struct lex *lex; int prev; { struct SEE_string *s; int incc = 0; struct SEE_interpreter *interp = lex->input->interpreter; s = SEE_string_new(interp, 0); SEE_string_addch(s, '/'); if (prev == tDIVEQ) SEE_string_addch(s, '='); while (!ATEOF) { if (NEXT == '/' && (!incc || !(SEE_GET_JS_COMPAT(interp)))) /* EXT:15 */ break; if (NEXT == '\\') { SEE_string_addch(s, '\\'); SKIP; if (ATEOF) break; } else { /* Track charclasses for JS_COMPAT */ if (NEXT == '[') incc = 1; if (NEXT == ']') incc = 0; } if (is_LineTerminator(NEXT)) SYNTAX_ERROR(STR(broken_regex)); SEE_string_append_unicode(s, NEXT); SKIP; } if (ATEOF) SYNTAX_ERROR(STR(eof_in_regex)); CONSUME('/'); SEE_string_addch(s, '/'); while (!ATEOF && is_IdentifierPart(lex)) { SEE_string_append_unicode(s, NEXT); SKIP; } SEE_SET_STRING(&lex->value, s); return tREGEX; } static int NumericLiteral(lex) struct lex *lex; /* 7.8.3 la [.0-9] */ { SEE_number_t n, e; int seendigit; unsigned int i; struct SEE_string *s; char *numbuf, *endstr; struct SEE_interpreter *interp = lex->input->interpreter; seendigit = 0; n = 0; s = SEE_string_new(interp, 0); if (NEXT == '0') { SKIP; if (!ATEOF && (NEXT == 'x' || NEXT == 'X')) { SKIP; if (ATEOF || !is_HexDigit(NEXT)) SYNTAX_ERROR(STR(hex_literal_detritus)); while (!ATEOF && is_HexDigit(NEXT)) { SEE_string_addch(s, (SEE_char_t)NEXT); SKIP; } if (!ATEOF && is_IdentifierStart(lex)) SYNTAX_ERROR(STR(hex_literal_detritus)); e = 1; for (i = 0; i < s->length; i++) { n += e * HexValue(s->data[s->length - i - 1]); e *= 16; } SEE_SET_NUMBER(&lex->value, n); return tNUMBER; } SEE_string_addch(s, '0'); seendigit = 1; } while (!ATEOF && '0' <= NEXT && NEXT <= '9') { SEE_string_addch(s, (SEE_char_t)NEXT); seendigit = 1; SKIP; } /* Octal integers */ if (SEE_GET_JS_COMPAT(interp) && seendigit && (ATEOF || (NEXT != '.' && NEXT != 'e' && NEXT != 'E')) && s->length > 1 && s->data[0] == '0') { /* Octal integers start with 0 and dont follow with . or e */ n = 0; for (i = 1; i < s->length; i++) { if (s->data[i] > '7') goto not_octal; n = n * 8 + s->data[i] - '0'; } if (!ATEOF && is_IdentifierStart(lex)) goto not_octal; SEE_SET_NUMBER(&lex->value, n); return tNUMBER; } not_octal: if (!ATEOF && NEXT == '.') { SEE_string_addch(s, '.'); SKIP; while (!ATEOF && '0' <= NEXT && NEXT <= '9') { seendigit = 1; SEE_string_addch(s, (SEE_char_t)NEXT); SKIP; } } if (!seendigit) { /* free(s) */ return '.'; /* Actually matched Punctuator! */ } if (!ATEOF && (NEXT == 'e' || NEXT == 'E')) { SEE_string_addch(s, (SEE_char_t)NEXT); SKIP; seendigit = 0; if (!ATEOF && NEXT == '-') { SEE_string_addch(s, '-'); SKIP; } else if (!ATEOF && NEXT == '+') { SEE_string_addch(s, '+'); SKIP; } e = 0; while (!ATEOF && '0' <= NEXT && NEXT <= '9') { seendigit = 1; SEE_string_addch(s, (SEE_char_t)NEXT); SKIP; } if (!seendigit) SYNTAX_ERROR(STR(dec_literal_detritus)); } numbuf = SEE_STRING_ALLOCA(interp, char, s->length + 1); for (i = 0; i < s->length; i++) numbuf[i] = s->data[i] & 0x7f; numbuf[i] = '\0'; endstr = NULL; n = SEE_strtod(numbuf, &endstr); if (!endstr || *endstr) /* impossible condition? */ SYNTAX_ERROR(STR(dec_literal_detritus)); SEE_SET_NUMBER(&lex->value, n); return tNUMBER; } static int CommentDiv(lex) struct lex *lex; /* 7.4 la / */ { SEE_unicode_t lookahead[2]; int lookahead_len; lookahead_len = LOOKAHEAD(lookahead, 2); if (lookahead_len >= 2 && lookahead[0] == '/' && lookahead[1] == '*') { int starprev = 0, contains_newline = 0; SKIP; SKIP; while (!ATEOF) { if (starprev && NEXT == '/') { CONSUME('/'); return contains_newline ? tLINETERMINATOR : tCOMMENT; } if (is_LineTerminator(NEXT)) { (void)LineTerminator(lex); contains_newline = 1; starprev = 0; } else { starprev = (NEXT == '*'); SKIP; } } SYNTAX_ERROR(STR(eof_in_c_comment)); } if (lookahead_len >= 2 && lookahead[0] == '/' && lookahead[1] == '/') return SkipToEndOfLine(lex); /* * NB: This assumes regular expressions not wanted, * and that the rest of the regex can be scanned later * if the parser wants it. */ return DivPunctuator(lex); } static int Token(lex) struct lex *lex; /* 7.5 */ { struct SEE_interpreter *interp = lex->input->interpreter; if (ATEOF) return tEND; if (NEXT == '\'' || NEXT == '\"') return StringLiteral(lex); if ((NEXT >= '0' && NEXT <= '9') || NEXT == '.') return NumericLiteral(lex); if (is_IdentifierStart(lex)) { int hasescape = 0, i; struct SEE_string *s; SEE_unicode_t c; s = SEE_string_new(interp, 0); do { if (is_UnicodeEscape(lex)) { c = UnicodeEscape(lex); if (s->length == 0) { if (!UNICODE_IS_IS(c)) SYNTAX_ERROR(STR(bad_unicode_ident)); } else if (!UNICODE_IS_IP(c)) SYNTAX_ERROR(STR(bad_unicode_ident)); hasescape = 1; } else { c = NEXT; SKIP; } SEE_string_append_unicode(s, c); } while (is_IdentifierPart(lex)); /* match keywords */ if (!hasescape) for (i = 0; i < SEE_tok_nkeywords; i++) { const struct SEE_string *keyword; keyword = STRn(SEE_tok_keywords[i].index); if (keyword->length == s->length && SEE_string_cmp(keyword, s) == 0) { int token = SEE_tok_keywords[i].token; if (token == tRESERVED && /* EXT:3 */ SEE_COMPAT_JS(interp, >=, JS11)) { #ifndef NDEBUG dprintf("Warning: line %d: reserved token '", lex->next_lineno); dprints(s); dprintf("' treated as identifier\n"); #endif break; } return token; } } SEE_intern_and_free(interp, &s); SEE_SET_STRING(&lex->value, s); return tIDENT; } return Punctuator(lex); } /* * Scanner grammar goal. Scans lex->input for a token, and returns it. * * May return multiple tLINETERMINATORs, but will never return tCOMMENT. * Scans the InputElementDiv production (never InputElementRegex). * If this function returns tDIV or tDIVEQ, and a regular expression is wanted, * then SEE_lex_regex() should be called immediately. */ static int lex0(lex) struct lex *lex; { int ret; again: while (!ATEOF && is_WhiteSpace(NEXT) && !is_LineTerminator(NEXT)) SKIP; /* skip non-newline whitespace */ if (ATEOF) return tEND; if (is_LineTerminator(NEXT)) return LineTerminator(lex); switch (NEXT) { case '/': ret = CommentDiv(lex); if (ret == tCOMMENT) goto again; /* Discard tCOMMENTs */ return ret; case '\"': case '\'': return StringLiteral(lex); case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': return NumericLiteral(lex); case '.': { SEE_unicode_t lookahead[2]; int lookahead_len; lookahead_len = LOOKAHEAD(lookahead, 2); if (lookahead_len >= 2 && lookahead[1] >= '0' && lookahead[1] <= '9') return NumericLiteral(lex); SKIP; return '.'; } default: return Token(lex); } } /*------------------------------------------------------------ * Public API */ /* * Initialises a tokenizer structure */ void SEE_lex_init(lex, inp) struct lex *lex; struct SEE_input *inp; { lex->input = inp; SEE_SET_UNDEFINED(&lex->value); lex->next_lineno = inp->first_lineno; lex->next_filename = SEE_intern(inp->interpreter, inp->filename); lex->next_at_bol = 1; (void)SEE_lex_next(lex); } /* * Main interface to the lexical anaylser. * * We keep a one-token lookahead. * Each call to this function generates a new lookahead token * (in lex->next) and returns the previous one, so * the lex flags apply to the scanning of the NEXT token, * and NOT to the token being returned. (ie The caller should * generally refer to the resulting lex->next to make * decisions. The value returned is merely a convenience.) * * On return, this function also sets (or clears) the * lex->next_follows_nl flag when a newline is seen immediately * before lex->next. The parser should use this information to * perform automatic semicolon insertion. Note that the defined * tLINETERMINATOR token is an internal scanner pseudo-token and * is never returned by this function. Use the next_follows_nl flag. * * As a special case, if end-of-file (tEND) does not follow * a line terminator, then this function pretends that it does. * * The lex->next_lineno field reflects the line number of * lex->next. */ int SEE_lex_next(lex) struct lex *lex; { int next, token; lex->next_follows_nl = 0; next = lex->next; token = lex0(lex); while (token == tLINETERMINATOR) { #ifndef NDEBUG if (SEE_lex_debug && !lex->next_follows_nl) dprintf("lex: [LINETERMINATOR]\n"); #endif lex->next_follows_nl = 1; lex->next_at_bol = 1; token = lex0(lex); } lex->next_at_bol = 0; if (token == tEND) lex->next_follows_nl = 1; lex->next = token; #ifndef NDEBUG if (SEE_lex_debug) switch (lex->next) { case tIDENT: dprintf("lex: tIDENT "); dprintv(lex->input->interpreter, &lex->value); dprintf("\n"); break; case tSTRING: dprintf("lex: tSTRING "); dprintv(lex->input->interpreter, &lex->value); dprintf("\n"); break; case tNUMBER: dprintf("lex: tNUMBER "); dprintv(lex->input->interpreter, &lex->value); dprintf("\n"); break; default: dprintf("lex: %s\n", SEE_tokenname(lex->next)); } #endif return next; } /* * Converts the next token (just scanned) into a regular expression, * if possible. */ void SEE_lex_regex(lex) struct lex *lex; { if (lex->next == tDIV || lex->next == tDIVEQ) lex->next = RegularExpressionLiteral(lex, lex->next); } /* * 9.3.1 * Scans a SEE_string to convert it into a number. * On success, sets res to the resulting number and returns non-zero. * * This function is called by SEE_ToNumber(). */ int SEE_lex_number(interp, s, res) struct SEE_interpreter *interp; struct SEE_string *s; struct SEE_value *res; { SEE_number_t n, sign; int seendig, hexok; int len = s->length; int i, pos; int start; char *numbuf, *endstr; /* These work becuase we expect no Unicode surrogates in numbers */ #undef ATEOF #undef NEXT #undef SKIP #define ATEOF (pos >= len) #define NEXT (s->data[pos]) #define SKIP pos++ pos = 0; /* StrWhiteSpace */ while (!ATEOF && (is_WhiteSpace(NEXT) || is_LineTerminator(NEXT))) SKIP; if (ATEOF) { SEE_SET_NUMBER(res, 0); /* +0 */ return 1; } sign = 0; if (NEXT == '-') { sign = NEGATIVE; SKIP; } else if (NEXT == '+') { sign = POSITIVE; SKIP; } /* Strict ECMA262-3 hex strings require no sign. Netscape relaxes this. */ hexok = !sign || SEE_GET_JS_COMPAT(interp); if (ATEOF) goto fail; if (NEXT == 'I') { SKIP; if (ATEOF || NEXT != 'n') goto fail; SKIP; if (ATEOF || NEXT != 'f') goto fail; SKIP; if (ATEOF || NEXT != 'i') goto fail; SKIP; if (ATEOF || NEXT != 'n') goto fail; SKIP; if (ATEOF || NEXT != 'i') goto fail; SKIP; if (ATEOF || NEXT != 't') goto fail; SKIP; if (ATEOF || NEXT != 'y') goto fail; SKIP; n = SEE_Infinity; } else { n = 0; start = pos; /* Hexadecimal */ if (hexok && pos + 1 < len && s->data[pos] == '0' && (s->data[pos+1] == 'x' || s->data[pos+1] == 'X')) { SKIP; SKIP; seendig = 0; while (!ATEOF && is_HexDigit(NEXT)) { seendig = 1; n = 16 * n + HexValue(NEXT); SKIP; } if (!seendig) goto fail; goto out; } #if 0 /* Octal */ if (SEE_COMPAT_JS(interp, >=, JS11) && /* EXT:4 */ !ATEOF && NEXT == '0' && !(pos + 1 < len && (s->data[pos+1] == '.' || s->data[pos+1] == 'e' || s->data[pos+1] == 'E'))) { SKIP; n = 0; while (!ATEOF && NEXT >= '0' && NEXT <= '7') { n = 8 * n + NEXT - '0'; SKIP; } goto out; } #endif /* * After this point, we expect to use strtod, so we * just check for character validity, rather than computing n. */ seendig = 0; while (!ATEOF && NEXT >= '0' && NEXT <= '9') { seendig = 1; SKIP; } if (!ATEOF && NEXT == '.') { SKIP; /* '.' */ while (!ATEOF && NEXT >= '0' && NEXT <= '9') { seendig = 1; SKIP; } } if (!seendig) goto fail; /* a lone dot is illegal */ if (!ATEOF && (NEXT == 'e' || NEXT == 'E')) { SKIP; if (!ATEOF && NEXT == '-') { SKIP; } else if (!ATEOF && NEXT == '+') SKIP; seendig = 0; while (!ATEOF && NEXT >= '0' && NEXT <= '9') { seendig = 1; SKIP; } if (!seendig) goto fail; } numbuf = SEE_STRING_ALLOCA(interp, char, pos - start + 1); for (i = 0; i < pos - start; i++) numbuf[i] = s->data[i + start] & 0x7f; numbuf[i] = '\0'; endstr = NULL; n = SEE_strtod(numbuf, &endstr); if (!endstr || *endstr != '\0') goto fail; } out: if (!sign) sign = POSITIVE; /* trailing StrWhiteSpace */ while (!ATEOF && (is_WhiteSpace(NEXT) || is_LineTerminator(NEXT))) SKIP; if (ATEOF) { SEE_SET_NUMBER(res, SEE_COPYSIGN(n, sign)); return 1; } fail: return 0; }