1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2  * vim: set ts=8 sts=4 et sw=4 tw=99:
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 // JS lexical scanner.
8 
9 #include "frontend/TokenStream.h"
10 
11 #include "mozilla/IntegerTypeTraits.h"
12 #include "mozilla/PodOperations.h"
13 #include "mozilla/UniquePtr.h"
14 
15 #include <ctype.h>
16 #include <stdarg.h>
17 #include <stdio.h>
18 #include <string.h>
19 
20 #include "jsatom.h"
21 #include "jscntxt.h"
22 #include "jscompartment.h"
23 #include "jsexn.h"
24 #include "jsnum.h"
25 
26 #include "frontend/BytecodeCompiler.h"
27 #include "js/CharacterEncoding.h"
28 #include "vm/HelperThreads.h"
29 #include "vm/Keywords.h"
30 #include "vm/StringBuffer.h"
31 
32 using namespace js;
33 using namespace js::frontend;
34 using namespace js::unicode;
35 
36 using mozilla::Maybe;
37 using mozilla::PodAssign;
38 using mozilla::PodCopy;
39 using mozilla::PodZero;
40 using mozilla::UniquePtr;
41 
42 struct KeywordInfo {
43     const char* chars;         // C string with keyword text
44     TokenKind   tokentype;
45 };
46 
47 static const KeywordInfo keywords[] = {
48 #define KEYWORD_INFO(keyword, name, type) \
49     {js_##keyword##_str, type},
50     FOR_EACH_JAVASCRIPT_KEYWORD(KEYWORD_INFO)
51 #undef KEYWORD_INFO
52 };
53 
54 // Returns a KeywordInfo for the specified characters, or nullptr if the string
55 // is not a keyword.
56 template <typename CharT>
57 static const KeywordInfo*
FindKeyword(const CharT * s,size_t length)58 FindKeyword(const CharT* s, size_t length)
59 {
60     MOZ_ASSERT(length != 0);
61 
62     size_t i;
63     const KeywordInfo* kw;
64     const char* chars;
65 
66 #define JSKW_LENGTH()           length
67 #define JSKW_AT(column)         s[column]
68 #define JSKW_GOT_MATCH(index)   i = (index); goto got_match;
69 #define JSKW_TEST_GUESS(index)  i = (index); goto test_guess;
70 #define JSKW_NO_MATCH()         goto no_match;
71 #include "jsautokw.h"
72 #undef JSKW_NO_MATCH
73 #undef JSKW_TEST_GUESS
74 #undef JSKW_GOT_MATCH
75 #undef JSKW_AT
76 #undef JSKW_LENGTH
77 
78   got_match:
79     return &keywords[i];
80 
81   test_guess:
82     kw = &keywords[i];
83     chars = kw->chars;
84     do {
85         if (*s++ != (unsigned char)(*chars++))
86             goto no_match;
87     } while (--length != 0);
88     return kw;
89 
90   no_match:
91     return nullptr;
92 }
93 
94 static const KeywordInfo*
FindKeyword(JSLinearString * str)95 FindKeyword(JSLinearString* str)
96 {
97     JS::AutoCheckCannotGC nogc;
98     return str->hasLatin1Chars()
99            ? FindKeyword(str->latin1Chars(nogc), str->length())
100            : FindKeyword(str->twoByteChars(nogc), str->length());
101 }
102 
// Returns true iff the |length| characters at |chars| form an identifier: the
// sequence is non-empty, its first character satisfies IsIdentifierStart and
// every later character satisfies IsIdentifierPart.
template <typename CharT>
static bool
IsIdentifier(const CharT* chars, size_t length)
{
    if (length == 0 || !IsIdentifierStart(chars[0]))
        return false;

    for (size_t i = 1; i < length; i++) {
        if (!IsIdentifierPart(chars[i]))
            return false;
    }
    return true;
}
121 
122 bool
IsIdentifier(JSLinearString * str)123 frontend::IsIdentifier(JSLinearString* str)
124 {
125     JS::AutoCheckCannotGC nogc;
126     return str->hasLatin1Chars()
127            ? ::IsIdentifier(str->latin1Chars(nogc), str->length())
128            : ::IsIdentifier(str->twoByteChars(nogc), str->length());
129 }
130 
131 bool
IsIdentifier(const char16_t * chars,size_t length)132 frontend::IsIdentifier(const char16_t* chars, size_t length)
133 {
134     return ::IsIdentifier(chars, length);
135 }
136 
137 bool
IsKeyword(JSLinearString * str)138 frontend::IsKeyword(JSLinearString* str)
139 {
140     return FindKeyword(str) != nullptr;
141 }
142 
SourceCoords(ExclusiveContext * cx,uint32_t ln)143 TokenStream::SourceCoords::SourceCoords(ExclusiveContext* cx, uint32_t ln)
144   : lineStartOffsets_(cx), initialLineNum_(ln), lastLineIndex_(0)
145 {
146     // This is actually necessary!  Removing it causes compile errors on
147     // GCC and clang.  You could try declaring this:
148     //
149     //   const uint32_t TokenStream::SourceCoords::MAX_PTR;
150     //
151     // which fixes the GCC/clang error, but causes bustage on Windows.  Sigh.
152     //
153     uint32_t maxPtr = MAX_PTR;
154 
155     // The first line begins at buffer offset 0.  MAX_PTR is the sentinel.  The
156     // appends cannot fail because |lineStartOffsets_| has statically-allocated
157     // elements.
158     MOZ_ASSERT(lineStartOffsets_.capacity() >= 2);
159     MOZ_ALWAYS_TRUE(lineStartOffsets_.reserve(2));
160     lineStartOffsets_.infallibleAppend(0);
161     lineStartOffsets_.infallibleAppend(maxPtr);
162 }
163 
164 MOZ_ALWAYS_INLINE bool
add(uint32_t lineNum,uint32_t lineStartOffset)165 TokenStream::SourceCoords::add(uint32_t lineNum, uint32_t lineStartOffset)
166 {
167     uint32_t lineIndex = lineNumToIndex(lineNum);
168     uint32_t sentinelIndex = lineStartOffsets_.length() - 1;
169 
170     MOZ_ASSERT(lineStartOffsets_[0] == 0 && lineStartOffsets_[sentinelIndex] == MAX_PTR);
171 
172     if (lineIndex == sentinelIndex) {
173         // We haven't seen this newline before.  Update lineStartOffsets_
174         // only if lineStartOffsets_.append succeeds, to keep sentinel.
175         // Otherwise return false to tell TokenStream about OOM.
176         uint32_t maxPtr = MAX_PTR;
177         if (!lineStartOffsets_.append(maxPtr))
178             return false;
179 
180         lineStartOffsets_[lineIndex] = lineStartOffset;
181     } else {
182         // We have seen this newline before (and ungot it).  Do nothing (other
183         // than checking it hasn't mysteriously changed).
184         // This path can be executed after hitting OOM, so check lineIndex.
185         MOZ_ASSERT_IF(lineIndex < sentinelIndex, lineStartOffsets_[lineIndex] == lineStartOffset);
186     }
187     return true;
188 }
189 
190 MOZ_ALWAYS_INLINE bool
fill(const TokenStream::SourceCoords & other)191 TokenStream::SourceCoords::fill(const TokenStream::SourceCoords& other)
192 {
193     MOZ_ASSERT(lineStartOffsets_.back() == MAX_PTR);
194     MOZ_ASSERT(other.lineStartOffsets_.back() == MAX_PTR);
195 
196     if (lineStartOffsets_.length() >= other.lineStartOffsets_.length())
197         return true;
198 
199     uint32_t sentinelIndex = lineStartOffsets_.length() - 1;
200     lineStartOffsets_[sentinelIndex] = other.lineStartOffsets_[sentinelIndex];
201 
202     for (size_t i = sentinelIndex + 1; i < other.lineStartOffsets_.length(); i++) {
203         if (!lineStartOffsets_.append(other.lineStartOffsets_[i]))
204             return false;
205     }
206     return true;
207 }
208 
209 MOZ_ALWAYS_INLINE uint32_t
lineIndexOf(uint32_t offset) const210 TokenStream::SourceCoords::lineIndexOf(uint32_t offset) const
211 {
212     uint32_t iMin, iMax, iMid;
213 
214     if (lineStartOffsets_[lastLineIndex_] <= offset) {
215         // If we reach here, offset is on a line the same as or higher than
216         // last time.  Check first for the +0, +1, +2 cases, because they
217         // typically cover 85--98% of cases.
218         if (offset < lineStartOffsets_[lastLineIndex_ + 1])
219             return lastLineIndex_;      // lineIndex is same as last time
220 
221         // If we reach here, there must be at least one more entry (plus the
222         // sentinel).  Try it.
223         lastLineIndex_++;
224         if (offset < lineStartOffsets_[lastLineIndex_ + 1])
225             return lastLineIndex_;      // lineIndex is one higher than last time
226 
227         // The same logic applies here.
228         lastLineIndex_++;
229         if (offset < lineStartOffsets_[lastLineIndex_ + 1]) {
230             return lastLineIndex_;      // lineIndex is two higher than last time
231         }
232 
233         // No luck.  Oh well, we have a better-than-default starting point for
234         // the binary search.
235         iMin = lastLineIndex_ + 1;
236         MOZ_ASSERT(iMin < lineStartOffsets_.length() - 1);   // -1 due to the sentinel
237 
238     } else {
239         iMin = 0;
240     }
241 
242     // This is a binary search with deferred detection of equality, which was
243     // marginally faster in this case than a standard binary search.
244     // The -2 is because |lineStartOffsets_.length() - 1| is the sentinel, and we
245     // want one before that.
246     iMax = lineStartOffsets_.length() - 2;
247     while (iMax > iMin) {
248         iMid = iMin + (iMax - iMin) / 2;
249         if (offset >= lineStartOffsets_[iMid + 1])
250             iMin = iMid + 1;    // offset is above lineStartOffsets_[iMid]
251         else
252             iMax = iMid;        // offset is below or within lineStartOffsets_[iMid]
253     }
254     MOZ_ASSERT(iMax == iMin);
255     MOZ_ASSERT(lineStartOffsets_[iMin] <= offset && offset < lineStartOffsets_[iMin + 1]);
256     lastLineIndex_ = iMin;
257     return iMin;
258 }
259 
260 uint32_t
lineNum(uint32_t offset) const261 TokenStream::SourceCoords::lineNum(uint32_t offset) const
262 {
263     uint32_t lineIndex = lineIndexOf(offset);
264     return lineIndexToNum(lineIndex);
265 }
266 
267 uint32_t
columnIndex(uint32_t offset) const268 TokenStream::SourceCoords::columnIndex(uint32_t offset) const
269 {
270     uint32_t lineIndex = lineIndexOf(offset);
271     uint32_t lineStartOffset = lineStartOffsets_[lineIndex];
272     MOZ_ASSERT(offset >= lineStartOffset);
273     return offset - lineStartOffset;
274 }
275 
276 void
lineNumAndColumnIndex(uint32_t offset,uint32_t * lineNum,uint32_t * columnIndex) const277 TokenStream::SourceCoords::lineNumAndColumnIndex(uint32_t offset, uint32_t* lineNum,
278                                                  uint32_t* columnIndex) const
279 {
280     uint32_t lineIndex = lineIndexOf(offset);
281     *lineNum = lineIndexToNum(lineIndex);
282     uint32_t lineStartOffset = lineStartOffsets_[lineIndex];
283     MOZ_ASSERT(offset >= lineStartOffset);
284     *columnIndex = offset - lineStartOffset;
285 }
286 
287 #ifdef _MSC_VER
288 #pragma warning(push)
289 #pragma warning(disable:4351)
290 #endif
291 
TokenStream(ExclusiveContext * cx,const ReadOnlyCompileOptions & options,const char16_t * base,size_t length,StrictModeGetter * smg)292 TokenStream::TokenStream(ExclusiveContext* cx, const ReadOnlyCompileOptions& options,
293                          const char16_t* base, size_t length, StrictModeGetter* smg)
294   : srcCoords(cx, options.lineno),
295     options_(options),
296     tokens(),
297     cursor(),
298     lookahead(),
299     lineno(options.lineno),
300     flags(),
301     linebase(0),
302     prevLinebase(size_t(-1)),
303     userbuf(cx, base, length, options.column),
304     filename(options.filename()),
305     displayURL_(nullptr),
306     sourceMapURL_(nullptr),
307     tokenbuf(cx),
308     cx(cx),
309     mutedErrors(options.mutedErrors()),
310     strictModeGetter(smg)
311 {
312     // Nb: the following tables could be static, but initializing them here is
313     // much easier.  Don't worry, the time to initialize them for each
314     // TokenStream is trivial.  See bug 639420.
315 
316     // See Parser::assignExpr() for an explanation of isExprEnding[].
317     memset(isExprEnding, 0, sizeof(isExprEnding));
318     isExprEnding[TOK_COMMA] = 1;
319     isExprEnding[TOK_SEMI]  = 1;
320     isExprEnding[TOK_COLON] = 1;
321     isExprEnding[TOK_RP]    = 1;
322     isExprEnding[TOK_RB]    = 1;
323     isExprEnding[TOK_RC]    = 1;
324 }
325 
326 #ifdef _MSC_VER
327 #pragma warning(pop)
328 #endif
329 
330 bool
checkOptions()331 TokenStream::checkOptions()
332 {
333     // Constrain starting columns to half of the range of a signed 32-bit value,
334     // to avoid overflow.
335     if (options().column >= mozilla::MaxValue<int32_t>::value / 2 + 1) {
336         reportErrorNoOffset(JSMSG_BAD_COLUMN_NUMBER);
337         return false;
338     }
339 
340     return true;
341 }
342 
~TokenStream()343 TokenStream::~TokenStream()
344 {
345 }
346 
// fast_getc names the fastest getc variant available: getc_unlocked when
// HAVE_GETC_UNLOCKED is defined, else _getc_nolock when HAVE__GETC_NOLOCK is
// defined, else plain getc.
#ifdef HAVE_GETC_UNLOCKED
# define fast_getc getc_unlocked
#elif defined(HAVE__GETC_NOLOCK)
# define fast_getc _getc_nolock
#else
# define fast_getc getc
#endif
355 
356 MOZ_ALWAYS_INLINE void
updateLineInfoForEOL()357 TokenStream::updateLineInfoForEOL()
358 {
359     prevLinebase = linebase;
360     linebase = userbuf.offset();
361     lineno++;
362     if (!srcCoords.add(lineno, linebase))
363         flags.hitOOM = true;
364 }
365 
366 MOZ_ALWAYS_INLINE void
updateFlagsForEOL()367 TokenStream::updateFlagsForEOL()
368 {
369     flags.isDirtyLine = false;
370 }
371 
372 // This gets the next char, normalizing all EOL sequences to '\n' as it goes.
373 int32_t
getChar()374 TokenStream::getChar()
375 {
376     int32_t c;
377     if (MOZ_LIKELY(userbuf.hasRawChars())) {
378         c = userbuf.getRawChar();
379 
380         // Normalize the char16_t if it was a newline.
381         if (MOZ_UNLIKELY(c == '\n'))
382             goto eol;
383         if (MOZ_UNLIKELY(c == '\r')) {
384             // If it's a \r\n sequence: treat as a single EOL, skip over the \n.
385             if (MOZ_LIKELY(userbuf.hasRawChars()))
386                 userbuf.matchRawChar('\n');
387             goto eol;
388         }
389         if (MOZ_UNLIKELY(c == LINE_SEPARATOR || c == PARA_SEPARATOR))
390             goto eol;
391 
392         return c;
393     }
394 
395     flags.isEOF = true;
396     return EOF;
397 
398   eol:
399     updateLineInfoForEOL();
400     return '\n';
401 }
402 
403 // This gets the next char. It does nothing special with EOL sequences, not
404 // even updating the line counters.  It can be used safely if (a) the
405 // resulting char is guaranteed to be ungotten (by ungetCharIgnoreEOL()) if
406 // it's an EOL, and (b) the line-related state (lineno, linebase) is not used
407 // before it's ungotten.
408 int32_t
getCharIgnoreEOL()409 TokenStream::getCharIgnoreEOL()
410 {
411     if (MOZ_LIKELY(userbuf.hasRawChars()))
412         return userbuf.getRawChar();
413 
414     flags.isEOF = true;
415     return EOF;
416 }
417 
418 void
ungetChar(int32_t c)419 TokenStream::ungetChar(int32_t c)
420 {
421     if (c == EOF)
422         return;
423     MOZ_ASSERT(!userbuf.atStart());
424     userbuf.ungetRawChar();
425     if (c == '\n') {
426 #ifdef DEBUG
427         int32_t c2 = userbuf.peekRawChar();
428         MOZ_ASSERT(TokenBuf::isRawEOLChar(c2));
429 #endif
430 
431         // If it's a \r\n sequence, also unget the \r.
432         if (!userbuf.atStart())
433             userbuf.matchRawCharBackwards('\r');
434 
435         MOZ_ASSERT(prevLinebase != size_t(-1));    // we should never get more than one EOL char
436         linebase = prevLinebase;
437         prevLinebase = size_t(-1);
438         lineno--;
439     } else {
440         MOZ_ASSERT(userbuf.peekRawChar() == c);
441     }
442 }
443 
444 void
ungetCharIgnoreEOL(int32_t c)445 TokenStream::ungetCharIgnoreEOL(int32_t c)
446 {
447     if (c == EOF)
448         return;
449     MOZ_ASSERT(!userbuf.atStart());
450     userbuf.ungetRawChar();
451 }
452 
453 // Return true iff |n| raw characters can be read from this without reading past
454 // EOF or a newline, and copy those characters into |cp| if so.  The characters
455 // are not consumed: use skipChars(n) to do so after checking that the consumed
456 // characters had appropriate values.
457 bool
peekChars(int n,char16_t * cp)458 TokenStream::peekChars(int n, char16_t* cp)
459 {
460     int i, j;
461     int32_t c;
462 
463     for (i = 0; i < n; i++) {
464         c = getCharIgnoreEOL();
465         if (c == EOF)
466             break;
467         if (c == '\n') {
468             ungetCharIgnoreEOL(c);
469             break;
470         }
471         cp[i] = char16_t(c);
472     }
473     for (j = i - 1; j >= 0; j--)
474         ungetCharIgnoreEOL(cp[j]);
475     return i == n;
476 }
477 
478 size_t
findEOLMax(size_t start,size_t max)479 TokenStream::TokenBuf::findEOLMax(size_t start, size_t max)
480 {
481     const char16_t* p = rawCharPtrAt(start);
482 
483     size_t n = 0;
484     while (true) {
485         if (p >= limit_)
486             break;
487         if (n >= max)
488             break;
489         n++;
490         if (TokenBuf::isRawEOLChar(*p++))
491             break;
492     }
493     return start + n;
494 }
495 
496 bool
advance(size_t position)497 TokenStream::advance(size_t position)
498 {
499     const char16_t* end = userbuf.rawCharPtrAt(position);
500     while (userbuf.addressOfNextRawChar() < end)
501         getChar();
502 
503     Token* cur = &tokens[cursor];
504     cur->pos.begin = userbuf.offset();
505     MOZ_MAKE_MEM_UNDEFINED(&cur->type, sizeof(cur->type));
506     lookahead = 0;
507 
508     if (flags.hitOOM)
509         return reportError(JSMSG_OUT_OF_MEMORY);
510 
511     return true;
512 }
513 
514 void
tell(Position * pos)515 TokenStream::tell(Position* pos)
516 {
517     pos->buf = userbuf.addressOfNextRawChar(/* allowPoisoned = */ true);
518     pos->flags = flags;
519     pos->lineno = lineno;
520     pos->linebase = linebase;
521     pos->prevLinebase = prevLinebase;
522     pos->lookahead = lookahead;
523     pos->currentToken = currentToken();
524     for (unsigned i = 0; i < lookahead; i++)
525         pos->lookaheadTokens[i] = tokens[(cursor + 1 + i) & ntokensMask];
526 }
527 
528 void
seek(const Position & pos)529 TokenStream::seek(const Position& pos)
530 {
531     userbuf.setAddressOfNextRawChar(pos.buf, /* allowPoisoned = */ true);
532     flags = pos.flags;
533     lineno = pos.lineno;
534     linebase = pos.linebase;
535     prevLinebase = pos.prevLinebase;
536     lookahead = pos.lookahead;
537 
538     tokens[cursor] = pos.currentToken;
539     for (unsigned i = 0; i < lookahead; i++)
540         tokens[(cursor + 1 + i) & ntokensMask] = pos.lookaheadTokens[i];
541 }
542 
543 bool
seek(const Position & pos,const TokenStream & other)544 TokenStream::seek(const Position& pos, const TokenStream& other)
545 {
546     if (!srcCoords.fill(other.srcCoords))
547         return false;
548     seek(pos);
549     return true;
550 }
551 
552 bool
reportStrictModeErrorNumberVA(uint32_t offset,bool strictMode,unsigned errorNumber,va_list args)553 TokenStream::reportStrictModeErrorNumberVA(uint32_t offset, bool strictMode, unsigned errorNumber,
554                                            va_list args)
555 {
556     // In strict mode code, this is an error, not merely a warning.
557     unsigned flags;
558     if (strictMode)
559         flags = JSREPORT_ERROR;
560     else if (options().extraWarningsOption)
561         flags = JSREPORT_WARNING | JSREPORT_STRICT;
562     else
563         return true;
564 
565     return reportCompileErrorNumberVA(offset, flags, errorNumber, args);
566 }
567 
568 void
throwError(JSContext * cx)569 CompileError::throwError(JSContext* cx)
570 {
571     // If there's a runtime exception type associated with this error
572     // number, set that as the pending exception.  For errors occuring at
573     // compile time, this is very likely to be a JSEXN_SYNTAXERR.
574     //
575     // If an exception is thrown but not caught, the JSREPORT_EXCEPTION
576     // flag will be set in report.flags.  Proper behavior for an error
577     // reporter is to ignore a report with this flag for all but top-level
578     // compilation errors.  The exception will remain pending, and so long
579     // as the non-top-level "load", "eval", or "compile" native function
580     // returns false, the top-level reporter will eventually receive the
581     // uncaught exception report.
582     if (!ErrorToException(cx, message, &report, nullptr, nullptr))
583         CallErrorReporter(cx, message, &report);
584 }
585 
~CompileError()586 CompileError::~CompileError()
587 {
588     js_free((void*)report.linebuf());
589     js_free((void*)report.ucmessage);
590     js_free(message);
591     message = nullptr;
592 
593     if (report.messageArgs) {
594         if (argumentsType == ArgumentsAreASCII) {
595             unsigned i = 0;
596             while (report.messageArgs[i])
597                 js_free((void*)report.messageArgs[i++]);
598         }
599         js_free(report.messageArgs);
600     }
601 
602     PodZero(&report);
603 }
604 
605 bool
reportCompileErrorNumberVA(uint32_t offset,unsigned flags,unsigned errorNumber,va_list args)606 TokenStream::reportCompileErrorNumberVA(uint32_t offset, unsigned flags, unsigned errorNumber,
607                                         va_list args)
608 {
609     bool warning = JSREPORT_IS_WARNING(flags);
610 
611     if (warning && options().werrorOption) {
612         flags &= ~JSREPORT_WARNING;
613         warning = false;
614     }
615 
616     // On the main thread, report the error immediately. When compiling off
617     // thread, save the error so that the main thread can report it later.
618     CompileError tempErr;
619     CompileError& err = cx->isJSContext() ? tempErr : cx->addPendingCompileError();
620 
621     err.report.flags = flags;
622     err.report.errorNumber = errorNumber;
623     err.report.filename = filename;
624     err.report.isMuted = mutedErrors;
625     if (offset == NoOffset) {
626         err.report.lineno = 0;
627         err.report.column = 0;
628     } else {
629         err.report.lineno = srcCoords.lineNum(offset);
630         err.report.column = srcCoords.columnIndex(offset);
631     }
632 
633     // If we have no location information, try to get one from the caller.
634     bool callerFilename = false;
635     if (offset != NoOffset && !err.report.filename && cx->isJSContext()) {
636         NonBuiltinFrameIter iter(cx->asJSContext(),
637                                  FrameIter::ALL_CONTEXTS, FrameIter::GO_THROUGH_SAVED,
638                                  FrameIter::FOLLOW_DEBUGGER_EVAL_PREV_LINK,
639                                  cx->compartment()->principals());
640         if (!iter.done() && iter.scriptFilename()) {
641             callerFilename = true;
642             err.report.filename = iter.scriptFilename();
643             err.report.lineno = iter.computeLine(&err.report.column);
644         }
645     }
646 
647     err.argumentsType = (flags & JSREPORT_UC) ? ArgumentsAreUnicode : ArgumentsAreASCII;
648 
649     if (!ExpandErrorArgumentsVA(cx, GetErrorMessage, nullptr, errorNumber, &err.message,
650                                 &err.report, err.argumentsType, args))
651     {
652         return false;
653     }
654 
655     // Given a token, T, that we want to complain about: if T's (starting)
656     // lineno doesn't match TokenStream's lineno, that means we've scanned past
657     // the line that T starts on, which makes it hard to print some or all of
658     // T's (starting) line for context.
659     //
660     // So we don't even try, leaving report.linebuf and friends zeroed.  This
661     // means that any error involving a multi-line token (e.g. an unterminated
662     // multi-line string literal) won't have a context printed.
663     if (offset != NoOffset && err.report.lineno == lineno && !callerFilename) {
664         // We show only a portion (a "window") of the line around the erroneous
665         // token -- the first char in the token, plus |windowRadius| chars
666         // before it and |windowRadius - 1| chars after it.  This is because
667         // lines can be very long and printing the whole line is (a) not that
668         // helpful, and (b) can waste a lot of memory.  See bug 634444.
669         static const size_t windowRadius = 60;
670 
671         // The window must start within the current line, no earlier than
672         // windowRadius characters before offset.
673         size_t windowStart = (offset - linebase > windowRadius) ?
674                              offset - windowRadius :
675                              linebase;
676 
677         // The window must start within the portion of the current line
678         // that we actually have in our buffer.
679         if (windowStart < userbuf.startOffset())
680             windowStart = userbuf.startOffset();
681 
682         // The window must end within the current line, no later than
683         // windowRadius after offset.
684         size_t windowEnd = userbuf.findEOLMax(offset, windowRadius);
685         size_t windowLength = windowEnd - windowStart;
686         MOZ_ASSERT(windowLength <= windowRadius * 2);
687 
688         // Create the windowed strings.
689         StringBuffer windowBuf(cx);
690         if (!windowBuf.append(userbuf.rawCharPtrAt(windowStart), windowLength) ||
691             !windowBuf.append('\0'))
692         {
693             return false;
694         }
695 
696         // The window into the offending source line, without final \n.
697         mozilla::UniquePtr<char16_t[], JS::FreePolicy> linebuf(windowBuf.stealChars());
698         if (!linebuf)
699             return false;
700 
701         err.report.initLinebuf(linebuf.release(), windowLength, offset - windowStart);
702     }
703 
704     if (cx->isJSContext())
705         err.throwError(cx->asJSContext());
706 
707     return warning;
708 }
709 
710 bool
reportStrictModeError(unsigned errorNumber,...)711 TokenStream::reportStrictModeError(unsigned errorNumber, ...)
712 {
713     va_list args;
714     va_start(args, errorNumber);
715     bool result = reportStrictModeErrorNumberVA(currentToken().pos.begin, strictMode(),
716                                                 errorNumber, args);
717     va_end(args);
718     return result;
719 }
720 
721 bool
reportError(unsigned errorNumber,...)722 TokenStream::reportError(unsigned errorNumber, ...)
723 {
724     va_list args;
725     va_start(args, errorNumber);
726     bool result = reportCompileErrorNumberVA(currentToken().pos.begin, JSREPORT_ERROR, errorNumber,
727                                              args);
728     va_end(args);
729     return result;
730 }
731 
732 bool
reportErrorNoOffset(unsigned errorNumber,...)733 TokenStream::reportErrorNoOffset(unsigned errorNumber, ...)
734 {
735     va_list args;
736     va_start(args, errorNumber);
737     bool result = reportCompileErrorNumberVA(NoOffset, JSREPORT_ERROR, errorNumber,
738                                              args);
739     va_end(args);
740     return result;
741 }
742 
743 bool
reportWarning(unsigned errorNumber,...)744 TokenStream::reportWarning(unsigned errorNumber, ...)
745 {
746     va_list args;
747     va_start(args, errorNumber);
748     bool result = reportCompileErrorNumberVA(currentToken().pos.begin, JSREPORT_WARNING,
749                                              errorNumber, args);
750     va_end(args);
751     return result;
752 }
753 
754 bool
reportStrictWarningErrorNumberVA(uint32_t offset,unsigned errorNumber,va_list args)755 TokenStream::reportStrictWarningErrorNumberVA(uint32_t offset, unsigned errorNumber, va_list args)
756 {
757     if (!options().extraWarningsOption)
758         return true;
759 
760     return reportCompileErrorNumberVA(offset, JSREPORT_STRICT|JSREPORT_WARNING, errorNumber, args);
761 }
762 
763 void
reportAsmJSError(uint32_t offset,unsigned errorNumber,...)764 TokenStream::reportAsmJSError(uint32_t offset, unsigned errorNumber, ...)
765 {
766     va_list args;
767     va_start(args, errorNumber);
768     unsigned flags = options().throwOnAsmJSValidationFailureOption
769                      ? JSREPORT_ERROR
770                      : JSREPORT_WARNING;
771     reportCompileErrorNumberVA(offset, flags, errorNumber, args);
772     va_end(args);
773 }
774 
775 // We have encountered a '\': check for a Unicode escape sequence after it.
776 // Return 'true' and the character code value (by value) if we found a
777 // Unicode escape sequence.  Otherwise, return 'false'.  In both cases, do not
778 // advance along the buffer.
779 bool
peekUnicodeEscape(int * result)780 TokenStream::peekUnicodeEscape(int* result)
781 {
782     char16_t cp[5];
783 
784     if (peekChars(5, cp) && cp[0] == 'u' &&
785         JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) &&
786         JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4]))
787     {
788         *result = (((((JS7_UNHEX(cp[1]) << 4)
789                 + JS7_UNHEX(cp[2])) << 4)
790               + JS7_UNHEX(cp[3])) << 4)
791             + JS7_UNHEX(cp[4]);
792         return true;
793     }
794     return false;
795 }
796 
797 bool
matchUnicodeEscapeIdStart(int32_t * cp)798 TokenStream::matchUnicodeEscapeIdStart(int32_t* cp)
799 {
800     if (peekUnicodeEscape(cp) && IsIdentifierStart(*cp)) {
801         skipChars(5);
802         return true;
803     }
804     return false;
805 }
806 
807 bool
matchUnicodeEscapeIdent(int32_t * cp)808 TokenStream::matchUnicodeEscapeIdent(int32_t* cp)
809 {
810     if (peekUnicodeEscape(cp) && IsIdentifierPart(*cp)) {
811         skipChars(5);
812         return true;
813     }
814     return false;
815 }
816 
// Helper function which returns true if the first strlen(q) characters in p
// are the same as the characters in q.  |q| must be NUL-terminated and |p|
// must hold at least strlen(q) characters; an empty |q| always matches.
//
// Each byte of |q| is read as an unsigned value (0..255) before it is
// compared with the char16_t from |p|.  Without that, a byte >= 0x80 would
// promote to a negative int wherever plain char is signed and could never
// compare equal, making the result platform-dependent.
static bool
CharsMatch(const char16_t* p, const char* q)
{
    for (; *q != '\0'; p++, q++) {
        if (*p != static_cast<unsigned char>(*q))
            return false;
    }
    return true;
}
827 
828 bool
getDirectives(bool isMultiline,bool shouldWarnDeprecated)829 TokenStream::getDirectives(bool isMultiline, bool shouldWarnDeprecated)
830 {
831     // Match directive comments used in debugging, such as "//# sourceURL" and
832     // "//# sourceMappingURL". Use of "//@" instead of "//#" is deprecated.
833     //
834     // To avoid a crashing bug in IE, several JavaScript transpilers wrap single
835     // line comments containing a source mapping URL inside a multiline
836     // comment. To avoid potentially expensive lookahead and backtracking, we
837     // only check for this case if we encounter a '#' character.
838 
839     if (!getDisplayURL(isMultiline, shouldWarnDeprecated))
840         return false;
841     if (!getSourceMappingURL(isMultiline, shouldWarnDeprecated))
842         return false;
843 
844     return true;
845 }
846 
847 bool
getDirective(bool isMultiline,bool shouldWarnDeprecated,const char * directive,int directiveLength,const char * errorMsgPragma,UniquePtr<char16_t[],JS::FreePolicy> * destination)848 TokenStream::getDirective(bool isMultiline, bool shouldWarnDeprecated,
849                           const char* directive, int directiveLength,
850                           const char* errorMsgPragma,
851                           UniquePtr<char16_t[], JS::FreePolicy>* destination)
852 {
853     MOZ_ASSERT(directiveLength <= 18);
854     char16_t peeked[18];
855     int32_t c;
856 
857     if (peekChars(directiveLength, peeked) && CharsMatch(peeked, directive)) {
858         if (shouldWarnDeprecated &&
859             !reportWarning(JSMSG_DEPRECATED_PRAGMA, errorMsgPragma))
860             return false;
861 
862         skipChars(directiveLength);
863         tokenbuf.clear();
864 
865         while ((c = peekChar()) && c != EOF && !IsSpaceOrBOM2(c)) {
866             getChar();
867             // Debugging directives can occur in both single- and multi-line
868             // comments. If we're currently inside a multi-line comment, we also
869             // need to recognize multi-line comment terminators.
870             if (isMultiline && c == '*' && peekChar() == '/') {
871                 ungetChar('*');
872                 break;
873             }
874             if (!tokenbuf.append(c))
875                 return false;
876         }
877 
878         if (tokenbuf.empty()) {
879             // The directive's URL was missing, but this is not quite an
880             // exception that we should stop and drop everything for.
881             return true;
882         }
883 
884         size_t length = tokenbuf.length();
885 
886         *destination = cx->make_pod_array<char16_t>(length + 1);
887         if (!*destination)
888             return false;
889 
890         PodCopy(destination->get(), tokenbuf.begin(), length);
891         (*destination)[length] = '\0';
892     }
893 
894     return true;
895 }
896 
897 bool
getDisplayURL(bool isMultiline,bool shouldWarnDeprecated)898 TokenStream::getDisplayURL(bool isMultiline, bool shouldWarnDeprecated)
899 {
900     // Match comments of the form "//# sourceURL=<url>" or
901     // "/\* //# sourceURL=<url> *\/"
902     //
903     // Note that while these are labeled "sourceURL" in the source text,
904     // internally we refer to it as a "displayURL" to distinguish what the
905     // developer would like to refer to the source as from the source's actual
906     // URL.
907 
908     return getDirective(isMultiline, shouldWarnDeprecated, " sourceURL=", 11,
909                         "sourceURL", &displayURL_);
910 }
911 
912 bool
getSourceMappingURL(bool isMultiline,bool shouldWarnDeprecated)913 TokenStream::getSourceMappingURL(bool isMultiline, bool shouldWarnDeprecated)
914 {
915     // Match comments of the form "//# sourceMappingURL=<url>" or
916     // "/\* //# sourceMappingURL=<url> *\/"
917 
918     return getDirective(isMultiline, shouldWarnDeprecated, " sourceMappingURL=", 18,
919                         "sourceMappingURL", &sourceMapURL_);
920 }
921 
922 MOZ_ALWAYS_INLINE Token*
newToken(ptrdiff_t adjust)923 TokenStream::newToken(ptrdiff_t adjust)
924 {
925     cursor = (cursor + 1) & ntokensMask;
926     Token* tp = &tokens[cursor];
927     tp->pos.begin = userbuf.offset() + adjust;
928 
929     // NOTE: tp->pos.end is not set until the very end of getTokenInternal().
930     MOZ_MAKE_MEM_UNDEFINED(&tp->pos.end, sizeof(tp->pos.end));
931 
932     return tp;
933 }
934 
935 MOZ_ALWAYS_INLINE JSAtom*
atomize(ExclusiveContext * cx,CharBuffer & cb)936 TokenStream::atomize(ExclusiveContext* cx, CharBuffer& cb)
937 {
938     return AtomizeChars(cx, cb.begin(), cb.length());
939 }
940 
941 #ifdef DEBUG
942 static bool
IsTokenSane(Token * tp)943 IsTokenSane(Token* tp)
944 {
945     // Nb: TOK_EOL should never be used in an actual Token;  it should only be
946     // returned as a TokenKind from peekTokenSameLine().
947     if (tp->type < 0 || tp->type >= TOK_LIMIT || tp->type == TOK_EOL)
948         return false;
949 
950     if (tp->pos.end < tp->pos.begin)
951         return false;
952 
953     return true;
954 }
955 #endif
956 
957 bool
putIdentInTokenbuf(const char16_t * identStart)958 TokenStream::putIdentInTokenbuf(const char16_t* identStart)
959 {
960     int32_t c, qc;
961     const char16_t* tmp = userbuf.addressOfNextRawChar();
962     userbuf.setAddressOfNextRawChar(identStart);
963 
964     tokenbuf.clear();
965     for (;;) {
966         c = getCharIgnoreEOL();
967         if (!IsIdentifierPart(c)) {
968             if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
969                 break;
970             c = qc;
971         }
972         if (!tokenbuf.append(c)) {
973             userbuf.setAddressOfNextRawChar(tmp);
974             return false;
975         }
976     }
977     userbuf.setAddressOfNextRawChar(tmp);
978     return true;
979 }
980 
981 bool
checkForKeyword(const KeywordInfo * kw,TokenKind * ttp)982 TokenStream::checkForKeyword(const KeywordInfo* kw, TokenKind* ttp)
983 {
984     if (kw->tokentype == TOK_RESERVED)
985         return reportError(JSMSG_RESERVED_ID, kw->chars);
986 
987     if (kw->tokentype == TOK_STRICT_RESERVED)
988         return reportStrictModeError(JSMSG_RESERVED_ID, kw->chars);
989 
990     // Treat 'let' as an identifier and contextually a keyword in sloppy mode.
991     // It is always a keyword in strict mode.
992     if (kw->tokentype == TOK_LET && !strictMode())
993         return true;
994 
995     // Working keyword.
996     if (ttp) {
997         *ttp = kw->tokentype;
998         return true;
999     }
1000 
1001     return reportError(JSMSG_RESERVED_ID, kw->chars);
1002 }
1003 
1004 bool
checkForKeyword(JSAtom * atom,TokenKind * ttp)1005 TokenStream::checkForKeyword(JSAtom* atom, TokenKind* ttp)
1006 {
1007     const KeywordInfo* kw = FindKeyword(atom);
1008     if (!kw)
1009         return true;
1010 
1011     return checkForKeyword(kw, ttp);
1012 }
1013 
// Classification of a token's first character.  getTokenInternal() looks the
// character up in firstCharKinds[] and uses the resulting kind to choose a
// scanning path.
enum FirstCharKind {
    // A char16_t has the 'OneChar' kind if it, by itself, constitutes a valid
    // token that cannot also be a prefix of a longer token.  E.g. ';' has the
    // OneChar kind, but '+' does not, because '++' and '+=' are valid longer tokens
    // that begin with '+'.
    //
    // The few token kinds satisfying these properties cover roughly 35--45%
    // of the tokens seen in practice.
    //
    // We represent the 'OneChar' kind with any positive value less than
    // TOK_LIMIT.  This representation lets us associate each one-char token
    // char16_t with a TokenKind and thus avoid a subsequent char16_t-to-TokenKind
    // conversion.
    //
    // NOTE(review): OneChar_Min is 0, so the range below also admits 0 even
    // though the text above says "positive" -- confirm no table entry uses 0.
    OneChar_Min = 0,
    OneChar_Max = TOK_LIMIT - 1,

    // The remaining kinds are numbered from TOK_LIMIT upwards so they can
    // never collide with a TokenKind stored as a OneChar value.
    Space = TOK_LIMIT,  // non-EOL whitespace
    Ident,              // start of an identifier
    Dec,                // '1'..'9': start of a decimal number
    String,             // opening quote of a string or template string
    EOL,                // '\n' or '\r'
    BasePrefix,         // '0': may introduce a hex, octal or binary number
    Other,              // everything else; handled by a switch on the char

    LastCharKind = Other
};
1040 
// OneChar: 40,  41,  44,  58,  59,  63,  91,  93,  123, 125, 126:
//          '(', ')', ',', ':', ';', '?', '[', ']', '{', '}', '~'
// Ident:   36, 65..90, 95, 97..122: '$', 'A'..'Z', '_', 'a'..'z'
// String:  34, 39, 96: '"', '\'', '`' (the table spells '`' as |Templat|)
// Dec:     49..57: '1'..'9'
// BasePrefix:  48: '0'
// Space:   9, 11, 12, 32: '\t', '\v', '\f', ' '
// EOL:     10, 13: '\n', '\r'
// Other:   every remaining character, including 46 '.', 61 '=' and 43 '+',
//          which have no dedicated kind of their own.
//
// Short aliases so that every table entry fits in a fixed-width column.
// |Templat| marks the backtick, which shares the String kind; |_______| marks
// characters that fall through to the Other kind.
#define T_COMMA     TOK_COMMA
#define T_COLON     TOK_COLON
#define T_BITNOT    TOK_BITNOT
#define Templat     String
#define _______     Other
// Maps each character code 0..127 to its FirstCharKind (or, for one-char
// tokens, directly to the TokenKind).  The table has exactly 128 entries, so
// callers must only index it with values in that range.
static const uint8_t firstCharKinds[] = {
/*         0        1        2        3        4        5        6        7        8        9    */
/*   0+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______,   Space,
/*  10+ */     EOL,   Space,   Space,     EOL, _______, _______, _______, _______, _______, _______,
/*  20+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
/*  30+ */ _______, _______,   Space, _______,  String, _______,   Ident, _______, _______,  String,
/*  40+ */  TOK_LP,  TOK_RP, _______, _______, T_COMMA,_______,  _______, _______,BasePrefix,  Dec,
/*  50+ */     Dec,     Dec,     Dec,     Dec,     Dec,     Dec,     Dec,    Dec,  T_COLON,TOK_SEMI,
/*  60+ */ _______, _______, _______,TOK_HOOK, _______,   Ident,   Ident,   Ident,   Ident,   Ident,
/*  70+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
/*  80+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
/*  90+ */   Ident,  TOK_LB, _______,  TOK_RB, _______,   Ident, Templat,   Ident,   Ident,   Ident,
/* 100+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
/* 110+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
/* 120+ */   Ident,   Ident,   Ident,  TOK_LC, _______,  TOK_RC,T_BITNOT, _______
};
#undef T_COMMA
#undef T_COLON
#undef T_BITNOT
#undef Templat
#undef _______

// Every FirstCharKind value must be representable in one table element.
static_assert(LastCharKind < (1 << (sizeof(firstCharKinds[0]) * 8)),
              "Elements of firstCharKinds[] are too small");
1082 
// Scan the next token from userbuf.
//
// On success the token is stored in the slot returned by newToken(), its kind
// is written to |*ttp| and true is returned.  On failure flags.hadError is
// set, |*ttp| is left undefined and false is returned.
//
// |modifier| adjusts scanning: TemplateTail resumes a template string,
// KeywordIsName suppresses keyword recognition for identifiers, and Operand
// allows a '/' to begin a regular expression literal.
//
// Whitespace, line terminators and comments produce no token; those paths
// jump back to |retry|.  All successful paths leave through |out| and all
// failing paths through |error|.
bool
TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier)
{
    int c, qc;
    Token* tp;
    FirstCharKind c1kind;
    const char16_t* numStart;
    bool hasExp;
    DecimalPoint decimalPoint;
    const char16_t* identStart;
    bool hadUnicodeEscape;

    // Check if in the middle of a template string. Have to get this out of
    // the way first.
    if (MOZ_UNLIKELY(modifier == TemplateTail)) {
        if (!getStringOrTemplateToken('`', &tp))
            goto error;
        goto out;
    }

  retry:
    if (MOZ_UNLIKELY(!userbuf.hasRawChars())) {
        tp = newToken(0);
        tp->type = TOK_EOF;
        flags.isEOF = true;
        goto out;
    }

    c = userbuf.getRawChar();
    MOZ_ASSERT(c != EOF);

    // Chars not in the range 0..127 are rare.  Getting them out of the way
    // early allows subsequent checking to be faster.
    if (MOZ_UNLIKELY(c >= 128)) {
        if (IsSpaceOrBOM2(c)) {
            if (c == LINE_SEPARATOR || c == PARA_SEPARATOR) {
                updateLineInfoForEOL();
                updateFlagsForEOL();
            }

            goto retry;
        }

        // The first char has already been consumed, hence the -1 adjustment
        // to the token's start position (here and in the calls below).
        tp = newToken(-1);

        static_assert('$' < 128,
                      "IdentifierStart contains '$', but as !IsLetter('$'), "
                      "ensure that '$' is never handled here");
        static_assert('_' < 128,
                      "IdentifierStart contains '_', but as !IsLetter('_'), "
                      "ensure that '_' is never handled here");
        if (IsLetter(c)) {
            identStart = userbuf.addressOfNextRawChar() - 1;
            hadUnicodeEscape = false;
            goto identifier;
        }

        goto badchar;
    }

    // Get the token kind, based on the first char.  The ordering of c1kind
    // comparison is based on the frequency of tokens in real code -- Parsemark
    // (which represents typical JS code on the web) and the Unreal demo (which
    // represents asm.js code).
    //
    //                  Parsemark   Unreal
    //  OneChar         32.9%       39.7%
    //  Space           25.0%        0.6%
    //  Ident           19.2%       36.4%
    //  Dec              7.2%        5.1%
    //  String           7.9%        0.0%
    //  EOL              1.7%        0.0%
    //  BasePrefix       0.4%        4.9%
    //  Other            5.7%       13.3%
    //
    // The ordering is based mostly on Parsemark frequencies, with Unreal
    // frequencies used to break close categories (e.g. |Dec| and |String|).
    // |Other| is biggish, but no other token kind is common enough for it to
    // be worth adding extra values to FirstCharKind.
    //
    c1kind = FirstCharKind(firstCharKinds[c]);

    // Look for an unambiguous single-char token.
    //
    if (c1kind <= OneChar_Max) {
        tp = newToken(-1);
        tp->type = TokenKind(c1kind);
        goto out;
    }

    // Skip over non-EOL whitespace chars.
    //
    if (c1kind == Space)
        goto retry;

    // Look for an identifier.
    //
    if (c1kind == Ident) {
        tp = newToken(-1);
        identStart = userbuf.addressOfNextRawChar() - 1;
        hadUnicodeEscape = false;

        // Also entered from the non-ASCII letter path above and from the
        // '\\' case below, both of which set |tp|, |identStart| and
        // |hadUnicodeEscape| before jumping here.
      identifier:
        for (;;) {
            c = getCharIgnoreEOL();
            if (c == EOF)
                break;
            if (!IsIdentifierPart(c)) {
                if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
                    break;
                hadUnicodeEscape = true;
            }
        }
        ungetCharIgnoreEOL(c);

        // Identifiers containing no Unicode escapes can be processed directly
        // from userbuf.  The rest must use the escapes converted via tokenbuf
        // before atomizing.
        const char16_t* chars;
        size_t length;
        if (hadUnicodeEscape) {
            if (!putIdentInTokenbuf(identStart))
                goto error;

            chars = tokenbuf.begin();
            length = tokenbuf.length();
        } else {
            chars = identStart;
            length = userbuf.addressOfNextRawChar() - identStart;
        }

        // Represent keywords as keyword tokens unless told otherwise.
        if (modifier != KeywordIsName) {
            if (const KeywordInfo* kw = FindKeyword(chars, length)) {
                // That said, keywords can't contain escapes.  (Contexts where
                // keywords are treated as names, that also sometimes treat
                // keywords as keywords, must manually check this requirement.)
                if (hadUnicodeEscape) {
                    reportError(JSMSG_ESCAPED_KEYWORD);
                    goto error;
                }

                // checkForKeyword() leaves the type untouched when the word
                // should be treated as a plain name, so seed it with TOK_NAME
                // and test for a change afterwards.
                tp->type = TOK_NAME;
                if (!checkForKeyword(kw, &tp->type))
                    goto error;
                if (tp->type != TOK_NAME)
                    goto out;
            }
        }

        JSAtom* atom = AtomizeChars(cx, chars, length);
        if (!atom)
            goto error;
        tp->type = TOK_NAME;
        tp->setName(atom->asPropertyName());
        goto out;
    }

    // Look for a decimal number.
    //
    if (c1kind == Dec) {
        tp = newToken(-1);
        numStart = userbuf.addressOfNextRawChar() - 1;

        // Also entered from the BasePrefix path below, with |tp|, |numStart|
        // and |c| already set up.
      decimal:
        decimalPoint = NoDecimal;
        hasExp = false;
        while (JS7_ISDEC(c))
            c = getCharIgnoreEOL();

        if (c == '.') {
            decimalPoint = HasDecimal;
            // Also entered from the '.' case of the |Other| switch when a
            // number starts with a decimal point.
          decimal_dot:
            do {
                c = getCharIgnoreEOL();
            } while (JS7_ISDEC(c));
        }
        if (c == 'e' || c == 'E') {
            hasExp = true;
            c = getCharIgnoreEOL();
            if (c == '+' || c == '-')
                c = getCharIgnoreEOL();
            if (!JS7_ISDEC(c)) {
                ungetCharIgnoreEOL(c);
                reportError(JSMSG_MISSING_EXPONENT);
                goto error;
            }
            do {
                c = getCharIgnoreEOL();
            } while (JS7_ISDEC(c));
        }
        ungetCharIgnoreEOL(c);

        if (c != EOF && IsIdentifierStart(c)) {
            reportError(JSMSG_IDSTART_AFTER_NUMBER);
            goto error;
        }

        // Unlike identifiers and strings, numbers cannot contain escaped
        // chars, so we don't need to use tokenbuf.  Instead we can just
        // convert the char16_t characters in userbuf to the numeric value.
        double dval;
        if (!((decimalPoint == HasDecimal) || hasExp)) {
            if (!GetDecimalInteger(cx, numStart, userbuf.addressOfNextRawChar(), &dval))
                goto error;
        } else {
            const char16_t* dummy;
            if (!js_strtod(cx, numStart, userbuf.addressOfNextRawChar(), &dummy, &dval))
                goto error;
        }
        tp->type = TOK_NUMBER;
        tp->setNumber(dval, decimalPoint);
        goto out;
    }

    // Look for a string or a template string.
    //
    if (c1kind == String) {
        if (!getStringOrTemplateToken(c, &tp))
            goto error;
        goto out;
    }

    // Skip over EOL chars, updating line state along the way.
    //
    if (c1kind == EOL) {
        // If it's a \r\n sequence: treat as a single EOL, skip over the \n.
        if (c == '\r' && userbuf.hasRawChars())
            userbuf.matchRawChar('\n');
        updateLineInfoForEOL();
        updateFlagsForEOL();
        goto retry;
    }

    // Look for a hexadecimal, octal, or binary number.
    //
    if (c1kind == BasePrefix) {
        tp = newToken(-1);
        int radix;
        c = getCharIgnoreEOL();
        if (c == 'x' || c == 'X') {
            radix = 16;
            c = getCharIgnoreEOL();
            if (!JS7_ISHEX(c)) {
                ungetCharIgnoreEOL(c);
                reportError(JSMSG_MISSING_HEXDIGITS);
                goto error;
            }
            numStart = userbuf.addressOfNextRawChar() - 1;  // one past the '0x'
            while (JS7_ISHEX(c))
                c = getCharIgnoreEOL();
        } else if (c == 'b' || c == 'B') {
            radix = 2;
            c = getCharIgnoreEOL();
            if (c != '0' && c != '1') {
                ungetCharIgnoreEOL(c);
                reportError(JSMSG_MISSING_BINARY_DIGITS);
                goto error;
            }
            numStart = userbuf.addressOfNextRawChar() - 1;  // one past the '0b'
            while (c == '0' || c == '1')
                c = getCharIgnoreEOL();
        } else if (c == 'o' || c == 'O') {
            radix = 8;
            c = getCharIgnoreEOL();
            if (c < '0' || c > '7') {
                ungetCharIgnoreEOL(c);
                reportError(JSMSG_MISSING_OCTAL_DIGITS);
                goto error;
            }
            numStart = userbuf.addressOfNextRawChar() - 1;  // one past the '0o'
            while ('0' <= c && c <= '7')
                c = getCharIgnoreEOL();
        } else if (JS7_ISDEC(c)) {
            // Legacy octal literal: a '0' followed directly by digits.
            radix = 8;
            numStart = userbuf.addressOfNextRawChar() - 1;  // one past the '0'
            while (JS7_ISDEC(c)) {
                // Octal integer literals are not permitted in strict mode code.
                if (!reportStrictModeError(JSMSG_DEPRECATED_OCTAL))
                    goto error;

                // Outside strict mode, we permit 08 and 09 as decimal numbers,
                // which makes our behaviour a superset of the ECMA numeric
                // grammar. We might not always be so permissive, so we warn
                // about it.
                if (c >= '8') {
                    if (!reportWarning(JSMSG_BAD_OCTAL, c == '8' ? "08" : "09")) {
                        goto error;
                    }
                    goto decimal;   // use the decimal scanner for the rest of the number
                }
                c = getCharIgnoreEOL();
            }
        } else {
            // '0' not followed by a radix prefix ('x', 'b', 'o', in either
            // case) or a digit;  scan as a decimal number.
            numStart = userbuf.addressOfNextRawChar() - 1;
            goto decimal;
        }
        ungetCharIgnoreEOL(c);

        if (c != EOF && IsIdentifierStart(c)) {
            reportError(JSMSG_IDSTART_AFTER_NUMBER);
            goto error;
        }

        double dval;
        const char16_t* dummy;
        if (!GetPrefixInteger(cx, numStart, userbuf.addressOfNextRawChar(), radix, &dummy, &dval))
            goto error;
        tp->type = TOK_NUMBER;
        tp->setNumber(dval, NoDecimal);
        goto out;
    }

    // This handles everything else.
    //
    MOZ_ASSERT(c1kind == Other);
    tp = newToken(-1);
    switch (c) {
      case '.':
        c = getCharIgnoreEOL();
        if (JS7_ISDEC(c)) {
            // Both the '.' and the first digit have been consumed, so back up
            // two chars to make |numStart| point at the '.'.
            numStart = userbuf.addressOfNextRawChar() - 2;
            decimalPoint = HasDecimal;
            hasExp = false;
            goto decimal_dot;
        }
        if (c == '.') {
            if (matchChar('.')) {
                tp->type = TOK_TRIPLEDOT;
                goto out;
            }
        }
        ungetCharIgnoreEOL(c);
        tp->type = TOK_DOT;
        goto out;

      case '=':
        if (matchChar('='))
            tp->type = matchChar('=') ? TOK_STRICTEQ : TOK_EQ;
        else if (matchChar('>'))
            tp->type = TOK_ARROW;
        else
            tp->type = TOK_ASSIGN;
        goto out;

      case '+':
        if (matchChar('+'))
            tp->type = TOK_INC;
        else
            tp->type = matchChar('=') ? TOK_ADDASSIGN : TOK_ADD;
        goto out;

      case '\\':
        hadUnicodeEscape = matchUnicodeEscapeIdStart(&qc);
        if (hadUnicodeEscape) {
            // NOTE(review): the 6 presumably covers the six chars of a
            // "\uXXXX" escape -- confirm against matchUnicodeEscapeIdStart().
            identStart = userbuf.addressOfNextRawChar() - 6;
            goto identifier;
        }
        goto badchar;

      case '|':
        if (matchChar('|'))
            tp->type = TOK_OR;
        else
            tp->type = matchChar('=') ? TOK_BITORASSIGN : TOK_BITOR;
        goto out;

      case '^':
        tp->type = matchChar('=') ? TOK_BITXORASSIGN : TOK_BITXOR;
        goto out;

      case '&':
        if (matchChar('&'))
            tp->type = TOK_AND;
        else
            tp->type = matchChar('=') ? TOK_BITANDASSIGN : TOK_BITAND;
        goto out;

      case '!':
        if (matchChar('='))
            tp->type = matchChar('=') ? TOK_STRICTNE : TOK_NE;
        else
            tp->type = TOK_NOT;
        goto out;

      case '<':
        // NB: treat HTML begin-comment as comment-till-end-of-line.
        if (matchChar('!')) {
            if (matchChar('-')) {
                if (matchChar('-'))
                    goto skipline;
                ungetChar('-');
            }
            ungetChar('!');
        }
        if (matchChar('<')) {
            tp->type = matchChar('=') ? TOK_LSHASSIGN : TOK_LSH;
        } else {
            tp->type = matchChar('=') ? TOK_LE : TOK_LT;
        }
        goto out;

      case '>':
        if (matchChar('>')) {
            if (matchChar('>'))
                tp->type = matchChar('=') ? TOK_URSHASSIGN : TOK_URSH;
            else
                tp->type = matchChar('=') ? TOK_RSHASSIGN : TOK_RSH;
        } else {
            tp->type = matchChar('=') ? TOK_GE : TOK_GT;
        }
        goto out;

      case '*':
#ifdef JS_HAS_EXPONENTIATION
        if (matchChar('*'))
            tp->type = matchChar('=') ? TOK_POWASSIGN : TOK_POW;
        else
#endif
            tp->type = matchChar('=') ? TOK_MULASSIGN : TOK_MUL;
        goto out;

      case '/':
        // Look for a single-line comment.
        if (matchChar('/')) {
            c = peekChar();
            if (c == '@' || c == '#') {
                // The '@' form is the one that triggers a deprecation warning.
                bool shouldWarn = getChar() == '@';
                if (!getDirectives(false, shouldWarn))
                    goto error;
            }

            // Also entered for "<!--" and for "-->" at the start of a line.
        skipline:
            while ((c = getChar()) != EOF && c != '\n')
                continue;
            ungetChar(c);
            // A comment yields no token: hand back the slot that newToken()
            // claimed above before rescanning.
            cursor = (cursor - 1) & ntokensMask;
            goto retry;
        }

        // Look for a multi-line comment.
        if (matchChar('*')) {
            unsigned linenoBefore = lineno;
            while ((c = getChar()) != EOF &&
                   !(c == '*' && matchChar('/'))) {
                if (c == '@' || c == '#') {
                    bool shouldWarn = c == '@';
                    if (!getDirectives(true, shouldWarn))
                        goto error;
                }
            }
            if (c == EOF) {
                reportError(JSMSG_UNTERMINATED_COMMENT);
                goto error;
            }
            // Only a comment that actually spanned a line break counts as an
            // EOL for flag purposes.
            if (linenoBefore != lineno)
                updateFlagsForEOL();
            // As above: release the unused token slot.
            cursor = (cursor - 1) & ntokensMask;
            goto retry;
        }

        // Look for a regexp.
        if (modifier == Operand) {
            tokenbuf.clear();

            bool inCharClass = false;
            for (;;) {
                c = getChar();
                if (c == '\\') {
                    // Keep the backslash and take the following char
                    // verbatim, so an escaped '/', '[' or ']' is not
                    // interpreted structurally.
                    if (!tokenbuf.append(c))
                        goto error;
                    c = getChar();
                } else if (c == '[') {
                    inCharClass = true;
                } else if (c == ']') {
                    inCharClass = false;
                } else if (c == '/' && !inCharClass) {
                    // For compat with IE, allow unescaped / in char classes.
                    break;
                }
                if (c == '\n' || c == EOF) {
                    ungetChar(c);
                    reportError(JSMSG_UNTERMINATED_REGEXP);
                    goto error;
                }
                if (!tokenbuf.append(c))
                    goto error;
            }

            // Consume flags; each flag may appear at most once, so a repeated
            // flag stops the loop and is diagnosed below as a bad flag.
            RegExpFlag reflags = NoFlags;
            unsigned length = tokenbuf.length() + 1;
            while (true) {
                c = peekChar();
                if (c == 'g' && !(reflags & GlobalFlag))
                    reflags = RegExpFlag(reflags | GlobalFlag);
                else if (c == 'i' && !(reflags & IgnoreCaseFlag))
                    reflags = RegExpFlag(reflags | IgnoreCaseFlag);
                else if (c == 'm' && !(reflags & MultilineFlag))
                    reflags = RegExpFlag(reflags | MultilineFlag);
                else if (c == 'y' && !(reflags & StickyFlag))
                    reflags = RegExpFlag(reflags | StickyFlag);
                else
                    break;
                getChar();
                length++;
            }

            c = peekChar();
            if (JS7_ISLET(c)) {
                char buf[2] = { '\0', '\0' };
                // Advance the token start past the pattern and the accepted
                // flags so that it lands on the offending flag character.
                tp->pos.begin += length + 1;
                buf[0] = char(c);
                reportError(JSMSG_BAD_REGEXP_FLAG, buf);
                (void) getChar();
                goto error;
            }
            tp->type = TOK_REGEXP;
            tp->setRegExpFlags(reflags);
            goto out;
        }

        tp->type = matchChar('=') ? TOK_DIVASSIGN : TOK_DIV;
        goto out;

      case '%':
        tp->type = matchChar('=') ? TOK_MODASSIGN : TOK_MOD;
        goto out;

      case '-':
        if (matchChar('-')) {
            // "-->" is treated as a comment only when no token has yet been
            // seen on the current line.
            if (peekChar() == '>' && !flags.isDirtyLine)
                goto skipline;
            tp->type = TOK_DEC;
        } else {
            tp->type = matchChar('=') ? TOK_SUBASSIGN : TOK_SUB;
        }
        goto out;

      badchar:
      default:
        reportError(JSMSG_ILLEGAL_CHARACTER);
        goto error;
    }

    MOZ_CRASH("should have jumped to |out| or |error|");

  out:
    if (flags.hitOOM)
        return reportError(JSMSG_OUT_OF_MEMORY);

    flags.isDirtyLine = true;
    tp->pos.end = userbuf.offset();
#ifdef DEBUG
    // Save the modifier used to get this token, so that if an ungetToken()
    // occurs and then the token is re-gotten (or peeked, etc.), we can assert
    // that both gets have used the same modifiers.
    tp->modifier = modifier;
    tp->modifierException = NoException;
#endif
    MOZ_ASSERT(IsTokenSane(tp));
    *ttp = tp->type;
    return true;

  error:
    if (flags.hitOOM)
        return reportError(JSMSG_OUT_OF_MEMORY);

    flags.isDirtyLine = true;
    tp->pos.end = userbuf.offset();
    // The token kind is meaningless after an error; mark it undefined so
    // memory checkers catch any use of it.
    MOZ_MAKE_MEM_UNDEFINED(&tp->type, sizeof(tp->type));
    flags.hadError = true;
#ifdef DEBUG
    // Poisoning userbuf on error establishes an invariant: once an erroneous
    // token has been seen, userbuf will not be consulted again.  This is true
    // because the parser will deal with the illegal token by aborting parsing
    // immediately.
    userbuf.poison();
#endif
    MOZ_MAKE_MEM_UNDEFINED(ttp, sizeof(*ttp));
    return false;
}
1665 
1666 bool
getBracedUnicode(uint32_t * cp)1667 TokenStream::getBracedUnicode(uint32_t* cp)
1668 {
1669     consumeKnownChar('{');
1670 
1671     bool first = true;
1672     int32_t c;
1673     uint32_t code = 0;
1674     while (true) {
1675         c = getCharIgnoreEOL();
1676         if (c == EOF)
1677             return false;
1678         if (c == '}') {
1679             if (first)
1680                 return false;
1681             break;
1682         }
1683 
1684         if (!JS7_ISHEX(c))
1685             return false;
1686 
1687         code = (code << 4) | JS7_UNHEX(c);
1688         if (code > 0x10FFFF)
1689             return false;
1690         first = false;
1691     }
1692 
1693     *cp = code;
1694     return true;
1695 }
1696 
1697 bool
getStringOrTemplateToken(int untilChar,Token ** tp)1698 TokenStream::getStringOrTemplateToken(int untilChar, Token** tp)
1699 {
1700     int c;
1701     int nc = -1;
1702 
1703     bool parsingTemplate = (untilChar == '`');
1704 
1705     *tp = newToken(-1);
1706     tokenbuf.clear();
1707 
1708     // We need to detect any of these chars:  " or ', \n (or its
1709     // equivalents), \\, EOF.  Because we detect EOL sequences here and
1710     // put them back immediately, we can use getCharIgnoreEOL().
1711     while ((c = getCharIgnoreEOL()) != untilChar) {
1712         if (c == EOF) {
1713             ungetCharIgnoreEOL(c);
1714             reportError(JSMSG_UNTERMINATED_STRING);
1715             return false;
1716         }
1717 
1718         if (c == '\\') {
1719             switch (c = getChar()) {
1720               case 'b': c = '\b'; break;
1721               case 'f': c = '\f'; break;
1722               case 'n': c = '\n'; break;
1723               case 'r': c = '\r'; break;
1724               case 't': c = '\t'; break;
1725               case 'v': c = '\v'; break;
1726 
1727               case '\n':
1728                 // ES5 7.8.4: an escaped line terminator represents
1729                 // no character.
1730                 continue;
1731 
1732               // Unicode character specification.
1733               case 'u': {
1734                 if (peekChar() == '{') {
1735                     uint32_t code;
1736                     if (!getBracedUnicode(&code)) {
1737                         reportError(JSMSG_MALFORMED_ESCAPE, "Unicode");
1738                         return false;
1739                     }
1740 
1741                     MOZ_ASSERT(code <= 0x10FFFF);
1742                     if (code < 0x10000) {
1743                         c = code;
1744                     } else {
1745                         if (!tokenbuf.append((code - 0x10000) / 1024 + 0xD800))
1746                             return false;
1747                         c = ((code - 0x10000) % 1024) + 0xDC00;
1748                     }
1749                     break;
1750                 }
1751 
1752                 char16_t cp[4];
1753                 if (peekChars(4, cp) &&
1754                     JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3]))
1755                 {
1756                     c = JS7_UNHEX(cp[0]);
1757                     c = (c << 4) + JS7_UNHEX(cp[1]);
1758                     c = (c << 4) + JS7_UNHEX(cp[2]);
1759                     c = (c << 4) + JS7_UNHEX(cp[3]);
1760                     skipChars(4);
1761                 } else {
1762                     reportError(JSMSG_MALFORMED_ESCAPE, "Unicode");
1763                     return false;
1764                 }
1765                 break;
1766               }
1767 
1768               // Hexadecimal character specification.
1769               case 'x': {
1770                 char16_t cp[2];
1771                 if (peekChars(2, cp) && JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1])) {
1772                     c = (JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1]);
1773                     skipChars(2);
1774                 } else {
1775                     reportError(JSMSG_MALFORMED_ESCAPE, "hexadecimal");
1776                     return false;
1777                 }
1778                 break;
1779               }
1780 
1781               default:
1782                 // Octal character specification.
1783                 if (JS7_ISOCT(c)) {
1784                     int32_t val = JS7_UNOCT(c);
1785 
1786                     c = peekChar();
1787 
1788                     // Strict mode code allows only \0, then a non-digit.
1789                     if (val != 0 || JS7_ISDEC(c)) {
1790                         if (parsingTemplate) {
1791                             reportError(JSMSG_DEPRECATED_OCTAL);
1792                             return false;
1793                         }
1794                         if (!reportStrictModeError(JSMSG_DEPRECATED_OCTAL))
1795                             return false;
1796                         flags.sawOctalEscape = true;
1797                     }
1798 
1799                     if (JS7_ISOCT(c)) {
1800                         val = 8 * val + JS7_UNOCT(c);
1801                         getChar();
1802                         c = peekChar();
1803                         if (JS7_ISOCT(c)) {
1804                             int32_t save = val;
1805                             val = 8 * val + JS7_UNOCT(c);
1806                             if (val <= 0xFF)
1807                                 getChar();
1808                             else
1809                                 val = save;
1810                         }
1811                     }
1812 
1813                     c = char16_t(val);
1814                 }
1815                 break;
1816             }
1817         } else if (TokenBuf::isRawEOLChar(c)) {
1818             if (!parsingTemplate) {
1819                 ungetCharIgnoreEOL(c);
1820                 reportError(JSMSG_UNTERMINATED_STRING);
1821                 return false;
1822             }
1823             if (c == '\r') {
1824                 c = '\n';
1825                 if (userbuf.peekRawChar() == '\n')
1826                     skipChars(1);
1827             }
1828             updateLineInfoForEOL();
1829             updateFlagsForEOL();
1830         } else if (parsingTemplate && c == '$') {
1831             if ((nc = getCharIgnoreEOL()) == '{')
1832                 break;
1833             ungetCharIgnoreEOL(nc);
1834         }
1835 
1836         if (!tokenbuf.append(c)) {
1837             ReportOutOfMemory(cx);
1838             return false;
1839         }
1840     }
1841 
1842     JSAtom* atom = atomize(cx, tokenbuf);
1843     if (!atom)
1844         return false;
1845 
1846     if (!parsingTemplate) {
1847         (*tp)->type = TOK_STRING;
1848     } else {
1849         if (c == '$' && nc == '{')
1850             (*tp)->type = TOK_TEMPLATE_HEAD;
1851         else
1852             (*tp)->type = TOK_NO_SUBS_TEMPLATE;
1853     }
1854 
1855     (*tp)->setAtom(atom);
1856     return true;
1857 }
1858 
1859 JS_FRIEND_API(int)
js_fgets(char * buf,int size,FILE * file)1860 js_fgets(char* buf, int size, FILE* file)
1861 {
1862     int n, i, c;
1863     bool crflag;
1864 
1865     n = size - 1;
1866     if (n < 0)
1867         return -1;
1868 
1869     crflag = false;
1870     for (i = 0; i < n && (c = fast_getc(file)) != EOF; i++) {
1871         buf[i] = c;
1872         if (c == '\n') {        // any \n ends a line
1873             i++;                // keep the \n; we know there is room for \0
1874             break;
1875         }
1876         if (crflag) {           // \r not followed by \n ends line at the \r
1877             ungetc(c, file);
1878             break;              // and overwrite c in buf with \0
1879         }
1880         crflag = (c == '\r');
1881     }
1882 
1883     buf[i] = '\0';
1884     return i;
1885 }
1886 
1887 const char*
TokenKindToDesc(TokenKind tt)1888 frontend::TokenKindToDesc(TokenKind tt)
1889 {
1890     switch (tt) {
1891 #define EMIT_CASE(name, desc) case TOK_##name: return desc;
1892       FOR_EACH_TOKEN_KIND(EMIT_CASE)
1893 #undef EMIT_CASE
1894       case TOK_LIMIT:
1895         MOZ_ASSERT_UNREACHABLE("TOK_LIMIT should not be passed.");
1896         break;
1897     }
1898 
1899     return "<bad TokenKind>";
1900 }
1901 
#ifdef DEBUG
// Debug-only helper: returns the enumerator spelling of |tt| (for example
// "TOK_" followed by the kind's name), or "<bad TokenKind>" for TOK_LIMIT and
// for any value without a matching case.
const char*
TokenKindToString(TokenKind tt)
{
    const char* kindName = "<bad TokenKind>";

    switch (tt) {
#define TOKEN_NAME_CASE(name, desc) \
      case TOK_##name: \
        kindName = "TOK_" #name; \
        break;
      FOR_EACH_TOKEN_KIND(TOKEN_NAME_CASE)
#undef TOKEN_NAME_CASE

      case TOK_LIMIT:
        break;
    }

    return kindName;
}
#endif
1916