1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2  * vim: set ts=8 sts=4 et sw=4 tw=99:
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 // JS lexical scanner.
8 
9 #include "frontend/TokenStream.h"
10 
11 #include "mozilla/IntegerTypeTraits.h"
12 #include "mozilla/PodOperations.h"
13 
14 #include <ctype.h>
15 #include <stdarg.h>
16 #include <stdio.h>
17 #include <string.h>
18 
19 #include "jsatom.h"
20 #include "jscntxt.h"
21 #include "jscompartment.h"
22 #include "jsexn.h"
23 #include "jsnum.h"
24 
25 #include "frontend/BytecodeCompiler.h"
26 #include "js/CharacterEncoding.h"
27 #include "js/UniquePtr.h"
28 #include "vm/HelperThreads.h"
29 #include "vm/Keywords.h"
30 #include "vm/StringBuffer.h"
31 #include "vm/Unicode.h"
32 
33 using namespace js;
34 using namespace js::frontend;
35 
36 using mozilla::Maybe;
37 using mozilla::PodAssign;
38 using mozilla::PodCopy;
39 using mozilla::PodZero;
40 
// One entry of the keyword table: a keyword's spelling together with the
// token kind associated with it.
struct KeywordInfo {
    const char* chars;         // C string with keyword text
    TokenKind   tokentype;     // token kind paired with this keyword
};
45 
// Keyword table with one entry per keyword enumerated by
// FOR_EACH_JAVASCRIPT_KEYWORD.  Each entry pairs the js_<keyword>_str string
// with the |type| supplied by that macro.  FindKeyword() returns pointers into
// this table.
static const KeywordInfo keywords[] = {
#define KEYWORD_INFO(keyword, name, type) \
    {js_##keyword##_str, type},
    FOR_EACH_JAVASCRIPT_KEYWORD(KEYWORD_INFO)
#undef KEYWORD_INFO
};
52 
// Returns a KeywordInfo for the specified characters, or nullptr if the string
// is not a keyword.
//
// |s| points at |length| characters (Latin-1 or two-byte, per CharT);
// |length| must be non-zero.  The matching logic itself lives in the included
// "jsautokw.h", which drives this function through the JSKW_* macros below and
// finishes by jumping to one of the three labels.
template <typename CharT>
static const KeywordInfo*
FindKeyword(const CharT* s, size_t length)
{
    MOZ_ASSERT(length != 0);

    size_t i;
    const KeywordInfo* kw;
    const char* chars;

    // Interface consumed by jsautokw.h:
    //   JSKW_LENGTH()     - length of the candidate string
    //   JSKW_AT(column)   - candidate character at |column|
    //   JSKW_GOT_MATCH    - keywords[index] matches; no further checking
    //   JSKW_TEST_GUESS   - keywords[index] is a candidate; verify char by char
    //   JSKW_NO_MATCH     - the string is not a keyword
#define JSKW_LENGTH()           length
#define JSKW_AT(column)         s[column]
#define JSKW_GOT_MATCH(index)   i = (index); goto got_match;
#define JSKW_TEST_GUESS(index)  i = (index); goto test_guess;
#define JSKW_NO_MATCH()         goto no_match;
#include "jsautokw.h"
#undef JSKW_NO_MATCH
#undef JSKW_TEST_GUESS
#undef JSKW_GOT_MATCH
#undef JSKW_AT
#undef JSKW_LENGTH

  got_match:
    return &keywords[i];

  test_guess:
    // Compare all |length| input characters against the guessed keyword's
    // text.  The keyword byte is widened via unsigned char so it compares
    // correctly against CharT.
    // NOTE(review): presumably jsautokw.h only guesses keywords whose length
    // equals |length|; otherwise this loop could read past the keyword text.
    kw = &keywords[i];
    chars = kw->chars;
    do {
        if (*s++ != (unsigned char)(*chars++))
            goto no_match;
    } while (--length != 0);
    return kw;

  no_match:
    return nullptr;
}
92 
93 static const KeywordInfo*
94 FindKeyword(JSLinearString* str)
95 {
96     JS::AutoCheckCannotGC nogc;
97     return str->hasLatin1Chars()
98            ? FindKeyword(str->latin1Chars(nogc), str->length())
99            : FindKeyword(str->twoByteChars(nogc), str->length());
100 }
101 
102 template <typename CharT>
103 static bool
104 IsIdentifier(const CharT* chars, size_t length)
105 {
106     if (length == 0)
107         return false;
108 
109     if (!unicode::IsIdentifierStart(char16_t(*chars)))
110         return false;
111 
112     const CharT* end = chars + length;
113     while (++chars != end) {
114         if (!unicode::IsIdentifierPart(char16_t(*chars)))
115             return false;
116     }
117 
118     return true;
119 }
120 
121 bool
122 frontend::IsIdentifier(JSLinearString* str)
123 {
124     JS::AutoCheckCannotGC nogc;
125     return str->hasLatin1Chars()
126            ? ::IsIdentifier(str->latin1Chars(nogc), str->length())
127            : ::IsIdentifier(str->twoByteChars(nogc), str->length());
128 }
129 
// Identifier check for a raw two-byte character range of |length| characters.
// Forwards to the file-local ::IsIdentifier template.
bool
frontend::IsIdentifier(const char16_t* chars, size_t length)
{
    return ::IsIdentifier(chars, length);
}
135 
136 bool
137 frontend::IsKeyword(JSLinearString* str)
138 {
139     return FindKeyword(str) != nullptr;
140 }
141 
// Sets up the line-start table for a source whose first line is numbered
// |ln|.  After construction the table holds exactly two entries: offset 0 for
// the first line, followed by the MAX_PTR sentinel.
TokenStream::SourceCoords::SourceCoords(ExclusiveContext* cx, uint32_t ln)
  : lineStartOffsets_(cx), initialLineNum_(ln), lastLineIndex_(0)
{
    // This is actually necessary!  Removing it causes compile errors on
    // GCC and clang.  You could try declaring this:
    //
    //   const uint32_t TokenStream::SourceCoords::MAX_PTR;
    //
    // which fixes the GCC/clang error, but causes bustage on Windows.  Sigh.
    //
    uint32_t maxPtr = MAX_PTR;

    // The first line begins at buffer offset 0.  MAX_PTR is the sentinel.  The
    // appends cannot fail because |lineStartOffsets_| has statically-allocated
    // elements.
    MOZ_ASSERT(lineStartOffsets_.capacity() >= 2);
    MOZ_ALWAYS_TRUE(lineStartOffsets_.reserve(2));
    lineStartOffsets_.infallibleAppend(0);
    lineStartOffsets_.infallibleAppend(maxPtr);
}
162 
163 MOZ_ALWAYS_INLINE bool
164 TokenStream::SourceCoords::add(uint32_t lineNum, uint32_t lineStartOffset)
165 {
166     uint32_t lineIndex = lineNumToIndex(lineNum);
167     uint32_t sentinelIndex = lineStartOffsets_.length() - 1;
168 
169     MOZ_ASSERT(lineStartOffsets_[0] == 0 && lineStartOffsets_[sentinelIndex] == MAX_PTR);
170 
171     if (lineIndex == sentinelIndex) {
172         // We haven't seen this newline before.  Update lineStartOffsets_
173         // only if lineStartOffsets_.append succeeds, to keep sentinel.
174         // Otherwise return false to tell TokenStream about OOM.
175         uint32_t maxPtr = MAX_PTR;
176         if (!lineStartOffsets_.append(maxPtr))
177             return false;
178 
179         lineStartOffsets_[lineIndex] = lineStartOffset;
180     } else {
181         // We have seen this newline before (and ungot it).  Do nothing (other
182         // than checking it hasn't mysteriously changed).
183         // This path can be executed after hitting OOM, so check lineIndex.
184         MOZ_ASSERT_IF(lineIndex < sentinelIndex, lineStartOffsets_[lineIndex] == lineStartOffset);
185     }
186     return true;
187 }
188 
189 MOZ_ALWAYS_INLINE bool
190 TokenStream::SourceCoords::fill(const TokenStream::SourceCoords& other)
191 {
192     MOZ_ASSERT(lineStartOffsets_.back() == MAX_PTR);
193     MOZ_ASSERT(other.lineStartOffsets_.back() == MAX_PTR);
194 
195     if (lineStartOffsets_.length() >= other.lineStartOffsets_.length())
196         return true;
197 
198     uint32_t sentinelIndex = lineStartOffsets_.length() - 1;
199     lineStartOffsets_[sentinelIndex] = other.lineStartOffsets_[sentinelIndex];
200 
201     for (size_t i = sentinelIndex + 1; i < other.lineStartOffsets_.length(); i++) {
202         if (!lineStartOffsets_.append(other.lineStartOffsets_[i]))
203             return false;
204     }
205     return true;
206 }
207 
// Returns the index into |lineStartOffsets_| of the line containing |offset|,
// i.e. the index i for which
//   lineStartOffsets_[i] <= offset < lineStartOffsets_[i + 1].
// The result is remembered in |lastLineIndex_| so that later lookups at the
// same or slightly higher offsets can avoid the binary search.
MOZ_ALWAYS_INLINE uint32_t
TokenStream::SourceCoords::lineIndexOf(uint32_t offset) const
{
    uint32_t iMin, iMax, iMid;

    if (lineStartOffsets_[lastLineIndex_] <= offset) {
        // If we reach here, offset is on a line the same as or higher than
        // last time.  Check first for the +0, +1, +2 cases, because they
        // typically cover 85--98% of cases.
        if (offset < lineStartOffsets_[lastLineIndex_ + 1])
            return lastLineIndex_;      // lineIndex is same as last time

        // If we reach here, there must be at least one more entry (plus the
        // sentinel).  Try it.
        lastLineIndex_++;
        if (offset < lineStartOffsets_[lastLineIndex_ + 1])
            return lastLineIndex_;      // lineIndex is one higher than last time

        // The same logic applies here.
        lastLineIndex_++;
        if (offset < lineStartOffsets_[lastLineIndex_ + 1]) {
            return lastLineIndex_;      // lineIndex is two higher than last time
        }

        // No luck.  Oh well, we have a better-than-default starting point for
        // the binary search.
        iMin = lastLineIndex_ + 1;
        MOZ_ASSERT(iMin < lineStartOffsets_.length() - 1);   // -1 due to the sentinel

    } else {
        // |offset| lies before the cached line, so search from the first line.
        iMin = 0;
    }

    // This is a binary search with deferred detection of equality, which was
    // marginally faster in this case than a standard binary search.
    // The -2 is because |lineStartOffsets_.length() - 1| is the sentinel, and we
    // want one before that.
    iMax = lineStartOffsets_.length() - 2;
    while (iMax > iMin) {
        iMid = iMin + (iMax - iMin) / 2;
        if (offset >= lineStartOffsets_[iMid + 1])
            iMin = iMid + 1;    // offset is above lineStartOffsets_[iMid]
        else
            iMax = iMid;        // offset is below or within lineStartOffsets_[iMid]
    }
    MOZ_ASSERT(iMax == iMin);
    MOZ_ASSERT(lineStartOffsets_[iMin] <= offset && offset < lineStartOffsets_[iMin + 1]);
    // Remember the answer for the next lookup.
    lastLineIndex_ = iMin;
    return iMin;
}
258 
259 uint32_t
260 TokenStream::SourceCoords::lineNum(uint32_t offset) const
261 {
262     uint32_t lineIndex = lineIndexOf(offset);
263     return lineIndexToNum(lineIndex);
264 }
265 
266 uint32_t
267 TokenStream::SourceCoords::columnIndex(uint32_t offset) const
268 {
269     uint32_t lineIndex = lineIndexOf(offset);
270     uint32_t lineStartOffset = lineStartOffsets_[lineIndex];
271     MOZ_ASSERT(offset >= lineStartOffset);
272     return offset - lineStartOffset;
273 }
274 
275 void
276 TokenStream::SourceCoords::lineNumAndColumnIndex(uint32_t offset, uint32_t* lineNum,
277                                                  uint32_t* columnIndex) const
278 {
279     uint32_t lineIndex = lineIndexOf(offset);
280     *lineNum = lineIndexToNum(lineIndex);
281     uint32_t lineStartOffset = lineStartOffsets_[lineIndex];
282     MOZ_ASSERT(offset >= lineStartOffset);
283     *columnIndex = offset - lineStartOffset;
284 }
285 
// NOTE(review): warning 4351 is presumably MSVC's notice that array members
// named in the initializer list (e.g. |tokens()|) are default-initialized;
// confirm against the MSVC documentation before removing this.
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable:4351)
#endif

// Creates a token stream over the |length| characters starting at |base|.
// The initial line number, starting column, filename and muted-errors setting
// are taken from |options|; |smg| is stored as the strict mode getter.
TokenStream::TokenStream(ExclusiveContext* cx, const ReadOnlyCompileOptions& options,
                         const char16_t* base, size_t length, StrictModeGetter* smg)
  : srcCoords(cx, options.lineno),
    options_(options),
    tokens(),
    cursor(),
    lookahead(),
    lineno(options.lineno),
    flags(),
    linebase(0),
    prevLinebase(size_t(-1)),
    userbuf(cx, base, length, options.column),
    filename(options.filename()),
    displayURL_(nullptr),
    sourceMapURL_(nullptr),
    tokenbuf(cx),
    cx(cx),
    mutedErrors(options.mutedErrors()),
    strictModeGetter(smg)
{
    // Nb: the following tables could be static, but initializing them here is
    // much easier.  Don't worry, the time to initialize them for each
    // TokenStream is trivial.  See bug 639420.

    // See Parser::assignExpr() for an explanation of isExprEnding[].
    // Only the six token kinds below are flagged; every other entry stays 0.
    memset(isExprEnding, 0, sizeof(isExprEnding));
    isExprEnding[TOK_COMMA] = 1;
    isExprEnding[TOK_SEMI]  = 1;
    isExprEnding[TOK_COLON] = 1;
    isExprEnding[TOK_RP]    = 1;
    isExprEnding[TOK_RB]    = 1;
    isExprEnding[TOK_RC]    = 1;
}

#ifdef _MSC_VER
#pragma warning(pop)
#endif
328 
329 bool
330 TokenStream::checkOptions()
331 {
332     // Constrain starting columns to half of the range of a signed 32-bit value,
333     // to avoid overflow.
334     if (options().column >= mozilla::MaxValue<int32_t>::value / 2 + 1) {
335         reportErrorNoOffset(JSMSG_BAD_COLUMN_NUMBER);
336         return false;
337     }
338 
339     return true;
340 }
341 
// Nothing to do explicitly here; the body is intentionally empty.
TokenStream::~TokenStream()
{
}
345 
// Use the fastest available getc: prefer getc_unlocked, then _getc_nolock,
// and fall back to plain getc when the build defines neither HAVE_* macro.
#if defined(HAVE_GETC_UNLOCKED)
# define fast_getc getc_unlocked
#elif defined(HAVE__GETC_NOLOCK)
# define fast_getc _getc_nolock
#else
# define fast_getc getc
#endif
354 
355 MOZ_ALWAYS_INLINE void
356 TokenStream::updateLineInfoForEOL()
357 {
358     prevLinebase = linebase;
359     linebase = userbuf.offset();
360     lineno++;
361     if (!srcCoords.add(lineno, linebase))
362         flags.hitOOM = true;
363 }
364 
// Resets the per-line flag state: clears |flags.isDirtyLine|.
MOZ_ALWAYS_INLINE void
TokenStream::updateFlagsForEOL()
{
    flags.isDirtyLine = false;
}
370 
371 // This gets the next char, normalizing all EOL sequences to '\n' as it goes.
372 int32_t
373 TokenStream::getChar()
374 {
375     int32_t c;
376     if (MOZ_LIKELY(userbuf.hasRawChars())) {
377         c = userbuf.getRawChar();
378 
379         // Normalize the char16_t if it was a newline.
380         if (MOZ_UNLIKELY(c == '\n'))
381             goto eol;
382         if (MOZ_UNLIKELY(c == '\r')) {
383             // If it's a \r\n sequence: treat as a single EOL, skip over the \n.
384             if (MOZ_LIKELY(userbuf.hasRawChars()))
385                 userbuf.matchRawChar('\n');
386             goto eol;
387         }
388         if (MOZ_UNLIKELY(c == LINE_SEPARATOR || c == PARA_SEPARATOR))
389             goto eol;
390 
391         return c;
392     }
393 
394     flags.isEOF = true;
395     return EOF;
396 
397   eol:
398     updateLineInfoForEOL();
399     return '\n';
400 }
401 
402 // This gets the next char. It does nothing special with EOL sequences, not
403 // even updating the line counters.  It can be used safely if (a) the
404 // resulting char is guaranteed to be ungotten (by ungetCharIgnoreEOL()) if
405 // it's an EOL, and (b) the line-related state (lineno, linebase) is not used
406 // before it's ungotten.
407 int32_t
408 TokenStream::getCharIgnoreEOL()
409 {
410     if (MOZ_LIKELY(userbuf.hasRawChars()))
411         return userbuf.getRawChar();
412 
413     flags.isEOF = true;
414     return EOF;
415 }
416 
417 void
418 TokenStream::ungetChar(int32_t c)
419 {
420     if (c == EOF)
421         return;
422     MOZ_ASSERT(!userbuf.atStart());
423     userbuf.ungetRawChar();
424     if (c == '\n') {
425 #ifdef DEBUG
426         int32_t c2 = userbuf.peekRawChar();
427         MOZ_ASSERT(TokenBuf::isRawEOLChar(c2));
428 #endif
429 
430         // If it's a \r\n sequence, also unget the \r.
431         if (!userbuf.atStart())
432             userbuf.matchRawCharBackwards('\r');
433 
434         MOZ_ASSERT(prevLinebase != size_t(-1));    // we should never get more than one EOL char
435         linebase = prevLinebase;
436         prevLinebase = size_t(-1);
437         lineno--;
438     } else {
439         MOZ_ASSERT(userbuf.peekRawChar() == c);
440     }
441 }
442 
443 void
444 TokenStream::ungetCharIgnoreEOL(int32_t c)
445 {
446     if (c == EOF)
447         return;
448     MOZ_ASSERT(!userbuf.atStart());
449     userbuf.ungetRawChar();
450 }
451 
452 // Return true iff |n| raw characters can be read from this without reading past
453 // EOF or a newline, and copy those characters into |cp| if so.  The characters
454 // are not consumed: use skipChars(n) to do so after checking that the consumed
455 // characters had appropriate values.
456 bool
457 TokenStream::peekChars(int n, char16_t* cp)
458 {
459     int i, j;
460     int32_t c;
461 
462     for (i = 0; i < n; i++) {
463         c = getCharIgnoreEOL();
464         if (c == EOF)
465             break;
466         if (c == '\n') {
467             ungetCharIgnoreEOL(c);
468             break;
469         }
470         cp[i] = char16_t(c);
471     }
472     for (j = i - 1; j >= 0; j--)
473         ungetCharIgnoreEOL(cp[j]);
474     return i == n;
475 }
476 
477 size_t
478 TokenStream::TokenBuf::findEOLMax(size_t start, size_t max)
479 {
480     const char16_t* p = rawCharPtrAt(start);
481 
482     size_t n = 0;
483     while (true) {
484         if (p >= limit_)
485             break;
486         if (n >= max)
487             break;
488         n++;
489         if (TokenBuf::isRawEOLChar(*p++))
490             break;
491     }
492     return start + n;
493 }
494 
495 bool
496 TokenStream::advance(size_t position)
497 {
498     const char16_t* end = userbuf.rawCharPtrAt(position);
499     while (userbuf.addressOfNextRawChar() < end)
500         getChar();
501 
502     Token* cur = &tokens[cursor];
503     cur->pos.begin = userbuf.offset();
504     MOZ_MAKE_MEM_UNDEFINED(&cur->type, sizeof(cur->type));
505     lookahead = 0;
506 
507     if (flags.hitOOM)
508         return reportError(JSMSG_OUT_OF_MEMORY);
509 
510     return true;
511 }
512 
513 void
514 TokenStream::tell(Position* pos)
515 {
516     pos->buf = userbuf.addressOfNextRawChar(/* allowPoisoned = */ true);
517     pos->flags = flags;
518     pos->lineno = lineno;
519     pos->linebase = linebase;
520     pos->prevLinebase = prevLinebase;
521     pos->lookahead = lookahead;
522     pos->currentToken = currentToken();
523     for (unsigned i = 0; i < lookahead; i++)
524         pos->lookaheadTokens[i] = tokens[(cursor + 1 + i) & ntokensMask];
525 }
526 
527 void
528 TokenStream::seek(const Position& pos)
529 {
530     userbuf.setAddressOfNextRawChar(pos.buf, /* allowPoisoned = */ true);
531     flags = pos.flags;
532     lineno = pos.lineno;
533     linebase = pos.linebase;
534     prevLinebase = pos.prevLinebase;
535     lookahead = pos.lookahead;
536 
537     tokens[cursor] = pos.currentToken;
538     for (unsigned i = 0; i < lookahead; i++)
539         tokens[(cursor + 1 + i) & ntokensMask] = pos.lookaheadTokens[i];
540 }
541 
542 bool
543 TokenStream::seek(const Position& pos, const TokenStream& other)
544 {
545     if (!srcCoords.fill(other.srcCoords))
546         return false;
547     seek(pos);
548     return true;
549 }
550 
551 bool
552 TokenStream::reportStrictModeErrorNumberVA(uint32_t offset, bool strictMode, unsigned errorNumber,
553                                            va_list args)
554 {
555     // In strict mode code, this is an error, not merely a warning.
556     unsigned flags;
557     if (strictMode)
558         flags = JSREPORT_ERROR;
559     else if (options().extraWarningsOption)
560         flags = JSREPORT_WARNING | JSREPORT_STRICT;
561     else
562         return true;
563 
564     return reportCompileErrorNumberVA(offset, flags, errorNumber, args);
565 }
566 
567 void
568 CompileError::throwError(JSContext* cx)
569 {
570     if (JSREPORT_IS_WARNING(flags)) {
571         CallWarningReporter(cx, this);
572         return;
573     }
574 
575     // If there's a runtime exception type associated with this error
576     // number, set that as the pending exception.  For errors occuring at
577     // compile time, this is very likely to be a JSEXN_SYNTAXERR.
578     //
579     // If an exception is thrown but not caught, the JSREPORT_EXCEPTION
580     // flag will be set in report.flags.  Proper behavior for an error
581     // reporter is to ignore a report with this flag for all but top-level
582     // compilation errors.  The exception will remain pending, and so long
583     // as the non-top-level "load", "eval", or "compile" native function
584     // returns false, the top-level reporter will eventually receive the
585     // uncaught exception report.
586     ErrorToException(cx, this, nullptr, nullptr);
587 }
588 
// Builds a CompileError for |errorNumber| located at |offset| (or with no
// location when |offset| is NoOffset) and delivers it.
//
//   offset      - buffer offset the report refers to, or NoOffset
//   flags       - JSREPORT_* bits; a warning is promoted to an error when the
//                 werror option is set
//   errorNumber - message number passed to ExpandErrorArgumentsVA
//   args        - arguments for the message
//
// Returns true only when the report was (still) a warning by the time it was
// delivered.  Returns false for errors and for every failure encountered
// while building the report.
bool
TokenStream::reportCompileErrorNumberVA(uint32_t offset, unsigned flags, unsigned errorNumber,
                                        va_list args)
{
    bool warning = JSREPORT_IS_WARNING(flags);

    if (warning && options().werrorOption) {
        flags &= ~JSREPORT_WARNING;
        warning = false;
    }

    // On the main thread, report the error immediately. When compiling off
    // thread, save the error so that the main thread can report it later.
    CompileError tempErr;
    CompileError* tempErrPtr = &tempErr;
    if (!cx->isJSContext() && !cx->addPendingCompileError(&tempErrPtr))
        return false;
    CompileError& err = *tempErrPtr;

    err.flags = flags;
    err.errorNumber = errorNumber;
    err.filename = filename;
    err.isMuted = mutedErrors;
    if (offset == NoOffset) {
        err.lineno = 0;
        err.column = 0;
    } else {
        err.lineno = srcCoords.lineNum(offset);
        err.column = srcCoords.columnIndex(offset);
    }

    // If we have no location information, try to get one from the caller.
    bool callerFilename = false;
    if (offset != NoOffset && !err.filename && cx->isJSContext()) {
        NonBuiltinFrameIter iter(cx->asJSContext(),
                                 FrameIter::FOLLOW_DEBUGGER_EVAL_PREV_LINK,
                                 cx->compartment()->principals());
        if (!iter.done() && iter.filename()) {
            // The line and column now describe the caller's location rather
            // than a position in our own buffer.
            callerFilename = true;
            err.filename = iter.filename();
            err.lineno = iter.computeLine(&err.column);
        }
    }

    if (!ExpandErrorArgumentsVA(cx, GetErrorMessage, nullptr, errorNumber,
                                nullptr, ArgumentsAreLatin1, &err, args))
    {
        return false;
    }

    // Given a token, T, that we want to complain about: if T's (starting)
    // lineno doesn't match TokenStream's lineno, that means we've scanned past
    // the line that T starts on, which makes it hard to print some or all of
    // T's (starting) line for context.
    //
    // So we don't even try, leaving report.linebuf and friends zeroed.  This
    // means that any error involving a multi-line token (e.g. an unterminated
    // multi-line string literal) won't have a context printed.
    //
    // The context is also skipped when the location came from the caller,
    // since our buffer does not hold that source.
    if (offset != NoOffset && err.lineno == lineno && !callerFilename) {
        // We show only a portion (a "window") of the line around the erroneous
        // token -- the first char in the token, plus |windowRadius| chars
        // before it and |windowRadius - 1| chars after it.  This is because
        // lines can be very long and printing the whole line is (a) not that
        // helpful, and (b) can waste a lot of memory.  See bug 634444.
        static const size_t windowRadius = 60;

        // The window must start within the current line, no earlier than
        // windowRadius characters before offset.
        size_t windowStart = (offset - linebase > windowRadius) ?
                             offset - windowRadius :
                             linebase;

        // The window must start within the portion of the current line
        // that we actually have in our buffer.
        if (windowStart < userbuf.startOffset())
            windowStart = userbuf.startOffset();

        // The window must end within the current line, no later than
        // windowRadius after offset.
        size_t windowEnd = userbuf.findEOLMax(offset, windowRadius);
        size_t windowLength = windowEnd - windowStart;
        MOZ_ASSERT(windowLength <= windowRadius * 2);

        // Create the windowed strings.
        StringBuffer windowBuf(cx);
        if (!windowBuf.append(userbuf.rawCharPtrAt(windowStart), windowLength) ||
            !windowBuf.append('\0'))
        {
            return false;
        }

        // The window into the offending source line, without final \n.
        UniqueTwoByteChars linebuf(windowBuf.stealChars());
        if (!linebuf)
            return false;

        // Ownership of the buffer passes to |err|; the last argument is the
        // position of the offending token within the window.
        err.initOwnedLinebuf(linebuf.release(), windowLength, offset - windowStart);
    }

    // With a JSContext the report is delivered right away; otherwise it was
    // queued by addPendingCompileError above.
    if (cx->isJSContext())
        err.throwError(cx->asJSContext());

    return warning;
}
693 
694 bool
695 TokenStream::reportStrictModeError(unsigned errorNumber, ...)
696 {
697     va_list args;
698     va_start(args, errorNumber);
699     bool result = reportStrictModeErrorNumberVA(currentToken().pos.begin, strictMode(),
700                                                 errorNumber, args);
701     va_end(args);
702     return result;
703 }
704 
705 bool
706 TokenStream::reportError(unsigned errorNumber, ...)
707 {
708     va_list args;
709     va_start(args, errorNumber);
710     bool result = reportCompileErrorNumberVA(currentToken().pos.begin, JSREPORT_ERROR, errorNumber,
711                                              args);
712     va_end(args);
713     return result;
714 }
715 
716 bool
717 TokenStream::reportErrorNoOffset(unsigned errorNumber, ...)
718 {
719     va_list args;
720     va_start(args, errorNumber);
721     bool result = reportCompileErrorNumberVA(NoOffset, JSREPORT_ERROR, errorNumber,
722                                              args);
723     va_end(args);
724     return result;
725 }
726 
727 bool
728 TokenStream::reportWarning(unsigned errorNumber, ...)
729 {
730     va_list args;
731     va_start(args, errorNumber);
732     bool result = reportCompileErrorNumberVA(currentToken().pos.begin, JSREPORT_WARNING,
733                                              errorNumber, args);
734     va_end(args);
735     return result;
736 }
737 
738 bool
739 TokenStream::reportStrictWarningErrorNumberVA(uint32_t offset, unsigned errorNumber, va_list args)
740 {
741     if (!options().extraWarningsOption)
742         return true;
743 
744     return reportCompileErrorNumberVA(offset, JSREPORT_STRICT|JSREPORT_WARNING, errorNumber, args);
745 }
746 
747 void
748 TokenStream::reportAsmJSError(uint32_t offset, unsigned errorNumber, ...)
749 {
750     va_list args;
751     va_start(args, errorNumber);
752     unsigned flags = options().throwOnAsmJSValidationFailureOption
753                      ? JSREPORT_ERROR
754                      : JSREPORT_WARNING;
755     reportCompileErrorNumberVA(offset, flags, errorNumber, args);
756     va_end(args);
757 }
758 
759 // We have encountered a '\': check for a Unicode escape sequence after it.
760 // Return the length of the escape sequence and the character code point (by
761 // value) if we found a Unicode escape sequence.  Otherwise, return 0.  In both
762 // cases, do not advance along the buffer.
763 uint32_t
764 TokenStream::peekUnicodeEscape(uint32_t* codePoint)
765 {
766     int32_t c = getCharIgnoreEOL();
767     if (c != 'u') {
768         ungetCharIgnoreEOL(c);
769         return 0;
770     }
771 
772     char16_t cp[3];
773     uint32_t length;
774     c = getCharIgnoreEOL();
775     if (JS7_ISHEX(c) && peekChars(3, cp) &&
776         JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]))
777     {
778         *codePoint = (JS7_UNHEX(c) << 12) |
779                      (JS7_UNHEX(cp[0]) << 8) |
780                      (JS7_UNHEX(cp[1]) << 4) |
781                      JS7_UNHEX(cp[2]);
782         length = 5;
783     } else if (c == '{') {
784         length = peekExtendedUnicodeEscape(codePoint);
785     } else {
786         length = 0;
787     }
788 
789     ungetCharIgnoreEOL(c);
790     ungetCharIgnoreEOL('u');
791     return length;
792 }
793 
794 uint32_t
795 TokenStream::peekExtendedUnicodeEscape(uint32_t* codePoint)
796 {
797     // The opening brace character was already read.
798     int32_t c = getCharIgnoreEOL();
799 
800     // Skip leading zeros.
801     uint32_t leadingZeros = 0;
802     while (c == '0') {
803         leadingZeros++;
804         c = getCharIgnoreEOL();
805     }
806 
807     char16_t cp[6];
808     size_t i = 0;
809     uint32_t code = 0;
810     while (JS7_ISHEX(c) && i < 6) {
811         cp[i++] = c;
812         code = code << 4 | JS7_UNHEX(c);
813         c = getCharIgnoreEOL();
814     }
815 
816     uint32_t length;
817     if (c == '}' && (leadingZeros > 0 || i > 0) && code <= unicode::NonBMPMax) {
818         *codePoint = code;
819         length = leadingZeros + i + 3;
820     } else {
821         length = 0;
822     }
823 
824     ungetCharIgnoreEOL(c);
825     while (i--)
826         ungetCharIgnoreEOL(cp[i]);
827     while (leadingZeros--)
828         ungetCharIgnoreEOL('0');
829 
830     return length;
831 }
832 
833 uint32_t
834 TokenStream::matchUnicodeEscapeIdStart(uint32_t* codePoint)
835 {
836     uint32_t length = peekUnicodeEscape(codePoint);
837     if (length > 0 && unicode::IsIdentifierStart(*codePoint)) {
838         skipChars(length);
839         return length;
840     }
841     return 0;
842 }
843 
844 bool
845 TokenStream::matchUnicodeEscapeIdent(uint32_t* codePoint)
846 {
847     uint32_t length = peekUnicodeEscape(codePoint);
848     if (length > 0 && unicode::IsIdentifierPart(*codePoint)) {
849         skipChars(length);
850         return true;
851     }
852     return false;
853 }
854 
// Helper function which returns true if the first length(q) characters in p are
// the same as the characters in q.
//
// |q| is a NUL-terminated C string; |p| must point at no fewer than length(q)
// readable characters.  An empty |q| matches anything.
//
// Each byte of |q| is widened through unsigned char before the comparison
// (as FindKeyword does).  With a plain |char| that is signed, a byte >= 0x80
// would promote to a negative int and could never equal a char16_t value.
static bool
CharsMatch(const char16_t* p, const char* q) {
    while (*q) {
        if (*p++ != static_cast<unsigned char>(*q++))
            return false;
    }
    return true;
}
865 
866 bool
867 TokenStream::getDirectives(bool isMultiline, bool shouldWarnDeprecated)
868 {
869     // Match directive comments used in debugging, such as "//# sourceURL" and
870     // "//# sourceMappingURL". Use of "//@" instead of "//#" is deprecated.
871     //
872     // To avoid a crashing bug in IE, several JavaScript transpilers wrap single
873     // line comments containing a source mapping URL inside a multiline
874     // comment. To avoid potentially expensive lookahead and backtracking, we
875     // only check for this case if we encounter a '#' character.
876 
877     if (!getDisplayURL(isMultiline, shouldWarnDeprecated))
878         return false;
879     if (!getSourceMappingURL(isMultiline, shouldWarnDeprecated))
880         return false;
881 
882     return true;
883 }
884 
885 bool
886 TokenStream::getDirective(bool isMultiline, bool shouldWarnDeprecated,
887                           const char* directive, int directiveLength,
888                           const char* errorMsgPragma,
889                           UniqueTwoByteChars* destination)
890 {
891     MOZ_ASSERT(directiveLength <= 18);
892     char16_t peeked[18];
893     int32_t c;
894 
895     if (peekChars(directiveLength, peeked) && CharsMatch(peeked, directive)) {
896         if (shouldWarnDeprecated &&
897             !reportWarning(JSMSG_DEPRECATED_PRAGMA, errorMsgPragma))
898             return false;
899 
900         skipChars(directiveLength);
901         tokenbuf.clear();
902 
903         while ((c = peekChar()) && c != EOF && !unicode::IsSpaceOrBOM2(c)) {
904             getChar();
905             // Debugging directives can occur in both single- and multi-line
906             // comments. If we're currently inside a multi-line comment, we also
907             // need to recognize multi-line comment terminators.
908             if (isMultiline && c == '*' && peekChar() == '/') {
909                 ungetChar('*');
910                 break;
911             }
912             if (!tokenbuf.append(c))
913                 return false;
914         }
915 
916         if (tokenbuf.empty()) {
917             // The directive's URL was missing, but this is not quite an
918             // exception that we should stop and drop everything for.
919             return true;
920         }
921 
922         size_t length = tokenbuf.length();
923 
924         *destination = cx->make_pod_array<char16_t>(length + 1);
925         if (!*destination)
926             return false;
927 
928         PodCopy(destination->get(), tokenbuf.begin(), length);
929         (*destination)[length] = '\0';
930     }
931 
932     return true;
933 }
934 
935 bool
936 TokenStream::getDisplayURL(bool isMultiline, bool shouldWarnDeprecated)
937 {
938     // Match comments of the form "//# sourceURL=<url>" or
939     // "/\* //# sourceURL=<url> *\/"
940     //
941     // Note that while these are labeled "sourceURL" in the source text,
942     // internally we refer to it as a "displayURL" to distinguish what the
943     // developer would like to refer to the source as from the source's actual
944     // URL.
945 
946     return getDirective(isMultiline, shouldWarnDeprecated, " sourceURL=", 11,
947                         "sourceURL", &displayURL_);
948 }
949 
950 bool
951 TokenStream::getSourceMappingURL(bool isMultiline, bool shouldWarnDeprecated)
952 {
953     // Match comments of the form "//# sourceMappingURL=<url>" or
954     // "/\* //# sourceMappingURL=<url> *\/"
955 
956     return getDirective(isMultiline, shouldWarnDeprecated, " sourceMappingURL=", 18,
957                         "sourceMappingURL", &sourceMapURL_);
958 }
959 
960 MOZ_ALWAYS_INLINE Token*
961 TokenStream::newToken(ptrdiff_t adjust)
962 {
963     cursor = (cursor + 1) & ntokensMask;
964     Token* tp = &tokens[cursor];
965     tp->pos.begin = userbuf.offset() + adjust;
966 
967     // NOTE: tp->pos.end is not set until the very end of getTokenInternal().
968     MOZ_MAKE_MEM_UNDEFINED(&tp->pos.end, sizeof(tp->pos.end));
969 
970     return tp;
971 }
972 
973 MOZ_ALWAYS_INLINE JSAtom*
974 TokenStream::atomize(ExclusiveContext* cx, CharBuffer& cb)
975 {
976     return AtomizeChars(cx, cb.begin(), cb.length());
977 }
978 
#ifdef DEBUG
// Debug-only consistency check for a finished token: its kind must be a real
// token kind and its position range must not be inverted.
static bool
IsTokenSane(Token* tp)
{
    // Nb: TOK_EOL is never a legitimate kind for an actual Token;  it is only
    // ever handed back as a TokenKind by peekTokenSameLine().
    const bool kindInRange = tp->type >= 0 && tp->type < TOK_LIMIT;
    if (!kindInRange || tp->type == TOK_EOL)
        return false;

    return !(tp->pos.end < tp->pos.begin);
}
#endif
994 
995 bool
996 TokenStream::putIdentInTokenbuf(const char16_t* identStart)
997 {
998     int32_t c;
999     uint32_t qc;
1000     const char16_t* tmp = userbuf.addressOfNextRawChar();
1001     userbuf.setAddressOfNextRawChar(identStart);
1002 
1003     tokenbuf.clear();
1004     for (;;) {
1005         c = getCharIgnoreEOL();
1006         if (!unicode::IsIdentifierPart(char16_t(c))) {
1007             if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
1008                 break;
1009             c = qc;
1010         }
1011         if (!tokenbuf.append(c)) {
1012             userbuf.setAddressOfNextRawChar(tmp);
1013             return false;
1014         }
1015     }
1016     userbuf.setAddressOfNextRawChar(tmp);
1017     return true;
1018 }
1019 
1020 bool
1021 TokenStream::checkForKeyword(const KeywordInfo* kw, TokenKind* ttp)
1022 {
1023     if (!awaitIsKeyword && kw->tokentype == TOK_AWAIT) {
1024         if (ttp)
1025             *ttp = TOK_NAME;
1026         return true;
1027     }
1028 
1029     if (kw->tokentype == TOK_RESERVED)
1030         return reportError(JSMSG_RESERVED_ID, kw->chars);
1031 
1032     if (kw->tokentype == TOK_STRICT_RESERVED)
1033         return reportStrictModeError(JSMSG_RESERVED_ID, kw->chars);
1034 
1035     // Working keyword.
1036     *ttp = kw->tokentype;
1037     return true;
1038 }
1039 
1040 bool
1041 TokenStream::checkForKeyword(JSAtom* atom, TokenKind* ttp)
1042 {
1043     const KeywordInfo* kw = FindKeyword(atom);
1044     if (!kw)
1045         return true;
1046 
1047     return checkForKeyword(kw, ttp);
1048 }
1049 
// Classification of a token's first character, used by getTokenInternal() to
// dispatch on the first char16_t of each token via the firstCharKinds[] table.
enum FirstCharKind {
    // A char16_t has the 'OneChar' kind if it, by itself, constitutes a valid
    // token that cannot also be a prefix of a longer token.  E.g. ';' has the
    // OneChar kind, but '+' does not, because '++' and '+=' are valid longer tokens
    // that begin with '+'.
    //
    // The few token kinds satisfying these properties cover roughly 35--45%
    // of the tokens seen in practice.
    //
    // We represent the 'OneChar' kind with any value in the range
    // [OneChar_Min, OneChar_Max], i.e. any value less than TOK_LIMIT.  This
    // representation lets us associate each one-char token char16_t with a
    // TokenKind and thus avoid a subsequent char16_t-to-TokenKind conversion.
    OneChar_Min = 0,
    OneChar_Max = TOK_LIMIT - 1,

    // The remaining kinds are numbered from TOK_LIMIT upwards so that they
    // can never collide with a TokenKind value.
    Space = TOK_LIMIT,  // non-EOL whitespace: '\t', '\v', '\f', ' '
    Ident,              // ASCII identifier start: '$', '_', 'A'..'Z', 'a'..'z'
    Dec,                // '1'..'9'
    String,             // string or template delimiter: '"', '\'', '`'
    EOL,                // '\n', '\r'
    BasePrefix,         // '0'
    Other,              // everything else

    LastCharKind = Other
};
1076 
// Classification of the 128 ASCII code units by FirstCharKind:
//
// OneChar: 40,  41,  44,  58,  59,  63,  91,  93,  123, 125, 126:
//          '(', ')', ',', ':', ';', '?', '[', ']', '{', '}', '~'
// Ident:   36, 65..90, 95, 97..122: '$', 'A'..'Z', '_', 'a'..'z'
// String:  34, 39, 96: '"', '\'', '`' (the template delimiter is spelled
//          |Templat| in the table below and shares the String kind)
// Dec:     49..57: '1'..'9'
// BasePrefix:  48: '0'
// Space:   9, 11, 12, 32: '\t', '\v', '\f', ' '
// EOL:     10, 13: '\n', '\r'
// Other:   every remaining code unit, including '.', '=' and '+', which
//          have no dedicated kind of their own.
//
// The short macro names below exist only to keep the table columns aligned;
// they are undefined again immediately after the table.
#define T_COMMA     TOK_COMMA
#define T_COLON     TOK_COLON
#define T_BITNOT    TOK_BITNOT
#define Templat     String
#define _______     Other
static const uint8_t firstCharKinds[] = {
/*         0        1        2        3        4        5        6        7        8        9    */
/*   0+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______,   Space,
/*  10+ */     EOL,   Space,   Space,     EOL, _______, _______, _______, _______, _______, _______,
/*  20+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
/*  30+ */ _______, _______,   Space, _______,  String, _______,   Ident, _______, _______,  String,
/*  40+ */  TOK_LP,  TOK_RP, _______, _______, T_COMMA,_______,  _______, _______,BasePrefix,  Dec,
/*  50+ */     Dec,     Dec,     Dec,     Dec,     Dec,     Dec,     Dec,    Dec,  T_COLON,TOK_SEMI,
/*  60+ */ _______, _______, _______,TOK_HOOK, _______,   Ident,   Ident,   Ident,   Ident,   Ident,
/*  70+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
/*  80+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
/*  90+ */   Ident,  TOK_LB, _______,  TOK_RB, _______,   Ident, Templat,   Ident,   Ident,   Ident,
/* 100+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
/* 110+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
/* 120+ */   Ident,   Ident,   Ident,  TOK_LC, _______,  TOK_RC,T_BITNOT, _______
};
#undef T_COMMA
#undef T_COLON
#undef T_BITNOT
#undef Templat
#undef _______

// Every FirstCharKind value must fit in one element of the table.
static_assert(LastCharKind < (1 << (sizeof(firstCharKinds[0]) * 8)),
              "Elements of firstCharKinds[] are too small");
1118 
// Scan the next token from userbuf.
//
// On success the token is stored in the token ring buffer (the slot handed
// out by newToken()), its kind is written to |*ttp|, and true is returned.
// On failure flags.hadError is set, |*ttp| is left undefined, and false is
// returned.
//
// |modifier| adjusts scanning in the following ways visible in this function:
//   - TemplateTail:  resume scanning the rest of a template string.
//   - KeywordIsName: do not convert identifiers that spell keywords into
//                    keyword tokens.
//   - Operand:       a '/' that does not start a comment begins a regular
//                    expression literal rather than a division operator.
//
// Whitespace, line terminators and comments produce no token; after
// consuming them the scanner jumps back to |retry|.
bool
TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier)
{
    int c;
    uint32_t qc;
    Token* tp;
    FirstCharKind c1kind;
    const char16_t* numStart;
    bool hasExp;
    DecimalPoint decimalPoint;
    const char16_t* identStart;
    bool hadUnicodeEscape;

    // Check if in the middle of a template string. Have to get this out of
    // the way first.
    if (MOZ_UNLIKELY(modifier == TemplateTail)) {
        if (!getStringOrTemplateToken('`', &tp))
            goto error;
        goto out;
    }

  retry:
    if (MOZ_UNLIKELY(!userbuf.hasRawChars())) {
        tp = newToken(0);
        tp->type = TOK_EOF;
        flags.isEOF = true;
        goto out;
    }

    c = userbuf.getRawChar();
    MOZ_ASSERT(c != EOF);

    // Chars not in the range 0..127 are rare.  Getting them out of the way
    // early allows subsequent checking to be faster.
    if (MOZ_UNLIKELY(c >= 128)) {
        if (unicode::IsSpaceOrBOM2(c)) {
            if (c == LINE_SEPARATOR || c == PARA_SEPARATOR) {
                updateLineInfoForEOL();
                updateFlagsForEOL();
            }

            goto retry;
        }

        tp = newToken(-1);

        static_assert('$' < 128,
                      "IdentifierStart contains '$', but as !IsUnicodeIDStart('$'), "
                      "ensure that '$' is never handled here");
        static_assert('_' < 128,
                      "IdentifierStart contains '_', but as !IsUnicodeIDStart('_'), "
                      "ensure that '_' is never handled here");
        if (unicode::IsUnicodeIDStart(c)) {
            identStart = userbuf.addressOfNextRawChar() - 1;
            hadUnicodeEscape = false;
            goto identifier;
        }

        goto badchar;
    }

    // Get the token kind, based on the first char.  The ordering of c1kind
    // comparison is based on the frequency of tokens in real code -- Parsemark
    // (which represents typical JS code on the web) and the Unreal demo (which
    // represents asm.js code).
    //
    //                  Parsemark   Unreal
    //  OneChar         32.9%       39.7%
    //  Space           25.0%        0.6%
    //  Ident           19.2%       36.4%
    //  Dec              7.2%        5.1%
    //  String           7.9%        0.0%
    //  EOL              1.7%        0.0%
    //  BasePrefix       0.4%        4.9%
    //  Other            5.7%       13.3%
    //
    // The ordering is based mostly on Parsemark frequencies, with Unreal
    // frequencies used to break close categories (e.g. |Dec| and |String|).
    // |Other| is biggish, but no other token kind is common enough for it to
    // be worth adding extra values to FirstCharKind.
    //
    // The index is in range here: c < 128 was established just above.
    c1kind = FirstCharKind(firstCharKinds[c]);

    // Look for an unambiguous single-char token.
    //
    if (c1kind <= OneChar_Max) {
        tp = newToken(-1);
        tp->type = TokenKind(c1kind);
        goto out;
    }

    // Skip over non-EOL whitespace chars.
    //
    if (c1kind == Space)
        goto retry;

    // Look for an identifier.
    //
    if (c1kind == Ident) {
        tp = newToken(-1);
        identStart = userbuf.addressOfNextRawChar() - 1;
        hadUnicodeEscape = false;

        // Also entered from the non-ASCII path above and from the '\\' case
        // below, both of which set tp, identStart and hadUnicodeEscape first.
      identifier:
        for (;;) {
            c = getCharIgnoreEOL();
            if (c == EOF)
                break;
            if (!unicode::IsIdentifierPart(char16_t(c))) {
                if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
                    break;
                hadUnicodeEscape = true;
            }
        }
        ungetCharIgnoreEOL(c);

        // Identifiers containing no Unicode escapes can be processed directly
        // from userbuf.  The rest must use the escapes converted via tokenbuf
        // before atomizing.
        const char16_t* chars;
        size_t length;
        if (hadUnicodeEscape) {
            if (!putIdentInTokenbuf(identStart))
                goto error;

            chars = tokenbuf.begin();
            length = tokenbuf.length();
        } else {
            chars = identStart;
            length = userbuf.addressOfNextRawChar() - identStart;
        }

        // Represent keywords as keyword tokens unless told otherwise.
        if (modifier != KeywordIsName) {
            if (const KeywordInfo* kw = FindKeyword(chars, length)) {
                // That said, keywords can't contain escapes.  (Contexts where
                // keywords are treated as names, that also sometimes treat
                // keywords as keywords, must manually check this requirement.)
                // There are two exceptions
                // 1) StrictReservedWords: These keywords need to be treated as
                //    names in non-strict mode.
                // 2) yield is also treated as a name if it contains an escape
                //    sequence. The parser must handle this case separately.
                if (hadUnicodeEscape && !(
                        (kw->tokentype == TOK_STRICT_RESERVED && !strictMode()) ||
                         kw->tokentype == TOK_YIELD))
                {
                    reportError(JSMSG_ESCAPED_KEYWORD);
                    goto error;
                }

                tp->type = TOK_NAME;
                if (!checkForKeyword(kw, &tp->type))
                    goto error;
                // A genuine, unescaped keyword is finished here.  Otherwise
                // fall through and produce a TOK_NAME with an atom.
                if (tp->type != TOK_NAME && !hadUnicodeEscape)
                    goto out;
            }
        }

        JSAtom* atom = AtomizeChars(cx, chars, length);
        if (!atom)
            goto error;
        tp->type = TOK_NAME;
        tp->setName(atom->asPropertyName());
        goto out;
    }

    // Look for a decimal number.
    //
    if (c1kind == Dec) {
        tp = newToken(-1);
        numStart = userbuf.addressOfNextRawChar() - 1;

        // Also entered from the BasePrefix handling below (for "0", "08",
        // "09", ...), with tp and numStart already set and c holding the
        // current character.
      decimal:
        decimalPoint = NoDecimal;
        hasExp = false;
        while (JS7_ISDEC(c))
            c = getCharIgnoreEOL();

        if (c == '.') {
            decimalPoint = HasDecimal;
            // Also entered from the '.' case below for numbers such as ".5".
          decimal_dot:
            do {
                c = getCharIgnoreEOL();
            } while (JS7_ISDEC(c));
        }
        if (c == 'e' || c == 'E') {
            hasExp = true;
            c = getCharIgnoreEOL();
            if (c == '+' || c == '-')
                c = getCharIgnoreEOL();
            if (!JS7_ISDEC(c)) {
                ungetCharIgnoreEOL(c);
                reportError(JSMSG_MISSING_EXPONENT);
                goto error;
            }
            do {
                c = getCharIgnoreEOL();
            } while (JS7_ISDEC(c));
        }
        ungetCharIgnoreEOL(c);

        if (c != EOF && unicode::IsIdentifierStart(char16_t(c))) {
            reportError(JSMSG_IDSTART_AFTER_NUMBER);
            goto error;
        }

        // Unlike identifiers and strings, numbers cannot contain escaped
        // chars, so we don't need to use tokenbuf.  Instead we can just
        // convert the char16_t characters in userbuf to the numeric value.
        double dval;
        if (!((decimalPoint == HasDecimal) || hasExp)) {
            if (!GetDecimalInteger(cx, numStart, userbuf.addressOfNextRawChar(), &dval))
                goto error;
        } else {
            const char16_t* dummy;
            if (!js_strtod(cx, numStart, userbuf.addressOfNextRawChar(), &dummy, &dval))
                goto error;
        }
        tp->type = TOK_NUMBER;
        tp->setNumber(dval, decimalPoint);
        goto out;
    }

    // Look for a string or a template string.
    //
    if (c1kind == String) {
        if (!getStringOrTemplateToken(c, &tp))
            goto error;
        goto out;
    }

    // Skip over EOL chars, updating line state along the way.
    //
    if (c1kind == EOL) {
        // If it's a \r\n sequence: treat as a single EOL, skip over the \n.
        if (c == '\r' && userbuf.hasRawChars())
            userbuf.matchRawChar('\n');
        updateLineInfoForEOL();
        updateFlagsForEOL();
        goto retry;
    }

    // Look for a hexadecimal, octal, or binary number.
    //
    if (c1kind == BasePrefix) {
        tp = newToken(-1);
        int radix;
        c = getCharIgnoreEOL();
        if (c == 'x' || c == 'X') {
            radix = 16;
            c = getCharIgnoreEOL();
            if (!JS7_ISHEX(c)) {
                ungetCharIgnoreEOL(c);
                reportError(JSMSG_MISSING_HEXDIGITS);
                goto error;
            }
            numStart = userbuf.addressOfNextRawChar() - 1;  // one past the '0x'
            while (JS7_ISHEX(c))
                c = getCharIgnoreEOL();
        } else if (c == 'b' || c == 'B') {
            radix = 2;
            c = getCharIgnoreEOL();
            if (c != '0' && c != '1') {
                ungetCharIgnoreEOL(c);
                reportError(JSMSG_MISSING_BINARY_DIGITS);
                goto error;
            }
            numStart = userbuf.addressOfNextRawChar() - 1;  // one past the '0b'
            while (c == '0' || c == '1')
                c = getCharIgnoreEOL();
        } else if (c == 'o' || c == 'O') {
            radix = 8;
            c = getCharIgnoreEOL();
            if (c < '0' || c > '7') {
                ungetCharIgnoreEOL(c);
                reportError(JSMSG_MISSING_OCTAL_DIGITS);
                goto error;
            }
            numStart = userbuf.addressOfNextRawChar() - 1;  // one past the '0o'
            while ('0' <= c && c <= '7')
                c = getCharIgnoreEOL();
        } else if (JS7_ISDEC(c)) {
            // Legacy octal literal: '0' followed directly by digits.
            radix = 8;
            numStart = userbuf.addressOfNextRawChar() - 1;  // one past the '0'
            while (JS7_ISDEC(c)) {
                // Octal integer literals are not permitted in strict mode code.
                if (!reportStrictModeError(JSMSG_DEPRECATED_OCTAL))
                    goto error;

                // Outside strict mode, we permit 08 and 09 as decimal numbers,
                // which makes our behaviour a superset of the ECMA numeric
                // grammar. We might not always be so permissive, so we warn
                // about it.
                if (c >= '8') {
                    if (!reportWarning(JSMSG_BAD_OCTAL, c == '8' ? "08" : "09")) {
                        goto error;
                    }
                    goto decimal;   // use the decimal scanner for the rest of the number
                }
                c = getCharIgnoreEOL();
            }
        } else {
            // '0' not followed by a base prefix ('x', 'b' or 'o', in either
            // case) or a digit;  scan as a decimal number.
            numStart = userbuf.addressOfNextRawChar() - 1;
            goto decimal;
        }
        ungetCharIgnoreEOL(c);

        if (c != EOF && unicode::IsIdentifierStart(char16_t(c))) {
            reportError(JSMSG_IDSTART_AFTER_NUMBER);
            goto error;
        }

        double dval;
        const char16_t* dummy;
        if (!GetPrefixInteger(cx, numStart, userbuf.addressOfNextRawChar(), radix, &dummy, &dval))
            goto error;
        tp->type = TOK_NUMBER;
        tp->setNumber(dval, NoDecimal);
        goto out;
    }

    // This handles everything else.
    //
    MOZ_ASSERT(c1kind == Other);
    tp = newToken(-1);
    switch (c) {
      case '.':
        c = getCharIgnoreEOL();
        if (JS7_ISDEC(c)) {
            // A number beginning with '.': both the '.' and the first digit
            // have been consumed, hence the -2.
            numStart = userbuf.addressOfNextRawChar() - 2;
            decimalPoint = HasDecimal;
            hasExp = false;
            goto decimal_dot;
        }
        if (c == '.') {
            if (matchChar('.')) {
                tp->type = TOK_TRIPLEDOT;
                goto out;
            }
        }
        ungetCharIgnoreEOL(c);
        tp->type = TOK_DOT;
        goto out;

      case '=':
        if (matchChar('='))
            tp->type = matchChar('=') ? TOK_STRICTEQ : TOK_EQ;
        else if (matchChar('>'))
            tp->type = TOK_ARROW;
        else
            tp->type = TOK_ASSIGN;
        goto out;

      case '+':
        if (matchChar('+'))
            tp->type = TOK_INC;
        else
            tp->type = matchChar('=') ? TOK_ADDASSIGN : TOK_ADD;
        goto out;

      case '\\': {
        // An identifier may begin with a Unicode escape.  A non-zero return
        // value is the number of chars consumed after the backslash.
        uint32_t escapeLength = matchUnicodeEscapeIdStart(&qc);
        if (escapeLength > 0) {
            identStart = userbuf.addressOfNextRawChar() - escapeLength - 1;
            hadUnicodeEscape = true;
            goto identifier;
        }
        goto badchar;
      }

      case '|':
        if (matchChar('|'))
            tp->type = TOK_OR;
        else
            tp->type = matchChar('=') ? TOK_BITORASSIGN : TOK_BITOR;
        goto out;

      case '^':
        tp->type = matchChar('=') ? TOK_BITXORASSIGN : TOK_BITXOR;
        goto out;

      case '&':
        if (matchChar('&'))
            tp->type = TOK_AND;
        else
            tp->type = matchChar('=') ? TOK_BITANDASSIGN : TOK_BITAND;
        goto out;

      case '!':
        if (matchChar('='))
            tp->type = matchChar('=') ? TOK_STRICTNE : TOK_NE;
        else
            tp->type = TOK_NOT;
        goto out;

      case '<':
        // NB: treat HTML begin-comment as comment-till-end-of-line.
        if (matchChar('!')) {
            if (matchChar('-')) {
                if (matchChar('-'))
                    goto skipline;
                ungetChar('-');
            }
            ungetChar('!');
        }
        if (matchChar('<')) {
            tp->type = matchChar('=') ? TOK_LSHASSIGN : TOK_LSH;
        } else {
            tp->type = matchChar('=') ? TOK_LE : TOK_LT;
        }
        goto out;

      case '>':
        if (matchChar('>')) {
            if (matchChar('>'))
                tp->type = matchChar('=') ? TOK_URSHASSIGN : TOK_URSH;
            else
                tp->type = matchChar('=') ? TOK_RSHASSIGN : TOK_RSH;
        } else {
            tp->type = matchChar('=') ? TOK_GE : TOK_GT;
        }
        goto out;

      case '*':
        if (matchChar('*'))
            tp->type = matchChar('=') ? TOK_POWASSIGN : TOK_POW;
        else
            tp->type = matchChar('=') ? TOK_MULASSIGN : TOK_MUL;
        goto out;

      case '/':
        // Look for a single-line comment.
        if (matchChar('/')) {
            c = peekChar();
            if (c == '@' || c == '#') {
                // "//@" is the deprecated spelling of "//#" and triggers a
                // deprecation warning.
                bool shouldWarn = getChar() == '@';
                if (!getDirectives(false, shouldWarn))
                    goto error;
            }

            // Also the target for "<!--" and "-->" comments.
        skipline:
            while ((c = getChar()) != EOF && c != '\n')
                continue;
            ungetChar(c);
            // Give back the token slot taken by newToken() above: a comment
            // produces no token.
            cursor = (cursor - 1) & ntokensMask;
            goto retry;
        }

        // Look for a multi-line comment.
        if (matchChar('*')) {
            unsigned linenoBefore = lineno;
            while ((c = getChar()) != EOF &&
                   !(c == '*' && matchChar('/'))) {
                if (c == '@' || c == '#') {
                    bool shouldWarn = c == '@';
                    if (!getDirectives(true, shouldWarn))
                        goto error;
                }
            }
            if (c == EOF) {
                reportError(JSMSG_UNTERMINATED_COMMENT);
                goto error;
            }
            // A comment that spans lines counts as a line break.
            if (linenoBefore != lineno)
                updateFlagsForEOL();
            // Give back the token slot taken by newToken() above.
            cursor = (cursor - 1) & ntokensMask;
            goto retry;
        }

        // Look for a regexp.
        if (modifier == Operand) {
            tokenbuf.clear();

            bool inCharClass = false;
            for (;;) {
                c = getChar();
                if (c == '\\') {
                    // Keep the backslash and take the following char
                    // verbatim, whatever it is.
                    if (!tokenbuf.append(c))
                        goto error;
                    c = getChar();
                } else if (c == '[') {
                    inCharClass = true;
                } else if (c == ']') {
                    inCharClass = false;
                } else if (c == '/' && !inCharClass) {
                    // For compat with IE, allow unescaped / in char classes.
                    break;
                }
                if (c == '\n' || c == EOF) {
                    ungetChar(c);
                    reportError(JSMSG_UNTERMINATED_REGEXP);
                    goto error;
                }
                if (!tokenbuf.append(c))
                    goto error;
            }

            // Consume flags.  Each flag may appear at most once; a repeated
            // flag ends the loop and is then rejected by the check below.
            RegExpFlag reflags = NoFlags;
            unsigned length = tokenbuf.length() + 1;
            while (true) {
                c = peekChar();
                if (c == 'g' && !(reflags & GlobalFlag))
                    reflags = RegExpFlag(reflags | GlobalFlag);
                else if (c == 'i' && !(reflags & IgnoreCaseFlag))
                    reflags = RegExpFlag(reflags | IgnoreCaseFlag);
                else if (c == 'm' && !(reflags & MultilineFlag))
                    reflags = RegExpFlag(reflags | MultilineFlag);
                else if (c == 'y' && !(reflags & StickyFlag))
                    reflags = RegExpFlag(reflags | StickyFlag);
                else if (c == 'u' && !(reflags & UnicodeFlag))
                    reflags = RegExpFlag(reflags | UnicodeFlag);
                else
                    break;
                getChar();
                length++;
            }

            c = peekChar();
            if (JS7_ISLET(c)) {
                char buf[2] = { '\0', '\0' };
                // NOTE(review): presumably moves the token start past the
                // regexp body and the accepted flags so that the error is
                // reported at the offending flag character -- confirm.
                tp->pos.begin += length + 1;
                buf[0] = char(c);
                reportError(JSMSG_BAD_REGEXP_FLAG, buf);
                (void) getChar();
                goto error;
            }
            tp->type = TOK_REGEXP;
            tp->setRegExpFlags(reflags);
            goto out;
        }

        tp->type = matchChar('=') ? TOK_DIVASSIGN : TOK_DIV;
        goto out;

      case '%':
        tp->type = matchChar('=') ? TOK_MODASSIGN : TOK_MOD;
        goto out;

      case '-':
        if (matchChar('-')) {
            // "-->" is treated as a comment-till-end-of-line, but only when
            // flags.isDirtyLine is not set (it is set below whenever a token
            // is produced).
            if (peekChar() == '>' && !flags.isDirtyLine)
                goto skipline;
            tp->type = TOK_DEC;
        } else {
            tp->type = matchChar('=') ? TOK_SUBASSIGN : TOK_SUB;
        }
        goto out;

      badchar:
      default:
        reportError(JSMSG_ILLEGAL_CHARACTER);
        goto error;
    }

    MOZ_CRASH("should have jumped to |out| or |error|");

  out:
    if (flags.hitOOM)
        return reportError(JSMSG_OUT_OF_MEMORY);

    flags.isDirtyLine = true;
    tp->pos.end = userbuf.offset();
#ifdef DEBUG
    // Save the modifier used to get this token, so that if an ungetToken()
    // occurs and then the token is re-gotten (or peeked, etc.), we can assert
    // that both gets have used the same modifiers.
    tp->modifier = modifier;
    tp->modifierException = NoException;
#endif
    MOZ_ASSERT(IsTokenSane(tp));
    *ttp = tp->type;
    return true;

  error:
    if (flags.hitOOM)
        return reportError(JSMSG_OUT_OF_MEMORY);

    flags.isDirtyLine = true;
    tp->pos.end = userbuf.offset();
    // The token kind is meaningless after an error; mark it undefined so
    // memory checkers flag any later read.
    MOZ_MAKE_MEM_UNDEFINED(&tp->type, sizeof(tp->type));
    flags.hadError = true;
#ifdef DEBUG
    // Poisoning userbuf on error establishes an invariant: once an erroneous
    // token has been seen, userbuf will not be consulted again.  This is true
    // because the parser will deal with the illegal token by aborting parsing
    // immediately.
    userbuf.poison();
#endif
    MOZ_MAKE_MEM_UNDEFINED(ttp, sizeof(*ttp));
    return false;
}
1712 
1713 bool
1714 TokenStream::getBracedUnicode(uint32_t* cp)
1715 {
1716     consumeKnownChar('{');
1717 
1718     bool first = true;
1719     int32_t c;
1720     uint32_t code = 0;
1721     while (true) {
1722         c = getCharIgnoreEOL();
1723         if (c == EOF)
1724             return false;
1725         if (c == '}') {
1726             if (first)
1727                 return false;
1728             break;
1729         }
1730 
1731         if (!JS7_ISHEX(c))
1732             return false;
1733 
1734         code = (code << 4) | JS7_UNHEX(c);
1735         if (code > unicode::NonBMPMax)
1736             return false;
1737         first = false;
1738     }
1739 
1740     *cp = code;
1741     return true;
1742 }
1743 
1744 bool
1745 TokenStream::getStringOrTemplateToken(int untilChar, Token** tp)
1746 {
1747     int c;
1748     int nc = -1;
1749 
1750     bool parsingTemplate = (untilChar == '`');
1751 
1752     *tp = newToken(-1);
1753     tokenbuf.clear();
1754 
1755     // We need to detect any of these chars:  " or ', \n (or its
1756     // equivalents), \\, EOF.  Because we detect EOL sequences here and
1757     // put them back immediately, we can use getCharIgnoreEOL().
1758     while ((c = getCharIgnoreEOL()) != untilChar) {
1759         if (c == EOF) {
1760             ungetCharIgnoreEOL(c);
1761             reportError(JSMSG_UNTERMINATED_STRING);
1762             return false;
1763         }
1764 
1765         if (c == '\\') {
1766             switch (c = getChar()) {
1767               case 'b': c = '\b'; break;
1768               case 'f': c = '\f'; break;
1769               case 'n': c = '\n'; break;
1770               case 'r': c = '\r'; break;
1771               case 't': c = '\t'; break;
1772               case 'v': c = '\v'; break;
1773 
1774               case '\n':
1775                 // ES5 7.8.4: an escaped line terminator represents
1776                 // no character.
1777                 continue;
1778 
1779               // Unicode character specification.
1780               case 'u': {
1781                 if (peekChar() == '{') {
1782                     uint32_t code;
1783                     if (!getBracedUnicode(&code)) {
1784                         reportError(JSMSG_MALFORMED_ESCAPE, "Unicode");
1785                         return false;
1786                     }
1787 
1788                     MOZ_ASSERT(code <= unicode::NonBMPMax);
1789                     if (code < unicode::NonBMPMin) {
1790                         c = code;
1791                     } else {
1792                         if (!tokenbuf.append(unicode::LeadSurrogate(code)))
1793                             return false;
1794                         c = unicode::TrailSurrogate(code);
1795                     }
1796                     break;
1797                 }
1798 
1799                 char16_t cp[4];
1800                 if (peekChars(4, cp) &&
1801                     JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3]))
1802                 {
1803                     c = JS7_UNHEX(cp[0]);
1804                     c = (c << 4) + JS7_UNHEX(cp[1]);
1805                     c = (c << 4) + JS7_UNHEX(cp[2]);
1806                     c = (c << 4) + JS7_UNHEX(cp[3]);
1807                     skipChars(4);
1808                 } else {
1809                     reportError(JSMSG_MALFORMED_ESCAPE, "Unicode");
1810                     return false;
1811                 }
1812                 break;
1813               }
1814 
1815               // Hexadecimal character specification.
1816               case 'x': {
1817                 char16_t cp[2];
1818                 if (peekChars(2, cp) && JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1])) {
1819                     c = (JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1]);
1820                     skipChars(2);
1821                 } else {
1822                     reportError(JSMSG_MALFORMED_ESCAPE, "hexadecimal");
1823                     return false;
1824                 }
1825                 break;
1826               }
1827 
1828               default:
1829                 // Octal character specification.
1830                 if (JS7_ISOCT(c)) {
1831                     int32_t val = JS7_UNOCT(c);
1832 
1833                     c = peekChar();
1834 
1835                     // Strict mode code allows only \0, then a non-digit.
1836                     if (val != 0 || JS7_ISDEC(c)) {
1837                         if (parsingTemplate) {
1838                             reportError(JSMSG_DEPRECATED_OCTAL);
1839                             return false;
1840                         }
1841                         if (!reportStrictModeError(JSMSG_DEPRECATED_OCTAL))
1842                             return false;
1843                         flags.sawOctalEscape = true;
1844                     }
1845 
1846                     if (JS7_ISOCT(c)) {
1847                         val = 8 * val + JS7_UNOCT(c);
1848                         getChar();
1849                         c = peekChar();
1850                         if (JS7_ISOCT(c)) {
1851                             int32_t save = val;
1852                             val = 8 * val + JS7_UNOCT(c);
1853                             if (val <= 0xFF)
1854                                 getChar();
1855                             else
1856                                 val = save;
1857                         }
1858                     }
1859 
1860                     c = char16_t(val);
1861                 }
1862                 break;
1863             }
1864         } else if (TokenBuf::isRawEOLChar(c)) {
1865             if (!parsingTemplate) {
1866                 ungetCharIgnoreEOL(c);
1867                 reportError(JSMSG_UNTERMINATED_STRING);
1868                 return false;
1869             }
1870             if (c == '\r') {
1871                 c = '\n';
1872                 if (userbuf.peekRawChar() == '\n')
1873                     skipCharsIgnoreEOL(1);
1874             }
1875             updateLineInfoForEOL();
1876             updateFlagsForEOL();
1877         } else if (parsingTemplate && c == '$') {
1878             if ((nc = getCharIgnoreEOL()) == '{')
1879                 break;
1880             ungetCharIgnoreEOL(nc);
1881         }
1882 
1883         if (!tokenbuf.append(c)) {
1884             ReportOutOfMemory(cx);
1885             return false;
1886         }
1887     }
1888 
1889     JSAtom* atom = atomize(cx, tokenbuf);
1890     if (!atom)
1891         return false;
1892 
1893     if (!parsingTemplate) {
1894         (*tp)->type = TOK_STRING;
1895     } else {
1896         if (c == '$' && nc == '{')
1897             (*tp)->type = TOK_TEMPLATE_HEAD;
1898         else
1899             (*tp)->type = TOK_NO_SUBS_TEMPLATE;
1900     }
1901 
1902     (*tp)->setAtom(atom);
1903     return true;
1904 }
1905 
1906 JS_FRIEND_API(int)
1907 js_fgets(char* buf, int size, FILE* file)
1908 {
1909     int n, i, c;
1910     bool crflag;
1911 
1912     n = size - 1;
1913     if (n < 0)
1914         return -1;
1915 
1916     crflag = false;
1917     for (i = 0; i < n && (c = fast_getc(file)) != EOF; i++) {
1918         buf[i] = c;
1919         if (c == '\n') {        // any \n ends a line
1920             i++;                // keep the \n; we know there is room for \0
1921             break;
1922         }
1923         if (crflag) {           // \r not followed by \n ends line at the \r
1924             ungetc(c, file);
1925             break;              // and overwrite c in buf with \0
1926         }
1927         crflag = (c == '\r');
1928     }
1929 
1930     buf[i] = '\0';
1931     return i;
1932 }
1933 
1934 const char*
1935 frontend::TokenKindToDesc(TokenKind tt)
1936 {
1937     switch (tt) {
1938 #define EMIT_CASE(name, desc) case TOK_##name: return desc;
1939       FOR_EACH_TOKEN_KIND(EMIT_CASE)
1940 #undef EMIT_CASE
1941       case TOK_LIMIT:
1942         MOZ_ASSERT_UNREACHABLE("TOK_LIMIT should not be passed.");
1943         break;
1944     }
1945 
1946     return "<bad TokenKind>";
1947 }
1948 
1949 #ifdef DEBUG
1950 const char*
1951 TokenKindToString(TokenKind tt)
1952 {
1953     switch (tt) {
1954 #define EMIT_CASE(name, desc) case TOK_##name: return "TOK_" #name;
1955       FOR_EACH_TOKEN_KIND(EMIT_CASE)
1956 #undef EMIT_CASE
1957       case TOK_LIMIT: break;
1958     }
1959 
1960     return "<bad TokenKind>";
1961 }
1962 #endif
1963