1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set ts=8 sts=4 et sw=4 tw=99:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 // JS lexical scanner.
8
9 #include "frontend/TokenStream.h"
10
11 #include "mozilla/IntegerTypeTraits.h"
12 #include "mozilla/PodOperations.h"
13 #include "mozilla/UniquePtr.h"
14
15 #include <ctype.h>
16 #include <stdarg.h>
17 #include <stdio.h>
18 #include <string.h>
19
20 #include "jsatom.h"
21 #include "jscntxt.h"
22 #include "jscompartment.h"
23 #include "jsexn.h"
24 #include "jsnum.h"
25
26 #include "frontend/BytecodeCompiler.h"
27 #include "js/CharacterEncoding.h"
28 #include "vm/HelperThreads.h"
29 #include "vm/Keywords.h"
30 #include "vm/StringBuffer.h"
31
32 using namespace js;
33 using namespace js::frontend;
34 using namespace js::unicode;
35
36 using mozilla::Maybe;
37 using mozilla::PodAssign;
38 using mozilla::PodCopy;
39 using mozilla::PodZero;
40 using mozilla::UniquePtr;
41
42 struct KeywordInfo {
43 const char* chars; // C string with keyword text
44 TokenKind tokentype;
45 };
46
47 static const KeywordInfo keywords[] = {
48 #define KEYWORD_INFO(keyword, name, type) \
49 {js_##keyword##_str, type},
50 FOR_EACH_JAVASCRIPT_KEYWORD(KEYWORD_INFO)
51 #undef KEYWORD_INFO
52 };
53
54 // Returns a KeywordInfo for the specified characters, or nullptr if the string
55 // is not a keyword.
56 template <typename CharT>
57 static const KeywordInfo*
FindKeyword(const CharT * s,size_t length)58 FindKeyword(const CharT* s, size_t length)
59 {
60 MOZ_ASSERT(length != 0);
61
62 size_t i;
63 const KeywordInfo* kw;
64 const char* chars;
65
66 #define JSKW_LENGTH() length
67 #define JSKW_AT(column) s[column]
68 #define JSKW_GOT_MATCH(index) i = (index); goto got_match;
69 #define JSKW_TEST_GUESS(index) i = (index); goto test_guess;
70 #define JSKW_NO_MATCH() goto no_match;
71 #include "jsautokw.h"
72 #undef JSKW_NO_MATCH
73 #undef JSKW_TEST_GUESS
74 #undef JSKW_GOT_MATCH
75 #undef JSKW_AT
76 #undef JSKW_LENGTH
77
78 got_match:
79 return &keywords[i];
80
81 test_guess:
82 kw = &keywords[i];
83 chars = kw->chars;
84 do {
85 if (*s++ != (unsigned char)(*chars++))
86 goto no_match;
87 } while (--length != 0);
88 return kw;
89
90 no_match:
91 return nullptr;
92 }
93
94 static const KeywordInfo*
FindKeyword(JSLinearString * str)95 FindKeyword(JSLinearString* str)
96 {
97 JS::AutoCheckCannotGC nogc;
98 return str->hasLatin1Chars()
99 ? FindKeyword(str->latin1Chars(nogc), str->length())
100 : FindKeyword(str->twoByteChars(nogc), str->length());
101 }
102
// Returns true iff the |length| characters at |chars| form an identifier:
// a non-empty sequence whose first character satisfies IsIdentifierStart and
// whose remaining characters all satisfy IsIdentifierPart.
template <typename CharT>
static bool
IsIdentifier(const CharT* chars, size_t length)
{
    // The empty string is never an identifier.
    if (length == 0)
        return false;

    if (!IsIdentifierStart(chars[0]))
        return false;

    for (size_t i = 1; i < length; i++) {
        if (!IsIdentifierPart(chars[i]))
            return false;
    }

    return true;
}
121
122 bool
IsIdentifier(JSLinearString * str)123 frontend::IsIdentifier(JSLinearString* str)
124 {
125 JS::AutoCheckCannotGC nogc;
126 return str->hasLatin1Chars()
127 ? ::IsIdentifier(str->latin1Chars(nogc), str->length())
128 : ::IsIdentifier(str->twoByteChars(nogc), str->length());
129 }
130
131 bool
IsIdentifier(const char16_t * chars,size_t length)132 frontend::IsIdentifier(const char16_t* chars, size_t length)
133 {
134 return ::IsIdentifier(chars, length);
135 }
136
137 bool
IsKeyword(JSLinearString * str)138 frontend::IsKeyword(JSLinearString* str)
139 {
140 return FindKeyword(str) != nullptr;
141 }
142
SourceCoords(ExclusiveContext * cx,uint32_t ln)143 TokenStream::SourceCoords::SourceCoords(ExclusiveContext* cx, uint32_t ln)
144 : lineStartOffsets_(cx), initialLineNum_(ln), lastLineIndex_(0)
145 {
146 // This is actually necessary! Removing it causes compile errors on
147 // GCC and clang. You could try declaring this:
148 //
149 // const uint32_t TokenStream::SourceCoords::MAX_PTR;
150 //
151 // which fixes the GCC/clang error, but causes bustage on Windows. Sigh.
152 //
153 uint32_t maxPtr = MAX_PTR;
154
155 // The first line begins at buffer offset 0. MAX_PTR is the sentinel. The
156 // appends cannot fail because |lineStartOffsets_| has statically-allocated
157 // elements.
158 MOZ_ASSERT(lineStartOffsets_.capacity() >= 2);
159 MOZ_ALWAYS_TRUE(lineStartOffsets_.reserve(2));
160 lineStartOffsets_.infallibleAppend(0);
161 lineStartOffsets_.infallibleAppend(maxPtr);
162 }
163
164 MOZ_ALWAYS_INLINE bool
add(uint32_t lineNum,uint32_t lineStartOffset)165 TokenStream::SourceCoords::add(uint32_t lineNum, uint32_t lineStartOffset)
166 {
167 uint32_t lineIndex = lineNumToIndex(lineNum);
168 uint32_t sentinelIndex = lineStartOffsets_.length() - 1;
169
170 MOZ_ASSERT(lineStartOffsets_[0] == 0 && lineStartOffsets_[sentinelIndex] == MAX_PTR);
171
172 if (lineIndex == sentinelIndex) {
173 // We haven't seen this newline before. Update lineStartOffsets_
174 // only if lineStartOffsets_.append succeeds, to keep sentinel.
175 // Otherwise return false to tell TokenStream about OOM.
176 uint32_t maxPtr = MAX_PTR;
177 if (!lineStartOffsets_.append(maxPtr))
178 return false;
179
180 lineStartOffsets_[lineIndex] = lineStartOffset;
181 } else {
182 // We have seen this newline before (and ungot it). Do nothing (other
183 // than checking it hasn't mysteriously changed).
184 // This path can be executed after hitting OOM, so check lineIndex.
185 MOZ_ASSERT_IF(lineIndex < sentinelIndex, lineStartOffsets_[lineIndex] == lineStartOffset);
186 }
187 return true;
188 }
189
190 MOZ_ALWAYS_INLINE bool
fill(const TokenStream::SourceCoords & other)191 TokenStream::SourceCoords::fill(const TokenStream::SourceCoords& other)
192 {
193 MOZ_ASSERT(lineStartOffsets_.back() == MAX_PTR);
194 MOZ_ASSERT(other.lineStartOffsets_.back() == MAX_PTR);
195
196 if (lineStartOffsets_.length() >= other.lineStartOffsets_.length())
197 return true;
198
199 uint32_t sentinelIndex = lineStartOffsets_.length() - 1;
200 lineStartOffsets_[sentinelIndex] = other.lineStartOffsets_[sentinelIndex];
201
202 for (size_t i = sentinelIndex + 1; i < other.lineStartOffsets_.length(); i++) {
203 if (!lineStartOffsets_.append(other.lineStartOffsets_[i]))
204 return false;
205 }
206 return true;
207 }
208
209 MOZ_ALWAYS_INLINE uint32_t
lineIndexOf(uint32_t offset) const210 TokenStream::SourceCoords::lineIndexOf(uint32_t offset) const
211 {
212 uint32_t iMin, iMax, iMid;
213
214 if (lineStartOffsets_[lastLineIndex_] <= offset) {
215 // If we reach here, offset is on a line the same as or higher than
216 // last time. Check first for the +0, +1, +2 cases, because they
217 // typically cover 85--98% of cases.
218 if (offset < lineStartOffsets_[lastLineIndex_ + 1])
219 return lastLineIndex_; // lineIndex is same as last time
220
221 // If we reach here, there must be at least one more entry (plus the
222 // sentinel). Try it.
223 lastLineIndex_++;
224 if (offset < lineStartOffsets_[lastLineIndex_ + 1])
225 return lastLineIndex_; // lineIndex is one higher than last time
226
227 // The same logic applies here.
228 lastLineIndex_++;
229 if (offset < lineStartOffsets_[lastLineIndex_ + 1]) {
230 return lastLineIndex_; // lineIndex is two higher than last time
231 }
232
233 // No luck. Oh well, we have a better-than-default starting point for
234 // the binary search.
235 iMin = lastLineIndex_ + 1;
236 MOZ_ASSERT(iMin < lineStartOffsets_.length() - 1); // -1 due to the sentinel
237
238 } else {
239 iMin = 0;
240 }
241
242 // This is a binary search with deferred detection of equality, which was
243 // marginally faster in this case than a standard binary search.
244 // The -2 is because |lineStartOffsets_.length() - 1| is the sentinel, and we
245 // want one before that.
246 iMax = lineStartOffsets_.length() - 2;
247 while (iMax > iMin) {
248 iMid = iMin + (iMax - iMin) / 2;
249 if (offset >= lineStartOffsets_[iMid + 1])
250 iMin = iMid + 1; // offset is above lineStartOffsets_[iMid]
251 else
252 iMax = iMid; // offset is below or within lineStartOffsets_[iMid]
253 }
254 MOZ_ASSERT(iMax == iMin);
255 MOZ_ASSERT(lineStartOffsets_[iMin] <= offset && offset < lineStartOffsets_[iMin + 1]);
256 lastLineIndex_ = iMin;
257 return iMin;
258 }
259
260 uint32_t
lineNum(uint32_t offset) const261 TokenStream::SourceCoords::lineNum(uint32_t offset) const
262 {
263 uint32_t lineIndex = lineIndexOf(offset);
264 return lineIndexToNum(lineIndex);
265 }
266
267 uint32_t
columnIndex(uint32_t offset) const268 TokenStream::SourceCoords::columnIndex(uint32_t offset) const
269 {
270 uint32_t lineIndex = lineIndexOf(offset);
271 uint32_t lineStartOffset = lineStartOffsets_[lineIndex];
272 MOZ_ASSERT(offset >= lineStartOffset);
273 return offset - lineStartOffset;
274 }
275
276 void
lineNumAndColumnIndex(uint32_t offset,uint32_t * lineNum,uint32_t * columnIndex) const277 TokenStream::SourceCoords::lineNumAndColumnIndex(uint32_t offset, uint32_t* lineNum,
278 uint32_t* columnIndex) const
279 {
280 uint32_t lineIndex = lineIndexOf(offset);
281 *lineNum = lineIndexToNum(lineIndex);
282 uint32_t lineStartOffset = lineStartOffsets_[lineIndex];
283 MOZ_ASSERT(offset >= lineStartOffset);
284 *columnIndex = offset - lineStartOffset;
285 }
286
287 #ifdef _MSC_VER
288 #pragma warning(push)
289 #pragma warning(disable:4351)
290 #endif
291
TokenStream(ExclusiveContext * cx,const ReadOnlyCompileOptions & options,const char16_t * base,size_t length,StrictModeGetter * smg)292 TokenStream::TokenStream(ExclusiveContext* cx, const ReadOnlyCompileOptions& options,
293 const char16_t* base, size_t length, StrictModeGetter* smg)
294 : srcCoords(cx, options.lineno),
295 options_(options),
296 tokens(),
297 cursor(),
298 lookahead(),
299 lineno(options.lineno),
300 flags(),
301 linebase(0),
302 prevLinebase(size_t(-1)),
303 userbuf(cx, base, length, options.column),
304 filename(options.filename()),
305 displayURL_(nullptr),
306 sourceMapURL_(nullptr),
307 tokenbuf(cx),
308 cx(cx),
309 mutedErrors(options.mutedErrors()),
310 strictModeGetter(smg)
311 {
312 // Nb: the following tables could be static, but initializing them here is
313 // much easier. Don't worry, the time to initialize them for each
314 // TokenStream is trivial. See bug 639420.
315
316 // See Parser::assignExpr() for an explanation of isExprEnding[].
317 memset(isExprEnding, 0, sizeof(isExprEnding));
318 isExprEnding[TOK_COMMA] = 1;
319 isExprEnding[TOK_SEMI] = 1;
320 isExprEnding[TOK_COLON] = 1;
321 isExprEnding[TOK_RP] = 1;
322 isExprEnding[TOK_RB] = 1;
323 isExprEnding[TOK_RC] = 1;
324 }
325
326 #ifdef _MSC_VER
327 #pragma warning(pop)
328 #endif
329
330 bool
checkOptions()331 TokenStream::checkOptions()
332 {
333 // Constrain starting columns to half of the range of a signed 32-bit value,
334 // to avoid overflow.
335 if (options().column >= mozilla::MaxValue<int32_t>::value / 2 + 1) {
336 reportErrorNoOffset(JSMSG_BAD_COLUMN_NUMBER);
337 return false;
338 }
339
340 return true;
341 }
342
~TokenStream()343 TokenStream::~TokenStream()
344 {
345 }
346
// Map fast_getc onto the fastest getc variant available, as advertised by the
// build configuration macros, falling back to plain getc.
#if defined(HAVE_GETC_UNLOCKED)
#  define fast_getc getc_unlocked
#elif defined(HAVE__GETC_NOLOCK)
#  define fast_getc _getc_nolock
#else
#  define fast_getc getc
#endif
355
356 MOZ_ALWAYS_INLINE void
updateLineInfoForEOL()357 TokenStream::updateLineInfoForEOL()
358 {
359 prevLinebase = linebase;
360 linebase = userbuf.offset();
361 lineno++;
362 if (!srcCoords.add(lineno, linebase))
363 flags.hitOOM = true;
364 }
365
366 MOZ_ALWAYS_INLINE void
updateFlagsForEOL()367 TokenStream::updateFlagsForEOL()
368 {
369 flags.isDirtyLine = false;
370 }
371
372 // This gets the next char, normalizing all EOL sequences to '\n' as it goes.
373 int32_t
getChar()374 TokenStream::getChar()
375 {
376 int32_t c;
377 if (MOZ_LIKELY(userbuf.hasRawChars())) {
378 c = userbuf.getRawChar();
379
380 // Normalize the char16_t if it was a newline.
381 if (MOZ_UNLIKELY(c == '\n'))
382 goto eol;
383 if (MOZ_UNLIKELY(c == '\r')) {
384 // If it's a \r\n sequence: treat as a single EOL, skip over the \n.
385 if (MOZ_LIKELY(userbuf.hasRawChars()))
386 userbuf.matchRawChar('\n');
387 goto eol;
388 }
389 if (MOZ_UNLIKELY(c == LINE_SEPARATOR || c == PARA_SEPARATOR))
390 goto eol;
391
392 return c;
393 }
394
395 flags.isEOF = true;
396 return EOF;
397
398 eol:
399 updateLineInfoForEOL();
400 return '\n';
401 }
402
403 // This gets the next char. It does nothing special with EOL sequences, not
404 // even updating the line counters. It can be used safely if (a) the
405 // resulting char is guaranteed to be ungotten (by ungetCharIgnoreEOL()) if
406 // it's an EOL, and (b) the line-related state (lineno, linebase) is not used
407 // before it's ungotten.
408 int32_t
getCharIgnoreEOL()409 TokenStream::getCharIgnoreEOL()
410 {
411 if (MOZ_LIKELY(userbuf.hasRawChars()))
412 return userbuf.getRawChar();
413
414 flags.isEOF = true;
415 return EOF;
416 }
417
418 void
ungetChar(int32_t c)419 TokenStream::ungetChar(int32_t c)
420 {
421 if (c == EOF)
422 return;
423 MOZ_ASSERT(!userbuf.atStart());
424 userbuf.ungetRawChar();
425 if (c == '\n') {
426 #ifdef DEBUG
427 int32_t c2 = userbuf.peekRawChar();
428 MOZ_ASSERT(TokenBuf::isRawEOLChar(c2));
429 #endif
430
431 // If it's a \r\n sequence, also unget the \r.
432 if (!userbuf.atStart())
433 userbuf.matchRawCharBackwards('\r');
434
435 MOZ_ASSERT(prevLinebase != size_t(-1)); // we should never get more than one EOL char
436 linebase = prevLinebase;
437 prevLinebase = size_t(-1);
438 lineno--;
439 } else {
440 MOZ_ASSERT(userbuf.peekRawChar() == c);
441 }
442 }
443
444 void
ungetCharIgnoreEOL(int32_t c)445 TokenStream::ungetCharIgnoreEOL(int32_t c)
446 {
447 if (c == EOF)
448 return;
449 MOZ_ASSERT(!userbuf.atStart());
450 userbuf.ungetRawChar();
451 }
452
453 // Return true iff |n| raw characters can be read from this without reading past
454 // EOF or a newline, and copy those characters into |cp| if so. The characters
455 // are not consumed: use skipChars(n) to do so after checking that the consumed
456 // characters had appropriate values.
457 bool
peekChars(int n,char16_t * cp)458 TokenStream::peekChars(int n, char16_t* cp)
459 {
460 int i, j;
461 int32_t c;
462
463 for (i = 0; i < n; i++) {
464 c = getCharIgnoreEOL();
465 if (c == EOF)
466 break;
467 if (c == '\n') {
468 ungetCharIgnoreEOL(c);
469 break;
470 }
471 cp[i] = char16_t(c);
472 }
473 for (j = i - 1; j >= 0; j--)
474 ungetCharIgnoreEOL(cp[j]);
475 return i == n;
476 }
477
478 size_t
findEOLMax(size_t start,size_t max)479 TokenStream::TokenBuf::findEOLMax(size_t start, size_t max)
480 {
481 const char16_t* p = rawCharPtrAt(start);
482
483 size_t n = 0;
484 while (true) {
485 if (p >= limit_)
486 break;
487 if (n >= max)
488 break;
489 n++;
490 if (TokenBuf::isRawEOLChar(*p++))
491 break;
492 }
493 return start + n;
494 }
495
496 bool
advance(size_t position)497 TokenStream::advance(size_t position)
498 {
499 const char16_t* end = userbuf.rawCharPtrAt(position);
500 while (userbuf.addressOfNextRawChar() < end)
501 getChar();
502
503 Token* cur = &tokens[cursor];
504 cur->pos.begin = userbuf.offset();
505 MOZ_MAKE_MEM_UNDEFINED(&cur->type, sizeof(cur->type));
506 lookahead = 0;
507
508 if (flags.hitOOM)
509 return reportError(JSMSG_OUT_OF_MEMORY);
510
511 return true;
512 }
513
514 void
tell(Position * pos)515 TokenStream::tell(Position* pos)
516 {
517 pos->buf = userbuf.addressOfNextRawChar(/* allowPoisoned = */ true);
518 pos->flags = flags;
519 pos->lineno = lineno;
520 pos->linebase = linebase;
521 pos->prevLinebase = prevLinebase;
522 pos->lookahead = lookahead;
523 pos->currentToken = currentToken();
524 for (unsigned i = 0; i < lookahead; i++)
525 pos->lookaheadTokens[i] = tokens[(cursor + 1 + i) & ntokensMask];
526 }
527
528 void
seek(const Position & pos)529 TokenStream::seek(const Position& pos)
530 {
531 userbuf.setAddressOfNextRawChar(pos.buf, /* allowPoisoned = */ true);
532 flags = pos.flags;
533 lineno = pos.lineno;
534 linebase = pos.linebase;
535 prevLinebase = pos.prevLinebase;
536 lookahead = pos.lookahead;
537
538 tokens[cursor] = pos.currentToken;
539 for (unsigned i = 0; i < lookahead; i++)
540 tokens[(cursor + 1 + i) & ntokensMask] = pos.lookaheadTokens[i];
541 }
542
543 bool
seek(const Position & pos,const TokenStream & other)544 TokenStream::seek(const Position& pos, const TokenStream& other)
545 {
546 if (!srcCoords.fill(other.srcCoords))
547 return false;
548 seek(pos);
549 return true;
550 }
551
552 bool
reportStrictModeErrorNumberVA(uint32_t offset,bool strictMode,unsigned errorNumber,va_list args)553 TokenStream::reportStrictModeErrorNumberVA(uint32_t offset, bool strictMode, unsigned errorNumber,
554 va_list args)
555 {
556 // In strict mode code, this is an error, not merely a warning.
557 unsigned flags;
558 if (strictMode)
559 flags = JSREPORT_ERROR;
560 else if (options().extraWarningsOption)
561 flags = JSREPORT_WARNING | JSREPORT_STRICT;
562 else
563 return true;
564
565 return reportCompileErrorNumberVA(offset, flags, errorNumber, args);
566 }
567
568 void
throwError(JSContext * cx)569 CompileError::throwError(JSContext* cx)
570 {
571 // If there's a runtime exception type associated with this error
572 // number, set that as the pending exception. For errors occuring at
573 // compile time, this is very likely to be a JSEXN_SYNTAXERR.
574 //
575 // If an exception is thrown but not caught, the JSREPORT_EXCEPTION
576 // flag will be set in report.flags. Proper behavior for an error
577 // reporter is to ignore a report with this flag for all but top-level
578 // compilation errors. The exception will remain pending, and so long
579 // as the non-top-level "load", "eval", or "compile" native function
580 // returns false, the top-level reporter will eventually receive the
581 // uncaught exception report.
582 if (!ErrorToException(cx, message, &report, nullptr, nullptr))
583 CallErrorReporter(cx, message, &report);
584 }
585
~CompileError()586 CompileError::~CompileError()
587 {
588 js_free((void*)report.linebuf());
589 js_free((void*)report.ucmessage);
590 js_free(message);
591 message = nullptr;
592
593 if (report.messageArgs) {
594 if (argumentsType == ArgumentsAreASCII) {
595 unsigned i = 0;
596 while (report.messageArgs[i])
597 js_free((void*)report.messageArgs[i++]);
598 }
599 js_free(report.messageArgs);
600 }
601
602 PodZero(&report);
603 }
604
605 bool
reportCompileErrorNumberVA(uint32_t offset,unsigned flags,unsigned errorNumber,va_list args)606 TokenStream::reportCompileErrorNumberVA(uint32_t offset, unsigned flags, unsigned errorNumber,
607 va_list args)
608 {
609 bool warning = JSREPORT_IS_WARNING(flags);
610
611 if (warning && options().werrorOption) {
612 flags &= ~JSREPORT_WARNING;
613 warning = false;
614 }
615
616 // On the main thread, report the error immediately. When compiling off
617 // thread, save the error so that the main thread can report it later.
618 CompileError tempErr;
619 CompileError& err = cx->isJSContext() ? tempErr : cx->addPendingCompileError();
620
621 err.report.flags = flags;
622 err.report.errorNumber = errorNumber;
623 err.report.filename = filename;
624 err.report.isMuted = mutedErrors;
625 if (offset == NoOffset) {
626 err.report.lineno = 0;
627 err.report.column = 0;
628 } else {
629 err.report.lineno = srcCoords.lineNum(offset);
630 err.report.column = srcCoords.columnIndex(offset);
631 }
632
633 // If we have no location information, try to get one from the caller.
634 bool callerFilename = false;
635 if (offset != NoOffset && !err.report.filename && cx->isJSContext()) {
636 NonBuiltinFrameIter iter(cx->asJSContext(),
637 FrameIter::ALL_CONTEXTS, FrameIter::GO_THROUGH_SAVED,
638 FrameIter::FOLLOW_DEBUGGER_EVAL_PREV_LINK,
639 cx->compartment()->principals());
640 if (!iter.done() && iter.scriptFilename()) {
641 callerFilename = true;
642 err.report.filename = iter.scriptFilename();
643 err.report.lineno = iter.computeLine(&err.report.column);
644 }
645 }
646
647 err.argumentsType = (flags & JSREPORT_UC) ? ArgumentsAreUnicode : ArgumentsAreASCII;
648
649 if (!ExpandErrorArgumentsVA(cx, GetErrorMessage, nullptr, errorNumber, &err.message,
650 &err.report, err.argumentsType, args))
651 {
652 return false;
653 }
654
655 // Given a token, T, that we want to complain about: if T's (starting)
656 // lineno doesn't match TokenStream's lineno, that means we've scanned past
657 // the line that T starts on, which makes it hard to print some or all of
658 // T's (starting) line for context.
659 //
660 // So we don't even try, leaving report.linebuf and friends zeroed. This
661 // means that any error involving a multi-line token (e.g. an unterminated
662 // multi-line string literal) won't have a context printed.
663 if (offset != NoOffset && err.report.lineno == lineno && !callerFilename) {
664 // We show only a portion (a "window") of the line around the erroneous
665 // token -- the first char in the token, plus |windowRadius| chars
666 // before it and |windowRadius - 1| chars after it. This is because
667 // lines can be very long and printing the whole line is (a) not that
668 // helpful, and (b) can waste a lot of memory. See bug 634444.
669 static const size_t windowRadius = 60;
670
671 // The window must start within the current line, no earlier than
672 // windowRadius characters before offset.
673 size_t windowStart = (offset - linebase > windowRadius) ?
674 offset - windowRadius :
675 linebase;
676
677 // The window must start within the portion of the current line
678 // that we actually have in our buffer.
679 if (windowStart < userbuf.startOffset())
680 windowStart = userbuf.startOffset();
681
682 // The window must end within the current line, no later than
683 // windowRadius after offset.
684 size_t windowEnd = userbuf.findEOLMax(offset, windowRadius);
685 size_t windowLength = windowEnd - windowStart;
686 MOZ_ASSERT(windowLength <= windowRadius * 2);
687
688 // Create the windowed strings.
689 StringBuffer windowBuf(cx);
690 if (!windowBuf.append(userbuf.rawCharPtrAt(windowStart), windowLength) ||
691 !windowBuf.append('\0'))
692 {
693 return false;
694 }
695
696 // The window into the offending source line, without final \n.
697 mozilla::UniquePtr<char16_t[], JS::FreePolicy> linebuf(windowBuf.stealChars());
698 if (!linebuf)
699 return false;
700
701 err.report.initLinebuf(linebuf.release(), windowLength, offset - windowStart);
702 }
703
704 if (cx->isJSContext())
705 err.throwError(cx->asJSContext());
706
707 return warning;
708 }
709
710 bool
reportStrictModeError(unsigned errorNumber,...)711 TokenStream::reportStrictModeError(unsigned errorNumber, ...)
712 {
713 va_list args;
714 va_start(args, errorNumber);
715 bool result = reportStrictModeErrorNumberVA(currentToken().pos.begin, strictMode(),
716 errorNumber, args);
717 va_end(args);
718 return result;
719 }
720
721 bool
reportError(unsigned errorNumber,...)722 TokenStream::reportError(unsigned errorNumber, ...)
723 {
724 va_list args;
725 va_start(args, errorNumber);
726 bool result = reportCompileErrorNumberVA(currentToken().pos.begin, JSREPORT_ERROR, errorNumber,
727 args);
728 va_end(args);
729 return result;
730 }
731
732 bool
reportErrorNoOffset(unsigned errorNumber,...)733 TokenStream::reportErrorNoOffset(unsigned errorNumber, ...)
734 {
735 va_list args;
736 va_start(args, errorNumber);
737 bool result = reportCompileErrorNumberVA(NoOffset, JSREPORT_ERROR, errorNumber,
738 args);
739 va_end(args);
740 return result;
741 }
742
743 bool
reportWarning(unsigned errorNumber,...)744 TokenStream::reportWarning(unsigned errorNumber, ...)
745 {
746 va_list args;
747 va_start(args, errorNumber);
748 bool result = reportCompileErrorNumberVA(currentToken().pos.begin, JSREPORT_WARNING,
749 errorNumber, args);
750 va_end(args);
751 return result;
752 }
753
754 bool
reportStrictWarningErrorNumberVA(uint32_t offset,unsigned errorNumber,va_list args)755 TokenStream::reportStrictWarningErrorNumberVA(uint32_t offset, unsigned errorNumber, va_list args)
756 {
757 if (!options().extraWarningsOption)
758 return true;
759
760 return reportCompileErrorNumberVA(offset, JSREPORT_STRICT|JSREPORT_WARNING, errorNumber, args);
761 }
762
763 void
reportAsmJSError(uint32_t offset,unsigned errorNumber,...)764 TokenStream::reportAsmJSError(uint32_t offset, unsigned errorNumber, ...)
765 {
766 va_list args;
767 va_start(args, errorNumber);
768 unsigned flags = options().throwOnAsmJSValidationFailureOption
769 ? JSREPORT_ERROR
770 : JSREPORT_WARNING;
771 reportCompileErrorNumberVA(offset, flags, errorNumber, args);
772 va_end(args);
773 }
774
775 // We have encountered a '\': check for a Unicode escape sequence after it.
776 // Return 'true' and the character code value (by value) if we found a
777 // Unicode escape sequence. Otherwise, return 'false'. In both cases, do not
778 // advance along the buffer.
779 bool
peekUnicodeEscape(int * result)780 TokenStream::peekUnicodeEscape(int* result)
781 {
782 char16_t cp[5];
783
784 if (peekChars(5, cp) && cp[0] == 'u' &&
785 JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) &&
786 JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4]))
787 {
788 *result = (((((JS7_UNHEX(cp[1]) << 4)
789 + JS7_UNHEX(cp[2])) << 4)
790 + JS7_UNHEX(cp[3])) << 4)
791 + JS7_UNHEX(cp[4]);
792 return true;
793 }
794 return false;
795 }
796
797 bool
matchUnicodeEscapeIdStart(int32_t * cp)798 TokenStream::matchUnicodeEscapeIdStart(int32_t* cp)
799 {
800 if (peekUnicodeEscape(cp) && IsIdentifierStart(*cp)) {
801 skipChars(5);
802 return true;
803 }
804 return false;
805 }
806
807 bool
matchUnicodeEscapeIdent(int32_t * cp)808 TokenStream::matchUnicodeEscapeIdent(int32_t* cp)
809 {
810 if (peekUnicodeEscape(cp) && IsIdentifierPart(*cp)) {
811 skipChars(5);
812 return true;
813 }
814 return false;
815 }
816
// Helper function which returns true if the first strlen(q) characters of |p|
// are the same as the characters of the NUL-terminated string |q|.
//
// |p| need not be NUL-terminated, but the caller must guarantee that it holds
// at least strlen(q) characters. An empty |q| matches anything.
//
// Each byte of |q| is widened through unsigned char before being compared, as
// FindKeyword does. Comparing a plain char directly would sign-extend bytes
// >= 0x80 on platforms where char is signed, so they could never equal the
// corresponding char16_t.
static bool
CharsMatch(const char16_t* p, const char* q)
{
    for (; *q; ++p, ++q) {
        if (*p != static_cast<unsigned char>(*q))
            return false;
    }
    return true;
}
827
828 bool
getDirectives(bool isMultiline,bool shouldWarnDeprecated)829 TokenStream::getDirectives(bool isMultiline, bool shouldWarnDeprecated)
830 {
831 // Match directive comments used in debugging, such as "//# sourceURL" and
832 // "//# sourceMappingURL". Use of "//@" instead of "//#" is deprecated.
833 //
834 // To avoid a crashing bug in IE, several JavaScript transpilers wrap single
835 // line comments containing a source mapping URL inside a multiline
836 // comment. To avoid potentially expensive lookahead and backtracking, we
837 // only check for this case if we encounter a '#' character.
838
839 if (!getDisplayURL(isMultiline, shouldWarnDeprecated))
840 return false;
841 if (!getSourceMappingURL(isMultiline, shouldWarnDeprecated))
842 return false;
843
844 return true;
845 }
846
847 bool
getDirective(bool isMultiline,bool shouldWarnDeprecated,const char * directive,int directiveLength,const char * errorMsgPragma,UniquePtr<char16_t[],JS::FreePolicy> * destination)848 TokenStream::getDirective(bool isMultiline, bool shouldWarnDeprecated,
849 const char* directive, int directiveLength,
850 const char* errorMsgPragma,
851 UniquePtr<char16_t[], JS::FreePolicy>* destination)
852 {
853 MOZ_ASSERT(directiveLength <= 18);
854 char16_t peeked[18];
855 int32_t c;
856
857 if (peekChars(directiveLength, peeked) && CharsMatch(peeked, directive)) {
858 if (shouldWarnDeprecated &&
859 !reportWarning(JSMSG_DEPRECATED_PRAGMA, errorMsgPragma))
860 return false;
861
862 skipChars(directiveLength);
863 tokenbuf.clear();
864
865 while ((c = peekChar()) && c != EOF && !IsSpaceOrBOM2(c)) {
866 getChar();
867 // Debugging directives can occur in both single- and multi-line
868 // comments. If we're currently inside a multi-line comment, we also
869 // need to recognize multi-line comment terminators.
870 if (isMultiline && c == '*' && peekChar() == '/') {
871 ungetChar('*');
872 break;
873 }
874 if (!tokenbuf.append(c))
875 return false;
876 }
877
878 if (tokenbuf.empty()) {
879 // The directive's URL was missing, but this is not quite an
880 // exception that we should stop and drop everything for.
881 return true;
882 }
883
884 size_t length = tokenbuf.length();
885
886 *destination = cx->make_pod_array<char16_t>(length + 1);
887 if (!*destination)
888 return false;
889
890 PodCopy(destination->get(), tokenbuf.begin(), length);
891 (*destination)[length] = '\0';
892 }
893
894 return true;
895 }
896
897 bool
getDisplayURL(bool isMultiline,bool shouldWarnDeprecated)898 TokenStream::getDisplayURL(bool isMultiline, bool shouldWarnDeprecated)
899 {
900 // Match comments of the form "//# sourceURL=<url>" or
901 // "/\* //# sourceURL=<url> *\/"
902 //
903 // Note that while these are labeled "sourceURL" in the source text,
904 // internally we refer to it as a "displayURL" to distinguish what the
905 // developer would like to refer to the source as from the source's actual
906 // URL.
907
908 return getDirective(isMultiline, shouldWarnDeprecated, " sourceURL=", 11,
909 "sourceURL", &displayURL_);
910 }
911
912 bool
getSourceMappingURL(bool isMultiline,bool shouldWarnDeprecated)913 TokenStream::getSourceMappingURL(bool isMultiline, bool shouldWarnDeprecated)
914 {
915 // Match comments of the form "//# sourceMappingURL=<url>" or
916 // "/\* //# sourceMappingURL=<url> *\/"
917
918 return getDirective(isMultiline, shouldWarnDeprecated, " sourceMappingURL=", 18,
919 "sourceMappingURL", &sourceMapURL_);
920 }
921
922 MOZ_ALWAYS_INLINE Token*
newToken(ptrdiff_t adjust)923 TokenStream::newToken(ptrdiff_t adjust)
924 {
925 cursor = (cursor + 1) & ntokensMask;
926 Token* tp = &tokens[cursor];
927 tp->pos.begin = userbuf.offset() + adjust;
928
929 // NOTE: tp->pos.end is not set until the very end of getTokenInternal().
930 MOZ_MAKE_MEM_UNDEFINED(&tp->pos.end, sizeof(tp->pos.end));
931
932 return tp;
933 }
934
935 MOZ_ALWAYS_INLINE JSAtom*
atomize(ExclusiveContext * cx,CharBuffer & cb)936 TokenStream::atomize(ExclusiveContext* cx, CharBuffer& cb)
937 {
938 return AtomizeChars(cx, cb.begin(), cb.length());
939 }
940
#ifdef DEBUG
// Debug-only sanity check for a finished token: its kind must be a real token
// kind and its end must not precede its beginning.
static bool
IsTokenSane(Token* tp)
{
    // Nb: TOK_EOL should never be used in an actual Token; it should only be
    // returned as a TokenKind from peekTokenSameLine().
    const bool kindInRange = tp->type >= 0 && tp->type < TOK_LIMIT;
    if (!kindInRange || tp->type == TOK_EOL)
        return false;

    return tp->pos.end >= tp->pos.begin;
}
#endif
956
957 bool
putIdentInTokenbuf(const char16_t * identStart)958 TokenStream::putIdentInTokenbuf(const char16_t* identStart)
959 {
960 int32_t c, qc;
961 const char16_t* tmp = userbuf.addressOfNextRawChar();
962 userbuf.setAddressOfNextRawChar(identStart);
963
964 tokenbuf.clear();
965 for (;;) {
966 c = getCharIgnoreEOL();
967 if (!IsIdentifierPart(c)) {
968 if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
969 break;
970 c = qc;
971 }
972 if (!tokenbuf.append(c)) {
973 userbuf.setAddressOfNextRawChar(tmp);
974 return false;
975 }
976 }
977 userbuf.setAddressOfNextRawChar(tmp);
978 return true;
979 }
980
981 bool
checkForKeyword(const KeywordInfo * kw,TokenKind * ttp)982 TokenStream::checkForKeyword(const KeywordInfo* kw, TokenKind* ttp)
983 {
984 if (kw->tokentype == TOK_RESERVED)
985 return reportError(JSMSG_RESERVED_ID, kw->chars);
986
987 if (kw->tokentype == TOK_STRICT_RESERVED)
988 return reportStrictModeError(JSMSG_RESERVED_ID, kw->chars);
989
990 // Treat 'let' as an identifier and contextually a keyword in sloppy mode.
991 // It is always a keyword in strict mode.
992 if (kw->tokentype == TOK_LET && !strictMode())
993 return true;
994
995 // Working keyword.
996 if (ttp) {
997 *ttp = kw->tokentype;
998 return true;
999 }
1000
1001 return reportError(JSMSG_RESERVED_ID, kw->chars);
1002 }
1003
1004 bool
checkForKeyword(JSAtom * atom,TokenKind * ttp)1005 TokenStream::checkForKeyword(JSAtom* atom, TokenKind* ttp)
1006 {
1007 const KeywordInfo* kw = FindKeyword(atom);
1008 if (!kw)
1009 return true;
1010
1011 return checkForKeyword(kw, ttp);
1012 }
1013
1014 enum FirstCharKind {
1015 // A char16_t has the 'OneChar' kind if it, by itself, constitutes a valid
1016 // token that cannot also be a prefix of a longer token. E.g. ';' has the
1017 // OneChar kind, but '+' does not, because '++' and '+=' are valid longer tokens
1018 // that begin with '+'.
1019 //
1020 // The few token kinds satisfying these properties cover roughly 35--45%
1021 // of the tokens seen in practice.
1022 //
1023 // We represent the 'OneChar' kind with any positive value less than
1024 // TOK_LIMIT. This representation lets us associate each one-char token
1025 // char16_t with a TokenKind and thus avoid a subsequent char16_t-to-TokenKind
1026 // conversion.
1027 OneChar_Min = 0,
1028 OneChar_Max = TOK_LIMIT - 1,
1029
1030 Space = TOK_LIMIT,
1031 Ident,
1032 Dec,
1033 String,
1034 EOL,
1035 BasePrefix,
1036 Other,
1037
1038 LastCharKind = Other
1039 };
1040
1041 // OneChar: 40, 41, 44, 58, 59, 63, 91, 93, 123, 125, 126:
1042 // '(', ')', ',', ':', ';', '?', '[', ']', '{', '}', '~'
1043 // Ident: 36, 65..90, 95, 97..122: '$', 'A'..'Z', '_', 'a'..'z'
1044 // Dot: 46: '.'
1045 // Equals: 61: '='
1046 // String: 34, 39: '"', '\''
1047 // Dec: 49..57: '1'..'9'
1048 // Plus: 43: '+'
1049 // BasePrefix: 48: '0'
1050 // Space: 9, 11, 12, 32: '\t', '\v', '\f', ' '
1051 // EOL: 10, 13: '\n', '\r'
1052 //
1053 #define T_COMMA TOK_COMMA
1054 #define T_COLON TOK_COLON
1055 #define T_BITNOT TOK_BITNOT
1056 #define Templat String
1057 #define _______ Other
1058 static const uint8_t firstCharKinds[] = {
1059 /* 0 1 2 3 4 5 6 7 8 9 */
1060 /* 0+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, Space,
1061 /* 10+ */ EOL, Space, Space, EOL, _______, _______, _______, _______, _______, _______,
1062 /* 20+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
1063 /* 30+ */ _______, _______, Space, _______, String, _______, Ident, _______, _______, String,
1064 /* 40+ */ TOK_LP, TOK_RP, _______, _______, T_COMMA,_______, _______, _______,BasePrefix, Dec,
1065 /* 50+ */ Dec, Dec, Dec, Dec, Dec, Dec, Dec, Dec, T_COLON,TOK_SEMI,
1066 /* 60+ */ _______, _______, _______,TOK_HOOK, _______, Ident, Ident, Ident, Ident, Ident,
1067 /* 70+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident,
1068 /* 80+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident,
1069 /* 90+ */ Ident, TOK_LB, _______, TOK_RB, _______, Ident, Templat, Ident, Ident, Ident,
1070 /* 100+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident,
1071 /* 110+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident,
1072 /* 120+ */ Ident, Ident, Ident, TOK_LC, _______, TOK_RC,T_BITNOT, _______
1073 };
1074 #undef T_COMMA
1075 #undef T_COLON
1076 #undef T_BITNOT
1077 #undef Templat
1078 #undef _______
1079
1080 static_assert(LastCharKind < (1 << (sizeof(firstCharKinds[0]) * 8)),
1081 "Elements of firstCharKinds[] are too small");
1082
1083 bool
getTokenInternal(TokenKind * ttp,Modifier modifier)1084 TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier)
1085 {
1086 int c, qc;
1087 Token* tp;
1088 FirstCharKind c1kind;
1089 const char16_t* numStart;
1090 bool hasExp;
1091 DecimalPoint decimalPoint;
1092 const char16_t* identStart;
1093 bool hadUnicodeEscape;
1094
1095 // Check if in the middle of a template string. Have to get this out of
1096 // the way first.
1097 if (MOZ_UNLIKELY(modifier == TemplateTail)) {
1098 if (!getStringOrTemplateToken('`', &tp))
1099 goto error;
1100 goto out;
1101 }
1102
1103 retry:
1104 if (MOZ_UNLIKELY(!userbuf.hasRawChars())) {
1105 tp = newToken(0);
1106 tp->type = TOK_EOF;
1107 flags.isEOF = true;
1108 goto out;
1109 }
1110
1111 c = userbuf.getRawChar();
1112 MOZ_ASSERT(c != EOF);
1113
1114 // Chars not in the range 0..127 are rare. Getting them out of the way
1115 // early allows subsequent checking to be faster.
1116 if (MOZ_UNLIKELY(c >= 128)) {
1117 if (IsSpaceOrBOM2(c)) {
1118 if (c == LINE_SEPARATOR || c == PARA_SEPARATOR) {
1119 updateLineInfoForEOL();
1120 updateFlagsForEOL();
1121 }
1122
1123 goto retry;
1124 }
1125
1126 tp = newToken(-1);
1127
1128 static_assert('$' < 128,
1129 "IdentifierStart contains '$', but as !IsLetter('$'), "
1130 "ensure that '$' is never handled here");
1131 static_assert('_' < 128,
1132 "IdentifierStart contains '_', but as !IsLetter('_'), "
1133 "ensure that '_' is never handled here");
1134 if (IsLetter(c)) {
1135 identStart = userbuf.addressOfNextRawChar() - 1;
1136 hadUnicodeEscape = false;
1137 goto identifier;
1138 }
1139
1140 goto badchar;
1141 }
1142
1143 // Get the token kind, based on the first char. The ordering of c1kind
1144 // comparison is based on the frequency of tokens in real code -- Parsemark
1145 // (which represents typical JS code on the web) and the Unreal demo (which
1146 // represents asm.js code).
1147 //
1148 // Parsemark Unreal
1149 // OneChar 32.9% 39.7%
1150 // Space 25.0% 0.6%
1151 // Ident 19.2% 36.4%
1152 // Dec 7.2% 5.1%
1153 // String 7.9% 0.0%
1154 // EOL 1.7% 0.0%
1155 // BasePrefix 0.4% 4.9%
1156 // Other 5.7% 13.3%
1157 //
1158 // The ordering is based mostly only Parsemark frequencies, with Unreal
1159 // frequencies used to break close categories (e.g. |Dec| and |String|).
1160 // |Other| is biggish, but no other token kind is common enough for it to
1161 // be worth adding extra values to FirstCharKind.
1162 //
1163 c1kind = FirstCharKind(firstCharKinds[c]);
1164
1165 // Look for an unambiguous single-char token.
1166 //
1167 if (c1kind <= OneChar_Max) {
1168 tp = newToken(-1);
1169 tp->type = TokenKind(c1kind);
1170 goto out;
1171 }
1172
1173 // Skip over non-EOL whitespace chars.
1174 //
1175 if (c1kind == Space)
1176 goto retry;
1177
1178 // Look for an identifier.
1179 //
1180 if (c1kind == Ident) {
1181 tp = newToken(-1);
1182 identStart = userbuf.addressOfNextRawChar() - 1;
1183 hadUnicodeEscape = false;
1184
1185 identifier:
1186 for (;;) {
1187 c = getCharIgnoreEOL();
1188 if (c == EOF)
1189 break;
1190 if (!IsIdentifierPart(c)) {
1191 if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
1192 break;
1193 hadUnicodeEscape = true;
1194 }
1195 }
1196 ungetCharIgnoreEOL(c);
1197
1198 // Identifiers containing no Unicode escapes can be processed directly
1199 // from userbuf. The rest must use the escapes converted via tokenbuf
1200 // before atomizing.
1201 const char16_t* chars;
1202 size_t length;
1203 if (hadUnicodeEscape) {
1204 if (!putIdentInTokenbuf(identStart))
1205 goto error;
1206
1207 chars = tokenbuf.begin();
1208 length = tokenbuf.length();
1209 } else {
1210 chars = identStart;
1211 length = userbuf.addressOfNextRawChar() - identStart;
1212 }
1213
1214 // Represent keywords as keyword tokens unless told otherwise.
1215 if (modifier != KeywordIsName) {
1216 if (const KeywordInfo* kw = FindKeyword(chars, length)) {
1217 // That said, keywords can't contain escapes. (Contexts where
1218 // keywords are treated as names, that also sometimes treat
1219 // keywords as keywords, must manually check this requirement.)
1220 if (hadUnicodeEscape) {
1221 reportError(JSMSG_ESCAPED_KEYWORD);
1222 goto error;
1223 }
1224
1225 tp->type = TOK_NAME;
1226 if (!checkForKeyword(kw, &tp->type))
1227 goto error;
1228 if (tp->type != TOK_NAME)
1229 goto out;
1230 }
1231 }
1232
1233 JSAtom* atom = AtomizeChars(cx, chars, length);
1234 if (!atom)
1235 goto error;
1236 tp->type = TOK_NAME;
1237 tp->setName(atom->asPropertyName());
1238 goto out;
1239 }
1240
1241 // Look for a decimal number.
1242 //
1243 if (c1kind == Dec) {
1244 tp = newToken(-1);
1245 numStart = userbuf.addressOfNextRawChar() - 1;
1246
1247 decimal:
1248 decimalPoint = NoDecimal;
1249 hasExp = false;
1250 while (JS7_ISDEC(c))
1251 c = getCharIgnoreEOL();
1252
1253 if (c == '.') {
1254 decimalPoint = HasDecimal;
1255 decimal_dot:
1256 do {
1257 c = getCharIgnoreEOL();
1258 } while (JS7_ISDEC(c));
1259 }
1260 if (c == 'e' || c == 'E') {
1261 hasExp = true;
1262 c = getCharIgnoreEOL();
1263 if (c == '+' || c == '-')
1264 c = getCharIgnoreEOL();
1265 if (!JS7_ISDEC(c)) {
1266 ungetCharIgnoreEOL(c);
1267 reportError(JSMSG_MISSING_EXPONENT);
1268 goto error;
1269 }
1270 do {
1271 c = getCharIgnoreEOL();
1272 } while (JS7_ISDEC(c));
1273 }
1274 ungetCharIgnoreEOL(c);
1275
1276 if (c != EOF && IsIdentifierStart(c)) {
1277 reportError(JSMSG_IDSTART_AFTER_NUMBER);
1278 goto error;
1279 }
1280
1281 // Unlike identifiers and strings, numbers cannot contain escaped
1282 // chars, so we don't need to use tokenbuf. Instead we can just
1283 // convert the char16_t characters in userbuf to the numeric value.
1284 double dval;
1285 if (!((decimalPoint == HasDecimal) || hasExp)) {
1286 if (!GetDecimalInteger(cx, numStart, userbuf.addressOfNextRawChar(), &dval))
1287 goto error;
1288 } else {
1289 const char16_t* dummy;
1290 if (!js_strtod(cx, numStart, userbuf.addressOfNextRawChar(), &dummy, &dval))
1291 goto error;
1292 }
1293 tp->type = TOK_NUMBER;
1294 tp->setNumber(dval, decimalPoint);
1295 goto out;
1296 }
1297
1298 // Look for a string or a template string.
1299 //
1300 if (c1kind == String) {
1301 if (!getStringOrTemplateToken(c, &tp))
1302 goto error;
1303 goto out;
1304 }
1305
1306 // Skip over EOL chars, updating line state along the way.
1307 //
1308 if (c1kind == EOL) {
1309 // If it's a \r\n sequence: treat as a single EOL, skip over the \n.
1310 if (c == '\r' && userbuf.hasRawChars())
1311 userbuf.matchRawChar('\n');
1312 updateLineInfoForEOL();
1313 updateFlagsForEOL();
1314 goto retry;
1315 }
1316
1317 // Look for a hexadecimal, octal, or binary number.
1318 //
1319 if (c1kind == BasePrefix) {
1320 tp = newToken(-1);
1321 int radix;
1322 c = getCharIgnoreEOL();
1323 if (c == 'x' || c == 'X') {
1324 radix = 16;
1325 c = getCharIgnoreEOL();
1326 if (!JS7_ISHEX(c)) {
1327 ungetCharIgnoreEOL(c);
1328 reportError(JSMSG_MISSING_HEXDIGITS);
1329 goto error;
1330 }
1331 numStart = userbuf.addressOfNextRawChar() - 1; // one past the '0x'
1332 while (JS7_ISHEX(c))
1333 c = getCharIgnoreEOL();
1334 } else if (c == 'b' || c == 'B') {
1335 radix = 2;
1336 c = getCharIgnoreEOL();
1337 if (c != '0' && c != '1') {
1338 ungetCharIgnoreEOL(c);
1339 reportError(JSMSG_MISSING_BINARY_DIGITS);
1340 goto error;
1341 }
1342 numStart = userbuf.addressOfNextRawChar() - 1; // one past the '0b'
1343 while (c == '0' || c == '1')
1344 c = getCharIgnoreEOL();
1345 } else if (c == 'o' || c == 'O') {
1346 radix = 8;
1347 c = getCharIgnoreEOL();
1348 if (c < '0' || c > '7') {
1349 ungetCharIgnoreEOL(c);
1350 reportError(JSMSG_MISSING_OCTAL_DIGITS);
1351 goto error;
1352 }
1353 numStart = userbuf.addressOfNextRawChar() - 1; // one past the '0o'
1354 while ('0' <= c && c <= '7')
1355 c = getCharIgnoreEOL();
1356 } else if (JS7_ISDEC(c)) {
1357 radix = 8;
1358 numStart = userbuf.addressOfNextRawChar() - 1; // one past the '0'
1359 while (JS7_ISDEC(c)) {
1360 // Octal integer literals are not permitted in strict mode code.
1361 if (!reportStrictModeError(JSMSG_DEPRECATED_OCTAL))
1362 goto error;
1363
1364 // Outside strict mode, we permit 08 and 09 as decimal numbers,
1365 // which makes our behaviour a superset of the ECMA numeric
1366 // grammar. We might not always be so permissive, so we warn
1367 // about it.
1368 if (c >= '8') {
1369 if (!reportWarning(JSMSG_BAD_OCTAL, c == '8' ? "08" : "09")) {
1370 goto error;
1371 }
1372 goto decimal; // use the decimal scanner for the rest of the number
1373 }
1374 c = getCharIgnoreEOL();
1375 }
1376 } else {
1377 // '0' not followed by 'x', 'X' or a digit; scan as a decimal number.
1378 numStart = userbuf.addressOfNextRawChar() - 1;
1379 goto decimal;
1380 }
1381 ungetCharIgnoreEOL(c);
1382
1383 if (c != EOF && IsIdentifierStart(c)) {
1384 reportError(JSMSG_IDSTART_AFTER_NUMBER);
1385 goto error;
1386 }
1387
1388 double dval;
1389 const char16_t* dummy;
1390 if (!GetPrefixInteger(cx, numStart, userbuf.addressOfNextRawChar(), radix, &dummy, &dval))
1391 goto error;
1392 tp->type = TOK_NUMBER;
1393 tp->setNumber(dval, NoDecimal);
1394 goto out;
1395 }
1396
1397 // This handles everything else.
1398 //
1399 MOZ_ASSERT(c1kind == Other);
1400 tp = newToken(-1);
1401 switch (c) {
1402 case '.':
1403 c = getCharIgnoreEOL();
1404 if (JS7_ISDEC(c)) {
1405 numStart = userbuf.addressOfNextRawChar() - 2;
1406 decimalPoint = HasDecimal;
1407 hasExp = false;
1408 goto decimal_dot;
1409 }
1410 if (c == '.') {
1411 if (matchChar('.')) {
1412 tp->type = TOK_TRIPLEDOT;
1413 goto out;
1414 }
1415 }
1416 ungetCharIgnoreEOL(c);
1417 tp->type = TOK_DOT;
1418 goto out;
1419
1420 case '=':
1421 if (matchChar('='))
1422 tp->type = matchChar('=') ? TOK_STRICTEQ : TOK_EQ;
1423 else if (matchChar('>'))
1424 tp->type = TOK_ARROW;
1425 else
1426 tp->type = TOK_ASSIGN;
1427 goto out;
1428
1429 case '+':
1430 if (matchChar('+'))
1431 tp->type = TOK_INC;
1432 else
1433 tp->type = matchChar('=') ? TOK_ADDASSIGN : TOK_ADD;
1434 goto out;
1435
1436 case '\\':
1437 hadUnicodeEscape = matchUnicodeEscapeIdStart(&qc);
1438 if (hadUnicodeEscape) {
1439 identStart = userbuf.addressOfNextRawChar() - 6;
1440 goto identifier;
1441 }
1442 goto badchar;
1443
1444 case '|':
1445 if (matchChar('|'))
1446 tp->type = TOK_OR;
1447 else
1448 tp->type = matchChar('=') ? TOK_BITORASSIGN : TOK_BITOR;
1449 goto out;
1450
1451 case '^':
1452 tp->type = matchChar('=') ? TOK_BITXORASSIGN : TOK_BITXOR;
1453 goto out;
1454
1455 case '&':
1456 if (matchChar('&'))
1457 tp->type = TOK_AND;
1458 else
1459 tp->type = matchChar('=') ? TOK_BITANDASSIGN : TOK_BITAND;
1460 goto out;
1461
1462 case '!':
1463 if (matchChar('='))
1464 tp->type = matchChar('=') ? TOK_STRICTNE : TOK_NE;
1465 else
1466 tp->type = TOK_NOT;
1467 goto out;
1468
1469 case '<':
1470 // NB: treat HTML begin-comment as comment-till-end-of-line.
1471 if (matchChar('!')) {
1472 if (matchChar('-')) {
1473 if (matchChar('-'))
1474 goto skipline;
1475 ungetChar('-');
1476 }
1477 ungetChar('!');
1478 }
1479 if (matchChar('<')) {
1480 tp->type = matchChar('=') ? TOK_LSHASSIGN : TOK_LSH;
1481 } else {
1482 tp->type = matchChar('=') ? TOK_LE : TOK_LT;
1483 }
1484 goto out;
1485
1486 case '>':
1487 if (matchChar('>')) {
1488 if (matchChar('>'))
1489 tp->type = matchChar('=') ? TOK_URSHASSIGN : TOK_URSH;
1490 else
1491 tp->type = matchChar('=') ? TOK_RSHASSIGN : TOK_RSH;
1492 } else {
1493 tp->type = matchChar('=') ? TOK_GE : TOK_GT;
1494 }
1495 goto out;
1496
1497 case '*':
1498 #ifdef JS_HAS_EXPONENTIATION
1499 if (matchChar('*'))
1500 tp->type = matchChar('=') ? TOK_POWASSIGN : TOK_POW;
1501 else
1502 #endif
1503 tp->type = matchChar('=') ? TOK_MULASSIGN : TOK_MUL;
1504 goto out;
1505
1506 case '/':
1507 // Look for a single-line comment.
1508 if (matchChar('/')) {
1509 c = peekChar();
1510 if (c == '@' || c == '#') {
1511 bool shouldWarn = getChar() == '@';
1512 if (!getDirectives(false, shouldWarn))
1513 goto error;
1514 }
1515
1516 skipline:
1517 while ((c = getChar()) != EOF && c != '\n')
1518 continue;
1519 ungetChar(c);
1520 cursor = (cursor - 1) & ntokensMask;
1521 goto retry;
1522 }
1523
1524 // Look for a multi-line comment.
1525 if (matchChar('*')) {
1526 unsigned linenoBefore = lineno;
1527 while ((c = getChar()) != EOF &&
1528 !(c == '*' && matchChar('/'))) {
1529 if (c == '@' || c == '#') {
1530 bool shouldWarn = c == '@';
1531 if (!getDirectives(true, shouldWarn))
1532 goto error;
1533 }
1534 }
1535 if (c == EOF) {
1536 reportError(JSMSG_UNTERMINATED_COMMENT);
1537 goto error;
1538 }
1539 if (linenoBefore != lineno)
1540 updateFlagsForEOL();
1541 cursor = (cursor - 1) & ntokensMask;
1542 goto retry;
1543 }
1544
1545 // Look for a regexp.
1546 if (modifier == Operand) {
1547 tokenbuf.clear();
1548
1549 bool inCharClass = false;
1550 for (;;) {
1551 c = getChar();
1552 if (c == '\\') {
1553 if (!tokenbuf.append(c))
1554 goto error;
1555 c = getChar();
1556 } else if (c == '[') {
1557 inCharClass = true;
1558 } else if (c == ']') {
1559 inCharClass = false;
1560 } else if (c == '/' && !inCharClass) {
1561 // For compat with IE, allow unescaped / in char classes.
1562 break;
1563 }
1564 if (c == '\n' || c == EOF) {
1565 ungetChar(c);
1566 reportError(JSMSG_UNTERMINATED_REGEXP);
1567 goto error;
1568 }
1569 if (!tokenbuf.append(c))
1570 goto error;
1571 }
1572
1573 RegExpFlag reflags = NoFlags;
1574 unsigned length = tokenbuf.length() + 1;
1575 while (true) {
1576 c = peekChar();
1577 if (c == 'g' && !(reflags & GlobalFlag))
1578 reflags = RegExpFlag(reflags | GlobalFlag);
1579 else if (c == 'i' && !(reflags & IgnoreCaseFlag))
1580 reflags = RegExpFlag(reflags | IgnoreCaseFlag);
1581 else if (c == 'm' && !(reflags & MultilineFlag))
1582 reflags = RegExpFlag(reflags | MultilineFlag);
1583 else if (c == 'y' && !(reflags & StickyFlag))
1584 reflags = RegExpFlag(reflags | StickyFlag);
1585 else
1586 break;
1587 getChar();
1588 length++;
1589 }
1590
1591 c = peekChar();
1592 if (JS7_ISLET(c)) {
1593 char buf[2] = { '\0', '\0' };
1594 tp->pos.begin += length + 1;
1595 buf[0] = char(c);
1596 reportError(JSMSG_BAD_REGEXP_FLAG, buf);
1597 (void) getChar();
1598 goto error;
1599 }
1600 tp->type = TOK_REGEXP;
1601 tp->setRegExpFlags(reflags);
1602 goto out;
1603 }
1604
1605 tp->type = matchChar('=') ? TOK_DIVASSIGN : TOK_DIV;
1606 goto out;
1607
1608 case '%':
1609 tp->type = matchChar('=') ? TOK_MODASSIGN : TOK_MOD;
1610 goto out;
1611
1612 case '-':
1613 if (matchChar('-')) {
1614 if (peekChar() == '>' && !flags.isDirtyLine)
1615 goto skipline;
1616 tp->type = TOK_DEC;
1617 } else {
1618 tp->type = matchChar('=') ? TOK_SUBASSIGN : TOK_SUB;
1619 }
1620 goto out;
1621
1622 badchar:
1623 default:
1624 reportError(JSMSG_ILLEGAL_CHARACTER);
1625 goto error;
1626 }
1627
1628 MOZ_CRASH("should have jumped to |out| or |error|");
1629
1630 out:
1631 if (flags.hitOOM)
1632 return reportError(JSMSG_OUT_OF_MEMORY);
1633
1634 flags.isDirtyLine = true;
1635 tp->pos.end = userbuf.offset();
1636 #ifdef DEBUG
1637 // Save the modifier used to get this token, so that if an ungetToken()
1638 // occurs and then the token is re-gotten (or peeked, etc.), we can assert
1639 // that both gets have used the same modifiers.
1640 tp->modifier = modifier;
1641 tp->modifierException = NoException;
1642 #endif
1643 MOZ_ASSERT(IsTokenSane(tp));
1644 *ttp = tp->type;
1645 return true;
1646
1647 error:
1648 if (flags.hitOOM)
1649 return reportError(JSMSG_OUT_OF_MEMORY);
1650
1651 flags.isDirtyLine = true;
1652 tp->pos.end = userbuf.offset();
1653 MOZ_MAKE_MEM_UNDEFINED(&tp->type, sizeof(tp->type));
1654 flags.hadError = true;
1655 #ifdef DEBUG
1656 // Poisoning userbuf on error establishes an invariant: once an erroneous
1657 // token has been seen, userbuf will not be consulted again. This is true
1658 // because the parser will deal with the illegal token by aborting parsing
1659 // immediately.
1660 userbuf.poison();
1661 #endif
1662 MOZ_MAKE_MEM_UNDEFINED(ttp, sizeof(*ttp));
1663 return false;
1664 }
1665
1666 bool
getBracedUnicode(uint32_t * cp)1667 TokenStream::getBracedUnicode(uint32_t* cp)
1668 {
1669 consumeKnownChar('{');
1670
1671 bool first = true;
1672 int32_t c;
1673 uint32_t code = 0;
1674 while (true) {
1675 c = getCharIgnoreEOL();
1676 if (c == EOF)
1677 return false;
1678 if (c == '}') {
1679 if (first)
1680 return false;
1681 break;
1682 }
1683
1684 if (!JS7_ISHEX(c))
1685 return false;
1686
1687 code = (code << 4) | JS7_UNHEX(c);
1688 if (code > 0x10FFFF)
1689 return false;
1690 first = false;
1691 }
1692
1693 *cp = code;
1694 return true;
1695 }
1696
1697 bool
getStringOrTemplateToken(int untilChar,Token ** tp)1698 TokenStream::getStringOrTemplateToken(int untilChar, Token** tp)
1699 {
1700 int c;
1701 int nc = -1;
1702
1703 bool parsingTemplate = (untilChar == '`');
1704
1705 *tp = newToken(-1);
1706 tokenbuf.clear();
1707
1708 // We need to detect any of these chars: " or ', \n (or its
1709 // equivalents), \\, EOF. Because we detect EOL sequences here and
1710 // put them back immediately, we can use getCharIgnoreEOL().
1711 while ((c = getCharIgnoreEOL()) != untilChar) {
1712 if (c == EOF) {
1713 ungetCharIgnoreEOL(c);
1714 reportError(JSMSG_UNTERMINATED_STRING);
1715 return false;
1716 }
1717
1718 if (c == '\\') {
1719 switch (c = getChar()) {
1720 case 'b': c = '\b'; break;
1721 case 'f': c = '\f'; break;
1722 case 'n': c = '\n'; break;
1723 case 'r': c = '\r'; break;
1724 case 't': c = '\t'; break;
1725 case 'v': c = '\v'; break;
1726
1727 case '\n':
1728 // ES5 7.8.4: an escaped line terminator represents
1729 // no character.
1730 continue;
1731
1732 // Unicode character specification.
1733 case 'u': {
1734 if (peekChar() == '{') {
1735 uint32_t code;
1736 if (!getBracedUnicode(&code)) {
1737 reportError(JSMSG_MALFORMED_ESCAPE, "Unicode");
1738 return false;
1739 }
1740
1741 MOZ_ASSERT(code <= 0x10FFFF);
1742 if (code < 0x10000) {
1743 c = code;
1744 } else {
1745 if (!tokenbuf.append((code - 0x10000) / 1024 + 0xD800))
1746 return false;
1747 c = ((code - 0x10000) % 1024) + 0xDC00;
1748 }
1749 break;
1750 }
1751
1752 char16_t cp[4];
1753 if (peekChars(4, cp) &&
1754 JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3]))
1755 {
1756 c = JS7_UNHEX(cp[0]);
1757 c = (c << 4) + JS7_UNHEX(cp[1]);
1758 c = (c << 4) + JS7_UNHEX(cp[2]);
1759 c = (c << 4) + JS7_UNHEX(cp[3]);
1760 skipChars(4);
1761 } else {
1762 reportError(JSMSG_MALFORMED_ESCAPE, "Unicode");
1763 return false;
1764 }
1765 break;
1766 }
1767
1768 // Hexadecimal character specification.
1769 case 'x': {
1770 char16_t cp[2];
1771 if (peekChars(2, cp) && JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1])) {
1772 c = (JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1]);
1773 skipChars(2);
1774 } else {
1775 reportError(JSMSG_MALFORMED_ESCAPE, "hexadecimal");
1776 return false;
1777 }
1778 break;
1779 }
1780
1781 default:
1782 // Octal character specification.
1783 if (JS7_ISOCT(c)) {
1784 int32_t val = JS7_UNOCT(c);
1785
1786 c = peekChar();
1787
1788 // Strict mode code allows only \0, then a non-digit.
1789 if (val != 0 || JS7_ISDEC(c)) {
1790 if (parsingTemplate) {
1791 reportError(JSMSG_DEPRECATED_OCTAL);
1792 return false;
1793 }
1794 if (!reportStrictModeError(JSMSG_DEPRECATED_OCTAL))
1795 return false;
1796 flags.sawOctalEscape = true;
1797 }
1798
1799 if (JS7_ISOCT(c)) {
1800 val = 8 * val + JS7_UNOCT(c);
1801 getChar();
1802 c = peekChar();
1803 if (JS7_ISOCT(c)) {
1804 int32_t save = val;
1805 val = 8 * val + JS7_UNOCT(c);
1806 if (val <= 0xFF)
1807 getChar();
1808 else
1809 val = save;
1810 }
1811 }
1812
1813 c = char16_t(val);
1814 }
1815 break;
1816 }
1817 } else if (TokenBuf::isRawEOLChar(c)) {
1818 if (!parsingTemplate) {
1819 ungetCharIgnoreEOL(c);
1820 reportError(JSMSG_UNTERMINATED_STRING);
1821 return false;
1822 }
1823 if (c == '\r') {
1824 c = '\n';
1825 if (userbuf.peekRawChar() == '\n')
1826 skipChars(1);
1827 }
1828 updateLineInfoForEOL();
1829 updateFlagsForEOL();
1830 } else if (parsingTemplate && c == '$') {
1831 if ((nc = getCharIgnoreEOL()) == '{')
1832 break;
1833 ungetCharIgnoreEOL(nc);
1834 }
1835
1836 if (!tokenbuf.append(c)) {
1837 ReportOutOfMemory(cx);
1838 return false;
1839 }
1840 }
1841
1842 JSAtom* atom = atomize(cx, tokenbuf);
1843 if (!atom)
1844 return false;
1845
1846 if (!parsingTemplate) {
1847 (*tp)->type = TOK_STRING;
1848 } else {
1849 if (c == '$' && nc == '{')
1850 (*tp)->type = TOK_TEMPLATE_HEAD;
1851 else
1852 (*tp)->type = TOK_NO_SUBS_TEMPLATE;
1853 }
1854
1855 (*tp)->setAtom(atom);
1856 return true;
1857 }
1858
1859 JS_FRIEND_API(int)
js_fgets(char * buf,int size,FILE * file)1860 js_fgets(char* buf, int size, FILE* file)
1861 {
1862 int n, i, c;
1863 bool crflag;
1864
1865 n = size - 1;
1866 if (n < 0)
1867 return -1;
1868
1869 crflag = false;
1870 for (i = 0; i < n && (c = fast_getc(file)) != EOF; i++) {
1871 buf[i] = c;
1872 if (c == '\n') { // any \n ends a line
1873 i++; // keep the \n; we know there is room for \0
1874 break;
1875 }
1876 if (crflag) { // \r not followed by \n ends line at the \r
1877 ungetc(c, file);
1878 break; // and overwrite c in buf with \0
1879 }
1880 crflag = (c == '\r');
1881 }
1882
1883 buf[i] = '\0';
1884 return i;
1885 }
1886
1887 const char*
TokenKindToDesc(TokenKind tt)1888 frontend::TokenKindToDesc(TokenKind tt)
1889 {
1890 switch (tt) {
1891 #define EMIT_CASE(name, desc) case TOK_##name: return desc;
1892 FOR_EACH_TOKEN_KIND(EMIT_CASE)
1893 #undef EMIT_CASE
1894 case TOK_LIMIT:
1895 MOZ_ASSERT_UNREACHABLE("TOK_LIMIT should not be passed.");
1896 break;
1897 }
1898
1899 return "<bad TokenKind>";
1900 }
1901
#ifdef DEBUG
// Debug-only: map a token kind to its enumerator name, e.g. "TOK_NAME".
// TOK_LIMIT and any unlisted value yield "<bad TokenKind>".
// NOTE(review): unlike TokenKindToDesc this definition is not qualified with
// |frontend::| -- confirm against the declaration that this is intended.
const char*
TokenKindToString(TokenKind tt)
{
    switch (tt) {
#define RETURN_TOKEN_NAME(name, desc) case TOK_##name: return "TOK_" #name;
      FOR_EACH_TOKEN_KIND(RETURN_TOKEN_NAME)
#undef RETURN_TOKEN_NAME
      case TOK_LIMIT:
        break;
    }

    return "<bad TokenKind>";
}
#endif
1916