1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Features shared by parsing and pre-parsing scanners.
6
7 #include "src/parsing/scanner.h"
8
9 #include <stdint.h>
10
11 #include <cmath>
12
13 #include "src/ast/ast-value-factory.h"
14 #include "src/char-predicates-inl.h"
15 #include "src/conversions-inl.h"
16 #include "src/objects/bigint.h"
17 #include "src/parsing/duplicate-finder.h" // For Scanner::FindSymbol
18 #include "src/unicode-cache-inl.h"
19
20 namespace v8 {
21 namespace internal {
22
23 class Scanner::ErrorState {
24 public:
ErrorState(MessageTemplate::Template * message_stack,Scanner::Location * location_stack)25 ErrorState(MessageTemplate::Template* message_stack,
26 Scanner::Location* location_stack)
27 : message_stack_(message_stack),
28 old_message_(*message_stack),
29 location_stack_(location_stack),
30 old_location_(*location_stack) {
31 *message_stack_ = MessageTemplate::kNone;
32 *location_stack_ = Location::invalid();
33 }
34
~ErrorState()35 ~ErrorState() {
36 *message_stack_ = old_message_;
37 *location_stack_ = old_location_;
38 }
39
MoveErrorTo(TokenDesc * dest)40 void MoveErrorTo(TokenDesc* dest) {
41 if (*message_stack_ == MessageTemplate::kNone) {
42 return;
43 }
44 if (dest->invalid_template_escape_message == MessageTemplate::kNone) {
45 dest->invalid_template_escape_message = *message_stack_;
46 dest->invalid_template_escape_location = *location_stack_;
47 }
48 *message_stack_ = MessageTemplate::kNone;
49 *location_stack_ = Location::invalid();
50 }
51
52 private:
53 MessageTemplate::Template* const message_stack_;
54 MessageTemplate::Template const old_message_;
55 Scanner::Location* const location_stack_;
56 Scanner::Location const old_location_;
57 };
58
59 // ----------------------------------------------------------------------------
60 // Scanner::LiteralBuffer
61
Internalize(Isolate * isolate) const62 Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const {
63 if (is_one_byte()) {
64 return isolate->factory()->InternalizeOneByteString(one_byte_literal());
65 }
66 return isolate->factory()->InternalizeTwoByteString(two_byte_literal());
67 }
68
NewCapacity(int min_capacity)69 int Scanner::LiteralBuffer::NewCapacity(int min_capacity) {
70 int capacity = Max(min_capacity, backing_store_.length());
71 int new_capacity = Min(capacity * kGrowthFactory, capacity + kMaxGrowth);
72 return new_capacity;
73 }
74
ExpandBuffer()75 void Scanner::LiteralBuffer::ExpandBuffer() {
76 Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity));
77 MemCopy(new_store.start(), backing_store_.start(), position_);
78 backing_store_.Dispose();
79 backing_store_ = new_store;
80 }
81
ConvertToTwoByte()82 void Scanner::LiteralBuffer::ConvertToTwoByte() {
83 DCHECK(is_one_byte_);
84 Vector<byte> new_store;
85 int new_content_size = position_ * kUC16Size;
86 if (new_content_size >= backing_store_.length()) {
87 // Ensure room for all currently read code units as UC16 as well
88 // as the code unit about to be stored.
89 new_store = Vector<byte>::New(NewCapacity(new_content_size));
90 } else {
91 new_store = backing_store_;
92 }
93 uint8_t* src = backing_store_.start();
94 uint16_t* dst = reinterpret_cast<uint16_t*>(new_store.start());
95 for (int i = position_ - 1; i >= 0; i--) {
96 dst[i] = src[i];
97 }
98 if (new_store.start() != backing_store_.start()) {
99 backing_store_.Dispose();
100 backing_store_ = new_store;
101 }
102 position_ = new_content_size;
103 is_one_byte_ = false;
104 }
105
AddCharSlow(uc32 code_unit)106 void Scanner::LiteralBuffer::AddCharSlow(uc32 code_unit) {
107 if (position_ >= backing_store_.length()) ExpandBuffer();
108 if (is_one_byte_) {
109 if (code_unit <= static_cast<uc32>(unibrow::Latin1::kMaxChar)) {
110 backing_store_[position_] = static_cast<byte>(code_unit);
111 position_ += kOneByteSize;
112 return;
113 }
114 ConvertToTwoByte();
115 }
116 if (code_unit <=
117 static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
118 *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = code_unit;
119 position_ += kUC16Size;
120 } else {
121 *reinterpret_cast<uint16_t*>(&backing_store_[position_]) =
122 unibrow::Utf16::LeadSurrogate(code_unit);
123 position_ += kUC16Size;
124 if (position_ >= backing_store_.length()) ExpandBuffer();
125 *reinterpret_cast<uint16_t*>(&backing_store_[position_]) =
126 unibrow::Utf16::TrailSurrogate(code_unit);
127 position_ += kUC16Size;
128 }
129 }
130
131 // ----------------------------------------------------------------------------
132 // Scanner::BookmarkScope
133
134 const size_t Scanner::BookmarkScope::kBookmarkAtFirstPos =
135 std::numeric_limits<size_t>::max() - 2;
136 const size_t Scanner::BookmarkScope::kNoBookmark =
137 std::numeric_limits<size_t>::max() - 1;
138 const size_t Scanner::BookmarkScope::kBookmarkWasApplied =
139 std::numeric_limits<size_t>::max();
140
Set()141 void Scanner::BookmarkScope::Set() {
142 DCHECK_EQ(bookmark_, kNoBookmark);
143 DCHECK_EQ(scanner_->next_next_.token, Token::UNINITIALIZED);
144
145 // The first token is a bit special, since current_ will still be
146 // uninitialized. In this case, store kBookmarkAtFirstPos and special-case it
147 // when
148 // applying the bookmark.
149 DCHECK_IMPLIES(
150 scanner_->current_.token == Token::UNINITIALIZED,
151 scanner_->current_.location.beg_pos == scanner_->next_.location.beg_pos);
152 bookmark_ = (scanner_->current_.token == Token::UNINITIALIZED)
153 ? kBookmarkAtFirstPos
154 : scanner_->location().beg_pos;
155 }
156
Apply()157 void Scanner::BookmarkScope::Apply() {
158 DCHECK(HasBeenSet()); // Caller hasn't called SetBookmark.
159 if (bookmark_ == kBookmarkAtFirstPos) {
160 scanner_->SeekNext(0);
161 } else {
162 scanner_->SeekNext(bookmark_);
163 scanner_->Next();
164 DCHECK_EQ(scanner_->location().beg_pos, static_cast<int>(bookmark_));
165 }
166 bookmark_ = kBookmarkWasApplied;
167 }
168
HasBeenSet()169 bool Scanner::BookmarkScope::HasBeenSet() {
170 return bookmark_ != kNoBookmark && bookmark_ != kBookmarkWasApplied;
171 }
172
HasBeenApplied()173 bool Scanner::BookmarkScope::HasBeenApplied() {
174 return bookmark_ == kBookmarkWasApplied;
175 }
176
177 // ----------------------------------------------------------------------------
178 // Scanner
179
Scanner(UnicodeCache * unicode_cache)180 Scanner::Scanner(UnicodeCache* unicode_cache)
181 : unicode_cache_(unicode_cache),
182 octal_pos_(Location::invalid()),
183 octal_message_(MessageTemplate::kNone),
184 found_html_comment_(false),
185 allow_harmony_bigint_(false),
186 allow_harmony_numeric_separator_(false) {}
187
Initialize(Utf16CharacterStream * source,bool is_module)188 void Scanner::Initialize(Utf16CharacterStream* source, bool is_module) {
189 DCHECK_NOT_NULL(source);
190 source_ = source;
191 is_module_ = is_module;
192 // Need to capture identifiers in order to recognize "get" and "set"
193 // in object literals.
194 Init();
195 has_line_terminator_before_next_ = true;
196 Scan();
197 }
198
199 template <bool capture_raw, bool unicode>
ScanHexNumber(int expected_length)200 uc32 Scanner::ScanHexNumber(int expected_length) {
201 DCHECK_LE(expected_length, 4); // prevent overflow
202
203 int begin = source_pos() - 2;
204 uc32 x = 0;
205 for (int i = 0; i < expected_length; i++) {
206 int d = HexValue(c0_);
207 if (d < 0) {
208 ReportScannerError(Location(begin, begin + expected_length + 2),
209 unicode
210 ? MessageTemplate::kInvalidUnicodeEscapeSequence
211 : MessageTemplate::kInvalidHexEscapeSequence);
212 return -1;
213 }
214 x = x * 16 + d;
215 Advance<capture_raw>();
216 }
217
218 return x;
219 }
220
221 template <bool capture_raw>
ScanUnlimitedLengthHexNumber(int max_value,int beg_pos)222 uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value, int beg_pos) {
223 uc32 x = 0;
224 int d = HexValue(c0_);
225 if (d < 0) return -1;
226
227 while (d >= 0) {
228 x = x * 16 + d;
229 if (x > max_value) {
230 ReportScannerError(Location(beg_pos, source_pos() + 1),
231 MessageTemplate::kUndefinedUnicodeCodePoint);
232 return -1;
233 }
234 Advance<capture_raw>();
235 d = HexValue(c0_);
236 }
237
238 return x;
239 }
240
241
242 // Ensure that tokens can be stored in a byte.
243 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);
244
245 // Table of one-character tokens, by character (0x00..0x7F only).
246 // clang-format off
247 static const byte one_char_tokens[] = {
248 Token::ILLEGAL,
249 Token::ILLEGAL,
250 Token::ILLEGAL,
251 Token::ILLEGAL,
252 Token::ILLEGAL,
253 Token::ILLEGAL,
254 Token::ILLEGAL,
255 Token::ILLEGAL,
256 Token::ILLEGAL,
257 Token::ILLEGAL,
258 Token::ILLEGAL,
259 Token::ILLEGAL,
260 Token::ILLEGAL,
261 Token::ILLEGAL,
262 Token::ILLEGAL,
263 Token::ILLEGAL,
264 Token::ILLEGAL,
265 Token::ILLEGAL,
266 Token::ILLEGAL,
267 Token::ILLEGAL,
268 Token::ILLEGAL,
269 Token::ILLEGAL,
270 Token::ILLEGAL,
271 Token::ILLEGAL,
272 Token::ILLEGAL,
273 Token::ILLEGAL,
274 Token::ILLEGAL,
275 Token::ILLEGAL,
276 Token::ILLEGAL,
277 Token::ILLEGAL,
278 Token::ILLEGAL,
279 Token::ILLEGAL,
280 Token::ILLEGAL,
281 Token::ILLEGAL,
282 Token::ILLEGAL,
283 Token::ILLEGAL,
284 Token::ILLEGAL,
285 Token::ILLEGAL,
286 Token::ILLEGAL,
287 Token::ILLEGAL,
288 Token::LPAREN, // 0x28
289 Token::RPAREN, // 0x29
290 Token::ILLEGAL,
291 Token::ILLEGAL,
292 Token::COMMA, // 0x2C
293 Token::ILLEGAL,
294 Token::ILLEGAL,
295 Token::ILLEGAL,
296 Token::ILLEGAL,
297 Token::ILLEGAL,
298 Token::ILLEGAL,
299 Token::ILLEGAL,
300 Token::ILLEGAL,
301 Token::ILLEGAL,
302 Token::ILLEGAL,
303 Token::ILLEGAL,
304 Token::ILLEGAL,
305 Token::ILLEGAL,
306 Token::COLON, // 0x3A
307 Token::SEMICOLON, // 0x3B
308 Token::ILLEGAL,
309 Token::ILLEGAL,
310 Token::ILLEGAL,
311 Token::CONDITIONAL, // 0x3F
312 Token::ILLEGAL,
313 Token::ILLEGAL,
314 Token::ILLEGAL,
315 Token::ILLEGAL,
316 Token::ILLEGAL,
317 Token::ILLEGAL,
318 Token::ILLEGAL,
319 Token::ILLEGAL,
320 Token::ILLEGAL,
321 Token::ILLEGAL,
322 Token::ILLEGAL,
323 Token::ILLEGAL,
324 Token::ILLEGAL,
325 Token::ILLEGAL,
326 Token::ILLEGAL,
327 Token::ILLEGAL,
328 Token::ILLEGAL,
329 Token::ILLEGAL,
330 Token::ILLEGAL,
331 Token::ILLEGAL,
332 Token::ILLEGAL,
333 Token::ILLEGAL,
334 Token::ILLEGAL,
335 Token::ILLEGAL,
336 Token::ILLEGAL,
337 Token::ILLEGAL,
338 Token::ILLEGAL,
339 Token::LBRACK, // 0x5B
340 Token::ILLEGAL,
341 Token::RBRACK, // 0x5D
342 Token::ILLEGAL,
343 Token::ILLEGAL,
344 Token::ILLEGAL,
345 Token::ILLEGAL,
346 Token::ILLEGAL,
347 Token::ILLEGAL,
348 Token::ILLEGAL,
349 Token::ILLEGAL,
350 Token::ILLEGAL,
351 Token::ILLEGAL,
352 Token::ILLEGAL,
353 Token::ILLEGAL,
354 Token::ILLEGAL,
355 Token::ILLEGAL,
356 Token::ILLEGAL,
357 Token::ILLEGAL,
358 Token::ILLEGAL,
359 Token::ILLEGAL,
360 Token::ILLEGAL,
361 Token::ILLEGAL,
362 Token::ILLEGAL,
363 Token::ILLEGAL,
364 Token::ILLEGAL,
365 Token::ILLEGAL,
366 Token::ILLEGAL,
367 Token::ILLEGAL,
368 Token::ILLEGAL,
369 Token::ILLEGAL,
370 Token::ILLEGAL,
371 Token::LBRACE, // 0x7B
372 Token::ILLEGAL,
373 Token::RBRACE, // 0x7D
374 Token::BIT_NOT, // 0x7E
375 Token::ILLEGAL
376 };
377 // clang-format on
378
Next()379 Token::Value Scanner::Next() {
380 if (next_.token == Token::EOS) {
381 next_.location.beg_pos = current_.location.beg_pos;
382 next_.location.end_pos = current_.location.end_pos;
383 }
384 current_ = next_;
385 if (V8_UNLIKELY(next_next_.token != Token::UNINITIALIZED)) {
386 next_ = next_next_;
387 next_next_.token = Token::UNINITIALIZED;
388 next_next_.contextual_token = Token::UNINITIALIZED;
389 has_line_terminator_before_next_ = has_line_terminator_after_next_;
390 return current_.token;
391 }
392 has_line_terminator_before_next_ = false;
393 has_multiline_comment_before_next_ = false;
394 if (static_cast<unsigned>(c0_) <= 0x7F) {
395 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]);
396 if (token != Token::ILLEGAL) {
397 int pos = source_pos();
398 next_.token = token;
399 next_.contextual_token = Token::UNINITIALIZED;
400 next_.location.beg_pos = pos;
401 next_.location.end_pos = pos + 1;
402 next_.literal_chars = nullptr;
403 next_.raw_literal_chars = nullptr;
404 next_.invalid_template_escape_message = MessageTemplate::kNone;
405 Advance();
406 return current_.token;
407 }
408 }
409 Scan();
410 return current_.token;
411 }
412
413
PeekAhead()414 Token::Value Scanner::PeekAhead() {
415 DCHECK(next_.token != Token::DIV);
416 DCHECK(next_.token != Token::ASSIGN_DIV);
417
418 if (next_next_.token != Token::UNINITIALIZED) {
419 return next_next_.token;
420 }
421 TokenDesc prev = current_;
422 bool has_line_terminator_before_next =
423 has_line_terminator_before_next_ || has_multiline_comment_before_next_;
424 Next();
425 has_line_terminator_after_next_ =
426 has_line_terminator_before_next_ || has_multiline_comment_before_next_;
427 has_line_terminator_before_next_ = has_line_terminator_before_next;
428 Token::Value ret = next_.token;
429 next_next_ = next_;
430 next_ = current_;
431 current_ = prev;
432 return ret;
433 }
434
435
SkipWhiteSpace()436 Token::Value Scanner::SkipWhiteSpace() {
437 int start_position = source_pos();
438
439 while (true) {
440 while (true) {
441 // Don't skip behind the end of input.
442 if (c0_ == kEndOfInput) break;
443
444 // Advance as long as character is a WhiteSpace or LineTerminator.
445 // Remember if the latter is the case.
446 if (unibrow::IsLineTerminator(c0_)) {
447 has_line_terminator_before_next_ = true;
448 } else if (!unicode_cache_->IsWhiteSpace(c0_)) {
449 break;
450 }
451 Advance();
452 }
453
454 // If there is an HTML comment end '-->' at the beginning of a
455 // line (with only whitespace in front of it), we treat the rest
456 // of the line as a comment. This is in line with the way
457 // SpiderMonkey handles it.
458 if (c0_ != '-' || !has_line_terminator_before_next_) break;
459
460 Advance();
461 if (c0_ != '-') {
462 PushBack('-'); // undo Advance()
463 break;
464 }
465
466 Advance();
467 if (c0_ != '>') {
468 PushBack2('-', '-'); // undo 2x Advance();
469 break;
470 }
471
472 // Treat the rest of the line as a comment.
473 Token::Value token = SkipSingleHTMLComment();
474 if (token == Token::ILLEGAL) {
475 return token;
476 }
477 }
478
479 // Return whether or not we skipped any characters.
480 if (source_pos() == start_position) {
481 return Token::ILLEGAL;
482 }
483
484 return Token::WHITESPACE;
485 }
486
SkipSingleHTMLComment()487 Token::Value Scanner::SkipSingleHTMLComment() {
488 if (is_module_) {
489 ReportScannerError(source_pos(), MessageTemplate::kHtmlCommentInModule);
490 return Token::ILLEGAL;
491 }
492 return SkipSingleLineComment();
493 }
494
SkipSingleLineComment()495 Token::Value Scanner::SkipSingleLineComment() {
496 Advance();
497
498 // The line terminator at the end of the line is not considered
499 // to be part of the single-line comment; it is recognized
500 // separately by the lexical grammar and becomes part of the
501 // stream of input elements for the syntactic grammar (see
502 // ECMA-262, section 7.4).
503 while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
504 Advance();
505 }
506
507 return Token::WHITESPACE;
508 }
509
510
SkipSourceURLComment()511 Token::Value Scanner::SkipSourceURLComment() {
512 TryToParseSourceURLComment();
513 while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
514 Advance();
515 }
516
517 return Token::WHITESPACE;
518 }
519
520
TryToParseSourceURLComment()521 void Scanner::TryToParseSourceURLComment() {
522 // Magic comments are of the form: //[#@]\s<name>=\s*<value>\s*.* and this
523 // function will just return if it cannot parse a magic comment.
524 if (c0_ == kEndOfInput || !unicode_cache_->IsWhiteSpace(c0_)) return;
525 Advance();
526 LiteralBuffer name;
527 while (c0_ != kEndOfInput &&
528 !unicode_cache_->IsWhiteSpaceOrLineTerminator(c0_) && c0_ != '=') {
529 name.AddChar(c0_);
530 Advance();
531 }
532 if (!name.is_one_byte()) return;
533 Vector<const uint8_t> name_literal = name.one_byte_literal();
534 LiteralBuffer* value;
535 if (name_literal == STATIC_CHAR_VECTOR("sourceURL")) {
536 value = &source_url_;
537 } else if (name_literal == STATIC_CHAR_VECTOR("sourceMappingURL")) {
538 value = &source_mapping_url_;
539 } else {
540 return;
541 }
542 if (c0_ != '=')
543 return;
544 Advance();
545 value->Reset();
546 while (c0_ != kEndOfInput && unicode_cache_->IsWhiteSpace(c0_)) {
547 Advance();
548 }
549 while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
550 // Disallowed characters.
551 if (c0_ == '"' || c0_ == '\'') {
552 value->Reset();
553 return;
554 }
555 if (unicode_cache_->IsWhiteSpace(c0_)) {
556 break;
557 }
558 value->AddChar(c0_);
559 Advance();
560 }
561 // Allow whitespace at the end.
562 while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
563 if (!unicode_cache_->IsWhiteSpace(c0_)) {
564 value->Reset();
565 break;
566 }
567 Advance();
568 }
569 }
570
571
SkipMultiLineComment()572 Token::Value Scanner::SkipMultiLineComment() {
573 DCHECK_EQ(c0_, '*');
574 Advance();
575
576 while (c0_ != kEndOfInput) {
577 uc32 ch = c0_;
578 Advance();
579 if (c0_ != kEndOfInput && unibrow::IsLineTerminator(ch)) {
580 // Following ECMA-262, section 7.4, a comment containing
581 // a newline will make the comment count as a line-terminator.
582 has_multiline_comment_before_next_ = true;
583 }
584 // If we have reached the end of the multi-line comment, we
585 // consume the '/' and insert a whitespace. This way all
586 // multi-line comments are treated as whitespace.
587 if (ch == '*' && c0_ == '/') {
588 c0_ = ' ';
589 return Token::WHITESPACE;
590 }
591 }
592
593 // Unterminated multi-line comment.
594 return Token::ILLEGAL;
595 }
596
ScanHtmlComment()597 Token::Value Scanner::ScanHtmlComment() {
598 // Check for <!-- comments.
599 DCHECK_EQ(c0_, '!');
600 Advance();
601 if (c0_ != '-') {
602 PushBack('!'); // undo Advance()
603 return Token::LT;
604 }
605
606 Advance();
607 if (c0_ != '-') {
608 PushBack2('-', '!'); // undo 2x Advance()
609 return Token::LT;
610 }
611
612 found_html_comment_ = true;
613 return SkipSingleHTMLComment();
614 }
615
Scan()616 void Scanner::Scan() {
617 next_.literal_chars = nullptr;
618 next_.raw_literal_chars = nullptr;
619 next_.invalid_template_escape_message = MessageTemplate::kNone;
620 Token::Value token;
621 do {
622 // Remember the position of the next token
623 next_.location.beg_pos = source_pos();
624
625 switch (c0_) {
626 case ' ':
627 case '\t':
628 Advance();
629 token = Token::WHITESPACE;
630 break;
631
632 case '\n':
633 Advance();
634 has_line_terminator_before_next_ = true;
635 token = Token::WHITESPACE;
636 break;
637
638 case '"':
639 case '\'':
640 token = ScanString();
641 break;
642
643 case '<':
644 // < <= << <<= <!--
645 Advance();
646 if (c0_ == '=') {
647 token = Select(Token::LTE);
648 } else if (c0_ == '<') {
649 token = Select('=', Token::ASSIGN_SHL, Token::SHL);
650 } else if (c0_ == '!') {
651 token = ScanHtmlComment();
652 } else {
653 token = Token::LT;
654 }
655 break;
656
657 case '>':
658 // > >= >> >>= >>> >>>=
659 Advance();
660 if (c0_ == '=') {
661 token = Select(Token::GTE);
662 } else if (c0_ == '>') {
663 // >> >>= >>> >>>=
664 Advance();
665 if (c0_ == '=') {
666 token = Select(Token::ASSIGN_SAR);
667 } else if (c0_ == '>') {
668 token = Select('=', Token::ASSIGN_SHR, Token::SHR);
669 } else {
670 token = Token::SAR;
671 }
672 } else {
673 token = Token::GT;
674 }
675 break;
676
677 case '=':
678 // = == === =>
679 Advance();
680 if (c0_ == '=') {
681 token = Select('=', Token::EQ_STRICT, Token::EQ);
682 } else if (c0_ == '>') {
683 token = Select(Token::ARROW);
684 } else {
685 token = Token::ASSIGN;
686 }
687 break;
688
689 case '!':
690 // ! != !==
691 Advance();
692 if (c0_ == '=') {
693 token = Select('=', Token::NE_STRICT, Token::NE);
694 } else {
695 token = Token::NOT;
696 }
697 break;
698
699 case '+':
700 // + ++ +=
701 Advance();
702 if (c0_ == '+') {
703 token = Select(Token::INC);
704 } else if (c0_ == '=') {
705 token = Select(Token::ASSIGN_ADD);
706 } else {
707 token = Token::ADD;
708 }
709 break;
710
711 case '-':
712 // - -- --> -=
713 Advance();
714 if (c0_ == '-') {
715 Advance();
716 if (c0_ == '>' && HasAnyLineTerminatorBeforeNext()) {
717 // For compatibility with SpiderMonkey, we skip lines that
718 // start with an HTML comment end '-->'.
719 token = SkipSingleHTMLComment();
720 } else {
721 token = Token::DEC;
722 }
723 } else if (c0_ == '=') {
724 token = Select(Token::ASSIGN_SUB);
725 } else {
726 token = Token::SUB;
727 }
728 break;
729
730 case '*':
731 // * *=
732 Advance();
733 if (c0_ == '*') {
734 token = Select('=', Token::ASSIGN_EXP, Token::EXP);
735 } else if (c0_ == '=') {
736 token = Select(Token::ASSIGN_MUL);
737 } else {
738 token = Token::MUL;
739 }
740 break;
741
742 case '%':
743 // % %=
744 token = Select('=', Token::ASSIGN_MOD, Token::MOD);
745 break;
746
747 case '/':
748 // / // /* /=
749 Advance();
750 if (c0_ == '/') {
751 Advance();
752 if (c0_ == '#' || c0_ == '@') {
753 Advance();
754 token = SkipSourceURLComment();
755 } else {
756 PushBack(c0_);
757 token = SkipSingleLineComment();
758 }
759 } else if (c0_ == '*') {
760 token = SkipMultiLineComment();
761 } else if (c0_ == '=') {
762 token = Select(Token::ASSIGN_DIV);
763 } else {
764 token = Token::DIV;
765 }
766 break;
767
768 case '&':
769 // & && &=
770 Advance();
771 if (c0_ == '&') {
772 token = Select(Token::AND);
773 } else if (c0_ == '=') {
774 token = Select(Token::ASSIGN_BIT_AND);
775 } else {
776 token = Token::BIT_AND;
777 }
778 break;
779
780 case '|':
781 // | || |=
782 Advance();
783 if (c0_ == '|') {
784 token = Select(Token::OR);
785 } else if (c0_ == '=') {
786 token = Select(Token::ASSIGN_BIT_OR);
787 } else {
788 token = Token::BIT_OR;
789 }
790 break;
791
792 case '^':
793 // ^ ^=
794 token = Select('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR);
795 break;
796
797 case '.':
798 // . Number
799 Advance();
800 if (IsDecimalDigit(c0_)) {
801 token = ScanNumber(true);
802 } else {
803 token = Token::PERIOD;
804 if (c0_ == '.') {
805 Advance();
806 if (c0_ == '.') {
807 Advance();
808 token = Token::ELLIPSIS;
809 } else {
810 PushBack('.');
811 }
812 }
813 }
814 break;
815
816 case ':':
817 token = Select(Token::COLON);
818 break;
819
820 case ';':
821 token = Select(Token::SEMICOLON);
822 break;
823
824 case ',':
825 token = Select(Token::COMMA);
826 break;
827
828 case '(':
829 token = Select(Token::LPAREN);
830 break;
831
832 case ')':
833 token = Select(Token::RPAREN);
834 break;
835
836 case '[':
837 token = Select(Token::LBRACK);
838 break;
839
840 case ']':
841 token = Select(Token::RBRACK);
842 break;
843
844 case '{':
845 token = Select(Token::LBRACE);
846 break;
847
848 case '}':
849 token = Select(Token::RBRACE);
850 break;
851
852 case '?':
853 token = Select(Token::CONDITIONAL);
854 break;
855
856 case '~':
857 token = Select(Token::BIT_NOT);
858 break;
859
860 case '`':
861 token = ScanTemplateStart();
862 break;
863
864 case '#':
865 token = ScanPrivateName();
866 break;
867
868 default:
869 if (c0_ == kEndOfInput) {
870 token = Token::EOS;
871 } else if (unicode_cache_->IsIdentifierStart(c0_)) {
872 token = ScanIdentifierOrKeyword();
873 } else if (IsDecimalDigit(c0_)) {
874 token = ScanNumber(false);
875 } else {
876 token = SkipWhiteSpace();
877 if (token == Token::ILLEGAL) {
878 Advance();
879 }
880 }
881 break;
882 }
883
884 // Continue scanning for tokens as long as we're just skipping
885 // whitespace.
886 } while (token == Token::WHITESPACE);
887
888 next_.location.end_pos = source_pos();
889 if (Token::IsContextualKeyword(token)) {
890 next_.token = Token::IDENTIFIER;
891 next_.contextual_token = token;
892 } else {
893 next_.token = token;
894 next_.contextual_token = Token::UNINITIALIZED;
895 }
896
897 #ifdef DEBUG
898 SanityCheckTokenDesc(current_);
899 SanityCheckTokenDesc(next_);
900 SanityCheckTokenDesc(next_next_);
901 #endif
902 }
903
#ifdef DEBUG
// Debug-only consistency check of a TokenDesc. Which of the literal buffers
// may be present depends on the token kind:
//  - UNINITIALIZED: not checked.
//  - TEMPLATE_SPAN / TEMPLATE_TAIL: both literal and raw literal chars.
//  - identifiers, numbers, strings and similar: a literal, no raw literal.
//  - everything else: neither.
// Only the TEMPLATE_* tokens may carry an invalid_template_escape_message.
void Scanner::SanityCheckTokenDesc(const TokenDesc& token) const {
  switch (token.token) {
    case Token::UNINITIALIZED:
      // token.literal_chars & other members might be garbage. That's ok.
      break;

    case Token::TEMPLATE_SPAN:
    case Token::TEMPLATE_TAIL:
      DCHECK_NOT_NULL(token.raw_literal_chars);
      DCHECK_NOT_NULL(token.literal_chars);
      break;

    case Token::IDENTIFIER:
    case Token::PRIVATE_NAME:
    case Token::ESCAPED_KEYWORD:
    case Token::ESCAPED_STRICT_RESERVED_WORD:
    case Token::FUTURE_STRICT_RESERVED_WORD:
    case Token::STRING:
    case Token::REGEXP_LITERAL:
    case Token::NUMBER:
    case Token::SMI:
    case Token::BIGINT:
      DCHECK_NOT_NULL(token.literal_chars);
      DCHECK_NULL(token.raw_literal_chars);
      DCHECK_EQ(token.invalid_template_escape_message, MessageTemplate::kNone);
      break;

    default:
      DCHECK_NULL(token.literal_chars);
      DCHECK_NULL(token.raw_literal_chars);
      DCHECK_EQ(token.invalid_template_escape_message, MessageTemplate::kNone);
      break;
  }

  // A contextual token may only accompany IDENTIFIER and must itself be a
  // contextual keyword; contextual keywords never appear as the main token.
  DCHECK_IMPLIES(token.token != Token::IDENTIFIER,
                 token.contextual_token == Token::UNINITIALIZED);
  DCHECK_IMPLIES(token.contextual_token != Token::UNINITIALIZED,
                 token.token == Token::IDENTIFIER &&
                     Token::IsContextualKeyword(token.contextual_token));
  DCHECK(!Token::IsContextualKeyword(token.token));
}
#endif  // DEBUG
953
SeekForward(int pos)954 void Scanner::SeekForward(int pos) {
955 // After this call, we will have the token at the given position as
956 // the "next" token. The "current" token will be invalid.
957 if (pos == next_.location.beg_pos) return;
958 int current_pos = source_pos();
959 DCHECK_EQ(next_.location.end_pos, current_pos);
960 // Positions inside the lookahead token aren't supported.
961 DCHECK(pos >= current_pos);
962 if (pos != current_pos) {
963 source_->Seek(pos);
964 Advance();
965 // This function is only called to seek to the location
966 // of the end of a function (at the "}" token). It doesn't matter
967 // whether there was a line terminator in the part we skip.
968 has_line_terminator_before_next_ = false;
969 has_multiline_comment_before_next_ = false;
970 }
971 Scan();
972 }
973
974
975 template <bool capture_raw, bool in_template_literal>
ScanEscape()976 bool Scanner::ScanEscape() {
977 uc32 c = c0_;
978 Advance<capture_raw>();
979
980 // Skip escaped newlines.
981 if (!in_template_literal && c0_ != kEndOfInput &&
982 unibrow::IsLineTerminator(c)) {
983 // Allow escaped CR+LF newlines in multiline string literals.
984 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance<capture_raw>();
985 return true;
986 }
987
988 switch (c) {
989 case '\'': // fall through
990 case '"' : // fall through
991 case '\\': break;
992 case 'b' : c = '\b'; break;
993 case 'f' : c = '\f'; break;
994 case 'n' : c = '\n'; break;
995 case 'r' : c = '\r'; break;
996 case 't' : c = '\t'; break;
997 case 'u' : {
998 c = ScanUnicodeEscape<capture_raw>();
999 if (c < 0) return false;
1000 break;
1001 }
1002 case 'v':
1003 c = '\v';
1004 break;
1005 case 'x': {
1006 c = ScanHexNumber<capture_raw>(2);
1007 if (c < 0) return false;
1008 break;
1009 }
1010 case '0': // Fall through.
1011 case '1': // fall through
1012 case '2': // fall through
1013 case '3': // fall through
1014 case '4': // fall through
1015 case '5': // fall through
1016 case '6': // fall through
1017 case '7':
1018 c = ScanOctalEscape<capture_raw>(c, 2, in_template_literal);
1019 break;
1020 }
1021
1022 // Other escaped characters are interpreted as their non-escaped version.
1023 AddLiteralChar(c);
1024 return true;
1025 }
1026
1027 template <bool capture_raw>
ScanOctalEscape(uc32 c,int length,bool in_template_literal)1028 uc32 Scanner::ScanOctalEscape(uc32 c, int length, bool in_template_literal) {
1029 uc32 x = c - '0';
1030 int i = 0;
1031 for (; i < length; i++) {
1032 int d = c0_ - '0';
1033 if (d < 0 || d > 7) break;
1034 int nx = x * 8 + d;
1035 if (nx >= 256) break;
1036 x = nx;
1037 Advance<capture_raw>();
1038 }
1039 // Anything except '\0' is an octal escape sequence, illegal in strict mode.
1040 // Remember the position of octal escape sequences so that an error
1041 // can be reported later (in strict mode).
1042 // We don't report the error immediately, because the octal escape can
1043 // occur before the "use strict" directive.
1044 if (c != '0' || i > 0 || c0_ == '8' || c0_ == '9') {
1045 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1);
1046 octal_message_ = in_template_literal
1047 ? MessageTemplate::kTemplateOctalLiteral
1048 : MessageTemplate::kStrictOctalEscape;
1049 }
1050 return x;
1051 }
1052
1053
ScanString()1054 Token::Value Scanner::ScanString() {
1055 uc32 quote = c0_;
1056 Advance<false, false>(); // consume quote
1057
1058 LiteralScope literal(this);
1059 while (true) {
1060 if (c0_ > kMaxAscii) {
1061 HandleLeadSurrogate();
1062 break;
1063 }
1064 if (c0_ == kEndOfInput || c0_ == '\n' || c0_ == '\r') return Token::ILLEGAL;
1065 if (c0_ == quote) {
1066 literal.Complete();
1067 Advance<false, false>();
1068 return Token::STRING;
1069 }
1070 char c = static_cast<char>(c0_);
1071 if (c == '\\') break;
1072 Advance<false, false>();
1073 AddLiteralChar(c);
1074 }
1075
1076 while (c0_ != quote && c0_ != kEndOfInput &&
1077 !unibrow::IsStringLiteralLineTerminator(c0_)) {
1078 uc32 c = c0_;
1079 Advance();
1080 if (c == '\\') {
1081 if (c0_ == kEndOfInput || !ScanEscape<false, false>()) {
1082 return Token::ILLEGAL;
1083 }
1084 } else {
1085 AddLiteralChar(c);
1086 }
1087 }
1088 if (c0_ != quote) return Token::ILLEGAL;
1089 literal.Complete();
1090
1091 Advance(); // consume quote
1092 return Token::STRING;
1093 }
1094
ScanPrivateName()1095 Token::Value Scanner::ScanPrivateName() {
1096 if (!allow_harmony_private_fields()) {
1097 ReportScannerError(source_pos(),
1098 MessageTemplate::kInvalidOrUnexpectedToken);
1099 return Token::ILLEGAL;
1100 }
1101
1102 LiteralScope literal(this);
1103 DCHECK_EQ(c0_, '#');
1104 AddLiteralCharAdvance();
1105 if (c0_ == kEndOfInput || !unicode_cache_->IsIdentifierStart(c0_)) {
1106 PushBack(c0_);
1107 ReportScannerError(source_pos(),
1108 MessageTemplate::kInvalidOrUnexpectedToken);
1109 return Token::ILLEGAL;
1110 }
1111
1112 Token::Value token = ScanIdentifierOrKeywordInner(&literal);
1113 return token == Token::ILLEGAL ? Token::ILLEGAL : Token::PRIVATE_NAME;
1114 }
1115
// Scans one span of a template literal, starting just after the opening '`'
// or after the '}' that closed a substitution. Stores the resulting token in
// next_ and returns it:
//   - Token::TEMPLATE_TAIL if the span ended at a closing '`';
//   - Token::TEMPLATE_SPAN if it ended at "${", or if end of input was hit
//     (the loop then stops with |result| still at its initial value).
// Escape-sequence errors are not reported directly; they are moved onto next_
// so the parser can decide how to treat them.
Token::Value Scanner::ScanTemplateSpan() {
  // When scanning a TemplateSpan, we are looking for the following construct:
  // TEMPLATE_SPAN ::
  //     ` LiteralChars* ${
  //   | } LiteralChars* ${
  //
  // TEMPLATE_TAIL ::
  //     ` LiteralChars* `
  //   | } LiteralChar* `
  //
  // A TEMPLATE_SPAN should always be followed by an Expression, while a
  // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be
  // followed by an Expression.

  // These scoped helpers save and restore the original error state, so that we
  // can specially treat invalid escape sequences in templates (which are
  // handled by the parser). On construction each helper clears the tracked
  // message/location; on destruction it writes the saved values back.
  ErrorState scanner_error_state(&scanner_error_, &scanner_error_location_);
  ErrorState octal_error_state(&octal_message_, &octal_pos_);

  Token::Value result = Token::TEMPLATE_SPAN;
  LiteralScope literal(this);
  StartRawLiteral();
  const bool capture_raw = true;
  const bool in_template_literal = true;
  while (true) {
    // |c| is the character being classified; c0_ is already the one after it.
    uc32 c = c0_;
    Advance<capture_raw>();
    if (c == '`') {
      result = Token::TEMPLATE_TAIL;
      // The terminating '`' is not part of the raw text.
      ReduceRawLiteralLength(1);
      break;
    } else if (c == '$' && c0_ == '{') {
      Advance<capture_raw>();  // Consume '{'
      // Neither '$' nor '{' is part of the raw text.
      ReduceRawLiteralLength(2);
      break;
    } else if (c == '\\') {
      if (c0_ != kEndOfInput && unibrow::IsLineTerminator(c0_)) {
        // The TV of LineContinuation :: \ LineTerminatorSequence is the empty
        // code unit sequence. Hence nothing is added to the cooked literal
        // here; only the raw text is adjusted so <CR> and <CR><LF> read as
        // a single '\n'.
        uc32 lastChar = c0_;
        Advance<capture_raw>();
        if (lastChar == '\r') {
          ReduceRawLiteralLength(1);  // Remove \r
          if (c0_ == '\n') {
            Advance<capture_raw>();  // Adds \n
          } else {
            AddRawLiteralChar('\n');
          }
        }
      } else {
        bool success = ScanEscape<capture_raw, in_template_literal>();
        USE(success);
        DCHECK_EQ(!success, has_error());
        // For templates, invalid escape sequence checking is handled in the
        // parser. MoveErrorTo records the first such error on next_ and
        // clears the scanner-level state so scanning can continue.
        scanner_error_state.MoveErrorTo(&next_);
        octal_error_state.MoveErrorTo(&next_);
      }
    } else if (c < 0) {
      // Unterminated template literal: a negative |c| is the end-of-input
      // marker. Push it back and stop.
      PushBack(c);
      break;
    } else {
      // The TRV of LineTerminatorSequence :: <CR> is the CV 0x000A.
      // The TRV of LineTerminatorSequence :: <CR><LF> is the sequence
      // consisting of the CV 0x000A.
      if (c == '\r') {
        ReduceRawLiteralLength(1);  // Remove \r
        if (c0_ == '\n') {
          Advance<capture_raw>();  // Adds \n
        } else {
          AddRawLiteralChar('\n');
        }
        // The cooked literal gets the normalized line feed as well.
        c = '\n';
      }
      AddLiteralChar(c);
    }
  }
  literal.Complete();
  next_.location.end_pos = source_pos();
  next_.token = result;
  next_.contextual_token = Token::UNINITIALIZED;

  return result;
}
1202
1203
ScanTemplateStart()1204 Token::Value Scanner::ScanTemplateStart() {
1205 DCHECK_EQ(next_next_.token, Token::UNINITIALIZED);
1206 DCHECK_EQ(c0_, '`');
1207 next_.location.beg_pos = source_pos();
1208 Advance(); // Consume `
1209 return ScanTemplateSpan();
1210 }
1211
SourceUrl(Isolate * isolate) const1212 Handle<String> Scanner::SourceUrl(Isolate* isolate) const {
1213 Handle<String> tmp;
1214 if (source_url_.length() > 0) tmp = source_url_.Internalize(isolate);
1215 return tmp;
1216 }
1217
SourceMappingUrl(Isolate * isolate) const1218 Handle<String> Scanner::SourceMappingUrl(Isolate* isolate) const {
1219 Handle<String> tmp;
1220 if (source_mapping_url_.length() > 0)
1221 tmp = source_mapping_url_.Internalize(isolate);
1222 return tmp;
1223 }
1224
ScanDigitsWithNumericSeparators(bool (* predicate)(uc32 ch),bool is_check_first_digit)1225 bool Scanner::ScanDigitsWithNumericSeparators(bool (*predicate)(uc32 ch),
1226 bool is_check_first_digit) {
1227 // we must have at least one digit after 'x'/'b'/'o'
1228 if (is_check_first_digit && !predicate(c0_)) return false;
1229
1230 bool separator_seen = false;
1231 while (predicate(c0_) || c0_ == '_') {
1232 if (c0_ == '_') {
1233 Advance<false, false>();
1234 if (c0_ == '_') {
1235 ReportScannerError(Location(source_pos(), source_pos() + 1),
1236 MessageTemplate::kContinuousNumericSeparator);
1237 return false;
1238 }
1239 separator_seen = true;
1240 continue;
1241 }
1242 separator_seen = false;
1243 AddLiteralCharAdvance();
1244 }
1245
1246 if (separator_seen) {
1247 ReportScannerError(Location(source_pos(), source_pos() + 1),
1248 MessageTemplate::kTrailingNumericSeparator);
1249 return false;
1250 }
1251
1252 return true;
1253 }
1254
ScanDecimalDigits()1255 bool Scanner::ScanDecimalDigits() {
1256 if (allow_harmony_numeric_separator()) {
1257 return ScanDigitsWithNumericSeparators(&IsDecimalDigit, false);
1258 }
1259 while (IsDecimalDigit(c0_)) {
1260 AddLiteralCharAdvance();
1261 }
1262 return true;
1263 }
1264
ScanDecimalAsSmiWithNumericSeparators(uint64_t * value)1265 bool Scanner::ScanDecimalAsSmiWithNumericSeparators(uint64_t* value) {
1266 bool separator_seen = false;
1267 while (IsDecimalDigit(c0_) || c0_ == '_') {
1268 if (c0_ == '_') {
1269 Advance<false, false>();
1270 if (c0_ == '_') {
1271 ReportScannerError(Location(source_pos(), source_pos() + 1),
1272 MessageTemplate::kContinuousNumericSeparator);
1273 return false;
1274 }
1275 separator_seen = true;
1276 continue;
1277 }
1278 separator_seen = false;
1279 *value = 10 * *value + (c0_ - '0');
1280 uc32 first_char = c0_;
1281 Advance<false, false>();
1282 AddLiteralChar(first_char);
1283 }
1284
1285 if (separator_seen) {
1286 ReportScannerError(Location(source_pos(), source_pos() + 1),
1287 MessageTemplate::kTrailingNumericSeparator);
1288 return false;
1289 }
1290
1291 return true;
1292 }
1293
ScanDecimalAsSmi(uint64_t * value)1294 bool Scanner::ScanDecimalAsSmi(uint64_t* value) {
1295 if (allow_harmony_numeric_separator()) {
1296 return ScanDecimalAsSmiWithNumericSeparators(value);
1297 }
1298
1299 while (IsDecimalDigit(c0_)) {
1300 *value = 10 * *value + (c0_ - '0');
1301 uc32 first_char = c0_;
1302 Advance<false, false>();
1303 AddLiteralChar(first_char);
1304 }
1305 return true;
1306 }
1307
ScanBinaryDigits()1308 bool Scanner::ScanBinaryDigits() {
1309 if (allow_harmony_numeric_separator()) {
1310 return ScanDigitsWithNumericSeparators(&IsBinaryDigit, true);
1311 }
1312
1313 // we must have at least one binary digit after 'b'/'B'
1314 if (!IsBinaryDigit(c0_)) {
1315 return false;
1316 }
1317
1318 while (IsBinaryDigit(c0_)) {
1319 AddLiteralCharAdvance();
1320 }
1321 return true;
1322 }
1323
ScanOctalDigits()1324 bool Scanner::ScanOctalDigits() {
1325 if (allow_harmony_numeric_separator()) {
1326 return ScanDigitsWithNumericSeparators(&IsOctalDigit, true);
1327 }
1328
1329 // we must have at least one octal digit after 'o'/'O'
1330 if (!IsOctalDigit(c0_)) {
1331 return false;
1332 }
1333
1334 while (IsOctalDigit(c0_)) {
1335 AddLiteralCharAdvance();
1336 }
1337 return true;
1338 }
1339
ScanImplicitOctalDigits(int start_pos,Scanner::NumberKind * kind)1340 bool Scanner::ScanImplicitOctalDigits(int start_pos,
1341 Scanner::NumberKind* kind) {
1342 *kind = IMPLICIT_OCTAL;
1343
1344 while (true) {
1345 // (possible) octal number
1346 if (c0_ == '8' || c0_ == '9') {
1347 *kind = DECIMAL_WITH_LEADING_ZERO;
1348 return true;
1349 }
1350 if (c0_ < '0' || '7' < c0_) {
1351 // Octal literal finished.
1352 octal_pos_ = Location(start_pos, source_pos());
1353 octal_message_ = MessageTemplate::kStrictOctalLiteral;
1354 return true;
1355 }
1356 AddLiteralCharAdvance();
1357 }
1358 }
1359
ScanHexDigits()1360 bool Scanner::ScanHexDigits() {
1361 if (allow_harmony_numeric_separator()) {
1362 return ScanDigitsWithNumericSeparators(&IsHexDigit, true);
1363 }
1364
1365 // we must have at least one hex digit after 'x'/'X'
1366 if (!IsHexDigit(c0_)) {
1367 return false;
1368 }
1369
1370 while (IsHexDigit(c0_)) {
1371 AddLiteralCharAdvance();
1372 }
1373 return true;
1374 }
1375
ScanSignedInteger()1376 bool Scanner::ScanSignedInteger() {
1377 if (c0_ == '+' || c0_ == '-') AddLiteralCharAdvance();
1378 // we must have at least one decimal digit after 'e'/'E'
1379 if (!IsDecimalDigit(c0_)) return false;
1380 return ScanDecimalDigits();
1381 }
1382
// Scans a numeric literal starting at c0_, which must be a decimal digit.
// |seen_period| is true when the caller has already consumed a leading '.',
// in which case c0_ is the first digit of the fraction.
//
// Returns:
//   Token::SMI     - short plain decimal that fits in a Smi; the value is
//                    stored in next_.smi_value_.
//   Token::BIGINT  - literal followed by an 'n' suffix (when BigInt is on).
//   Token::NUMBER  - any other well-formed numeric literal.
//   Token::ILLEGAL - malformed literal (some paths also report a scanner
//                    error).
// For legacy octal and leading-zero decimal literals, octal_pos_ and
// octal_message_ are set (here or in ScanImplicitOctalDigits).
Token::Value Scanner::ScanNumber(bool seen_period) {
  DCHECK(IsDecimalDigit(c0_));  // the first digit of the number or the fraction

  NumberKind kind = DECIMAL;

  LiteralScope literal(this);
  // |at_start| gates the Smi fast path below: it is only valid while no digit
  // of the literal has been consumed without being accumulated into a value.
  bool at_start = !seen_period;
  int start_pos = source_pos();  // For reporting octal positions.
  if (seen_period) {
    // we have already seen a decimal point of the float
    AddLiteralChar('.');
    if (allow_harmony_numeric_separator() && c0_ == '_') {
      return Token::ILLEGAL;
    }
    // we know we have at least one digit
    if (!ScanDecimalDigits()) return Token::ILLEGAL;
  } else {
    // if the first character is '0' we must check for octals and hex
    if (c0_ == '0') {
      AddLiteralCharAdvance();

      // either 0, 0exxx, 0Exxx, 0.xxx, a hex number, a binary number or
      // an octal number.
      if (c0_ == 'x' || c0_ == 'X') {
        AddLiteralCharAdvance();
        kind = HEX;
        if (!ScanHexDigits()) return Token::ILLEGAL;
      } else if (c0_ == 'o' || c0_ == 'O') {
        AddLiteralCharAdvance();
        kind = OCTAL;
        if (!ScanOctalDigits()) return Token::ILLEGAL;
      } else if (c0_ == 'b' || c0_ == 'B') {
        AddLiteralCharAdvance();
        kind = BINARY;
        if (!ScanBinaryDigits()) return Token::ILLEGAL;
      } else if ('0' <= c0_ && c0_ <= '7') {
        kind = IMPLICIT_OCTAL;
        if (!ScanImplicitOctalDigits(start_pos, &kind)) {
          return Token::ILLEGAL;
        }
        if (kind == DECIMAL_WITH_LEADING_ZERO) {
          // ScanImplicitOctalDigits already consumed some digits without
          // accumulating their value, so the Smi fast path must be skipped.
          at_start = false;
        }
      } else if (c0_ == '8' || c0_ == '9') {
        kind = DECIMAL_WITH_LEADING_ZERO;
      } else if (allow_harmony_numeric_separator() && c0_ == '_') {
        // A separator directly after a leading zero ("0_") is rejected.
        ReportScannerError(Location(source_pos(), source_pos() + 1),
                           MessageTemplate::kZeroDigitNumericSeparator);
        return Token::ILLEGAL;
      }
    }

    // Parse decimal digits and allow trailing fractional part.
    if (kind == DECIMAL || kind == DECIMAL_WITH_LEADING_ZERO) {
      // This is an optimization for parsing Decimal numbers as Smi's.
      if (at_start) {
        uint64_t value = 0;
        // scan subsequent decimal digits
        if (!ScanDecimalAsSmi(&value)) {
          return Token::ILLEGAL;
        }

        // Take the fast path only for literals of at most 10 characters whose
        // value fits in a Smi and that are not continued by a fraction or an
        // identifier-start character (e.g. an exponent or BigInt suffix).
        if (next_.literal_chars->one_byte_literal().length() <= 10 &&
            value <= Smi::kMaxValue && c0_ != '.' &&
            (c0_ == kEndOfInput || !unicode_cache_->IsIdentifierStart(c0_))) {
          next_.smi_value_ = static_cast<uint32_t>(value);
          literal.Complete();
          HandleLeadSurrogate();

          if (kind == DECIMAL_WITH_LEADING_ZERO) {
            octal_pos_ = Location(start_pos, source_pos());
            octal_message_ = MessageTemplate::kStrictDecimalWithLeadingZero;
          }
          return Token::SMI;
        }
        HandleLeadSurrogate();
      }

      // General path: remaining integer digits, then an optional fraction.
      if (!ScanDecimalDigits()) return Token::ILLEGAL;
      if (c0_ == '.') {
        seen_period = true;
        AddLiteralCharAdvance();
        if (allow_harmony_numeric_separator() && c0_ == '_') {
          return Token::ILLEGAL;
        }
        if (!ScanDecimalDigits()) return Token::ILLEGAL;
      }
    }
  }

  bool is_bigint = false;
  // A BigInt suffix is only accepted on integer literals without a fraction;
  // legacy octal and leading-zero decimal kinds are excluded.
  if (allow_harmony_bigint() && c0_ == 'n' && !seen_period &&
      (kind == DECIMAL || kind == HEX || kind == OCTAL || kind == BINARY)) {
    // Check that the literal is within our limits for BigInt length.
    // For simplicity, use 4 bits per character to calculate the maximum
    // allowed literal length.
    static const int kMaxBigIntCharacters = BigInt::kMaxLengthBits / 4;
    // Non-decimal kinds carry a two-character prefix that is not counted.
    int length = source_pos() - start_pos - (kind != DECIMAL ? 2 : 0);
    if (length > kMaxBigIntCharacters) {
      ReportScannerError(Location(start_pos, source_pos()),
                         MessageTemplate::kBigIntTooBig);
      return Token::ILLEGAL;
    }

    is_bigint = true;
    // Consume the 'n' without adding it to the literal.
    Advance();
  } else if (c0_ == 'e' || c0_ == 'E') {
    // scan exponent, if any
    DCHECK(kind != HEX);  // 'e'/'E' must be scanned as part of the hex number

    if (!(kind == DECIMAL || kind == DECIMAL_WITH_LEADING_ZERO))
      return Token::ILLEGAL;

    // scan exponent
    AddLiteralCharAdvance();

    if (!ScanSignedInteger()) return Token::ILLEGAL;
  }

  // The source character immediately following a numeric literal must
  // not be an identifier start or a decimal digit; see ECMA-262
  // section 7.8.3, page 17 (note that we read only one decimal digit
  // if the value is 0).
  if (IsDecimalDigit(c0_) ||
      (c0_ != kEndOfInput && unicode_cache_->IsIdentifierStart(c0_)))
    return Token::ILLEGAL;

  literal.Complete();

  if (kind == DECIMAL_WITH_LEADING_ZERO) {
    octal_pos_ = Location(start_pos, source_pos());
    octal_message_ = MessageTemplate::kStrictDecimalWithLeadingZero;
  }

  return is_bigint ? Token::BIGINT : Token::NUMBER;
}
1519
1520
ScanIdentifierUnicodeEscape()1521 uc32 Scanner::ScanIdentifierUnicodeEscape() {
1522 Advance();
1523 if (c0_ != 'u') return -1;
1524 Advance();
1525 return ScanUnicodeEscape<false>();
1526 }
1527
1528
1529 template <bool capture_raw>
ScanUnicodeEscape()1530 uc32 Scanner::ScanUnicodeEscape() {
1531 // Accept both \uxxxx and \u{xxxxxx}. In the latter case, the number of
1532 // hex digits between { } is arbitrary. \ and u have already been read.
1533 if (c0_ == '{') {
1534 int begin = source_pos() - 2;
1535 Advance<capture_raw>();
1536 uc32 cp = ScanUnlimitedLengthHexNumber<capture_raw>(0x10FFFF, begin);
1537 if (cp < 0 || c0_ != '}') {
1538 ReportScannerError(source_pos(),
1539 MessageTemplate::kInvalidUnicodeEscapeSequence);
1540 return -1;
1541 }
1542 Advance<capture_raw>();
1543 return cp;
1544 }
1545 const bool unicode = true;
1546 return ScanHexNumber<capture_raw, unicode>(4);
1547 }
1548
1549
1550 // ----------------------------------------------------------------------------
1551 // Keyword Matcher
1552
// X-macro table of every word the scanner maps to a dedicated token.
//
// Entries are grouped by first character: KEYWORD_GROUP(ch) opens a group and
// each following KEYWORD(text, token) must start with that character
// (KeywordOrIdentifierToken DCHECKs this). Several strict-mode reserved words
// share Token::FUTURE_STRICT_RESERVED_WORD. Every entry's length must lie
// within the kMinLength..kMaxLength bounds that KeywordOrIdentifierToken
// statically asserts; the longest entry is currently "#constructor".
#define KEYWORDS(KEYWORD_GROUP, KEYWORD)                    \
  KEYWORD_GROUP('a')                                        \
  KEYWORD("arguments", Token::ARGUMENTS)                    \
  KEYWORD("as", Token::AS)                                  \
  KEYWORD("async", Token::ASYNC)                            \
  KEYWORD("await", Token::AWAIT)                            \
  KEYWORD("anonymous", Token::ANONYMOUS)                    \
  KEYWORD_GROUP('b')                                        \
  KEYWORD("break", Token::BREAK)                            \
  KEYWORD_GROUP('c')                                        \
  KEYWORD("case", Token::CASE)                              \
  KEYWORD("catch", Token::CATCH)                            \
  KEYWORD("class", Token::CLASS)                            \
  KEYWORD("const", Token::CONST)                            \
  KEYWORD("constructor", Token::CONSTRUCTOR)                \
  KEYWORD("continue", Token::CONTINUE)                      \
  KEYWORD_GROUP('d')                                        \
  KEYWORD("debugger", Token::DEBUGGER)                      \
  KEYWORD("default", Token::DEFAULT)                        \
  KEYWORD("delete", Token::DELETE)                          \
  KEYWORD("do", Token::DO)                                  \
  KEYWORD_GROUP('e')                                        \
  KEYWORD("else", Token::ELSE)                              \
  KEYWORD("enum", Token::ENUM)                              \
  KEYWORD("eval", Token::EVAL)                              \
  KEYWORD("export", Token::EXPORT)                          \
  KEYWORD("extends", Token::EXTENDS)                        \
  KEYWORD_GROUP('f')                                        \
  KEYWORD("false", Token::FALSE_LITERAL)                    \
  KEYWORD("finally", Token::FINALLY)                        \
  KEYWORD("for", Token::FOR)                                \
  KEYWORD("from", Token::FROM)                              \
  KEYWORD("function", Token::FUNCTION)                      \
  KEYWORD_GROUP('g')                                        \
  KEYWORD("get", Token::GET)                                \
  KEYWORD_GROUP('i')                                        \
  KEYWORD("if", Token::IF)                                  \
  KEYWORD("implements", Token::FUTURE_STRICT_RESERVED_WORD) \
  KEYWORD("import", Token::IMPORT)                          \
  KEYWORD("in", Token::IN)                                  \
  KEYWORD("instanceof", Token::INSTANCEOF)                  \
  KEYWORD("interface", Token::FUTURE_STRICT_RESERVED_WORD)  \
  KEYWORD_GROUP('l')                                        \
  KEYWORD("let", Token::LET)                                \
  KEYWORD_GROUP('m')                                        \
  KEYWORD("meta", Token::META)                              \
  KEYWORD_GROUP('n')                                        \
  KEYWORD("name", Token::NAME)                              \
  KEYWORD("new", Token::NEW)                                \
  KEYWORD("null", Token::NULL_LITERAL)                      \
  KEYWORD_GROUP('o')                                        \
  KEYWORD("of", Token::OF)                                  \
  KEYWORD_GROUP('p')                                        \
  KEYWORD("package", Token::FUTURE_STRICT_RESERVED_WORD)    \
  KEYWORD("private", Token::FUTURE_STRICT_RESERVED_WORD)    \
  KEYWORD("protected", Token::FUTURE_STRICT_RESERVED_WORD)  \
  KEYWORD("prototype", Token::PROTOTYPE)                    \
  KEYWORD("public", Token::FUTURE_STRICT_RESERVED_WORD)     \
  KEYWORD_GROUP('r')                                        \
  KEYWORD("return", Token::RETURN)                          \
  KEYWORD_GROUP('s')                                        \
  KEYWORD("set", Token::SET)                                \
  KEYWORD("static", Token::STATIC)                          \
  KEYWORD("super", Token::SUPER)                            \
  KEYWORD("switch", Token::SWITCH)                          \
  KEYWORD_GROUP('t')                                        \
  KEYWORD("target", Token::TARGET)                          \
  KEYWORD("this", Token::THIS)                              \
  KEYWORD("throw", Token::THROW)                            \
  KEYWORD("true", Token::TRUE_LITERAL)                      \
  KEYWORD("try", Token::TRY)                                \
  KEYWORD("typeof", Token::TYPEOF)                          \
  KEYWORD_GROUP('u')                                        \
  KEYWORD("undefined", Token::UNDEFINED)                    \
  KEYWORD_GROUP('v')                                        \
  KEYWORD("var", Token::VAR)                                \
  KEYWORD("void", Token::VOID)                              \
  KEYWORD_GROUP('w')                                        \
  KEYWORD("while", Token::WHILE)                            \
  KEYWORD("with", Token::WITH)                              \
  KEYWORD_GROUP('y')                                        \
  KEYWORD("yield", Token::YIELD)                            \
  KEYWORD_GROUP('_')                                        \
  KEYWORD("__proto__", Token::PROTO_UNDERSCORED)            \
  KEYWORD_GROUP('#')                                        \
  KEYWORD("#constructor", Token::PRIVATE_CONSTRUCTOR)
1639
KeywordOrIdentifierToken(const uint8_t * input,int input_length)1640 static Token::Value KeywordOrIdentifierToken(const uint8_t* input,
1641 int input_length) {
1642 DCHECK_GE(input_length, 1);
1643 const int kMinLength = 2;
1644 const int kMaxLength = 12;
1645 if (input_length < kMinLength || input_length > kMaxLength) {
1646 return Token::IDENTIFIER;
1647 }
1648 switch (input[0]) {
1649 default:
1650 #define KEYWORD_GROUP_CASE(ch) \
1651 break; \
1652 case ch:
1653 #define KEYWORD(keyword, token) \
1654 { \
1655 /* 'keyword' is a char array, so sizeof(keyword) is */ \
1656 /* strlen(keyword) plus 1 for the NUL char. */ \
1657 const int keyword_length = sizeof(keyword) - 1; \
1658 STATIC_ASSERT(keyword_length >= kMinLength); \
1659 STATIC_ASSERT(keyword_length <= kMaxLength); \
1660 DCHECK_EQ(input[0], keyword[0]); \
1661 DCHECK(token == Token::FUTURE_STRICT_RESERVED_WORD || \
1662 0 == strncmp(keyword, Token::String(token), sizeof(keyword))); \
1663 if (input_length == keyword_length && input[1] == keyword[1] && \
1664 (keyword_length <= 2 || input[2] == keyword[2]) && \
1665 (keyword_length <= 3 || input[3] == keyword[3]) && \
1666 (keyword_length <= 4 || input[4] == keyword[4]) && \
1667 (keyword_length <= 5 || input[5] == keyword[5]) && \
1668 (keyword_length <= 6 || input[6] == keyword[6]) && \
1669 (keyword_length <= 7 || input[7] == keyword[7]) && \
1670 (keyword_length <= 8 || input[8] == keyword[8]) && \
1671 (keyword_length <= 9 || input[9] == keyword[9]) && \
1672 (keyword_length <= 10 || input[10] == keyword[10])) { \
1673 return token; \
1674 } \
1675 }
1676 KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD)
1677 }
1678 return Token::IDENTIFIER;
1679 #undef KEYWORDS
1680 #undef KEYWORD
1681 #undef KEYWORD_GROUP_CASE
1682 }
1683
ScanIdentifierOrKeyword()1684 Token::Value Scanner::ScanIdentifierOrKeyword() {
1685 LiteralScope literal(this);
1686 return ScanIdentifierOrKeywordInner(&literal);
1687 }
1688
// Scans an identifier or keyword starting at c0_ (which must be an identifier
// start) and appends its characters to the current literal. |literal| is the
// caller-owned scope for that literal.
//
// Returns Token::IDENTIFIER, a keyword token from the KEYWORDS table,
// Token::ILLEGAL for a bad unicode escape, or whatever ScanIdentifierSuffix
// returns once an escape is involved. In this function |literal| is completed
// only for identifiers, future-strict reserved words and contextual keywords.
//
// The first three branches are ASCII fast paths. They bail out to the general
// loop at the bottom when they meet a non-ASCII character or a backslash.
Token::Value Scanner::ScanIdentifierOrKeywordInner(LiteralScope* literal) {
  DCHECK(unicode_cache_->IsIdentifierStart(c0_));
  if (IsInRange(c0_, 'a', 'z') || c0_ == '_') {
    // Fast path 1: starts with a lowercase letter or '_'. Only words made up
    // entirely of such characters can be keywords.
    do {
      char first_char = static_cast<char>(c0_);
      Advance<false, false>();
      AddLiteralChar(first_char);
    } while (IsInRange(c0_, 'a', 'z') || c0_ == '_');

    if (IsDecimalDigit(c0_) || IsInRange(c0_, 'A', 'Z') || c0_ == '_' ||
        c0_ == '$') {
      // Identifier starting with lowercase. The next character rules out a
      // keyword, so consume the remaining ASCII identifier characters.
      char first_char = static_cast<char>(c0_);
      Advance<false, false>();
      AddLiteralChar(first_char);
      while (IsAsciiIdentifier(c0_)) {
        char first_char = static_cast<char>(c0_);
        Advance<false, false>();
        AddLiteralChar(first_char);
      }
      if (c0_ <= kMaxAscii && c0_ != '\\') {
        literal->Complete();
        return Token::IDENTIFIER;
      }
    } else if (c0_ <= kMaxAscii && c0_ != '\\') {
      // Only a-z+ or _: could be a keyword or identifier.
      Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();
      Token::Value token =
          KeywordOrIdentifierToken(chars.start(), chars.length());
      // Other keyword tokens are returned without completing the literal.
      if (token == Token::IDENTIFIER ||
          token == Token::FUTURE_STRICT_RESERVED_WORD ||
          Token::IsContextualKeyword(token))
        literal->Complete();
      return token;
    }

    // Stopped at a non-ASCII character or a backslash: continue below.
    HandleLeadSurrogate();
  } else if (IsInRange(c0_, 'A', 'Z') || c0_ == '_' || c0_ == '$') {
    // Fast path 2: starts with an uppercase letter or '$'; never a keyword.
    // (The '_' test cannot succeed here; the first branch already took it.)
    do {
      char first_char = static_cast<char>(c0_);
      Advance<false, false>();
      AddLiteralChar(first_char);
    } while (IsAsciiIdentifier(c0_));

    if (c0_ <= kMaxAscii && c0_ != '\\') {
      literal->Complete();
      return Token::IDENTIFIER;
    }

    // Stopped at a non-ASCII character or a backslash: continue below.
    HandleLeadSurrogate();
  } else if (c0_ == '\\') {
    // Scan identifier start character.
    uc32 c = ScanIdentifierUnicodeEscape();
    // Only allow legal identifier start characters.
    if (c < 0 ||
        c == '\\' ||  // No recursive escapes.
        !unicode_cache_->IsIdentifierStart(c)) {
      return Token::ILLEGAL;
    }
    AddLiteralChar(c);
    return ScanIdentifierSuffix(literal, true);
  } else {
    // Any other identifier start character (not ASCII letter, '_', '$', '\').
    uc32 first_char = c0_;
    Advance();
    AddLiteralChar(first_char);
  }

  // Scan the rest of the identifier characters.
  while (c0_ != kEndOfInput && unicode_cache_->IsIdentifierPart(c0_)) {
    if (c0_ != '\\') {
      uc32 next_char = c0_;
      Advance();
      AddLiteralChar(next_char);
      continue;
    }
    // Fallthrough if no longer able to complete keyword. The backslash starts
    // an escape, which ScanIdentifierSuffix handles together with the rest of
    // the identifier.
    return ScanIdentifierSuffix(literal, false);
  }

  // Keywords are only looked up in one-byte literals.
  if (next_.literal_chars->is_one_byte()) {
    Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();
    Token::Value token =
        KeywordOrIdentifierToken(chars.start(), chars.length());
    if (token == Token::IDENTIFIER ||
        token == Token::FUTURE_STRICT_RESERVED_WORD ||
        Token::IsContextualKeyword(token))
      literal->Complete();
    return token;
  }
  literal->Complete();
  return Token::IDENTIFIER;
}
1781
1782
ScanIdentifierSuffix(LiteralScope * literal,bool escaped)1783 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal,
1784 bool escaped) {
1785 // Scan the rest of the identifier characters.
1786 while (c0_ != kEndOfInput && unicode_cache_->IsIdentifierPart(c0_)) {
1787 if (c0_ == '\\') {
1788 uc32 c = ScanIdentifierUnicodeEscape();
1789 escaped = true;
1790 // Only allow legal identifier part characters.
1791 if (c < 0 ||
1792 c == '\\' ||
1793 !unicode_cache_->IsIdentifierPart(c)) {
1794 return Token::ILLEGAL;
1795 }
1796 AddLiteralChar(c);
1797 } else {
1798 AddLiteralChar(c0_);
1799 Advance();
1800 }
1801 }
1802 literal->Complete();
1803
1804 if (escaped && next_.literal_chars->is_one_byte()) {
1805 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();
1806 Token::Value token =
1807 KeywordOrIdentifierToken(chars.start(), chars.length());
1808 /* TODO(adamk): YIELD should be handled specially. */
1809 if (token == Token::IDENTIFIER || Token::IsContextualKeyword(token)) {
1810 return token;
1811 } else if (token == Token::FUTURE_STRICT_RESERVED_WORD ||
1812 token == Token::LET || token == Token::STATIC) {
1813 return Token::ESCAPED_STRICT_RESERVED_WORD;
1814 } else {
1815 return Token::ESCAPED_KEYWORD;
1816 }
1817 }
1818 return Token::IDENTIFIER;
1819 }
1820
ScanRegExpPattern()1821 bool Scanner::ScanRegExpPattern() {
1822 DCHECK(next_next_.token == Token::UNINITIALIZED);
1823 DCHECK(next_.token == Token::DIV || next_.token == Token::ASSIGN_DIV);
1824
1825 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
1826 bool in_character_class = false;
1827 bool seen_equal = (next_.token == Token::ASSIGN_DIV);
1828
1829 // Previous token is either '/' or '/=', in the second case, the
1830 // pattern starts at =.
1831 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);
1832 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);
1833
1834 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
1835 // the scanner should pass uninterpreted bodies to the RegExp
1836 // constructor.
1837 LiteralScope literal(this);
1838 if (seen_equal) {
1839 AddLiteralChar('=');
1840 }
1841
1842 while (c0_ != '/' || in_character_class) {
1843 if (c0_ == kEndOfInput || unibrow::IsLineTerminator(c0_)) {
1844 return false;
1845 }
1846 if (c0_ == '\\') { // Escape sequence.
1847 AddLiteralCharAdvance();
1848 if (c0_ == kEndOfInput || unibrow::IsLineTerminator(c0_)) {
1849 return false;
1850 }
1851 AddLiteralCharAdvance();
1852 // If the escape allows more characters, i.e., \x??, \u????, or \c?,
1853 // only "safe" characters are allowed (letters, digits, underscore),
1854 // otherwise the escape isn't valid and the invalid character has
1855 // its normal meaning. I.e., we can just continue scanning without
1856 // worrying whether the following characters are part of the escape
1857 // or not, since any '/', '\\' or '[' is guaranteed to not be part
1858 // of the escape sequence.
1859
1860 // TODO(896): At some point, parse RegExps more thoroughly to capture
1861 // octal esacpes in strict mode.
1862 } else { // Unescaped character.
1863 if (c0_ == '[') in_character_class = true;
1864 if (c0_ == ']') in_character_class = false;
1865 AddLiteralCharAdvance();
1866 }
1867 }
1868 Advance(); // consume '/'
1869
1870 literal.Complete();
1871 next_.token = Token::REGEXP_LITERAL;
1872 next_.contextual_token = Token::UNINITIALIZED;
1873 return true;
1874 }
1875
1876
ScanRegExpFlags()1877 Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() {
1878 DCHECK(next_.token == Token::REGEXP_LITERAL);
1879
1880 // Scan regular expression flags.
1881 int flags = 0;
1882 while (c0_ != kEndOfInput && unicode_cache_->IsIdentifierPart(c0_)) {
1883 RegExp::Flags flag = RegExp::kNone;
1884 switch (c0_) {
1885 case 'g':
1886 flag = RegExp::kGlobal;
1887 break;
1888 case 'i':
1889 flag = RegExp::kIgnoreCase;
1890 break;
1891 case 'm':
1892 flag = RegExp::kMultiline;
1893 break;
1894 case 's':
1895 flag = RegExp::kDotAll;
1896 break;
1897 case 'u':
1898 flag = RegExp::kUnicode;
1899 break;
1900 case 'y':
1901 flag = RegExp::kSticky;
1902 break;
1903 default:
1904 return Nothing<RegExp::Flags>();
1905 }
1906 if (flags & flag) {
1907 return Nothing<RegExp::Flags>();
1908 }
1909 Advance();
1910 flags |= flag;
1911 }
1912
1913 next_.location.end_pos = source_pos();
1914 return Just(RegExp::Flags(flags));
1915 }
1916
CurrentSymbol(AstValueFactory * ast_value_factory) const1917 const AstRawString* Scanner::CurrentSymbol(
1918 AstValueFactory* ast_value_factory) const {
1919 if (is_literal_one_byte()) {
1920 return ast_value_factory->GetOneByteString(literal_one_byte_string());
1921 }
1922 return ast_value_factory->GetTwoByteString(literal_two_byte_string());
1923 }
1924
NextSymbol(AstValueFactory * ast_value_factory) const1925 const AstRawString* Scanner::NextSymbol(
1926 AstValueFactory* ast_value_factory) const {
1927 if (is_next_literal_one_byte()) {
1928 return ast_value_factory->GetOneByteString(next_literal_one_byte_string());
1929 }
1930 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string());
1931 }
1932
CurrentRawSymbol(AstValueFactory * ast_value_factory) const1933 const AstRawString* Scanner::CurrentRawSymbol(
1934 AstValueFactory* ast_value_factory) const {
1935 if (is_raw_literal_one_byte()) {
1936 return ast_value_factory->GetOneByteString(raw_literal_one_byte_string());
1937 }
1938 return ast_value_factory->GetTwoByteString(raw_literal_two_byte_string());
1939 }
1940
1941
DoubleValue()1942 double Scanner::DoubleValue() {
1943 DCHECK(is_literal_one_byte());
1944 return StringToDouble(
1945 unicode_cache_,
1946 literal_one_byte_string(),
1947 ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY);
1948 }
1949
CurrentLiteralAsCString(Zone * zone) const1950 const char* Scanner::CurrentLiteralAsCString(Zone* zone) const {
1951 DCHECK(is_literal_one_byte());
1952 Vector<const uint8_t> vector = literal_one_byte_string();
1953 int length = vector.length();
1954 char* buffer = zone->NewArray<char>(length + 1);
1955 memcpy(buffer, vector.start(), length);
1956 buffer[length] = '\0';
1957 return buffer;
1958 }
1959
IsDuplicateSymbol(DuplicateFinder * duplicate_finder,AstValueFactory * ast_value_factory) const1960 bool Scanner::IsDuplicateSymbol(DuplicateFinder* duplicate_finder,
1961 AstValueFactory* ast_value_factory) const {
1962 DCHECK_NOT_NULL(duplicate_finder);
1963 DCHECK_NOT_NULL(ast_value_factory);
1964 const AstRawString* string = CurrentSymbol(ast_value_factory);
1965 return !duplicate_finder->known_symbols_.insert(string).second;
1966 }
1967
SeekNext(size_t position)1968 void Scanner::SeekNext(size_t position) {
1969 // Use with care: This cleanly resets most, but not all scanner state.
1970 // TODO(vogelheim): Fix this, or at least DCHECK the relevant conditions.
1971
1972 // To re-scan from a given character position, we need to:
1973 // 1, Reset the current_, next_ and next_next_ tokens
1974 // (next_ + next_next_ will be overwrittem by Next(),
1975 // current_ will remain unchanged, so overwrite it fully.)
1976 current_ = {{0, 0},
1977 nullptr,
1978 nullptr,
1979 0,
1980 Token::UNINITIALIZED,
1981 MessageTemplate::kNone,
1982 {0, 0},
1983 Token::UNINITIALIZED};
1984 next_.token = Token::UNINITIALIZED;
1985 next_.contextual_token = Token::UNINITIALIZED;
1986 next_next_.token = Token::UNINITIALIZED;
1987 next_next_.contextual_token = Token::UNINITIALIZED;
1988 // 2, reset the source to the desired position,
1989 source_->Seek(position);
1990 // 3, re-scan, by scanning the look-ahead char + 1 token (next_).
1991 c0_ = source_->Advance();
1992 Next();
1993 DCHECK_EQ(next_.location.beg_pos, static_cast<int>(position));
1994 }
1995
1996 } // namespace internal
1997 } // namespace v8
1998