1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Features shared by parsing and pre-parsing scanners.
6 
7 #include "src/parsing/scanner.h"
8 
9 #include <stdint.h>
10 
11 #include <cmath>
12 
13 #include "src/ast/ast-value-factory.h"
14 #include "src/char-predicates-inl.h"
15 #include "src/conversions-inl.h"
16 #include "src/objects/bigint.h"
17 #include "src/parsing/duplicate-finder.h"  // For Scanner::FindSymbol
18 #include "src/unicode-cache-inl.h"
19 
20 namespace v8 {
21 namespace internal {
22 
23 class Scanner::ErrorState {
24  public:
ErrorState(MessageTemplate::Template * message_stack,Scanner::Location * location_stack)25   ErrorState(MessageTemplate::Template* message_stack,
26              Scanner::Location* location_stack)
27       : message_stack_(message_stack),
28         old_message_(*message_stack),
29         location_stack_(location_stack),
30         old_location_(*location_stack) {
31     *message_stack_ = MessageTemplate::kNone;
32     *location_stack_ = Location::invalid();
33   }
34 
~ErrorState()35   ~ErrorState() {
36     *message_stack_ = old_message_;
37     *location_stack_ = old_location_;
38   }
39 
MoveErrorTo(TokenDesc * dest)40   void MoveErrorTo(TokenDesc* dest) {
41     if (*message_stack_ == MessageTemplate::kNone) {
42       return;
43     }
44     if (dest->invalid_template_escape_message == MessageTemplate::kNone) {
45       dest->invalid_template_escape_message = *message_stack_;
46       dest->invalid_template_escape_location = *location_stack_;
47     }
48     *message_stack_ = MessageTemplate::kNone;
49     *location_stack_ = Location::invalid();
50   }
51 
52  private:
53   MessageTemplate::Template* const message_stack_;
54   MessageTemplate::Template const old_message_;
55   Scanner::Location* const location_stack_;
56   Scanner::Location const old_location_;
57 };
58 
59 // ----------------------------------------------------------------------------
60 // Scanner::LiteralBuffer
61 
Internalize(Isolate * isolate) const62 Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const {
63   if (is_one_byte()) {
64     return isolate->factory()->InternalizeOneByteString(one_byte_literal());
65   }
66   return isolate->factory()->InternalizeTwoByteString(two_byte_literal());
67 }
68 
NewCapacity(int min_capacity)69 int Scanner::LiteralBuffer::NewCapacity(int min_capacity) {
70   int capacity = Max(min_capacity, backing_store_.length());
71   int new_capacity = Min(capacity * kGrowthFactory, capacity + kMaxGrowth);
72   return new_capacity;
73 }
74 
ExpandBuffer()75 void Scanner::LiteralBuffer::ExpandBuffer() {
76   Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity));
77   MemCopy(new_store.start(), backing_store_.start(), position_);
78   backing_store_.Dispose();
79   backing_store_ = new_store;
80 }
81 
ConvertToTwoByte()82 void Scanner::LiteralBuffer::ConvertToTwoByte() {
83   DCHECK(is_one_byte_);
84   Vector<byte> new_store;
85   int new_content_size = position_ * kUC16Size;
86   if (new_content_size >= backing_store_.length()) {
87     // Ensure room for all currently read code units as UC16 as well
88     // as the code unit about to be stored.
89     new_store = Vector<byte>::New(NewCapacity(new_content_size));
90   } else {
91     new_store = backing_store_;
92   }
93   uint8_t* src = backing_store_.start();
94   uint16_t* dst = reinterpret_cast<uint16_t*>(new_store.start());
95   for (int i = position_ - 1; i >= 0; i--) {
96     dst[i] = src[i];
97   }
98   if (new_store.start() != backing_store_.start()) {
99     backing_store_.Dispose();
100     backing_store_ = new_store;
101   }
102   position_ = new_content_size;
103   is_one_byte_ = false;
104 }
105 
AddCharSlow(uc32 code_unit)106 void Scanner::LiteralBuffer::AddCharSlow(uc32 code_unit) {
107   if (position_ >= backing_store_.length()) ExpandBuffer();
108   if (is_one_byte_) {
109     if (code_unit <= static_cast<uc32>(unibrow::Latin1::kMaxChar)) {
110       backing_store_[position_] = static_cast<byte>(code_unit);
111       position_ += kOneByteSize;
112       return;
113     }
114     ConvertToTwoByte();
115   }
116   if (code_unit <=
117       static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
118     *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = code_unit;
119     position_ += kUC16Size;
120   } else {
121     *reinterpret_cast<uint16_t*>(&backing_store_[position_]) =
122         unibrow::Utf16::LeadSurrogate(code_unit);
123     position_ += kUC16Size;
124     if (position_ >= backing_store_.length()) ExpandBuffer();
125     *reinterpret_cast<uint16_t*>(&backing_store_[position_]) =
126         unibrow::Utf16::TrailSurrogate(code_unit);
127     position_ += kUC16Size;
128   }
129 }
130 
131 // ----------------------------------------------------------------------------
132 // Scanner::BookmarkScope
133 
134 const size_t Scanner::BookmarkScope::kBookmarkAtFirstPos =
135     std::numeric_limits<size_t>::max() - 2;
136 const size_t Scanner::BookmarkScope::kNoBookmark =
137     std::numeric_limits<size_t>::max() - 1;
138 const size_t Scanner::BookmarkScope::kBookmarkWasApplied =
139     std::numeric_limits<size_t>::max();
140 
Set()141 void Scanner::BookmarkScope::Set() {
142   DCHECK_EQ(bookmark_, kNoBookmark);
143   DCHECK_EQ(scanner_->next_next_.token, Token::UNINITIALIZED);
144 
145   // The first token is a bit special, since current_ will still be
146   // uninitialized. In this case, store kBookmarkAtFirstPos and special-case it
147   // when
148   // applying the bookmark.
149   DCHECK_IMPLIES(
150       scanner_->current_.token == Token::UNINITIALIZED,
151       scanner_->current_.location.beg_pos == scanner_->next_.location.beg_pos);
152   bookmark_ = (scanner_->current_.token == Token::UNINITIALIZED)
153                   ? kBookmarkAtFirstPos
154                   : scanner_->location().beg_pos;
155 }
156 
Apply()157 void Scanner::BookmarkScope::Apply() {
158   DCHECK(HasBeenSet());  // Caller hasn't called SetBookmark.
159   if (bookmark_ == kBookmarkAtFirstPos) {
160     scanner_->SeekNext(0);
161   } else {
162     scanner_->SeekNext(bookmark_);
163     scanner_->Next();
164     DCHECK_EQ(scanner_->location().beg_pos, static_cast<int>(bookmark_));
165   }
166   bookmark_ = kBookmarkWasApplied;
167 }
168 
HasBeenSet()169 bool Scanner::BookmarkScope::HasBeenSet() {
170   return bookmark_ != kNoBookmark && bookmark_ != kBookmarkWasApplied;
171 }
172 
HasBeenApplied()173 bool Scanner::BookmarkScope::HasBeenApplied() {
174   return bookmark_ == kBookmarkWasApplied;
175 }
176 
177 // ----------------------------------------------------------------------------
178 // Scanner
179 
Scanner(UnicodeCache * unicode_cache)180 Scanner::Scanner(UnicodeCache* unicode_cache)
181     : unicode_cache_(unicode_cache),
182       octal_pos_(Location::invalid()),
183       octal_message_(MessageTemplate::kNone),
184       found_html_comment_(false),
185       allow_harmony_bigint_(false),
186       allow_harmony_numeric_separator_(false) {}
187 
Initialize(Utf16CharacterStream * source,bool is_module)188 void Scanner::Initialize(Utf16CharacterStream* source, bool is_module) {
189   DCHECK_NOT_NULL(source);
190   source_ = source;
191   is_module_ = is_module;
192   // Need to capture identifiers in order to recognize "get" and "set"
193   // in object literals.
194   Init();
195   has_line_terminator_before_next_ = true;
196   Scan();
197 }
198 
199 template <bool capture_raw, bool unicode>
ScanHexNumber(int expected_length)200 uc32 Scanner::ScanHexNumber(int expected_length) {
201   DCHECK_LE(expected_length, 4);  // prevent overflow
202 
203   int begin = source_pos() - 2;
204   uc32 x = 0;
205   for (int i = 0; i < expected_length; i++) {
206     int d = HexValue(c0_);
207     if (d < 0) {
208       ReportScannerError(Location(begin, begin + expected_length + 2),
209                          unicode
210                              ? MessageTemplate::kInvalidUnicodeEscapeSequence
211                              : MessageTemplate::kInvalidHexEscapeSequence);
212       return -1;
213     }
214     x = x * 16 + d;
215     Advance<capture_raw>();
216   }
217 
218   return x;
219 }
220 
221 template <bool capture_raw>
ScanUnlimitedLengthHexNumber(int max_value,int beg_pos)222 uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value, int beg_pos) {
223   uc32 x = 0;
224   int d = HexValue(c0_);
225   if (d < 0) return -1;
226 
227   while (d >= 0) {
228     x = x * 16 + d;
229     if (x > max_value) {
230       ReportScannerError(Location(beg_pos, source_pos() + 1),
231                          MessageTemplate::kUndefinedUnicodeCodePoint);
232       return -1;
233     }
234     Advance<capture_raw>();
235     d = HexValue(c0_);
236   }
237 
238   return x;
239 }
240 
241 
242 // Ensure that tokens can be stored in a byte.
243 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);
244 
245 // Table of one-character tokens, by character (0x00..0x7F only).
246 // clang-format off
247 static const byte one_char_tokens[] = {
248   Token::ILLEGAL,
249   Token::ILLEGAL,
250   Token::ILLEGAL,
251   Token::ILLEGAL,
252   Token::ILLEGAL,
253   Token::ILLEGAL,
254   Token::ILLEGAL,
255   Token::ILLEGAL,
256   Token::ILLEGAL,
257   Token::ILLEGAL,
258   Token::ILLEGAL,
259   Token::ILLEGAL,
260   Token::ILLEGAL,
261   Token::ILLEGAL,
262   Token::ILLEGAL,
263   Token::ILLEGAL,
264   Token::ILLEGAL,
265   Token::ILLEGAL,
266   Token::ILLEGAL,
267   Token::ILLEGAL,
268   Token::ILLEGAL,
269   Token::ILLEGAL,
270   Token::ILLEGAL,
271   Token::ILLEGAL,
272   Token::ILLEGAL,
273   Token::ILLEGAL,
274   Token::ILLEGAL,
275   Token::ILLEGAL,
276   Token::ILLEGAL,
277   Token::ILLEGAL,
278   Token::ILLEGAL,
279   Token::ILLEGAL,
280   Token::ILLEGAL,
281   Token::ILLEGAL,
282   Token::ILLEGAL,
283   Token::ILLEGAL,
284   Token::ILLEGAL,
285   Token::ILLEGAL,
286   Token::ILLEGAL,
287   Token::ILLEGAL,
288   Token::LPAREN,       // 0x28
289   Token::RPAREN,       // 0x29
290   Token::ILLEGAL,
291   Token::ILLEGAL,
292   Token::COMMA,        // 0x2C
293   Token::ILLEGAL,
294   Token::ILLEGAL,
295   Token::ILLEGAL,
296   Token::ILLEGAL,
297   Token::ILLEGAL,
298   Token::ILLEGAL,
299   Token::ILLEGAL,
300   Token::ILLEGAL,
301   Token::ILLEGAL,
302   Token::ILLEGAL,
303   Token::ILLEGAL,
304   Token::ILLEGAL,
305   Token::ILLEGAL,
306   Token::COLON,        // 0x3A
307   Token::SEMICOLON,    // 0x3B
308   Token::ILLEGAL,
309   Token::ILLEGAL,
310   Token::ILLEGAL,
311   Token::CONDITIONAL,  // 0x3F
312   Token::ILLEGAL,
313   Token::ILLEGAL,
314   Token::ILLEGAL,
315   Token::ILLEGAL,
316   Token::ILLEGAL,
317   Token::ILLEGAL,
318   Token::ILLEGAL,
319   Token::ILLEGAL,
320   Token::ILLEGAL,
321   Token::ILLEGAL,
322   Token::ILLEGAL,
323   Token::ILLEGAL,
324   Token::ILLEGAL,
325   Token::ILLEGAL,
326   Token::ILLEGAL,
327   Token::ILLEGAL,
328   Token::ILLEGAL,
329   Token::ILLEGAL,
330   Token::ILLEGAL,
331   Token::ILLEGAL,
332   Token::ILLEGAL,
333   Token::ILLEGAL,
334   Token::ILLEGAL,
335   Token::ILLEGAL,
336   Token::ILLEGAL,
337   Token::ILLEGAL,
338   Token::ILLEGAL,
339   Token::LBRACK,     // 0x5B
340   Token::ILLEGAL,
341   Token::RBRACK,     // 0x5D
342   Token::ILLEGAL,
343   Token::ILLEGAL,
344   Token::ILLEGAL,
345   Token::ILLEGAL,
346   Token::ILLEGAL,
347   Token::ILLEGAL,
348   Token::ILLEGAL,
349   Token::ILLEGAL,
350   Token::ILLEGAL,
351   Token::ILLEGAL,
352   Token::ILLEGAL,
353   Token::ILLEGAL,
354   Token::ILLEGAL,
355   Token::ILLEGAL,
356   Token::ILLEGAL,
357   Token::ILLEGAL,
358   Token::ILLEGAL,
359   Token::ILLEGAL,
360   Token::ILLEGAL,
361   Token::ILLEGAL,
362   Token::ILLEGAL,
363   Token::ILLEGAL,
364   Token::ILLEGAL,
365   Token::ILLEGAL,
366   Token::ILLEGAL,
367   Token::ILLEGAL,
368   Token::ILLEGAL,
369   Token::ILLEGAL,
370   Token::ILLEGAL,
371   Token::LBRACE,       // 0x7B
372   Token::ILLEGAL,
373   Token::RBRACE,       // 0x7D
374   Token::BIT_NOT,      // 0x7E
375   Token::ILLEGAL
376 };
377 // clang-format on
378 
Next()379 Token::Value Scanner::Next() {
380   if (next_.token == Token::EOS) {
381     next_.location.beg_pos = current_.location.beg_pos;
382     next_.location.end_pos = current_.location.end_pos;
383   }
384   current_ = next_;
385   if (V8_UNLIKELY(next_next_.token != Token::UNINITIALIZED)) {
386     next_ = next_next_;
387     next_next_.token = Token::UNINITIALIZED;
388     next_next_.contextual_token = Token::UNINITIALIZED;
389     has_line_terminator_before_next_ = has_line_terminator_after_next_;
390     return current_.token;
391   }
392   has_line_terminator_before_next_ = false;
393   has_multiline_comment_before_next_ = false;
394   if (static_cast<unsigned>(c0_) <= 0x7F) {
395     Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]);
396     if (token != Token::ILLEGAL) {
397       int pos = source_pos();
398       next_.token = token;
399       next_.contextual_token = Token::UNINITIALIZED;
400       next_.location.beg_pos = pos;
401       next_.location.end_pos = pos + 1;
402       next_.literal_chars = nullptr;
403       next_.raw_literal_chars = nullptr;
404       next_.invalid_template_escape_message = MessageTemplate::kNone;
405       Advance();
406       return current_.token;
407     }
408   }
409   Scan();
410   return current_.token;
411 }
412 
413 
PeekAhead()414 Token::Value Scanner::PeekAhead() {
415   DCHECK(next_.token != Token::DIV);
416   DCHECK(next_.token != Token::ASSIGN_DIV);
417 
418   if (next_next_.token != Token::UNINITIALIZED) {
419     return next_next_.token;
420   }
421   TokenDesc prev = current_;
422   bool has_line_terminator_before_next =
423       has_line_terminator_before_next_ || has_multiline_comment_before_next_;
424   Next();
425   has_line_terminator_after_next_ =
426       has_line_terminator_before_next_ || has_multiline_comment_before_next_;
427   has_line_terminator_before_next_ = has_line_terminator_before_next;
428   Token::Value ret = next_.token;
429   next_next_ = next_;
430   next_ = current_;
431   current_ = prev;
432   return ret;
433 }
434 
435 
SkipWhiteSpace()436 Token::Value Scanner::SkipWhiteSpace() {
437   int start_position = source_pos();
438 
439   while (true) {
440     while (true) {
441       // Don't skip behind the end of input.
442       if (c0_ == kEndOfInput) break;
443 
444       // Advance as long as character is a WhiteSpace or LineTerminator.
445       // Remember if the latter is the case.
446       if (unibrow::IsLineTerminator(c0_)) {
447         has_line_terminator_before_next_ = true;
448       } else if (!unicode_cache_->IsWhiteSpace(c0_)) {
449         break;
450       }
451       Advance();
452     }
453 
454     // If there is an HTML comment end '-->' at the beginning of a
455     // line (with only whitespace in front of it), we treat the rest
456     // of the line as a comment. This is in line with the way
457     // SpiderMonkey handles it.
458     if (c0_ != '-' || !has_line_terminator_before_next_) break;
459 
460     Advance();
461     if (c0_ != '-') {
462       PushBack('-');  // undo Advance()
463       break;
464     }
465 
466     Advance();
467     if (c0_ != '>') {
468       PushBack2('-', '-');  // undo 2x Advance();
469       break;
470     }
471 
472     // Treat the rest of the line as a comment.
473     Token::Value token = SkipSingleHTMLComment();
474     if (token == Token::ILLEGAL) {
475       return token;
476     }
477   }
478 
479   // Return whether or not we skipped any characters.
480   if (source_pos() == start_position) {
481     return Token::ILLEGAL;
482   }
483 
484   return Token::WHITESPACE;
485 }
486 
SkipSingleHTMLComment()487 Token::Value Scanner::SkipSingleHTMLComment() {
488   if (is_module_) {
489     ReportScannerError(source_pos(), MessageTemplate::kHtmlCommentInModule);
490     return Token::ILLEGAL;
491   }
492   return SkipSingleLineComment();
493 }
494 
SkipSingleLineComment()495 Token::Value Scanner::SkipSingleLineComment() {
496   Advance();
497 
498   // The line terminator at the end of the line is not considered
499   // to be part of the single-line comment; it is recognized
500   // separately by the lexical grammar and becomes part of the
501   // stream of input elements for the syntactic grammar (see
502   // ECMA-262, section 7.4).
503   while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
504     Advance();
505   }
506 
507   return Token::WHITESPACE;
508 }
509 
510 
SkipSourceURLComment()511 Token::Value Scanner::SkipSourceURLComment() {
512   TryToParseSourceURLComment();
513   while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
514     Advance();
515   }
516 
517   return Token::WHITESPACE;
518 }
519 
520 
TryToParseSourceURLComment()521 void Scanner::TryToParseSourceURLComment() {
522   // Magic comments are of the form: //[#@]\s<name>=\s*<value>\s*.* and this
523   // function will just return if it cannot parse a magic comment.
524   if (c0_ == kEndOfInput || !unicode_cache_->IsWhiteSpace(c0_)) return;
525   Advance();
526   LiteralBuffer name;
527   while (c0_ != kEndOfInput &&
528          !unicode_cache_->IsWhiteSpaceOrLineTerminator(c0_) && c0_ != '=') {
529     name.AddChar(c0_);
530     Advance();
531   }
532   if (!name.is_one_byte()) return;
533   Vector<const uint8_t> name_literal = name.one_byte_literal();
534   LiteralBuffer* value;
535   if (name_literal == STATIC_CHAR_VECTOR("sourceURL")) {
536     value = &source_url_;
537   } else if (name_literal == STATIC_CHAR_VECTOR("sourceMappingURL")) {
538     value = &source_mapping_url_;
539   } else {
540     return;
541   }
542   if (c0_ != '=')
543     return;
544   Advance();
545   value->Reset();
546   while (c0_ != kEndOfInput && unicode_cache_->IsWhiteSpace(c0_)) {
547     Advance();
548   }
549   while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
550     // Disallowed characters.
551     if (c0_ == '"' || c0_ == '\'') {
552       value->Reset();
553       return;
554     }
555     if (unicode_cache_->IsWhiteSpace(c0_)) {
556       break;
557     }
558     value->AddChar(c0_);
559     Advance();
560   }
561   // Allow whitespace at the end.
562   while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
563     if (!unicode_cache_->IsWhiteSpace(c0_)) {
564       value->Reset();
565       break;
566     }
567     Advance();
568   }
569 }
570 
571 
SkipMultiLineComment()572 Token::Value Scanner::SkipMultiLineComment() {
573   DCHECK_EQ(c0_, '*');
574   Advance();
575 
576   while (c0_ != kEndOfInput) {
577     uc32 ch = c0_;
578     Advance();
579     if (c0_ != kEndOfInput && unibrow::IsLineTerminator(ch)) {
580       // Following ECMA-262, section 7.4, a comment containing
581       // a newline will make the comment count as a line-terminator.
582       has_multiline_comment_before_next_ = true;
583     }
584     // If we have reached the end of the multi-line comment, we
585     // consume the '/' and insert a whitespace. This way all
586     // multi-line comments are treated as whitespace.
587     if (ch == '*' && c0_ == '/') {
588       c0_ = ' ';
589       return Token::WHITESPACE;
590     }
591   }
592 
593   // Unterminated multi-line comment.
594   return Token::ILLEGAL;
595 }
596 
ScanHtmlComment()597 Token::Value Scanner::ScanHtmlComment() {
598   // Check for <!-- comments.
599   DCHECK_EQ(c0_, '!');
600   Advance();
601   if (c0_ != '-') {
602     PushBack('!');  // undo Advance()
603     return Token::LT;
604   }
605 
606   Advance();
607   if (c0_ != '-') {
608     PushBack2('-', '!');  // undo 2x Advance()
609     return Token::LT;
610   }
611 
612   found_html_comment_ = true;
613   return SkipSingleHTMLComment();
614 }
615 
Scan()616 void Scanner::Scan() {
617   next_.literal_chars = nullptr;
618   next_.raw_literal_chars = nullptr;
619   next_.invalid_template_escape_message = MessageTemplate::kNone;
620   Token::Value token;
621   do {
622     // Remember the position of the next token
623     next_.location.beg_pos = source_pos();
624 
625     switch (c0_) {
626       case ' ':
627       case '\t':
628         Advance();
629         token = Token::WHITESPACE;
630         break;
631 
632       case '\n':
633         Advance();
634         has_line_terminator_before_next_ = true;
635         token = Token::WHITESPACE;
636         break;
637 
638       case '"':
639       case '\'':
640         token = ScanString();
641         break;
642 
643       case '<':
644         // < <= << <<= <!--
645         Advance();
646         if (c0_ == '=') {
647           token = Select(Token::LTE);
648         } else if (c0_ == '<') {
649           token = Select('=', Token::ASSIGN_SHL, Token::SHL);
650         } else if (c0_ == '!') {
651           token = ScanHtmlComment();
652         } else {
653           token = Token::LT;
654         }
655         break;
656 
657       case '>':
658         // > >= >> >>= >>> >>>=
659         Advance();
660         if (c0_ == '=') {
661           token = Select(Token::GTE);
662         } else if (c0_ == '>') {
663           // >> >>= >>> >>>=
664           Advance();
665           if (c0_ == '=') {
666             token = Select(Token::ASSIGN_SAR);
667           } else if (c0_ == '>') {
668             token = Select('=', Token::ASSIGN_SHR, Token::SHR);
669           } else {
670             token = Token::SAR;
671           }
672         } else {
673           token = Token::GT;
674         }
675         break;
676 
677       case '=':
678         // = == === =>
679         Advance();
680         if (c0_ == '=') {
681           token = Select('=', Token::EQ_STRICT, Token::EQ);
682         } else if (c0_ == '>') {
683           token = Select(Token::ARROW);
684         } else {
685           token = Token::ASSIGN;
686         }
687         break;
688 
689       case '!':
690         // ! != !==
691         Advance();
692         if (c0_ == '=') {
693           token = Select('=', Token::NE_STRICT, Token::NE);
694         } else {
695           token = Token::NOT;
696         }
697         break;
698 
699       case '+':
700         // + ++ +=
701         Advance();
702         if (c0_ == '+') {
703           token = Select(Token::INC);
704         } else if (c0_ == '=') {
705           token = Select(Token::ASSIGN_ADD);
706         } else {
707           token = Token::ADD;
708         }
709         break;
710 
711       case '-':
712         // - -- --> -=
713         Advance();
714         if (c0_ == '-') {
715           Advance();
716           if (c0_ == '>' && HasAnyLineTerminatorBeforeNext()) {
717             // For compatibility with SpiderMonkey, we skip lines that
718             // start with an HTML comment end '-->'.
719             token = SkipSingleHTMLComment();
720           } else {
721             token = Token::DEC;
722           }
723         } else if (c0_ == '=') {
724           token = Select(Token::ASSIGN_SUB);
725         } else {
726           token = Token::SUB;
727         }
728         break;
729 
730       case '*':
731         // * *=
732         Advance();
733         if (c0_ == '*') {
734           token = Select('=', Token::ASSIGN_EXP, Token::EXP);
735         } else if (c0_ == '=') {
736           token = Select(Token::ASSIGN_MUL);
737         } else {
738           token = Token::MUL;
739         }
740         break;
741 
742       case '%':
743         // % %=
744         token = Select('=', Token::ASSIGN_MOD, Token::MOD);
745         break;
746 
747       case '/':
748         // /  // /* /=
749         Advance();
750         if (c0_ == '/') {
751           Advance();
752           if (c0_ == '#' || c0_ == '@') {
753             Advance();
754             token = SkipSourceURLComment();
755           } else {
756             PushBack(c0_);
757             token = SkipSingleLineComment();
758           }
759         } else if (c0_ == '*') {
760           token = SkipMultiLineComment();
761         } else if (c0_ == '=') {
762           token = Select(Token::ASSIGN_DIV);
763         } else {
764           token = Token::DIV;
765         }
766         break;
767 
768       case '&':
769         // & && &=
770         Advance();
771         if (c0_ == '&') {
772           token = Select(Token::AND);
773         } else if (c0_ == '=') {
774           token = Select(Token::ASSIGN_BIT_AND);
775         } else {
776           token = Token::BIT_AND;
777         }
778         break;
779 
780       case '|':
781         // | || |=
782         Advance();
783         if (c0_ == '|') {
784           token = Select(Token::OR);
785         } else if (c0_ == '=') {
786           token = Select(Token::ASSIGN_BIT_OR);
787         } else {
788           token = Token::BIT_OR;
789         }
790         break;
791 
792       case '^':
793         // ^ ^=
794         token = Select('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR);
795         break;
796 
797       case '.':
798         // . Number
799         Advance();
800         if (IsDecimalDigit(c0_)) {
801           token = ScanNumber(true);
802         } else {
803           token = Token::PERIOD;
804           if (c0_ == '.') {
805             Advance();
806             if (c0_ == '.') {
807               Advance();
808               token = Token::ELLIPSIS;
809             } else {
810               PushBack('.');
811             }
812           }
813         }
814         break;
815 
816       case ':':
817         token = Select(Token::COLON);
818         break;
819 
820       case ';':
821         token = Select(Token::SEMICOLON);
822         break;
823 
824       case ',':
825         token = Select(Token::COMMA);
826         break;
827 
828       case '(':
829         token = Select(Token::LPAREN);
830         break;
831 
832       case ')':
833         token = Select(Token::RPAREN);
834         break;
835 
836       case '[':
837         token = Select(Token::LBRACK);
838         break;
839 
840       case ']':
841         token = Select(Token::RBRACK);
842         break;
843 
844       case '{':
845         token = Select(Token::LBRACE);
846         break;
847 
848       case '}':
849         token = Select(Token::RBRACE);
850         break;
851 
852       case '?':
853         token = Select(Token::CONDITIONAL);
854         break;
855 
856       case '~':
857         token = Select(Token::BIT_NOT);
858         break;
859 
860       case '`':
861         token = ScanTemplateStart();
862         break;
863 
864       case '#':
865         token = ScanPrivateName();
866         break;
867 
868       default:
869         if (c0_ == kEndOfInput) {
870           token = Token::EOS;
871         } else if (unicode_cache_->IsIdentifierStart(c0_)) {
872           token = ScanIdentifierOrKeyword();
873         } else if (IsDecimalDigit(c0_)) {
874           token = ScanNumber(false);
875         } else {
876           token = SkipWhiteSpace();
877           if (token == Token::ILLEGAL) {
878             Advance();
879           }
880         }
881         break;
882     }
883 
884     // Continue scanning for tokens as long as we're just skipping
885     // whitespace.
886   } while (token == Token::WHITESPACE);
887 
888   next_.location.end_pos = source_pos();
889   if (Token::IsContextualKeyword(token)) {
890     next_.token = Token::IDENTIFIER;
891     next_.contextual_token = token;
892   } else {
893     next_.token = token;
894     next_.contextual_token = Token::UNINITIALIZED;
895   }
896 
897 #ifdef DEBUG
898   SanityCheckTokenDesc(current_);
899   SanityCheckTokenDesc(next_);
900   SanityCheckTokenDesc(next_next_);
901 #endif
902 }
903 
#ifdef DEBUG
// Debug-only consistency check for a token descriptor.
//
// Most tokens should not have literal_chars or even raw_literal_chars.
// The rules are:
// - UNINITIALIZED: we don't care.
// - TEMPLATE_*: need both literal + raw literal chars.
// - IDENTIFIERS, STRINGS, etc.: need a literal, but no raw literal.
// - all others: should have neither.
// Furthermore, only TEMPLATE_* tokens can have an
// invalid_template_escape_message.
void Scanner::SanityCheckTokenDesc(const TokenDesc& token) const {
  // Step 1: decide which payload the token kind is allowed to carry.
  enum class Payload { kUnchecked, kLiteralAndRaw, kLiteralOnly, kNothing };
  Payload expected = Payload::kNothing;
  switch (token.token) {
    case Token::UNINITIALIZED:
      // token.literal_chars & other members might be garbage. That's ok.
      expected = Payload::kUnchecked;
      break;
    case Token::TEMPLATE_SPAN:
    case Token::TEMPLATE_TAIL:
      expected = Payload::kLiteralAndRaw;
      break;
    case Token::ESCAPED_KEYWORD:
    case Token::ESCAPED_STRICT_RESERVED_WORD:
    case Token::FUTURE_STRICT_RESERVED_WORD:
    case Token::IDENTIFIER:
    case Token::NUMBER:
    case Token::BIGINT:
    case Token::REGEXP_LITERAL:
    case Token::SMI:
    case Token::STRING:
    case Token::PRIVATE_NAME:
      expected = Payload::kLiteralOnly;
      break;
    default:
      break;
  }

  // Step 2: check the payload against that expectation.
  if (expected == Payload::kLiteralAndRaw) {
    DCHECK_NOT_NULL(token.raw_literal_chars);
    DCHECK_NOT_NULL(token.literal_chars);
  } else if (expected != Payload::kUnchecked) {
    if (expected == Payload::kLiteralOnly) {
      DCHECK_NOT_NULL(token.literal_chars);
    } else {
      DCHECK_NULL(token.literal_chars);
    }
    DCHECK_NULL(token.raw_literal_chars);
    DCHECK_EQ(token.invalid_template_escape_message, MessageTemplate::kNone);
  }

  // Step 3: a contextual token may only accompany an IDENTIFIER, and a
  // contextual keyword never appears as the primary token.
  DCHECK_IMPLIES(token.token != Token::IDENTIFIER,
                 token.contextual_token == Token::UNINITIALIZED);
  DCHECK_IMPLIES(token.contextual_token != Token::UNINITIALIZED,
                 token.token == Token::IDENTIFIER &&
                     Token::IsContextualKeyword(token.contextual_token));
  DCHECK(!Token::IsContextualKeyword(token.token));
}
#endif  // DEBUG
953 
SeekForward(int pos)954 void Scanner::SeekForward(int pos) {
955   // After this call, we will have the token at the given position as
956   // the "next" token. The "current" token will be invalid.
957   if (pos == next_.location.beg_pos) return;
958   int current_pos = source_pos();
959   DCHECK_EQ(next_.location.end_pos, current_pos);
960   // Positions inside the lookahead token aren't supported.
961   DCHECK(pos >= current_pos);
962   if (pos != current_pos) {
963     source_->Seek(pos);
964     Advance();
965     // This function is only called to seek to the location
966     // of the end of a function (at the "}" token). It doesn't matter
967     // whether there was a line terminator in the part we skip.
968     has_line_terminator_before_next_ = false;
969     has_multiline_comment_before_next_ = false;
970   }
971   Scan();
972 }
973 
974 
975 template <bool capture_raw, bool in_template_literal>
ScanEscape()976 bool Scanner::ScanEscape() {
977   uc32 c = c0_;
978   Advance<capture_raw>();
979 
980   // Skip escaped newlines.
981   if (!in_template_literal && c0_ != kEndOfInput &&
982       unibrow::IsLineTerminator(c)) {
983     // Allow escaped CR+LF newlines in multiline string literals.
984     if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance<capture_raw>();
985     return true;
986   }
987 
988   switch (c) {
989     case '\'':  // fall through
990     case '"' :  // fall through
991     case '\\': break;
992     case 'b' : c = '\b'; break;
993     case 'f' : c = '\f'; break;
994     case 'n' : c = '\n'; break;
995     case 'r' : c = '\r'; break;
996     case 't' : c = '\t'; break;
997     case 'u' : {
998       c = ScanUnicodeEscape<capture_raw>();
999       if (c < 0) return false;
1000       break;
1001     }
1002     case 'v':
1003       c = '\v';
1004       break;
1005     case 'x': {
1006       c = ScanHexNumber<capture_raw>(2);
1007       if (c < 0) return false;
1008       break;
1009     }
1010     case '0':  // Fall through.
1011     case '1':  // fall through
1012     case '2':  // fall through
1013     case '3':  // fall through
1014     case '4':  // fall through
1015     case '5':  // fall through
1016     case '6':  // fall through
1017     case '7':
1018       c = ScanOctalEscape<capture_raw>(c, 2, in_template_literal);
1019       break;
1020   }
1021 
1022   // Other escaped characters are interpreted as their non-escaped version.
1023   AddLiteralChar(c);
1024   return true;
1025 }
1026 
1027 template <bool capture_raw>
ScanOctalEscape(uc32 c,int length,bool in_template_literal)1028 uc32 Scanner::ScanOctalEscape(uc32 c, int length, bool in_template_literal) {
1029   uc32 x = c - '0';
1030   int i = 0;
1031   for (; i < length; i++) {
1032     int d = c0_ - '0';
1033     if (d < 0 || d > 7) break;
1034     int nx = x * 8 + d;
1035     if (nx >= 256) break;
1036     x = nx;
1037     Advance<capture_raw>();
1038   }
1039   // Anything except '\0' is an octal escape sequence, illegal in strict mode.
1040   // Remember the position of octal escape sequences so that an error
1041   // can be reported later (in strict mode).
1042   // We don't report the error immediately, because the octal escape can
1043   // occur before the "use strict" directive.
1044   if (c != '0' || i > 0 || c0_ == '8' || c0_ == '9') {
1045     octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1);
1046     octal_message_ = in_template_literal
1047                          ? MessageTemplate::kTemplateOctalLiteral
1048                          : MessageTemplate::kStrictOctalEscape;
1049   }
1050   return x;
1051 }
1052 
1053 
ScanString()1054 Token::Value Scanner::ScanString() {
1055   uc32 quote = c0_;
1056   Advance<false, false>();  // consume quote
1057 
1058   LiteralScope literal(this);
1059   while (true) {
1060     if (c0_ > kMaxAscii) {
1061       HandleLeadSurrogate();
1062       break;
1063     }
1064     if (c0_ == kEndOfInput || c0_ == '\n' || c0_ == '\r') return Token::ILLEGAL;
1065     if (c0_ == quote) {
1066       literal.Complete();
1067       Advance<false, false>();
1068       return Token::STRING;
1069     }
1070     char c = static_cast<char>(c0_);
1071     if (c == '\\') break;
1072     Advance<false, false>();
1073     AddLiteralChar(c);
1074   }
1075 
1076   while (c0_ != quote && c0_ != kEndOfInput &&
1077          !unibrow::IsStringLiteralLineTerminator(c0_)) {
1078     uc32 c = c0_;
1079     Advance();
1080     if (c == '\\') {
1081       if (c0_ == kEndOfInput || !ScanEscape<false, false>()) {
1082         return Token::ILLEGAL;
1083       }
1084     } else {
1085       AddLiteralChar(c);
1086     }
1087   }
1088   if (c0_ != quote) return Token::ILLEGAL;
1089   literal.Complete();
1090 
1091   Advance();  // consume quote
1092   return Token::STRING;
1093 }
1094 
ScanPrivateName()1095 Token::Value Scanner::ScanPrivateName() {
1096   if (!allow_harmony_private_fields()) {
1097     ReportScannerError(source_pos(),
1098                        MessageTemplate::kInvalidOrUnexpectedToken);
1099     return Token::ILLEGAL;
1100   }
1101 
1102   LiteralScope literal(this);
1103   DCHECK_EQ(c0_, '#');
1104   AddLiteralCharAdvance();
1105   if (c0_ == kEndOfInput || !unicode_cache_->IsIdentifierStart(c0_)) {
1106     PushBack(c0_);
1107     ReportScannerError(source_pos(),
1108                        MessageTemplate::kInvalidOrUnexpectedToken);
1109     return Token::ILLEGAL;
1110   }
1111 
1112   Token::Value token = ScanIdentifierOrKeywordInner(&literal);
1113   return token == Token::ILLEGAL ? Token::ILLEGAL : Token::PRIVATE_NAME;
1114 }
1115 
ScanTemplateSpan()1116 Token::Value Scanner::ScanTemplateSpan() {
1117   // When scanning a TemplateSpan, we are looking for the following construct:
1118   // TEMPLATE_SPAN ::
1119   //     ` LiteralChars* ${
1120   //   | } LiteralChars* ${
1121   //
1122   // TEMPLATE_TAIL ::
1123   //     ` LiteralChars* `
1124   //   | } LiteralChar* `
1125   //
1126   // A TEMPLATE_SPAN should always be followed by an Expression, while a
1127   // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be
1128   // followed by an Expression.
1129 
1130   // These scoped helpers save and restore the original error state, so that we
1131   // can specially treat invalid escape sequences in templates (which are
1132   // handled by the parser).
1133   ErrorState scanner_error_state(&scanner_error_, &scanner_error_location_);
1134   ErrorState octal_error_state(&octal_message_, &octal_pos_);
1135 
1136   Token::Value result = Token::TEMPLATE_SPAN;
1137   LiteralScope literal(this);
1138   StartRawLiteral();
1139   const bool capture_raw = true;
1140   const bool in_template_literal = true;
1141   while (true) {
1142     uc32 c = c0_;
1143     Advance<capture_raw>();
1144     if (c == '`') {
1145       result = Token::TEMPLATE_TAIL;
1146       ReduceRawLiteralLength(1);
1147       break;
1148     } else if (c == '$' && c0_ == '{') {
1149       Advance<capture_raw>();  // Consume '{'
1150       ReduceRawLiteralLength(2);
1151       break;
1152     } else if (c == '\\') {
1153       if (c0_ != kEndOfInput && unibrow::IsLineTerminator(c0_)) {
1154         // The TV of LineContinuation :: \ LineTerminatorSequence is the empty
1155         // code unit sequence.
1156         uc32 lastChar = c0_;
1157         Advance<capture_raw>();
1158         if (lastChar == '\r') {
1159           ReduceRawLiteralLength(1);  // Remove \r
1160           if (c0_ == '\n') {
1161             Advance<capture_raw>();  // Adds \n
1162           } else {
1163             AddRawLiteralChar('\n');
1164           }
1165         }
1166       } else {
1167         bool success = ScanEscape<capture_raw, in_template_literal>();
1168         USE(success);
1169         DCHECK_EQ(!success, has_error());
1170         // For templates, invalid escape sequence checking is handled in the
1171         // parser.
1172         scanner_error_state.MoveErrorTo(&next_);
1173         octal_error_state.MoveErrorTo(&next_);
1174       }
1175     } else if (c < 0) {
1176       // Unterminated template literal
1177       PushBack(c);
1178       break;
1179     } else {
1180       // The TRV of LineTerminatorSequence :: <CR> is the CV 0x000A.
1181       // The TRV of LineTerminatorSequence :: <CR><LF> is the sequence
1182       // consisting of the CV 0x000A.
1183       if (c == '\r') {
1184         ReduceRawLiteralLength(1);  // Remove \r
1185         if (c0_ == '\n') {
1186           Advance<capture_raw>();  // Adds \n
1187         } else {
1188           AddRawLiteralChar('\n');
1189         }
1190         c = '\n';
1191       }
1192       AddLiteralChar(c);
1193     }
1194   }
1195   literal.Complete();
1196   next_.location.end_pos = source_pos();
1197   next_.token = result;
1198   next_.contextual_token = Token::UNINITIALIZED;
1199 
1200   return result;
1201 }
1202 
1203 
ScanTemplateStart()1204 Token::Value Scanner::ScanTemplateStart() {
1205   DCHECK_EQ(next_next_.token, Token::UNINITIALIZED);
1206   DCHECK_EQ(c0_, '`');
1207   next_.location.beg_pos = source_pos();
1208   Advance();  // Consume `
1209   return ScanTemplateSpan();
1210 }
1211 
SourceUrl(Isolate * isolate) const1212 Handle<String> Scanner::SourceUrl(Isolate* isolate) const {
1213   Handle<String> tmp;
1214   if (source_url_.length() > 0) tmp = source_url_.Internalize(isolate);
1215   return tmp;
1216 }
1217 
SourceMappingUrl(Isolate * isolate) const1218 Handle<String> Scanner::SourceMappingUrl(Isolate* isolate) const {
1219   Handle<String> tmp;
1220   if (source_mapping_url_.length() > 0)
1221     tmp = source_mapping_url_.Internalize(isolate);
1222   return tmp;
1223 }
1224 
ScanDigitsWithNumericSeparators(bool (* predicate)(uc32 ch),bool is_check_first_digit)1225 bool Scanner::ScanDigitsWithNumericSeparators(bool (*predicate)(uc32 ch),
1226                                               bool is_check_first_digit) {
1227   // we must have at least one digit after 'x'/'b'/'o'
1228   if (is_check_first_digit && !predicate(c0_)) return false;
1229 
1230   bool separator_seen = false;
1231   while (predicate(c0_) || c0_ == '_') {
1232     if (c0_ == '_') {
1233       Advance<false, false>();
1234       if (c0_ == '_') {
1235         ReportScannerError(Location(source_pos(), source_pos() + 1),
1236                            MessageTemplate::kContinuousNumericSeparator);
1237         return false;
1238       }
1239       separator_seen = true;
1240       continue;
1241     }
1242     separator_seen = false;
1243     AddLiteralCharAdvance();
1244   }
1245 
1246   if (separator_seen) {
1247     ReportScannerError(Location(source_pos(), source_pos() + 1),
1248                        MessageTemplate::kTrailingNumericSeparator);
1249     return false;
1250   }
1251 
1252   return true;
1253 }
1254 
ScanDecimalDigits()1255 bool Scanner::ScanDecimalDigits() {
1256   if (allow_harmony_numeric_separator()) {
1257     return ScanDigitsWithNumericSeparators(&IsDecimalDigit, false);
1258   }
1259   while (IsDecimalDigit(c0_)) {
1260     AddLiteralCharAdvance();
1261   }
1262   return true;
1263 }
1264 
ScanDecimalAsSmiWithNumericSeparators(uint64_t * value)1265 bool Scanner::ScanDecimalAsSmiWithNumericSeparators(uint64_t* value) {
1266   bool separator_seen = false;
1267   while (IsDecimalDigit(c0_) || c0_ == '_') {
1268     if (c0_ == '_') {
1269       Advance<false, false>();
1270       if (c0_ == '_') {
1271         ReportScannerError(Location(source_pos(), source_pos() + 1),
1272                            MessageTemplate::kContinuousNumericSeparator);
1273         return false;
1274       }
1275       separator_seen = true;
1276       continue;
1277     }
1278     separator_seen = false;
1279     *value = 10 * *value + (c0_ - '0');
1280     uc32 first_char = c0_;
1281     Advance<false, false>();
1282     AddLiteralChar(first_char);
1283   }
1284 
1285   if (separator_seen) {
1286     ReportScannerError(Location(source_pos(), source_pos() + 1),
1287                        MessageTemplate::kTrailingNumericSeparator);
1288     return false;
1289   }
1290 
1291   return true;
1292 }
1293 
ScanDecimalAsSmi(uint64_t * value)1294 bool Scanner::ScanDecimalAsSmi(uint64_t* value) {
1295   if (allow_harmony_numeric_separator()) {
1296     return ScanDecimalAsSmiWithNumericSeparators(value);
1297   }
1298 
1299   while (IsDecimalDigit(c0_)) {
1300     *value = 10 * *value + (c0_ - '0');
1301     uc32 first_char = c0_;
1302     Advance<false, false>();
1303     AddLiteralChar(first_char);
1304   }
1305   return true;
1306 }
1307 
ScanBinaryDigits()1308 bool Scanner::ScanBinaryDigits() {
1309   if (allow_harmony_numeric_separator()) {
1310     return ScanDigitsWithNumericSeparators(&IsBinaryDigit, true);
1311   }
1312 
1313   // we must have at least one binary digit after 'b'/'B'
1314   if (!IsBinaryDigit(c0_)) {
1315     return false;
1316   }
1317 
1318   while (IsBinaryDigit(c0_)) {
1319     AddLiteralCharAdvance();
1320   }
1321   return true;
1322 }
1323 
ScanOctalDigits()1324 bool Scanner::ScanOctalDigits() {
1325   if (allow_harmony_numeric_separator()) {
1326     return ScanDigitsWithNumericSeparators(&IsOctalDigit, true);
1327   }
1328 
1329   // we must have at least one octal digit after 'o'/'O'
1330   if (!IsOctalDigit(c0_)) {
1331     return false;
1332   }
1333 
1334   while (IsOctalDigit(c0_)) {
1335     AddLiteralCharAdvance();
1336   }
1337   return true;
1338 }
1339 
ScanImplicitOctalDigits(int start_pos,Scanner::NumberKind * kind)1340 bool Scanner::ScanImplicitOctalDigits(int start_pos,
1341                                       Scanner::NumberKind* kind) {
1342   *kind = IMPLICIT_OCTAL;
1343 
1344   while (true) {
1345     // (possible) octal number
1346     if (c0_ == '8' || c0_ == '9') {
1347       *kind = DECIMAL_WITH_LEADING_ZERO;
1348       return true;
1349     }
1350     if (c0_ < '0' || '7' < c0_) {
1351       // Octal literal finished.
1352       octal_pos_ = Location(start_pos, source_pos());
1353       octal_message_ = MessageTemplate::kStrictOctalLiteral;
1354       return true;
1355     }
1356     AddLiteralCharAdvance();
1357   }
1358 }
1359 
ScanHexDigits()1360 bool Scanner::ScanHexDigits() {
1361   if (allow_harmony_numeric_separator()) {
1362     return ScanDigitsWithNumericSeparators(&IsHexDigit, true);
1363   }
1364 
1365   // we must have at least one hex digit after 'x'/'X'
1366   if (!IsHexDigit(c0_)) {
1367     return false;
1368   }
1369 
1370   while (IsHexDigit(c0_)) {
1371     AddLiteralCharAdvance();
1372   }
1373   return true;
1374 }
1375 
ScanSignedInteger()1376 bool Scanner::ScanSignedInteger() {
1377   if (c0_ == '+' || c0_ == '-') AddLiteralCharAdvance();
1378   // we must have at least one decimal digit after 'e'/'E'
1379   if (!IsDecimalDigit(c0_)) return false;
1380   return ScanDecimalDigits();
1381 }
1382 
ScanNumber(bool seen_period)1383 Token::Value Scanner::ScanNumber(bool seen_period) {
1384   DCHECK(IsDecimalDigit(c0_));  // the first digit of the number or the fraction
1385 
1386   NumberKind kind = DECIMAL;
1387 
1388   LiteralScope literal(this);
1389   bool at_start = !seen_period;
1390   int start_pos = source_pos();  // For reporting octal positions.
1391   if (seen_period) {
1392     // we have already seen a decimal point of the float
1393     AddLiteralChar('.');
1394     if (allow_harmony_numeric_separator() && c0_ == '_') {
1395       return Token::ILLEGAL;
1396     }
1397     // we know we have at least one digit
1398     if (!ScanDecimalDigits()) return Token::ILLEGAL;
1399   } else {
1400     // if the first character is '0' we must check for octals and hex
1401     if (c0_ == '0') {
1402       AddLiteralCharAdvance();
1403 
1404       // either 0, 0exxx, 0Exxx, 0.xxx, a hex number, a binary number or
1405       // an octal number.
1406       if (c0_ == 'x' || c0_ == 'X') {
1407         AddLiteralCharAdvance();
1408         kind = HEX;
1409         if (!ScanHexDigits()) return Token::ILLEGAL;
1410       } else if (c0_ == 'o' || c0_ == 'O') {
1411         AddLiteralCharAdvance();
1412         kind = OCTAL;
1413         if (!ScanOctalDigits()) return Token::ILLEGAL;
1414       } else if (c0_ == 'b' || c0_ == 'B') {
1415         AddLiteralCharAdvance();
1416         kind = BINARY;
1417         if (!ScanBinaryDigits()) return Token::ILLEGAL;
1418       } else if ('0' <= c0_ && c0_ <= '7') {
1419         kind = IMPLICIT_OCTAL;
1420         if (!ScanImplicitOctalDigits(start_pos, &kind)) {
1421           return Token::ILLEGAL;
1422         }
1423         if (kind == DECIMAL_WITH_LEADING_ZERO) {
1424           at_start = false;
1425         }
1426       } else if (c0_ == '8' || c0_ == '9') {
1427         kind = DECIMAL_WITH_LEADING_ZERO;
1428       } else if (allow_harmony_numeric_separator() && c0_ == '_') {
1429         ReportScannerError(Location(source_pos(), source_pos() + 1),
1430                            MessageTemplate::kZeroDigitNumericSeparator);
1431         return Token::ILLEGAL;
1432       }
1433     }
1434 
1435     // Parse decimal digits and allow trailing fractional part.
1436     if (kind == DECIMAL || kind == DECIMAL_WITH_LEADING_ZERO) {
1437       // This is an optimization for parsing Decimal numbers as Smi's.
1438       if (at_start) {
1439         uint64_t value = 0;
1440         // scan subsequent decimal digits
1441         if (!ScanDecimalAsSmi(&value)) {
1442           return Token::ILLEGAL;
1443         }
1444 
1445         if (next_.literal_chars->one_byte_literal().length() <= 10 &&
1446             value <= Smi::kMaxValue && c0_ != '.' &&
1447             (c0_ == kEndOfInput || !unicode_cache_->IsIdentifierStart(c0_))) {
1448           next_.smi_value_ = static_cast<uint32_t>(value);
1449           literal.Complete();
1450           HandleLeadSurrogate();
1451 
1452           if (kind == DECIMAL_WITH_LEADING_ZERO) {
1453             octal_pos_ = Location(start_pos, source_pos());
1454             octal_message_ = MessageTemplate::kStrictDecimalWithLeadingZero;
1455           }
1456           return Token::SMI;
1457         }
1458         HandleLeadSurrogate();
1459       }
1460 
1461       if (!ScanDecimalDigits()) return Token::ILLEGAL;
1462       if (c0_ == '.') {
1463         seen_period = true;
1464         AddLiteralCharAdvance();
1465         if (allow_harmony_numeric_separator() && c0_ == '_') {
1466           return Token::ILLEGAL;
1467         }
1468         if (!ScanDecimalDigits()) return Token::ILLEGAL;
1469       }
1470     }
1471   }
1472 
1473   bool is_bigint = false;
1474   if (allow_harmony_bigint() && c0_ == 'n' && !seen_period &&
1475       (kind == DECIMAL || kind == HEX || kind == OCTAL || kind == BINARY)) {
1476     // Check that the literal is within our limits for BigInt length.
1477     // For simplicity, use 4 bits per character to calculate the maximum
1478     // allowed literal length.
1479     static const int kMaxBigIntCharacters = BigInt::kMaxLengthBits / 4;
1480     int length = source_pos() - start_pos - (kind != DECIMAL ? 2 : 0);
1481     if (length > kMaxBigIntCharacters) {
1482       ReportScannerError(Location(start_pos, source_pos()),
1483                          MessageTemplate::kBigIntTooBig);
1484       return Token::ILLEGAL;
1485     }
1486 
1487     is_bigint = true;
1488     Advance();
1489   } else if (c0_ == 'e' || c0_ == 'E') {
1490     // scan exponent, if any
1491     DCHECK(kind != HEX);  // 'e'/'E' must be scanned as part of the hex number
1492 
1493     if (!(kind == DECIMAL || kind == DECIMAL_WITH_LEADING_ZERO))
1494       return Token::ILLEGAL;
1495 
1496     // scan exponent
1497     AddLiteralCharAdvance();
1498 
1499     if (!ScanSignedInteger()) return Token::ILLEGAL;
1500   }
1501 
1502   // The source character immediately following a numeric literal must
1503   // not be an identifier start or a decimal digit; see ECMA-262
1504   // section 7.8.3, page 17 (note that we read only one decimal digit
1505   // if the value is 0).
1506   if (IsDecimalDigit(c0_) ||
1507       (c0_ != kEndOfInput && unicode_cache_->IsIdentifierStart(c0_)))
1508     return Token::ILLEGAL;
1509 
1510   literal.Complete();
1511 
1512   if (kind == DECIMAL_WITH_LEADING_ZERO) {
1513     octal_pos_ = Location(start_pos, source_pos());
1514     octal_message_ = MessageTemplate::kStrictDecimalWithLeadingZero;
1515   }
1516 
1517   return is_bigint ? Token::BIGINT : Token::NUMBER;
1518 }
1519 
1520 
ScanIdentifierUnicodeEscape()1521 uc32 Scanner::ScanIdentifierUnicodeEscape() {
1522   Advance();
1523   if (c0_ != 'u') return -1;
1524   Advance();
1525   return ScanUnicodeEscape<false>();
1526 }
1527 
1528 
1529 template <bool capture_raw>
ScanUnicodeEscape()1530 uc32 Scanner::ScanUnicodeEscape() {
1531   // Accept both \uxxxx and \u{xxxxxx}. In the latter case, the number of
1532   // hex digits between { } is arbitrary. \ and u have already been read.
1533   if (c0_ == '{') {
1534     int begin = source_pos() - 2;
1535     Advance<capture_raw>();
1536     uc32 cp = ScanUnlimitedLengthHexNumber<capture_raw>(0x10FFFF, begin);
1537     if (cp < 0 || c0_ != '}') {
1538       ReportScannerError(source_pos(),
1539                          MessageTemplate::kInvalidUnicodeEscapeSequence);
1540       return -1;
1541     }
1542     Advance<capture_raw>();
1543     return cp;
1544   }
1545   const bool unicode = true;
1546   return ScanHexNumber<capture_raw, unicode>(4);
1547 }
1548 
1549 
1550 // ----------------------------------------------------------------------------
1551 // Keyword Matcher
1552 
1553 #define KEYWORDS(KEYWORD_GROUP, KEYWORD)                    \
1554   KEYWORD_GROUP('a')                                        \
1555   KEYWORD("arguments", Token::ARGUMENTS)                    \
1556   KEYWORD("as", Token::AS)                                  \
1557   KEYWORD("async", Token::ASYNC)                            \
1558   KEYWORD("await", Token::AWAIT)                            \
1559   KEYWORD("anonymous", Token::ANONYMOUS)                    \
1560   KEYWORD_GROUP('b')                                        \
1561   KEYWORD("break", Token::BREAK)                            \
1562   KEYWORD_GROUP('c')                                        \
1563   KEYWORD("case", Token::CASE)                              \
1564   KEYWORD("catch", Token::CATCH)                            \
1565   KEYWORD("class", Token::CLASS)                            \
1566   KEYWORD("const", Token::CONST)                            \
1567   KEYWORD("constructor", Token::CONSTRUCTOR)                \
1568   KEYWORD("continue", Token::CONTINUE)                      \
1569   KEYWORD_GROUP('d')                                        \
1570   KEYWORD("debugger", Token::DEBUGGER)                      \
1571   KEYWORD("default", Token::DEFAULT)                        \
1572   KEYWORD("delete", Token::DELETE)                          \
1573   KEYWORD("do", Token::DO)                                  \
1574   KEYWORD_GROUP('e')                                        \
1575   KEYWORD("else", Token::ELSE)                              \
1576   KEYWORD("enum", Token::ENUM)                              \
1577   KEYWORD("eval", Token::EVAL)                              \
1578   KEYWORD("export", Token::EXPORT)                          \
1579   KEYWORD("extends", Token::EXTENDS)                        \
1580   KEYWORD_GROUP('f')                                        \
1581   KEYWORD("false", Token::FALSE_LITERAL)                    \
1582   KEYWORD("finally", Token::FINALLY)                        \
1583   KEYWORD("for", Token::FOR)                                \
1584   KEYWORD("from", Token::FROM)                              \
1585   KEYWORD("function", Token::FUNCTION)                      \
1586   KEYWORD_GROUP('g')                                        \
1587   KEYWORD("get", Token::GET)                                \
1588   KEYWORD_GROUP('i')                                        \
1589   KEYWORD("if", Token::IF)                                  \
1590   KEYWORD("implements", Token::FUTURE_STRICT_RESERVED_WORD) \
1591   KEYWORD("import", Token::IMPORT)                          \
1592   KEYWORD("in", Token::IN)                                  \
1593   KEYWORD("instanceof", Token::INSTANCEOF)                  \
1594   KEYWORD("interface", Token::FUTURE_STRICT_RESERVED_WORD)  \
1595   KEYWORD_GROUP('l')                                        \
1596   KEYWORD("let", Token::LET)                                \
1597   KEYWORD_GROUP('m')                                        \
1598   KEYWORD("meta", Token::META)                              \
1599   KEYWORD_GROUP('n')                                        \
1600   KEYWORD("name", Token::NAME)                              \
1601   KEYWORD("new", Token::NEW)                                \
1602   KEYWORD("null", Token::NULL_LITERAL)                      \
1603   KEYWORD_GROUP('o')                                        \
1604   KEYWORD("of", Token::OF)                                  \
1605   KEYWORD_GROUP('p')                                        \
1606   KEYWORD("package", Token::FUTURE_STRICT_RESERVED_WORD)    \
1607   KEYWORD("private", Token::FUTURE_STRICT_RESERVED_WORD)    \
1608   KEYWORD("protected", Token::FUTURE_STRICT_RESERVED_WORD)  \
1609   KEYWORD("prototype", Token::PROTOTYPE)                    \
1610   KEYWORD("public", Token::FUTURE_STRICT_RESERVED_WORD)     \
1611   KEYWORD_GROUP('r')                                        \
1612   KEYWORD("return", Token::RETURN)                          \
1613   KEYWORD_GROUP('s')                                        \
1614   KEYWORD("set", Token::SET)                                \
1615   KEYWORD("static", Token::STATIC)                          \
1616   KEYWORD("super", Token::SUPER)                            \
1617   KEYWORD("switch", Token::SWITCH)                          \
1618   KEYWORD_GROUP('t')                                        \
1619   KEYWORD("target", Token::TARGET)                          \
1620   KEYWORD("this", Token::THIS)                              \
1621   KEYWORD("throw", Token::THROW)                            \
1622   KEYWORD("true", Token::TRUE_LITERAL)                      \
1623   KEYWORD("try", Token::TRY)                                \
1624   KEYWORD("typeof", Token::TYPEOF)                          \
1625   KEYWORD_GROUP('u')                                        \
1626   KEYWORD("undefined", Token::UNDEFINED)                    \
1627   KEYWORD_GROUP('v')                                        \
1628   KEYWORD("var", Token::VAR)                                \
1629   KEYWORD("void", Token::VOID)                              \
1630   KEYWORD_GROUP('w')                                        \
1631   KEYWORD("while", Token::WHILE)                            \
1632   KEYWORD("with", Token::WITH)                              \
1633   KEYWORD_GROUP('y')                                        \
1634   KEYWORD("yield", Token::YIELD)                            \
1635   KEYWORD_GROUP('_')                                        \
1636   KEYWORD("__proto__", Token::PROTO_UNDERSCORED)            \
1637   KEYWORD_GROUP('#')                                        \
1638   KEYWORD("#constructor", Token::PRIVATE_CONSTRUCTOR)
1639 
KeywordOrIdentifierToken(const uint8_t * input,int input_length)1640 static Token::Value KeywordOrIdentifierToken(const uint8_t* input,
1641                                              int input_length) {
1642   DCHECK_GE(input_length, 1);
1643   const int kMinLength = 2;
1644   const int kMaxLength = 12;
1645   if (input_length < kMinLength || input_length > kMaxLength) {
1646     return Token::IDENTIFIER;
1647   }
1648   switch (input[0]) {
1649     default:
1650 #define KEYWORD_GROUP_CASE(ch)                                \
1651       break;                                                  \
1652     case ch:
1653 #define KEYWORD(keyword, token)                                           \
1654   {                                                                       \
1655     /* 'keyword' is a char array, so sizeof(keyword) is */                \
1656     /* strlen(keyword) plus 1 for the NUL char. */                        \
1657     const int keyword_length = sizeof(keyword) - 1;                       \
1658     STATIC_ASSERT(keyword_length >= kMinLength);                          \
1659     STATIC_ASSERT(keyword_length <= kMaxLength);                          \
1660     DCHECK_EQ(input[0], keyword[0]);                                      \
1661     DCHECK(token == Token::FUTURE_STRICT_RESERVED_WORD ||                 \
1662            0 == strncmp(keyword, Token::String(token), sizeof(keyword))); \
1663     if (input_length == keyword_length && input[1] == keyword[1] &&       \
1664         (keyword_length <= 2 || input[2] == keyword[2]) &&                \
1665         (keyword_length <= 3 || input[3] == keyword[3]) &&                \
1666         (keyword_length <= 4 || input[4] == keyword[4]) &&                \
1667         (keyword_length <= 5 || input[5] == keyword[5]) &&                \
1668         (keyword_length <= 6 || input[6] == keyword[6]) &&                \
1669         (keyword_length <= 7 || input[7] == keyword[7]) &&                \
1670         (keyword_length <= 8 || input[8] == keyword[8]) &&                \
1671         (keyword_length <= 9 || input[9] == keyword[9]) &&                \
1672         (keyword_length <= 10 || input[10] == keyword[10])) {             \
1673       return token;                                                       \
1674     }                                                                     \
1675   }
1676       KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD)
1677   }
1678   return Token::IDENTIFIER;
1679 #undef KEYWORDS
1680 #undef KEYWORD
1681 #undef KEYWORD_GROUP_CASE
1682 }
1683 
ScanIdentifierOrKeyword()1684 Token::Value Scanner::ScanIdentifierOrKeyword() {
1685   LiteralScope literal(this);
1686   return ScanIdentifierOrKeywordInner(&literal);
1687 }
1688 
ScanIdentifierOrKeywordInner(LiteralScope * literal)1689 Token::Value Scanner::ScanIdentifierOrKeywordInner(LiteralScope* literal) {
1690   DCHECK(unicode_cache_->IsIdentifierStart(c0_));
1691   if (IsInRange(c0_, 'a', 'z') || c0_ == '_') {
1692     do {
1693       char first_char = static_cast<char>(c0_);
1694       Advance<false, false>();
1695       AddLiteralChar(first_char);
1696     } while (IsInRange(c0_, 'a', 'z') || c0_ == '_');
1697 
1698     if (IsDecimalDigit(c0_) || IsInRange(c0_, 'A', 'Z') || c0_ == '_' ||
1699         c0_ == '$') {
1700       // Identifier starting with lowercase.
1701       char first_char = static_cast<char>(c0_);
1702       Advance<false, false>();
1703       AddLiteralChar(first_char);
1704       while (IsAsciiIdentifier(c0_)) {
1705         char first_char = static_cast<char>(c0_);
1706         Advance<false, false>();
1707         AddLiteralChar(first_char);
1708       }
1709       if (c0_ <= kMaxAscii && c0_ != '\\') {
1710         literal->Complete();
1711         return Token::IDENTIFIER;
1712       }
1713     } else if (c0_ <= kMaxAscii && c0_ != '\\') {
1714       // Only a-z+ or _: could be a keyword or identifier.
1715       Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();
1716       Token::Value token =
1717           KeywordOrIdentifierToken(chars.start(), chars.length());
1718       if (token == Token::IDENTIFIER ||
1719           token == Token::FUTURE_STRICT_RESERVED_WORD ||
1720           Token::IsContextualKeyword(token))
1721         literal->Complete();
1722       return token;
1723     }
1724 
1725     HandleLeadSurrogate();
1726   } else if (IsInRange(c0_, 'A', 'Z') || c0_ == '_' || c0_ == '$') {
1727     do {
1728       char first_char = static_cast<char>(c0_);
1729       Advance<false, false>();
1730       AddLiteralChar(first_char);
1731     } while (IsAsciiIdentifier(c0_));
1732 
1733     if (c0_ <= kMaxAscii && c0_ != '\\') {
1734       literal->Complete();
1735       return Token::IDENTIFIER;
1736     }
1737 
1738     HandleLeadSurrogate();
1739   } else if (c0_ == '\\') {
1740     // Scan identifier start character.
1741     uc32 c = ScanIdentifierUnicodeEscape();
1742     // Only allow legal identifier start characters.
1743     if (c < 0 ||
1744         c == '\\' ||  // No recursive escapes.
1745         !unicode_cache_->IsIdentifierStart(c)) {
1746       return Token::ILLEGAL;
1747     }
1748     AddLiteralChar(c);
1749     return ScanIdentifierSuffix(literal, true);
1750   } else {
1751     uc32 first_char = c0_;
1752     Advance();
1753     AddLiteralChar(first_char);
1754   }
1755 
1756   // Scan the rest of the identifier characters.
1757   while (c0_ != kEndOfInput && unicode_cache_->IsIdentifierPart(c0_)) {
1758     if (c0_ != '\\') {
1759       uc32 next_char = c0_;
1760       Advance();
1761       AddLiteralChar(next_char);
1762       continue;
1763     }
1764     // Fallthrough if no longer able to complete keyword.
1765     return ScanIdentifierSuffix(literal, false);
1766   }
1767 
1768   if (next_.literal_chars->is_one_byte()) {
1769     Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();
1770     Token::Value token =
1771         KeywordOrIdentifierToken(chars.start(), chars.length());
1772     if (token == Token::IDENTIFIER ||
1773         token == Token::FUTURE_STRICT_RESERVED_WORD ||
1774         Token::IsContextualKeyword(token))
1775       literal->Complete();
1776     return token;
1777   }
1778   literal->Complete();
1779   return Token::IDENTIFIER;
1780 }
1781 
1782 
ScanIdentifierSuffix(LiteralScope * literal,bool escaped)1783 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal,
1784                                            bool escaped) {
1785   // Scan the rest of the identifier characters.
1786   while (c0_ != kEndOfInput && unicode_cache_->IsIdentifierPart(c0_)) {
1787     if (c0_ == '\\') {
1788       uc32 c = ScanIdentifierUnicodeEscape();
1789       escaped = true;
1790       // Only allow legal identifier part characters.
1791       if (c < 0 ||
1792           c == '\\' ||
1793           !unicode_cache_->IsIdentifierPart(c)) {
1794         return Token::ILLEGAL;
1795       }
1796       AddLiteralChar(c);
1797     } else {
1798       AddLiteralChar(c0_);
1799       Advance();
1800     }
1801   }
1802   literal->Complete();
1803 
1804   if (escaped && next_.literal_chars->is_one_byte()) {
1805     Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();
1806     Token::Value token =
1807         KeywordOrIdentifierToken(chars.start(), chars.length());
1808     /* TODO(adamk): YIELD should be handled specially. */
1809     if (token == Token::IDENTIFIER || Token::IsContextualKeyword(token)) {
1810       return token;
1811     } else if (token == Token::FUTURE_STRICT_RESERVED_WORD ||
1812                token == Token::LET || token == Token::STATIC) {
1813       return Token::ESCAPED_STRICT_RESERVED_WORD;
1814     } else {
1815       return Token::ESCAPED_KEYWORD;
1816     }
1817   }
1818   return Token::IDENTIFIER;
1819 }
1820 
ScanRegExpPattern()1821 bool Scanner::ScanRegExpPattern() {
1822   DCHECK(next_next_.token == Token::UNINITIALIZED);
1823   DCHECK(next_.token == Token::DIV || next_.token == Token::ASSIGN_DIV);
1824 
1825   // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
1826   bool in_character_class = false;
1827   bool seen_equal = (next_.token == Token::ASSIGN_DIV);
1828 
1829   // Previous token is either '/' or '/=', in the second case, the
1830   // pattern starts at =.
1831   next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);
1832   next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);
1833 
1834   // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
1835   // the scanner should pass uninterpreted bodies to the RegExp
1836   // constructor.
1837   LiteralScope literal(this);
1838   if (seen_equal) {
1839     AddLiteralChar('=');
1840   }
1841 
1842   while (c0_ != '/' || in_character_class) {
1843     if (c0_ == kEndOfInput || unibrow::IsLineTerminator(c0_)) {
1844       return false;
1845     }
1846     if (c0_ == '\\') {  // Escape sequence.
1847       AddLiteralCharAdvance();
1848       if (c0_ == kEndOfInput || unibrow::IsLineTerminator(c0_)) {
1849         return false;
1850       }
1851       AddLiteralCharAdvance();
1852       // If the escape allows more characters, i.e., \x??, \u????, or \c?,
1853       // only "safe" characters are allowed (letters, digits, underscore),
1854       // otherwise the escape isn't valid and the invalid character has
1855       // its normal meaning. I.e., we can just continue scanning without
1856       // worrying whether the following characters are part of the escape
1857       // or not, since any '/', '\\' or '[' is guaranteed to not be part
1858       // of the escape sequence.
1859 
1860       // TODO(896): At some point, parse RegExps more thoroughly to capture
1861       // octal esacpes in strict mode.
1862     } else {  // Unescaped character.
1863       if (c0_ == '[') in_character_class = true;
1864       if (c0_ == ']') in_character_class = false;
1865       AddLiteralCharAdvance();
1866     }
1867   }
1868   Advance();  // consume '/'
1869 
1870   literal.Complete();
1871   next_.token = Token::REGEXP_LITERAL;
1872   next_.contextual_token = Token::UNINITIALIZED;
1873   return true;
1874 }
1875 
1876 
ScanRegExpFlags()1877 Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() {
1878   DCHECK(next_.token == Token::REGEXP_LITERAL);
1879 
1880   // Scan regular expression flags.
1881   int flags = 0;
1882   while (c0_ != kEndOfInput && unicode_cache_->IsIdentifierPart(c0_)) {
1883     RegExp::Flags flag = RegExp::kNone;
1884     switch (c0_) {
1885       case 'g':
1886         flag = RegExp::kGlobal;
1887         break;
1888       case 'i':
1889         flag = RegExp::kIgnoreCase;
1890         break;
1891       case 'm':
1892         flag = RegExp::kMultiline;
1893         break;
1894       case 's':
1895         flag = RegExp::kDotAll;
1896         break;
1897       case 'u':
1898         flag = RegExp::kUnicode;
1899         break;
1900       case 'y':
1901         flag = RegExp::kSticky;
1902         break;
1903       default:
1904         return Nothing<RegExp::Flags>();
1905     }
1906     if (flags & flag) {
1907       return Nothing<RegExp::Flags>();
1908     }
1909     Advance();
1910     flags |= flag;
1911   }
1912 
1913   next_.location.end_pos = source_pos();
1914   return Just(RegExp::Flags(flags));
1915 }
1916 
CurrentSymbol(AstValueFactory * ast_value_factory) const1917 const AstRawString* Scanner::CurrentSymbol(
1918     AstValueFactory* ast_value_factory) const {
1919   if (is_literal_one_byte()) {
1920     return ast_value_factory->GetOneByteString(literal_one_byte_string());
1921   }
1922   return ast_value_factory->GetTwoByteString(literal_two_byte_string());
1923 }
1924 
NextSymbol(AstValueFactory * ast_value_factory) const1925 const AstRawString* Scanner::NextSymbol(
1926     AstValueFactory* ast_value_factory) const {
1927   if (is_next_literal_one_byte()) {
1928     return ast_value_factory->GetOneByteString(next_literal_one_byte_string());
1929   }
1930   return ast_value_factory->GetTwoByteString(next_literal_two_byte_string());
1931 }
1932 
CurrentRawSymbol(AstValueFactory * ast_value_factory) const1933 const AstRawString* Scanner::CurrentRawSymbol(
1934     AstValueFactory* ast_value_factory) const {
1935   if (is_raw_literal_one_byte()) {
1936     return ast_value_factory->GetOneByteString(raw_literal_one_byte_string());
1937   }
1938   return ast_value_factory->GetTwoByteString(raw_literal_two_byte_string());
1939 }
1940 
1941 
DoubleValue()1942 double Scanner::DoubleValue() {
1943   DCHECK(is_literal_one_byte());
1944   return StringToDouble(
1945       unicode_cache_,
1946       literal_one_byte_string(),
1947       ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY);
1948 }
1949 
CurrentLiteralAsCString(Zone * zone) const1950 const char* Scanner::CurrentLiteralAsCString(Zone* zone) const {
1951   DCHECK(is_literal_one_byte());
1952   Vector<const uint8_t> vector = literal_one_byte_string();
1953   int length = vector.length();
1954   char* buffer = zone->NewArray<char>(length + 1);
1955   memcpy(buffer, vector.start(), length);
1956   buffer[length] = '\0';
1957   return buffer;
1958 }
1959 
IsDuplicateSymbol(DuplicateFinder * duplicate_finder,AstValueFactory * ast_value_factory) const1960 bool Scanner::IsDuplicateSymbol(DuplicateFinder* duplicate_finder,
1961                                 AstValueFactory* ast_value_factory) const {
1962   DCHECK_NOT_NULL(duplicate_finder);
1963   DCHECK_NOT_NULL(ast_value_factory);
1964   const AstRawString* string = CurrentSymbol(ast_value_factory);
1965   return !duplicate_finder->known_symbols_.insert(string).second;
1966 }
1967 
SeekNext(size_t position)1968 void Scanner::SeekNext(size_t position) {
1969   // Use with care: This cleanly resets most, but not all scanner state.
1970   // TODO(vogelheim): Fix this, or at least DCHECK the relevant conditions.
1971 
1972   // To re-scan from a given character position, we need to:
1973   // 1, Reset the current_, next_ and next_next_ tokens
1974   //    (next_ + next_next_ will be overwrittem by Next(),
1975   //     current_ will remain unchanged, so overwrite it fully.)
1976   current_ = {{0, 0},
1977               nullptr,
1978               nullptr,
1979               0,
1980               Token::UNINITIALIZED,
1981               MessageTemplate::kNone,
1982               {0, 0},
1983               Token::UNINITIALIZED};
1984   next_.token = Token::UNINITIALIZED;
1985   next_.contextual_token = Token::UNINITIALIZED;
1986   next_next_.token = Token::UNINITIALIZED;
1987   next_next_.contextual_token = Token::UNINITIALIZED;
1988   // 2, reset the source to the desired position,
1989   source_->Seek(position);
1990   // 3, re-scan, by scanning the look-ahead char + 1 token (next_).
1991   c0_ = source_->Advance();
1992   Next();
1993   DCHECK_EQ(next_.location.beg_pos, static_cast<int>(position));
1994 }
1995 
1996 }  // namespace internal
1997 }  // namespace v8
1998