1 //! JavaScript lexer.
2 
3 use crate::numeric_value::{parse_float, parse_int, NumericLiteralBase};
4 use crate::parser::Parser;
5 use crate::unicode::{is_id_continue, is_id_start};
6 use ast::arena;
7 use ast::source_atom_set::{CommonSourceAtomSetIndices, SourceAtomSet};
8 use ast::source_slice_list::SourceSliceList;
9 use ast::SourceLocation;
10 use bumpalo::{collections::String, Bump};
11 use generated_parser::{ParseError, Result, TerminalId, Token, TokenValue};
12 use std::cell::RefCell;
13 use std::convert::TryFrom;
14 use std::rc::Rc;
15 use std::str::Chars;
16 
/// Lexer state for one piece of JavaScript source text.
///
/// Constructed with `Lexer::new` or `Lexer::with_offset`; tokens are pulled
/// out one at a time with `Lexer::next`.
pub struct Lexer<'alloc> {
    /// Bump allocator in which tokens are allocated.
    allocator: &'alloc Bump,

    /// Next token to be returned.
    token: arena::Box<'alloc, Token>,

    /// Offset just past the end of the input text, in UTF-8 bytes: the
    /// starting offset given to `with_offset` plus the length of the text.
    source_length: usize,

    /// Iterator over the remaining not-yet-parsed input.
    chars: Chars<'alloc>,

    /// Shared handle to the source atom set.
    /// NOTE(review): not used in this part of the file; presumably where
    /// identifier and string values are interned -- confirm.
    atoms: Rc<RefCell<SourceAtomSet<'alloc>>>,

    /// Shared handle to the source slice list.
    /// NOTE(review): not used in this part of the file -- confirm its role
    /// against the code that fills it.
    slices: Rc<RefCell<SourceSliceList<'alloc>>>,
}
33 
/// Kind of numeric literal.
///
/// NOTE(review): no use of this type is visible in this part of the file;
/// presumably it is what the numeric-literal scanner reports -- confirm.
enum NumericResult {
    /// An integer literal, carrying a `NumericLiteralBase` (presumably the
    /// base the literal was written in).
    Int {
        base: NumericLiteralBase,
    },
    /// A literal that is not an integer (presumably one with a fraction or
    /// an exponent).
    Float,
    /// A BigInt literal, carrying a `NumericLiteralBase`.
    BigInt {
        // The lint allowance suggests `base` is not read anywhere yet.
        #[allow(dead_code)]
        base: NumericLiteralBase,
    },
}
44 
45 impl<'alloc> Lexer<'alloc> {
new( allocator: &'alloc Bump, chars: Chars<'alloc>, atoms: Rc<RefCell<SourceAtomSet<'alloc>>>, slices: Rc<RefCell<SourceSliceList<'alloc>>>, ) -> Lexer<'alloc>46     pub fn new(
47         allocator: &'alloc Bump,
48         chars: Chars<'alloc>,
49         atoms: Rc<RefCell<SourceAtomSet<'alloc>>>,
50         slices: Rc<RefCell<SourceSliceList<'alloc>>>,
51     ) -> Lexer<'alloc> {
52         Self::with_offset(allocator, chars, 0, atoms, slices)
53     }
54 
55     /// Create a lexer for a part of a JS script or module. `offset` is the
56     /// total length of all previous parts, in bytes; source locations for
57     /// tokens created by the new lexer start counting from this number.
with_offset( allocator: &'alloc Bump, chars: Chars<'alloc>, offset: usize, atoms: Rc<RefCell<SourceAtomSet<'alloc>>>, slices: Rc<RefCell<SourceSliceList<'alloc>>>, ) -> Lexer<'alloc>58     pub fn with_offset(
59         allocator: &'alloc Bump,
60         chars: Chars<'alloc>,
61         offset: usize,
62         atoms: Rc<RefCell<SourceAtomSet<'alloc>>>,
63         slices: Rc<RefCell<SourceSliceList<'alloc>>>,
64     ) -> Lexer<'alloc> {
65         let source_length = offset + chars.as_str().len();
66         let mut token = arena::alloc(allocator, new_token());
67         token.is_on_new_line = true;
68         Lexer {
69             allocator,
70             token,
71             source_length,
72             chars,
73             atoms,
74             slices,
75         }
76     }
77 
is_looking_at(&self, s: &str) -> bool78     fn is_looking_at(&self, s: &str) -> bool {
79         self.chars.as_str().starts_with(s)
80     }
81 
offset(&self) -> usize82     pub fn offset(&self) -> usize {
83         self.source_length - self.chars.as_str().len()
84     }
85 
peek(&self) -> Option<char>86     fn peek(&self) -> Option<char> {
87         self.chars.as_str().chars().next()
88     }
89 
double_peek(&self) -> Option<char>90     fn double_peek(&self) -> Option<char> {
91         let mut chars = self.chars.as_str().chars();
92         chars.next();
93         chars.next()
94     }
95 
set_result( &mut self, terminal_id: TerminalId, loc: SourceLocation, value: TokenValue, ) -> Result<'alloc, ()>96     fn set_result(
97         &mut self,
98         terminal_id: TerminalId,
99         loc: SourceLocation,
100         value: TokenValue,
101     ) -> Result<'alloc, ()> {
102         self.token.terminal_id = terminal_id;
103         self.token.loc = loc;
104         self.token.value = value;
105         Ok(())
106     }
107 
108     #[inline]
next<'parser>( &mut self, parser: &Parser<'parser>, ) -> Result<'alloc, arena::Box<'alloc, Token>>109     pub fn next<'parser>(
110         &mut self,
111         parser: &Parser<'parser>,
112     ) -> Result<'alloc, arena::Box<'alloc, Token>> {
113         let mut next_token = arena::alloc_with(self.allocator, || new_token());
114         self.advance_impl(parser)?;
115         std::mem::swap(&mut self.token, &mut next_token);
116         Ok(next_token)
117     }
118 
unexpected_err(&mut self) -> ParseError<'alloc>119     fn unexpected_err(&mut self) -> ParseError<'alloc> {
120         if let Some(ch) = self.peek() {
121             ParseError::IllegalCharacter(ch)
122         } else {
123             ParseError::UnexpectedEnd
124         }
125     }
126 }
127 
128 /// Returns an empty token which is meant as a place holder to be mutated later.
new_token() -> Token129 fn new_token() -> Token {
130     Token::basic_token(TerminalId::End, SourceLocation::default())
131 }
132 
133 // ----------------------------------------------------------------------------
134 // 11.1 Unicode Format-Control Characters
135 
/// U+200C ZERO WIDTH NON-JOINER, written `<ZWNJ>` in the spec.
/// Specially permitted in identifiers.
const ZWNJ: char = '\u{200C}';

/// U+200D ZERO WIDTH JOINER, written `<ZWJ>` in the spec.
/// Specially permitted in identifiers.
const ZWJ: char = '\u{200D}';

/// U+FEFF ZERO WIDTH NO-BREAK SPACE, written `<ZWNBSP>` in the spec.
/// Considered a whitespace character in JS.
const ZWNBSP: char = '\u{FEFF}';
147 
148 // ----------------------------------------------------------------------------
149 // 11.2 White Space
150 
/// U+0009 CHARACTER TABULATION, written `<TAB>` in the spec.
const TAB: char = '\t';

/// U+000B VERTICAL TAB, written `<VT>` in the spec.
const VT: char = '\x0B';

/// U+000C FORM FEED, written `<FF>` in the spec.
const FF: char = '\x0C';

/// U+0020 SPACE, written `<SP>` in the spec.
const SP: char = ' ';

/// U+00A0 NON-BREAKING SPACE, written `<NBSP>` in the spec.
const NBSP: char = '\u{A0}';
165 
166 // ----------------------------------------------------------------------------
167 // 11.3 Line Terminators
168 
/// U+000A LINE FEED, written `<LF>` in the spec.
const LF: char = '\n';

/// U+000D CARRIAGE RETURN, written `<CR>` in the spec.
const CR: char = '\r';

/// U+2028 LINE SEPARATOR, written `<LS>` in the spec.
const LS: char = '\u{2028}';

/// U+2029 PARAGRAPH SEPARATOR, written `<PS>` in the spec.
const PS: char = '\u{2029}';
180 
181 // ----------------------------------------------------------------------------
182 // 11.4 Comments
183 //
184 // Comment::
185 //     MultiLineComment
186 //     SingleLineComment
187 
188 impl<'alloc> Lexer<'alloc> {
189     /// Skip a *MultiLineComment*.
190     ///
191     /// ```text
192     /// MultiLineComment ::
193     ///     `/*` MultiLineCommentChars? `*/`
194     ///
195     /// MultiLineCommentChars ::
196     ///     MultiLineNotAsteriskChar MultiLineCommentChars?
197     ///     `*` PostAsteriskCommentChars?
198     ///
199     /// PostAsteriskCommentChars ::
200     ///     MultiLineNotForwardSlashOrAsteriskChar MultiLineCommentChars?
201     ///     `*` PostAsteriskCommentChars?
202     ///
203     /// MultiLineNotAsteriskChar ::
204     ///     SourceCharacter but not `*`
205     ///
206     /// MultiLineNotForwardSlashOrAsteriskChar ::
207     ///     SourceCharacter but not one of `/` or `*`
208     /// ```
209     ///
210     /// (B.1.3 splits MultiLineComment into two nonterminals: MultiLineComment
211     /// and SingleLineDelimitedComment. The point of that is to help specify
212     /// that a SingleLineHTMLCloseComment must occur at the start of a line. We
213     /// use `is_on_new_line` for that.)
214     ///
skip_multi_line_comment(&mut self, builder: &mut AutoCow<'alloc>) -> Result<'alloc, ()>215     fn skip_multi_line_comment(&mut self, builder: &mut AutoCow<'alloc>) -> Result<'alloc, ()> {
216         while let Some(ch) = self.chars.next() {
217             match ch {
218                 '*' if self.peek() == Some('/') => {
219                     self.chars.next();
220                     *builder = AutoCow::new(&self);
221                     return Ok(());
222                 }
223                 CR | LF | PS | LS => {
224                     self.token.is_on_new_line = true;
225                 }
226                 _ => {}
227             }
228         }
229         Err(ParseError::UnterminatedMultiLineComment.into())
230     }
231 
232     /// Skip a *SingleLineComment* and the following *LineTerminatorSequence*,
233     /// if any.
234     ///
235     /// ```text
236     /// SingleLineComment ::
237     ///     `//` SingleLineCommentChars?
238     ///
239     /// SingleLineCommentChars ::
240     ///     SingleLineCommentChar SingleLineCommentChars?
241     ///
242     /// SingleLineCommentChar ::
243     ///     SourceCharacter but not LineTerminator
244     /// ```
skip_single_line_comment(&mut self, builder: &mut AutoCow<'alloc>)245     fn skip_single_line_comment(&mut self, builder: &mut AutoCow<'alloc>) {
246         while let Some(ch) = self.chars.next() {
247             match ch {
248                 CR | LF | LS | PS => break,
249                 _ => continue,
250             }
251         }
252         *builder = AutoCow::new(&self);
253         self.token.is_on_new_line = true;
254     }
255 }
256 
257 // ----------------------------------------------------------------------------
258 // 11.6 Names and Keywords
259 
260 /// True if `c` is a one-character *IdentifierStart*.
261 ///
262 /// ```text
263 /// IdentifierStart ::
264 ///     UnicodeIDStart
265 ///     `$`
266 ///     `_`
267 ///     `\` UnicodeEscapeSequence
268 ///
269 /// UnicodeIDStart ::
270 ///     > any Unicode code point with the Unicode property "ID_Start"
271 /// ```
is_identifier_start(c: char) -> bool272 fn is_identifier_start(c: char) -> bool {
273     // Escaped case is handled separately.
274     if c.is_ascii() {
275         c == '$' || c == '_' || c.is_ascii_alphabetic()
276     } else {
277         is_id_start(c)
278     }
279 }
280 
281 /// True if `c` is a one-character *IdentifierPart*.
282 ///
283 /// ```text
284 /// IdentifierPart ::
285 ///     UnicodeIDContinue
286 ///     `$`
287 ///     `\` UnicodeEscapeSequence
288 ///     <ZWNJ>
289 ///     <ZWJ>
290 ///
291 /// UnicodeIDContinue ::
292 ///     > any Unicode code point with the Unicode property "ID_Continue"
293 /// ```
is_identifier_part(c: char) -> bool294 fn is_identifier_part(c: char) -> bool {
295     // Escaped case is handled separately.
296     if c.is_ascii() {
297         c == '$' || c == '_' || c.is_ascii_alphanumeric()
298     } else {
299         is_id_continue(c) || c == ZWNJ || c == ZWJ
300     }
301 }
302 
303 impl<'alloc> Lexer<'alloc> {
304     /// Scan the rest of an IdentifierName, having already parsed the initial
305     /// IdentifierStart and stored it in `builder`.
306     ///
307     /// On success, this returns `Ok((has_escapes, str))`, where `has_escapes`
308     /// is true if the identifier contained any UnicodeEscapeSequences, and
309     /// `str` is the un-escaped IdentifierName, including the IdentifierStart,
310     /// on success.
311     ///
312     /// ```text
313     /// IdentifierName ::
314     ///     IdentifierStart
315     ///     IdentifierName IdentifierPart
316     /// ```
identifier_name_tail( &mut self, mut builder: AutoCow<'alloc>, ) -> Result<'alloc, (bool, &'alloc str)>317     fn identifier_name_tail(
318         &mut self,
319         mut builder: AutoCow<'alloc>,
320     ) -> Result<'alloc, (bool, &'alloc str)> {
321         while let Some(ch) = self.peek() {
322             if !is_identifier_part(ch) {
323                 if ch == '\\' {
324                     self.chars.next();
325                     builder.force_allocation_without_current_ascii_char(&self);
326 
327                     let value = self.unicode_escape_sequence_after_backslash()?;
328                     if !is_identifier_part(value) {
329                         return Err(ParseError::InvalidEscapeSequence.into());
330                     }
331 
332                     builder.push_different(value);
333                     continue;
334                 }
335 
336                 break;
337             }
338             self.chars.next();
339             builder.push_matching(ch);
340         }
341         let has_different = builder.has_different();
342         Ok((has_different, builder.finish(&self)))
343     }
344 
identifier_name(&mut self, mut builder: AutoCow<'alloc>) -> Result<'alloc, &'alloc str>345     fn identifier_name(&mut self, mut builder: AutoCow<'alloc>) -> Result<'alloc, &'alloc str> {
346         match self.chars.next() {
347             None => {
348                 return Err(ParseError::UnexpectedEnd.into());
349             }
350             Some(c) => {
351                 match c {
352                     '$' | '_' | 'a'..='z' | 'A'..='Z' => {
353                         builder.push_matching(c);
354                     }
355 
356                     '\\' => {
357                         builder.force_allocation_without_current_ascii_char(&self);
358 
359                         let value = self.unicode_escape_sequence_after_backslash()?;
360                         if !is_identifier_start(value) {
361                             return Err(ParseError::IllegalCharacter(value).into());
362                         }
363                         builder.push_different(value);
364                     }
365 
366                     other if is_identifier_start(other) => {
367                         builder.push_matching(other);
368                     }
369 
370                     other => {
371                         return Err(ParseError::IllegalCharacter(other).into());
372                     }
373                 }
374                 self.identifier_name_tail(builder)
375                     .map(|(_has_escapes, name)| name)
376             }
377         }
378     }
379 
380     /// Finish scanning an *IdentifierName* or keyword, having already scanned
381     /// the *IdentifierStart* and pushed it to `builder`.
382     ///
383     /// `start` is the offset of the *IdentifierStart*.
384     ///
385     /// The lexer doesn't know the syntactic context, so it always identifies
386     /// possible keywords. It's up to the parser to understand that, for
387     /// example, `TerminalId::If` is not a keyword when it's used as a property
388     /// or method name.
389     ///
390     /// If the source string contains no escape and it matches to possible
391     /// keywords (including contextual keywords), the result is corresponding
392     /// `TerminalId`.  For example, if the source string is "yield", the result
393     /// is `TerminalId::Yield`.
394     ///
395     /// If the source string contains no escape sequence and also it doesn't
396     /// match to any possible keywords, the result is `TerminalId::Name`.
397     ///
398     /// If the source string contains at least one escape sequence,
399     /// the result is always `TerminalId::NameWithEscape`, regardless of the
400     /// StringValue of it. For example, if the source string is "\u{79}ield",
401     /// the result is `TerminalId::NameWithEscape`, and the StringValue is
402     /// "yield".
identifier_tail(&mut self, start: usize, builder: AutoCow<'alloc>) -> Result<'alloc, ()>403     fn identifier_tail(&mut self, start: usize, builder: AutoCow<'alloc>) -> Result<'alloc, ()> {
404         let (has_different, text) = self.identifier_name_tail(builder)?;
405 
406         // https://tc39.es/ecma262/#sec-keywords-and-reserved-words
407         //
408         // keywords in the grammar match literal sequences of specific
409         // SourceCharacter elements. A code point in a keyword cannot be
410         // expressed by a `\` UnicodeEscapeSequence.
411         let (id, value) = if has_different {
412             // Always return `NameWithEscape`.
413             //
414             // Error check against reserved word should be handled in the
415             // consumer.
416             (TerminalId::NameWithEscape, self.string_to_token_value(text))
417         } else {
418             match &text as &str {
419                 "as" => (
420                     TerminalId::As,
421                     TokenValue::Atom(CommonSourceAtomSetIndices::as_()),
422                 ),
423                 "async" => {
424                     /*
425                     (
426                         TerminalId::Async,
427                         TokenValue::Atom(CommonSourceAtomSetIndices::async_()),
428                     ),
429                     */
430                     return Err(ParseError::NotImplemented(
431                         "async cannot be handled in parser due to multiple lookahead",
432                     )
433                     .into());
434                 }
435                 "await" => {
436                     /*
437                     (
438                         TerminalId::Await,
439                         TokenValue::Atom(CommonSourceAtomSetIndices::await_()),
440                     ),
441                      */
442                     return Err(
443                         ParseError::NotImplemented("await cannot be handled in parser").into(),
444                     );
445                 }
446                 "break" => (
447                     TerminalId::Break,
448                     TokenValue::Atom(CommonSourceAtomSetIndices::break_()),
449                 ),
450                 "case" => (
451                     TerminalId::Case,
452                     TokenValue::Atom(CommonSourceAtomSetIndices::case()),
453                 ),
454                 "catch" => (
455                     TerminalId::Catch,
456                     TokenValue::Atom(CommonSourceAtomSetIndices::catch()),
457                 ),
458                 "class" => (
459                     TerminalId::Class,
460                     TokenValue::Atom(CommonSourceAtomSetIndices::class()),
461                 ),
462                 "const" => (
463                     TerminalId::Const,
464                     TokenValue::Atom(CommonSourceAtomSetIndices::const_()),
465                 ),
466                 "continue" => (
467                     TerminalId::Continue,
468                     TokenValue::Atom(CommonSourceAtomSetIndices::continue_()),
469                 ),
470                 "debugger" => (
471                     TerminalId::Debugger,
472                     TokenValue::Atom(CommonSourceAtomSetIndices::debugger()),
473                 ),
474                 "default" => (
475                     TerminalId::Default,
476                     TokenValue::Atom(CommonSourceAtomSetIndices::default()),
477                 ),
478                 "delete" => (
479                     TerminalId::Delete,
480                     TokenValue::Atom(CommonSourceAtomSetIndices::delete()),
481                 ),
482                 "do" => (
483                     TerminalId::Do,
484                     TokenValue::Atom(CommonSourceAtomSetIndices::do_()),
485                 ),
486                 "else" => (
487                     TerminalId::Else,
488                     TokenValue::Atom(CommonSourceAtomSetIndices::else_()),
489                 ),
490                 "enum" => (
491                     TerminalId::Enum,
492                     TokenValue::Atom(CommonSourceAtomSetIndices::enum_()),
493                 ),
494                 "export" => (
495                     TerminalId::Export,
496                     TokenValue::Atom(CommonSourceAtomSetIndices::export()),
497                 ),
498                 "extends" => (
499                     TerminalId::Extends,
500                     TokenValue::Atom(CommonSourceAtomSetIndices::extends()),
501                 ),
502                 "finally" => (
503                     TerminalId::Finally,
504                     TokenValue::Atom(CommonSourceAtomSetIndices::finally()),
505                 ),
506                 "for" => (
507                     TerminalId::For,
508                     TokenValue::Atom(CommonSourceAtomSetIndices::for_()),
509                 ),
510                 "from" => (
511                     TerminalId::From,
512                     TokenValue::Atom(CommonSourceAtomSetIndices::from()),
513                 ),
514                 "function" => (
515                     TerminalId::Function,
516                     TokenValue::Atom(CommonSourceAtomSetIndices::function()),
517                 ),
518                 "get" => (
519                     TerminalId::Get,
520                     TokenValue::Atom(CommonSourceAtomSetIndices::get()),
521                 ),
522                 "if" => (
523                     TerminalId::If,
524                     TokenValue::Atom(CommonSourceAtomSetIndices::if_()),
525                 ),
526                 "implements" => (
527                     TerminalId::Implements,
528                     TokenValue::Atom(CommonSourceAtomSetIndices::implements()),
529                 ),
530                 "import" => (
531                     TerminalId::Import,
532                     TokenValue::Atom(CommonSourceAtomSetIndices::import()),
533                 ),
534                 "in" => (
535                     TerminalId::In,
536                     TokenValue::Atom(CommonSourceAtomSetIndices::in_()),
537                 ),
538                 "instanceof" => (
539                     TerminalId::Instanceof,
540                     TokenValue::Atom(CommonSourceAtomSetIndices::instanceof()),
541                 ),
542                 "interface" => (
543                     TerminalId::Interface,
544                     TokenValue::Atom(CommonSourceAtomSetIndices::interface()),
545                 ),
546                 "let" => {
547                     /*
548                     (
549                         TerminalId::Let,
550                         TokenValue::Atom(CommonSourceAtomSetIndices::let_()),
551                     ),
552                     */
553                     return Err(ParseError::NotImplemented(
554                         "let cannot be handled in parser due to multiple lookahead",
555                     )
556                     .into());
557                 }
558                 "new" => (
559                     TerminalId::New,
560                     TokenValue::Atom(CommonSourceAtomSetIndices::new_()),
561                 ),
562                 "of" => (
563                     TerminalId::Of,
564                     TokenValue::Atom(CommonSourceAtomSetIndices::of()),
565                 ),
566                 "package" => (
567                     TerminalId::Package,
568                     TokenValue::Atom(CommonSourceAtomSetIndices::package()),
569                 ),
570                 "private" => (
571                     TerminalId::Private,
572                     TokenValue::Atom(CommonSourceAtomSetIndices::private()),
573                 ),
574                 "protected" => (
575                     TerminalId::Protected,
576                     TokenValue::Atom(CommonSourceAtomSetIndices::protected()),
577                 ),
578                 "public" => (
579                     TerminalId::Public,
580                     TokenValue::Atom(CommonSourceAtomSetIndices::public()),
581                 ),
582                 "return" => (
583                     TerminalId::Return,
584                     TokenValue::Atom(CommonSourceAtomSetIndices::return_()),
585                 ),
586                 "set" => (
587                     TerminalId::Set,
588                     TokenValue::Atom(CommonSourceAtomSetIndices::set()),
589                 ),
590                 "static" => (
591                     TerminalId::Static,
592                     TokenValue::Atom(CommonSourceAtomSetIndices::static_()),
593                 ),
594                 "super" => (
595                     TerminalId::Super,
596                     TokenValue::Atom(CommonSourceAtomSetIndices::super_()),
597                 ),
598                 "switch" => (
599                     TerminalId::Switch,
600                     TokenValue::Atom(CommonSourceAtomSetIndices::switch()),
601                 ),
602                 "target" => (
603                     TerminalId::Target,
604                     TokenValue::Atom(CommonSourceAtomSetIndices::target()),
605                 ),
606                 "this" => (
607                     TerminalId::This,
608                     TokenValue::Atom(CommonSourceAtomSetIndices::this()),
609                 ),
610                 "throw" => (
611                     TerminalId::Throw,
612                     TokenValue::Atom(CommonSourceAtomSetIndices::throw()),
613                 ),
614                 "try" => (
615                     TerminalId::Try,
616                     TokenValue::Atom(CommonSourceAtomSetIndices::try_()),
617                 ),
618                 "typeof" => (
619                     TerminalId::Typeof,
620                     TokenValue::Atom(CommonSourceAtomSetIndices::typeof_()),
621                 ),
622                 "var" => (
623                     TerminalId::Var,
624                     TokenValue::Atom(CommonSourceAtomSetIndices::var()),
625                 ),
626                 "void" => (
627                     TerminalId::Void,
628                     TokenValue::Atom(CommonSourceAtomSetIndices::void()),
629                 ),
630                 "while" => (
631                     TerminalId::While,
632                     TokenValue::Atom(CommonSourceAtomSetIndices::while_()),
633                 ),
634                 "with" => (
635                     TerminalId::With,
636                     TokenValue::Atom(CommonSourceAtomSetIndices::with()),
637                 ),
638                 "yield" => {
639                     /*
640                     (
641                         TerminalId::Yield,
642                         TokenValue::Atom(CommonSourceAtomSetIndices::yield_()),
643                     ),
644                      */
645                     return Err(
646                         ParseError::NotImplemented("yield cannot be handled in parser").into(),
647                     );
648                 }
649                 "null" => (
650                     TerminalId::NullLiteral,
651                     TokenValue::Atom(CommonSourceAtomSetIndices::null()),
652                 ),
653                 "true" => (
654                     TerminalId::BooleanLiteral,
655                     TokenValue::Atom(CommonSourceAtomSetIndices::true_()),
656                 ),
657                 "false" => (
658                     TerminalId::BooleanLiteral,
659                     TokenValue::Atom(CommonSourceAtomSetIndices::false_()),
660                 ),
661                 _ => (TerminalId::Name, self.string_to_token_value(text)),
662             }
663         };
664 
665         self.set_result(id, SourceLocation::new(start, self.offset()), value)
666     }
667 
668     /// ```text
669     /// PrivateIdentifier::
670     ///     `#` IdentifierName
671     /// ```
private_identifier(&mut self, start: usize, builder: AutoCow<'alloc>) -> Result<'alloc, ()>672     fn private_identifier(&mut self, start: usize, builder: AutoCow<'alloc>) -> Result<'alloc, ()> {
673         let name = self.identifier_name(builder)?;
674         let value = self.string_to_token_value(name);
675         self.set_result(
676             TerminalId::PrivateIdentifier,
677             SourceLocation::new(start, self.offset()),
678             value,
679         )
680     }
681 
682     /// ```text
683     /// UnicodeEscapeSequence::
684     ///     `u` Hex4Digits
685     ///     `u{` CodePoint `}`
686     /// ```
unicode_escape_sequence_after_backslash(&mut self) -> Result<'alloc, char>687     fn unicode_escape_sequence_after_backslash(&mut self) -> Result<'alloc, char> {
688         match self.chars.next() {
689             Some('u') => {}
690             _ => {
691                 return Err(ParseError::InvalidEscapeSequence.into());
692             }
693         }
694         self.unicode_escape_sequence_after_backslash_and_u()
695     }
696 
unicode_escape_sequence_after_backslash_and_u(&mut self) -> Result<'alloc, char>697     fn unicode_escape_sequence_after_backslash_and_u(&mut self) -> Result<'alloc, char> {
698         let value = match self.peek() {
699             Some('{') => {
700                 self.chars.next();
701 
702                 let value = self.code_point()?;
703                 match self.chars.next() {
704                     Some('}') => {}
705                     _ => {
706                         return Err(ParseError::InvalidEscapeSequence.into());
707                     }
708                 }
709                 value
710             }
711             _ => self.hex_4_digits()?,
712         };
713 
714         Ok(value)
715     }
716 }
717 
718 impl<'alloc> Lexer<'alloc> {
719     // ------------------------------------------------------------------------
720     // 11.8.3 Numeric Literals
721 
722     /// Advance over decimal digits in the input.
723     ///
724     /// ```text
725     /// NumericLiteralSeparator::
726     ///     `_`
727     ///
728     /// DecimalDigits ::
729     ///     DecimalDigit
730     ///     DecimalDigits NumericLiteralSeparator? DecimalDigit
731     ///
732     /// DecimalDigit :: one of
733     ///     `0` `1` `2` `3` `4` `5` `6` `7` `8` `9`
734     /// ```
decimal_digits(&mut self) -> Result<'alloc, ()>735     fn decimal_digits(&mut self) -> Result<'alloc, ()> {
736         if let Some('0'..='9') = self.peek() {
737             self.chars.next();
738         } else {
739             return Err(self.unexpected_err().into());
740         }
741 
742         self.decimal_digits_after_first_digit()?;
743         Ok(())
744     }
745 
optional_decimal_digits(&mut self) -> Result<'alloc, ()>746     fn optional_decimal_digits(&mut self) -> Result<'alloc, ()> {
747         if let Some('0'..='9') = self.peek() {
748             self.chars.next();
749         } else {
750             return Ok(());
751         }
752 
753         self.decimal_digits_after_first_digit()?;
754         Ok(())
755     }
756 
decimal_digits_after_first_digit(&mut self) -> Result<'alloc, ()>757     fn decimal_digits_after_first_digit(&mut self) -> Result<'alloc, ()> {
758         while let Some(next) = self.peek() {
759             match next {
760                 '_' => {
761                     self.chars.next();
762 
763                     if let Some('0'..='9') = self.peek() {
764                         self.chars.next();
765                     } else {
766                         return Err(self.unexpected_err().into());
767                     }
768                 }
769                 '0'..='9' => {
770                     self.chars.next();
771                 }
772                 _ => break,
773             }
774         }
775         Ok(())
776     }
777 
778     /// Skip an ExponentPart, if present.
779     ///
780     /// ```text
781     /// ExponentPart ::
782     ///     ExponentIndicator SignedInteger
783     ///
784     /// ExponentIndicator :: one of
785     ///     `e` `E`
786     ///
787     /// SignedInteger ::
788     ///     DecimalDigits
789     ///     `+` DecimalDigits
790     ///     `-` DecimalDigits
791     /// ```
optional_exponent(&mut self) -> Result<'alloc, bool>792     fn optional_exponent(&mut self) -> Result<'alloc, bool> {
793         if let Some('e') | Some('E') = self.peek() {
794             self.chars.next();
795             self.decimal_exponent()?;
796             return Ok(true);
797         }
798 
799         Ok(false)
800     }
801 
decimal_exponent(&mut self) -> Result<'alloc, ()>802     fn decimal_exponent(&mut self) -> Result<'alloc, ()> {
803         if let Some('+') | Some('-') = self.peek() {
804             self.chars.next();
805         }
806 
807         self.decimal_digits()?;
808 
809         Ok(())
810     }
811 
812     /// ```text
813     /// HexDigit :: one of
814     ///     `0` `1` `2` `3` `4` `5` `6` `7` `8` `9` `a` `b` `c` `d` `e` `f` `A` `B` `C` `D` `E` `F`
815     /// ```
hex_digit(&mut self) -> Result<'alloc, u32>816     fn hex_digit(&mut self) -> Result<'alloc, u32> {
817         match self.chars.next() {
818             None => Err(ParseError::InvalidEscapeSequence.into()),
819             Some(c @ '0'..='9') => Ok(c as u32 - '0' as u32),
820             Some(c @ 'a'..='f') => Ok(10 + (c as u32 - 'a' as u32)),
821             Some(c @ 'A'..='F') => Ok(10 + (c as u32 - 'A' as u32)),
822             Some(other) => Err(ParseError::IllegalCharacter(other).into()),
823         }
824     }
825 
code_point_to_char(value: u32) -> Result<'alloc, char>826     fn code_point_to_char(value: u32) -> Result<'alloc, char> {
827         if 0xd800 <= value && value <= 0xdfff {
828             Err(ParseError::NotImplemented("unicode escape sequences (surrogates)").into())
829         } else {
830             char::try_from(value).map_err(|_| ParseError::InvalidEscapeSequence.into())
831         }
832     }
833 
834     /// ```text
835     /// Hex4Digits ::
836     ///     HexDigit HexDigit HexDigit HexDigit
837     /// ```
hex_4_digits(&mut self) -> Result<'alloc, char>838     fn hex_4_digits(&mut self) -> Result<'alloc, char> {
839         let mut value = 0;
840         for _ in 0..4 {
841             value = (value << 4) | self.hex_digit()?;
842         }
843         Self::code_point_to_char(value)
844     }
845 
846     /// ```text
847     /// CodePoint ::
848     ///     HexDigits but only if MV of HexDigits ≤ 0x10FFFF
849     ///
850     /// HexDigits ::
851     ///    HexDigit
852     ///    HexDigits HexDigit
853     /// ```
code_point(&mut self) -> Result<'alloc, char>854     fn code_point(&mut self) -> Result<'alloc, char> {
855         let mut value = self.hex_digit()?;
856 
857         loop {
858             let next = match self.peek() {
859                 None => {
860                     return Err(ParseError::InvalidEscapeSequence.into());
861                 }
862                 Some(c @ '0'..='9') => c as u32 - '0' as u32,
863                 Some(c @ 'a'..='f') => 10 + (c as u32 - 'a' as u32),
864                 Some(c @ 'A'..='F') => 10 + (c as u32 - 'A' as u32),
865                 Some(_) => break,
866             };
867             self.chars.next();
868             value = (value << 4) | next;
869             if value > 0x10FFFF {
870                 return Err(ParseError::InvalidEscapeSequence.into());
871             }
872         }
873 
874         Self::code_point_to_char(value)
875     }
876 
877     /// Scan a NumericLiteral (defined in 11.8.3, extended by B.1.1) after
878     /// having already consumed the first character, which was `0`.
879     ///
880     /// ```text
881     /// NumericLiteral ::
882     ///     DecimalLiteral
883     ///     DecimalBigIntegerLiteral
884     ///     NonDecimalIntegerLiteral
885     ///     NonDecimalIntegerLiteral BigIntLiteralSuffix
886     ///
887     /// DecimalBigIntegerLiteral ::
888     ///     `0` BigIntLiteralSuffix
889     ///     NonZeroDigit DecimalDigits? BigIntLiteralSuffix
890     ///
891     /// NonDecimalIntegerLiteral ::
892     ///     BinaryIntegerLiteral
893     ///     OctalIntegerLiteral
894     ///     HexIntegerLiteral
895     ///
896     /// BigIntLiteralSuffix ::
897     ///     `n`
898     /// ```
numeric_literal_starting_with_zero(&mut self) -> Result<'alloc, NumericResult>899     fn numeric_literal_starting_with_zero(&mut self) -> Result<'alloc, NumericResult> {
900         let mut base = NumericLiteralBase::Decimal;
901         match self.peek() {
902             // BinaryIntegerLiteral ::
903             //     `0b` BinaryDigits
904             //     `0B` BinaryDigits
905             //
906             // BinaryDigits ::
907             //     BinaryDigit
908             //     BinaryDigits NumericLiteralSeparator? BinaryDigit
909             //
910             // BinaryDigit :: one of
911             //     `0` `1`
912             Some('b') | Some('B') => {
913                 self.chars.next();
914 
915                 base = NumericLiteralBase::Binary;
916 
917                 if let Some('0'..='1') = self.peek() {
918                     self.chars.next();
919                 } else {
920                     return Err(self.unexpected_err().into());
921                 }
922 
923                 while let Some(next) = self.peek() {
924                     match next {
925                         '_' => {
926                             self.chars.next();
927 
928                             if let Some('0'..='1') = self.peek() {
929                                 self.chars.next();
930                             } else {
931                                 return Err(self.unexpected_err().into());
932                             }
933                         }
934                         '0'..='1' => {
935                             self.chars.next();
936                         }
937                         _ => break,
938                     }
939                 }
940 
941                 if let Some('n') = self.peek() {
942                     self.chars.next();
943                     self.check_after_numeric_literal()?;
944                     return Ok(NumericResult::BigInt { base });
945                 }
946             }
947 
948             // OctalIntegerLiteral ::
949             //     `0o` OctalDigits
950             //     `0O` OctalDigits
951             //
952             // OctalDigits ::
953             //     OctalDigit
954             //     OctalDigits NumericLiteralSeparator? OctalDigit
955             //
956             // OctalDigit :: one of
957             //     `0` `1` `2` `3` `4` `5` `6` `7`
958             //
959             Some('o') | Some('O') => {
960                 self.chars.next();
961 
962                 base = NumericLiteralBase::Octal;
963 
964                 if let Some('0'..='7') = self.peek() {
965                     self.chars.next();
966                 } else {
967                     return Err(self.unexpected_err().into());
968                 }
969 
970                 while let Some(next) = self.peek() {
971                     match next {
972                         '_' => {
973                             self.chars.next();
974 
975                             if let Some('0'..='7') = self.peek() {
976                                 self.chars.next();
977                             } else {
978                                 return Err(self.unexpected_err().into());
979                             }
980                         }
981                         '0'..='7' => {
982                             self.chars.next();
983                         }
984                         _ => break,
985                     }
986                 }
987 
988                 if let Some('n') = self.peek() {
989                     self.chars.next();
990                     self.check_after_numeric_literal()?;
991                     return Ok(NumericResult::BigInt { base });
992                 }
993             }
994 
995             // HexIntegerLiteral ::
996             //     `0x` HexDigits
997             //     `0X` HexDigits
998             //
999             // HexDigits ::
1000             //     HexDigit
1001             //     HexDigits NumericLiteralSeparator? HexDigit
1002             //
1003             // HexDigit :: one of
1004             //     `0` `1` `2` `3` `4` `5` `6` `7` `8` `9` `a` `b` `c` `d` `e` `f` `A` `B` `C` `D` `E` `F`
1005             Some('x') | Some('X') => {
1006                 self.chars.next();
1007 
1008                 base = NumericLiteralBase::Hex;
1009 
1010                 if let Some('0'..='9') | Some('a'..='f') | Some('A'..='F') = self.peek() {
1011                     self.chars.next();
1012                 } else {
1013                     return Err(self.unexpected_err().into());
1014                 }
1015 
1016                 while let Some(next) = self.peek() {
1017                     match next {
1018                         '_' => {
1019                             self.chars.next();
1020 
1021                             if let Some('0'..='9') | Some('a'..='f') | Some('A'..='F') = self.peek()
1022                             {
1023                                 self.chars.next();
1024                             } else {
1025                                 return Err(self.unexpected_err().into());
1026                             }
1027                         }
1028                         '0'..='9' | 'a'..='f' | 'A'..='F' => {
1029                             self.chars.next();
1030                         }
1031                         _ => break,
1032                     }
1033                 }
1034 
1035                 if let Some('n') = self.peek() {
1036                     self.chars.next();
1037                     self.check_after_numeric_literal()?;
1038                     return Ok(NumericResult::BigInt { base });
1039                 }
1040             }
1041 
1042             Some('.') => {
1043                 self.chars.next();
1044                 return self.decimal_literal_after_decimal_point_after_digits();
1045             }
1046 
1047             Some('e') | Some('E') => {
1048                 self.chars.next();
1049                 self.decimal_exponent()?;
1050                 return Ok(NumericResult::Float);
1051             }
1052 
1053             Some('n') => {
1054                 self.chars.next();
1055                 self.check_after_numeric_literal()?;
1056                 return Ok(NumericResult::BigInt { base });
1057             }
1058 
1059             Some('0'..='9') => {
1060                 // This is almost always the token `0` in practice.
1061                 //
1062                 // In nonstrict code, as a legacy feature, other numbers
1063                 // starting with `0` are allowed. If /0[0-7]+/ matches, it's a
1064                 // LegacyOctalIntegerLiteral; but if we see an `8` or `9` in
1065                 // the number, it's decimal. Decimal numbers can have a decimal
1066                 // point and/or ExponentPart; octals can't.
1067                 //
1068                 // Neither is allowed with a BigIntLiteralSuffix `n`.
1069                 //
1070                 // LegacyOctalIntegerLiteral ::
1071                 //     `0` OctalDigit
1072                 //     LegacyOctalIntegerLiteral OctalDigit
1073                 //
1074                 // NonOctalDecimalIntegerLiteral ::
1075                 //     `0` NonOctalDigit
1076                 //     LegacyOctalLikeDecimalIntegerLiteral NonOctalDigit
1077                 //     NonOctalDecimalIntegerLiteral DecimalDigit
1078                 //
1079                 // LegacyOctalLikeDecimalIntegerLiteral ::
1080                 //     `0` OctalDigit
1081                 //     LegacyOctalLikeDecimalIntegerLiteral OctalDigit
1082                 //
1083                 // NonOctalDigit :: one of
1084                 //     `8` `9`
1085                 //
1086 
1087                 // TODO: implement `strict_mode` check
1088                 // let strict_mode = true;
1089                 // if !strict_mode {
1090                 //     // TODO: Distinguish between Octal and NonOctalDecimal.
1091                 //     // TODO: Support NonOctalDecimal followed by a decimal
1092                 //     //       point and/or ExponentPart.
1093                 //     self.decimal_digits()?;
1094                 // }
1095                 return Err(ParseError::NotImplemented("LegacyOctalIntegerLiteral").into());
1096             }
1097 
1098             _ => {}
1099         }
1100 
1101         self.check_after_numeric_literal()?;
1102         Ok(NumericResult::Int { base })
1103     }
1104 
1105     /// Scan a NumericLiteral (defined in 11.8.3, extended by B.1.1) after
1106     /// having already consumed the first character, which is a decimal digit.
decimal_literal_after_first_digit(&mut self) -> Result<'alloc, NumericResult>1107     fn decimal_literal_after_first_digit(&mut self) -> Result<'alloc, NumericResult> {
1108         // DecimalLiteral ::
1109         //     DecimalIntegerLiteral `.` DecimalDigits? ExponentPart?
1110         //     `.` DecimalDigits ExponentPart?
1111         //     DecimalIntegerLiteral ExponentPart?
1112         //
1113         // DecimalIntegerLiteral ::
1114         //     `0`   #see `numeric_literal_starting_with_zero`
1115         //     NonZeroDigit
1116         //     NonZeroDigit NumericLiteralSeparator? DecimalDigits
1117         //     NonOctalDecimalIntegerLiteral  #see `numeric_literal_
1118         //                                    #     starting_with_zero`
1119         //
1120         // NonZeroDigit :: one of
1121         //     `1` `2` `3` `4` `5` `6` `7` `8` `9`
1122 
1123         self.decimal_digits_after_first_digit()?;
1124         match self.peek() {
1125             Some('.') => {
1126                 self.chars.next();
1127                 return self.decimal_literal_after_decimal_point_after_digits();
1128             }
1129             Some('n') => {
1130                 self.chars.next();
1131                 self.check_after_numeric_literal()?;
1132                 return Ok(NumericResult::BigInt {
1133                     base: NumericLiteralBase::Decimal,
1134                 });
1135             }
1136             _ => {}
1137         }
1138 
1139         let has_exponent = self.optional_exponent()?;
1140         self.check_after_numeric_literal()?;
1141 
1142         let result = if has_exponent {
1143             NumericResult::Float
1144         } else {
1145             NumericResult::Int {
1146                 base: NumericLiteralBase::Decimal,
1147             }
1148         };
1149 
1150         Ok(result)
1151     }
1152 
decimal_literal_after_decimal_point(&mut self) -> Result<'alloc, NumericResult>1153     fn decimal_literal_after_decimal_point(&mut self) -> Result<'alloc, NumericResult> {
1154         // The parts after `.` in
1155         //
1156         //     `.` DecimalDigits ExponentPart?
1157         self.decimal_digits()?;
1158         self.optional_exponent()?;
1159         self.check_after_numeric_literal()?;
1160 
1161         Ok(NumericResult::Float)
1162     }
1163 
decimal_literal_after_decimal_point_after_digits( &mut self, ) -> Result<'alloc, NumericResult>1164     fn decimal_literal_after_decimal_point_after_digits(
1165         &mut self,
1166     ) -> Result<'alloc, NumericResult> {
1167         // The parts after `.` in
1168         //
1169         // DecimalLiteral ::
1170         //     DecimalIntegerLiteral `.` DecimalDigits? ExponentPart?
1171         self.optional_decimal_digits()?;
1172         self.optional_exponent()?;
1173         self.check_after_numeric_literal()?;
1174 
1175         Ok(NumericResult::Float)
1176     }
1177 
check_after_numeric_literal(&self) -> Result<'alloc, ()>1178     fn check_after_numeric_literal(&self) -> Result<'alloc, ()> {
1179         // The SourceCharacter immediately following a
1180         // NumericLiteral must not be an IdentifierStart or
1181         // DecimalDigit. (11.8.3)
1182         if let Some(ch) = self.peek() {
1183             if is_identifier_start(ch) || ch.is_digit(10) {
1184                 return Err(ParseError::IllegalCharacter(ch).into());
1185             }
1186         }
1187 
1188         Ok(())
1189     }
1190 
1191     // ------------------------------------------------------------------------
1192     // 11.8.4 String Literals (as extended by B.1.2)
1193 
1194     /// Scan an LineContinuation or EscapeSequence in a string literal, having
1195     /// already consumed the initial backslash character.
1196     ///
1197     /// ```text
1198     /// LineContinuation ::
1199     ///     `\` LineTerminatorSequence
1200     ///
1201     /// EscapeSequence ::
1202     ///     CharacterEscapeSequence
1203     ///     (in strict mode code) `0` [lookahead ∉ DecimalDigit]
1204     ///     (in non-strict code) LegacyOctalEscapeSequence
1205     ///     HexEscapeSequence
1206     ///     UnicodeEscapeSequence
1207     ///
1208     /// CharacterEscapeSequence ::
1209     ///     SingleEscapeCharacter
1210     ///     NonEscapeCharacter
1211     ///
1212     /// SingleEscapeCharacter :: one of
1213     ///     `'` `"` `\` `b` `f` `n` `r` `t` `v`
1214     ///
1215     /// LegacyOctalEscapeSequence ::
1216     ///     OctalDigit [lookahead ∉ OctalDigit]
1217     ///     ZeroToThree OctalDigit [lookahead ∉ OctalDigit]
1218     ///     FourToSeven OctalDigit
1219     ///     ZeroToThree OctalDigit OctalDigit
1220     ///
1221     /// ZeroToThree :: one of
1222     ///     `0` `1` `2` `3`
1223     ///
1224     /// FourToSeven :: one of
1225     ///     `4` `5` `6` `7`
1226     /// ```
escape_sequence(&mut self, text: &mut String<'alloc>) -> Result<'alloc, ()>1227     fn escape_sequence(&mut self, text: &mut String<'alloc>) -> Result<'alloc, ()> {
1228         match self.chars.next() {
1229             None => {
1230                 return Err(ParseError::UnterminatedString.into());
1231             }
1232             Some(c) => match c {
1233                 LF | LS | PS => {
1234                     // LineContinuation. Ignore it.
1235                     //
1236                     // Don't set is_on_new_line because this LineContinuation
1237                     // has no bearing on whether the current string literal was
1238                     // the first token on the line where it started.
1239                 }
1240 
1241                 CR => {
1242                     // LineContinuation. Check for the sequence \r\n; otherwise
1243                     // ignore it.
1244                     if self.peek() == Some(LF) {
1245                         self.chars.next();
1246                     }
1247                 }
1248 
1249                 '\'' | '"' | '\\' => {
1250                     text.push(c);
1251                 }
1252 
1253                 'b' => {
1254                     text.push('\u{8}');
1255                 }
1256 
1257                 'f' => {
1258                     text.push(FF);
1259                 }
1260 
1261                 'n' => {
1262                     text.push(LF);
1263                 }
1264 
1265                 'r' => {
1266                     text.push(CR);
1267                 }
1268 
1269                 't' => {
1270                     text.push(TAB);
1271                 }
1272 
1273                 'v' => {
1274                     text.push(VT);
1275                 }
1276 
1277                 'x' => {
1278                     // HexEscapeSequence ::
1279                     //     `x` HexDigit HexDigit
1280                     let mut value = self.hex_digit()?;
1281                     value = (value << 4) | self.hex_digit()?;
1282                     match char::try_from(value) {
1283                         Err(_) => {
1284                             return Err(ParseError::InvalidEscapeSequence.into());
1285                         }
1286                         Ok(c) => {
1287                             text.push(c);
1288                         }
1289                     }
1290                 }
1291 
1292                 'u' => {
1293                     let c = self.unicode_escape_sequence_after_backslash_and_u()?;
1294                     text.push(c);
1295                 }
1296 
1297                 '0' => {
1298                     // In strict mode code and in template literals, the
1299                     // relevant production is
1300                     //
1301                     //     EscapeSequence ::
1302                     //         `0` [lookahead <! DecimalDigit]
1303                     //
1304                     // In non-strict StringLiterals, `\0` begins a
1305                     // LegacyOctalEscapeSequence which may contain more digits.
1306                     match self.peek() {
1307                         Some('0'..='7') => {
1308                             return Err(ParseError::NotImplemented(
1309                                 "legacy octal escape sequence in string",
1310                             )
1311                             .into());
1312                         }
1313                         Some('8'..='9') => {
1314                             return Err(ParseError::NotImplemented(
1315                                 "digit immediately following \\0 escape sequence",
1316                             )
1317                             .into());
1318                         }
1319                         _ => {}
1320                     }
1321                     text.push('\0');
1322                 }
1323 
1324                 '1'..='7' => {
1325                     return Err(ParseError::NotImplemented(
1326                         "legacy octal escape sequence in string",
1327                     )
1328                     .into());
1329                 }
1330 
1331                 other => {
1332                     // "\8" and "\9" are invalid per spec, but SpiderMonkey and
1333                     // V8 accept them, and JSC accepts them in non-strict mode.
1334                     // "\8" is "8" and "\9" is "9".
1335                     text.push(other);
1336                 }
1337             },
1338         }
1339         Ok(())
1340     }
1341 
1342     /// Scan a string literal, having already consumed the starting quote
1343     /// character `delimiter`.
1344     ///
1345     /// ```text
1346     /// StringLiteral ::
1347     ///     `"` DoubleStringCharacters? `"`
1348     ///     `'` SingleStringCharacters? `'`
1349     ///
1350     /// DoubleStringCharacters ::
1351     ///     DoubleStringCharacter DoubleStringCharacters?
1352     ///
1353     /// SingleStringCharacters ::
1354     ///     SingleStringCharacter SingleStringCharacters?
1355     ///
1356     /// DoubleStringCharacter ::
1357     ///     SourceCharacter but not one of `"` or `\` or LineTerminator
1358     ///     <LS>
1359     ///     <PS>
1360     ///     `\` EscapeSequence
1361     ///     LineContinuation
1362     ///
1363     /// SingleStringCharacter ::
1364     ///     SourceCharacter but not one of `'` or `\` or LineTerminator
1365     ///     <LS>
1366     ///     <PS>
1367     ///     `\` EscapeSequence
1368     ///     LineContinuation
1369     /// ```
string_literal(&mut self, delimiter: char) -> Result<'alloc, ()>1370     fn string_literal(&mut self, delimiter: char) -> Result<'alloc, ()> {
1371         let offset = self.offset() - 1;
1372         let mut builder = AutoCow::new(&self);
1373         loop {
1374             match self.chars.next() {
1375                 None | Some('\r') | Some('\n') => {
1376                     return Err(ParseError::UnterminatedString.into());
1377                 }
1378 
1379                 Some(c @ '"') | Some(c @ '\'') => {
1380                     if c == delimiter {
1381                         let value = self.string_to_token_value(builder.finish_without_push(&self));
1382                         return self.set_result(
1383                             TerminalId::StringLiteral,
1384                             SourceLocation::new(offset, self.offset()),
1385                             value,
1386                         );
1387                     } else {
1388                         builder.push_matching(c);
1389                     }
1390                 }
1391 
1392                 Some('\\') => {
1393                     let text = builder.get_mut_string_without_current_ascii_char(&self);
1394                     self.escape_sequence(text)?;
1395                 }
1396 
1397                 Some(other) => {
1398                     // NonEscapeCharacter ::
1399                     //     SourceCharacter but not one of EscapeCharacter or LineTerminator
1400                     //
1401                     // EscapeCharacter ::
1402                     //     SingleEscapeCharacter
1403                     //     DecimalDigit
1404                     //     `x`
1405                     //     `u`
1406                     builder.push_matching(other);
1407                 }
1408             }
1409         }
1410     }
1411 
1412     // ------------------------------------------------------------------------
1413     // 11.8.5 Regular Expression Literals
1414 
regular_expression_backslash_sequence(&mut self) -> Result<'alloc, ()>1415     fn regular_expression_backslash_sequence(&mut self) -> Result<'alloc, ()> {
1416         match self.chars.next() {
1417             None | Some(CR) | Some(LF) | Some(LS) | Some(PS) => {
1418                 Err(ParseError::UnterminatedRegExp.into())
1419             }
1420             Some(_) => Ok(()),
1421         }
1422     }
1423 
1424     // See 12.2.8 and 11.8.5 sections.
regular_expression_literal(&mut self, builder: &mut AutoCow<'alloc>) -> Result<'alloc, ()>1425     fn regular_expression_literal(&mut self, builder: &mut AutoCow<'alloc>) -> Result<'alloc, ()> {
1426         let offset = self.offset();
1427 
1428         loop {
1429             match self.chars.next() {
1430                 None | Some(CR) | Some(LF) | Some(LS) | Some(PS) => {
1431                     return Err(ParseError::UnterminatedRegExp.into());
1432                 }
1433                 Some('/') => {
1434                     break;
1435                 }
1436                 Some('[') => {
1437                     // RegularExpressionClass.
1438                     loop {
1439                         match self.chars.next() {
1440                             None | Some(CR) | Some(LF) | Some(LS) | Some(PS) => {
1441                                 return Err(ParseError::UnterminatedRegExp.into());
1442                             }
1443                             Some(']') => {
1444                                 break;
1445                             }
1446                             Some('\\') => {
1447                                 self.regular_expression_backslash_sequence()?;
1448                             }
1449                             Some(_) => {}
1450                         }
1451                     }
1452                 }
1453                 Some('\\') => {
1454                     self.regular_expression_backslash_sequence()?;
1455                 }
1456                 Some(_) => {}
1457             }
1458         }
1459         let mut flag_text = AutoCow::new(&self);
1460         while let Some(ch) = self.peek() {
1461             match ch {
1462                 '$' | '_' | 'a'..='z' | 'A'..='Z' | '0'..='9' => {
1463                     self.chars.next();
1464                     flag_text.push_matching(ch);
1465                 }
1466                 _ => break,
1467             }
1468         }
1469 
1470         // 12.2.8.2.1 Assert literal is a RegularExpressionLiteral.
1471         let literal = builder.finish(&self);
1472 
1473         // 12.2.8.2.2 Check that only gimsuy flags are mentioned at most once.
1474         let gimsuy_mask: u32 = ['g', 'i', 'm', 's', 'u', 'y']
1475             .iter()
1476             .map(|x| 1 << ((*x as u8) - ('a' as u8)))
1477             .sum();
1478         let mut flag_text_set: u32 = 0;
1479         for ch in flag_text.finish(&self).chars() {
1480             if !ch.is_ascii_lowercase() {
1481                 return Err(ParseError::NotImplemented(
1482                     "Unexpected flag in regular expression literal",
1483                 )
1484                 .into());
1485             }
1486             let ch_mask = 1 << ((ch as u8) - ('a' as u8));
1487             if ch_mask & gimsuy_mask == 0 {
1488                 return Err(ParseError::NotImplemented(
1489                     "Unexpected flag in regular expression literal",
1490                 )
1491                 .into());
1492             }
1493             if flag_text_set & ch_mask != 0 {
1494                 return Err(ParseError::NotImplemented(
1495                     "Flag is mentioned twice in regular expression literal",
1496                 )
1497                 .into());
1498             }
1499             flag_text_set |= ch_mask;
1500         }
1501 
1502         // TODO: 12.2.8.2.4 and 12.2.8.2.5 Check that the body matches the
1503         // grammar defined in 21.2.1.
1504 
1505         let value = self.slice_to_token_value(literal);
1506         self.set_result(
1507             TerminalId::RegularExpressionLiteral,
1508             SourceLocation::new(offset, self.offset()),
1509             value,
1510         )
1511     }
1512 
1513     // ------------------------------------------------------------------------
1514     // 11.8.6 Template Literal Lexical Components
1515 
1516     /// Parse a template literal component token, having already consumed the
1517     /// starting `` ` `` or `}` character. On success, the `id` of the returned
1518     /// `Token` is `subst` (if the token ends with `${`) or `tail` (if the
1519     /// token ends with `` ` ``).
1520     ///
1521     /// ```text
1522     /// NoSubstitutionTemplate ::
1523     ///   ``` TemplateCharacters? ```
1524     ///
1525     /// TemplateHead ::
1526     ///   ``` TemplateCharacters? `${`
1527     ///
1528     /// TemplateMiddle ::
1529     ///   `}` TemplateCharacters? `${`
1530     ///
1531     /// TemplateTail ::
1532     ///   `}` TemplateCharacters? ```
1533     ///
1534     /// TemplateCharacters ::
1535     ///   TemplateCharacter TemplateCharacters?
1536     /// ```
template_part( &mut self, start: usize, subst: TerminalId, tail: TerminalId, ) -> Result<'alloc, ()>1537     fn template_part(
1538         &mut self,
1539         start: usize,
1540         subst: TerminalId,
1541         tail: TerminalId,
1542     ) -> Result<'alloc, ()> {
1543         let mut builder = AutoCow::new(&self);
1544         while let Some(ch) = self.chars.next() {
1545             // TemplateCharacter ::
1546             //   `$` [lookahead != `{` ]
1547             //   `\` EscapeSequence
1548             //   `\` NotEscapeSequence
1549             //   LineContinuation
1550             //   LineTerminatorSequence
1551             //   SourceCharacter but not one of ``` or `\` or `$` or LineTerminator
1552             //
1553             // NotEscapeSequence ::
1554             //   `0` DecimalDigit
1555             //   DecimalDigit but not `0`
1556             //   `x` [lookahead <! HexDigit]
1557             //   `x` HexDigit [lookahead <! HexDigit]
1558             //   `u` [lookahead <! HexDigit] [lookahead != `{`]
1559             //   `u` HexDigit [lookahead <! HexDigit]
1560             //   `u` HexDigit HexDigit [lookahead <! HexDigit]
1561             //   `u` HexDigit HexDigit HexDigit [lookahead <! HexDigit]
1562             //   `u` `{` [lookahead <! HexDigit]
1563             //   `u` `{` NotCodePoint [lookahead <! HexDigit]
1564             //   `u` `{` CodePoint [lookahead <! HexDigit] [lookahead != `}`]
1565             //
1566             // NotCodePoint ::
1567             //   HexDigits [> but only if MV of |HexDigits| > 0x10FFFF ]
1568             //
1569             // CodePoint ::
1570             //   HexDigits [> but only if MV of |HexDigits| ≤ 0x10FFFF ]
1571             if ch == '$' && self.peek() == Some('{') {
1572                 self.chars.next();
1573                 let value = self.string_to_token_value(builder.finish_without_push(&self));
1574                 return self.set_result(subst, SourceLocation::new(start, self.offset()), value);
1575             }
1576             if ch == '`' {
1577                 let value = self.string_to_token_value(builder.finish_without_push(&self));
1578                 return self.set_result(tail, SourceLocation::new(start, self.offset()), value);
1579             }
1580             // TODO: Support escape sequences.
1581             if ch == '\\' {
1582                 let text = builder.get_mut_string_without_current_ascii_char(&self);
1583                 self.escape_sequence(text)?;
1584             } else {
1585                 builder.push_matching(ch);
1586             }
1587         }
1588         Err(ParseError::UnterminatedString.into())
1589     }
1590 
advance_impl<'parser>(&mut self, parser: &Parser<'parser>) -> Result<'alloc, ()>1591     fn advance_impl<'parser>(&mut self, parser: &Parser<'parser>) -> Result<'alloc, ()> {
1592         let mut builder = AutoCow::new(&self);
1593         let mut start = self.offset();
1594         while let Some(c) = self.chars.next() {
1595             match c {
1596                 // 11.2 White Space
1597                 //
1598                 // WhiteSpace ::
1599                 //     <TAB>
1600                 //     <VT>
1601                 //     <FF>
1602                 //     <SP>
1603                 //     <NBSP>
1604                 //     <ZWNBSP>
1605                 //     <USP>
1606                 TAB |
1607                 VT |
1608                 FF |
1609                 SP |
1610                 NBSP |
1611                 ZWNBSP |
1612                 '\u{1680}' | // Ogham space mark (in <USP>)
1613                 '\u{2000}' ..= '\u{200a}' | // typesetting spaces (in <USP>)
1614                 '\u{202f}' | // Narrow no-break space (in <USP>)
1615                 '\u{205f}' | // Medium mathematical space (in <USP>)
1616                 '\u{3000}' // Ideographic space (in <USP>)
1617                     => {
1618                     // TODO - The spec uses <USP> to stand for any character
1619                     // with category "Space_Separator" (Zs). New Unicode
1620                     // standards may add characters to this set. This should therefore be
1621                     // implemented using the Unicode database somehow.
1622                     builder = AutoCow::new(&self);
1623                     start = self.offset();
1624                     continue;
1625                 }
1626 
1627                 // 11.3 Line Terminators
1628                 //
1629                 // LineTerminator ::
1630                 //     <LF>
1631                 //     <CR>
1632                 //     <LS>
1633                 //     <PS>
1634                 LF | CR | LS | PS => {
1635                     self.token.is_on_new_line = true;
1636                     builder = AutoCow::new(&self);
1637                     start = self.offset();
1638                     continue;
1639                 }
1640 
1641                 '0' => {
1642                     let result = self.numeric_literal_starting_with_zero()?;
1643                     return Ok(self.numeric_result_to_advance_result(builder.finish(&self), start, result)?);
1644                 }
1645 
1646                 '1'..='9' => {
1647                     let result = self.decimal_literal_after_first_digit()?;
1648                     return Ok(self.numeric_result_to_advance_result(builder.finish(&self), start, result)?);
1649                 }
1650 
1651                 '"' | '\'' => {
1652                     return self.string_literal(c);
1653                 }
1654 
1655                 '`' => {
1656                     return self.template_part(start, TerminalId::TemplateHead, TerminalId::NoSubstitutionTemplate);
1657                 }
1658 
1659                 '!' => match self.peek() {
1660                     Some('=') => {
1661                         self.chars.next();
1662                         match self.peek() {
1663                             Some('=') => {
1664                                 self.chars.next();
1665                                 return self.set_result(
1666                                     TerminalId::StrictNotEqual,
1667                                     SourceLocation::new(start, self.offset()),
1668                                     TokenValue::None,
1669                                 );
1670                             }
1671                             _ => return self.set_result(
1672                                 TerminalId::LaxNotEqual,
1673                                 SourceLocation::new(start, self.offset()),
1674                                 TokenValue::None,
1675                             ),
1676                         }
1677                     }
1678                     _ => return self.set_result(
1679                         TerminalId::LogicalNot,
1680                         SourceLocation::new(start, self.offset()),
1681                         TokenValue::None,
1682                     ),
1683                 },
1684 
1685                 '%' => match self.peek() {
1686                     Some('=') => {
1687                         self.chars.next();
1688                         return self.set_result(
1689                             TerminalId::RemainderAssign,
1690                             SourceLocation::new(start, self.offset()),
1691                             TokenValue::None,
1692                         );
1693                     }
1694                     _ => return self.set_result(
1695                         TerminalId::Remainder,
1696                         SourceLocation::new(start, self.offset()),
1697                         TokenValue::None,
1698                     ),
1699                 },
1700 
1701                 '&' => match self.peek() {
1702                     Some('&') => {
1703                         self.chars.next();
1704                         match self.peek() {
1705                             Some('=') => {
1706                                 self.chars.next();
1707                                 return self.set_result(
1708                                     TerminalId::LogicalAndAssign,
1709                                     SourceLocation::new(start, self.offset()),
1710                                     TokenValue::None,
1711                                 );
1712                             }
1713                             _ => return self.set_result(
1714                                 TerminalId::LogicalAnd,
1715                                 SourceLocation::new(start, self.offset()),
1716                                 TokenValue::None,
1717                             )
1718                         }
1719                     }
1720                     Some('=') => {
1721                         self.chars.next();
1722                         return self.set_result(
1723                             TerminalId::BitwiseAndAssign,
1724                             SourceLocation::new(start, self.offset()),
1725                             TokenValue::None,
1726                         );
1727                     }
1728                     _ => return self.set_result(
1729                         TerminalId::BitwiseAnd,
1730                         SourceLocation::new(start, self.offset()),
1731                         TokenValue::None,
1732                     ),
1733                 },
1734 
1735                 '*' => match self.peek() {
1736                     Some('*') => {
1737                         self.chars.next();
1738                         match self.peek() {
1739                             Some('=') => {
1740                                 self.chars.next();
1741                                 return self.set_result(
1742                                     TerminalId::ExponentiateAssign,
1743                                     SourceLocation::new(start, self.offset()),
1744                                     TokenValue::None,
1745                                 );
1746                             }
1747                             _ => return self.set_result(
1748                                 TerminalId::Exponentiate,
1749                                 SourceLocation::new(start, self.offset()),
1750                                 TokenValue::None,
1751                             ),
1752                         }
1753                     }
1754                     Some('=') => {
1755                         self.chars.next();
1756                         return self.set_result(
1757                             TerminalId::MultiplyAssign,
1758                             SourceLocation::new(start, self.offset()),
1759                             TokenValue::None,
1760                         );
1761                     }
1762                     _ => return self.set_result(
1763                         TerminalId::Star,
1764                         SourceLocation::new(start, self.offset()),
1765                         TokenValue::None,
1766                     ),
1767                 },
1768 
1769                 '+' => match self.peek() {
1770                     Some('+') => {
1771                         self.chars.next();
1772                         return self.set_result(
1773                             TerminalId::Increment,
1774                             SourceLocation::new(start, self.offset()),
1775                             TokenValue::None,
1776                         );
1777                     }
1778                     Some('=') => {
1779                         self.chars.next();
1780                         return self.set_result(
1781                             TerminalId::AddAssign,
1782                             SourceLocation::new(start, self.offset()),
1783                             TokenValue::None,
1784                         );
1785                     }
1786                     _ => return self.set_result(
1787                         TerminalId::Plus,
1788                         SourceLocation::new(start, self.offset()),
1789                         TokenValue::None,
1790                     ),
1791                 },
1792 
1793                 '-' => match self.peek() {
1794                     Some('-') => {
1795                         self.chars.next();
1796                         match self.peek() {
1797                             Some('>') if self.token.is_on_new_line => {
1798                                 // B.1.3 SingleLineHTMLCloseComment
1799                                 // TODO: Limit this to Script (not Module).
1800                                 self.skip_single_line_comment(&mut builder);
1801                                 continue;
1802                             }
1803                             _ => return self.set_result(
1804                                 TerminalId::Decrement,
1805                                 SourceLocation::new(start, self.offset()),
1806                                 TokenValue::None,
1807                             ),
1808                         }
1809                     }
1810                     Some('=') => {
1811                         self.chars.next();
1812                         return self.set_result(
1813                             TerminalId::SubtractAssign,
1814                             SourceLocation::new(start, self.offset()),
1815                             TokenValue::None,
1816                         );
1817                     }
1818                     _ => return self.set_result(
1819                         TerminalId::Minus,
1820                         SourceLocation::new(start, self.offset()),
1821                         TokenValue::None,
1822                     ),
1823                 },
1824 
1825                 '.' => match self.peek() {
1826                     Some('.') => {
1827                         self.chars.next();
1828                         match self.peek() {
1829                             Some('.') => {
1830                                 self.chars.next();
1831                                 return self.set_result(
1832                                     TerminalId::Ellipsis,
1833                                     SourceLocation::new(start, self.offset()),
1834                                     TokenValue::None,
1835                                 );
1836                             }
1837                             _ => return Err(ParseError::IllegalCharacter('.').into()),
1838                         }
1839                     }
1840                     Some('0'..='9') => {
1841                         let result = self.decimal_literal_after_decimal_point()?;
1842                         return Ok(self.numeric_result_to_advance_result(builder.finish(&self), start, result)?);
1843                     }
1844                     _ => return self.set_result(
1845                         TerminalId::Dot,
1846                         SourceLocation::new(start, self.offset()),
1847                         TokenValue::None,
1848                     ),
1849                 },
1850 
1851                 '/' => match self.peek() {
1852                     Some('/') => {
1853                         // SingleLineComment :: `//` SingleLineCommentChars?
1854                         self.chars.next();
1855                         self.skip_single_line_comment(&mut builder);
1856                         start = self.offset();
1857                         continue;
1858                     }
1859                     Some('*') => {
1860                         self.chars.next();
1861                         self.skip_multi_line_comment(&mut builder)?;
1862                         start = self.offset();
1863                         continue;
1864                     }
1865                     _ => {
1866                         if parser.can_accept_terminal(TerminalId::Divide) {
1867                             match self.peek() {
1868                                 Some('=') => {
1869                                     self.chars.next();
1870                                     return self.set_result(
1871                                         TerminalId::DivideAssign,
1872                                         SourceLocation::new(start, self.offset()),
1873                                         TokenValue::None,
1874                                     );
1875                                 }
1876                                 _ => return self.set_result(
1877                                     TerminalId::Divide,
1878                                     SourceLocation::new(start, self.offset()),
1879                                     TokenValue::None,
1880                                 ),
1881                             }
1882                         }
1883                         return self.regular_expression_literal(&mut builder);
1884                     }
1885                 },
1886 
1887                 '}' => {
1888                     if parser.can_accept_terminal(TerminalId::TemplateMiddle) {
1889                         return self.template_part(start, TerminalId::TemplateMiddle, TerminalId::TemplateTail);
1890                     }
1891                     return self.set_result(
1892                         TerminalId::CloseBrace,
1893                         SourceLocation::new(start, self.offset()),
1894                         TokenValue::None,
1895                     );
1896                 }
1897 
1898                 '<' => match self.peek() {
1899                     Some('<') => {
1900                         self.chars.next();
1901                         match self.peek() {
1902                             Some('=') => {
1903                                 self.chars.next();
1904                                 return self.set_result(
1905                                     TerminalId::LeftShiftAssign,
1906                                     SourceLocation::new(start, self.offset()),
1907                                     TokenValue::None,
1908                                 );
1909                             }
1910                             _ => return self.set_result(
1911                                 TerminalId::LeftShift,
1912                                 SourceLocation::new(start, self.offset()),
1913                                 TokenValue::None,
1914                             ),
1915                         }
1916                     }
1917                     Some('=') => {
1918                         self.chars.next();
1919                         return self.set_result(
1920                             TerminalId::LessThanOrEqualTo,
1921                             SourceLocation::new(start, self.offset()),
1922                             TokenValue::None,
1923                         );
1924                     }
1925                     Some('!') if self.is_looking_at("!--") => {
1926                         // B.1.3 SingleLineHTMLOpenComment. Note that the above
1927                         // `is_looking_at` test peeked ahead at the next three
1928                         // characters of input. This lookahead is necessary
1929                         // because `x<!--` has a comment but `x<!-y` does not.
1930                         //
1931                         // TODO: Limit this to Script (not Module).
1932                         self.skip_single_line_comment(&mut builder);
1933                         start = self.offset();
1934                         continue;
1935                     }
1936                     _ => return self.set_result(
1937                         TerminalId::LessThan,
1938                         SourceLocation::new(start, self.offset()),
1939                         TokenValue::None,
1940                     ),
1941                 },
1942 
1943                 '=' => match self.peek() {
1944                     Some('=') => {
1945                         self.chars.next();
1946                         match self.peek() {
1947                             Some('=') => {
1948                                 self.chars.next();
1949                                 return self.set_result(
1950                                     TerminalId::StrictEqual,
1951                                     SourceLocation::new(start, self.offset()),
1952                                     TokenValue::None,
1953                                 );
1954                             }
1955                             _ => return self.set_result(
1956                                 TerminalId::LaxEqual,
1957                                 SourceLocation::new(start, self.offset()),
1958                                 TokenValue::None,
1959                             ),
1960                         }
1961                     }
1962                     Some('>') => {
1963                         self.chars.next();
1964                         return self.set_result(
1965                             TerminalId::Arrow,
1966                             SourceLocation::new(start, self.offset()),
1967                             TokenValue::None,
1968                         );
1969                     }
1970                     _ => return self.set_result(
1971                         TerminalId::EqualSign,
1972                         SourceLocation::new(start, self.offset()),
1973                         TokenValue::None,
1974                     ),
1975                 },
1976 
1977                 '>' => match self.peek() {
1978                     Some('>') => {
1979                         self.chars.next();
1980                         match self.peek() {
1981                             Some('>') => {
1982                                 self.chars.next();
1983                                 match self.peek() {
1984                                     Some('=') => {
1985                                         self.chars.next();
1986                                         return self.set_result(
1987                                             TerminalId::UnsignedRightShiftAssign,
1988                                             SourceLocation::new(start, self.offset()),
1989                                             TokenValue::None,
1990                                         );
1991                                     }
1992                                     _ => return self.set_result(
1993                                         TerminalId::UnsignedRightShift,
1994                                         SourceLocation::new(start, self.offset()),
1995                                         TokenValue::None,
1996                                     ),
1997                                 }
1998                             }
1999                             Some('=') => {
2000                                 self.chars.next();
2001                                 return self.set_result(
2002                                     TerminalId::SignedRightShiftAssign,
2003                                     SourceLocation::new(start, self.offset()),
2004                                     TokenValue::None,
2005                                 );
2006                             }
2007                             _ => return self.set_result(
2008                                 TerminalId::SignedRightShift,
2009                                 SourceLocation::new(start, self.offset()),
2010                                 TokenValue::None,
2011                             ),
2012                         }
2013                     }
2014                     Some('=') => {
2015                         self.chars.next();
2016                         return self.set_result(
2017                             TerminalId::GreaterThanOrEqualTo,
2018                             SourceLocation::new(start, self.offset()),
2019                             TokenValue::None,
2020                         );
2021                     }
2022                     _ => return self.set_result(
2023                         TerminalId::GreaterThan,
2024                         SourceLocation::new(start, self.offset()),
2025                         TokenValue::None,
2026                     ),
2027                 },
2028 
2029                 '^' => match self.peek() {
2030                     Some('=') => {
2031                         self.chars.next();
2032                         return self.set_result(
2033                             TerminalId::BitwiseXorAssign,
2034                             SourceLocation::new(start, self.offset()),
2035                             TokenValue::None,
2036                         );
2037                     }
2038                     _ => return self.set_result(
2039                         TerminalId::BitwiseXor,
2040                         SourceLocation::new(start, self.offset()),
2041                         TokenValue::None,
2042                     ),
2043                 },
2044 
2045                 '|' => match self.peek() {
2046                     Some('|') => {
2047                         self.chars.next();
2048                         match self.peek() {
2049                             Some('=') => {
2050                                 self.chars.next();
2051                                 return self.set_result(
2052                                     TerminalId::LogicalOrAssign,
2053                                     SourceLocation::new(start, self.offset()),
2054                                     TokenValue::None,
2055                                 );
2056                             }
2057                             _ => return self.set_result(
2058                                 TerminalId::LogicalOr,
2059                                 SourceLocation::new(start, self.offset()),
2060                                 TokenValue::None,
2061                             )
2062                         }
2063                     }
2064                     Some('=') => {
2065                         self.chars.next();
2066                         return self.set_result(
2067                             TerminalId::BitwiseOrAssign,
2068                             SourceLocation::new(start, self.offset()),
2069                             TokenValue::None,
2070                         );
2071                     }
2072                     _ => return self.set_result(
2073                         TerminalId::BitwiseOr,
2074                         SourceLocation::new(start, self.offset()),
2075                         TokenValue::None,
2076                     ),
2077                 },
2078 
2079                 '?' => match self.peek() {
2080                     Some('?') => {
2081                         self.chars.next();
2082                         match self.peek() {
2083                             Some('=') => {
2084                                 self.chars.next();
2085                                 return self.set_result(
2086                                     TerminalId::CoalesceAssign,
2087                                     SourceLocation::new(start, self.offset()),
2088                                     TokenValue::None,
2089                                 );
2090                             }
2091                             _ => return self.set_result(
2092                                 TerminalId::Coalesce,
2093                                 SourceLocation::new(start, self.offset()),
2094                                 TokenValue::None,
2095                             )
2096                         }
2097                     }
2098                     Some('.') => {
2099                         if let Some('0'..='9') = self.double_peek() {
2100                             return self.set_result(
2101                                 TerminalId::QuestionMark,
2102                                 SourceLocation::new(start, self.offset()),
2103                                 TokenValue::None,
2104                             )
2105                         }
2106                         self.chars.next();
2107                         return self.set_result(
2108                             TerminalId::OptionalChain,
2109                             SourceLocation::new(start, self.offset()),
2110                             TokenValue::None,
2111                         );
2112                     }
2113                     _ => return self.set_result(
2114                         TerminalId::QuestionMark,
2115                         SourceLocation::new(start, self.offset()),
2116                         TokenValue::None,
2117                     ),
2118                 }
2119 
2120                 '(' => return self.set_result(
2121                     TerminalId::OpenParenthesis,
2122                     SourceLocation::new(start, self.offset()),
2123                     TokenValue::None,
2124                 ),
2125                 ')' => return self.set_result(
2126                     TerminalId::CloseParenthesis,
2127                     SourceLocation::new(start, self.offset()),
2128                     TokenValue::None,
2129                 ),
2130                 ',' => return self.set_result(
2131                     TerminalId::Comma,
2132                     SourceLocation::new(start, self.offset()),
2133                     TokenValue::None,
2134                 ),
2135                 ':' => return self.set_result(
2136                     TerminalId::Colon,
2137                     SourceLocation::new(start, self.offset()),
2138                     TokenValue::None,
2139                 ),
2140                 ';' => return self.set_result(
2141                     TerminalId::Semicolon,
2142                     SourceLocation::new(start, self.offset()),
2143                     TokenValue::None,
2144                 ),
2145                 '[' => return self.set_result(
2146                     TerminalId::OpenBracket,
2147                     SourceLocation::new(start, self.offset()),
2148                     TokenValue::None,
2149                 ),
2150                 ']' => return self.set_result(
2151                     TerminalId::CloseBracket,
2152                     SourceLocation::new(start, self.offset()),
2153                     TokenValue::None,
2154                 ),
2155                 '{' => return self.set_result(
2156                     TerminalId::OpenBrace,
2157                     SourceLocation::new(start, self.offset()),
2158                     TokenValue::None,
2159                 ),
2160                 '~' => return self.set_result(
2161                     TerminalId::BitwiseNot,
2162                     SourceLocation::new(start, self.offset()),
2163                     TokenValue::None,
2164                 ),
2165 
2166                 // Idents
2167                 '$' | '_' | 'a'..='z' | 'A'..='Z' => {
2168                     builder.push_matching(c);
2169                     return self.identifier_tail(start, builder);
2170                 }
2171 
2172                 '\\' => {
2173                     builder.force_allocation_without_current_ascii_char(&self);
2174 
2175                     let value = self.unicode_escape_sequence_after_backslash()?;
2176                     if !is_identifier_start(value) {
2177                         return Err(ParseError::IllegalCharacter(value).into());
2178                     }
2179                     builder.push_different(value);
2180 
2181                     return self.identifier_tail(start, builder);
2182                 }
2183 
2184                 '#' => {
2185                     if start == 0 {
2186                         // https://tc39.es/proposal-hashbang/out.html
2187                         // HashbangComment ::
2188                         //     `#!` SingleLineCommentChars?
2189                         if let Some('!') = self.peek() {
2190                             self.skip_single_line_comment(&mut builder);
2191                             start = self.offset();
2192                             continue;
2193                         }
2194                     }
2195 
2196                     builder.push_matching(c);
2197                     return self.private_identifier(start, builder);
2198                 }
2199 
2200                 other if is_identifier_start(other) => {
2201                     builder.push_matching(other);
2202                     return self.identifier_tail(start, builder);
2203                 }
2204 
2205                 other => {
2206                     return Err(ParseError::IllegalCharacter(other).into());
2207                 }
2208             }
2209         }
2210         self.set_result(
2211             TerminalId::End,
2212             SourceLocation::new(start, self.offset()),
2213             TokenValue::None,
2214         )
2215     }
2216 
string_to_token_value(&mut self, s: &'alloc str) -> TokenValue2217     fn string_to_token_value(&mut self, s: &'alloc str) -> TokenValue {
2218         let index = self.atoms.borrow_mut().insert(s);
2219         TokenValue::Atom(index)
2220     }
2221 
slice_to_token_value(&mut self, s: &'alloc str) -> TokenValue2222     fn slice_to_token_value(&mut self, s: &'alloc str) -> TokenValue {
2223         let index = self.slices.borrow_mut().push(s);
2224         TokenValue::Slice(index)
2225     }
2226 
numeric_result_to_advance_result( &mut self, s: &'alloc str, start: usize, result: NumericResult, ) -> Result<'alloc, ()>2227     fn numeric_result_to_advance_result(
2228         &mut self,
2229         s: &'alloc str,
2230         start: usize,
2231         result: NumericResult,
2232     ) -> Result<'alloc, ()> {
2233         let (terminal_id, value) = match result {
2234             NumericResult::Int { base } => {
2235                 let n = parse_int(s, base).map_err(|s| ParseError::NotImplemented(s))?;
2236                 (TerminalId::NumericLiteral, TokenValue::Number(n))
2237             }
2238             NumericResult::Float => {
2239                 let n = parse_float(s).map_err(|s| ParseError::NotImplemented(s))?;
2240                 (TerminalId::NumericLiteral, TokenValue::Number(n))
2241             }
2242             NumericResult::BigInt { .. } => {
2243                 // FIXME
2244                 (TerminalId::BigIntLiteral, self.string_to_token_value(s))
2245             }
2246         };
2247 
2248         self.set_result(
2249             terminal_id,
2250             SourceLocation::new(start, self.offset()),
2251             value,
2252         )
2253     }
2254 }
2255 
2256 struct AutoCow<'alloc> {
2257     start: &'alloc str,
2258     value: Option<String<'alloc>>,
2259 }
2260 
2261 impl<'alloc> AutoCow<'alloc> {
new(lexer: &Lexer<'alloc>) -> Self2262     fn new(lexer: &Lexer<'alloc>) -> Self {
2263         AutoCow {
2264             start: lexer.chars.as_str(),
2265             value: None,
2266         }
2267     }
2268 
2269     // Push a char that matches lexer.chars.next()
push_matching(&mut self, c: char)2270     fn push_matching(&mut self, c: char) {
2271         if let Some(text) = &mut self.value {
2272             text.push(c);
2273         }
2274     }
2275 
2276     // Push a different character than lexer.chars.next().
2277     // force_allocation_without_current_ascii_char must be called before this.
push_different(&mut self, c: char)2278     fn push_different(&mut self, c: char) {
2279         debug_assert!(self.value.is_some());
2280         self.value.as_mut().unwrap().push(c)
2281     }
2282 
2283     // Force allocation of a String, excluding the current ASCII character,
2284     // and return the reference to it
get_mut_string_without_current_ascii_char<'b>( &'b mut self, lexer: &'_ Lexer<'alloc>, ) -> &'b mut String<'alloc>2285     fn get_mut_string_without_current_ascii_char<'b>(
2286         &'b mut self,
2287         lexer: &'_ Lexer<'alloc>,
2288     ) -> &'b mut String<'alloc> {
2289         self.force_allocation_without_current_ascii_char(lexer);
2290         self.value.as_mut().unwrap()
2291     }
2292 
2293     // Force allocation of a String, excluding the current ASCII character.
force_allocation_without_current_ascii_char(&mut self, lexer: &'_ Lexer<'alloc>)2294     fn force_allocation_without_current_ascii_char(&mut self, lexer: &'_ Lexer<'alloc>) {
2295         if self.value.is_some() {
2296             return;
2297         }
2298 
2299         self.value = Some(String::from_str_in(
2300             &self.start[..self.start.len() - lexer.chars.as_str().len() - 1],
2301             lexer.allocator,
2302         ));
2303     }
2304 
2305     // Check if the string contains a different character, such as an escape
2306     // sequence
has_different(&self) -> bool2307     fn has_different(&self) -> bool {
2308         self.value.is_some()
2309     }
2310 
finish(&mut self, lexer: &Lexer<'alloc>) -> &'alloc str2311     fn finish(&mut self, lexer: &Lexer<'alloc>) -> &'alloc str {
2312         match self.value.take() {
2313             Some(arena_string) => arena_string.into_bump_str(),
2314             None => &self.start[..self.start.len() - lexer.chars.as_str().len()],
2315         }
2316     }
2317 
2318     // Just like finish, but without pushing current char.
finish_without_push(&mut self, lexer: &Lexer<'alloc>) -> &'alloc str2319     fn finish_without_push(&mut self, lexer: &Lexer<'alloc>) -> &'alloc str {
2320         match self.value.take() {
2321             Some(arena_string) => arena_string.into_bump_str(),
2322             None => &self.start[..self.start.len() - lexer.chars.as_str().len() - 1],
2323         }
2324     }
2325 }
2326