1 //! JavaScript lexer.
2
3 use crate::numeric_value::{parse_float, parse_int, NumericLiteralBase};
4 use crate::parser::Parser;
5 use crate::unicode::{is_id_continue, is_id_start};
6 use ast::arena;
7 use ast::source_atom_set::{CommonSourceAtomSetIndices, SourceAtomSet};
8 use ast::source_slice_list::SourceSliceList;
9 use ast::SourceLocation;
10 use bumpalo::{collections::String, Bump};
11 use generated_parser::{ParseError, Result, TerminalId, Token, TokenValue};
12 use std::cell::RefCell;
13 use std::convert::TryFrom;
14 use std::rc::Rc;
15 use std::str::Chars;
16
/// Lexer state for one piece of JavaScript source text.
///
/// Holds the remaining input, the token currently being built, and the
/// shared tables handed in by the caller of `new` / `with_offset`.
pub struct Lexer<'alloc> {
    /// Bump arena used to allocate token boxes (see `with_offset` and `next`).
    allocator: &'alloc Bump,

    /// Next token to be returned.
    token: arena::Box<'alloc, Token>,

    /// Length of the input text, in UTF-8 bytes, plus the `offset` passed to
    /// `with_offset`. `offset()` subtracts the remaining input from this.
    source_length: usize,

    /// Iterator over the remaining not-yet-parsed input.
    chars: Chars<'alloc>,

    /// Shared atom set supplied by the caller.
    /// NOTE(review): presumably where identifier/string text is interned by
    /// `string_to_token_value` -- confirm against that method.
    atoms: Rc<RefCell<SourceAtomSet<'alloc>>>,

    /// Shared source-slice list supplied by the caller.
    /// NOTE(review): not used in the part of the file visible here -- confirm
    /// which tokens store their text in it.
    slices: Rc<RefCell<SourceSliceList<'alloc>>>,
}
33
/// Kind of numeric literal recognised by the numeric-literal scanning methods.
enum NumericResult {
    /// Integer literal without a BigInt suffix, written in `base`.
    Int {
        base: NumericLiteralBase,
    },
    /// Literal with a decimal point and/or an exponent part.
    Float,
    /// Integer literal followed by the BigInt suffix `n`, written in `base`.
    BigInt {
        // NOTE(review): the lint is silenced because `base` is presumably not
        // read by any consumer yet -- confirm before removing the attribute.
        #[allow(dead_code)]
        base: NumericLiteralBase,
    },
}
44
45 impl<'alloc> Lexer<'alloc> {
new( allocator: &'alloc Bump, chars: Chars<'alloc>, atoms: Rc<RefCell<SourceAtomSet<'alloc>>>, slices: Rc<RefCell<SourceSliceList<'alloc>>>, ) -> Lexer<'alloc>46 pub fn new(
47 allocator: &'alloc Bump,
48 chars: Chars<'alloc>,
49 atoms: Rc<RefCell<SourceAtomSet<'alloc>>>,
50 slices: Rc<RefCell<SourceSliceList<'alloc>>>,
51 ) -> Lexer<'alloc> {
52 Self::with_offset(allocator, chars, 0, atoms, slices)
53 }
54
55 /// Create a lexer for a part of a JS script or module. `offset` is the
56 /// total length of all previous parts, in bytes; source locations for
57 /// tokens created by the new lexer start counting from this number.
with_offset( allocator: &'alloc Bump, chars: Chars<'alloc>, offset: usize, atoms: Rc<RefCell<SourceAtomSet<'alloc>>>, slices: Rc<RefCell<SourceSliceList<'alloc>>>, ) -> Lexer<'alloc>58 pub fn with_offset(
59 allocator: &'alloc Bump,
60 chars: Chars<'alloc>,
61 offset: usize,
62 atoms: Rc<RefCell<SourceAtomSet<'alloc>>>,
63 slices: Rc<RefCell<SourceSliceList<'alloc>>>,
64 ) -> Lexer<'alloc> {
65 let source_length = offset + chars.as_str().len();
66 let mut token = arena::alloc(allocator, new_token());
67 token.is_on_new_line = true;
68 Lexer {
69 allocator,
70 token,
71 source_length,
72 chars,
73 atoms,
74 slices,
75 }
76 }
77
is_looking_at(&self, s: &str) -> bool78 fn is_looking_at(&self, s: &str) -> bool {
79 self.chars.as_str().starts_with(s)
80 }
81
offset(&self) -> usize82 pub fn offset(&self) -> usize {
83 self.source_length - self.chars.as_str().len()
84 }
85
peek(&self) -> Option<char>86 fn peek(&self) -> Option<char> {
87 self.chars.as_str().chars().next()
88 }
89
double_peek(&self) -> Option<char>90 fn double_peek(&self) -> Option<char> {
91 let mut chars = self.chars.as_str().chars();
92 chars.next();
93 chars.next()
94 }
95
set_result( &mut self, terminal_id: TerminalId, loc: SourceLocation, value: TokenValue, ) -> Result<'alloc, ()>96 fn set_result(
97 &mut self,
98 terminal_id: TerminalId,
99 loc: SourceLocation,
100 value: TokenValue,
101 ) -> Result<'alloc, ()> {
102 self.token.terminal_id = terminal_id;
103 self.token.loc = loc;
104 self.token.value = value;
105 Ok(())
106 }
107
108 #[inline]
next<'parser>( &mut self, parser: &Parser<'parser>, ) -> Result<'alloc, arena::Box<'alloc, Token>>109 pub fn next<'parser>(
110 &mut self,
111 parser: &Parser<'parser>,
112 ) -> Result<'alloc, arena::Box<'alloc, Token>> {
113 let mut next_token = arena::alloc_with(self.allocator, || new_token());
114 self.advance_impl(parser)?;
115 std::mem::swap(&mut self.token, &mut next_token);
116 Ok(next_token)
117 }
118
unexpected_err(&mut self) -> ParseError<'alloc>119 fn unexpected_err(&mut self) -> ParseError<'alloc> {
120 if let Some(ch) = self.peek() {
121 ParseError::IllegalCharacter(ch)
122 } else {
123 ParseError::UnexpectedEnd
124 }
125 }
126 }
127
128 /// Returns an empty token which is meant as a place holder to be mutated later.
new_token() -> Token129 fn new_token() -> Token {
130 Token::basic_token(TerminalId::End, SourceLocation::default())
131 }
132
// ----------------------------------------------------------------------------
// 11.1 Unicode Format-Control Characters

/// `<ZWNJ>`: U+200C ZERO WIDTH NON-JOINER.
/// Specially permitted in identifiers.
const ZWNJ: char = '\u{200C}';

/// `<ZWJ>`: U+200D ZERO WIDTH JOINER.
/// Specially permitted in identifiers.
const ZWJ: char = '\u{200D}';

/// `<ZWNBSP>`: U+FEFF ZERO WIDTH NO-BREAK SPACE.
/// Considered a whitespace character in JS.
const ZWNBSP: char = '\u{FEFF}';

// ----------------------------------------------------------------------------
// 11.2 White Space

/// `<TAB>`: U+0009 CHARACTER TABULATION.
const TAB: char = '\t';

/// `<VT>`: U+000B VERTICAL TAB.
const VT: char = '\x0B';

/// `<FF>`: U+000C FORM FEED.
const FF: char = '\x0C';

/// `<SP>`: U+0020 SPACE.
const SP: char = ' ';

/// `<NBSP>`: U+00A0 NON-BREAKING SPACE.
const NBSP: char = '\u{A0}';

// ----------------------------------------------------------------------------
// 11.3 Line Terminators

/// `<LF>`: U+000A LINE FEED.
const LF: char = '\n';

/// `<CR>`: U+000D CARRIAGE RETURN.
const CR: char = '\r';

/// `<LS>`: U+2028 LINE SEPARATOR.
const LS: char = '\u{2028}';

/// `<PS>`: U+2029 PARAGRAPH SEPARATOR.
const PS: char = '\u{2029}';
180
181 // ----------------------------------------------------------------------------
182 // 11.4 Comments
183 //
184 // Comment::
185 // MultiLineComment
186 // SingleLineComment
187
188 impl<'alloc> Lexer<'alloc> {
189 /// Skip a *MultiLineComment*.
190 ///
191 /// ```text
192 /// MultiLineComment ::
193 /// `/*` MultiLineCommentChars? `*/`
194 ///
195 /// MultiLineCommentChars ::
196 /// MultiLineNotAsteriskChar MultiLineCommentChars?
197 /// `*` PostAsteriskCommentChars?
198 ///
199 /// PostAsteriskCommentChars ::
200 /// MultiLineNotForwardSlashOrAsteriskChar MultiLineCommentChars?
201 /// `*` PostAsteriskCommentChars?
202 ///
203 /// MultiLineNotAsteriskChar ::
204 /// SourceCharacter but not `*`
205 ///
206 /// MultiLineNotForwardSlashOrAsteriskChar ::
207 /// SourceCharacter but not one of `/` or `*`
208 /// ```
209 ///
210 /// (B.1.3 splits MultiLineComment into two nonterminals: MultiLineComment
211 /// and SingleLineDelimitedComment. The point of that is to help specify
212 /// that a SingleLineHTMLCloseComment must occur at the start of a line. We
213 /// use `is_on_new_line` for that.)
214 ///
skip_multi_line_comment(&mut self, builder: &mut AutoCow<'alloc>) -> Result<'alloc, ()>215 fn skip_multi_line_comment(&mut self, builder: &mut AutoCow<'alloc>) -> Result<'alloc, ()> {
216 while let Some(ch) = self.chars.next() {
217 match ch {
218 '*' if self.peek() == Some('/') => {
219 self.chars.next();
220 *builder = AutoCow::new(&self);
221 return Ok(());
222 }
223 CR | LF | PS | LS => {
224 self.token.is_on_new_line = true;
225 }
226 _ => {}
227 }
228 }
229 Err(ParseError::UnterminatedMultiLineComment.into())
230 }
231
232 /// Skip a *SingleLineComment* and the following *LineTerminatorSequence*,
233 /// if any.
234 ///
235 /// ```text
236 /// SingleLineComment ::
237 /// `//` SingleLineCommentChars?
238 ///
239 /// SingleLineCommentChars ::
240 /// SingleLineCommentChar SingleLineCommentChars?
241 ///
242 /// SingleLineCommentChar ::
243 /// SourceCharacter but not LineTerminator
244 /// ```
skip_single_line_comment(&mut self, builder: &mut AutoCow<'alloc>)245 fn skip_single_line_comment(&mut self, builder: &mut AutoCow<'alloc>) {
246 while let Some(ch) = self.chars.next() {
247 match ch {
248 CR | LF | LS | PS => break,
249 _ => continue,
250 }
251 }
252 *builder = AutoCow::new(&self);
253 self.token.is_on_new_line = true;
254 }
255 }
256
257 // ----------------------------------------------------------------------------
258 // 11.6 Names and Keywords
259
260 /// True if `c` is a one-character *IdentifierStart*.
261 ///
262 /// ```text
263 /// IdentifierStart ::
264 /// UnicodeIDStart
265 /// `$`
266 /// `_`
267 /// `\` UnicodeEscapeSequence
268 ///
269 /// UnicodeIDStart ::
270 /// > any Unicode code point with the Unicode property "ID_Start"
271 /// ```
is_identifier_start(c: char) -> bool272 fn is_identifier_start(c: char) -> bool {
273 // Escaped case is handled separately.
274 if c.is_ascii() {
275 c == '$' || c == '_' || c.is_ascii_alphabetic()
276 } else {
277 is_id_start(c)
278 }
279 }
280
281 /// True if `c` is a one-character *IdentifierPart*.
282 ///
283 /// ```text
284 /// IdentifierPart ::
285 /// UnicodeIDContinue
286 /// `$`
287 /// `\` UnicodeEscapeSequence
288 /// <ZWNJ>
289 /// <ZWJ>
290 ///
291 /// UnicodeIDContinue ::
292 /// > any Unicode code point with the Unicode property "ID_Continue"
293 /// ```
is_identifier_part(c: char) -> bool294 fn is_identifier_part(c: char) -> bool {
295 // Escaped case is handled separately.
296 if c.is_ascii() {
297 c == '$' || c == '_' || c.is_ascii_alphanumeric()
298 } else {
299 is_id_continue(c) || c == ZWNJ || c == ZWJ
300 }
301 }
302
303 impl<'alloc> Lexer<'alloc> {
304 /// Scan the rest of an IdentifierName, having already parsed the initial
305 /// IdentifierStart and stored it in `builder`.
306 ///
307 /// On success, this returns `Ok((has_escapes, str))`, where `has_escapes`
308 /// is true if the identifier contained any UnicodeEscapeSequences, and
309 /// `str` is the un-escaped IdentifierName, including the IdentifierStart,
310 /// on success.
311 ///
312 /// ```text
313 /// IdentifierName ::
314 /// IdentifierStart
315 /// IdentifierName IdentifierPart
316 /// ```
identifier_name_tail( &mut self, mut builder: AutoCow<'alloc>, ) -> Result<'alloc, (bool, &'alloc str)>317 fn identifier_name_tail(
318 &mut self,
319 mut builder: AutoCow<'alloc>,
320 ) -> Result<'alloc, (bool, &'alloc str)> {
321 while let Some(ch) = self.peek() {
322 if !is_identifier_part(ch) {
323 if ch == '\\' {
324 self.chars.next();
325 builder.force_allocation_without_current_ascii_char(&self);
326
327 let value = self.unicode_escape_sequence_after_backslash()?;
328 if !is_identifier_part(value) {
329 return Err(ParseError::InvalidEscapeSequence.into());
330 }
331
332 builder.push_different(value);
333 continue;
334 }
335
336 break;
337 }
338 self.chars.next();
339 builder.push_matching(ch);
340 }
341 let has_different = builder.has_different();
342 Ok((has_different, builder.finish(&self)))
343 }
344
identifier_name(&mut self, mut builder: AutoCow<'alloc>) -> Result<'alloc, &'alloc str>345 fn identifier_name(&mut self, mut builder: AutoCow<'alloc>) -> Result<'alloc, &'alloc str> {
346 match self.chars.next() {
347 None => {
348 return Err(ParseError::UnexpectedEnd.into());
349 }
350 Some(c) => {
351 match c {
352 '$' | '_' | 'a'..='z' | 'A'..='Z' => {
353 builder.push_matching(c);
354 }
355
356 '\\' => {
357 builder.force_allocation_without_current_ascii_char(&self);
358
359 let value = self.unicode_escape_sequence_after_backslash()?;
360 if !is_identifier_start(value) {
361 return Err(ParseError::IllegalCharacter(value).into());
362 }
363 builder.push_different(value);
364 }
365
366 other if is_identifier_start(other) => {
367 builder.push_matching(other);
368 }
369
370 other => {
371 return Err(ParseError::IllegalCharacter(other).into());
372 }
373 }
374 self.identifier_name_tail(builder)
375 .map(|(_has_escapes, name)| name)
376 }
377 }
378 }
379
380 /// Finish scanning an *IdentifierName* or keyword, having already scanned
381 /// the *IdentifierStart* and pushed it to `builder`.
382 ///
383 /// `start` is the offset of the *IdentifierStart*.
384 ///
385 /// The lexer doesn't know the syntactic context, so it always identifies
386 /// possible keywords. It's up to the parser to understand that, for
387 /// example, `TerminalId::If` is not a keyword when it's used as a property
388 /// or method name.
389 ///
390 /// If the source string contains no escape and it matches to possible
391 /// keywords (including contextual keywords), the result is corresponding
392 /// `TerminalId`. For example, if the source string is "yield", the result
393 /// is `TerminalId::Yield`.
394 ///
395 /// If the source string contains no escape sequence and also it doesn't
396 /// match to any possible keywords, the result is `TerminalId::Name`.
397 ///
398 /// If the source string contains at least one escape sequence,
399 /// the result is always `TerminalId::NameWithEscape`, regardless of the
400 /// StringValue of it. For example, if the source string is "\u{79}ield",
401 /// the result is `TerminalId::NameWithEscape`, and the StringValue is
402 /// "yield".
identifier_tail(&mut self, start: usize, builder: AutoCow<'alloc>) -> Result<'alloc, ()>403 fn identifier_tail(&mut self, start: usize, builder: AutoCow<'alloc>) -> Result<'alloc, ()> {
404 let (has_different, text) = self.identifier_name_tail(builder)?;
405
406 // https://tc39.es/ecma262/#sec-keywords-and-reserved-words
407 //
408 // keywords in the grammar match literal sequences of specific
409 // SourceCharacter elements. A code point in a keyword cannot be
410 // expressed by a `\` UnicodeEscapeSequence.
411 let (id, value) = if has_different {
412 // Always return `NameWithEscape`.
413 //
414 // Error check against reserved word should be handled in the
415 // consumer.
416 (TerminalId::NameWithEscape, self.string_to_token_value(text))
417 } else {
418 match &text as &str {
419 "as" => (
420 TerminalId::As,
421 TokenValue::Atom(CommonSourceAtomSetIndices::as_()),
422 ),
423 "async" => {
424 /*
425 (
426 TerminalId::Async,
427 TokenValue::Atom(CommonSourceAtomSetIndices::async_()),
428 ),
429 */
430 return Err(ParseError::NotImplemented(
431 "async cannot be handled in parser due to multiple lookahead",
432 )
433 .into());
434 }
435 "await" => {
436 /*
437 (
438 TerminalId::Await,
439 TokenValue::Atom(CommonSourceAtomSetIndices::await_()),
440 ),
441 */
442 return Err(
443 ParseError::NotImplemented("await cannot be handled in parser").into(),
444 );
445 }
446 "break" => (
447 TerminalId::Break,
448 TokenValue::Atom(CommonSourceAtomSetIndices::break_()),
449 ),
450 "case" => (
451 TerminalId::Case,
452 TokenValue::Atom(CommonSourceAtomSetIndices::case()),
453 ),
454 "catch" => (
455 TerminalId::Catch,
456 TokenValue::Atom(CommonSourceAtomSetIndices::catch()),
457 ),
458 "class" => (
459 TerminalId::Class,
460 TokenValue::Atom(CommonSourceAtomSetIndices::class()),
461 ),
462 "const" => (
463 TerminalId::Const,
464 TokenValue::Atom(CommonSourceAtomSetIndices::const_()),
465 ),
466 "continue" => (
467 TerminalId::Continue,
468 TokenValue::Atom(CommonSourceAtomSetIndices::continue_()),
469 ),
470 "debugger" => (
471 TerminalId::Debugger,
472 TokenValue::Atom(CommonSourceAtomSetIndices::debugger()),
473 ),
474 "default" => (
475 TerminalId::Default,
476 TokenValue::Atom(CommonSourceAtomSetIndices::default()),
477 ),
478 "delete" => (
479 TerminalId::Delete,
480 TokenValue::Atom(CommonSourceAtomSetIndices::delete()),
481 ),
482 "do" => (
483 TerminalId::Do,
484 TokenValue::Atom(CommonSourceAtomSetIndices::do_()),
485 ),
486 "else" => (
487 TerminalId::Else,
488 TokenValue::Atom(CommonSourceAtomSetIndices::else_()),
489 ),
490 "enum" => (
491 TerminalId::Enum,
492 TokenValue::Atom(CommonSourceAtomSetIndices::enum_()),
493 ),
494 "export" => (
495 TerminalId::Export,
496 TokenValue::Atom(CommonSourceAtomSetIndices::export()),
497 ),
498 "extends" => (
499 TerminalId::Extends,
500 TokenValue::Atom(CommonSourceAtomSetIndices::extends()),
501 ),
502 "finally" => (
503 TerminalId::Finally,
504 TokenValue::Atom(CommonSourceAtomSetIndices::finally()),
505 ),
506 "for" => (
507 TerminalId::For,
508 TokenValue::Atom(CommonSourceAtomSetIndices::for_()),
509 ),
510 "from" => (
511 TerminalId::From,
512 TokenValue::Atom(CommonSourceAtomSetIndices::from()),
513 ),
514 "function" => (
515 TerminalId::Function,
516 TokenValue::Atom(CommonSourceAtomSetIndices::function()),
517 ),
518 "get" => (
519 TerminalId::Get,
520 TokenValue::Atom(CommonSourceAtomSetIndices::get()),
521 ),
522 "if" => (
523 TerminalId::If,
524 TokenValue::Atom(CommonSourceAtomSetIndices::if_()),
525 ),
526 "implements" => (
527 TerminalId::Implements,
528 TokenValue::Atom(CommonSourceAtomSetIndices::implements()),
529 ),
530 "import" => (
531 TerminalId::Import,
532 TokenValue::Atom(CommonSourceAtomSetIndices::import()),
533 ),
534 "in" => (
535 TerminalId::In,
536 TokenValue::Atom(CommonSourceAtomSetIndices::in_()),
537 ),
538 "instanceof" => (
539 TerminalId::Instanceof,
540 TokenValue::Atom(CommonSourceAtomSetIndices::instanceof()),
541 ),
542 "interface" => (
543 TerminalId::Interface,
544 TokenValue::Atom(CommonSourceAtomSetIndices::interface()),
545 ),
546 "let" => {
547 /*
548 (
549 TerminalId::Let,
550 TokenValue::Atom(CommonSourceAtomSetIndices::let_()),
551 ),
552 */
553 return Err(ParseError::NotImplemented(
554 "let cannot be handled in parser due to multiple lookahead",
555 )
556 .into());
557 }
558 "new" => (
559 TerminalId::New,
560 TokenValue::Atom(CommonSourceAtomSetIndices::new_()),
561 ),
562 "of" => (
563 TerminalId::Of,
564 TokenValue::Atom(CommonSourceAtomSetIndices::of()),
565 ),
566 "package" => (
567 TerminalId::Package,
568 TokenValue::Atom(CommonSourceAtomSetIndices::package()),
569 ),
570 "private" => (
571 TerminalId::Private,
572 TokenValue::Atom(CommonSourceAtomSetIndices::private()),
573 ),
574 "protected" => (
575 TerminalId::Protected,
576 TokenValue::Atom(CommonSourceAtomSetIndices::protected()),
577 ),
578 "public" => (
579 TerminalId::Public,
580 TokenValue::Atom(CommonSourceAtomSetIndices::public()),
581 ),
582 "return" => (
583 TerminalId::Return,
584 TokenValue::Atom(CommonSourceAtomSetIndices::return_()),
585 ),
586 "set" => (
587 TerminalId::Set,
588 TokenValue::Atom(CommonSourceAtomSetIndices::set()),
589 ),
590 "static" => (
591 TerminalId::Static,
592 TokenValue::Atom(CommonSourceAtomSetIndices::static_()),
593 ),
594 "super" => (
595 TerminalId::Super,
596 TokenValue::Atom(CommonSourceAtomSetIndices::super_()),
597 ),
598 "switch" => (
599 TerminalId::Switch,
600 TokenValue::Atom(CommonSourceAtomSetIndices::switch()),
601 ),
602 "target" => (
603 TerminalId::Target,
604 TokenValue::Atom(CommonSourceAtomSetIndices::target()),
605 ),
606 "this" => (
607 TerminalId::This,
608 TokenValue::Atom(CommonSourceAtomSetIndices::this()),
609 ),
610 "throw" => (
611 TerminalId::Throw,
612 TokenValue::Atom(CommonSourceAtomSetIndices::throw()),
613 ),
614 "try" => (
615 TerminalId::Try,
616 TokenValue::Atom(CommonSourceAtomSetIndices::try_()),
617 ),
618 "typeof" => (
619 TerminalId::Typeof,
620 TokenValue::Atom(CommonSourceAtomSetIndices::typeof_()),
621 ),
622 "var" => (
623 TerminalId::Var,
624 TokenValue::Atom(CommonSourceAtomSetIndices::var()),
625 ),
626 "void" => (
627 TerminalId::Void,
628 TokenValue::Atom(CommonSourceAtomSetIndices::void()),
629 ),
630 "while" => (
631 TerminalId::While,
632 TokenValue::Atom(CommonSourceAtomSetIndices::while_()),
633 ),
634 "with" => (
635 TerminalId::With,
636 TokenValue::Atom(CommonSourceAtomSetIndices::with()),
637 ),
638 "yield" => {
639 /*
640 (
641 TerminalId::Yield,
642 TokenValue::Atom(CommonSourceAtomSetIndices::yield_()),
643 ),
644 */
645 return Err(
646 ParseError::NotImplemented("yield cannot be handled in parser").into(),
647 );
648 }
649 "null" => (
650 TerminalId::NullLiteral,
651 TokenValue::Atom(CommonSourceAtomSetIndices::null()),
652 ),
653 "true" => (
654 TerminalId::BooleanLiteral,
655 TokenValue::Atom(CommonSourceAtomSetIndices::true_()),
656 ),
657 "false" => (
658 TerminalId::BooleanLiteral,
659 TokenValue::Atom(CommonSourceAtomSetIndices::false_()),
660 ),
661 _ => (TerminalId::Name, self.string_to_token_value(text)),
662 }
663 };
664
665 self.set_result(id, SourceLocation::new(start, self.offset()), value)
666 }
667
668 /// ```text
669 /// PrivateIdentifier::
670 /// `#` IdentifierName
671 /// ```
private_identifier(&mut self, start: usize, builder: AutoCow<'alloc>) -> Result<'alloc, ()>672 fn private_identifier(&mut self, start: usize, builder: AutoCow<'alloc>) -> Result<'alloc, ()> {
673 let name = self.identifier_name(builder)?;
674 let value = self.string_to_token_value(name);
675 self.set_result(
676 TerminalId::PrivateIdentifier,
677 SourceLocation::new(start, self.offset()),
678 value,
679 )
680 }
681
682 /// ```text
683 /// UnicodeEscapeSequence::
684 /// `u` Hex4Digits
685 /// `u{` CodePoint `}`
686 /// ```
unicode_escape_sequence_after_backslash(&mut self) -> Result<'alloc, char>687 fn unicode_escape_sequence_after_backslash(&mut self) -> Result<'alloc, char> {
688 match self.chars.next() {
689 Some('u') => {}
690 _ => {
691 return Err(ParseError::InvalidEscapeSequence.into());
692 }
693 }
694 self.unicode_escape_sequence_after_backslash_and_u()
695 }
696
unicode_escape_sequence_after_backslash_and_u(&mut self) -> Result<'alloc, char>697 fn unicode_escape_sequence_after_backslash_and_u(&mut self) -> Result<'alloc, char> {
698 let value = match self.peek() {
699 Some('{') => {
700 self.chars.next();
701
702 let value = self.code_point()?;
703 match self.chars.next() {
704 Some('}') => {}
705 _ => {
706 return Err(ParseError::InvalidEscapeSequence.into());
707 }
708 }
709 value
710 }
711 _ => self.hex_4_digits()?,
712 };
713
714 Ok(value)
715 }
716 }
717
718 impl<'alloc> Lexer<'alloc> {
719 // ------------------------------------------------------------------------
720 // 11.8.3 Numeric Literals
721
722 /// Advance over decimal digits in the input.
723 ///
724 /// ```text
725 /// NumericLiteralSeparator::
726 /// `_`
727 ///
728 /// DecimalDigits ::
729 /// DecimalDigit
730 /// DecimalDigits NumericLiteralSeparator? DecimalDigit
731 ///
732 /// DecimalDigit :: one of
733 /// `0` `1` `2` `3` `4` `5` `6` `7` `8` `9`
734 /// ```
decimal_digits(&mut self) -> Result<'alloc, ()>735 fn decimal_digits(&mut self) -> Result<'alloc, ()> {
736 if let Some('0'..='9') = self.peek() {
737 self.chars.next();
738 } else {
739 return Err(self.unexpected_err().into());
740 }
741
742 self.decimal_digits_after_first_digit()?;
743 Ok(())
744 }
745
optional_decimal_digits(&mut self) -> Result<'alloc, ()>746 fn optional_decimal_digits(&mut self) -> Result<'alloc, ()> {
747 if let Some('0'..='9') = self.peek() {
748 self.chars.next();
749 } else {
750 return Ok(());
751 }
752
753 self.decimal_digits_after_first_digit()?;
754 Ok(())
755 }
756
decimal_digits_after_first_digit(&mut self) -> Result<'alloc, ()>757 fn decimal_digits_after_first_digit(&mut self) -> Result<'alloc, ()> {
758 while let Some(next) = self.peek() {
759 match next {
760 '_' => {
761 self.chars.next();
762
763 if let Some('0'..='9') = self.peek() {
764 self.chars.next();
765 } else {
766 return Err(self.unexpected_err().into());
767 }
768 }
769 '0'..='9' => {
770 self.chars.next();
771 }
772 _ => break,
773 }
774 }
775 Ok(())
776 }
777
778 /// Skip an ExponentPart, if present.
779 ///
780 /// ```text
781 /// ExponentPart ::
782 /// ExponentIndicator SignedInteger
783 ///
784 /// ExponentIndicator :: one of
785 /// `e` `E`
786 ///
787 /// SignedInteger ::
788 /// DecimalDigits
789 /// `+` DecimalDigits
790 /// `-` DecimalDigits
791 /// ```
optional_exponent(&mut self) -> Result<'alloc, bool>792 fn optional_exponent(&mut self) -> Result<'alloc, bool> {
793 if let Some('e') | Some('E') = self.peek() {
794 self.chars.next();
795 self.decimal_exponent()?;
796 return Ok(true);
797 }
798
799 Ok(false)
800 }
801
decimal_exponent(&mut self) -> Result<'alloc, ()>802 fn decimal_exponent(&mut self) -> Result<'alloc, ()> {
803 if let Some('+') | Some('-') = self.peek() {
804 self.chars.next();
805 }
806
807 self.decimal_digits()?;
808
809 Ok(())
810 }
811
812 /// ```text
813 /// HexDigit :: one of
814 /// `0` `1` `2` `3` `4` `5` `6` `7` `8` `9` `a` `b` `c` `d` `e` `f` `A` `B` `C` `D` `E` `F`
815 /// ```
hex_digit(&mut self) -> Result<'alloc, u32>816 fn hex_digit(&mut self) -> Result<'alloc, u32> {
817 match self.chars.next() {
818 None => Err(ParseError::InvalidEscapeSequence.into()),
819 Some(c @ '0'..='9') => Ok(c as u32 - '0' as u32),
820 Some(c @ 'a'..='f') => Ok(10 + (c as u32 - 'a' as u32)),
821 Some(c @ 'A'..='F') => Ok(10 + (c as u32 - 'A' as u32)),
822 Some(other) => Err(ParseError::IllegalCharacter(other).into()),
823 }
824 }
825
code_point_to_char(value: u32) -> Result<'alloc, char>826 fn code_point_to_char(value: u32) -> Result<'alloc, char> {
827 if 0xd800 <= value && value <= 0xdfff {
828 Err(ParseError::NotImplemented("unicode escape sequences (surrogates)").into())
829 } else {
830 char::try_from(value).map_err(|_| ParseError::InvalidEscapeSequence.into())
831 }
832 }
833
834 /// ```text
835 /// Hex4Digits ::
836 /// HexDigit HexDigit HexDigit HexDigit
837 /// ```
hex_4_digits(&mut self) -> Result<'alloc, char>838 fn hex_4_digits(&mut self) -> Result<'alloc, char> {
839 let mut value = 0;
840 for _ in 0..4 {
841 value = (value << 4) | self.hex_digit()?;
842 }
843 Self::code_point_to_char(value)
844 }
845
846 /// ```text
847 /// CodePoint ::
848 /// HexDigits but only if MV of HexDigits ≤ 0x10FFFF
849 ///
850 /// HexDigits ::
851 /// HexDigit
852 /// HexDigits HexDigit
853 /// ```
code_point(&mut self) -> Result<'alloc, char>854 fn code_point(&mut self) -> Result<'alloc, char> {
855 let mut value = self.hex_digit()?;
856
857 loop {
858 let next = match self.peek() {
859 None => {
860 return Err(ParseError::InvalidEscapeSequence.into());
861 }
862 Some(c @ '0'..='9') => c as u32 - '0' as u32,
863 Some(c @ 'a'..='f') => 10 + (c as u32 - 'a' as u32),
864 Some(c @ 'A'..='F') => 10 + (c as u32 - 'A' as u32),
865 Some(_) => break,
866 };
867 self.chars.next();
868 value = (value << 4) | next;
869 if value > 0x10FFFF {
870 return Err(ParseError::InvalidEscapeSequence.into());
871 }
872 }
873
874 Self::code_point_to_char(value)
875 }
876
877 /// Scan a NumericLiteral (defined in 11.8.3, extended by B.1.1) after
878 /// having already consumed the first character, which was `0`.
879 ///
880 /// ```text
881 /// NumericLiteral ::
882 /// DecimalLiteral
883 /// DecimalBigIntegerLiteral
884 /// NonDecimalIntegerLiteral
885 /// NonDecimalIntegerLiteral BigIntLiteralSuffix
886 ///
887 /// DecimalBigIntegerLiteral ::
888 /// `0` BigIntLiteralSuffix
889 /// NonZeroDigit DecimalDigits? BigIntLiteralSuffix
890 ///
891 /// NonDecimalIntegerLiteral ::
892 /// BinaryIntegerLiteral
893 /// OctalIntegerLiteral
894 /// HexIntegerLiteral
895 ///
896 /// BigIntLiteralSuffix ::
897 /// `n`
898 /// ```
numeric_literal_starting_with_zero(&mut self) -> Result<'alloc, NumericResult>899 fn numeric_literal_starting_with_zero(&mut self) -> Result<'alloc, NumericResult> {
900 let mut base = NumericLiteralBase::Decimal;
901 match self.peek() {
902 // BinaryIntegerLiteral ::
903 // `0b` BinaryDigits
904 // `0B` BinaryDigits
905 //
906 // BinaryDigits ::
907 // BinaryDigit
908 // BinaryDigits NumericLiteralSeparator? BinaryDigit
909 //
910 // BinaryDigit :: one of
911 // `0` `1`
912 Some('b') | Some('B') => {
913 self.chars.next();
914
915 base = NumericLiteralBase::Binary;
916
917 if let Some('0'..='1') = self.peek() {
918 self.chars.next();
919 } else {
920 return Err(self.unexpected_err().into());
921 }
922
923 while let Some(next) = self.peek() {
924 match next {
925 '_' => {
926 self.chars.next();
927
928 if let Some('0'..='1') = self.peek() {
929 self.chars.next();
930 } else {
931 return Err(self.unexpected_err().into());
932 }
933 }
934 '0'..='1' => {
935 self.chars.next();
936 }
937 _ => break,
938 }
939 }
940
941 if let Some('n') = self.peek() {
942 self.chars.next();
943 self.check_after_numeric_literal()?;
944 return Ok(NumericResult::BigInt { base });
945 }
946 }
947
948 // OctalIntegerLiteral ::
949 // `0o` OctalDigits
950 // `0O` OctalDigits
951 //
952 // OctalDigits ::
953 // OctalDigit
954 // OctalDigits NumericLiteralSeparator? OctalDigit
955 //
956 // OctalDigit :: one of
957 // `0` `1` `2` `3` `4` `5` `6` `7`
958 //
959 Some('o') | Some('O') => {
960 self.chars.next();
961
962 base = NumericLiteralBase::Octal;
963
964 if let Some('0'..='7') = self.peek() {
965 self.chars.next();
966 } else {
967 return Err(self.unexpected_err().into());
968 }
969
970 while let Some(next) = self.peek() {
971 match next {
972 '_' => {
973 self.chars.next();
974
975 if let Some('0'..='7') = self.peek() {
976 self.chars.next();
977 } else {
978 return Err(self.unexpected_err().into());
979 }
980 }
981 '0'..='7' => {
982 self.chars.next();
983 }
984 _ => break,
985 }
986 }
987
988 if let Some('n') = self.peek() {
989 self.chars.next();
990 self.check_after_numeric_literal()?;
991 return Ok(NumericResult::BigInt { base });
992 }
993 }
994
995 // HexIntegerLiteral ::
996 // `0x` HexDigits
997 // `0X` HexDigits
998 //
999 // HexDigits ::
1000 // HexDigit
1001 // HexDigits NumericLiteralSeparator? HexDigit
1002 //
1003 // HexDigit :: one of
1004 // `0` `1` `2` `3` `4` `5` `6` `7` `8` `9` `a` `b` `c` `d` `e` `f` `A` `B` `C` `D` `E` `F`
1005 Some('x') | Some('X') => {
1006 self.chars.next();
1007
1008 base = NumericLiteralBase::Hex;
1009
1010 if let Some('0'..='9') | Some('a'..='f') | Some('A'..='F') = self.peek() {
1011 self.chars.next();
1012 } else {
1013 return Err(self.unexpected_err().into());
1014 }
1015
1016 while let Some(next) = self.peek() {
1017 match next {
1018 '_' => {
1019 self.chars.next();
1020
1021 if let Some('0'..='9') | Some('a'..='f') | Some('A'..='F') = self.peek()
1022 {
1023 self.chars.next();
1024 } else {
1025 return Err(self.unexpected_err().into());
1026 }
1027 }
1028 '0'..='9' | 'a'..='f' | 'A'..='F' => {
1029 self.chars.next();
1030 }
1031 _ => break,
1032 }
1033 }
1034
1035 if let Some('n') = self.peek() {
1036 self.chars.next();
1037 self.check_after_numeric_literal()?;
1038 return Ok(NumericResult::BigInt { base });
1039 }
1040 }
1041
1042 Some('.') => {
1043 self.chars.next();
1044 return self.decimal_literal_after_decimal_point_after_digits();
1045 }
1046
1047 Some('e') | Some('E') => {
1048 self.chars.next();
1049 self.decimal_exponent()?;
1050 return Ok(NumericResult::Float);
1051 }
1052
1053 Some('n') => {
1054 self.chars.next();
1055 self.check_after_numeric_literal()?;
1056 return Ok(NumericResult::BigInt { base });
1057 }
1058
1059 Some('0'..='9') => {
1060 // This is almost always the token `0` in practice.
1061 //
1062 // In nonstrict code, as a legacy feature, other numbers
1063 // starting with `0` are allowed. If /0[0-7]+/ matches, it's a
1064 // LegacyOctalIntegerLiteral; but if we see an `8` or `9` in
1065 // the number, it's decimal. Decimal numbers can have a decimal
1066 // point and/or ExponentPart; octals can't.
1067 //
1068 // Neither is allowed with a BigIntLiteralSuffix `n`.
1069 //
1070 // LegacyOctalIntegerLiteral ::
1071 // `0` OctalDigit
1072 // LegacyOctalIntegerLiteral OctalDigit
1073 //
1074 // NonOctalDecimalIntegerLiteral ::
1075 // `0` NonOctalDigit
1076 // LegacyOctalLikeDecimalIntegerLiteral NonOctalDigit
1077 // NonOctalDecimalIntegerLiteral DecimalDigit
1078 //
1079 // LegacyOctalLikeDecimalIntegerLiteral ::
1080 // `0` OctalDigit
1081 // LegacyOctalLikeDecimalIntegerLiteral OctalDigit
1082 //
1083 // NonOctalDigit :: one of
1084 // `8` `9`
1085 //
1086
1087 // TODO: implement `strict_mode` check
1088 // let strict_mode = true;
1089 // if !strict_mode {
1090 // // TODO: Distinguish between Octal and NonOctalDecimal.
1091 // // TODO: Support NonOctalDecimal followed by a decimal
1092 // // point and/or ExponentPart.
1093 // self.decimal_digits()?;
1094 // }
1095 return Err(ParseError::NotImplemented("LegacyOctalIntegerLiteral").into());
1096 }
1097
1098 _ => {}
1099 }
1100
1101 self.check_after_numeric_literal()?;
1102 Ok(NumericResult::Int { base })
1103 }
1104
1105 /// Scan a NumericLiteral (defined in 11.8.3, extended by B.1.1) after
1106 /// having already consumed the first character, which is a decimal digit.
decimal_literal_after_first_digit(&mut self) -> Result<'alloc, NumericResult>1107 fn decimal_literal_after_first_digit(&mut self) -> Result<'alloc, NumericResult> {
1108 // DecimalLiteral ::
1109 // DecimalIntegerLiteral `.` DecimalDigits? ExponentPart?
1110 // `.` DecimalDigits ExponentPart?
1111 // DecimalIntegerLiteral ExponentPart?
1112 //
1113 // DecimalIntegerLiteral ::
1114 // `0` #see `numeric_literal_starting_with_zero`
1115 // NonZeroDigit
1116 // NonZeroDigit NumericLiteralSeparator? DecimalDigits
1117 // NonOctalDecimalIntegerLiteral #see `numeric_literal_
1118 // # starting_with_zero`
1119 //
1120 // NonZeroDigit :: one of
1121 // `1` `2` `3` `4` `5` `6` `7` `8` `9`
1122
1123 self.decimal_digits_after_first_digit()?;
1124 match self.peek() {
1125 Some('.') => {
1126 self.chars.next();
1127 return self.decimal_literal_after_decimal_point_after_digits();
1128 }
1129 Some('n') => {
1130 self.chars.next();
1131 self.check_after_numeric_literal()?;
1132 return Ok(NumericResult::BigInt {
1133 base: NumericLiteralBase::Decimal,
1134 });
1135 }
1136 _ => {}
1137 }
1138
1139 let has_exponent = self.optional_exponent()?;
1140 self.check_after_numeric_literal()?;
1141
1142 let result = if has_exponent {
1143 NumericResult::Float
1144 } else {
1145 NumericResult::Int {
1146 base: NumericLiteralBase::Decimal,
1147 }
1148 };
1149
1150 Ok(result)
1151 }
1152
decimal_literal_after_decimal_point(&mut self) -> Result<'alloc, NumericResult>1153 fn decimal_literal_after_decimal_point(&mut self) -> Result<'alloc, NumericResult> {
1154 // The parts after `.` in
1155 //
1156 // `.` DecimalDigits ExponentPart?
1157 self.decimal_digits()?;
1158 self.optional_exponent()?;
1159 self.check_after_numeric_literal()?;
1160
1161 Ok(NumericResult::Float)
1162 }
1163
decimal_literal_after_decimal_point_after_digits( &mut self, ) -> Result<'alloc, NumericResult>1164 fn decimal_literal_after_decimal_point_after_digits(
1165 &mut self,
1166 ) -> Result<'alloc, NumericResult> {
1167 // The parts after `.` in
1168 //
1169 // DecimalLiteral ::
1170 // DecimalIntegerLiteral `.` DecimalDigits? ExponentPart?
1171 self.optional_decimal_digits()?;
1172 self.optional_exponent()?;
1173 self.check_after_numeric_literal()?;
1174
1175 Ok(NumericResult::Float)
1176 }
1177
check_after_numeric_literal(&self) -> Result<'alloc, ()>1178 fn check_after_numeric_literal(&self) -> Result<'alloc, ()> {
1179 // The SourceCharacter immediately following a
1180 // NumericLiteral must not be an IdentifierStart or
1181 // DecimalDigit. (11.8.3)
1182 if let Some(ch) = self.peek() {
1183 if is_identifier_start(ch) || ch.is_digit(10) {
1184 return Err(ParseError::IllegalCharacter(ch).into());
1185 }
1186 }
1187
1188 Ok(())
1189 }
1190
1191 // ------------------------------------------------------------------------
1192 // 11.8.4 String Literals (as extended by B.1.2)
1193
1194 /// Scan an LineContinuation or EscapeSequence in a string literal, having
1195 /// already consumed the initial backslash character.
1196 ///
1197 /// ```text
1198 /// LineContinuation ::
1199 /// `\` LineTerminatorSequence
1200 ///
1201 /// EscapeSequence ::
1202 /// CharacterEscapeSequence
1203 /// (in strict mode code) `0` [lookahead ∉ DecimalDigit]
1204 /// (in non-strict code) LegacyOctalEscapeSequence
1205 /// HexEscapeSequence
1206 /// UnicodeEscapeSequence
1207 ///
1208 /// CharacterEscapeSequence ::
1209 /// SingleEscapeCharacter
1210 /// NonEscapeCharacter
1211 ///
1212 /// SingleEscapeCharacter :: one of
1213 /// `'` `"` `\` `b` `f` `n` `r` `t` `v`
1214 ///
1215 /// LegacyOctalEscapeSequence ::
1216 /// OctalDigit [lookahead ∉ OctalDigit]
1217 /// ZeroToThree OctalDigit [lookahead ∉ OctalDigit]
1218 /// FourToSeven OctalDigit
1219 /// ZeroToThree OctalDigit OctalDigit
1220 ///
1221 /// ZeroToThree :: one of
1222 /// `0` `1` `2` `3`
1223 ///
1224 /// FourToSeven :: one of
1225 /// `4` `5` `6` `7`
1226 /// ```
escape_sequence(&mut self, text: &mut String<'alloc>) -> Result<'alloc, ()>1227 fn escape_sequence(&mut self, text: &mut String<'alloc>) -> Result<'alloc, ()> {
1228 match self.chars.next() {
1229 None => {
1230 return Err(ParseError::UnterminatedString.into());
1231 }
1232 Some(c) => match c {
1233 LF | LS | PS => {
1234 // LineContinuation. Ignore it.
1235 //
1236 // Don't set is_on_new_line because this LineContinuation
1237 // has no bearing on whether the current string literal was
1238 // the first token on the line where it started.
1239 }
1240
1241 CR => {
1242 // LineContinuation. Check for the sequence \r\n; otherwise
1243 // ignore it.
1244 if self.peek() == Some(LF) {
1245 self.chars.next();
1246 }
1247 }
1248
1249 '\'' | '"' | '\\' => {
1250 text.push(c);
1251 }
1252
1253 'b' => {
1254 text.push('\u{8}');
1255 }
1256
1257 'f' => {
1258 text.push(FF);
1259 }
1260
1261 'n' => {
1262 text.push(LF);
1263 }
1264
1265 'r' => {
1266 text.push(CR);
1267 }
1268
1269 't' => {
1270 text.push(TAB);
1271 }
1272
1273 'v' => {
1274 text.push(VT);
1275 }
1276
1277 'x' => {
1278 // HexEscapeSequence ::
1279 // `x` HexDigit HexDigit
1280 let mut value = self.hex_digit()?;
1281 value = (value << 4) | self.hex_digit()?;
1282 match char::try_from(value) {
1283 Err(_) => {
1284 return Err(ParseError::InvalidEscapeSequence.into());
1285 }
1286 Ok(c) => {
1287 text.push(c);
1288 }
1289 }
1290 }
1291
1292 'u' => {
1293 let c = self.unicode_escape_sequence_after_backslash_and_u()?;
1294 text.push(c);
1295 }
1296
1297 '0' => {
1298 // In strict mode code and in template literals, the
1299 // relevant production is
1300 //
1301 // EscapeSequence ::
1302 // `0` [lookahead <! DecimalDigit]
1303 //
1304 // In non-strict StringLiterals, `\0` begins a
1305 // LegacyOctalEscapeSequence which may contain more digits.
1306 match self.peek() {
1307 Some('0'..='7') => {
1308 return Err(ParseError::NotImplemented(
1309 "legacy octal escape sequence in string",
1310 )
1311 .into());
1312 }
1313 Some('8'..='9') => {
1314 return Err(ParseError::NotImplemented(
1315 "digit immediately following \\0 escape sequence",
1316 )
1317 .into());
1318 }
1319 _ => {}
1320 }
1321 text.push('\0');
1322 }
1323
1324 '1'..='7' => {
1325 return Err(ParseError::NotImplemented(
1326 "legacy octal escape sequence in string",
1327 )
1328 .into());
1329 }
1330
1331 other => {
1332 // "\8" and "\9" are invalid per spec, but SpiderMonkey and
1333 // V8 accept them, and JSC accepts them in non-strict mode.
1334 // "\8" is "8" and "\9" is "9".
1335 text.push(other);
1336 }
1337 },
1338 }
1339 Ok(())
1340 }
1341
1342 /// Scan a string literal, having already consumed the starting quote
1343 /// character `delimiter`.
1344 ///
1345 /// ```text
1346 /// StringLiteral ::
1347 /// `"` DoubleStringCharacters? `"`
1348 /// `'` SingleStringCharacters? `'`
1349 ///
1350 /// DoubleStringCharacters ::
1351 /// DoubleStringCharacter DoubleStringCharacters?
1352 ///
1353 /// SingleStringCharacters ::
1354 /// SingleStringCharacter SingleStringCharacters?
1355 ///
1356 /// DoubleStringCharacter ::
1357 /// SourceCharacter but not one of `"` or `\` or LineTerminator
1358 /// <LS>
1359 /// <PS>
1360 /// `\` EscapeSequence
1361 /// LineContinuation
1362 ///
1363 /// SingleStringCharacter ::
1364 /// SourceCharacter but not one of `'` or `\` or LineTerminator
1365 /// <LS>
1366 /// <PS>
1367 /// `\` EscapeSequence
1368 /// LineContinuation
1369 /// ```
string_literal(&mut self, delimiter: char) -> Result<'alloc, ()>1370 fn string_literal(&mut self, delimiter: char) -> Result<'alloc, ()> {
1371 let offset = self.offset() - 1;
1372 let mut builder = AutoCow::new(&self);
1373 loop {
1374 match self.chars.next() {
1375 None | Some('\r') | Some('\n') => {
1376 return Err(ParseError::UnterminatedString.into());
1377 }
1378
1379 Some(c @ '"') | Some(c @ '\'') => {
1380 if c == delimiter {
1381 let value = self.string_to_token_value(builder.finish_without_push(&self));
1382 return self.set_result(
1383 TerminalId::StringLiteral,
1384 SourceLocation::new(offset, self.offset()),
1385 value,
1386 );
1387 } else {
1388 builder.push_matching(c);
1389 }
1390 }
1391
1392 Some('\\') => {
1393 let text = builder.get_mut_string_without_current_ascii_char(&self);
1394 self.escape_sequence(text)?;
1395 }
1396
1397 Some(other) => {
1398 // NonEscapeCharacter ::
1399 // SourceCharacter but not one of EscapeCharacter or LineTerminator
1400 //
1401 // EscapeCharacter ::
1402 // SingleEscapeCharacter
1403 // DecimalDigit
1404 // `x`
1405 // `u`
1406 builder.push_matching(other);
1407 }
1408 }
1409 }
1410 }
1411
1412 // ------------------------------------------------------------------------
1413 // 11.8.5 Regular Expression Literals
1414
regular_expression_backslash_sequence(&mut self) -> Result<'alloc, ()>1415 fn regular_expression_backslash_sequence(&mut self) -> Result<'alloc, ()> {
1416 match self.chars.next() {
1417 None | Some(CR) | Some(LF) | Some(LS) | Some(PS) => {
1418 Err(ParseError::UnterminatedRegExp.into())
1419 }
1420 Some(_) => Ok(()),
1421 }
1422 }
1423
1424 // See 12.2.8 and 11.8.5 sections.
regular_expression_literal(&mut self, builder: &mut AutoCow<'alloc>) -> Result<'alloc, ()>1425 fn regular_expression_literal(&mut self, builder: &mut AutoCow<'alloc>) -> Result<'alloc, ()> {
1426 let offset = self.offset();
1427
1428 loop {
1429 match self.chars.next() {
1430 None | Some(CR) | Some(LF) | Some(LS) | Some(PS) => {
1431 return Err(ParseError::UnterminatedRegExp.into());
1432 }
1433 Some('/') => {
1434 break;
1435 }
1436 Some('[') => {
1437 // RegularExpressionClass.
1438 loop {
1439 match self.chars.next() {
1440 None | Some(CR) | Some(LF) | Some(LS) | Some(PS) => {
1441 return Err(ParseError::UnterminatedRegExp.into());
1442 }
1443 Some(']') => {
1444 break;
1445 }
1446 Some('\\') => {
1447 self.regular_expression_backslash_sequence()?;
1448 }
1449 Some(_) => {}
1450 }
1451 }
1452 }
1453 Some('\\') => {
1454 self.regular_expression_backslash_sequence()?;
1455 }
1456 Some(_) => {}
1457 }
1458 }
1459 let mut flag_text = AutoCow::new(&self);
1460 while let Some(ch) = self.peek() {
1461 match ch {
1462 '$' | '_' | 'a'..='z' | 'A'..='Z' | '0'..='9' => {
1463 self.chars.next();
1464 flag_text.push_matching(ch);
1465 }
1466 _ => break,
1467 }
1468 }
1469
1470 // 12.2.8.2.1 Assert literal is a RegularExpressionLiteral.
1471 let literal = builder.finish(&self);
1472
1473 // 12.2.8.2.2 Check that only gimsuy flags are mentioned at most once.
1474 let gimsuy_mask: u32 = ['g', 'i', 'm', 's', 'u', 'y']
1475 .iter()
1476 .map(|x| 1 << ((*x as u8) - ('a' as u8)))
1477 .sum();
1478 let mut flag_text_set: u32 = 0;
1479 for ch in flag_text.finish(&self).chars() {
1480 if !ch.is_ascii_lowercase() {
1481 return Err(ParseError::NotImplemented(
1482 "Unexpected flag in regular expression literal",
1483 )
1484 .into());
1485 }
1486 let ch_mask = 1 << ((ch as u8) - ('a' as u8));
1487 if ch_mask & gimsuy_mask == 0 {
1488 return Err(ParseError::NotImplemented(
1489 "Unexpected flag in regular expression literal",
1490 )
1491 .into());
1492 }
1493 if flag_text_set & ch_mask != 0 {
1494 return Err(ParseError::NotImplemented(
1495 "Flag is mentioned twice in regular expression literal",
1496 )
1497 .into());
1498 }
1499 flag_text_set |= ch_mask;
1500 }
1501
1502 // TODO: 12.2.8.2.4 and 12.2.8.2.5 Check that the body matches the
1503 // grammar defined in 21.2.1.
1504
1505 let value = self.slice_to_token_value(literal);
1506 self.set_result(
1507 TerminalId::RegularExpressionLiteral,
1508 SourceLocation::new(offset, self.offset()),
1509 value,
1510 )
1511 }
1512
1513 // ------------------------------------------------------------------------
1514 // 11.8.6 Template Literal Lexical Components
1515
1516 /// Parse a template literal component token, having already consumed the
1517 /// starting `` ` `` or `}` character. On success, the `id` of the returned
1518 /// `Token` is `subst` (if the token ends with `${`) or `tail` (if the
1519 /// token ends with `` ` ``).
1520 ///
1521 /// ```text
1522 /// NoSubstitutionTemplate ::
1523 /// ``` TemplateCharacters? ```
1524 ///
1525 /// TemplateHead ::
1526 /// ``` TemplateCharacters? `${`
1527 ///
1528 /// TemplateMiddle ::
1529 /// `}` TemplateCharacters? `${`
1530 ///
1531 /// TemplateTail ::
1532 /// `}` TemplateCharacters? ```
1533 ///
1534 /// TemplateCharacters ::
1535 /// TemplateCharacter TemplateCharacters?
1536 /// ```
template_part( &mut self, start: usize, subst: TerminalId, tail: TerminalId, ) -> Result<'alloc, ()>1537 fn template_part(
1538 &mut self,
1539 start: usize,
1540 subst: TerminalId,
1541 tail: TerminalId,
1542 ) -> Result<'alloc, ()> {
1543 let mut builder = AutoCow::new(&self);
1544 while let Some(ch) = self.chars.next() {
1545 // TemplateCharacter ::
1546 // `$` [lookahead != `{` ]
1547 // `\` EscapeSequence
1548 // `\` NotEscapeSequence
1549 // LineContinuation
1550 // LineTerminatorSequence
1551 // SourceCharacter but not one of ``` or `\` or `$` or LineTerminator
1552 //
1553 // NotEscapeSequence ::
1554 // `0` DecimalDigit
1555 // DecimalDigit but not `0`
1556 // `x` [lookahead <! HexDigit]
1557 // `x` HexDigit [lookahead <! HexDigit]
1558 // `u` [lookahead <! HexDigit] [lookahead != `{`]
1559 // `u` HexDigit [lookahead <! HexDigit]
1560 // `u` HexDigit HexDigit [lookahead <! HexDigit]
1561 // `u` HexDigit HexDigit HexDigit [lookahead <! HexDigit]
1562 // `u` `{` [lookahead <! HexDigit]
1563 // `u` `{` NotCodePoint [lookahead <! HexDigit]
1564 // `u` `{` CodePoint [lookahead <! HexDigit] [lookahead != `}`]
1565 //
1566 // NotCodePoint ::
1567 // HexDigits [> but only if MV of |HexDigits| > 0x10FFFF ]
1568 //
1569 // CodePoint ::
1570 // HexDigits [> but only if MV of |HexDigits| ≤ 0x10FFFF ]
1571 if ch == '$' && self.peek() == Some('{') {
1572 self.chars.next();
1573 let value = self.string_to_token_value(builder.finish_without_push(&self));
1574 return self.set_result(subst, SourceLocation::new(start, self.offset()), value);
1575 }
1576 if ch == '`' {
1577 let value = self.string_to_token_value(builder.finish_without_push(&self));
1578 return self.set_result(tail, SourceLocation::new(start, self.offset()), value);
1579 }
1580 // TODO: Support escape sequences.
1581 if ch == '\\' {
1582 let text = builder.get_mut_string_without_current_ascii_char(&self);
1583 self.escape_sequence(text)?;
1584 } else {
1585 builder.push_matching(ch);
1586 }
1587 }
1588 Err(ParseError::UnterminatedString.into())
1589 }
1590
advance_impl<'parser>(&mut self, parser: &Parser<'parser>) -> Result<'alloc, ()>1591 fn advance_impl<'parser>(&mut self, parser: &Parser<'parser>) -> Result<'alloc, ()> {
1592 let mut builder = AutoCow::new(&self);
1593 let mut start = self.offset();
1594 while let Some(c) = self.chars.next() {
1595 match c {
1596 // 11.2 White Space
1597 //
1598 // WhiteSpace ::
1599 // <TAB>
1600 // <VT>
1601 // <FF>
1602 // <SP>
1603 // <NBSP>
1604 // <ZWNBSP>
1605 // <USP>
1606 TAB |
1607 VT |
1608 FF |
1609 SP |
1610 NBSP |
1611 ZWNBSP |
1612 '\u{1680}' | // Ogham space mark (in <USP>)
1613 '\u{2000}' ..= '\u{200a}' | // typesetting spaces (in <USP>)
1614 '\u{202f}' | // Narrow no-break space (in <USP>)
1615 '\u{205f}' | // Medium mathematical space (in <USP>)
1616 '\u{3000}' // Ideographic space (in <USP>)
1617 => {
1618 // TODO - The spec uses <USP> to stand for any character
1619 // with category "Space_Separator" (Zs). New Unicode
1620 // standards may add characters to this set. This should therefore be
1621 // implemented using the Unicode database somehow.
1622 builder = AutoCow::new(&self);
1623 start = self.offset();
1624 continue;
1625 }
1626
1627 // 11.3 Line Terminators
1628 //
1629 // LineTerminator ::
1630 // <LF>
1631 // <CR>
1632 // <LS>
1633 // <PS>
1634 LF | CR | LS | PS => {
1635 self.token.is_on_new_line = true;
1636 builder = AutoCow::new(&self);
1637 start = self.offset();
1638 continue;
1639 }
1640
1641 '0' => {
1642 let result = self.numeric_literal_starting_with_zero()?;
1643 return Ok(self.numeric_result_to_advance_result(builder.finish(&self), start, result)?);
1644 }
1645
1646 '1'..='9' => {
1647 let result = self.decimal_literal_after_first_digit()?;
1648 return Ok(self.numeric_result_to_advance_result(builder.finish(&self), start, result)?);
1649 }
1650
1651 '"' | '\'' => {
1652 return self.string_literal(c);
1653 }
1654
1655 '`' => {
1656 return self.template_part(start, TerminalId::TemplateHead, TerminalId::NoSubstitutionTemplate);
1657 }
1658
1659 '!' => match self.peek() {
1660 Some('=') => {
1661 self.chars.next();
1662 match self.peek() {
1663 Some('=') => {
1664 self.chars.next();
1665 return self.set_result(
1666 TerminalId::StrictNotEqual,
1667 SourceLocation::new(start, self.offset()),
1668 TokenValue::None,
1669 );
1670 }
1671 _ => return self.set_result(
1672 TerminalId::LaxNotEqual,
1673 SourceLocation::new(start, self.offset()),
1674 TokenValue::None,
1675 ),
1676 }
1677 }
1678 _ => return self.set_result(
1679 TerminalId::LogicalNot,
1680 SourceLocation::new(start, self.offset()),
1681 TokenValue::None,
1682 ),
1683 },
1684
1685 '%' => match self.peek() {
1686 Some('=') => {
1687 self.chars.next();
1688 return self.set_result(
1689 TerminalId::RemainderAssign,
1690 SourceLocation::new(start, self.offset()),
1691 TokenValue::None,
1692 );
1693 }
1694 _ => return self.set_result(
1695 TerminalId::Remainder,
1696 SourceLocation::new(start, self.offset()),
1697 TokenValue::None,
1698 ),
1699 },
1700
1701 '&' => match self.peek() {
1702 Some('&') => {
1703 self.chars.next();
1704 match self.peek() {
1705 Some('=') => {
1706 self.chars.next();
1707 return self.set_result(
1708 TerminalId::LogicalAndAssign,
1709 SourceLocation::new(start, self.offset()),
1710 TokenValue::None,
1711 );
1712 }
1713 _ => return self.set_result(
1714 TerminalId::LogicalAnd,
1715 SourceLocation::new(start, self.offset()),
1716 TokenValue::None,
1717 )
1718 }
1719 }
1720 Some('=') => {
1721 self.chars.next();
1722 return self.set_result(
1723 TerminalId::BitwiseAndAssign,
1724 SourceLocation::new(start, self.offset()),
1725 TokenValue::None,
1726 );
1727 }
1728 _ => return self.set_result(
1729 TerminalId::BitwiseAnd,
1730 SourceLocation::new(start, self.offset()),
1731 TokenValue::None,
1732 ),
1733 },
1734
1735 '*' => match self.peek() {
1736 Some('*') => {
1737 self.chars.next();
1738 match self.peek() {
1739 Some('=') => {
1740 self.chars.next();
1741 return self.set_result(
1742 TerminalId::ExponentiateAssign,
1743 SourceLocation::new(start, self.offset()),
1744 TokenValue::None,
1745 );
1746 }
1747 _ => return self.set_result(
1748 TerminalId::Exponentiate,
1749 SourceLocation::new(start, self.offset()),
1750 TokenValue::None,
1751 ),
1752 }
1753 }
1754 Some('=') => {
1755 self.chars.next();
1756 return self.set_result(
1757 TerminalId::MultiplyAssign,
1758 SourceLocation::new(start, self.offset()),
1759 TokenValue::None,
1760 );
1761 }
1762 _ => return self.set_result(
1763 TerminalId::Star,
1764 SourceLocation::new(start, self.offset()),
1765 TokenValue::None,
1766 ),
1767 },
1768
1769 '+' => match self.peek() {
1770 Some('+') => {
1771 self.chars.next();
1772 return self.set_result(
1773 TerminalId::Increment,
1774 SourceLocation::new(start, self.offset()),
1775 TokenValue::None,
1776 );
1777 }
1778 Some('=') => {
1779 self.chars.next();
1780 return self.set_result(
1781 TerminalId::AddAssign,
1782 SourceLocation::new(start, self.offset()),
1783 TokenValue::None,
1784 );
1785 }
1786 _ => return self.set_result(
1787 TerminalId::Plus,
1788 SourceLocation::new(start, self.offset()),
1789 TokenValue::None,
1790 ),
1791 },
1792
1793 '-' => match self.peek() {
1794 Some('-') => {
1795 self.chars.next();
1796 match self.peek() {
1797 Some('>') if self.token.is_on_new_line => {
1798 // B.1.3 SingleLineHTMLCloseComment
1799 // TODO: Limit this to Script (not Module).
1800 self.skip_single_line_comment(&mut builder);
1801 continue;
1802 }
1803 _ => return self.set_result(
1804 TerminalId::Decrement,
1805 SourceLocation::new(start, self.offset()),
1806 TokenValue::None,
1807 ),
1808 }
1809 }
1810 Some('=') => {
1811 self.chars.next();
1812 return self.set_result(
1813 TerminalId::SubtractAssign,
1814 SourceLocation::new(start, self.offset()),
1815 TokenValue::None,
1816 );
1817 }
1818 _ => return self.set_result(
1819 TerminalId::Minus,
1820 SourceLocation::new(start, self.offset()),
1821 TokenValue::None,
1822 ),
1823 },
1824
1825 '.' => match self.peek() {
1826 Some('.') => {
1827 self.chars.next();
1828 match self.peek() {
1829 Some('.') => {
1830 self.chars.next();
1831 return self.set_result(
1832 TerminalId::Ellipsis,
1833 SourceLocation::new(start, self.offset()),
1834 TokenValue::None,
1835 );
1836 }
1837 _ => return Err(ParseError::IllegalCharacter('.').into()),
1838 }
1839 }
1840 Some('0'..='9') => {
1841 let result = self.decimal_literal_after_decimal_point()?;
1842 return Ok(self.numeric_result_to_advance_result(builder.finish(&self), start, result)?);
1843 }
1844 _ => return self.set_result(
1845 TerminalId::Dot,
1846 SourceLocation::new(start, self.offset()),
1847 TokenValue::None,
1848 ),
1849 },
1850
1851 '/' => match self.peek() {
1852 Some('/') => {
1853 // SingleLineComment :: `//` SingleLineCommentChars?
1854 self.chars.next();
1855 self.skip_single_line_comment(&mut builder);
1856 start = self.offset();
1857 continue;
1858 }
1859 Some('*') => {
1860 self.chars.next();
1861 self.skip_multi_line_comment(&mut builder)?;
1862 start = self.offset();
1863 continue;
1864 }
1865 _ => {
1866 if parser.can_accept_terminal(TerminalId::Divide) {
1867 match self.peek() {
1868 Some('=') => {
1869 self.chars.next();
1870 return self.set_result(
1871 TerminalId::DivideAssign,
1872 SourceLocation::new(start, self.offset()),
1873 TokenValue::None,
1874 );
1875 }
1876 _ => return self.set_result(
1877 TerminalId::Divide,
1878 SourceLocation::new(start, self.offset()),
1879 TokenValue::None,
1880 ),
1881 }
1882 }
1883 return self.regular_expression_literal(&mut builder);
1884 }
1885 },
1886
1887 '}' => {
1888 if parser.can_accept_terminal(TerminalId::TemplateMiddle) {
1889 return self.template_part(start, TerminalId::TemplateMiddle, TerminalId::TemplateTail);
1890 }
1891 return self.set_result(
1892 TerminalId::CloseBrace,
1893 SourceLocation::new(start, self.offset()),
1894 TokenValue::None,
1895 );
1896 }
1897
1898 '<' => match self.peek() {
1899 Some('<') => {
1900 self.chars.next();
1901 match self.peek() {
1902 Some('=') => {
1903 self.chars.next();
1904 return self.set_result(
1905 TerminalId::LeftShiftAssign,
1906 SourceLocation::new(start, self.offset()),
1907 TokenValue::None,
1908 );
1909 }
1910 _ => return self.set_result(
1911 TerminalId::LeftShift,
1912 SourceLocation::new(start, self.offset()),
1913 TokenValue::None,
1914 ),
1915 }
1916 }
1917 Some('=') => {
1918 self.chars.next();
1919 return self.set_result(
1920 TerminalId::LessThanOrEqualTo,
1921 SourceLocation::new(start, self.offset()),
1922 TokenValue::None,
1923 );
1924 }
1925 Some('!') if self.is_looking_at("!--") => {
1926 // B.1.3 SingleLineHTMLOpenComment. Note that the above
1927 // `is_looking_at` test peeked ahead at the next three
1928 // characters of input. This lookahead is necessary
1929 // because `x<!--` has a comment but `x<!-y` does not.
1930 //
1931 // TODO: Limit this to Script (not Module).
1932 self.skip_single_line_comment(&mut builder);
1933 start = self.offset();
1934 continue;
1935 }
1936 _ => return self.set_result(
1937 TerminalId::LessThan,
1938 SourceLocation::new(start, self.offset()),
1939 TokenValue::None,
1940 ),
1941 },
1942
1943 '=' => match self.peek() {
1944 Some('=') => {
1945 self.chars.next();
1946 match self.peek() {
1947 Some('=') => {
1948 self.chars.next();
1949 return self.set_result(
1950 TerminalId::StrictEqual,
1951 SourceLocation::new(start, self.offset()),
1952 TokenValue::None,
1953 );
1954 }
1955 _ => return self.set_result(
1956 TerminalId::LaxEqual,
1957 SourceLocation::new(start, self.offset()),
1958 TokenValue::None,
1959 ),
1960 }
1961 }
1962 Some('>') => {
1963 self.chars.next();
1964 return self.set_result(
1965 TerminalId::Arrow,
1966 SourceLocation::new(start, self.offset()),
1967 TokenValue::None,
1968 );
1969 }
1970 _ => return self.set_result(
1971 TerminalId::EqualSign,
1972 SourceLocation::new(start, self.offset()),
1973 TokenValue::None,
1974 ),
1975 },
1976
1977 '>' => match self.peek() {
1978 Some('>') => {
1979 self.chars.next();
1980 match self.peek() {
1981 Some('>') => {
1982 self.chars.next();
1983 match self.peek() {
1984 Some('=') => {
1985 self.chars.next();
1986 return self.set_result(
1987 TerminalId::UnsignedRightShiftAssign,
1988 SourceLocation::new(start, self.offset()),
1989 TokenValue::None,
1990 );
1991 }
1992 _ => return self.set_result(
1993 TerminalId::UnsignedRightShift,
1994 SourceLocation::new(start, self.offset()),
1995 TokenValue::None,
1996 ),
1997 }
1998 }
1999 Some('=') => {
2000 self.chars.next();
2001 return self.set_result(
2002 TerminalId::SignedRightShiftAssign,
2003 SourceLocation::new(start, self.offset()),
2004 TokenValue::None,
2005 );
2006 }
2007 _ => return self.set_result(
2008 TerminalId::SignedRightShift,
2009 SourceLocation::new(start, self.offset()),
2010 TokenValue::None,
2011 ),
2012 }
2013 }
2014 Some('=') => {
2015 self.chars.next();
2016 return self.set_result(
2017 TerminalId::GreaterThanOrEqualTo,
2018 SourceLocation::new(start, self.offset()),
2019 TokenValue::None,
2020 );
2021 }
2022 _ => return self.set_result(
2023 TerminalId::GreaterThan,
2024 SourceLocation::new(start, self.offset()),
2025 TokenValue::None,
2026 ),
2027 },
2028
2029 '^' => match self.peek() {
2030 Some('=') => {
2031 self.chars.next();
2032 return self.set_result(
2033 TerminalId::BitwiseXorAssign,
2034 SourceLocation::new(start, self.offset()),
2035 TokenValue::None,
2036 );
2037 }
2038 _ => return self.set_result(
2039 TerminalId::BitwiseXor,
2040 SourceLocation::new(start, self.offset()),
2041 TokenValue::None,
2042 ),
2043 },
2044
2045 '|' => match self.peek() {
2046 Some('|') => {
2047 self.chars.next();
2048 match self.peek() {
2049 Some('=') => {
2050 self.chars.next();
2051 return self.set_result(
2052 TerminalId::LogicalOrAssign,
2053 SourceLocation::new(start, self.offset()),
2054 TokenValue::None,
2055 );
2056 }
2057 _ => return self.set_result(
2058 TerminalId::LogicalOr,
2059 SourceLocation::new(start, self.offset()),
2060 TokenValue::None,
2061 )
2062 }
2063 }
2064 Some('=') => {
2065 self.chars.next();
2066 return self.set_result(
2067 TerminalId::BitwiseOrAssign,
2068 SourceLocation::new(start, self.offset()),
2069 TokenValue::None,
2070 );
2071 }
2072 _ => return self.set_result(
2073 TerminalId::BitwiseOr,
2074 SourceLocation::new(start, self.offset()),
2075 TokenValue::None,
2076 ),
2077 },
2078
2079 '?' => match self.peek() {
2080 Some('?') => {
2081 self.chars.next();
2082 match self.peek() {
2083 Some('=') => {
2084 self.chars.next();
2085 return self.set_result(
2086 TerminalId::CoalesceAssign,
2087 SourceLocation::new(start, self.offset()),
2088 TokenValue::None,
2089 );
2090 }
2091 _ => return self.set_result(
2092 TerminalId::Coalesce,
2093 SourceLocation::new(start, self.offset()),
2094 TokenValue::None,
2095 )
2096 }
2097 }
2098 Some('.') => {
2099 if let Some('0'..='9') = self.double_peek() {
2100 return self.set_result(
2101 TerminalId::QuestionMark,
2102 SourceLocation::new(start, self.offset()),
2103 TokenValue::None,
2104 )
2105 }
2106 self.chars.next();
2107 return self.set_result(
2108 TerminalId::OptionalChain,
2109 SourceLocation::new(start, self.offset()),
2110 TokenValue::None,
2111 );
2112 }
2113 _ => return self.set_result(
2114 TerminalId::QuestionMark,
2115 SourceLocation::new(start, self.offset()),
2116 TokenValue::None,
2117 ),
2118 }
2119
2120 '(' => return self.set_result(
2121 TerminalId::OpenParenthesis,
2122 SourceLocation::new(start, self.offset()),
2123 TokenValue::None,
2124 ),
2125 ')' => return self.set_result(
2126 TerminalId::CloseParenthesis,
2127 SourceLocation::new(start, self.offset()),
2128 TokenValue::None,
2129 ),
2130 ',' => return self.set_result(
2131 TerminalId::Comma,
2132 SourceLocation::new(start, self.offset()),
2133 TokenValue::None,
2134 ),
2135 ':' => return self.set_result(
2136 TerminalId::Colon,
2137 SourceLocation::new(start, self.offset()),
2138 TokenValue::None,
2139 ),
2140 ';' => return self.set_result(
2141 TerminalId::Semicolon,
2142 SourceLocation::new(start, self.offset()),
2143 TokenValue::None,
2144 ),
2145 '[' => return self.set_result(
2146 TerminalId::OpenBracket,
2147 SourceLocation::new(start, self.offset()),
2148 TokenValue::None,
2149 ),
2150 ']' => return self.set_result(
2151 TerminalId::CloseBracket,
2152 SourceLocation::new(start, self.offset()),
2153 TokenValue::None,
2154 ),
2155 '{' => return self.set_result(
2156 TerminalId::OpenBrace,
2157 SourceLocation::new(start, self.offset()),
2158 TokenValue::None,
2159 ),
2160 '~' => return self.set_result(
2161 TerminalId::BitwiseNot,
2162 SourceLocation::new(start, self.offset()),
2163 TokenValue::None,
2164 ),
2165
2166 // Idents
2167 '$' | '_' | 'a'..='z' | 'A'..='Z' => {
2168 builder.push_matching(c);
2169 return self.identifier_tail(start, builder);
2170 }
2171
2172 '\\' => {
2173 builder.force_allocation_without_current_ascii_char(&self);
2174
2175 let value = self.unicode_escape_sequence_after_backslash()?;
2176 if !is_identifier_start(value) {
2177 return Err(ParseError::IllegalCharacter(value).into());
2178 }
2179 builder.push_different(value);
2180
2181 return self.identifier_tail(start, builder);
2182 }
2183
2184 '#' => {
2185 if start == 0 {
2186 // https://tc39.es/proposal-hashbang/out.html
2187 // HashbangComment ::
2188 // `#!` SingleLineCommentChars?
2189 if let Some('!') = self.peek() {
2190 self.skip_single_line_comment(&mut builder);
2191 start = self.offset();
2192 continue;
2193 }
2194 }
2195
2196 builder.push_matching(c);
2197 return self.private_identifier(start, builder);
2198 }
2199
2200 other if is_identifier_start(other) => {
2201 builder.push_matching(other);
2202 return self.identifier_tail(start, builder);
2203 }
2204
2205 other => {
2206 return Err(ParseError::IllegalCharacter(other).into());
2207 }
2208 }
2209 }
2210 self.set_result(
2211 TerminalId::End,
2212 SourceLocation::new(start, self.offset()),
2213 TokenValue::None,
2214 )
2215 }
2216
string_to_token_value(&mut self, s: &'alloc str) -> TokenValue2217 fn string_to_token_value(&mut self, s: &'alloc str) -> TokenValue {
2218 let index = self.atoms.borrow_mut().insert(s);
2219 TokenValue::Atom(index)
2220 }
2221
slice_to_token_value(&mut self, s: &'alloc str) -> TokenValue2222 fn slice_to_token_value(&mut self, s: &'alloc str) -> TokenValue {
2223 let index = self.slices.borrow_mut().push(s);
2224 TokenValue::Slice(index)
2225 }
2226
numeric_result_to_advance_result( &mut self, s: &'alloc str, start: usize, result: NumericResult, ) -> Result<'alloc, ()>2227 fn numeric_result_to_advance_result(
2228 &mut self,
2229 s: &'alloc str,
2230 start: usize,
2231 result: NumericResult,
2232 ) -> Result<'alloc, ()> {
2233 let (terminal_id, value) = match result {
2234 NumericResult::Int { base } => {
2235 let n = parse_int(s, base).map_err(|s| ParseError::NotImplemented(s))?;
2236 (TerminalId::NumericLiteral, TokenValue::Number(n))
2237 }
2238 NumericResult::Float => {
2239 let n = parse_float(s).map_err(|s| ParseError::NotImplemented(s))?;
2240 (TerminalId::NumericLiteral, TokenValue::Number(n))
2241 }
2242 NumericResult::BigInt { .. } => {
2243 // FIXME
2244 (TerminalId::BigIntLiteral, self.string_to_token_value(s))
2245 }
2246 };
2247
2248 self.set_result(
2249 terminal_id,
2250 SourceLocation::new(start, self.offset()),
2251 value,
2252 )
2253 }
2254 }
2255
// Builder for the text of a token. It starts out only remembering where
// the token begins in the input, and switches to an owned, arena-allocated
// string once an allocation is forced (see
// `force_allocation_without_current_ascii_char`).
struct AutoCow<'alloc> {
    // The lexer's remaining input at the moment the builder was created.
    start: &'alloc str,
    // `None` until an allocation has been forced; afterwards the text
    // accumulated so far.
    value: Option<String<'alloc>>,
}
2260
2261 impl<'alloc> AutoCow<'alloc> {
new(lexer: &Lexer<'alloc>) -> Self2262 fn new(lexer: &Lexer<'alloc>) -> Self {
2263 AutoCow {
2264 start: lexer.chars.as_str(),
2265 value: None,
2266 }
2267 }
2268
2269 // Push a char that matches lexer.chars.next()
push_matching(&mut self, c: char)2270 fn push_matching(&mut self, c: char) {
2271 if let Some(text) = &mut self.value {
2272 text.push(c);
2273 }
2274 }
2275
2276 // Push a different character than lexer.chars.next().
2277 // force_allocation_without_current_ascii_char must be called before this.
push_different(&mut self, c: char)2278 fn push_different(&mut self, c: char) {
2279 debug_assert!(self.value.is_some());
2280 self.value.as_mut().unwrap().push(c)
2281 }
2282
2283 // Force allocation of a String, excluding the current ASCII character,
2284 // and return the reference to it
get_mut_string_without_current_ascii_char<'b>( &'b mut self, lexer: &'_ Lexer<'alloc>, ) -> &'b mut String<'alloc>2285 fn get_mut_string_without_current_ascii_char<'b>(
2286 &'b mut self,
2287 lexer: &'_ Lexer<'alloc>,
2288 ) -> &'b mut String<'alloc> {
2289 self.force_allocation_without_current_ascii_char(lexer);
2290 self.value.as_mut().unwrap()
2291 }
2292
2293 // Force allocation of a String, excluding the current ASCII character.
force_allocation_without_current_ascii_char(&mut self, lexer: &'_ Lexer<'alloc>)2294 fn force_allocation_without_current_ascii_char(&mut self, lexer: &'_ Lexer<'alloc>) {
2295 if self.value.is_some() {
2296 return;
2297 }
2298
2299 self.value = Some(String::from_str_in(
2300 &self.start[..self.start.len() - lexer.chars.as_str().len() - 1],
2301 lexer.allocator,
2302 ));
2303 }
2304
2305 // Check if the string contains a different character, such as an escape
2306 // sequence
has_different(&self) -> bool2307 fn has_different(&self) -> bool {
2308 self.value.is_some()
2309 }
2310
finish(&mut self, lexer: &Lexer<'alloc>) -> &'alloc str2311 fn finish(&mut self, lexer: &Lexer<'alloc>) -> &'alloc str {
2312 match self.value.take() {
2313 Some(arena_string) => arena_string.into_bump_str(),
2314 None => &self.start[..self.start.len() - lexer.chars.as_str().len()],
2315 }
2316 }
2317
2318 // Just like finish, but without pushing current char.
finish_without_push(&mut self, lexer: &Lexer<'alloc>) -> &'alloc str2319 fn finish_without_push(&mut self, lexer: &Lexer<'alloc>) -> &'alloc str {
2320 match self.value.take() {
2321 Some(arena_string) => arena_string.into_bump_str(),
2322 None => &self.start[..self.start.len() - lexer.chars.as_str().len() - 1],
2323 }
2324 }
2325 }
2326