1 /*!
2 This module provides a regular expression parser.
3 */
4 
5 use std::borrow::Borrow;
6 use std::cell::{Cell, RefCell};
7 use std::mem;
8 use std::result;
9 
10 use crate::ast::{self, Ast, Position, Span};
11 use crate::either::Either;
12 
13 use crate::is_meta_character;
14 
/// Shorthand for results produced by this module: the error type is always
/// `ast::Error`.
type Result<T> = result::Result<T, ast::Error>;
16 
/// A primitive is an expression with no sub-expressions. This includes
/// literals, assertions and non-set character classes. This representation
/// is used as intermediate state in the parser.
///
/// This does not include ASCII character classes, since they can only appear
/// within a set character class.
#[derive(Clone, Debug, Eq, PartialEq)]
enum Primitive {
    /// A literal.
    Literal(ast::Literal),
    /// An assertion.
    Assertion(ast::Assertion),
    /// The "any character" dot. Only its span is recorded.
    Dot(Span),
    /// A Perl character class.
    Perl(ast::ClassPerl),
    /// A Unicode character class.
    Unicode(ast::ClassUnicode),
}
31 
32 impl Primitive {
33     /// Return the span of this primitive.
span(&self) -> &Span34     fn span(&self) -> &Span {
35         match *self {
36             Primitive::Literal(ref x) => &x.span,
37             Primitive::Assertion(ref x) => &x.span,
38             Primitive::Dot(ref span) => span,
39             Primitive::Perl(ref x) => &x.span,
40             Primitive::Unicode(ref x) => &x.span,
41         }
42     }
43 
44     /// Convert this primitive into a proper AST.
into_ast(self) -> Ast45     fn into_ast(self) -> Ast {
46         match self {
47             Primitive::Literal(lit) => Ast::Literal(lit),
48             Primitive::Assertion(assert) => Ast::Assertion(assert),
49             Primitive::Dot(span) => Ast::Dot(span),
50             Primitive::Perl(cls) => Ast::Class(ast::Class::Perl(cls)),
51             Primitive::Unicode(cls) => Ast::Class(ast::Class::Unicode(cls)),
52         }
53     }
54 
55     /// Convert this primitive into an item in a character class.
56     ///
57     /// If this primitive is not a legal item (i.e., an assertion or a dot),
58     /// then return an error.
into_class_set_item<P: Borrow<Parser>>( self, p: &ParserI<'_, P>, ) -> Result<ast::ClassSetItem>59     fn into_class_set_item<P: Borrow<Parser>>(
60         self,
61         p: &ParserI<'_, P>,
62     ) -> Result<ast::ClassSetItem> {
63         use self::Primitive::*;
64         use crate::ast::ClassSetItem;
65 
66         match self {
67             Literal(lit) => Ok(ClassSetItem::Literal(lit)),
68             Perl(cls) => Ok(ClassSetItem::Perl(cls)),
69             Unicode(cls) => Ok(ClassSetItem::Unicode(cls)),
70             x => Err(p.error(*x.span(), ast::ErrorKind::ClassEscapeInvalid)),
71         }
72     }
73 
74     /// Convert this primitive into a literal in a character class. In
75     /// particular, literals are the only valid items that can appear in
76     /// ranges.
77     ///
78     /// If this primitive is not a legal item (i.e., a class, assertion or a
79     /// dot), then return an error.
into_class_literal<P: Borrow<Parser>>( self, p: &ParserI<'_, P>, ) -> Result<ast::Literal>80     fn into_class_literal<P: Borrow<Parser>>(
81         self,
82         p: &ParserI<'_, P>,
83     ) -> Result<ast::Literal> {
84         use self::Primitive::*;
85 
86         match self {
87             Literal(lit) => Ok(lit),
88             x => Err(p.error(*x.span(), ast::ErrorKind::ClassRangeLiteral)),
89         }
90     }
91 }
92 
/// Reports whether `c` is one of the ASCII hexadecimal digits `0-9`, `a-f`
/// or `A-F`.
fn is_hex(c: char) -> bool {
    c.is_ascii_hexdigit()
}
97 
/// Reports whether `c` may appear in a capture group name.
///
/// When `first` is true, `c` is judged as the first character of the name,
/// which must be an ASCII letter or an underscore. In any later position,
/// ASCII digits, `.`, `[` and `]` are accepted as well.
fn is_capture_char(c: char, first: bool) -> bool {
    match c {
        '_' | 'A'..='Z' | 'a'..='z' => true,
        '0'..='9' | '.' | '[' | ']' => !first,
        _ => false,
    }
}
109 
/// A builder for a regular expression parser.
///
/// This builder permits modifying configuration options for the parser.
#[derive(Clone, Debug)]
pub struct ParserBuilder {
    /// Whether parsers built from this configuration start with whitespace
    /// insensitive (verbose) mode enabled.
    ignore_whitespace: bool,
    /// The nesting limit copied into every parser that is built.
    nest_limit: u32,
    /// Whether parsers built from this configuration support octal syntax.
    octal: bool,
}
119 
120 impl Default for ParserBuilder {
default() -> ParserBuilder121     fn default() -> ParserBuilder {
122         ParserBuilder::new()
123     }
124 }
125 
126 impl ParserBuilder {
127     /// Create a new parser builder with a default configuration.
new() -> ParserBuilder128     pub fn new() -> ParserBuilder {
129         ParserBuilder {
130             ignore_whitespace: false,
131             nest_limit: 250,
132             octal: false,
133         }
134     }
135 
136     /// Build a parser from this configuration with the given pattern.
build(&self) -> Parser137     pub fn build(&self) -> Parser {
138         Parser {
139             pos: Cell::new(Position { offset: 0, line: 1, column: 1 }),
140             capture_index: Cell::new(0),
141             nest_limit: self.nest_limit,
142             octal: self.octal,
143             initial_ignore_whitespace: self.ignore_whitespace,
144             ignore_whitespace: Cell::new(self.ignore_whitespace),
145             comments: RefCell::new(vec![]),
146             stack_group: RefCell::new(vec![]),
147             stack_class: RefCell::new(vec![]),
148             capture_names: RefCell::new(vec![]),
149             scratch: RefCell::new(String::new()),
150         }
151     }
152 
153     /// Set the nesting limit for this parser.
154     ///
155     /// The nesting limit controls how deep the abstract syntax tree is allowed
156     /// to be. If the AST exceeds the given limit (e.g., with too many nested
157     /// groups), then an error is returned by the parser.
158     ///
159     /// The purpose of this limit is to act as a heuristic to prevent stack
160     /// overflow for consumers that do structural induction on an `Ast` using
161     /// explicit recursion. While this crate never does this (instead using
162     /// constant stack space and moving the call stack to the heap), other
163     /// crates may.
164     ///
165     /// This limit is not checked until the entire Ast is parsed. Therefore,
166     /// if callers want to put a limit on the amount of heap space used, then
167     /// they should impose a limit on the length, in bytes, of the concrete
168     /// pattern string. In particular, this is viable since this parser
169     /// implementation will limit itself to heap space proportional to the
170     /// lenth of the pattern string.
171     ///
172     /// Note that a nest limit of `0` will return a nest limit error for most
173     /// patterns but not all. For example, a nest limit of `0` permits `a` but
174     /// not `ab`, since `ab` requires a concatenation, which results in a nest
175     /// depth of `1`. In general, a nest limit is not something that manifests
176     /// in an obvious way in the concrete syntax, therefore, it should not be
177     /// used in a granular way.
nest_limit(&mut self, limit: u32) -> &mut ParserBuilder178     pub fn nest_limit(&mut self, limit: u32) -> &mut ParserBuilder {
179         self.nest_limit = limit;
180         self
181     }
182 
183     /// Whether to support octal syntax or not.
184     ///
185     /// Octal syntax is a little-known way of uttering Unicode codepoints in
186     /// a regular expression. For example, `a`, `\x61`, `\u0061` and
187     /// `\141` are all equivalent regular expressions, where the last example
188     /// shows octal syntax.
189     ///
190     /// While supporting octal syntax isn't in and of itself a problem, it does
191     /// make good error messages harder. That is, in PCRE based regex engines,
192     /// syntax like `\0` invokes a backreference, which is explicitly
193     /// unsupported in Rust's regex engine. However, many users expect it to
194     /// be supported. Therefore, when octal support is disabled, the error
195     /// message will explicitly mention that backreferences aren't supported.
196     ///
197     /// Octal syntax is disabled by default.
octal(&mut self, yes: bool) -> &mut ParserBuilder198     pub fn octal(&mut self, yes: bool) -> &mut ParserBuilder {
199         self.octal = yes;
200         self
201     }
202 
203     /// Enable verbose mode in the regular expression.
204     ///
205     /// When enabled, verbose mode permits insigificant whitespace in many
206     /// places in the regular expression, as well as comments. Comments are
207     /// started using `#` and continue until the end of the line.
208     ///
209     /// By default, this is disabled. It may be selectively enabled in the
210     /// regular expression by using the `x` flag regardless of this setting.
ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder211     pub fn ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder {
212         self.ignore_whitespace = yes;
213         self
214     }
215 }
216 
/// A regular expression parser.
///
/// This parses a string representation of a regular expression into an
/// abstract syntax tree. The size of the tree is proportional to the length
/// of the regular expression pattern.
///
/// A `Parser` can be configured in more detail via a
/// [`ParserBuilder`](struct.ParserBuilder.html).
#[derive(Clone, Debug)]
pub struct Parser {
    /// The current position of the parser.
    pos: Cell<Position>,
    /// The current capture index.
    capture_index: Cell<u32>,
    /// The maximum number of open parens/brackets allowed. If the parser
    /// exceeds this number, then an error is returned.
    nest_limit: u32,
    /// Whether to support octal syntax or not. When `false`, the parser will
    /// return an error helpfully pointing out that backreferences are not
    /// supported.
    octal: bool,
    /// The initial setting for `ignore_whitespace` as provided by
    /// `ParserBuilder`. This is used when resetting the parser's state.
    initial_ignore_whitespace: bool,
    /// Whether whitespace should be ignored. When enabled, comments are
    /// also permitted.
    ignore_whitespace: Cell<bool>,
    /// A list of comments, in order of appearance.
    comments: RefCell<Vec<ast::Comment>>,
    /// A stack of grouped sub-expressions, including alternations.
    stack_group: RefCell<Vec<GroupState>>,
    /// A stack of nested character classes. This is only non-empty when
    /// parsing a class.
    stack_class: RefCell<Vec<ClassState>>,
    /// A sorted sequence of capture names. This is used to detect duplicate
    /// capture names and report an error if one is detected.
    capture_names: RefCell<Vec<ast::CaptureName>>,
    /// A scratch buffer used in various places. Mostly this is used to
    /// accumulate relevant characters from parts of a pattern.
    scratch: RefCell<String>,
}
258 
/// ParserI is the internal parser implementation.
///
/// We use this separate type so that we can carry the provided pattern string
/// along with us. In particular, a `Parser`'s internal state is not tied to
/// any one pattern, but a `ParserI` is.
///
/// This type also lets us use `ParserI<&Parser>` in production code while
/// retaining the convenience of `ParserI<Parser>` for tests, which sometimes
/// work against the internal interface of the parser.
#[derive(Clone, Debug)]
struct ParserI<'s, P> {
    /// The parser state/configuration.
    parser: P,
    /// The full regular expression provided by the user.
    pattern: &'s str,
}
275 
/// GroupState represents a single stack frame while parsing nested groups
/// and alternations. Each frame records the state up to an opening
/// parenthesis or an alternation bar `|`.
#[derive(Clone, Debug)]
enum GroupState {
    /// This state is pushed whenever an opening group is found.
    Group {
        /// The concatenation immediately preceding the opening group.
        concat: ast::Concat,
        /// The group that has been opened. Its sub-AST is always empty.
        group: ast::Group,
        /// Whether the `x` flag was enabled when this group was opened.
        /// This is the value restored when the group is closed.
        ignore_whitespace: bool,
    },
    /// This state is pushed whenever a new alternation branch is found. If
    /// an alternation branch is found and this state is at the top of the
    /// stack, then this state should be modified to include the new
    /// alternation.
    Alternation(ast::Alternation),
}
296 
/// ClassState represents a single stack frame while parsing character classes.
/// Each frame records the state up to an intersection, difference, symmetric
/// difference or nested class.
///
/// Note that a parser's character class stack is only non-empty when parsing
/// a character class. In all other cases, it is empty.
#[derive(Clone, Debug)]
enum ClassState {
    /// This state is pushed whenever an opening bracket is found.
    Open {
        /// The union of class items immediately preceding this class.
        union: ast::ClassSetUnion,
        /// The class that has been opened. Typically this just corresponds
        /// to the `[`, but it can also include `[^` since `^` indicates
        /// negation of the class.
        set: ast::ClassBracketed,
    },
    /// This state is pushed when an operator is seen. When popped, the
    /// stored set becomes the left hand side of the operator.
    Op {
        /// The type of the operation, i.e., &&, -- or ~~.
        kind: ast::ClassSetBinaryOpKind,
        /// The left-hand side of the operator.
        lhs: ast::ClassSet,
    },
}
323 
324 impl Parser {
325     /// Create a new parser with a default configuration.
326     ///
327     /// The parser can be run with either the `parse` or `parse_with_comments`
328     /// methods. The parse methods return an abstract syntax tree.
329     ///
330     /// To set configuration options on the parser, use
331     /// [`ParserBuilder`](struct.ParserBuilder.html).
new() -> Parser332     pub fn new() -> Parser {
333         ParserBuilder::new().build()
334     }
335 
336     /// Parse the regular expression into an abstract syntax tree.
parse(&mut self, pattern: &str) -> Result<Ast>337     pub fn parse(&mut self, pattern: &str) -> Result<Ast> {
338         ParserI::new(self, pattern).parse()
339     }
340 
341     /// Parse the regular expression and return an abstract syntax tree with
342     /// all of the comments found in the pattern.
parse_with_comments( &mut self, pattern: &str, ) -> Result<ast::WithComments>343     pub fn parse_with_comments(
344         &mut self,
345         pattern: &str,
346     ) -> Result<ast::WithComments> {
347         ParserI::new(self, pattern).parse_with_comments()
348     }
349 
350     /// Reset the internal state of a parser.
351     ///
352     /// This is called at the beginning of every parse. This prevents the
353     /// parser from running with inconsistent state (say, if a previous
354     /// invocation returned an error and the parser is reused).
reset(&self)355     fn reset(&self) {
356         // These settings should be in line with the construction
357         // in `ParserBuilder::build`.
358         self.pos.set(Position { offset: 0, line: 1, column: 1 });
359         self.ignore_whitespace.set(self.initial_ignore_whitespace);
360         self.comments.borrow_mut().clear();
361         self.stack_group.borrow_mut().clear();
362         self.stack_class.borrow_mut().clear();
363     }
364 }
365 
366 impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
367     /// Build an internal parser from a parser configuration and a pattern.
new(parser: P, pattern: &'s str) -> ParserI<'s, P>368     fn new(parser: P, pattern: &'s str) -> ParserI<'s, P> {
369         ParserI { parser: parser, pattern: pattern }
370     }
371 
372     /// Return a reference to the parser state.
parser(&self) -> &Parser373     fn parser(&self) -> &Parser {
374         self.parser.borrow()
375     }
376 
377     /// Return a reference to the pattern being parsed.
pattern(&self) -> &str378     fn pattern(&self) -> &str {
379         self.pattern.borrow()
380     }
381 
382     /// Create a new error with the given span and error type.
error(&self, span: Span, kind: ast::ErrorKind) -> ast::Error383     fn error(&self, span: Span, kind: ast::ErrorKind) -> ast::Error {
384         ast::Error {
385             kind: kind,
386             pattern: self.pattern().to_string(),
387             span: span,
388         }
389     }
390 
391     /// Return the current offset of the parser.
392     ///
393     /// The offset starts at `0` from the beginning of the regular expression
394     /// pattern string.
offset(&self) -> usize395     fn offset(&self) -> usize {
396         self.parser().pos.get().offset
397     }
398 
399     /// Return the current line number of the parser.
400     ///
401     /// The line number starts at `1`.
line(&self) -> usize402     fn line(&self) -> usize {
403         self.parser().pos.get().line
404     }
405 
406     /// Return the current column of the parser.
407     ///
408     /// The column number starts at `1` and is reset whenever a `\n` is seen.
column(&self) -> usize409     fn column(&self) -> usize {
410         self.parser().pos.get().column
411     }
412 
413     /// Return the next capturing index. Each subsequent call increments the
414     /// internal index.
415     ///
416     /// The span given should correspond to the location of the opening
417     /// parenthesis.
418     ///
419     /// If the capture limit is exceeded, then an error is returned.
next_capture_index(&self, span: Span) -> Result<u32>420     fn next_capture_index(&self, span: Span) -> Result<u32> {
421         let current = self.parser().capture_index.get();
422         let i = current.checked_add(1).ok_or_else(|| {
423             self.error(span, ast::ErrorKind::CaptureLimitExceeded)
424         })?;
425         self.parser().capture_index.set(i);
426         Ok(i)
427     }
428 
429     /// Adds the given capture name to this parser. If this capture name has
430     /// already been used, then an error is returned.
add_capture_name(&self, cap: &ast::CaptureName) -> Result<()>431     fn add_capture_name(&self, cap: &ast::CaptureName) -> Result<()> {
432         let mut names = self.parser().capture_names.borrow_mut();
433         match names
434             .binary_search_by_key(&cap.name.as_str(), |c| c.name.as_str())
435         {
436             Err(i) => {
437                 names.insert(i, cap.clone());
438                 Ok(())
439             }
440             Ok(i) => Err(self.error(
441                 cap.span,
442                 ast::ErrorKind::GroupNameDuplicate { original: names[i].span },
443             )),
444         }
445     }
446 
447     /// Return whether the parser should ignore whitespace or not.
ignore_whitespace(&self) -> bool448     fn ignore_whitespace(&self) -> bool {
449         self.parser().ignore_whitespace.get()
450     }
451 
452     /// Return the character at the current position of the parser.
453     ///
454     /// This panics if the current position does not point to a valid char.
char(&self) -> char455     fn char(&self) -> char {
456         self.char_at(self.offset())
457     }
458 
459     /// Return the character at the given position.
460     ///
461     /// This panics if the given position does not point to a valid char.
char_at(&self, i: usize) -> char462     fn char_at(&self, i: usize) -> char {
463         self.pattern()[i..]
464             .chars()
465             .next()
466             .unwrap_or_else(|| panic!("expected char at offset {}", i))
467     }
468 
469     /// Bump the parser to the next Unicode scalar value.
470     ///
471     /// If the end of the input has been reached, then `false` is returned.
bump(&self) -> bool472     fn bump(&self) -> bool {
473         if self.is_eof() {
474             return false;
475         }
476         let Position { mut offset, mut line, mut column } = self.pos();
477         if self.char() == '\n' {
478             line = line.checked_add(1).unwrap();
479             column = 1;
480         } else {
481             column = column.checked_add(1).unwrap();
482         }
483         offset += self.char().len_utf8();
484         self.parser().pos.set(Position {
485             offset: offset,
486             line: line,
487             column: column,
488         });
489         self.pattern()[self.offset()..].chars().next().is_some()
490     }
491 
492     /// If the substring starting at the current position of the parser has
493     /// the given prefix, then bump the parser to the character immediately
494     /// following the prefix and return true. Otherwise, don't bump the parser
495     /// and return false.
bump_if(&self, prefix: &str) -> bool496     fn bump_if(&self, prefix: &str) -> bool {
497         if self.pattern()[self.offset()..].starts_with(prefix) {
498             for _ in 0..prefix.chars().count() {
499                 self.bump();
500             }
501             true
502         } else {
503             false
504         }
505     }
506 
507     /// Returns true if and only if the parser is positioned at a look-around
508     /// prefix. The conditions under which this returns true must always
509     /// correspond to a regular expression that would otherwise be consider
510     /// invalid.
511     ///
512     /// This should only be called immediately after parsing the opening of
513     /// a group or a set of flags.
is_lookaround_prefix(&self) -> bool514     fn is_lookaround_prefix(&self) -> bool {
515         self.bump_if("?=")
516             || self.bump_if("?!")
517             || self.bump_if("?<=")
518             || self.bump_if("?<!")
519     }
520 
521     /// Bump the parser, and if the `x` flag is enabled, bump through any
522     /// subsequent spaces. Return true if and only if the parser is not at
523     /// EOF.
bump_and_bump_space(&self) -> bool524     fn bump_and_bump_space(&self) -> bool {
525         if !self.bump() {
526             return false;
527         }
528         self.bump_space();
529         !self.is_eof()
530     }
531 
532     /// If the `x` flag is enabled (i.e., whitespace insensitivity with
533     /// comments), then this will advance the parser through all whitespace
534     /// and comments to the next non-whitespace non-comment byte.
535     ///
536     /// If the `x` flag is disabled, then this is a no-op.
537     ///
538     /// This should be used selectively throughout the parser where
539     /// arbitrary whitespace is permitted when the `x` flag is enabled. For
540     /// example, `{   5  , 6}` is equivalent to `{5,6}`.
bump_space(&self)541     fn bump_space(&self) {
542         if !self.ignore_whitespace() {
543             return;
544         }
545         while !self.is_eof() {
546             if self.char().is_whitespace() {
547                 self.bump();
548             } else if self.char() == '#' {
549                 let start = self.pos();
550                 let mut comment_text = String::new();
551                 self.bump();
552                 while !self.is_eof() {
553                     let c = self.char();
554                     self.bump();
555                     if c == '\n' {
556                         break;
557                     }
558                     comment_text.push(c);
559                 }
560                 let comment = ast::Comment {
561                     span: Span::new(start, self.pos()),
562                     comment: comment_text,
563                 };
564                 self.parser().comments.borrow_mut().push(comment);
565             } else {
566                 break;
567             }
568         }
569     }
570 
571     /// Peek at the next character in the input without advancing the parser.
572     ///
573     /// If the input has been exhausted, then this returns `None`.
peek(&self) -> Option<char>574     fn peek(&self) -> Option<char> {
575         if self.is_eof() {
576             return None;
577         }
578         self.pattern()[self.offset() + self.char().len_utf8()..].chars().next()
579     }
580 
581     /// Like peek, but will ignore spaces when the parser is in whitespace
582     /// insensitive mode.
peek_space(&self) -> Option<char>583     fn peek_space(&self) -> Option<char> {
584         if !self.ignore_whitespace() {
585             return self.peek();
586         }
587         if self.is_eof() {
588             return None;
589         }
590         let mut start = self.offset() + self.char().len_utf8();
591         let mut in_comment = false;
592         for (i, c) in self.pattern()[start..].char_indices() {
593             if c.is_whitespace() {
594                 continue;
595             } else if !in_comment && c == '#' {
596                 in_comment = true;
597             } else if in_comment && c == '\n' {
598                 in_comment = false;
599             } else {
600                 start += i;
601                 break;
602             }
603         }
604         self.pattern()[start..].chars().next()
605     }
606 
607     /// Returns true if the next call to `bump` would return false.
is_eof(&self) -> bool608     fn is_eof(&self) -> bool {
609         self.offset() == self.pattern().len()
610     }
611 
612     /// Return the current position of the parser, which includes the offset,
613     /// line and column.
pos(&self) -> Position614     fn pos(&self) -> Position {
615         self.parser().pos.get()
616     }
617 
618     /// Create a span at the current position of the parser. Both the start
619     /// and end of the span are set.
span(&self) -> Span620     fn span(&self) -> Span {
621         Span::splat(self.pos())
622     }
623 
624     /// Create a span that covers the current character.
span_char(&self) -> Span625     fn span_char(&self) -> Span {
626         let mut next = Position {
627             offset: self.offset().checked_add(self.char().len_utf8()).unwrap(),
628             line: self.line(),
629             column: self.column().checked_add(1).unwrap(),
630         };
631         if self.char() == '\n' {
632             next.line += 1;
633             next.column = 1;
634         }
635         Span::new(self.pos(), next)
636     }
637 
638     /// Parse and push a single alternation on to the parser's internal stack.
639     /// If the top of the stack already has an alternation, then add to that
640     /// instead of pushing a new one.
641     ///
642     /// The concatenation given corresponds to a single alternation branch.
643     /// The concatenation returned starts the next branch and is empty.
644     ///
645     /// This assumes the parser is currently positioned at `|` and will advance
646     /// the parser to the character following `|`.
647     #[inline(never)]
push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat>648     fn push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
649         assert_eq!(self.char(), '|');
650         concat.span.end = self.pos();
651         self.push_or_add_alternation(concat);
652         self.bump();
653         Ok(ast::Concat { span: self.span(), asts: vec![] })
654     }
655 
656     /// Pushes or adds the given branch of an alternation to the parser's
657     /// internal stack of state.
push_or_add_alternation(&self, concat: ast::Concat)658     fn push_or_add_alternation(&self, concat: ast::Concat) {
659         use self::GroupState::*;
660 
661         let mut stack = self.parser().stack_group.borrow_mut();
662         if let Some(&mut Alternation(ref mut alts)) = stack.last_mut() {
663             alts.asts.push(concat.into_ast());
664             return;
665         }
666         stack.push(Alternation(ast::Alternation {
667             span: Span::new(concat.span.start, self.pos()),
668             asts: vec![concat.into_ast()],
669         }));
670     }
671 
672     /// Parse and push a group AST (and its parent concatenation) on to the
673     /// parser's internal stack. Return a fresh concatenation corresponding
674     /// to the group's sub-AST.
675     ///
676     /// If a set of flags was found (with no group), then the concatenation
677     /// is returned with that set of flags added.
678     ///
679     /// This assumes that the parser is currently positioned on the opening
680     /// parenthesis. It advances the parser to the character at the start
681     /// of the sub-expression (or adjoining expression).
682     ///
683     /// If there was a problem parsing the start of the group, then an error
684     /// is returned.
685     #[inline(never)]
push_group(&self, mut concat: ast::Concat) -> Result<ast::Concat>686     fn push_group(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
687         assert_eq!(self.char(), '(');
688         match self.parse_group()? {
689             Either::Left(set) => {
690                 let ignore = set.flags.flag_state(ast::Flag::IgnoreWhitespace);
691                 if let Some(v) = ignore {
692                     self.parser().ignore_whitespace.set(v);
693                 }
694 
695                 concat.asts.push(Ast::Flags(set));
696                 Ok(concat)
697             }
698             Either::Right(group) => {
699                 let old_ignore_whitespace = self.ignore_whitespace();
700                 let new_ignore_whitespace = group
701                     .flags()
702                     .and_then(|f| f.flag_state(ast::Flag::IgnoreWhitespace))
703                     .unwrap_or(old_ignore_whitespace);
704                 self.parser().stack_group.borrow_mut().push(
705                     GroupState::Group {
706                         concat: concat,
707                         group: group,
708                         ignore_whitespace: old_ignore_whitespace,
709                     },
710                 );
711                 self.parser().ignore_whitespace.set(new_ignore_whitespace);
712                 Ok(ast::Concat { span: self.span(), asts: vec![] })
713             }
714         }
715     }
716 
    /// Pop a group AST from the parser's internal stack and set the group's
    /// AST to the given concatenation. Return the concatenation containing
    /// the group.
    ///
    /// If an alternation frame sits on top of the group frame, the given
    /// concatenation becomes the alternation's last branch and the
    /// alternation becomes the group's AST.
    ///
    /// This assumes that the parser is currently positioned on the closing
    /// parenthesis and advances the parser to the character following the `)`.
    ///
    /// If no such group could be popped, then an unopened group error is
    /// returned.
    #[inline(never)]
    fn pop_group(&self, mut group_concat: ast::Concat) -> Result<ast::Concat> {
        use self::GroupState::*;

        assert_eq!(self.char(), ')');
        let mut stack = self.parser().stack_group.borrow_mut();
        // The top frame is either the group itself, or an alternation that
        // must have the group frame directly beneath it.
        let (mut prior_concat, mut group, ignore_whitespace, alt) = match stack
            .pop()
        {
            Some(Group { concat, group, ignore_whitespace }) => {
                (concat, group, ignore_whitespace, None)
            }
            Some(Alternation(alt)) => match stack.pop() {
                Some(Group { concat, group, ignore_whitespace }) => {
                    (concat, group, ignore_whitespace, Some(alt))
                }
                // An alternation with no group beneath it means this `)`
                // has no matching `(`.
                None | Some(Alternation(_)) => {
                    return Err(self.error(
                        self.span_char(),
                        ast::ErrorKind::GroupUnopened,
                    ));
                }
            },
            None => {
                return Err(self
                    .error(self.span_char(), ast::ErrorKind::GroupUnopened));
            }
        };
        // Restore the `x` setting that was in effect before the group was
        // opened.
        self.parser().ignore_whitespace.set(ignore_whitespace);
        // The group's contents end just before the `)`, while the group
        // itself ends just after it.
        group_concat.span.end = self.pos();
        self.bump();
        group.span.end = self.pos();
        match alt {
            Some(mut alt) => {
                // The alternation ends where its last branch ends.
                alt.span.end = group_concat.span.end;
                alt.asts.push(group_concat.into_ast());
                group.ast = Box::new(alt.into_ast());
            }
            None => {
                group.ast = Box::new(group_concat.into_ast());
            }
        }
        prior_concat.asts.push(Ast::Group(group));
        Ok(prior_concat)
    }
771 
772     /// Pop the last state from the parser's internal stack, if it exists, and
773     /// add the given concatenation to it. There either must be no state or a
774     /// single alternation item on the stack. Any other scenario produces an
775     /// error.
776     ///
777     /// This assumes that the parser has advanced to the end.
778     #[inline(never)]
pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast>779     fn pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast> {
780         concat.span.end = self.pos();
781         let mut stack = self.parser().stack_group.borrow_mut();
782         let ast = match stack.pop() {
783             None => Ok(concat.into_ast()),
784             Some(GroupState::Alternation(mut alt)) => {
785                 alt.span.end = self.pos();
786                 alt.asts.push(concat.into_ast());
787                 Ok(Ast::Alternation(alt))
788             }
789             Some(GroupState::Group { group, .. }) => {
790                 return Err(
791                     self.error(group.span, ast::ErrorKind::GroupUnclosed)
792                 );
793             }
794         };
795         // If we try to pop again, there should be nothing.
796         match stack.pop() {
797             None => ast,
798             Some(GroupState::Alternation(_)) => {
799                 // This unreachable is unfortunate. This case can't happen
800                 // because the only way we can be here is if there were two
801                 // `GroupState::Alternation`s adjacent in the parser's stack,
802                 // which we guarantee to never happen because we never push a
803                 // `GroupState::Alternation` if one is already at the top of
804                 // the stack.
805                 unreachable!()
806             }
807             Some(GroupState::Group { group, .. }) => {
808                 Err(self.error(group.span, ast::ErrorKind::GroupUnclosed))
809             }
810         }
811     }
812 
813     /// Parse the opening of a character class and push the current class
814     /// parsing context onto the parser's stack. This assumes that the parser
815     /// is positioned at an opening `[`. The given union should correspond to
816     /// the union of set items built up before seeing the `[`.
817     ///
818     /// If there was a problem parsing the opening of the class, then an error
819     /// is returned. Otherwise, a new union of set items for the class is
820     /// returned (which may be populated with either a `]` or a `-`).
821     #[inline(never)]
push_class_open( &self, parent_union: ast::ClassSetUnion, ) -> Result<ast::ClassSetUnion>822     fn push_class_open(
823         &self,
824         parent_union: ast::ClassSetUnion,
825     ) -> Result<ast::ClassSetUnion> {
826         assert_eq!(self.char(), '[');
827 
828         let (nested_set, nested_union) = self.parse_set_class_open()?;
829         self.parser()
830             .stack_class
831             .borrow_mut()
832             .push(ClassState::Open { union: parent_union, set: nested_set });
833         Ok(nested_union)
834     }
835 
836     /// Parse the end of a character class set and pop the character class
837     /// parser stack. The union given corresponds to the last union built
838     /// before seeing the closing `]`. The union returned corresponds to the
839     /// parent character class set with the nested class added to it.
840     ///
841     /// This assumes that the parser is positioned at a `]` and will advance
842     /// the parser to the byte immediately following the `]`.
843     ///
844     /// If the stack is empty after popping, then this returns the final
845     /// "top-level" character class AST (where a "top-level" character class
846     /// is one that is not nested inside any other character class).
847     ///
848     /// If there is no corresponding opening bracket on the parser's stack,
849     /// then an error is returned.
850     #[inline(never)]
pop_class( &self, nested_union: ast::ClassSetUnion, ) -> Result<Either<ast::ClassSetUnion, ast::Class>>851     fn pop_class(
852         &self,
853         nested_union: ast::ClassSetUnion,
854     ) -> Result<Either<ast::ClassSetUnion, ast::Class>> {
855         assert_eq!(self.char(), ']');
856 
857         let item = ast::ClassSet::Item(nested_union.into_item());
858         let prevset = self.pop_class_op(item);
859         let mut stack = self.parser().stack_class.borrow_mut();
860         match stack.pop() {
861             None => {
862                 // We can never observe an empty stack:
863                 //
864                 // 1) We are guaranteed to start with a non-empty stack since
865                 //    the character class parser is only initiated when it sees
866                 //    a `[`.
867                 // 2) If we ever observe an empty stack while popping after
868                 //    seeing a `]`, then we signal the character class parser
869                 //    to terminate.
870                 panic!("unexpected empty character class stack")
871             }
872             Some(ClassState::Op { .. }) => {
873                 // This panic is unfortunate, but this case is impossible
874                 // since we already popped the Op state if one exists above.
875                 // Namely, every push to the class parser stack is guarded by
876                 // whether an existing Op is already on the top of the stack.
877                 // If it is, the existing Op is modified. That is, the stack
878                 // can never have consecutive Op states.
879                 panic!("unexpected ClassState::Op")
880             }
881             Some(ClassState::Open { mut union, mut set }) => {
882                 self.bump();
883                 set.span.end = self.pos();
884                 set.kind = prevset;
885                 if stack.is_empty() {
886                     Ok(Either::Right(ast::Class::Bracketed(set)))
887                 } else {
888                     union.push(ast::ClassSetItem::Bracketed(Box::new(set)));
889                     Ok(Either::Left(union))
890                 }
891             }
892         }
893     }
894 
895     /// Return an "unclosed class" error whose span points to the most
896     /// recently opened class.
897     ///
898     /// This should only be called while parsing a character class.
899     #[inline(never)]
unclosed_class_error(&self) -> ast::Error900     fn unclosed_class_error(&self) -> ast::Error {
901         for state in self.parser().stack_class.borrow().iter().rev() {
902             match *state {
903                 ClassState::Open { ref set, .. } => {
904                     return self
905                         .error(set.span, ast::ErrorKind::ClassUnclosed);
906                 }
907                 _ => {}
908             }
909         }
910         // We are guaranteed to have a non-empty stack with at least
911         // one open bracket, so we should never get here.
912         panic!("no open character class found")
913     }
914 
915     /// Push the current set of class items on to the class parser's stack as
916     /// the left hand side of the given operator.
917     ///
918     /// A fresh set union is returned, which should be used to build the right
919     /// hand side of this operator.
920     #[inline(never)]
push_class_op( &self, next_kind: ast::ClassSetBinaryOpKind, next_union: ast::ClassSetUnion, ) -> ast::ClassSetUnion921     fn push_class_op(
922         &self,
923         next_kind: ast::ClassSetBinaryOpKind,
924         next_union: ast::ClassSetUnion,
925     ) -> ast::ClassSetUnion {
926         let item = ast::ClassSet::Item(next_union.into_item());
927         let new_lhs = self.pop_class_op(item);
928         self.parser()
929             .stack_class
930             .borrow_mut()
931             .push(ClassState::Op { kind: next_kind, lhs: new_lhs });
932         ast::ClassSetUnion { span: self.span(), items: vec![] }
933     }
934 
935     /// Pop a character class set from the character class parser stack. If the
936     /// top of the stack is just an item (not an operation), then return the
937     /// given set unchanged. If the top of the stack is an operation, then the
938     /// given set will be used as the rhs of the operation on the top of the
939     /// stack. In that case, the binary operation is returned as a set.
940     #[inline(never)]
pop_class_op(&self, rhs: ast::ClassSet) -> ast::ClassSet941     fn pop_class_op(&self, rhs: ast::ClassSet) -> ast::ClassSet {
942         let mut stack = self.parser().stack_class.borrow_mut();
943         let (kind, lhs) = match stack.pop() {
944             Some(ClassState::Op { kind, lhs }) => (kind, lhs),
945             Some(state @ ClassState::Open { .. }) => {
946                 stack.push(state);
947                 return rhs;
948             }
949             None => unreachable!(),
950         };
951         let span = Span::new(lhs.span().start, rhs.span().end);
952         ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
953             span: span,
954             kind: kind,
955             lhs: Box::new(lhs),
956             rhs: Box::new(rhs),
957         })
958     }
959 }
960 
961 impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
962     /// Parse the regular expression into an abstract syntax tree.
parse(&self) -> Result<Ast>963     fn parse(&self) -> Result<Ast> {
964         self.parse_with_comments().map(|astc| astc.ast)
965     }
966 
967     /// Parse the regular expression and return an abstract syntax tree with
968     /// all of the comments found in the pattern.
parse_with_comments(&self) -> Result<ast::WithComments>969     fn parse_with_comments(&self) -> Result<ast::WithComments> {
970         assert_eq!(self.offset(), 0, "parser can only be used once");
971         self.parser().reset();
972         let mut concat = ast::Concat { span: self.span(), asts: vec![] };
973         loop {
974             self.bump_space();
975             if self.is_eof() {
976                 break;
977             }
978             match self.char() {
979                 '(' => concat = self.push_group(concat)?,
980                 ')' => concat = self.pop_group(concat)?,
981                 '|' => concat = self.push_alternate(concat)?,
982                 '[' => {
983                     let class = self.parse_set_class()?;
984                     concat.asts.push(Ast::Class(class));
985                 }
986                 '?' => {
987                     concat = self.parse_uncounted_repetition(
988                         concat,
989                         ast::RepetitionKind::ZeroOrOne,
990                     )?;
991                 }
992                 '*' => {
993                     concat = self.parse_uncounted_repetition(
994                         concat,
995                         ast::RepetitionKind::ZeroOrMore,
996                     )?;
997                 }
998                 '+' => {
999                     concat = self.parse_uncounted_repetition(
1000                         concat,
1001                         ast::RepetitionKind::OneOrMore,
1002                     )?;
1003                 }
1004                 '{' => {
1005                     concat = self.parse_counted_repetition(concat)?;
1006                 }
1007                 _ => concat.asts.push(self.parse_primitive()?.into_ast()),
1008             }
1009         }
1010         let ast = self.pop_group_end(concat)?;
1011         NestLimiter::new(self).check(&ast)?;
1012         Ok(ast::WithComments {
1013             ast: ast,
1014             comments: mem::replace(
1015                 &mut *self.parser().comments.borrow_mut(),
1016                 vec![],
1017             ),
1018         })
1019     }
1020 
1021     /// Parses an uncounted repetition operation. An uncounted repetition
1022     /// operator includes ?, * and +, but does not include the {m,n} syntax.
1023     /// The given `kind` should correspond to the operator observed by the
1024     /// caller.
1025     ///
1026     /// This assumes that the paser is currently positioned at the repetition
1027     /// operator and advances the parser to the first character after the
1028     /// operator. (Note that the operator may include a single additional `?`,
1029     /// which makes the operator ungreedy.)
1030     ///
1031     /// The caller should include the concatenation that is being built. The
1032     /// concatenation returned includes the repetition operator applied to the
1033     /// last expression in the given concatenation.
1034     #[inline(never)]
parse_uncounted_repetition( &self, mut concat: ast::Concat, kind: ast::RepetitionKind, ) -> Result<ast::Concat>1035     fn parse_uncounted_repetition(
1036         &self,
1037         mut concat: ast::Concat,
1038         kind: ast::RepetitionKind,
1039     ) -> Result<ast::Concat> {
1040         assert!(
1041             self.char() == '?' || self.char() == '*' || self.char() == '+'
1042         );
1043         let op_start = self.pos();
1044         let ast = match concat.asts.pop() {
1045             Some(ast) => ast,
1046             None => {
1047                 return Err(
1048                     self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1049                 )
1050             }
1051         };
1052         match ast {
1053             Ast::Empty(_) | Ast::Flags(_) => {
1054                 return Err(
1055                     self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1056                 )
1057             }
1058             _ => {}
1059         }
1060         let mut greedy = true;
1061         if self.bump() && self.char() == '?' {
1062             greedy = false;
1063             self.bump();
1064         }
1065         concat.asts.push(Ast::Repetition(ast::Repetition {
1066             span: ast.span().with_end(self.pos()),
1067             op: ast::RepetitionOp {
1068                 span: Span::new(op_start, self.pos()),
1069                 kind: kind,
1070             },
1071             greedy: greedy,
1072             ast: Box::new(ast),
1073         }));
1074         Ok(concat)
1075     }
1076 
1077     /// Parses a counted repetition operation. A counted repetition operator
1078     /// corresponds to the {m,n} syntax, and does not include the ?, * or +
1079     /// operators.
1080     ///
1081     /// This assumes that the paser is currently positioned at the opening `{`
1082     /// and advances the parser to the first character after the operator.
1083     /// (Note that the operator may include a single additional `?`, which
1084     /// makes the operator ungreedy.)
1085     ///
1086     /// The caller should include the concatenation that is being built. The
1087     /// concatenation returned includes the repetition operator applied to the
1088     /// last expression in the given concatenation.
1089     #[inline(never)]
parse_counted_repetition( &self, mut concat: ast::Concat, ) -> Result<ast::Concat>1090     fn parse_counted_repetition(
1091         &self,
1092         mut concat: ast::Concat,
1093     ) -> Result<ast::Concat> {
1094         assert!(self.char() == '{');
1095         let start = self.pos();
1096         let ast = match concat.asts.pop() {
1097             Some(ast) => ast,
1098             None => {
1099                 return Err(
1100                     self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1101                 )
1102             }
1103         };
1104         match ast {
1105             Ast::Empty(_) | Ast::Flags(_) => {
1106                 return Err(
1107                     self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1108                 )
1109             }
1110             _ => {}
1111         }
1112         if !self.bump_and_bump_space() {
1113             return Err(self.error(
1114                 Span::new(start, self.pos()),
1115                 ast::ErrorKind::RepetitionCountUnclosed,
1116             ));
1117         }
1118         let count_start = specialize_err(
1119             self.parse_decimal(),
1120             ast::ErrorKind::DecimalEmpty,
1121             ast::ErrorKind::RepetitionCountDecimalEmpty,
1122         )?;
1123         let mut range = ast::RepetitionRange::Exactly(count_start);
1124         if self.is_eof() {
1125             return Err(self.error(
1126                 Span::new(start, self.pos()),
1127                 ast::ErrorKind::RepetitionCountUnclosed,
1128             ));
1129         }
1130         if self.char() == ',' {
1131             if !self.bump_and_bump_space() {
1132                 return Err(self.error(
1133                     Span::new(start, self.pos()),
1134                     ast::ErrorKind::RepetitionCountUnclosed,
1135                 ));
1136             }
1137             if self.char() != '}' {
1138                 let count_end = specialize_err(
1139                     self.parse_decimal(),
1140                     ast::ErrorKind::DecimalEmpty,
1141                     ast::ErrorKind::RepetitionCountDecimalEmpty,
1142                 )?;
1143                 range = ast::RepetitionRange::Bounded(count_start, count_end);
1144             } else {
1145                 range = ast::RepetitionRange::AtLeast(count_start);
1146             }
1147         }
1148         if self.is_eof() || self.char() != '}' {
1149             return Err(self.error(
1150                 Span::new(start, self.pos()),
1151                 ast::ErrorKind::RepetitionCountUnclosed,
1152             ));
1153         }
1154 
1155         let mut greedy = true;
1156         if self.bump_and_bump_space() && self.char() == '?' {
1157             greedy = false;
1158             self.bump();
1159         }
1160 
1161         let op_span = Span::new(start, self.pos());
1162         if !range.is_valid() {
1163             return Err(
1164                 self.error(op_span, ast::ErrorKind::RepetitionCountInvalid)
1165             );
1166         }
1167         concat.asts.push(Ast::Repetition(ast::Repetition {
1168             span: ast.span().with_end(self.pos()),
1169             op: ast::RepetitionOp {
1170                 span: op_span,
1171                 kind: ast::RepetitionKind::Range(range),
1172             },
1173             greedy: greedy,
1174             ast: Box::new(ast),
1175         }));
1176         Ok(concat)
1177     }
1178 
1179     /// Parse a group (which contains a sub-expression) or a set of flags.
1180     ///
1181     /// If a group was found, then it is returned with an empty AST. If a set
1182     /// of flags is found, then that set is returned.
1183     ///
1184     /// The parser should be positioned at the opening parenthesis.
1185     ///
1186     /// This advances the parser to the character before the start of the
1187     /// sub-expression (in the case of a group) or to the closing parenthesis
1188     /// immediately following the set of flags.
1189     ///
1190     /// # Errors
1191     ///
1192     /// If flags are given and incorrectly specified, then a corresponding
1193     /// error is returned.
1194     ///
1195     /// If a capture name is given and it is incorrectly specified, then a
1196     /// corresponding error is returned.
1197     #[inline(never)]
parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>>1198     fn parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>> {
1199         assert_eq!(self.char(), '(');
1200         let open_span = self.span_char();
1201         self.bump();
1202         self.bump_space();
1203         if self.is_lookaround_prefix() {
1204             return Err(self.error(
1205                 Span::new(open_span.start, self.span().end),
1206                 ast::ErrorKind::UnsupportedLookAround,
1207             ));
1208         }
1209         let inner_span = self.span();
1210         if self.bump_if("?P<") {
1211             let capture_index = self.next_capture_index(open_span)?;
1212             let cap = self.parse_capture_name(capture_index)?;
1213             Ok(Either::Right(ast::Group {
1214                 span: open_span,
1215                 kind: ast::GroupKind::CaptureName(cap),
1216                 ast: Box::new(Ast::Empty(self.span())),
1217             }))
1218         } else if self.bump_if("?") {
1219             if self.is_eof() {
1220                 return Err(
1221                     self.error(open_span, ast::ErrorKind::GroupUnclosed)
1222                 );
1223             }
1224             let flags = self.parse_flags()?;
1225             let char_end = self.char();
1226             self.bump();
1227             if char_end == ')' {
1228                 // We don't allow empty flags, e.g., `(?)`. We instead
1229                 // interpret it as a repetition operator missing its argument.
1230                 if flags.items.is_empty() {
1231                     return Err(self.error(
1232                         inner_span,
1233                         ast::ErrorKind::RepetitionMissing,
1234                     ));
1235                 }
1236                 Ok(Either::Left(ast::SetFlags {
1237                     span: Span { end: self.pos(), ..open_span },
1238                     flags: flags,
1239                 }))
1240             } else {
1241                 assert_eq!(char_end, ':');
1242                 Ok(Either::Right(ast::Group {
1243                     span: open_span,
1244                     kind: ast::GroupKind::NonCapturing(flags),
1245                     ast: Box::new(Ast::Empty(self.span())),
1246                 }))
1247             }
1248         } else {
1249             let capture_index = self.next_capture_index(open_span)?;
1250             Ok(Either::Right(ast::Group {
1251                 span: open_span,
1252                 kind: ast::GroupKind::CaptureIndex(capture_index),
1253                 ast: Box::new(Ast::Empty(self.span())),
1254             }))
1255         }
1256     }
1257 
1258     /// Parses a capture group name. Assumes that the parser is positioned at
1259     /// the first character in the name following the opening `<` (and may
1260     /// possibly be EOF). This advances the parser to the first character
1261     /// following the closing `>`.
1262     ///
1263     /// The caller must provide the capture index of the group for this name.
1264     #[inline(never)]
parse_capture_name( &self, capture_index: u32, ) -> Result<ast::CaptureName>1265     fn parse_capture_name(
1266         &self,
1267         capture_index: u32,
1268     ) -> Result<ast::CaptureName> {
1269         if self.is_eof() {
1270             return Err(self
1271                 .error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1272         }
1273         let start = self.pos();
1274         loop {
1275             if self.char() == '>' {
1276                 break;
1277             }
1278             if !is_capture_char(self.char(), self.pos() == start) {
1279                 return Err(self.error(
1280                     self.span_char(),
1281                     ast::ErrorKind::GroupNameInvalid,
1282                 ));
1283             }
1284             if !self.bump() {
1285                 break;
1286             }
1287         }
1288         let end = self.pos();
1289         if self.is_eof() {
1290             return Err(self
1291                 .error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1292         }
1293         assert_eq!(self.char(), '>');
1294         self.bump();
1295         let name = &self.pattern()[start.offset..end.offset];
1296         if name.is_empty() {
1297             return Err(self.error(
1298                 Span::new(start, start),
1299                 ast::ErrorKind::GroupNameEmpty,
1300             ));
1301         }
1302         let capname = ast::CaptureName {
1303             span: Span::new(start, end),
1304             name: name.to_string(),
1305             index: capture_index,
1306         };
1307         self.add_capture_name(&capname)?;
1308         Ok(capname)
1309     }
1310 
1311     /// Parse a sequence of flags starting at the current character.
1312     ///
1313     /// This advances the parser to the character immediately following the
1314     /// flags, which is guaranteed to be either `:` or `)`.
1315     ///
1316     /// # Errors
1317     ///
1318     /// If any flags are duplicated, then an error is returned.
1319     ///
1320     /// If the negation operator is used more than once, then an error is
1321     /// returned.
1322     ///
1323     /// If no flags could be found or if the negation operation is not followed
1324     /// by any flags, then an error is returned.
1325     #[inline(never)]
parse_flags(&self) -> Result<ast::Flags>1326     fn parse_flags(&self) -> Result<ast::Flags> {
1327         let mut flags = ast::Flags { span: self.span(), items: vec![] };
1328         let mut last_was_negation = None;
1329         while self.char() != ':' && self.char() != ')' {
1330             if self.char() == '-' {
1331                 last_was_negation = Some(self.span_char());
1332                 let item = ast::FlagsItem {
1333                     span: self.span_char(),
1334                     kind: ast::FlagsItemKind::Negation,
1335                 };
1336                 if let Some(i) = flags.add_item(item) {
1337                     return Err(self.error(
1338                         self.span_char(),
1339                         ast::ErrorKind::FlagRepeatedNegation {
1340                             original: flags.items[i].span,
1341                         },
1342                     ));
1343                 }
1344             } else {
1345                 last_was_negation = None;
1346                 let item = ast::FlagsItem {
1347                     span: self.span_char(),
1348                     kind: ast::FlagsItemKind::Flag(self.parse_flag()?),
1349                 };
1350                 if let Some(i) = flags.add_item(item) {
1351                     return Err(self.error(
1352                         self.span_char(),
1353                         ast::ErrorKind::FlagDuplicate {
1354                             original: flags.items[i].span,
1355                         },
1356                     ));
1357                 }
1358             }
1359             if !self.bump() {
1360                 return Err(
1361                     self.error(self.span(), ast::ErrorKind::FlagUnexpectedEof)
1362                 );
1363             }
1364         }
1365         if let Some(span) = last_was_negation {
1366             return Err(self.error(span, ast::ErrorKind::FlagDanglingNegation));
1367         }
1368         flags.span.end = self.pos();
1369         Ok(flags)
1370     }
1371 
1372     /// Parse the current character as a flag. Do not advance the parser.
1373     ///
1374     /// # Errors
1375     ///
1376     /// If the flag is not recognized, then an error is returned.
1377     #[inline(never)]
parse_flag(&self) -> Result<ast::Flag>1378     fn parse_flag(&self) -> Result<ast::Flag> {
1379         match self.char() {
1380             'i' => Ok(ast::Flag::CaseInsensitive),
1381             'm' => Ok(ast::Flag::MultiLine),
1382             's' => Ok(ast::Flag::DotMatchesNewLine),
1383             'U' => Ok(ast::Flag::SwapGreed),
1384             'u' => Ok(ast::Flag::Unicode),
1385             'x' => Ok(ast::Flag::IgnoreWhitespace),
1386             _ => {
1387                 Err(self
1388                     .error(self.span_char(), ast::ErrorKind::FlagUnrecognized))
1389             }
1390         }
1391     }
1392 
1393     /// Parse a primitive AST. e.g., A literal, non-set character class or
1394     /// assertion.
1395     ///
1396     /// This assumes that the parser expects a primitive at the current
1397     /// location. i.e., All other non-primitive cases have been handled.
1398     /// For example, if the parser's position is at `|`, then `|` will be
1399     /// treated as a literal (e.g., inside a character class).
1400     ///
1401     /// This advances the parser to the first character immediately following
1402     /// the primitive.
parse_primitive(&self) -> Result<Primitive>1403     fn parse_primitive(&self) -> Result<Primitive> {
1404         match self.char() {
1405             '\\' => self.parse_escape(),
1406             '.' => {
1407                 let ast = Primitive::Dot(self.span_char());
1408                 self.bump();
1409                 Ok(ast)
1410             }
1411             '^' => {
1412                 let ast = Primitive::Assertion(ast::Assertion {
1413                     span: self.span_char(),
1414                     kind: ast::AssertionKind::StartLine,
1415                 });
1416                 self.bump();
1417                 Ok(ast)
1418             }
1419             '$' => {
1420                 let ast = Primitive::Assertion(ast::Assertion {
1421                     span: self.span_char(),
1422                     kind: ast::AssertionKind::EndLine,
1423                 });
1424                 self.bump();
1425                 Ok(ast)
1426             }
1427             c => {
1428                 let ast = Primitive::Literal(ast::Literal {
1429                     span: self.span_char(),
1430                     kind: ast::LiteralKind::Verbatim,
1431                     c: c,
1432                 });
1433                 self.bump();
1434                 Ok(ast)
1435             }
1436         }
1437     }
1438 
1439     /// Parse an escape sequence as a primitive AST.
1440     ///
1441     /// This assumes the parser is positioned at the start of the escape
1442     /// sequence, i.e., `\`. It advances the parser to the first position
1443     /// immediately following the escape sequence.
1444     #[inline(never)]
parse_escape(&self) -> Result<Primitive>1445     fn parse_escape(&self) -> Result<Primitive> {
1446         assert_eq!(self.char(), '\\');
1447         let start = self.pos();
1448         if !self.bump() {
1449             return Err(self.error(
1450                 Span::new(start, self.pos()),
1451                 ast::ErrorKind::EscapeUnexpectedEof,
1452             ));
1453         }
1454         let c = self.char();
1455         // Put some of the more complicated routines into helpers.
1456         match c {
1457             '0'..='7' => {
1458                 if !self.parser().octal {
1459                     return Err(self.error(
1460                         Span::new(start, self.span_char().end),
1461                         ast::ErrorKind::UnsupportedBackreference,
1462                     ));
1463                 }
1464                 let mut lit = self.parse_octal();
1465                 lit.span.start = start;
1466                 return Ok(Primitive::Literal(lit));
1467             }
1468             '8'..='9' if !self.parser().octal => {
1469                 return Err(self.error(
1470                     Span::new(start, self.span_char().end),
1471                     ast::ErrorKind::UnsupportedBackreference,
1472                 ));
1473             }
1474             'x' | 'u' | 'U' => {
1475                 let mut lit = self.parse_hex()?;
1476                 lit.span.start = start;
1477                 return Ok(Primitive::Literal(lit));
1478             }
1479             'p' | 'P' => {
1480                 let mut cls = self.parse_unicode_class()?;
1481                 cls.span.start = start;
1482                 return Ok(Primitive::Unicode(cls));
1483             }
1484             'd' | 's' | 'w' | 'D' | 'S' | 'W' => {
1485                 let mut cls = self.parse_perl_class();
1486                 cls.span.start = start;
1487                 return Ok(Primitive::Perl(cls));
1488             }
1489             _ => {}
1490         }
1491 
1492         // Handle all of the one letter sequences inline.
1493         self.bump();
1494         let span = Span::new(start, self.pos());
1495         if is_meta_character(c) {
1496             return Ok(Primitive::Literal(ast::Literal {
1497                 span: span,
1498                 kind: ast::LiteralKind::Punctuation,
1499                 c: c,
1500             }));
1501         }
1502         let special = |kind, c| {
1503             Ok(Primitive::Literal(ast::Literal {
1504                 span: span,
1505                 kind: ast::LiteralKind::Special(kind),
1506                 c: c,
1507             }))
1508         };
1509         match c {
1510             'a' => special(ast::SpecialLiteralKind::Bell, '\x07'),
1511             'f' => special(ast::SpecialLiteralKind::FormFeed, '\x0C'),
1512             't' => special(ast::SpecialLiteralKind::Tab, '\t'),
1513             'n' => special(ast::SpecialLiteralKind::LineFeed, '\n'),
1514             'r' => special(ast::SpecialLiteralKind::CarriageReturn, '\r'),
1515             'v' => special(ast::SpecialLiteralKind::VerticalTab, '\x0B'),
1516             ' ' if self.ignore_whitespace() => {
1517                 special(ast::SpecialLiteralKind::Space, ' ')
1518             }
1519             'A' => Ok(Primitive::Assertion(ast::Assertion {
1520                 span: span,
1521                 kind: ast::AssertionKind::StartText,
1522             })),
1523             'z' => Ok(Primitive::Assertion(ast::Assertion {
1524                 span: span,
1525                 kind: ast::AssertionKind::EndText,
1526             })),
1527             'b' => Ok(Primitive::Assertion(ast::Assertion {
1528                 span: span,
1529                 kind: ast::AssertionKind::WordBoundary,
1530             })),
1531             'B' => Ok(Primitive::Assertion(ast::Assertion {
1532                 span: span,
1533                 kind: ast::AssertionKind::NotWordBoundary,
1534             })),
1535             _ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)),
1536         }
1537     }
1538 
1539     /// Parse an octal representation of a Unicode codepoint up to 3 digits
1540     /// long. This expects the parser to be positioned at the first octal
1541     /// digit and advances the parser to the first character immediately
1542     /// following the octal number. This also assumes that parsing octal
1543     /// escapes is enabled.
1544     ///
1545     /// Assuming the preconditions are met, this routine can never fail.
1546     #[inline(never)]
parse_octal(&self) -> ast::Literal1547     fn parse_octal(&self) -> ast::Literal {
1548         use std::char;
1549         use std::u32;
1550 
1551         assert!(self.parser().octal);
1552         assert!('0' <= self.char() && self.char() <= '7');
1553         let start = self.pos();
1554         // Parse up to two more digits.
1555         while self.bump()
1556             && '0' <= self.char()
1557             && self.char() <= '7'
1558             && self.pos().offset - start.offset <= 2
1559         {}
1560         let end = self.pos();
1561         let octal = &self.pattern()[start.offset..end.offset];
1562         // Parsing the octal should never fail since the above guarantees a
1563         // valid number.
1564         let codepoint =
1565             u32::from_str_radix(octal, 8).expect("valid octal number");
1566         // The max value for 3 digit octal is 0777 = 511 and [0, 511] has no
1567         // invalid Unicode scalar values.
1568         let c = char::from_u32(codepoint).expect("Unicode scalar value");
1569         ast::Literal {
1570             span: Span::new(start, end),
1571             kind: ast::LiteralKind::Octal,
1572             c: c,
1573         }
1574     }
1575 
1576     /// Parse a hex representation of a Unicode codepoint. This handles both
1577     /// hex notations, i.e., `\xFF` and `\x{FFFF}`. This expects the parser to
1578     /// be positioned at the `x`, `u` or `U` prefix. The parser is advanced to
1579     /// the first character immediately following the hexadecimal literal.
1580     #[inline(never)]
parse_hex(&self) -> Result<ast::Literal>1581     fn parse_hex(&self) -> Result<ast::Literal> {
1582         assert!(
1583             self.char() == 'x' || self.char() == 'u' || self.char() == 'U'
1584         );
1585 
1586         let hex_kind = match self.char() {
1587             'x' => ast::HexLiteralKind::X,
1588             'u' => ast::HexLiteralKind::UnicodeShort,
1589             _ => ast::HexLiteralKind::UnicodeLong,
1590         };
1591         if !self.bump_and_bump_space() {
1592             return Err(
1593                 self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)
1594             );
1595         }
1596         if self.char() == '{' {
1597             self.parse_hex_brace(hex_kind)
1598         } else {
1599             self.parse_hex_digits(hex_kind)
1600         }
1601     }
1602 
1603     /// Parse an N-digit hex representation of a Unicode codepoint. This
1604     /// expects the parser to be positioned at the first digit and will advance
1605     /// the parser to the first character immediately following the escape
1606     /// sequence.
1607     ///
1608     /// The number of digits given must be 2 (for `\xNN`), 4 (for `\uNNNN`)
1609     /// or 8 (for `\UNNNNNNNN`).
1610     #[inline(never)]
parse_hex_digits( &self, kind: ast::HexLiteralKind, ) -> Result<ast::Literal>1611     fn parse_hex_digits(
1612         &self,
1613         kind: ast::HexLiteralKind,
1614     ) -> Result<ast::Literal> {
1615         use std::char;
1616         use std::u32;
1617 
1618         let mut scratch = self.parser().scratch.borrow_mut();
1619         scratch.clear();
1620 
1621         let start = self.pos();
1622         for i in 0..kind.digits() {
1623             if i > 0 && !self.bump_and_bump_space() {
1624                 return Err(self
1625                     .error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
1626             }
1627             if !is_hex(self.char()) {
1628                 return Err(self.error(
1629                     self.span_char(),
1630                     ast::ErrorKind::EscapeHexInvalidDigit,
1631                 ));
1632             }
1633             scratch.push(self.char());
1634         }
1635         // The final bump just moves the parser past the literal, which may
1636         // be EOF.
1637         self.bump_and_bump_space();
1638         let end = self.pos();
1639         let hex = scratch.as_str();
1640         match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
1641             None => Err(self.error(
1642                 Span::new(start, end),
1643                 ast::ErrorKind::EscapeHexInvalid,
1644             )),
1645             Some(c) => Ok(ast::Literal {
1646                 span: Span::new(start, end),
1647                 kind: ast::LiteralKind::HexFixed(kind),
1648                 c: c,
1649             }),
1650         }
1651     }
1652 
1653     /// Parse a hex representation of any Unicode scalar value. This expects
1654     /// the parser to be positioned at the opening brace `{` and will advance
1655     /// the parser to the first character following the closing brace `}`.
1656     #[inline(never)]
parse_hex_brace( &self, kind: ast::HexLiteralKind, ) -> Result<ast::Literal>1657     fn parse_hex_brace(
1658         &self,
1659         kind: ast::HexLiteralKind,
1660     ) -> Result<ast::Literal> {
1661         use std::char;
1662         use std::u32;
1663 
1664         let mut scratch = self.parser().scratch.borrow_mut();
1665         scratch.clear();
1666 
1667         let brace_pos = self.pos();
1668         let start = self.span_char().end;
1669         while self.bump_and_bump_space() && self.char() != '}' {
1670             if !is_hex(self.char()) {
1671                 return Err(self.error(
1672                     self.span_char(),
1673                     ast::ErrorKind::EscapeHexInvalidDigit,
1674                 ));
1675             }
1676             scratch.push(self.char());
1677         }
1678         if self.is_eof() {
1679             return Err(self.error(
1680                 Span::new(brace_pos, self.pos()),
1681                 ast::ErrorKind::EscapeUnexpectedEof,
1682             ));
1683         }
1684         let end = self.pos();
1685         let hex = scratch.as_str();
1686         assert_eq!(self.char(), '}');
1687         self.bump_and_bump_space();
1688 
1689         if hex.is_empty() {
1690             return Err(self.error(
1691                 Span::new(brace_pos, self.pos()),
1692                 ast::ErrorKind::EscapeHexEmpty,
1693             ));
1694         }
1695         match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
1696             None => Err(self.error(
1697                 Span::new(start, end),
1698                 ast::ErrorKind::EscapeHexInvalid,
1699             )),
1700             Some(c) => Ok(ast::Literal {
1701                 span: Span::new(start, self.pos()),
1702                 kind: ast::LiteralKind::HexBrace(kind),
1703                 c: c,
1704             }),
1705         }
1706     }
1707 
1708     /// Parse a decimal number into a u32 while trimming leading and trailing
1709     /// whitespace.
1710     ///
1711     /// This expects the parser to be positioned at the first position where
1712     /// a decimal digit could occur. This will advance the parser to the byte
1713     /// immediately following the last contiguous decimal digit.
1714     ///
1715     /// If no decimal digit could be found or if there was a problem parsing
1716     /// the complete set of digits into a u32, then an error is returned.
parse_decimal(&self) -> Result<u32>1717     fn parse_decimal(&self) -> Result<u32> {
1718         let mut scratch = self.parser().scratch.borrow_mut();
1719         scratch.clear();
1720 
1721         while !self.is_eof() && self.char().is_whitespace() {
1722             self.bump();
1723         }
1724         let start = self.pos();
1725         while !self.is_eof() && '0' <= self.char() && self.char() <= '9' {
1726             scratch.push(self.char());
1727             self.bump_and_bump_space();
1728         }
1729         let span = Span::new(start, self.pos());
1730         while !self.is_eof() && self.char().is_whitespace() {
1731             self.bump_and_bump_space();
1732         }
1733         let digits = scratch.as_str();
1734         if digits.is_empty() {
1735             return Err(self.error(span, ast::ErrorKind::DecimalEmpty));
1736         }
1737         match u32::from_str_radix(digits, 10).ok() {
1738             Some(n) => Ok(n),
1739             None => Err(self.error(span, ast::ErrorKind::DecimalInvalid)),
1740         }
1741     }
1742 
1743     /// Parse a standard character class consisting primarily of characters or
1744     /// character ranges, but can also contain nested character classes of
1745     /// any type (sans `.`).
1746     ///
1747     /// This assumes the parser is positioned at the opening `[`. If parsing
1748     /// is successful, then the parser is advanced to the position immediately
1749     /// following the closing `]`.
1750     #[inline(never)]
parse_set_class(&self) -> Result<ast::Class>1751     fn parse_set_class(&self) -> Result<ast::Class> {
1752         assert_eq!(self.char(), '[');
1753 
1754         let mut union =
1755             ast::ClassSetUnion { span: self.span(), items: vec![] };
1756         loop {
1757             self.bump_space();
1758             if self.is_eof() {
1759                 return Err(self.unclosed_class_error());
1760             }
1761             match self.char() {
1762                 '[' => {
1763                     // If we've already parsed the opening bracket, then
1764                     // attempt to treat this as the beginning of an ASCII
1765                     // class. If ASCII class parsing fails, then the parser
1766                     // backs up to `[`.
1767                     if !self.parser().stack_class.borrow().is_empty() {
1768                         if let Some(cls) = self.maybe_parse_ascii_class() {
1769                             union.push(ast::ClassSetItem::Ascii(cls));
1770                             continue;
1771                         }
1772                     }
1773                     union = self.push_class_open(union)?;
1774                 }
1775                 ']' => match self.pop_class(union)? {
1776                     Either::Left(nested_union) => {
1777                         union = nested_union;
1778                     }
1779                     Either::Right(class) => return Ok(class),
1780                 },
1781                 '&' if self.peek() == Some('&') => {
1782                     assert!(self.bump_if("&&"));
1783                     union = self.push_class_op(
1784                         ast::ClassSetBinaryOpKind::Intersection,
1785                         union,
1786                     );
1787                 }
1788                 '-' if self.peek() == Some('-') => {
1789                     assert!(self.bump_if("--"));
1790                     union = self.push_class_op(
1791                         ast::ClassSetBinaryOpKind::Difference,
1792                         union,
1793                     );
1794                 }
1795                 '~' if self.peek() == Some('~') => {
1796                     assert!(self.bump_if("~~"));
1797                     union = self.push_class_op(
1798                         ast::ClassSetBinaryOpKind::SymmetricDifference,
1799                         union,
1800                     );
1801                 }
1802                 _ => {
1803                     union.push(self.parse_set_class_range()?);
1804                 }
1805             }
1806         }
1807     }
1808 
1809     /// Parse a single primitive item in a character class set. The item to
1810     /// be parsed can either be one of a simple literal character, a range
1811     /// between two simple literal characters or a "primitive" character
1812     /// class like \w or \p{Greek}.
1813     ///
1814     /// If an invalid escape is found, or if a character class is found where
1815     /// a simple literal is expected (e.g., in a range), then an error is
1816     /// returned.
1817     #[inline(never)]
parse_set_class_range(&self) -> Result<ast::ClassSetItem>1818     fn parse_set_class_range(&self) -> Result<ast::ClassSetItem> {
1819         let prim1 = self.parse_set_class_item()?;
1820         self.bump_space();
1821         if self.is_eof() {
1822             return Err(self.unclosed_class_error());
1823         }
1824         // If the next char isn't a `-`, then we don't have a range.
1825         // There are two exceptions. If the char after a `-` is a `]`, then
1826         // `-` is interpreted as a literal `-`. Alternatively, if the char
1827         // after a `-` is a `-`, then `--` corresponds to a "difference"
1828         // operation.
1829         if self.char() != '-'
1830             || self.peek_space() == Some(']')
1831             || self.peek_space() == Some('-')
1832         {
1833             return prim1.into_class_set_item(self);
1834         }
1835         // OK, now we're parsing a range, so bump past the `-` and parse the
1836         // second half of the range.
1837         if !self.bump_and_bump_space() {
1838             return Err(self.unclosed_class_error());
1839         }
1840         let prim2 = self.parse_set_class_item()?;
1841         let range = ast::ClassSetRange {
1842             span: Span::new(prim1.span().start, prim2.span().end),
1843             start: prim1.into_class_literal(self)?,
1844             end: prim2.into_class_literal(self)?,
1845         };
1846         if !range.is_valid() {
1847             return Err(
1848                 self.error(range.span, ast::ErrorKind::ClassRangeInvalid)
1849             );
1850         }
1851         Ok(ast::ClassSetItem::Range(range))
1852     }
1853 
1854     /// Parse a single item in a character class as a primitive, where the
1855     /// primitive either consists of a verbatim literal or a single escape
1856     /// sequence.
1857     ///
1858     /// This assumes the parser is positioned at the beginning of a primitive,
1859     /// and advances the parser to the first position after the primitive if
1860     /// successful.
1861     ///
1862     /// Note that it is the caller's responsibility to report an error if an
1863     /// illegal primitive was parsed.
1864     #[inline(never)]
parse_set_class_item(&self) -> Result<Primitive>1865     fn parse_set_class_item(&self) -> Result<Primitive> {
1866         if self.char() == '\\' {
1867             self.parse_escape()
1868         } else {
1869             let x = Primitive::Literal(ast::Literal {
1870                 span: self.span_char(),
1871                 kind: ast::LiteralKind::Verbatim,
1872                 c: self.char(),
1873             });
1874             self.bump();
1875             Ok(x)
1876         }
1877     }
1878 
1879     /// Parses the opening of a character class set. This includes the opening
1880     /// bracket along with `^` if present to indicate negation. This also
1881     /// starts parsing the opening set of unioned items if applicable, since
1882     /// there are special rules applied to certain characters in the opening
1883     /// of a character class. For example, `[^]]` is the class of all
1884     /// characters not equal to `]`. (`]` would need to be escaped in any other
1885     /// position.) Similarly for `-`.
1886     ///
1887     /// In all cases, the op inside the returned `ast::ClassBracketed` is an
1888     /// empty union. This empty union should be replaced with the actual item
1889     /// when it is popped from the parser's stack.
1890     ///
1891     /// This assumes the parser is positioned at the opening `[` and advances
1892     /// the parser to the first non-special byte of the character class.
1893     ///
1894     /// An error is returned if EOF is found.
1895     #[inline(never)]
parse_set_class_open( &self, ) -> Result<(ast::ClassBracketed, ast::ClassSetUnion)>1896     fn parse_set_class_open(
1897         &self,
1898     ) -> Result<(ast::ClassBracketed, ast::ClassSetUnion)> {
1899         assert_eq!(self.char(), '[');
1900         let start = self.pos();
1901         if !self.bump_and_bump_space() {
1902             return Err(self.error(
1903                 Span::new(start, self.pos()),
1904                 ast::ErrorKind::ClassUnclosed,
1905             ));
1906         }
1907 
1908         let negated = if self.char() != '^' {
1909             false
1910         } else {
1911             if !self.bump_and_bump_space() {
1912                 return Err(self.error(
1913                     Span::new(start, self.pos()),
1914                     ast::ErrorKind::ClassUnclosed,
1915                 ));
1916             }
1917             true
1918         };
1919         // Accept any number of `-` as literal `-`.
1920         let mut union =
1921             ast::ClassSetUnion { span: self.span(), items: vec![] };
1922         while self.char() == '-' {
1923             union.push(ast::ClassSetItem::Literal(ast::Literal {
1924                 span: self.span_char(),
1925                 kind: ast::LiteralKind::Verbatim,
1926                 c: '-',
1927             }));
1928             if !self.bump_and_bump_space() {
1929                 return Err(self.error(
1930                     Span::new(start, self.pos()),
1931                     ast::ErrorKind::ClassUnclosed,
1932                 ));
1933             }
1934         }
1935         // If `]` is the *first* char in a set, then interpret it as a literal
1936         // `]`. That is, an empty class is impossible to write.
1937         if union.items.is_empty() && self.char() == ']' {
1938             union.push(ast::ClassSetItem::Literal(ast::Literal {
1939                 span: self.span_char(),
1940                 kind: ast::LiteralKind::Verbatim,
1941                 c: ']',
1942             }));
1943             if !self.bump_and_bump_space() {
1944                 return Err(self.error(
1945                     Span::new(start, self.pos()),
1946                     ast::ErrorKind::ClassUnclosed,
1947                 ));
1948             }
1949         }
1950         let set = ast::ClassBracketed {
1951             span: Span::new(start, self.pos()),
1952             negated: negated,
1953             kind: ast::ClassSet::union(ast::ClassSetUnion {
1954                 span: Span::new(union.span.start, union.span.start),
1955                 items: vec![],
1956             }),
1957         };
1958         Ok((set, union))
1959     }
1960 
1961     /// Attempt to parse an ASCII character class, e.g., `[:alnum:]`.
1962     ///
1963     /// This assumes the parser is positioned at the opening `[`.
1964     ///
1965     /// If no valid ASCII character class could be found, then this does not
1966     /// advance the parser and `None` is returned. Otherwise, the parser is
1967     /// advanced to the first byte following the closing `]` and the
1968     /// corresponding ASCII class is returned.
1969     #[inline(never)]
maybe_parse_ascii_class(&self) -> Option<ast::ClassAscii>1970     fn maybe_parse_ascii_class(&self) -> Option<ast::ClassAscii> {
1971         // ASCII character classes are interesting from a parsing perspective
1972         // because parsing cannot fail with any interesting error. For example,
1973         // in order to use an ASCII character class, it must be enclosed in
1974         // double brackets, e.g., `[[:alnum:]]`. Alternatively, you might think
1975         // of it as "ASCII character characters have the syntax `[:NAME:]`
1976         // which can only appear within character brackets." This means that
1977         // things like `[[:lower:]A]` are legal constructs.
1978         //
1979         // However, if one types an incorrect ASCII character class, e.g.,
1980         // `[[:loower:]]`, then we treat that as a normal nested character
1981         // class containing the characters `:elorw`. One might argue that we
1982         // should return an error instead since the repeated colons give away
1983         // the intent to write an ASCII class. But what if the user typed
1984         // `[[:lower]]` instead? How can we tell that was intended to be an
1985         // ASCII class and not just a normal nested class?
1986         //
1987         // Reasonable people can probably disagree over this, but for better
1988         // or worse, we implement semantics that never fails at the expense
1989         // of better failure modes.
1990         assert_eq!(self.char(), '[');
1991         // If parsing fails, then we back up the parser to this starting point.
1992         let start = self.pos();
1993         let mut negated = false;
1994         if !self.bump() || self.char() != ':' {
1995             self.parser().pos.set(start);
1996             return None;
1997         }
1998         if !self.bump() {
1999             self.parser().pos.set(start);
2000             return None;
2001         }
2002         if self.char() == '^' {
2003             negated = true;
2004             if !self.bump() {
2005                 self.parser().pos.set(start);
2006                 return None;
2007             }
2008         }
2009         let name_start = self.offset();
2010         while self.char() != ':' && self.bump() {}
2011         if self.is_eof() {
2012             self.parser().pos.set(start);
2013             return None;
2014         }
2015         let name = &self.pattern()[name_start..self.offset()];
2016         if !self.bump_if(":]") {
2017             self.parser().pos.set(start);
2018             return None;
2019         }
2020         let kind = match ast::ClassAsciiKind::from_name(name) {
2021             Some(kind) => kind,
2022             None => {
2023                 self.parser().pos.set(start);
2024                 return None;
2025             }
2026         };
2027         Some(ast::ClassAscii {
2028             span: Span::new(start, self.pos()),
2029             kind: kind,
2030             negated: negated,
2031         })
2032     }
2033 
2034     /// Parse a Unicode class in either the single character notation, `\pN`
2035     /// or the multi-character bracketed notation, `\p{Greek}`. This assumes
2036     /// the parser is positioned at the `p` (or `P` for negation) and will
2037     /// advance the parser to the character immediately following the class.
2038     ///
2039     /// Note that this does not check whether the class name is valid or not.
2040     #[inline(never)]
parse_unicode_class(&self) -> Result<ast::ClassUnicode>2041     fn parse_unicode_class(&self) -> Result<ast::ClassUnicode> {
2042         assert!(self.char() == 'p' || self.char() == 'P');
2043 
2044         let mut scratch = self.parser().scratch.borrow_mut();
2045         scratch.clear();
2046 
2047         let negated = self.char() == 'P';
2048         if !self.bump_and_bump_space() {
2049             return Err(
2050                 self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)
2051             );
2052         }
2053         let (start, kind) = if self.char() == '{' {
2054             let start = self.span_char().end;
2055             while self.bump_and_bump_space() && self.char() != '}' {
2056                 scratch.push(self.char());
2057             }
2058             if self.is_eof() {
2059                 return Err(self
2060                     .error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2061             }
2062             assert_eq!(self.char(), '}');
2063             self.bump();
2064 
2065             let name = scratch.as_str();
2066             if let Some(i) = name.find("!=") {
2067                 (
2068                     start,
2069                     ast::ClassUnicodeKind::NamedValue {
2070                         op: ast::ClassUnicodeOpKind::NotEqual,
2071                         name: name[..i].to_string(),
2072                         value: name[i + 2..].to_string(),
2073                     },
2074                 )
2075             } else if let Some(i) = name.find(':') {
2076                 (
2077                     start,
2078                     ast::ClassUnicodeKind::NamedValue {
2079                         op: ast::ClassUnicodeOpKind::Colon,
2080                         name: name[..i].to_string(),
2081                         value: name[i + 1..].to_string(),
2082                     },
2083                 )
2084             } else if let Some(i) = name.find('=') {
2085                 (
2086                     start,
2087                     ast::ClassUnicodeKind::NamedValue {
2088                         op: ast::ClassUnicodeOpKind::Equal,
2089                         name: name[..i].to_string(),
2090                         value: name[i + 1..].to_string(),
2091                     },
2092                 )
2093             } else {
2094                 (start, ast::ClassUnicodeKind::Named(name.to_string()))
2095             }
2096         } else {
2097             let start = self.pos();
2098             let c = self.char();
2099             if c == '\\' {
2100                 return Err(self.error(
2101                     self.span_char(),
2102                     ast::ErrorKind::UnicodeClassInvalid,
2103                 ));
2104             }
2105             self.bump_and_bump_space();
2106             let kind = ast::ClassUnicodeKind::OneLetter(c);
2107             (start, kind)
2108         };
2109         Ok(ast::ClassUnicode {
2110             span: Span::new(start, self.pos()),
2111             negated: negated,
2112             kind: kind,
2113         })
2114     }
2115 
2116     /// Parse a Perl character class, e.g., `\d` or `\W`. This assumes the
2117     /// parser is currently at a valid character class name and will be
2118     /// advanced to the character immediately following the class.
2119     #[inline(never)]
parse_perl_class(&self) -> ast::ClassPerl2120     fn parse_perl_class(&self) -> ast::ClassPerl {
2121         let c = self.char();
2122         let span = self.span_char();
2123         self.bump();
2124         let (negated, kind) = match c {
2125             'd' => (false, ast::ClassPerlKind::Digit),
2126             'D' => (true, ast::ClassPerlKind::Digit),
2127             's' => (false, ast::ClassPerlKind::Space),
2128             'S' => (true, ast::ClassPerlKind::Space),
2129             'w' => (false, ast::ClassPerlKind::Word),
2130             'W' => (true, ast::ClassPerlKind::Word),
2131             c => panic!("expected valid Perl class but got '{}'", c),
2132         };
2133         ast::ClassPerl { span: span, kind: kind, negated: negated }
2134     }
2135 }
2136 
/// A type that traverses a fully parsed Ast and checks whether its depth
/// exceeds the specified nesting limit. If it does, then an error is returned.
///
/// The limit is read from the parser's `nest_limit` setting each time the
/// depth is about to be incremented.
#[derive(Debug)]
struct NestLimiter<'p, 's, P> {
    /// The parser that is checking the nest limit. It supplies the
    /// configured limit and is used to build the error that is reported when
    /// the limit is exceeded.
    p: &'p ParserI<'s, P>,
    /// The current depth while walking an Ast. A new limiter starts at zero.
    depth: u32,
}
2146 
2147 impl<'p, 's, P: Borrow<Parser>> NestLimiter<'p, 's, P> {
new(p: &'p ParserI<'s, P>) -> NestLimiter<'p, 's, P>2148     fn new(p: &'p ParserI<'s, P>) -> NestLimiter<'p, 's, P> {
2149         NestLimiter { p: p, depth: 0 }
2150     }
2151 
2152     #[inline(never)]
check(self, ast: &Ast) -> Result<()>2153     fn check(self, ast: &Ast) -> Result<()> {
2154         ast::visit(ast, self)
2155     }
2156 
increment_depth(&mut self, span: &Span) -> Result<()>2157     fn increment_depth(&mut self, span: &Span) -> Result<()> {
2158         let new = self.depth.checked_add(1).ok_or_else(|| {
2159             self.p.error(
2160                 span.clone(),
2161                 ast::ErrorKind::NestLimitExceeded(::std::u32::MAX),
2162             )
2163         })?;
2164         let limit = self.p.parser().nest_limit;
2165         if new > limit {
2166             return Err(self.p.error(
2167                 span.clone(),
2168                 ast::ErrorKind::NestLimitExceeded(limit),
2169             ));
2170         }
2171         self.depth = new;
2172         Ok(())
2173     }
2174 
decrement_depth(&mut self)2175     fn decrement_depth(&mut self) {
2176         // Assuming the correctness of the visitor, this should never drop
2177         // below 0.
2178         self.depth = self.depth.checked_sub(1).unwrap();
2179     }
2180 }
2181 
2182 impl<'p, 's, P: Borrow<Parser>> ast::Visitor for NestLimiter<'p, 's, P> {
2183     type Output = ();
2184     type Err = ast::Error;
2185 
    /// Produces the visitor's final result, which is always `Ok(())`. Nest
    /// limit violations are reported as errors from the visit callbacks, so
    /// nothing remains to be computed here.
    fn finish(self) -> Result<()> {
        Ok(())
    }
2189 
visit_pre(&mut self, ast: &Ast) -> Result<()>2190     fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
2191         let span = match *ast {
2192             Ast::Empty(_)
2193             | Ast::Flags(_)
2194             | Ast::Literal(_)
2195             | Ast::Dot(_)
2196             | Ast::Assertion(_)
2197             | Ast::Class(ast::Class::Unicode(_))
2198             | Ast::Class(ast::Class::Perl(_)) => {
2199                 // These are all base cases, so we don't increment depth.
2200                 return Ok(());
2201             }
2202             Ast::Class(ast::Class::Bracketed(ref x)) => &x.span,
2203             Ast::Repetition(ref x) => &x.span,
2204             Ast::Group(ref x) => &x.span,
2205             Ast::Alternation(ref x) => &x.span,
2206             Ast::Concat(ref x) => &x.span,
2207         };
2208         self.increment_depth(span)
2209     }
2210 
visit_post(&mut self, ast: &Ast) -> Result<()>2211     fn visit_post(&mut self, ast: &Ast) -> Result<()> {
2212         match *ast {
2213             Ast::Empty(_)
2214             | Ast::Flags(_)
2215             | Ast::Literal(_)
2216             | Ast::Dot(_)
2217             | Ast::Assertion(_)
2218             | Ast::Class(ast::Class::Unicode(_))
2219             | Ast::Class(ast::Class::Perl(_)) => {
2220                 // These are all base cases, so we don't decrement depth.
2221                 Ok(())
2222             }
2223             Ast::Class(ast::Class::Bracketed(_))
2224             | Ast::Repetition(_)
2225             | Ast::Group(_)
2226             | Ast::Alternation(_)
2227             | Ast::Concat(_) => {
2228                 self.decrement_depth();
2229                 Ok(())
2230             }
2231         }
2232     }
2233 
visit_class_set_item_pre( &mut self, ast: &ast::ClassSetItem, ) -> Result<()>2234     fn visit_class_set_item_pre(
2235         &mut self,
2236         ast: &ast::ClassSetItem,
2237     ) -> Result<()> {
2238         let span = match *ast {
2239             ast::ClassSetItem::Empty(_)
2240             | ast::ClassSetItem::Literal(_)
2241             | ast::ClassSetItem::Range(_)
2242             | ast::ClassSetItem::Ascii(_)
2243             | ast::ClassSetItem::Unicode(_)
2244             | ast::ClassSetItem::Perl(_) => {
2245                 // These are all base cases, so we don't increment depth.
2246                 return Ok(());
2247             }
2248             ast::ClassSetItem::Bracketed(ref x) => &x.span,
2249             ast::ClassSetItem::Union(ref x) => &x.span,
2250         };
2251         self.increment_depth(span)
2252     }
2253 
visit_class_set_item_post( &mut self, ast: &ast::ClassSetItem, ) -> Result<()>2254     fn visit_class_set_item_post(
2255         &mut self,
2256         ast: &ast::ClassSetItem,
2257     ) -> Result<()> {
2258         match *ast {
2259             ast::ClassSetItem::Empty(_)
2260             | ast::ClassSetItem::Literal(_)
2261             | ast::ClassSetItem::Range(_)
2262             | ast::ClassSetItem::Ascii(_)
2263             | ast::ClassSetItem::Unicode(_)
2264             | ast::ClassSetItem::Perl(_) => {
2265                 // These are all base cases, so we don't decrement depth.
2266                 Ok(())
2267             }
2268             ast::ClassSetItem::Bracketed(_) | ast::ClassSetItem::Union(_) => {
2269                 self.decrement_depth();
2270                 Ok(())
2271             }
2272         }
2273     }
2274 
visit_class_set_binary_op_pre( &mut self, ast: &ast::ClassSetBinaryOp, ) -> Result<()>2275     fn visit_class_set_binary_op_pre(
2276         &mut self,
2277         ast: &ast::ClassSetBinaryOp,
2278     ) -> Result<()> {
2279         self.increment_depth(&ast.span)
2280     }
2281 
visit_class_set_binary_op_post( &mut self, _ast: &ast::ClassSetBinaryOp, ) -> Result<()>2282     fn visit_class_set_binary_op_post(
2283         &mut self,
2284         _ast: &ast::ClassSetBinaryOp,
2285     ) -> Result<()> {
2286         self.decrement_depth();
2287         Ok(())
2288     }
2289 }
2290 
2291 /// When the result is an error, transforms the ast::ErrorKind from the source
2292 /// Result into another one. This function is used to return clearer error
2293 /// messages when possible.
specialize_err<T>( result: Result<T>, from: ast::ErrorKind, to: ast::ErrorKind, ) -> Result<T>2294 fn specialize_err<T>(
2295     result: Result<T>,
2296     from: ast::ErrorKind,
2297     to: ast::ErrorKind,
2298 ) -> Result<T> {
2299     if let Err(e) = result {
2300         if e.kind == from {
2301             Err(ast::Error { kind: to, pattern: e.pattern, span: e.span })
2302         } else {
2303             Err(e)
2304         }
2305     } else {
2306         result
2307     }
2308 }
2309 
2310 #[cfg(test)]
2311 mod tests {
2312     use std::ops::Range;
2313 
2314     use super::{Parser, ParserBuilder, ParserI, Primitive};
2315     use crate::ast::{self, Ast, Position, Span};
2316 
    // Our own assert_eq, which has slightly better formatting (but honestly
    // still kind of crappy).
    //
    // Because it is declared here under the same name, uses of `assert_eq!`
    // later in this module expand to this macro. On a mismatch it panics
    // with the `Debug` rendering of both operands, each on its own line.
    macro_rules! assert_eq {
        ($left:expr, $right:expr) => {{
            // Each operand expression is evaluated exactly once and is only
            // borrowed for the comparison.
            match (&$left, &$right) {
                (left_val, right_val) => {
                    if !(*left_val == *right_val) {
                        panic!(
                            "assertion failed: `(left == right)`\n\n\
                             left:  `{:?}`\nright: `{:?}`\n\n",
                            left_val, right_val
                        )
                    }
                }
            }
        }};
    }
2334 
    // We create these errors to compare with real ast::Errors in the tests.
    // We define equality between TestError and ast::Error to disregard the
    // pattern string in ast::Error, which is annoying to provide in tests.
    #[derive(Clone, Debug)]
    struct TestError {
        /// The span the real error is expected to report.
        span: Span,
        /// The kind the real error is expected to report.
        kind: ast::ErrorKind,
    }
2343 
2344     impl PartialEq<ast::Error> for TestError {
eq(&self, other: &ast::Error) -> bool2345         fn eq(&self, other: &ast::Error) -> bool {
2346             self.span == other.span && self.kind == other.kind
2347         }
2348     }
2349 
2350     impl PartialEq<TestError> for ast::Error {
eq(&self, other: &TestError) -> bool2351         fn eq(&self, other: &TestError) -> bool {
2352             self.span == other.span && self.kind == other.kind
2353         }
2354     }
2355 
s(str: &str) -> String2356     fn s(str: &str) -> String {
2357         str.to_string()
2358     }
2359 
parser(pattern: &str) -> ParserI<'_, Parser>2360     fn parser(pattern: &str) -> ParserI<'_, Parser> {
2361         ParserI::new(Parser::new(), pattern)
2362     }
2363 
parser_octal(pattern: &str) -> ParserI<'_, Parser>2364     fn parser_octal(pattern: &str) -> ParserI<'_, Parser> {
2365         let parser = ParserBuilder::new().octal(true).build();
2366         ParserI::new(parser, pattern)
2367     }
2368 
parser_nest_limit( pattern: &str, nest_limit: u32, ) -> ParserI<'_, Parser>2369     fn parser_nest_limit(
2370         pattern: &str,
2371         nest_limit: u32,
2372     ) -> ParserI<'_, Parser> {
2373         let p = ParserBuilder::new().nest_limit(nest_limit).build();
2374         ParserI::new(p, pattern)
2375     }
2376 
parser_ignore_whitespace(pattern: &str) -> ParserI<'_, Parser>2377     fn parser_ignore_whitespace(pattern: &str) -> ParserI<'_, Parser> {
2378         let p = ParserBuilder::new().ignore_whitespace(true).build();
2379         ParserI::new(p, pattern)
2380     }
2381 
    /// Short alias for creating a new span.
    ///
    /// Forwards `start` and `end` to `Span::new` unchanged.
    fn nspan(start: Position, end: Position) -> Span {
        Span::new(start, end)
    }
2386 
    /// Short alias for creating a new position.
    ///
    /// Forwards `offset`, `line` and `column` to `Position::new` unchanged.
    fn npos(offset: usize, line: usize, column: usize) -> Position {
        Position::new(offset, line, column)
    }
2391 
2392     /// Create a new span from the given offset range. This assumes a single
2393     /// line and sets the columns based on the offsets. i.e., This only works
2394     /// out of the box for ASCII, which is fine for most tests.
span(range: Range<usize>) -> Span2395     fn span(range: Range<usize>) -> Span {
2396         let start = Position::new(range.start, 1, range.start + 1);
2397         let end = Position::new(range.end, 1, range.end + 1);
2398         Span::new(start, end)
2399     }
2400 
2401     /// Create a new span for the corresponding byte range in the given string.
span_range(subject: &str, range: Range<usize>) -> Span2402     fn span_range(subject: &str, range: Range<usize>) -> Span {
2403         let start = Position {
2404             offset: range.start,
2405             line: 1 + subject[..range.start].matches('\n').count(),
2406             column: 1 + subject[..range.start]
2407                 .chars()
2408                 .rev()
2409                 .position(|c| c == '\n')
2410                 .unwrap_or(subject[..range.start].chars().count()),
2411         };
2412         let end = Position {
2413             offset: range.end,
2414             line: 1 + subject[..range.end].matches('\n').count(),
2415             column: 1 + subject[..range.end]
2416                 .chars()
2417                 .rev()
2418                 .position(|c| c == '\n')
2419                 .unwrap_or(subject[..range.end].chars().count()),
2420         };
2421         Span::new(start, end)
2422     }
2423 
2424     /// Create a verbatim literal starting at the given position.
lit(c: char, start: usize) -> Ast2425     fn lit(c: char, start: usize) -> Ast {
2426         lit_with(c, span(start..start + c.len_utf8()))
2427     }
2428 
2429     /// Create a punctuation literal starting at the given position.
punct_lit(c: char, span: Span) -> Ast2430     fn punct_lit(c: char, span: Span) -> Ast {
2431         Ast::Literal(ast::Literal {
2432             span: span,
2433             kind: ast::LiteralKind::Punctuation,
2434             c: c,
2435         })
2436     }
2437 
2438     /// Create a verbatim literal with the given span.
lit_with(c: char, span: Span) -> Ast2439     fn lit_with(c: char, span: Span) -> Ast {
2440         Ast::Literal(ast::Literal {
2441             span: span,
2442             kind: ast::LiteralKind::Verbatim,
2443             c: c,
2444         })
2445     }
2446 
2447     /// Create a concatenation with the given range.
concat(range: Range<usize>, asts: Vec<Ast>) -> Ast2448     fn concat(range: Range<usize>, asts: Vec<Ast>) -> Ast {
2449         concat_with(span(range), asts)
2450     }
2451 
2452     /// Create a concatenation with the given span.
concat_with(span: Span, asts: Vec<Ast>) -> Ast2453     fn concat_with(span: Span, asts: Vec<Ast>) -> Ast {
2454         Ast::Concat(ast::Concat { span: span, asts: asts })
2455     }
2456 
2457     /// Create an alternation with the given span.
alt(range: Range<usize>, asts: Vec<Ast>) -> Ast2458     fn alt(range: Range<usize>, asts: Vec<Ast>) -> Ast {
2459         Ast::Alternation(ast::Alternation { span: span(range), asts: asts })
2460     }
2461 
2462     /// Create a capturing group with the given span.
group(range: Range<usize>, index: u32, ast: Ast) -> Ast2463     fn group(range: Range<usize>, index: u32, ast: Ast) -> Ast {
2464         Ast::Group(ast::Group {
2465             span: span(range),
2466             kind: ast::GroupKind::CaptureIndex(index),
2467             ast: Box::new(ast),
2468         })
2469     }
2470 
2471     /// Create an ast::SetFlags.
2472     ///
2473     /// The given pattern should be the full pattern string. The range given
2474     /// should correspond to the byte offsets where the flag set occurs.
2475     ///
2476     /// If negated is true, then the set is interpreted as beginning with a
2477     /// negation.
flag_set( pat: &str, range: Range<usize>, flag: ast::Flag, negated: bool, ) -> Ast2478     fn flag_set(
2479         pat: &str,
2480         range: Range<usize>,
2481         flag: ast::Flag,
2482         negated: bool,
2483     ) -> Ast {
2484         let mut items = vec![ast::FlagsItem {
2485             span: span_range(pat, (range.end - 2)..(range.end - 1)),
2486             kind: ast::FlagsItemKind::Flag(flag),
2487         }];
2488         if negated {
2489             items.insert(
2490                 0,
2491                 ast::FlagsItem {
2492                     span: span_range(pat, (range.start + 2)..(range.end - 2)),
2493                     kind: ast::FlagsItemKind::Negation,
2494                 },
2495             );
2496         }
2497         Ast::Flags(ast::SetFlags {
2498             span: span_range(pat, range.clone()),
2499             flags: ast::Flags {
2500                 span: span_range(pat, (range.start + 2)..(range.end - 1)),
2501                 items: items,
2502             },
2503         })
2504     }
2505 
    // Checks which patterns are accepted or rejected under small nest
    // limits, and that a rejection reports the span of the construct that
    // crossed the limit together with the limit itself.
    #[test]
    fn parse_nest_limit() {
        // A nest limit of 0 still allows some types of regexes.
        assert_eq!(
            parser_nest_limit("", 0).parse(),
            Ok(Ast::Empty(span(0..0)))
        );
        assert_eq!(parser_nest_limit("a", 0).parse(), Ok(lit('a', 0)));

        // Test repetition operations, which require one level of nesting.
        assert_eq!(
            parser_nest_limit("a+", 0).parse().unwrap_err(),
            TestError {
                span: span(0..2),
                kind: ast::ErrorKind::NestLimitExceeded(0),
            }
        );
        assert_eq!(
            parser_nest_limit("a+", 1).parse(),
            Ok(Ast::Repetition(ast::Repetition {
                span: span(0..2),
                op: ast::RepetitionOp {
                    span: span(1..2),
                    kind: ast::RepetitionKind::OneOrMore,
                },
                greedy: true,
                ast: Box::new(lit('a', 0)),
            }))
        );
        // The repetition takes the only available level, so the error is
        // reported at the group `(a)` nested inside it.
        assert_eq!(
            parser_nest_limit("(a)+", 1).parse().unwrap_err(),
            TestError {
                span: span(0..3),
                kind: ast::ErrorKind::NestLimitExceeded(1),
            }
        );
        // Likewise, the error is reported at the inner repetition `a+`.
        assert_eq!(
            parser_nest_limit("a+*", 1).parse().unwrap_err(),
            TestError {
                span: span(0..2),
                kind: ast::ErrorKind::NestLimitExceeded(1),
            }
        );
        assert_eq!(
            parser_nest_limit("a+*", 2).parse(),
            Ok(Ast::Repetition(ast::Repetition {
                span: span(0..3),
                op: ast::RepetitionOp {
                    span: span(2..3),
                    kind: ast::RepetitionKind::ZeroOrMore,
                },
                greedy: true,
                ast: Box::new(Ast::Repetition(ast::Repetition {
                    span: span(0..2),
                    op: ast::RepetitionOp {
                        span: span(1..2),
                        kind: ast::RepetitionKind::OneOrMore,
                    },
                    greedy: true,
                    ast: Box::new(lit('a', 0)),
                })),
            }))
        );

        // Test concatenations. A concatenation requires one level of nesting.
        assert_eq!(
            parser_nest_limit("ab", 0).parse().unwrap_err(),
            TestError {
                span: span(0..2),
                kind: ast::ErrorKind::NestLimitExceeded(0),
            }
        );
        assert_eq!(
            parser_nest_limit("ab", 1).parse(),
            Ok(concat(0..2, vec![lit('a', 0), lit('b', 1)]))
        );
        // A longer concatenation is still a single level.
        assert_eq!(
            parser_nest_limit("abc", 1).parse(),
            Ok(concat(0..3, vec![lit('a', 0), lit('b', 1), lit('c', 2)]))
        );

        // Test alternations. An alternation requires one level of nesting.
        assert_eq!(
            parser_nest_limit("a|b", 0).parse().unwrap_err(),
            TestError {
                span: span(0..3),
                kind: ast::ErrorKind::NestLimitExceeded(0),
            }
        );
        assert_eq!(
            parser_nest_limit("a|b", 1).parse(),
            Ok(alt(0..3, vec![lit('a', 0), lit('b', 2)]))
        );
        // More branches do not add more levels.
        assert_eq!(
            parser_nest_limit("a|b|c", 1).parse(),
            Ok(alt(0..5, vec![lit('a', 0), lit('b', 2), lit('c', 4)]))
        );

        // Test character classes. Classes form their own mini-recursive
        // syntax!
        assert_eq!(
            parser_nest_limit("[a]", 0).parse().unwrap_err(),
            TestError {
                span: span(0..3),
                kind: ast::ErrorKind::NestLimitExceeded(0),
            }
        );
        assert_eq!(
            parser_nest_limit("[a]", 1).parse(),
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
                span: span(0..3),
                negated: false,
                kind: ast::ClassSet::Item(ast::ClassSetItem::Literal(
                    ast::Literal {
                        span: span(1..2),
                        kind: ast::LiteralKind::Verbatim,
                        c: 'a',
                    }
                )),
            })))
        );
        // Two items form a union (`ab`, offsets 1..3), which is a level of
        // its own inside the brackets.
        assert_eq!(
            parser_nest_limit("[ab]", 1).parse().unwrap_err(),
            TestError {
                span: span(1..3),
                kind: ast::ErrorKind::NestLimitExceeded(1),
            }
        );
        // With two levels available, the nested class `[cd]` (3..7) is the
        // first construct that does not fit.
        assert_eq!(
            parser_nest_limit("[ab[cd]]", 2).parse().unwrap_err(),
            TestError {
                span: span(3..7),
                kind: ast::ErrorKind::NestLimitExceeded(2),
            }
        );
        // With three levels available, it is the union `cd` (4..6) inside
        // the nested class.
        assert_eq!(
            parser_nest_limit("[ab[cd]]", 3).parse().unwrap_err(),
            TestError {
                span: span(4..6),
                kind: ast::ErrorKind::NestLimitExceeded(3),
            }
        );
        // A binary set operation (`a--b`, 1..5) is a level of its own.
        assert_eq!(
            parser_nest_limit("[a--b]", 1).parse().unwrap_err(),
            TestError {
                span: span(1..5),
                kind: ast::ErrorKind::NestLimitExceeded(1),
            }
        );
        // The union `bc` (4..6) on the right-hand side adds a third level.
        assert_eq!(
            parser_nest_limit("[a--bc]", 2).parse().unwrap_err(),
            TestError {
                span: span(4..6),
                kind: ast::ErrorKind::NestLimitExceeded(2),
            }
        );
    }
2663 
    // Checks that, with the `x` flag set, comments are left out of the Ast
    // and are reported separately together with their spans.
    #[test]
    fn parse_comments() {
        // The pattern is a multi-line string literal, so its continuation
        // lines start at column zero and all offsets below depend on that.
        let pat = "(?x)
# This is comment 1.
foo # This is comment 2.
  # This is comment 3.
bar
# This is comment 4.";
        let astc = parser(pat).parse_with_comments().unwrap();
        // Only the flag set and the literals of `foo` and `bar` are expected
        // in the Ast.
        assert_eq!(
            astc.ast,
            concat_with(
                span_range(pat, 0..pat.len()),
                vec![
                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                    lit_with('f', span_range(pat, 26..27)),
                    lit_with('o', span_range(pat, 27..28)),
                    lit_with('o', span_range(pat, 28..29)),
                    lit_with('b', span_range(pat, 74..75)),
                    lit_with('a', span_range(pat, 75..76)),
                    lit_with('r', span_range(pat, 76..77)),
                ]
            )
        );
        // Each expected span starts at the `#` and runs through the line
        // break that ends the comment; the last comment has no line break
        // and stops at the end of the pattern. The expected text excludes
        // both the `#` and the line break.
        assert_eq!(
            astc.comments,
            vec![
                ast::Comment {
                    span: span_range(pat, 5..26),
                    comment: s(" This is comment 1."),
                },
                ast::Comment {
                    span: span_range(pat, 30..51),
                    comment: s(" This is comment 2."),
                },
                ast::Comment {
                    span: span_range(pat, 53..74),
                    comment: s(" This is comment 3."),
                },
                ast::Comment {
                    span: span_range(pat, 78..98),
                    comment: s(" This is comment 4."),
                },
            ]
        );
    }
2710 
    // Checks a stray closing bracket and a run of escaped punctuation
    // characters parsed as one pattern.
    #[test]
    fn parse_holistic() {
        // A `]` with no class open is expected to parse as a plain literal.
        assert_eq!(parser("]").parse(), Ok(lit(']', 0)));
        // Each two-byte escape is expected to become one punctuation
        // literal whose span covers the backslash and the escaped character.
        assert_eq!(
            parser(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#\&\-\~").parse(),
            Ok(concat(
                0..36,
                vec![
                    punct_lit('\\', span(0..2)),
                    punct_lit('.', span(2..4)),
                    punct_lit('+', span(4..6)),
                    punct_lit('*', span(6..8)),
                    punct_lit('?', span(8..10)),
                    punct_lit('(', span(10..12)),
                    punct_lit(')', span(12..14)),
                    punct_lit('|', span(14..16)),
                    punct_lit('[', span(16..18)),
                    punct_lit(']', span(18..20)),
                    punct_lit('{', span(20..22)),
                    punct_lit('}', span(22..24)),
                    punct_lit('^', span(24..26)),
                    punct_lit('$', span(26..28)),
                    punct_lit('#', span(28..30)),
                    punct_lit('&', span(30..32)),
                    punct_lit('-', span(32..34)),
                    punct_lit('~', span(34..36)),
                ]
            ))
        );
    }
2741 
    // Checks how the `x` flag changes the treatment of whitespace: inside
    // the pattern proper, when toggled on and off, when scoped to a group,
    // right after an opening parenthesis, inside a `\x{...}` escape, and
    // after a backslash.
    #[test]
    fn parse_ignore_whitespace() {
        // Test that basic whitespace insensitivity works.
        let pat = "(?x)a b";
        assert_eq!(
            parser(pat).parse(),
            Ok(concat_with(
                nspan(npos(0, 1, 1), npos(7, 1, 8)),
                vec![
                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                    lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
                    lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
                ]
            ))
        );

        // Test that we can toggle whitespace insensitivity.
        let pat = "(?x)a b(?-x)a b";
        assert_eq!(
            parser(pat).parse(),
            Ok(concat_with(
                nspan(npos(0, 1, 1), npos(15, 1, 16)),
                vec![
                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                    lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
                    lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
                    flag_set(pat, 7..12, ast::Flag::IgnoreWhitespace, true),
                    lit_with('a', nspan(npos(12, 1, 13), npos(13, 1, 14))),
                    lit_with(' ', nspan(npos(13, 1, 14), npos(14, 1, 15))),
                    lit_with('b', nspan(npos(14, 1, 15), npos(15, 1, 16))),
                ]
            ))
        );

        // Test that nesting whitespace insensitive flags works.
        // The spaces outside the group are expected as literals, while the
        // one inside the `(?x:...)` group is dropped.
        let pat = "a (?x:a )a ";
        assert_eq!(
            parser(pat).parse(),
            Ok(concat_with(
                span_range(pat, 0..11),
                vec![
                    lit_with('a', span_range(pat, 0..1)),
                    lit_with(' ', span_range(pat, 1..2)),
                    Ast::Group(ast::Group {
                        span: span_range(pat, 2..9),
                        kind: ast::GroupKind::NonCapturing(ast::Flags {
                            span: span_range(pat, 4..5),
                            items: vec![ast::FlagsItem {
                                span: span_range(pat, 4..5),
                                kind: ast::FlagsItemKind::Flag(
                                    ast::Flag::IgnoreWhitespace
                                ),
                            },],
                        }),
                        ast: Box::new(lit_with('a', span_range(pat, 6..7))),
                    }),
                    lit_with('a', span_range(pat, 9..10)),
                    lit_with(' ', span_range(pat, 10..11)),
                ]
            ))
        );

        // Test that whitespace after an opening paren is insignificant.
        let pat = "(?x)( ?P<foo> a )";
        assert_eq!(
            parser(pat).parse(),
            Ok(concat_with(
                span_range(pat, 0..pat.len()),
                vec![
                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                    Ast::Group(ast::Group {
                        span: span_range(pat, 4..pat.len()),
                        kind: ast::GroupKind::CaptureName(ast::CaptureName {
                            span: span_range(pat, 9..12),
                            name: s("foo"),
                            index: 1,
                        }),
                        ast: Box::new(lit_with('a', span_range(pat, 14..15))),
                    }),
                ]
            ))
        );
        let pat = "(?x)(  a )";
        assert_eq!(
            parser(pat).parse(),
            Ok(concat_with(
                span_range(pat, 0..pat.len()),
                vec![
                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                    Ast::Group(ast::Group {
                        span: span_range(pat, 4..pat.len()),
                        kind: ast::GroupKind::CaptureIndex(1),
                        ast: Box::new(lit_with('a', span_range(pat, 7..8))),
                    }),
                ]
            ))
        );
        // A non-capturing group with no flags is expected to carry an empty
        // flag list with a zero-width span.
        let pat = "(?x)(  ?:  a )";
        assert_eq!(
            parser(pat).parse(),
            Ok(concat_with(
                span_range(pat, 0..pat.len()),
                vec![
                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                    Ast::Group(ast::Group {
                        span: span_range(pat, 4..pat.len()),
                        kind: ast::GroupKind::NonCapturing(ast::Flags {
                            span: span_range(pat, 8..8),
                            items: vec![],
                        }),
                        ast: Box::new(lit_with('a', span_range(pat, 11..12))),
                    }),
                ]
            ))
        );
        // Whitespace around the braces and digits of a hex escape is
        // expected to be skipped too; 0x53 is 'S'.
        let pat = r"(?x)\x { 53 }";
        assert_eq!(
            parser(pat).parse(),
            Ok(concat_with(
                span_range(pat, 0..pat.len()),
                vec![
                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                    Ast::Literal(ast::Literal {
                        span: span(4..13),
                        kind: ast::LiteralKind::HexBrace(
                            ast::HexLiteralKind::X
                        ),
                        c: 'S',
                    }),
                ]
            ))
        );

        // Test that whitespace after an escape is OK.
        let pat = r"(?x)\ ";
        assert_eq!(
            parser(pat).parse(),
            Ok(concat_with(
                span_range(pat, 0..pat.len()),
                vec![
                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                    Ast::Literal(ast::Literal {
                        span: span_range(pat, 4..6),
                        kind: ast::LiteralKind::Special(
                            ast::SpecialLiteralKind::Space
                        ),
                        c: ' ',
                    }),
                ]
            ))
        );
        // ... but only when `x` mode is enabled.
        let pat = r"\ ";
        assert_eq!(
            parser(pat).parse().unwrap_err(),
            TestError {
                span: span_range(pat, 0..2),
                kind: ast::ErrorKind::EscapeUnrecognized,
            }
        );
    }
2903 
    // Checks that line breaks in a pattern parse as literals and that the
    // line and column of later positions account for them.
    #[test]
    fn parse_newlines() {
        let pat = ".\n.";
        assert_eq!(
            parser(pat).parse(),
            Ok(concat_with(
                span_range(pat, 0..3),
                vec![
                    Ast::Dot(span_range(pat, 0..1)),
                    lit_with('\n', span_range(pat, 1..2)),
                    Ast::Dot(span_range(pat, 2..3)),
                ]
            ))
        );

        // Positions are spelled out by hand here. The end of each `\n`
        // literal is expected on the next line at column 1.
        let pat = "foobar\nbaz\nquux\n";
        assert_eq!(
            parser(pat).parse(),
            Ok(concat_with(
                span_range(pat, 0..pat.len()),
                vec![
                    lit_with('f', nspan(npos(0, 1, 1), npos(1, 1, 2))),
                    lit_with('o', nspan(npos(1, 1, 2), npos(2, 1, 3))),
                    lit_with('o', nspan(npos(2, 1, 3), npos(3, 1, 4))),
                    lit_with('b', nspan(npos(3, 1, 4), npos(4, 1, 5))),
                    lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
                    lit_with('r', nspan(npos(5, 1, 6), npos(6, 1, 7))),
                    lit_with('\n', nspan(npos(6, 1, 7), npos(7, 2, 1))),
                    lit_with('b', nspan(npos(7, 2, 1), npos(8, 2, 2))),
                    lit_with('a', nspan(npos(8, 2, 2), npos(9, 2, 3))),
                    lit_with('z', nspan(npos(9, 2, 3), npos(10, 2, 4))),
                    lit_with('\n', nspan(npos(10, 2, 4), npos(11, 3, 1))),
                    lit_with('q', nspan(npos(11, 3, 1), npos(12, 3, 2))),
                    lit_with('u', nspan(npos(12, 3, 2), npos(13, 3, 3))),
                    lit_with('u', nspan(npos(13, 3, 3), npos(14, 3, 4))),
                    lit_with('x', nspan(npos(14, 3, 4), npos(15, 3, 5))),
                    lit_with('\n', nspan(npos(15, 3, 5), npos(16, 4, 1))),
                ]
            ))
        );
    }
2945 
2946     #[test]
parse_uncounted_repetition()2947     fn parse_uncounted_repetition() {
2948         assert_eq!(
2949             parser(r"a*").parse(),
2950             Ok(Ast::Repetition(ast::Repetition {
2951                 span: span(0..2),
2952                 op: ast::RepetitionOp {
2953                     span: span(1..2),
2954                     kind: ast::RepetitionKind::ZeroOrMore,
2955                 },
2956                 greedy: true,
2957                 ast: Box::new(lit('a', 0)),
2958             }))
2959         );
2960         assert_eq!(
2961             parser(r"a+").parse(),
2962             Ok(Ast::Repetition(ast::Repetition {
2963                 span: span(0..2),
2964                 op: ast::RepetitionOp {
2965                     span: span(1..2),
2966                     kind: ast::RepetitionKind::OneOrMore,
2967                 },
2968                 greedy: true,
2969                 ast: Box::new(lit('a', 0)),
2970             }))
2971         );
2972 
2973         assert_eq!(
2974             parser(r"a?").parse(),
2975             Ok(Ast::Repetition(ast::Repetition {
2976                 span: span(0..2),
2977                 op: ast::RepetitionOp {
2978                     span: span(1..2),
2979                     kind: ast::RepetitionKind::ZeroOrOne,
2980                 },
2981                 greedy: true,
2982                 ast: Box::new(lit('a', 0)),
2983             }))
2984         );
2985         assert_eq!(
2986             parser(r"a??").parse(),
2987             Ok(Ast::Repetition(ast::Repetition {
2988                 span: span(0..3),
2989                 op: ast::RepetitionOp {
2990                     span: span(1..3),
2991                     kind: ast::RepetitionKind::ZeroOrOne,
2992                 },
2993                 greedy: false,
2994                 ast: Box::new(lit('a', 0)),
2995             }))
2996         );
2997         assert_eq!(
2998             parser(r"a?").parse(),
2999             Ok(Ast::Repetition(ast::Repetition {
3000                 span: span(0..2),
3001                 op: ast::RepetitionOp {
3002                     span: span(1..2),
3003                     kind: ast::RepetitionKind::ZeroOrOne,
3004                 },
3005                 greedy: true,
3006                 ast: Box::new(lit('a', 0)),
3007             }))
3008         );
3009         assert_eq!(
3010             parser(r"a?b").parse(),
3011             Ok(concat(
3012                 0..3,
3013                 vec![
3014                     Ast::Repetition(ast::Repetition {
3015                         span: span(0..2),
3016                         op: ast::RepetitionOp {
3017                             span: span(1..2),
3018                             kind: ast::RepetitionKind::ZeroOrOne,
3019                         },
3020                         greedy: true,
3021                         ast: Box::new(lit('a', 0)),
3022                     }),
3023                     lit('b', 2),
3024                 ]
3025             ))
3026         );
3027         assert_eq!(
3028             parser(r"a??b").parse(),
3029             Ok(concat(
3030                 0..4,
3031                 vec![
3032                     Ast::Repetition(ast::Repetition {
3033                         span: span(0..3),
3034                         op: ast::RepetitionOp {
3035                             span: span(1..3),
3036                             kind: ast::RepetitionKind::ZeroOrOne,
3037                         },
3038                         greedy: false,
3039                         ast: Box::new(lit('a', 0)),
3040                     }),
3041                     lit('b', 3),
3042                 ]
3043             ))
3044         );
3045         assert_eq!(
3046             parser(r"ab?").parse(),
3047             Ok(concat(
3048                 0..3,
3049                 vec![
3050                     lit('a', 0),
3051                     Ast::Repetition(ast::Repetition {
3052                         span: span(1..3),
3053                         op: ast::RepetitionOp {
3054                             span: span(2..3),
3055                             kind: ast::RepetitionKind::ZeroOrOne,
3056                         },
3057                         greedy: true,
3058                         ast: Box::new(lit('b', 1)),
3059                     }),
3060                 ]
3061             ))
3062         );
3063         assert_eq!(
3064             parser(r"(ab)?").parse(),
3065             Ok(Ast::Repetition(ast::Repetition {
3066                 span: span(0..5),
3067                 op: ast::RepetitionOp {
3068                     span: span(4..5),
3069                     kind: ast::RepetitionKind::ZeroOrOne,
3070                 },
3071                 greedy: true,
3072                 ast: Box::new(group(
3073                     0..4,
3074                     1,
3075                     concat(1..3, vec![lit('a', 1), lit('b', 2),])
3076                 )),
3077             }))
3078         );
3079         assert_eq!(
3080             parser(r"|a?").parse(),
3081             Ok(alt(
3082                 0..3,
3083                 vec![
3084                     Ast::Empty(span(0..0)),
3085                     Ast::Repetition(ast::Repetition {
3086                         span: span(1..3),
3087                         op: ast::RepetitionOp {
3088                             span: span(2..3),
3089                             kind: ast::RepetitionKind::ZeroOrOne,
3090                         },
3091                         greedy: true,
3092                         ast: Box::new(lit('a', 1)),
3093                     }),
3094                 ]
3095             ))
3096         );
3097 
3098         assert_eq!(
3099             parser(r"*").parse().unwrap_err(),
3100             TestError {
3101                 span: span(0..0),
3102                 kind: ast::ErrorKind::RepetitionMissing,
3103             }
3104         );
3105         assert_eq!(
3106             parser(r"(?i)*").parse().unwrap_err(),
3107             TestError {
3108                 span: span(4..4),
3109                 kind: ast::ErrorKind::RepetitionMissing,
3110             }
3111         );
3112         assert_eq!(
3113             parser(r"(*)").parse().unwrap_err(),
3114             TestError {
3115                 span: span(1..1),
3116                 kind: ast::ErrorKind::RepetitionMissing,
3117             }
3118         );
3119         assert_eq!(
3120             parser(r"(?:?)").parse().unwrap_err(),
3121             TestError {
3122                 span: span(3..3),
3123                 kind: ast::ErrorKind::RepetitionMissing,
3124             }
3125         );
3126         assert_eq!(
3127             parser(r"+").parse().unwrap_err(),
3128             TestError {
3129                 span: span(0..0),
3130                 kind: ast::ErrorKind::RepetitionMissing,
3131             }
3132         );
3133         assert_eq!(
3134             parser(r"?").parse().unwrap_err(),
3135             TestError {
3136                 span: span(0..0),
3137                 kind: ast::ErrorKind::RepetitionMissing,
3138             }
3139         );
3140         assert_eq!(
3141             parser(r"(?)").parse().unwrap_err(),
3142             TestError {
3143                 span: span(1..1),
3144                 kind: ast::ErrorKind::RepetitionMissing,
3145             }
3146         );
3147         assert_eq!(
3148             parser(r"|*").parse().unwrap_err(),
3149             TestError {
3150                 span: span(1..1),
3151                 kind: ast::ErrorKind::RepetitionMissing,
3152             }
3153         );
3154         assert_eq!(
3155             parser(r"|+").parse().unwrap_err(),
3156             TestError {
3157                 span: span(1..1),
3158                 kind: ast::ErrorKind::RepetitionMissing,
3159             }
3160         );
3161         assert_eq!(
3162             parser(r"|?").parse().unwrap_err(),
3163             TestError {
3164                 span: span(1..1),
3165                 kind: ast::ErrorKind::RepetitionMissing,
3166             }
3167         );
3168     }
3169 
3170     #[test]
parse_counted_repetition()3171     fn parse_counted_repetition() {
3172         assert_eq!(
3173             parser(r"a{5}").parse(),
3174             Ok(Ast::Repetition(ast::Repetition {
3175                 span: span(0..4),
3176                 op: ast::RepetitionOp {
3177                     span: span(1..4),
3178                     kind: ast::RepetitionKind::Range(
3179                         ast::RepetitionRange::Exactly(5)
3180                     ),
3181                 },
3182                 greedy: true,
3183                 ast: Box::new(lit('a', 0)),
3184             }))
3185         );
3186         assert_eq!(
3187             parser(r"a{5,}").parse(),
3188             Ok(Ast::Repetition(ast::Repetition {
3189                 span: span(0..5),
3190                 op: ast::RepetitionOp {
3191                     span: span(1..5),
3192                     kind: ast::RepetitionKind::Range(
3193                         ast::RepetitionRange::AtLeast(5)
3194                     ),
3195                 },
3196                 greedy: true,
3197                 ast: Box::new(lit('a', 0)),
3198             }))
3199         );
3200         assert_eq!(
3201             parser(r"a{5,9}").parse(),
3202             Ok(Ast::Repetition(ast::Repetition {
3203                 span: span(0..6),
3204                 op: ast::RepetitionOp {
3205                     span: span(1..6),
3206                     kind: ast::RepetitionKind::Range(
3207                         ast::RepetitionRange::Bounded(5, 9)
3208                     ),
3209                 },
3210                 greedy: true,
3211                 ast: Box::new(lit('a', 0)),
3212             }))
3213         );
3214         assert_eq!(
3215             parser(r"a{5}?").parse(),
3216             Ok(Ast::Repetition(ast::Repetition {
3217                 span: span(0..5),
3218                 op: ast::RepetitionOp {
3219                     span: span(1..5),
3220                     kind: ast::RepetitionKind::Range(
3221                         ast::RepetitionRange::Exactly(5)
3222                     ),
3223                 },
3224                 greedy: false,
3225                 ast: Box::new(lit('a', 0)),
3226             }))
3227         );
3228         assert_eq!(
3229             parser(r"ab{5}").parse(),
3230             Ok(concat(
3231                 0..5,
3232                 vec![
3233                     lit('a', 0),
3234                     Ast::Repetition(ast::Repetition {
3235                         span: span(1..5),
3236                         op: ast::RepetitionOp {
3237                             span: span(2..5),
3238                             kind: ast::RepetitionKind::Range(
3239                                 ast::RepetitionRange::Exactly(5)
3240                             ),
3241                         },
3242                         greedy: true,
3243                         ast: Box::new(lit('b', 1)),
3244                     }),
3245                 ]
3246             ))
3247         );
3248         assert_eq!(
3249             parser(r"ab{5}c").parse(),
3250             Ok(concat(
3251                 0..6,
3252                 vec![
3253                     lit('a', 0),
3254                     Ast::Repetition(ast::Repetition {
3255                         span: span(1..5),
3256                         op: ast::RepetitionOp {
3257                             span: span(2..5),
3258                             kind: ast::RepetitionKind::Range(
3259                                 ast::RepetitionRange::Exactly(5)
3260                             ),
3261                         },
3262                         greedy: true,
3263                         ast: Box::new(lit('b', 1)),
3264                     }),
3265                     lit('c', 5),
3266                 ]
3267             ))
3268         );
3269 
3270         assert_eq!(
3271             parser(r"a{ 5 }").parse(),
3272             Ok(Ast::Repetition(ast::Repetition {
3273                 span: span(0..6),
3274                 op: ast::RepetitionOp {
3275                     span: span(1..6),
3276                     kind: ast::RepetitionKind::Range(
3277                         ast::RepetitionRange::Exactly(5)
3278                     ),
3279                 },
3280                 greedy: true,
3281                 ast: Box::new(lit('a', 0)),
3282             }))
3283         );
3284         assert_eq!(
3285             parser(r"a{ 5 , 9 }").parse(),
3286             Ok(Ast::Repetition(ast::Repetition {
3287                 span: span(0..10),
3288                 op: ast::RepetitionOp {
3289                     span: span(1..10),
3290                     kind: ast::RepetitionKind::Range(
3291                         ast::RepetitionRange::Bounded(5, 9)
3292                     ),
3293                 },
3294                 greedy: true,
3295                 ast: Box::new(lit('a', 0)),
3296             }))
3297         );
3298         assert_eq!(
3299             parser_ignore_whitespace(r"a{5,9} ?").parse(),
3300             Ok(Ast::Repetition(ast::Repetition {
3301                 span: span(0..8),
3302                 op: ast::RepetitionOp {
3303                     span: span(1..8),
3304                     kind: ast::RepetitionKind::Range(
3305                         ast::RepetitionRange::Bounded(5, 9)
3306                     ),
3307                 },
3308                 greedy: false,
3309                 ast: Box::new(lit('a', 0)),
3310             }))
3311         );
3312 
3313         assert_eq!(
3314             parser(r"(?i){0}").parse().unwrap_err(),
3315             TestError {
3316                 span: span(4..4),
3317                 kind: ast::ErrorKind::RepetitionMissing,
3318             }
3319         );
3320         assert_eq!(
3321             parser(r"(?m){1,1}").parse().unwrap_err(),
3322             TestError {
3323                 span: span(4..4),
3324                 kind: ast::ErrorKind::RepetitionMissing,
3325             }
3326         );
3327         assert_eq!(
3328             parser(r"a{]}").parse().unwrap_err(),
3329             TestError {
3330                 span: span(2..2),
3331                 kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3332             }
3333         );
3334         assert_eq!(
3335             parser(r"a{1,]}").parse().unwrap_err(),
3336             TestError {
3337                 span: span(4..4),
3338                 kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3339             }
3340         );
3341         assert_eq!(
3342             parser(r"a{").parse().unwrap_err(),
3343             TestError {
3344                 span: span(1..2),
3345                 kind: ast::ErrorKind::RepetitionCountUnclosed,
3346             }
3347         );
3348         assert_eq!(
3349             parser(r"a{}").parse().unwrap_err(),
3350             TestError {
3351                 span: span(2..2),
3352                 kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3353             }
3354         );
3355         assert_eq!(
3356             parser(r"a{a").parse().unwrap_err(),
3357             TestError {
3358                 span: span(2..2),
3359                 kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3360             }
3361         );
3362         assert_eq!(
3363             parser(r"a{9999999999}").parse().unwrap_err(),
3364             TestError {
3365                 span: span(2..12),
3366                 kind: ast::ErrorKind::DecimalInvalid,
3367             }
3368         );
3369         assert_eq!(
3370             parser(r"a{9").parse().unwrap_err(),
3371             TestError {
3372                 span: span(1..3),
3373                 kind: ast::ErrorKind::RepetitionCountUnclosed,
3374             }
3375         );
3376         assert_eq!(
3377             parser(r"a{9,a").parse().unwrap_err(),
3378             TestError {
3379                 span: span(4..4),
3380                 kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3381             }
3382         );
3383         assert_eq!(
3384             parser(r"a{9,9999999999}").parse().unwrap_err(),
3385             TestError {
3386                 span: span(4..14),
3387                 kind: ast::ErrorKind::DecimalInvalid,
3388             }
3389         );
3390         assert_eq!(
3391             parser(r"a{9,").parse().unwrap_err(),
3392             TestError {
3393                 span: span(1..4),
3394                 kind: ast::ErrorKind::RepetitionCountUnclosed,
3395             }
3396         );
3397         assert_eq!(
3398             parser(r"a{9,11").parse().unwrap_err(),
3399             TestError {
3400                 span: span(1..6),
3401                 kind: ast::ErrorKind::RepetitionCountUnclosed,
3402             }
3403         );
3404         assert_eq!(
3405             parser(r"a{2,1}").parse().unwrap_err(),
3406             TestError {
3407                 span: span(1..6),
3408                 kind: ast::ErrorKind::RepetitionCountInvalid,
3409             }
3410         );
3411         assert_eq!(
3412             parser(r"{5}").parse().unwrap_err(),
3413             TestError {
3414                 span: span(0..0),
3415                 kind: ast::ErrorKind::RepetitionMissing,
3416             }
3417         );
3418         assert_eq!(
3419             parser(r"|{5}").parse().unwrap_err(),
3420             TestError {
3421                 span: span(1..1),
3422                 kind: ast::ErrorKind::RepetitionMissing,
3423             }
3424         );
3425     }
3426 
3427     #[test]
parse_alternate()3428     fn parse_alternate() {
3429         assert_eq!(
3430             parser(r"a|b").parse(),
3431             Ok(Ast::Alternation(ast::Alternation {
3432                 span: span(0..3),
3433                 asts: vec![lit('a', 0), lit('b', 2)],
3434             }))
3435         );
3436         assert_eq!(
3437             parser(r"(a|b)").parse(),
3438             Ok(group(
3439                 0..5,
3440                 1,
3441                 Ast::Alternation(ast::Alternation {
3442                     span: span(1..4),
3443                     asts: vec![lit('a', 1), lit('b', 3)],
3444                 })
3445             ))
3446         );
3447 
3448         assert_eq!(
3449             parser(r"a|b|c").parse(),
3450             Ok(Ast::Alternation(ast::Alternation {
3451                 span: span(0..5),
3452                 asts: vec![lit('a', 0), lit('b', 2), lit('c', 4)],
3453             }))
3454         );
3455         assert_eq!(
3456             parser(r"ax|by|cz").parse(),
3457             Ok(Ast::Alternation(ast::Alternation {
3458                 span: span(0..8),
3459                 asts: vec![
3460                     concat(0..2, vec![lit('a', 0), lit('x', 1)]),
3461                     concat(3..5, vec![lit('b', 3), lit('y', 4)]),
3462                     concat(6..8, vec![lit('c', 6), lit('z', 7)]),
3463                 ],
3464             }))
3465         );
3466         assert_eq!(
3467             parser(r"(ax|by|cz)").parse(),
3468             Ok(group(
3469                 0..10,
3470                 1,
3471                 Ast::Alternation(ast::Alternation {
3472                     span: span(1..9),
3473                     asts: vec![
3474                         concat(1..3, vec![lit('a', 1), lit('x', 2)]),
3475                         concat(4..6, vec![lit('b', 4), lit('y', 5)]),
3476                         concat(7..9, vec![lit('c', 7), lit('z', 8)]),
3477                     ],
3478                 })
3479             ))
3480         );
3481         assert_eq!(
3482             parser(r"(ax|(by|(cz)))").parse(),
3483             Ok(group(
3484                 0..14,
3485                 1,
3486                 alt(
3487                     1..13,
3488                     vec![
3489                         concat(1..3, vec![lit('a', 1), lit('x', 2)]),
3490                         group(
3491                             4..13,
3492                             2,
3493                             alt(
3494                                 5..12,
3495                                 vec![
3496                                     concat(
3497                                         5..7,
3498                                         vec![lit('b', 5), lit('y', 6)]
3499                                     ),
3500                                     group(
3501                                         8..12,
3502                                         3,
3503                                         concat(
3504                                             9..11,
3505                                             vec![lit('c', 9), lit('z', 10),]
3506                                         )
3507                                     ),
3508                                 ]
3509                             )
3510                         ),
3511                     ]
3512                 )
3513             ))
3514         );
3515 
3516         assert_eq!(
3517             parser(r"|").parse(),
3518             Ok(alt(
3519                 0..1,
3520                 vec![Ast::Empty(span(0..0)), Ast::Empty(span(1..1)),]
3521             ))
3522         );
3523         assert_eq!(
3524             parser(r"||").parse(),
3525             Ok(alt(
3526                 0..2,
3527                 vec![
3528                     Ast::Empty(span(0..0)),
3529                     Ast::Empty(span(1..1)),
3530                     Ast::Empty(span(2..2)),
3531                 ]
3532             ))
3533         );
3534         assert_eq!(
3535             parser(r"a|").parse(),
3536             Ok(alt(0..2, vec![lit('a', 0), Ast::Empty(span(2..2)),]))
3537         );
3538         assert_eq!(
3539             parser(r"|a").parse(),
3540             Ok(alt(0..2, vec![Ast::Empty(span(0..0)), lit('a', 1),]))
3541         );
3542 
3543         assert_eq!(
3544             parser(r"(|)").parse(),
3545             Ok(group(
3546                 0..3,
3547                 1,
3548                 alt(
3549                     1..2,
3550                     vec![Ast::Empty(span(1..1)), Ast::Empty(span(2..2)),]
3551                 )
3552             ))
3553         );
3554         assert_eq!(
3555             parser(r"(a|)").parse(),
3556             Ok(group(
3557                 0..4,
3558                 1,
3559                 alt(1..3, vec![lit('a', 1), Ast::Empty(span(3..3)),])
3560             ))
3561         );
3562         assert_eq!(
3563             parser(r"(|a)").parse(),
3564             Ok(group(
3565                 0..4,
3566                 1,
3567                 alt(1..3, vec![Ast::Empty(span(1..1)), lit('a', 2),])
3568             ))
3569         );
3570 
3571         assert_eq!(
3572             parser(r"a|b)").parse().unwrap_err(),
3573             TestError {
3574                 span: span(3..4),
3575                 kind: ast::ErrorKind::GroupUnopened,
3576             }
3577         );
3578         assert_eq!(
3579             parser(r"(a|b").parse().unwrap_err(),
3580             TestError {
3581                 span: span(0..1),
3582                 kind: ast::ErrorKind::GroupUnclosed,
3583             }
3584         );
3585     }
3586 
3587     #[test]
parse_unsupported_lookaround()3588     fn parse_unsupported_lookaround() {
3589         assert_eq!(
3590             parser(r"(?=a)").parse().unwrap_err(),
3591             TestError {
3592                 span: span(0..3),
3593                 kind: ast::ErrorKind::UnsupportedLookAround,
3594             }
3595         );
3596         assert_eq!(
3597             parser(r"(?!a)").parse().unwrap_err(),
3598             TestError {
3599                 span: span(0..3),
3600                 kind: ast::ErrorKind::UnsupportedLookAround,
3601             }
3602         );
3603         assert_eq!(
3604             parser(r"(?<=a)").parse().unwrap_err(),
3605             TestError {
3606                 span: span(0..4),
3607                 kind: ast::ErrorKind::UnsupportedLookAround,
3608             }
3609         );
3610         assert_eq!(
3611             parser(r"(?<!a)").parse().unwrap_err(),
3612             TestError {
3613                 span: span(0..4),
3614                 kind: ast::ErrorKind::UnsupportedLookAround,
3615             }
3616         );
3617     }
3618 
3619     #[test]
parse_group()3620     fn parse_group() {
3621         assert_eq!(
3622             parser("(?i)").parse(),
3623             Ok(Ast::Flags(ast::SetFlags {
3624                 span: span(0..4),
3625                 flags: ast::Flags {
3626                     span: span(2..3),
3627                     items: vec![ast::FlagsItem {
3628                         span: span(2..3),
3629                         kind: ast::FlagsItemKind::Flag(
3630                             ast::Flag::CaseInsensitive
3631                         ),
3632                     }],
3633                 },
3634             }))
3635         );
3636         assert_eq!(
3637             parser("(?iU)").parse(),
3638             Ok(Ast::Flags(ast::SetFlags {
3639                 span: span(0..5),
3640                 flags: ast::Flags {
3641                     span: span(2..4),
3642                     items: vec![
3643                         ast::FlagsItem {
3644                             span: span(2..3),
3645                             kind: ast::FlagsItemKind::Flag(
3646                                 ast::Flag::CaseInsensitive
3647                             ),
3648                         },
3649                         ast::FlagsItem {
3650                             span: span(3..4),
3651                             kind: ast::FlagsItemKind::Flag(
3652                                 ast::Flag::SwapGreed
3653                             ),
3654                         },
3655                     ],
3656                 },
3657             }))
3658         );
3659         assert_eq!(
3660             parser("(?i-U)").parse(),
3661             Ok(Ast::Flags(ast::SetFlags {
3662                 span: span(0..6),
3663                 flags: ast::Flags {
3664                     span: span(2..5),
3665                     items: vec![
3666                         ast::FlagsItem {
3667                             span: span(2..3),
3668                             kind: ast::FlagsItemKind::Flag(
3669                                 ast::Flag::CaseInsensitive
3670                             ),
3671                         },
3672                         ast::FlagsItem {
3673                             span: span(3..4),
3674                             kind: ast::FlagsItemKind::Negation,
3675                         },
3676                         ast::FlagsItem {
3677                             span: span(4..5),
3678                             kind: ast::FlagsItemKind::Flag(
3679                                 ast::Flag::SwapGreed
3680                             ),
3681                         },
3682                     ],
3683                 },
3684             }))
3685         );
3686 
3687         assert_eq!(
3688             parser("()").parse(),
3689             Ok(Ast::Group(ast::Group {
3690                 span: span(0..2),
3691                 kind: ast::GroupKind::CaptureIndex(1),
3692                 ast: Box::new(Ast::Empty(span(1..1))),
3693             }))
3694         );
3695         assert_eq!(
3696             parser("(a)").parse(),
3697             Ok(Ast::Group(ast::Group {
3698                 span: span(0..3),
3699                 kind: ast::GroupKind::CaptureIndex(1),
3700                 ast: Box::new(lit('a', 1)),
3701             }))
3702         );
3703         assert_eq!(
3704             parser("(())").parse(),
3705             Ok(Ast::Group(ast::Group {
3706                 span: span(0..4),
3707                 kind: ast::GroupKind::CaptureIndex(1),
3708                 ast: Box::new(Ast::Group(ast::Group {
3709                     span: span(1..3),
3710                     kind: ast::GroupKind::CaptureIndex(2),
3711                     ast: Box::new(Ast::Empty(span(2..2))),
3712                 })),
3713             }))
3714         );
3715 
3716         assert_eq!(
3717             parser("(?:a)").parse(),
3718             Ok(Ast::Group(ast::Group {
3719                 span: span(0..5),
3720                 kind: ast::GroupKind::NonCapturing(ast::Flags {
3721                     span: span(2..2),
3722                     items: vec![],
3723                 }),
3724                 ast: Box::new(lit('a', 3)),
3725             }))
3726         );
3727 
3728         assert_eq!(
3729             parser("(?i:a)").parse(),
3730             Ok(Ast::Group(ast::Group {
3731                 span: span(0..6),
3732                 kind: ast::GroupKind::NonCapturing(ast::Flags {
3733                     span: span(2..3),
3734                     items: vec![ast::FlagsItem {
3735                         span: span(2..3),
3736                         kind: ast::FlagsItemKind::Flag(
3737                             ast::Flag::CaseInsensitive
3738                         ),
3739                     },],
3740                 }),
3741                 ast: Box::new(lit('a', 4)),
3742             }))
3743         );
3744         assert_eq!(
3745             parser("(?i-U:a)").parse(),
3746             Ok(Ast::Group(ast::Group {
3747                 span: span(0..8),
3748                 kind: ast::GroupKind::NonCapturing(ast::Flags {
3749                     span: span(2..5),
3750                     items: vec![
3751                         ast::FlagsItem {
3752                             span: span(2..3),
3753                             kind: ast::FlagsItemKind::Flag(
3754                                 ast::Flag::CaseInsensitive
3755                             ),
3756                         },
3757                         ast::FlagsItem {
3758                             span: span(3..4),
3759                             kind: ast::FlagsItemKind::Negation,
3760                         },
3761                         ast::FlagsItem {
3762                             span: span(4..5),
3763                             kind: ast::FlagsItemKind::Flag(
3764                                 ast::Flag::SwapGreed
3765                             ),
3766                         },
3767                     ],
3768                 }),
3769                 ast: Box::new(lit('a', 6)),
3770             }))
3771         );
3772 
3773         assert_eq!(
3774             parser("(").parse().unwrap_err(),
3775             TestError {
3776                 span: span(0..1),
3777                 kind: ast::ErrorKind::GroupUnclosed,
3778             }
3779         );
3780         assert_eq!(
3781             parser("(?").parse().unwrap_err(),
3782             TestError {
3783                 span: span(0..1),
3784                 kind: ast::ErrorKind::GroupUnclosed,
3785             }
3786         );
3787         assert_eq!(
3788             parser("(?P").parse().unwrap_err(),
3789             TestError {
3790                 span: span(2..3),
3791                 kind: ast::ErrorKind::FlagUnrecognized,
3792             }
3793         );
3794         assert_eq!(
3795             parser("(?P<").parse().unwrap_err(),
3796             TestError {
3797                 span: span(4..4),
3798                 kind: ast::ErrorKind::GroupNameUnexpectedEof,
3799             }
3800         );
3801         assert_eq!(
3802             parser("(a").parse().unwrap_err(),
3803             TestError {
3804                 span: span(0..1),
3805                 kind: ast::ErrorKind::GroupUnclosed,
3806             }
3807         );
3808         assert_eq!(
3809             parser("(()").parse().unwrap_err(),
3810             TestError {
3811                 span: span(0..1),
3812                 kind: ast::ErrorKind::GroupUnclosed,
3813             }
3814         );
3815         assert_eq!(
3816             parser(")").parse().unwrap_err(),
3817             TestError {
3818                 span: span(0..1),
3819                 kind: ast::ErrorKind::GroupUnopened,
3820             }
3821         );
3822         assert_eq!(
3823             parser("a)").parse().unwrap_err(),
3824             TestError {
3825                 span: span(1..2),
3826                 kind: ast::ErrorKind::GroupUnopened,
3827             }
3828         );
3829     }
3830 
3831     #[test]
parse_capture_name()3832     fn parse_capture_name() {
3833         assert_eq!(
3834             parser("(?P<a>z)").parse(),
3835             Ok(Ast::Group(ast::Group {
3836                 span: span(0..8),
3837                 kind: ast::GroupKind::CaptureName(ast::CaptureName {
3838                     span: span(4..5),
3839                     name: s("a"),
3840                     index: 1,
3841                 }),
3842                 ast: Box::new(lit('z', 6)),
3843             }))
3844         );
3845         assert_eq!(
3846             parser("(?P<abc>z)").parse(),
3847             Ok(Ast::Group(ast::Group {
3848                 span: span(0..10),
3849                 kind: ast::GroupKind::CaptureName(ast::CaptureName {
3850                     span: span(4..7),
3851                     name: s("abc"),
3852                     index: 1,
3853                 }),
3854                 ast: Box::new(lit('z', 8)),
3855             }))
3856         );
3857 
3858         assert_eq!(
3859             parser("(?P<a_1>z)").parse(),
3860             Ok(Ast::Group(ast::Group {
3861                 span: span(0..10),
3862                 kind: ast::GroupKind::CaptureName(ast::CaptureName {
3863                     span: span(4..7),
3864                     name: s("a_1"),
3865                     index: 1,
3866                 }),
3867                 ast: Box::new(lit('z', 8)),
3868             }))
3869         );
3870 
3871         assert_eq!(
3872             parser("(?P<a.1>z)").parse(),
3873             Ok(Ast::Group(ast::Group {
3874                 span: span(0..10),
3875                 kind: ast::GroupKind::CaptureName(ast::CaptureName {
3876                     span: span(4..7),
3877                     name: s("a.1"),
3878                     index: 1,
3879                 }),
3880                 ast: Box::new(lit('z', 8)),
3881             }))
3882         );
3883 
3884         assert_eq!(
3885             parser("(?P<a[1]>z)").parse(),
3886             Ok(Ast::Group(ast::Group {
3887                 span: span(0..11),
3888                 kind: ast::GroupKind::CaptureName(ast::CaptureName {
3889                     span: span(4..8),
3890                     name: s("a[1]"),
3891                     index: 1,
3892                 }),
3893                 ast: Box::new(lit('z', 9)),
3894             }))
3895         );
3896 
3897         assert_eq!(
3898             parser("(?P<").parse().unwrap_err(),
3899             TestError {
3900                 span: span(4..4),
3901                 kind: ast::ErrorKind::GroupNameUnexpectedEof,
3902             }
3903         );
3904         assert_eq!(
3905             parser("(?P<>z)").parse().unwrap_err(),
3906             TestError {
3907                 span: span(4..4),
3908                 kind: ast::ErrorKind::GroupNameEmpty,
3909             }
3910         );
3911         assert_eq!(
3912             parser("(?P<a").parse().unwrap_err(),
3913             TestError {
3914                 span: span(5..5),
3915                 kind: ast::ErrorKind::GroupNameUnexpectedEof,
3916             }
3917         );
3918         assert_eq!(
3919             parser("(?P<ab").parse().unwrap_err(),
3920             TestError {
3921                 span: span(6..6),
3922                 kind: ast::ErrorKind::GroupNameUnexpectedEof,
3923             }
3924         );
3925         assert_eq!(
3926             parser("(?P<0a").parse().unwrap_err(),
3927             TestError {
3928                 span: span(4..5),
3929                 kind: ast::ErrorKind::GroupNameInvalid,
3930             }
3931         );
3932         assert_eq!(
3933             parser("(?P<~").parse().unwrap_err(),
3934             TestError {
3935                 span: span(4..5),
3936                 kind: ast::ErrorKind::GroupNameInvalid,
3937             }
3938         );
3939         assert_eq!(
3940             parser("(?P<abc~").parse().unwrap_err(),
3941             TestError {
3942                 span: span(7..8),
3943                 kind: ast::ErrorKind::GroupNameInvalid,
3944             }
3945         );
3946         assert_eq!(
3947             parser("(?P<a>y)(?P<a>z)").parse().unwrap_err(),
3948             TestError {
3949                 span: span(12..13),
3950                 kind: ast::ErrorKind::GroupNameDuplicate {
3951                     original: span(4..5),
3952                 },
3953             }
3954         );
3955     }
3956 
3957     #[test]
parse_flags()3958     fn parse_flags() {
3959         assert_eq!(
3960             parser("i:").parse_flags(),
3961             Ok(ast::Flags {
3962                 span: span(0..1),
3963                 items: vec![ast::FlagsItem {
3964                     span: span(0..1),
3965                     kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
3966                 }],
3967             })
3968         );
3969         assert_eq!(
3970             parser("i)").parse_flags(),
3971             Ok(ast::Flags {
3972                 span: span(0..1),
3973                 items: vec![ast::FlagsItem {
3974                     span: span(0..1),
3975                     kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
3976                 }],
3977             })
3978         );
3979 
3980         assert_eq!(
3981             parser("isU:").parse_flags(),
3982             Ok(ast::Flags {
3983                 span: span(0..3),
3984                 items: vec![
3985                     ast::FlagsItem {
3986                         span: span(0..1),
3987                         kind: ast::FlagsItemKind::Flag(
3988                             ast::Flag::CaseInsensitive
3989                         ),
3990                     },
3991                     ast::FlagsItem {
3992                         span: span(1..2),
3993                         kind: ast::FlagsItemKind::Flag(
3994                             ast::Flag::DotMatchesNewLine
3995                         ),
3996                     },
3997                     ast::FlagsItem {
3998                         span: span(2..3),
3999                         kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
4000                     },
4001                 ],
4002             })
4003         );
4004 
4005         assert_eq!(
4006             parser("-isU:").parse_flags(),
4007             Ok(ast::Flags {
4008                 span: span(0..4),
4009                 items: vec![
4010                     ast::FlagsItem {
4011                         span: span(0..1),
4012                         kind: ast::FlagsItemKind::Negation,
4013                     },
4014                     ast::FlagsItem {
4015                         span: span(1..2),
4016                         kind: ast::FlagsItemKind::Flag(
4017                             ast::Flag::CaseInsensitive
4018                         ),
4019                     },
4020                     ast::FlagsItem {
4021                         span: span(2..3),
4022                         kind: ast::FlagsItemKind::Flag(
4023                             ast::Flag::DotMatchesNewLine
4024                         ),
4025                     },
4026                     ast::FlagsItem {
4027                         span: span(3..4),
4028                         kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
4029                     },
4030                 ],
4031             })
4032         );
4033         assert_eq!(
4034             parser("i-sU:").parse_flags(),
4035             Ok(ast::Flags {
4036                 span: span(0..4),
4037                 items: vec![
4038                     ast::FlagsItem {
4039                         span: span(0..1),
4040                         kind: ast::FlagsItemKind::Flag(
4041                             ast::Flag::CaseInsensitive
4042                         ),
4043                     },
4044                     ast::FlagsItem {
4045                         span: span(1..2),
4046                         kind: ast::FlagsItemKind::Negation,
4047                     },
4048                     ast::FlagsItem {
4049                         span: span(2..3),
4050                         kind: ast::FlagsItemKind::Flag(
4051                             ast::Flag::DotMatchesNewLine
4052                         ),
4053                     },
4054                     ast::FlagsItem {
4055                         span: span(3..4),
4056                         kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
4057                     },
4058                 ],
4059             })
4060         );
4061 
4062         assert_eq!(
4063             parser("isU").parse_flags().unwrap_err(),
4064             TestError {
4065                 span: span(3..3),
4066                 kind: ast::ErrorKind::FlagUnexpectedEof,
4067             }
4068         );
4069         assert_eq!(
4070             parser("isUa:").parse_flags().unwrap_err(),
4071             TestError {
4072                 span: span(3..4),
4073                 kind: ast::ErrorKind::FlagUnrecognized,
4074             }
4075         );
4076         assert_eq!(
4077             parser("isUi:").parse_flags().unwrap_err(),
4078             TestError {
4079                 span: span(3..4),
4080                 kind: ast::ErrorKind::FlagDuplicate { original: span(0..1) },
4081             }
4082         );
4083         assert_eq!(
4084             parser("i-sU-i:").parse_flags().unwrap_err(),
4085             TestError {
4086                 span: span(4..5),
4087                 kind: ast::ErrorKind::FlagRepeatedNegation {
4088                     original: span(1..2),
4089                 },
4090             }
4091         );
4092         assert_eq!(
4093             parser("-)").parse_flags().unwrap_err(),
4094             TestError {
4095                 span: span(0..1),
4096                 kind: ast::ErrorKind::FlagDanglingNegation,
4097             }
4098         );
4099         assert_eq!(
4100             parser("i-)").parse_flags().unwrap_err(),
4101             TestError {
4102                 span: span(1..2),
4103                 kind: ast::ErrorKind::FlagDanglingNegation,
4104             }
4105         );
4106         assert_eq!(
4107             parser("iU-)").parse_flags().unwrap_err(),
4108             TestError {
4109                 span: span(2..3),
4110                 kind: ast::ErrorKind::FlagDanglingNegation,
4111             }
4112         );
4113     }
4114 
4115     #[test]
parse_flag()4116     fn parse_flag() {
4117         assert_eq!(parser("i").parse_flag(), Ok(ast::Flag::CaseInsensitive));
4118         assert_eq!(parser("m").parse_flag(), Ok(ast::Flag::MultiLine));
4119         assert_eq!(parser("s").parse_flag(), Ok(ast::Flag::DotMatchesNewLine));
4120         assert_eq!(parser("U").parse_flag(), Ok(ast::Flag::SwapGreed));
4121         assert_eq!(parser("u").parse_flag(), Ok(ast::Flag::Unicode));
4122         assert_eq!(parser("x").parse_flag(), Ok(ast::Flag::IgnoreWhitespace));
4123 
4124         assert_eq!(
4125             parser("a").parse_flag().unwrap_err(),
4126             TestError {
4127                 span: span(0..1),
4128                 kind: ast::ErrorKind::FlagUnrecognized,
4129             }
4130         );
4131         assert_eq!(
4132             parser("☃").parse_flag().unwrap_err(),
4133             TestError {
4134                 span: span_range("☃", 0..3),
4135                 kind: ast::ErrorKind::FlagUnrecognized,
4136             }
4137         );
4138     }
4139 
4140     #[test]
parse_primitive_non_escape()4141     fn parse_primitive_non_escape() {
4142         assert_eq!(
4143             parser(r".").parse_primitive(),
4144             Ok(Primitive::Dot(span(0..1)))
4145         );
4146         assert_eq!(
4147             parser(r"^").parse_primitive(),
4148             Ok(Primitive::Assertion(ast::Assertion {
4149                 span: span(0..1),
4150                 kind: ast::AssertionKind::StartLine,
4151             }))
4152         );
4153         assert_eq!(
4154             parser(r"$").parse_primitive(),
4155             Ok(Primitive::Assertion(ast::Assertion {
4156                 span: span(0..1),
4157                 kind: ast::AssertionKind::EndLine,
4158             }))
4159         );
4160 
4161         assert_eq!(
4162             parser(r"a").parse_primitive(),
4163             Ok(Primitive::Literal(ast::Literal {
4164                 span: span(0..1),
4165                 kind: ast::LiteralKind::Verbatim,
4166                 c: 'a',
4167             }))
4168         );
4169         assert_eq!(
4170             parser(r"|").parse_primitive(),
4171             Ok(Primitive::Literal(ast::Literal {
4172                 span: span(0..1),
4173                 kind: ast::LiteralKind::Verbatim,
4174                 c: '|',
4175             }))
4176         );
4177         assert_eq!(
4178             parser(r"☃").parse_primitive(),
4179             Ok(Primitive::Literal(ast::Literal {
4180                 span: span_range("☃", 0..3),
4181                 kind: ast::LiteralKind::Verbatim,
4182                 c: '☃',
4183             }))
4184         );
4185     }
4186 
4187     #[test]
parse_escape()4188     fn parse_escape() {
4189         assert_eq!(
4190             parser(r"\|").parse_primitive(),
4191             Ok(Primitive::Literal(ast::Literal {
4192                 span: span(0..2),
4193                 kind: ast::LiteralKind::Punctuation,
4194                 c: '|',
4195             }))
4196         );
4197         let specials = &[
4198             (r"\a", '\x07', ast::SpecialLiteralKind::Bell),
4199             (r"\f", '\x0C', ast::SpecialLiteralKind::FormFeed),
4200             (r"\t", '\t', ast::SpecialLiteralKind::Tab),
4201             (r"\n", '\n', ast::SpecialLiteralKind::LineFeed),
4202             (r"\r", '\r', ast::SpecialLiteralKind::CarriageReturn),
4203             (r"\v", '\x0B', ast::SpecialLiteralKind::VerticalTab),
4204         ];
4205         for &(pat, c, ref kind) in specials {
4206             assert_eq!(
4207                 parser(pat).parse_primitive(),
4208                 Ok(Primitive::Literal(ast::Literal {
4209                     span: span(0..2),
4210                     kind: ast::LiteralKind::Special(kind.clone()),
4211                     c: c,
4212                 }))
4213             );
4214         }
4215         assert_eq!(
4216             parser(r"\A").parse_primitive(),
4217             Ok(Primitive::Assertion(ast::Assertion {
4218                 span: span(0..2),
4219                 kind: ast::AssertionKind::StartText,
4220             }))
4221         );
4222         assert_eq!(
4223             parser(r"\z").parse_primitive(),
4224             Ok(Primitive::Assertion(ast::Assertion {
4225                 span: span(0..2),
4226                 kind: ast::AssertionKind::EndText,
4227             }))
4228         );
4229         assert_eq!(
4230             parser(r"\b").parse_primitive(),
4231             Ok(Primitive::Assertion(ast::Assertion {
4232                 span: span(0..2),
4233                 kind: ast::AssertionKind::WordBoundary,
4234             }))
4235         );
4236         assert_eq!(
4237             parser(r"\B").parse_primitive(),
4238             Ok(Primitive::Assertion(ast::Assertion {
4239                 span: span(0..2),
4240                 kind: ast::AssertionKind::NotWordBoundary,
4241             }))
4242         );
4243 
4244         assert_eq!(
4245             parser(r"\").parse_escape().unwrap_err(),
4246             TestError {
4247                 span: span(0..1),
4248                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4249             }
4250         );
4251         assert_eq!(
4252             parser(r"\y").parse_escape().unwrap_err(),
4253             TestError {
4254                 span: span(0..2),
4255                 kind: ast::ErrorKind::EscapeUnrecognized,
4256             }
4257         );
4258     }
4259 
4260     #[test]
parse_unsupported_backreference()4261     fn parse_unsupported_backreference() {
4262         assert_eq!(
4263             parser(r"\0").parse_escape().unwrap_err(),
4264             TestError {
4265                 span: span(0..2),
4266                 kind: ast::ErrorKind::UnsupportedBackreference,
4267             }
4268         );
4269         assert_eq!(
4270             parser(r"\9").parse_escape().unwrap_err(),
4271             TestError {
4272                 span: span(0..2),
4273                 kind: ast::ErrorKind::UnsupportedBackreference,
4274             }
4275         );
4276     }
4277 
4278     #[test]
parse_octal()4279     fn parse_octal() {
4280         for i in 0..511 {
4281             let pat = format!(r"\{:o}", i);
4282             assert_eq!(
4283                 parser_octal(&pat).parse_escape(),
4284                 Ok(Primitive::Literal(ast::Literal {
4285                     span: span(0..pat.len()),
4286                     kind: ast::LiteralKind::Octal,
4287                     c: ::std::char::from_u32(i).unwrap(),
4288                 }))
4289             );
4290         }
4291         assert_eq!(
4292             parser_octal(r"\778").parse_escape(),
4293             Ok(Primitive::Literal(ast::Literal {
4294                 span: span(0..3),
4295                 kind: ast::LiteralKind::Octal,
4296                 c: '?',
4297             }))
4298         );
4299         assert_eq!(
4300             parser_octal(r"\7777").parse_escape(),
4301             Ok(Primitive::Literal(ast::Literal {
4302                 span: span(0..4),
4303                 kind: ast::LiteralKind::Octal,
4304                 c: '\u{01FF}',
4305             }))
4306         );
4307         assert_eq!(
4308             parser_octal(r"\778").parse(),
4309             Ok(Ast::Concat(ast::Concat {
4310                 span: span(0..4),
4311                 asts: vec![
4312                     Ast::Literal(ast::Literal {
4313                         span: span(0..3),
4314                         kind: ast::LiteralKind::Octal,
4315                         c: '?',
4316                     }),
4317                     Ast::Literal(ast::Literal {
4318                         span: span(3..4),
4319                         kind: ast::LiteralKind::Verbatim,
4320                         c: '8',
4321                     }),
4322                 ],
4323             }))
4324         );
4325         assert_eq!(
4326             parser_octal(r"\7777").parse(),
4327             Ok(Ast::Concat(ast::Concat {
4328                 span: span(0..5),
4329                 asts: vec![
4330                     Ast::Literal(ast::Literal {
4331                         span: span(0..4),
4332                         kind: ast::LiteralKind::Octal,
4333                         c: '\u{01FF}',
4334                     }),
4335                     Ast::Literal(ast::Literal {
4336                         span: span(4..5),
4337                         kind: ast::LiteralKind::Verbatim,
4338                         c: '7',
4339                     }),
4340                 ],
4341             }))
4342         );
4343 
4344         assert_eq!(
4345             parser_octal(r"\8").parse_escape().unwrap_err(),
4346             TestError {
4347                 span: span(0..2),
4348                 kind: ast::ErrorKind::EscapeUnrecognized,
4349             }
4350         );
4351     }
4352 
4353     #[test]
parse_hex_two()4354     fn parse_hex_two() {
4355         for i in 0..256 {
4356             let pat = format!(r"\x{:02x}", i);
4357             assert_eq!(
4358                 parser(&pat).parse_escape(),
4359                 Ok(Primitive::Literal(ast::Literal {
4360                     span: span(0..pat.len()),
4361                     kind: ast::LiteralKind::HexFixed(ast::HexLiteralKind::X),
4362                     c: ::std::char::from_u32(i).unwrap(),
4363                 }))
4364             );
4365         }
4366 
4367         assert_eq!(
4368             parser(r"\xF").parse_escape().unwrap_err(),
4369             TestError {
4370                 span: span(3..3),
4371                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4372             }
4373         );
4374         assert_eq!(
4375             parser(r"\xG").parse_escape().unwrap_err(),
4376             TestError {
4377                 span: span(2..3),
4378                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4379             }
4380         );
4381         assert_eq!(
4382             parser(r"\xFG").parse_escape().unwrap_err(),
4383             TestError {
4384                 span: span(3..4),
4385                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4386             }
4387         );
4388     }
4389 
4390     #[test]
parse_hex_four()4391     fn parse_hex_four() {
4392         for i in 0..65536 {
4393             let c = match ::std::char::from_u32(i) {
4394                 None => continue,
4395                 Some(c) => c,
4396             };
4397             let pat = format!(r"\u{:04x}", i);
4398             assert_eq!(
4399                 parser(&pat).parse_escape(),
4400                 Ok(Primitive::Literal(ast::Literal {
4401                     span: span(0..pat.len()),
4402                     kind: ast::LiteralKind::HexFixed(
4403                         ast::HexLiteralKind::UnicodeShort
4404                     ),
4405                     c: c,
4406                 }))
4407             );
4408         }
4409 
4410         assert_eq!(
4411             parser(r"\uF").parse_escape().unwrap_err(),
4412             TestError {
4413                 span: span(3..3),
4414                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4415             }
4416         );
4417         assert_eq!(
4418             parser(r"\uG").parse_escape().unwrap_err(),
4419             TestError {
4420                 span: span(2..3),
4421                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4422             }
4423         );
4424         assert_eq!(
4425             parser(r"\uFG").parse_escape().unwrap_err(),
4426             TestError {
4427                 span: span(3..4),
4428                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4429             }
4430         );
4431         assert_eq!(
4432             parser(r"\uFFG").parse_escape().unwrap_err(),
4433             TestError {
4434                 span: span(4..5),
4435                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4436             }
4437         );
4438         assert_eq!(
4439             parser(r"\uFFFG").parse_escape().unwrap_err(),
4440             TestError {
4441                 span: span(5..6),
4442                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4443             }
4444         );
4445         assert_eq!(
4446             parser(r"\uD800").parse_escape().unwrap_err(),
4447             TestError {
4448                 span: span(2..6),
4449                 kind: ast::ErrorKind::EscapeHexInvalid,
4450             }
4451         );
4452     }
4453 
4454     #[test]
parse_hex_eight()4455     fn parse_hex_eight() {
4456         for i in 0..65536 {
4457             let c = match ::std::char::from_u32(i) {
4458                 None => continue,
4459                 Some(c) => c,
4460             };
4461             let pat = format!(r"\U{:08x}", i);
4462             assert_eq!(
4463                 parser(&pat).parse_escape(),
4464                 Ok(Primitive::Literal(ast::Literal {
4465                     span: span(0..pat.len()),
4466                     kind: ast::LiteralKind::HexFixed(
4467                         ast::HexLiteralKind::UnicodeLong
4468                     ),
4469                     c: c,
4470                 }))
4471             );
4472         }
4473 
4474         assert_eq!(
4475             parser(r"\UF").parse_escape().unwrap_err(),
4476             TestError {
4477                 span: span(3..3),
4478                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4479             }
4480         );
4481         assert_eq!(
4482             parser(r"\UG").parse_escape().unwrap_err(),
4483             TestError {
4484                 span: span(2..3),
4485                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4486             }
4487         );
4488         assert_eq!(
4489             parser(r"\UFG").parse_escape().unwrap_err(),
4490             TestError {
4491                 span: span(3..4),
4492                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4493             }
4494         );
4495         assert_eq!(
4496             parser(r"\UFFG").parse_escape().unwrap_err(),
4497             TestError {
4498                 span: span(4..5),
4499                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4500             }
4501         );
4502         assert_eq!(
4503             parser(r"\UFFFG").parse_escape().unwrap_err(),
4504             TestError {
4505                 span: span(5..6),
4506                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4507             }
4508         );
4509         assert_eq!(
4510             parser(r"\UFFFFG").parse_escape().unwrap_err(),
4511             TestError {
4512                 span: span(6..7),
4513                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4514             }
4515         );
4516         assert_eq!(
4517             parser(r"\UFFFFFG").parse_escape().unwrap_err(),
4518             TestError {
4519                 span: span(7..8),
4520                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4521             }
4522         );
4523         assert_eq!(
4524             parser(r"\UFFFFFFG").parse_escape().unwrap_err(),
4525             TestError {
4526                 span: span(8..9),
4527                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4528             }
4529         );
4530         assert_eq!(
4531             parser(r"\UFFFFFFFG").parse_escape().unwrap_err(),
4532             TestError {
4533                 span: span(9..10),
4534                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4535             }
4536         );
4537     }
4538 
4539     #[test]
parse_hex_brace()4540     fn parse_hex_brace() {
4541         assert_eq!(
4542             parser(r"\u{26c4}").parse_escape(),
4543             Ok(Primitive::Literal(ast::Literal {
4544                 span: span(0..8),
4545                 kind: ast::LiteralKind::HexBrace(
4546                     ast::HexLiteralKind::UnicodeShort
4547                 ),
4548                 c: '⛄',
4549             }))
4550         );
4551         assert_eq!(
4552             parser(r"\U{26c4}").parse_escape(),
4553             Ok(Primitive::Literal(ast::Literal {
4554                 span: span(0..8),
4555                 kind: ast::LiteralKind::HexBrace(
4556                     ast::HexLiteralKind::UnicodeLong
4557                 ),
4558                 c: '⛄',
4559             }))
4560         );
4561         assert_eq!(
4562             parser(r"\x{26c4}").parse_escape(),
4563             Ok(Primitive::Literal(ast::Literal {
4564                 span: span(0..8),
4565                 kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
4566                 c: '⛄',
4567             }))
4568         );
4569         assert_eq!(
4570             parser(r"\x{26C4}").parse_escape(),
4571             Ok(Primitive::Literal(ast::Literal {
4572                 span: span(0..8),
4573                 kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
4574                 c: '⛄',
4575             }))
4576         );
4577         assert_eq!(
4578             parser(r"\x{10fFfF}").parse_escape(),
4579             Ok(Primitive::Literal(ast::Literal {
4580                 span: span(0..10),
4581                 kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
4582                 c: '\u{10FFFF}',
4583             }))
4584         );
4585 
4586         assert_eq!(
4587             parser(r"\x").parse_escape().unwrap_err(),
4588             TestError {
4589                 span: span(2..2),
4590                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4591             }
4592         );
4593         assert_eq!(
4594             parser(r"\x{").parse_escape().unwrap_err(),
4595             TestError {
4596                 span: span(2..3),
4597                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4598             }
4599         );
4600         assert_eq!(
4601             parser(r"\x{FF").parse_escape().unwrap_err(),
4602             TestError {
4603                 span: span(2..5),
4604                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4605             }
4606         );
4607         assert_eq!(
4608             parser(r"\x{}").parse_escape().unwrap_err(),
4609             TestError {
4610                 span: span(2..4),
4611                 kind: ast::ErrorKind::EscapeHexEmpty,
4612             }
4613         );
4614         assert_eq!(
4615             parser(r"\x{FGF}").parse_escape().unwrap_err(),
4616             TestError {
4617                 span: span(4..5),
4618                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4619             }
4620         );
4621         assert_eq!(
4622             parser(r"\x{FFFFFF}").parse_escape().unwrap_err(),
4623             TestError {
4624                 span: span(3..9),
4625                 kind: ast::ErrorKind::EscapeHexInvalid,
4626             }
4627         );
4628         assert_eq!(
4629             parser(r"\x{D800}").parse_escape().unwrap_err(),
4630             TestError {
4631                 span: span(3..7),
4632                 kind: ast::ErrorKind::EscapeHexInvalid,
4633             }
4634         );
4635         assert_eq!(
4636             parser(r"\x{FFFFFFFFF}").parse_escape().unwrap_err(),
4637             TestError {
4638                 span: span(3..12),
4639                 kind: ast::ErrorKind::EscapeHexInvalid,
4640             }
4641         );
4642     }
4643 
4644     #[test]
parse_decimal()4645     fn parse_decimal() {
4646         assert_eq!(parser("123").parse_decimal(), Ok(123));
4647         assert_eq!(parser("0").parse_decimal(), Ok(0));
4648         assert_eq!(parser("01").parse_decimal(), Ok(1));
4649 
4650         assert_eq!(
4651             parser("-1").parse_decimal().unwrap_err(),
4652             TestError { span: span(0..0), kind: ast::ErrorKind::DecimalEmpty }
4653         );
4654         assert_eq!(
4655             parser("").parse_decimal().unwrap_err(),
4656             TestError { span: span(0..0), kind: ast::ErrorKind::DecimalEmpty }
4657         );
4658         assert_eq!(
4659             parser("9999999999").parse_decimal().unwrap_err(),
4660             TestError {
4661                 span: span(0..10),
4662                 kind: ast::ErrorKind::DecimalInvalid,
4663             }
4664         );
4665     }
4666 
4667     #[test]
parse_set_class()4668     fn parse_set_class() {
4669         fn union(span: Span, items: Vec<ast::ClassSetItem>) -> ast::ClassSet {
4670             ast::ClassSet::union(ast::ClassSetUnion {
4671                 span: span,
4672                 items: items,
4673             })
4674         }
4675 
4676         fn intersection(
4677             span: Span,
4678             lhs: ast::ClassSet,
4679             rhs: ast::ClassSet,
4680         ) -> ast::ClassSet {
4681             ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
4682                 span: span,
4683                 kind: ast::ClassSetBinaryOpKind::Intersection,
4684                 lhs: Box::new(lhs),
4685                 rhs: Box::new(rhs),
4686             })
4687         }
4688 
4689         fn difference(
4690             span: Span,
4691             lhs: ast::ClassSet,
4692             rhs: ast::ClassSet,
4693         ) -> ast::ClassSet {
4694             ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
4695                 span: span,
4696                 kind: ast::ClassSetBinaryOpKind::Difference,
4697                 lhs: Box::new(lhs),
4698                 rhs: Box::new(rhs),
4699             })
4700         }
4701 
4702         fn symdifference(
4703             span: Span,
4704             lhs: ast::ClassSet,
4705             rhs: ast::ClassSet,
4706         ) -> ast::ClassSet {
4707             ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
4708                 span: span,
4709                 kind: ast::ClassSetBinaryOpKind::SymmetricDifference,
4710                 lhs: Box::new(lhs),
4711                 rhs: Box::new(rhs),
4712             })
4713         }
4714 
4715         fn itemset(item: ast::ClassSetItem) -> ast::ClassSet {
4716             ast::ClassSet::Item(item)
4717         }
4718 
4719         fn item_ascii(cls: ast::ClassAscii) -> ast::ClassSetItem {
4720             ast::ClassSetItem::Ascii(cls)
4721         }
4722 
4723         fn item_unicode(cls: ast::ClassUnicode) -> ast::ClassSetItem {
4724             ast::ClassSetItem::Unicode(cls)
4725         }
4726 
4727         fn item_perl(cls: ast::ClassPerl) -> ast::ClassSetItem {
4728             ast::ClassSetItem::Perl(cls)
4729         }
4730 
4731         fn item_bracket(cls: ast::ClassBracketed) -> ast::ClassSetItem {
4732             ast::ClassSetItem::Bracketed(Box::new(cls))
4733         }
4734 
4735         fn lit(span: Span, c: char) -> ast::ClassSetItem {
4736             ast::ClassSetItem::Literal(ast::Literal {
4737                 span: span,
4738                 kind: ast::LiteralKind::Verbatim,
4739                 c: c,
4740             })
4741         }
4742 
4743         fn empty(span: Span) -> ast::ClassSetItem {
4744             ast::ClassSetItem::Empty(span)
4745         }
4746 
4747         fn range(span: Span, start: char, end: char) -> ast::ClassSetItem {
4748             let pos1 = Position {
4749                 offset: span.start.offset + start.len_utf8(),
4750                 column: span.start.column + 1,
4751                 ..span.start
4752             };
4753             let pos2 = Position {
4754                 offset: span.end.offset - end.len_utf8(),
4755                 column: span.end.column - 1,
4756                 ..span.end
4757             };
4758             ast::ClassSetItem::Range(ast::ClassSetRange {
4759                 span: span,
4760                 start: ast::Literal {
4761                     span: Span { end: pos1, ..span },
4762                     kind: ast::LiteralKind::Verbatim,
4763                     c: start,
4764                 },
4765                 end: ast::Literal {
4766                     span: Span { start: pos2, ..span },
4767                     kind: ast::LiteralKind::Verbatim,
4768                     c: end,
4769                 },
4770             })
4771         }
4772 
4773         fn alnum(span: Span, negated: bool) -> ast::ClassAscii {
4774             ast::ClassAscii {
4775                 span: span,
4776                 kind: ast::ClassAsciiKind::Alnum,
4777                 negated: negated,
4778             }
4779         }
4780 
4781         fn lower(span: Span, negated: bool) -> ast::ClassAscii {
4782             ast::ClassAscii {
4783                 span: span,
4784                 kind: ast::ClassAsciiKind::Lower,
4785                 negated: negated,
4786             }
4787         }
4788 
4789         assert_eq!(
4790             parser("[[:alnum:]]").parse(),
4791             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4792                 span: span(0..11),
4793                 negated: false,
4794                 kind: itemset(item_ascii(alnum(span(1..10), false))),
4795             })))
4796         );
4797         assert_eq!(
4798             parser("[[[:alnum:]]]").parse(),
4799             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4800                 span: span(0..13),
4801                 negated: false,
4802                 kind: itemset(item_bracket(ast::ClassBracketed {
4803                     span: span(1..12),
4804                     negated: false,
4805                     kind: itemset(item_ascii(alnum(span(2..11), false))),
4806                 })),
4807             })))
4808         );
4809         assert_eq!(
4810             parser("[[:alnum:]&&[:lower:]]").parse(),
4811             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4812                 span: span(0..22),
4813                 negated: false,
4814                 kind: intersection(
4815                     span(1..21),
4816                     itemset(item_ascii(alnum(span(1..10), false))),
4817                     itemset(item_ascii(lower(span(12..21), false))),
4818                 ),
4819             })))
4820         );
4821         assert_eq!(
4822             parser("[[:alnum:]--[:lower:]]").parse(),
4823             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4824                 span: span(0..22),
4825                 negated: false,
4826                 kind: difference(
4827                     span(1..21),
4828                     itemset(item_ascii(alnum(span(1..10), false))),
4829                     itemset(item_ascii(lower(span(12..21), false))),
4830                 ),
4831             })))
4832         );
4833         assert_eq!(
4834             parser("[[:alnum:]~~[:lower:]]").parse(),
4835             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4836                 span: span(0..22),
4837                 negated: false,
4838                 kind: symdifference(
4839                     span(1..21),
4840                     itemset(item_ascii(alnum(span(1..10), false))),
4841                     itemset(item_ascii(lower(span(12..21), false))),
4842                 ),
4843             })))
4844         );
4845 
4846         assert_eq!(
4847             parser("[a]").parse(),
4848             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4849                 span: span(0..3),
4850                 negated: false,
4851                 kind: itemset(lit(span(1..2), 'a')),
4852             })))
4853         );
4854         assert_eq!(
4855             parser(r"[a\]]").parse(),
4856             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4857                 span: span(0..5),
4858                 negated: false,
4859                 kind: union(
4860                     span(1..4),
4861                     vec![
4862                         lit(span(1..2), 'a'),
4863                         ast::ClassSetItem::Literal(ast::Literal {
4864                             span: span(2..4),
4865                             kind: ast::LiteralKind::Punctuation,
4866                             c: ']',
4867                         }),
4868                     ]
4869                 ),
4870             })))
4871         );
4872         assert_eq!(
4873             parser(r"[a\-z]").parse(),
4874             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4875                 span: span(0..6),
4876                 negated: false,
4877                 kind: union(
4878                     span(1..5),
4879                     vec![
4880                         lit(span(1..2), 'a'),
4881                         ast::ClassSetItem::Literal(ast::Literal {
4882                             span: span(2..4),
4883                             kind: ast::LiteralKind::Punctuation,
4884                             c: '-',
4885                         }),
4886                         lit(span(4..5), 'z'),
4887                     ]
4888                 ),
4889             })))
4890         );
4891         assert_eq!(
4892             parser("[ab]").parse(),
4893             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4894                 span: span(0..4),
4895                 negated: false,
4896                 kind: union(
4897                     span(1..3),
4898                     vec![lit(span(1..2), 'a'), lit(span(2..3), 'b'),]
4899                 ),
4900             })))
4901         );
4902         assert_eq!(
4903             parser("[a-]").parse(),
4904             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4905                 span: span(0..4),
4906                 negated: false,
4907                 kind: union(
4908                     span(1..3),
4909                     vec![lit(span(1..2), 'a'), lit(span(2..3), '-'),]
4910                 ),
4911             })))
4912         );
4913         assert_eq!(
4914             parser("[-a]").parse(),
4915             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4916                 span: span(0..4),
4917                 negated: false,
4918                 kind: union(
4919                     span(1..3),
4920                     vec![lit(span(1..2), '-'), lit(span(2..3), 'a'),]
4921                 ),
4922             })))
4923         );
4924         assert_eq!(
4925             parser(r"[\pL]").parse(),
4926             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4927                 span: span(0..5),
4928                 negated: false,
4929                 kind: itemset(item_unicode(ast::ClassUnicode {
4930                     span: span(1..4),
4931                     negated: false,
4932                     kind: ast::ClassUnicodeKind::OneLetter('L'),
4933                 })),
4934             })))
4935         );
4936         assert_eq!(
4937             parser(r"[\w]").parse(),
4938             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4939                 span: span(0..4),
4940                 negated: false,
4941                 kind: itemset(item_perl(ast::ClassPerl {
4942                     span: span(1..3),
4943                     kind: ast::ClassPerlKind::Word,
4944                     negated: false,
4945                 })),
4946             })))
4947         );
4948         assert_eq!(
4949             parser(r"[a\wz]").parse(),
4950             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4951                 span: span(0..6),
4952                 negated: false,
4953                 kind: union(
4954                     span(1..5),
4955                     vec![
4956                         lit(span(1..2), 'a'),
4957                         item_perl(ast::ClassPerl {
4958                             span: span(2..4),
4959                             kind: ast::ClassPerlKind::Word,
4960                             negated: false,
4961                         }),
4962                         lit(span(4..5), 'z'),
4963                     ]
4964                 ),
4965             })))
4966         );
4967 
4968         assert_eq!(
4969             parser("[a-z]").parse(),
4970             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4971                 span: span(0..5),
4972                 negated: false,
4973                 kind: itemset(range(span(1..4), 'a', 'z')),
4974             })))
4975         );
4976         assert_eq!(
4977             parser("[a-cx-z]").parse(),
4978             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4979                 span: span(0..8),
4980                 negated: false,
4981                 kind: union(
4982                     span(1..7),
4983                     vec![
4984                         range(span(1..4), 'a', 'c'),
4985                         range(span(4..7), 'x', 'z'),
4986                     ]
4987                 ),
4988             })))
4989         );
4990         assert_eq!(
4991             parser(r"[\w&&a-cx-z]").parse(),
4992             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4993                 span: span(0..12),
4994                 negated: false,
4995                 kind: intersection(
4996                     span(1..11),
4997                     itemset(item_perl(ast::ClassPerl {
4998                         span: span(1..3),
4999                         kind: ast::ClassPerlKind::Word,
5000                         negated: false,
5001                     })),
5002                     union(
5003                         span(5..11),
5004                         vec![
5005                             range(span(5..8), 'a', 'c'),
5006                             range(span(8..11), 'x', 'z'),
5007                         ]
5008                     ),
5009                 ),
5010             })))
5011         );
5012         assert_eq!(
5013             parser(r"[a-cx-z&&\w]").parse(),
5014             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5015                 span: span(0..12),
5016                 negated: false,
5017                 kind: intersection(
5018                     span(1..11),
5019                     union(
5020                         span(1..7),
5021                         vec![
5022                             range(span(1..4), 'a', 'c'),
5023                             range(span(4..7), 'x', 'z'),
5024                         ]
5025                     ),
5026                     itemset(item_perl(ast::ClassPerl {
5027                         span: span(9..11),
5028                         kind: ast::ClassPerlKind::Word,
5029                         negated: false,
5030                     })),
5031                 ),
5032             })))
5033         );
5034         assert_eq!(
5035             parser(r"[a--b--c]").parse(),
5036             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5037                 span: span(0..9),
5038                 negated: false,
5039                 kind: difference(
5040                     span(1..8),
5041                     difference(
5042                         span(1..5),
5043                         itemset(lit(span(1..2), 'a')),
5044                         itemset(lit(span(4..5), 'b')),
5045                     ),
5046                     itemset(lit(span(7..8), 'c')),
5047                 ),
5048             })))
5049         );
5050         assert_eq!(
5051             parser(r"[a~~b~~c]").parse(),
5052             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5053                 span: span(0..9),
5054                 negated: false,
5055                 kind: symdifference(
5056                     span(1..8),
5057                     symdifference(
5058                         span(1..5),
5059                         itemset(lit(span(1..2), 'a')),
5060                         itemset(lit(span(4..5), 'b')),
5061                     ),
5062                     itemset(lit(span(7..8), 'c')),
5063                 ),
5064             })))
5065         );
5066         assert_eq!(
5067             parser(r"[\^&&^]").parse(),
5068             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5069                 span: span(0..7),
5070                 negated: false,
5071                 kind: intersection(
5072                     span(1..6),
5073                     itemset(ast::ClassSetItem::Literal(ast::Literal {
5074                         span: span(1..3),
5075                         kind: ast::LiteralKind::Punctuation,
5076                         c: '^',
5077                     })),
5078                     itemset(lit(span(5..6), '^')),
5079                 ),
5080             })))
5081         );
5082         assert_eq!(
5083             parser(r"[\&&&&]").parse(),
5084             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5085                 span: span(0..7),
5086                 negated: false,
5087                 kind: intersection(
5088                     span(1..6),
5089                     itemset(ast::ClassSetItem::Literal(ast::Literal {
5090                         span: span(1..3),
5091                         kind: ast::LiteralKind::Punctuation,
5092                         c: '&',
5093                     })),
5094                     itemset(lit(span(5..6), '&')),
5095                 ),
5096             })))
5097         );
5098         assert_eq!(
5099             parser(r"[&&&&]").parse(),
5100             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5101                 span: span(0..6),
5102                 negated: false,
5103                 kind: intersection(
5104                     span(1..5),
5105                     intersection(
5106                         span(1..3),
5107                         itemset(empty(span(1..1))),
5108                         itemset(empty(span(3..3))),
5109                     ),
5110                     itemset(empty(span(5..5))),
5111                 ),
5112             })))
5113         );
5114 
5115         let pat = "[☃-⛄]";
5116         assert_eq!(
5117             parser(pat).parse(),
5118             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5119                 span: span_range(pat, 0..9),
5120                 negated: false,
5121                 kind: itemset(ast::ClassSetItem::Range(ast::ClassSetRange {
5122                     span: span_range(pat, 1..8),
5123                     start: ast::Literal {
5124                         span: span_range(pat, 1..4),
5125                         kind: ast::LiteralKind::Verbatim,
5126                         c: '☃',
5127                     },
5128                     end: ast::Literal {
5129                         span: span_range(pat, 5..8),
5130                         kind: ast::LiteralKind::Verbatim,
5131                         c: '⛄',
5132                     },
5133                 })),
5134             })))
5135         );
5136 
5137         assert_eq!(
5138             parser(r"[]]").parse(),
5139             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5140                 span: span(0..3),
5141                 negated: false,
5142                 kind: itemset(lit(span(1..2), ']')),
5143             })))
5144         );
5145         assert_eq!(
5146             parser(r"[]\[]").parse(),
5147             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5148                 span: span(0..5),
5149                 negated: false,
5150                 kind: union(
5151                     span(1..4),
5152                     vec![
5153                         lit(span(1..2), ']'),
5154                         ast::ClassSetItem::Literal(ast::Literal {
5155                             span: span(2..4),
5156                             kind: ast::LiteralKind::Punctuation,
5157                             c: '[',
5158                         }),
5159                     ]
5160                 ),
5161             })))
5162         );
5163         assert_eq!(
5164             parser(r"[\[]]").parse(),
5165             Ok(concat(
5166                 0..5,
5167                 vec![
5168                     Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5169                         span: span(0..4),
5170                         negated: false,
5171                         kind: itemset(ast::ClassSetItem::Literal(
5172                             ast::Literal {
5173                                 span: span(1..3),
5174                                 kind: ast::LiteralKind::Punctuation,
5175                                 c: '[',
5176                             }
5177                         )),
5178                     })),
5179                     Ast::Literal(ast::Literal {
5180                         span: span(4..5),
5181                         kind: ast::LiteralKind::Verbatim,
5182                         c: ']',
5183                     }),
5184                 ]
5185             ))
5186         );
5187 
5188         assert_eq!(
5189             parser("[").parse().unwrap_err(),
5190             TestError {
5191                 span: span(0..1),
5192                 kind: ast::ErrorKind::ClassUnclosed,
5193             }
5194         );
5195         assert_eq!(
5196             parser("[[").parse().unwrap_err(),
5197             TestError {
5198                 span: span(1..2),
5199                 kind: ast::ErrorKind::ClassUnclosed,
5200             }
5201         );
5202         assert_eq!(
5203             parser("[[-]").parse().unwrap_err(),
5204             TestError {
5205                 span: span(0..1),
5206                 kind: ast::ErrorKind::ClassUnclosed,
5207             }
5208         );
5209         assert_eq!(
5210             parser("[[[:alnum:]").parse().unwrap_err(),
5211             TestError {
5212                 span: span(1..2),
5213                 kind: ast::ErrorKind::ClassUnclosed,
5214             }
5215         );
5216         assert_eq!(
5217             parser(r"[\b]").parse().unwrap_err(),
5218             TestError {
5219                 span: span(1..3),
5220                 kind: ast::ErrorKind::ClassEscapeInvalid,
5221             }
5222         );
5223         assert_eq!(
5224             parser(r"[\w-a]").parse().unwrap_err(),
5225             TestError {
5226                 span: span(1..3),
5227                 kind: ast::ErrorKind::ClassRangeLiteral,
5228             }
5229         );
5230         assert_eq!(
5231             parser(r"[a-\w]").parse().unwrap_err(),
5232             TestError {
5233                 span: span(3..5),
5234                 kind: ast::ErrorKind::ClassRangeLiteral,
5235             }
5236         );
5237         assert_eq!(
5238             parser(r"[z-a]").parse().unwrap_err(),
5239             TestError {
5240                 span: span(1..4),
5241                 kind: ast::ErrorKind::ClassRangeInvalid,
5242             }
5243         );
5244 
5245         assert_eq!(
5246             parser_ignore_whitespace("[a ").parse().unwrap_err(),
5247             TestError {
5248                 span: span(0..1),
5249                 kind: ast::ErrorKind::ClassUnclosed,
5250             }
5251         );
5252         assert_eq!(
5253             parser_ignore_whitespace("[a- ").parse().unwrap_err(),
5254             TestError {
5255                 span: span(0..1),
5256                 kind: ast::ErrorKind::ClassUnclosed,
5257             }
5258         );
5259     }
5260 
5261     #[test]
parse_set_class_open()5262     fn parse_set_class_open() {
5263         assert_eq!(parser("[a]").parse_set_class_open(), {
5264             let set = ast::ClassBracketed {
5265                 span: span(0..1),
5266                 negated: false,
5267                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5268                     span: span(1..1),
5269                     items: vec![],
5270                 }),
5271             };
5272             let union = ast::ClassSetUnion { span: span(1..1), items: vec![] };
5273             Ok((set, union))
5274         });
5275         assert_eq!(
5276             parser_ignore_whitespace("[   a]").parse_set_class_open(),
5277             {
5278                 let set = ast::ClassBracketed {
5279                     span: span(0..4),
5280                     negated: false,
5281                     kind: ast::ClassSet::union(ast::ClassSetUnion {
5282                         span: span(4..4),
5283                         items: vec![],
5284                     }),
5285                 };
5286                 let union =
5287                     ast::ClassSetUnion { span: span(4..4), items: vec![] };
5288                 Ok((set, union))
5289             }
5290         );
5291         assert_eq!(parser("[^a]").parse_set_class_open(), {
5292             let set = ast::ClassBracketed {
5293                 span: span(0..2),
5294                 negated: true,
5295                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5296                     span: span(2..2),
5297                     items: vec![],
5298                 }),
5299             };
5300             let union = ast::ClassSetUnion { span: span(2..2), items: vec![] };
5301             Ok((set, union))
5302         });
5303         assert_eq!(
5304             parser_ignore_whitespace("[ ^ a]").parse_set_class_open(),
5305             {
5306                 let set = ast::ClassBracketed {
5307                     span: span(0..4),
5308                     negated: true,
5309                     kind: ast::ClassSet::union(ast::ClassSetUnion {
5310                         span: span(4..4),
5311                         items: vec![],
5312                     }),
5313                 };
5314                 let union =
5315                     ast::ClassSetUnion { span: span(4..4), items: vec![] };
5316                 Ok((set, union))
5317             }
5318         );
5319         assert_eq!(parser("[-a]").parse_set_class_open(), {
5320             let set = ast::ClassBracketed {
5321                 span: span(0..2),
5322                 negated: false,
5323                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5324                     span: span(1..1),
5325                     items: vec![],
5326                 }),
5327             };
5328             let union = ast::ClassSetUnion {
5329                 span: span(1..2),
5330                 items: vec![ast::ClassSetItem::Literal(ast::Literal {
5331                     span: span(1..2),
5332                     kind: ast::LiteralKind::Verbatim,
5333                     c: '-',
5334                 })],
5335             };
5336             Ok((set, union))
5337         });
5338         assert_eq!(
5339             parser_ignore_whitespace("[ - a]").parse_set_class_open(),
5340             {
5341                 let set = ast::ClassBracketed {
5342                     span: span(0..4),
5343                     negated: false,
5344                     kind: ast::ClassSet::union(ast::ClassSetUnion {
5345                         span: span(2..2),
5346                         items: vec![],
5347                     }),
5348                 };
5349                 let union = ast::ClassSetUnion {
5350                     span: span(2..3),
5351                     items: vec![ast::ClassSetItem::Literal(ast::Literal {
5352                         span: span(2..3),
5353                         kind: ast::LiteralKind::Verbatim,
5354                         c: '-',
5355                     })],
5356                 };
5357                 Ok((set, union))
5358             }
5359         );
5360         assert_eq!(parser("[^-a]").parse_set_class_open(), {
5361             let set = ast::ClassBracketed {
5362                 span: span(0..3),
5363                 negated: true,
5364                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5365                     span: span(2..2),
5366                     items: vec![],
5367                 }),
5368             };
5369             let union = ast::ClassSetUnion {
5370                 span: span(2..3),
5371                 items: vec![ast::ClassSetItem::Literal(ast::Literal {
5372                     span: span(2..3),
5373                     kind: ast::LiteralKind::Verbatim,
5374                     c: '-',
5375                 })],
5376             };
5377             Ok((set, union))
5378         });
5379         assert_eq!(parser("[--a]").parse_set_class_open(), {
5380             let set = ast::ClassBracketed {
5381                 span: span(0..3),
5382                 negated: false,
5383                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5384                     span: span(1..1),
5385                     items: vec![],
5386                 }),
5387             };
5388             let union = ast::ClassSetUnion {
5389                 span: span(1..3),
5390                 items: vec![
5391                     ast::ClassSetItem::Literal(ast::Literal {
5392                         span: span(1..2),
5393                         kind: ast::LiteralKind::Verbatim,
5394                         c: '-',
5395                     }),
5396                     ast::ClassSetItem::Literal(ast::Literal {
5397                         span: span(2..3),
5398                         kind: ast::LiteralKind::Verbatim,
5399                         c: '-',
5400                     }),
5401                 ],
5402             };
5403             Ok((set, union))
5404         });
5405         assert_eq!(parser("[]a]").parse_set_class_open(), {
5406             let set = ast::ClassBracketed {
5407                 span: span(0..2),
5408                 negated: false,
5409                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5410                     span: span(1..1),
5411                     items: vec![],
5412                 }),
5413             };
5414             let union = ast::ClassSetUnion {
5415                 span: span(1..2),
5416                 items: vec![ast::ClassSetItem::Literal(ast::Literal {
5417                     span: span(1..2),
5418                     kind: ast::LiteralKind::Verbatim,
5419                     c: ']',
5420                 })],
5421             };
5422             Ok((set, union))
5423         });
5424         assert_eq!(
5425             parser_ignore_whitespace("[ ] a]").parse_set_class_open(),
5426             {
5427                 let set = ast::ClassBracketed {
5428                     span: span(0..4),
5429                     negated: false,
5430                     kind: ast::ClassSet::union(ast::ClassSetUnion {
5431                         span: span(2..2),
5432                         items: vec![],
5433                     }),
5434                 };
5435                 let union = ast::ClassSetUnion {
5436                     span: span(2..3),
5437                     items: vec![ast::ClassSetItem::Literal(ast::Literal {
5438                         span: span(2..3),
5439                         kind: ast::LiteralKind::Verbatim,
5440                         c: ']',
5441                     })],
5442                 };
5443                 Ok((set, union))
5444             }
5445         );
5446         assert_eq!(parser("[^]a]").parse_set_class_open(), {
5447             let set = ast::ClassBracketed {
5448                 span: span(0..3),
5449                 negated: true,
5450                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5451                     span: span(2..2),
5452                     items: vec![],
5453                 }),
5454             };
5455             let union = ast::ClassSetUnion {
5456                 span: span(2..3),
5457                 items: vec![ast::ClassSetItem::Literal(ast::Literal {
5458                     span: span(2..3),
5459                     kind: ast::LiteralKind::Verbatim,
5460                     c: ']',
5461                 })],
5462             };
5463             Ok((set, union))
5464         });
5465         assert_eq!(parser("[-]a]").parse_set_class_open(), {
5466             let set = ast::ClassBracketed {
5467                 span: span(0..2),
5468                 negated: false,
5469                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5470                     span: span(1..1),
5471                     items: vec![],
5472                 }),
5473             };
5474             let union = ast::ClassSetUnion {
5475                 span: span(1..2),
5476                 items: vec![ast::ClassSetItem::Literal(ast::Literal {
5477                     span: span(1..2),
5478                     kind: ast::LiteralKind::Verbatim,
5479                     c: '-',
5480                 })],
5481             };
5482             Ok((set, union))
5483         });
5484 
5485         assert_eq!(
5486             parser("[").parse_set_class_open().unwrap_err(),
5487             TestError {
5488                 span: span(0..1),
5489                 kind: ast::ErrorKind::ClassUnclosed,
5490             }
5491         );
5492         assert_eq!(
5493             parser_ignore_whitespace("[    ")
5494                 .parse_set_class_open()
5495                 .unwrap_err(),
5496             TestError {
5497                 span: span(0..5),
5498                 kind: ast::ErrorKind::ClassUnclosed,
5499             }
5500         );
5501         assert_eq!(
5502             parser("[^").parse_set_class_open().unwrap_err(),
5503             TestError {
5504                 span: span(0..2),
5505                 kind: ast::ErrorKind::ClassUnclosed,
5506             }
5507         );
5508         assert_eq!(
5509             parser("[]").parse_set_class_open().unwrap_err(),
5510             TestError {
5511                 span: span(0..2),
5512                 kind: ast::ErrorKind::ClassUnclosed,
5513             }
5514         );
5515         assert_eq!(
5516             parser("[-").parse_set_class_open().unwrap_err(),
5517             TestError {
5518                 span: span(0..2),
5519                 kind: ast::ErrorKind::ClassUnclosed,
5520             }
5521         );
5522         assert_eq!(
5523             parser("[--").parse_set_class_open().unwrap_err(),
5524             TestError {
5525                 span: span(0..3),
5526                 kind: ast::ErrorKind::ClassUnclosed,
5527             }
5528         );
5529     }
5530 
5531     #[test]
maybe_parse_ascii_class()5532     fn maybe_parse_ascii_class() {
5533         assert_eq!(
5534             parser(r"[:alnum:]").maybe_parse_ascii_class(),
5535             Some(ast::ClassAscii {
5536                 span: span(0..9),
5537                 kind: ast::ClassAsciiKind::Alnum,
5538                 negated: false,
5539             })
5540         );
5541         assert_eq!(
5542             parser(r"[:alnum:]A").maybe_parse_ascii_class(),
5543             Some(ast::ClassAscii {
5544                 span: span(0..9),
5545                 kind: ast::ClassAsciiKind::Alnum,
5546                 negated: false,
5547             })
5548         );
5549         assert_eq!(
5550             parser(r"[:^alnum:]").maybe_parse_ascii_class(),
5551             Some(ast::ClassAscii {
5552                 span: span(0..10),
5553                 kind: ast::ClassAsciiKind::Alnum,
5554                 negated: true,
5555             })
5556         );
5557 
5558         let p = parser(r"[:");
5559         assert_eq!(p.maybe_parse_ascii_class(), None);
5560         assert_eq!(p.offset(), 0);
5561 
5562         let p = parser(r"[:^");
5563         assert_eq!(p.maybe_parse_ascii_class(), None);
5564         assert_eq!(p.offset(), 0);
5565 
5566         let p = parser(r"[^:alnum:]");
5567         assert_eq!(p.maybe_parse_ascii_class(), None);
5568         assert_eq!(p.offset(), 0);
5569 
5570         let p = parser(r"[:alnnum:]");
5571         assert_eq!(p.maybe_parse_ascii_class(), None);
5572         assert_eq!(p.offset(), 0);
5573 
5574         let p = parser(r"[:alnum]");
5575         assert_eq!(p.maybe_parse_ascii_class(), None);
5576         assert_eq!(p.offset(), 0);
5577 
5578         let p = parser(r"[:alnum:");
5579         assert_eq!(p.maybe_parse_ascii_class(), None);
5580         assert_eq!(p.offset(), 0);
5581     }
5582 
5583     #[test]
parse_unicode_class()5584     fn parse_unicode_class() {
5585         assert_eq!(
5586             parser(r"\pN").parse_escape(),
5587             Ok(Primitive::Unicode(ast::ClassUnicode {
5588                 span: span(0..3),
5589                 negated: false,
5590                 kind: ast::ClassUnicodeKind::OneLetter('N'),
5591             }))
5592         );
5593         assert_eq!(
5594             parser(r"\PN").parse_escape(),
5595             Ok(Primitive::Unicode(ast::ClassUnicode {
5596                 span: span(0..3),
5597                 negated: true,
5598                 kind: ast::ClassUnicodeKind::OneLetter('N'),
5599             }))
5600         );
5601         assert_eq!(
5602             parser(r"\p{N}").parse_escape(),
5603             Ok(Primitive::Unicode(ast::ClassUnicode {
5604                 span: span(0..5),
5605                 negated: false,
5606                 kind: ast::ClassUnicodeKind::Named(s("N")),
5607             }))
5608         );
5609         assert_eq!(
5610             parser(r"\P{N}").parse_escape(),
5611             Ok(Primitive::Unicode(ast::ClassUnicode {
5612                 span: span(0..5),
5613                 negated: true,
5614                 kind: ast::ClassUnicodeKind::Named(s("N")),
5615             }))
5616         );
5617         assert_eq!(
5618             parser(r"\p{Greek}").parse_escape(),
5619             Ok(Primitive::Unicode(ast::ClassUnicode {
5620                 span: span(0..9),
5621                 negated: false,
5622                 kind: ast::ClassUnicodeKind::Named(s("Greek")),
5623             }))
5624         );
5625 
5626         assert_eq!(
5627             parser(r"\p{scx:Katakana}").parse_escape(),
5628             Ok(Primitive::Unicode(ast::ClassUnicode {
5629                 span: span(0..16),
5630                 negated: false,
5631                 kind: ast::ClassUnicodeKind::NamedValue {
5632                     op: ast::ClassUnicodeOpKind::Colon,
5633                     name: s("scx"),
5634                     value: s("Katakana"),
5635                 },
5636             }))
5637         );
5638         assert_eq!(
5639             parser(r"\p{scx=Katakana}").parse_escape(),
5640             Ok(Primitive::Unicode(ast::ClassUnicode {
5641                 span: span(0..16),
5642                 negated: false,
5643                 kind: ast::ClassUnicodeKind::NamedValue {
5644                     op: ast::ClassUnicodeOpKind::Equal,
5645                     name: s("scx"),
5646                     value: s("Katakana"),
5647                 },
5648             }))
5649         );
5650         assert_eq!(
5651             parser(r"\p{scx!=Katakana}").parse_escape(),
5652             Ok(Primitive::Unicode(ast::ClassUnicode {
5653                 span: span(0..17),
5654                 negated: false,
5655                 kind: ast::ClassUnicodeKind::NamedValue {
5656                     op: ast::ClassUnicodeOpKind::NotEqual,
5657                     name: s("scx"),
5658                     value: s("Katakana"),
5659                 },
5660             }))
5661         );
5662 
5663         assert_eq!(
5664             parser(r"\p{:}").parse_escape(),
5665             Ok(Primitive::Unicode(ast::ClassUnicode {
5666                 span: span(0..5),
5667                 negated: false,
5668                 kind: ast::ClassUnicodeKind::NamedValue {
5669                     op: ast::ClassUnicodeOpKind::Colon,
5670                     name: s(""),
5671                     value: s(""),
5672                 },
5673             }))
5674         );
5675         assert_eq!(
5676             parser(r"\p{=}").parse_escape(),
5677             Ok(Primitive::Unicode(ast::ClassUnicode {
5678                 span: span(0..5),
5679                 negated: false,
5680                 kind: ast::ClassUnicodeKind::NamedValue {
5681                     op: ast::ClassUnicodeOpKind::Equal,
5682                     name: s(""),
5683                     value: s(""),
5684                 },
5685             }))
5686         );
5687         assert_eq!(
5688             parser(r"\p{!=}").parse_escape(),
5689             Ok(Primitive::Unicode(ast::ClassUnicode {
5690                 span: span(0..6),
5691                 negated: false,
5692                 kind: ast::ClassUnicodeKind::NamedValue {
5693                     op: ast::ClassUnicodeOpKind::NotEqual,
5694                     name: s(""),
5695                     value: s(""),
5696                 },
5697             }))
5698         );
5699 
5700         assert_eq!(
5701             parser(r"\p").parse_escape().unwrap_err(),
5702             TestError {
5703                 span: span(2..2),
5704                 kind: ast::ErrorKind::EscapeUnexpectedEof,
5705             }
5706         );
5707         assert_eq!(
5708             parser(r"\p{").parse_escape().unwrap_err(),
5709             TestError {
5710                 span: span(3..3),
5711                 kind: ast::ErrorKind::EscapeUnexpectedEof,
5712             }
5713         );
5714         assert_eq!(
5715             parser(r"\p{N").parse_escape().unwrap_err(),
5716             TestError {
5717                 span: span(4..4),
5718                 kind: ast::ErrorKind::EscapeUnexpectedEof,
5719             }
5720         );
5721         assert_eq!(
5722             parser(r"\p{Greek").parse_escape().unwrap_err(),
5723             TestError {
5724                 span: span(8..8),
5725                 kind: ast::ErrorKind::EscapeUnexpectedEof,
5726             }
5727         );
5728 
5729         assert_eq!(
5730             parser(r"\pNz").parse(),
5731             Ok(Ast::Concat(ast::Concat {
5732                 span: span(0..4),
5733                 asts: vec![
5734                     Ast::Class(ast::Class::Unicode(ast::ClassUnicode {
5735                         span: span(0..3),
5736                         negated: false,
5737                         kind: ast::ClassUnicodeKind::OneLetter('N'),
5738                     })),
5739                     Ast::Literal(ast::Literal {
5740                         span: span(3..4),
5741                         kind: ast::LiteralKind::Verbatim,
5742                         c: 'z',
5743                     }),
5744                 ],
5745             }))
5746         );
5747         assert_eq!(
5748             parser(r"\p{Greek}z").parse(),
5749             Ok(Ast::Concat(ast::Concat {
5750                 span: span(0..10),
5751                 asts: vec![
5752                     Ast::Class(ast::Class::Unicode(ast::ClassUnicode {
5753                         span: span(0..9),
5754                         negated: false,
5755                         kind: ast::ClassUnicodeKind::Named(s("Greek")),
5756                     })),
5757                     Ast::Literal(ast::Literal {
5758                         span: span(9..10),
5759                         kind: ast::LiteralKind::Verbatim,
5760                         c: 'z',
5761                     }),
5762                 ],
5763             }))
5764         );
5765         assert_eq!(
5766             parser(r"\p\{").parse().unwrap_err(),
5767             TestError {
5768                 span: span(2..3),
5769                 kind: ast::ErrorKind::UnicodeClassInvalid,
5770             }
5771         );
5772         assert_eq!(
5773             parser(r"\P\{").parse().unwrap_err(),
5774             TestError {
5775                 span: span(2..3),
5776                 kind: ast::ErrorKind::UnicodeClassInvalid,
5777             }
5778         );
5779     }
5780 
5781     #[test]
parse_perl_class()5782     fn parse_perl_class() {
5783         assert_eq!(
5784             parser(r"\d").parse_escape(),
5785             Ok(Primitive::Perl(ast::ClassPerl {
5786                 span: span(0..2),
5787                 kind: ast::ClassPerlKind::Digit,
5788                 negated: false,
5789             }))
5790         );
5791         assert_eq!(
5792             parser(r"\D").parse_escape(),
5793             Ok(Primitive::Perl(ast::ClassPerl {
5794                 span: span(0..2),
5795                 kind: ast::ClassPerlKind::Digit,
5796                 negated: true,
5797             }))
5798         );
5799         assert_eq!(
5800             parser(r"\s").parse_escape(),
5801             Ok(Primitive::Perl(ast::ClassPerl {
5802                 span: span(0..2),
5803                 kind: ast::ClassPerlKind::Space,
5804                 negated: false,
5805             }))
5806         );
5807         assert_eq!(
5808             parser(r"\S").parse_escape(),
5809             Ok(Primitive::Perl(ast::ClassPerl {
5810                 span: span(0..2),
5811                 kind: ast::ClassPerlKind::Space,
5812                 negated: true,
5813             }))
5814         );
5815         assert_eq!(
5816             parser(r"\w").parse_escape(),
5817             Ok(Primitive::Perl(ast::ClassPerl {
5818                 span: span(0..2),
5819                 kind: ast::ClassPerlKind::Word,
5820                 negated: false,
5821             }))
5822         );
5823         assert_eq!(
5824             parser(r"\W").parse_escape(),
5825             Ok(Primitive::Perl(ast::ClassPerl {
5826                 span: span(0..2),
5827                 kind: ast::ClassPerlKind::Word,
5828                 negated: true,
5829             }))
5830         );
5831 
5832         assert_eq!(
5833             parser(r"\d").parse(),
5834             Ok(Ast::Class(ast::Class::Perl(ast::ClassPerl {
5835                 span: span(0..2),
5836                 kind: ast::ClassPerlKind::Digit,
5837                 negated: false,
5838             })))
5839         );
5840         assert_eq!(
5841             parser(r"\dz").parse(),
5842             Ok(Ast::Concat(ast::Concat {
5843                 span: span(0..3),
5844                 asts: vec![
5845                     Ast::Class(ast::Class::Perl(ast::ClassPerl {
5846                         span: span(0..2),
5847                         kind: ast::ClassPerlKind::Digit,
5848                         negated: false,
5849                     })),
5850                     Ast::Literal(ast::Literal {
5851                         span: span(2..3),
5852                         kind: ast::LiteralKind::Verbatim,
5853                         c: 'z',
5854                     }),
5855                 ],
5856             }))
5857         );
5858     }
5859 
5860     // This tests a bug fix where the nest limit checker wasn't decrementing
5861     // its depth during post-traversal, which causes long regexes to trip
5862     // the default limit too aggressively.
5863     #[test]
regression_454_nest_too_big()5864     fn regression_454_nest_too_big() {
5865         let pattern = r#"
5866         2(?:
5867           [45]\d{3}|
5868           7(?:
5869             1[0-267]|
5870             2[0-289]|
5871             3[0-29]|
5872             4[01]|
5873             5[1-3]|
5874             6[013]|
5875             7[0178]|
5876             91
5877           )|
5878           8(?:
5879             0[125]|
5880             [139][1-6]|
5881             2[0157-9]|
5882             41|
5883             6[1-35]|
5884             7[1-5]|
5885             8[1-8]|
5886             90
5887           )|
5888           9(?:
5889             0[0-2]|
5890             1[0-4]|
5891             2[568]|
5892             3[3-6]|
5893             5[5-7]|
5894             6[0167]|
5895             7[15]|
5896             8[0146-9]
5897           )
5898         )\d{4}
5899         "#;
5900         assert!(parser_nest_limit(pattern, 50).parse().is_ok());
5901     }
5902 
5903     // This tests that we treat a trailing `-` in a character class as a
5904     // literal `-` even when whitespace mode is enabled and there is whitespace
5905     // after the trailing `-`.
5906     #[test]
regression_455_trailing_dash_ignore_whitespace()5907     fn regression_455_trailing_dash_ignore_whitespace() {
5908         assert!(parser("(?x)[ / - ]").parse().is_ok());
5909         assert!(parser("(?x)[ a - ]").parse().is_ok());
5910         assert!(parser(
5911             "(?x)[
5912             a
5913             - ]
5914         "
5915         )
5916         .parse()
5917         .is_ok());
5918         assert!(parser(
5919             "(?x)[
5920             a # wat
5921             - ]
5922         "
5923         )
5924         .parse()
5925         .is_ok());
5926 
5927         assert!(parser("(?x)[ / -").parse().is_err());
5928         assert!(parser("(?x)[ / - ").parse().is_err());
5929         assert!(parser(
5930             "(?x)[
5931             / -
5932         "
5933         )
5934         .parse()
5935         .is_err());
5936         assert!(parser(
5937             "(?x)[
5938             / - # wat
5939         "
5940         )
5941         .parse()
5942         .is_err());
5943     }
5944 }
5945