1 /*!
2 This module provides a regular expression parser.
3 */
4 
5 use std::borrow::Borrow;
6 use std::cell::{Cell, RefCell};
7 use std::mem;
8 use std::result;
9 
10 use ast::{self, Ast, Position, Span};
11 use either::Either;
12 
13 use is_meta_character;
14 
15 type Result<T> = result::Result<T, ast::Error>;
16 
17 /// A primitive is an expression with no sub-expressions. This includes
18 /// literals, assertions and non-set character classes. This representation
19 /// is used as intermediate state in the parser.
20 ///
21 /// This does not include ASCII character classes, since they can only appear
22 /// within a set character class.
23 #[derive(Clone, Debug, Eq, PartialEq)]
24 enum Primitive {
25     Literal(ast::Literal),
26     Assertion(ast::Assertion),
27     Dot(Span),
28     Perl(ast::ClassPerl),
29     Unicode(ast::ClassUnicode),
30 }
31 
32 impl Primitive {
33     /// Return the span of this primitive.
span(&self) -> &Span34     fn span(&self) -> &Span {
35         match *self {
36             Primitive::Literal(ref x) => &x.span,
37             Primitive::Assertion(ref x) => &x.span,
38             Primitive::Dot(ref span) => span,
39             Primitive::Perl(ref x) => &x.span,
40             Primitive::Unicode(ref x) => &x.span,
41         }
42     }
43 
44     /// Convert this primitive into a proper AST.
into_ast(self) -> Ast45     fn into_ast(self) -> Ast {
46         match self {
47             Primitive::Literal(lit) => Ast::Literal(lit),
48             Primitive::Assertion(assert) => Ast::Assertion(assert),
49             Primitive::Dot(span) => Ast::Dot(span),
50             Primitive::Perl(cls) => Ast::Class(ast::Class::Perl(cls)),
51             Primitive::Unicode(cls) => Ast::Class(ast::Class::Unicode(cls)),
52         }
53     }
54 
55     /// Convert this primitive into an item in a character class.
56     ///
57     /// If this primitive is not a legal item (i.e., an assertion or a dot),
58     /// then return an error.
into_class_set_item<P: Borrow<Parser>>( self, p: &ParserI<P>, ) -> Result<ast::ClassSetItem>59     fn into_class_set_item<P: Borrow<Parser>>(
60         self,
61         p: &ParserI<P>,
62     ) -> Result<ast::ClassSetItem> {
63         use self::Primitive::*;
64         use ast::ClassSetItem;
65 
66         match self {
67             Literal(lit) => Ok(ClassSetItem::Literal(lit)),
68             Perl(cls) => Ok(ClassSetItem::Perl(cls)),
69             Unicode(cls) => Ok(ClassSetItem::Unicode(cls)),
70             x => Err(p.error(*x.span(), ast::ErrorKind::ClassEscapeInvalid)),
71         }
72     }
73 
74     /// Convert this primitive into a literal in a character class. In
75     /// particular, literals are the only valid items that can appear in
76     /// ranges.
77     ///
78     /// If this primitive is not a legal item (i.e., a class, assertion or a
79     /// dot), then return an error.
into_class_literal<P: Borrow<Parser>>( self, p: &ParserI<P>, ) -> Result<ast::Literal>80     fn into_class_literal<P: Borrow<Parser>>(
81         self,
82         p: &ParserI<P>,
83     ) -> Result<ast::Literal> {
84         use self::Primitive::*;
85 
86         match self {
87             Literal(lit) => Ok(lit),
88             x => Err(p.error(*x.span(), ast::ErrorKind::ClassRangeLiteral)),
89         }
90     }
91 }
92 
93 /// Returns true if the given character is a hexadecimal digit.
is_hex(c: char) -> bool94 fn is_hex(c: char) -> bool {
95     ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')
96 }
97 
98 /// Returns true if the given character is a valid in a capture group name.
99 ///
100 /// If `first` is true, then `c` is treated as the first character in the
101 /// group name (which must be alphabetic or underscore).
is_capture_char(c: char, first: bool) -> bool102 fn is_capture_char(c: char, first: bool) -> bool {
103     c == '_'
104         || (!first
105             && (('0' <= c && c <= '9') || c == '.' || c == '[' || c == ']'))
106         || ('A' <= c && c <= 'Z')
107         || ('a' <= c && c <= 'z')
108 }
109 
110 /// A builder for a regular expression parser.
111 ///
112 /// This builder permits modifying configuration options for the parser.
113 #[derive(Clone, Debug)]
114 pub struct ParserBuilder {
115     ignore_whitespace: bool,
116     nest_limit: u32,
117     octal: bool,
118 }
119 
120 impl Default for ParserBuilder {
default() -> ParserBuilder121     fn default() -> ParserBuilder {
122         ParserBuilder::new()
123     }
124 }
125 
126 impl ParserBuilder {
127     /// Create a new parser builder with a default configuration.
new() -> ParserBuilder128     pub fn new() -> ParserBuilder {
129         ParserBuilder {
130             ignore_whitespace: false,
131             nest_limit: 250,
132             octal: false,
133         }
134     }
135 
136     /// Build a parser from this configuration with the given pattern.
build(&self) -> Parser137     pub fn build(&self) -> Parser {
138         Parser {
139             pos: Cell::new(Position { offset: 0, line: 1, column: 1 }),
140             capture_index: Cell::new(0),
141             nest_limit: self.nest_limit,
142             octal: self.octal,
143             initial_ignore_whitespace: self.ignore_whitespace,
144             ignore_whitespace: Cell::new(self.ignore_whitespace),
145             comments: RefCell::new(vec![]),
146             stack_group: RefCell::new(vec![]),
147             stack_class: RefCell::new(vec![]),
148             capture_names: RefCell::new(vec![]),
149             scratch: RefCell::new(String::new()),
150         }
151     }
152 
153     /// Set the nesting limit for this parser.
154     ///
155     /// The nesting limit controls how deep the abstract syntax tree is allowed
156     /// to be. If the AST exceeds the given limit (e.g., with too many nested
157     /// groups), then an error is returned by the parser.
158     ///
159     /// The purpose of this limit is to act as a heuristic to prevent stack
160     /// overflow for consumers that do structural induction on an `Ast` using
161     /// explicit recursion. While this crate never does this (instead using
162     /// constant stack space and moving the call stack to the heap), other
163     /// crates may.
164     ///
165     /// This limit is not checked until the entire Ast is parsed. Therefore,
166     /// if callers want to put a limit on the amount of heap space used, then
167     /// they should impose a limit on the length, in bytes, of the concrete
168     /// pattern string. In particular, this is viable since this parser
169     /// implementation will limit itself to heap space proportional to the
170     /// lenth of the pattern string.
171     ///
172     /// Note that a nest limit of `0` will return a nest limit error for most
173     /// patterns but not all. For example, a nest limit of `0` permits `a` but
174     /// not `ab`, since `ab` requires a concatenation, which results in a nest
175     /// depth of `1`. In general, a nest limit is not something that manifests
176     /// in an obvious way in the concrete syntax, therefore, it should not be
177     /// used in a granular way.
nest_limit(&mut self, limit: u32) -> &mut ParserBuilder178     pub fn nest_limit(&mut self, limit: u32) -> &mut ParserBuilder {
179         self.nest_limit = limit;
180         self
181     }
182 
183     /// Whether to support octal syntax or not.
184     ///
185     /// Octal syntax is a little-known way of uttering Unicode codepoints in
186     /// a regular expression. For example, `a`, `\x61`, `\u0061` and
187     /// `\141` are all equivalent regular expressions, where the last example
188     /// shows octal syntax.
189     ///
190     /// While supporting octal syntax isn't in and of itself a problem, it does
191     /// make good error messages harder. That is, in PCRE based regex engines,
192     /// syntax like `\0` invokes a backreference, which is explicitly
193     /// unsupported in Rust's regex engine. However, many users expect it to
194     /// be supported. Therefore, when octal support is disabled, the error
195     /// message will explicitly mention that backreferences aren't supported.
196     ///
197     /// Octal syntax is disabled by default.
octal(&mut self, yes: bool) -> &mut ParserBuilder198     pub fn octal(&mut self, yes: bool) -> &mut ParserBuilder {
199         self.octal = yes;
200         self
201     }
202 
203     /// Enable verbose mode in the regular expression.
204     ///
205     /// When enabled, verbose mode permits insigificant whitespace in many
206     /// places in the regular expression, as well as comments. Comments are
207     /// started using `#` and continue until the end of the line.
208     ///
209     /// By default, this is disabled. It may be selectively enabled in the
210     /// regular expression by using the `x` flag regardless of this setting.
ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder211     pub fn ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder {
212         self.ignore_whitespace = yes;
213         self
214     }
215 }
216 
217 /// A regular expression parser.
218 ///
219 /// This parses a string representation of a regular expression into an
220 /// abstract syntax tree. The size of the tree is proportional to the length
221 /// of the regular expression pattern.
222 ///
223 /// A `Parser` can be configured in more detail via a
224 /// [`ParserBuilder`](struct.ParserBuilder.html).
225 #[derive(Clone, Debug)]
226 pub struct Parser {
227     /// The current position of the parser.
228     pos: Cell<Position>,
229     /// The current capture index.
230     capture_index: Cell<u32>,
231     /// The maximum number of open parens/brackets allowed. If the parser
232     /// exceeds this number, then an error is returned.
233     nest_limit: u32,
234     /// Whether to support octal syntax or not. When `false`, the parser will
235     /// return an error helpfully pointing out that backreferences are not
236     /// supported.
237     octal: bool,
238     /// The initial setting for `ignore_whitespace` as provided by
239     /// Th`ParserBuilder`. is is used when reseting the parser's state.
240     initial_ignore_whitespace: bool,
241     /// Whether whitespace should be ignored. When enabled, comments are
242     /// also permitted.
243     ignore_whitespace: Cell<bool>,
244     /// A list of comments, in order of appearance.
245     comments: RefCell<Vec<ast::Comment>>,
246     /// A stack of grouped sub-expressions, including alternations.
247     stack_group: RefCell<Vec<GroupState>>,
248     /// A stack of nested character classes. This is only non-empty when
249     /// parsing a class.
250     stack_class: RefCell<Vec<ClassState>>,
251     /// A sorted sequence of capture names. This is used to detect duplicate
252     /// capture names and report an error if one is detected.
253     capture_names: RefCell<Vec<ast::CaptureName>>,
254     /// A scratch buffer used in various places. Mostly this is used to
255     /// accumulate relevant characters from parts of a pattern.
256     scratch: RefCell<String>,
257 }
258 
259 /// ParserI is the internal parser implementation.
260 ///
261 /// We use this separate type so that we can carry the provided pattern string
262 /// along with us. In particular, a `Parser` internal state is not tied to any
263 /// one pattern, but `ParserI` is.
264 ///
265 /// This type also lets us use `ParserI<&Parser>` in production code while
266 /// retaining the convenience of `ParserI<Parser>` for tests, which sometimes
267 /// work against the internal interface of the parser.
268 #[derive(Clone, Debug)]
269 struct ParserI<'s, P> {
270     /// The parser state/configuration.
271     parser: P,
272     /// The full regular expression provided by the user.
273     pattern: &'s str,
274 }
275 
276 /// GroupState represents a single stack frame while parsing nested groups
277 /// and alternations. Each frame records the state up to an opening parenthesis
278 /// or a alternating bracket `|`.
279 #[derive(Clone, Debug)]
280 enum GroupState {
281     /// This state is pushed whenever an opening group is found.
282     Group {
283         /// The concatenation immediately preceding the opening group.
284         concat: ast::Concat,
285         /// The group that has been opened. Its sub-AST is always empty.
286         group: ast::Group,
287         /// Whether this group has the `x` flag enabled or not.
288         ignore_whitespace: bool,
289     },
290     /// This state is pushed whenever a new alternation branch is found. If
291     /// an alternation branch is found and this state is at the top of the
292     /// stack, then this state should be modified to include the new
293     /// alternation.
294     Alternation(ast::Alternation),
295 }
296 
297 /// ClassState represents a single stack frame while parsing character classes.
298 /// Each frame records the state up to an intersection, difference, symmetric
299 /// difference or nested class.
300 ///
301 /// Note that a parser's character class stack is only non-empty when parsing
302 /// a character class. In all other cases, it is empty.
303 #[derive(Clone, Debug)]
304 enum ClassState {
305     /// This state is pushed whenever an opening bracket is found.
306     Open {
307         /// The union of class items immediately preceding this class.
308         union: ast::ClassSetUnion,
309         /// The class that has been opened. Typically this just corresponds
310         /// to the `[`, but it can also include `[^` since `^` indicates
311         /// negation of the class.
312         set: ast::ClassBracketed,
313     },
314     /// This state is pushed when a operator is seen. When popped, the stored
315     /// set becomes the left hand side of the operator.
316     Op {
317         /// The type of the operation, i.e., &&, -- or ~~.
318         kind: ast::ClassSetBinaryOpKind,
319         /// The left-hand side of the operator.
320         lhs: ast::ClassSet,
321     },
322 }
323 
324 impl Parser {
325     /// Create a new parser with a default configuration.
326     ///
327     /// The parser can be run with either the `parse` or `parse_with_comments`
328     /// methods. The parse methods return an abstract syntax tree.
329     ///
330     /// To set configuration options on the parser, use
331     /// [`ParserBuilder`](struct.ParserBuilder.html).
new() -> Parser332     pub fn new() -> Parser {
333         ParserBuilder::new().build()
334     }
335 
336     /// Parse the regular expression into an abstract syntax tree.
parse(&mut self, pattern: &str) -> Result<Ast>337     pub fn parse(&mut self, pattern: &str) -> Result<Ast> {
338         ParserI::new(self, pattern).parse()
339     }
340 
341     /// Parse the regular expression and return an abstract syntax tree with
342     /// all of the comments found in the pattern.
parse_with_comments( &mut self, pattern: &str, ) -> Result<ast::WithComments>343     pub fn parse_with_comments(
344         &mut self,
345         pattern: &str,
346     ) -> Result<ast::WithComments> {
347         ParserI::new(self, pattern).parse_with_comments()
348     }
349 
350     /// Reset the internal state of a parser.
351     ///
352     /// This is called at the beginning of every parse. This prevents the
353     /// parser from running with inconsistent state (say, if a previous
354     /// invocation returned an error and the parser is reused).
reset(&self)355     fn reset(&self) {
356         // These settings should be in line with the construction
357         // in `ParserBuilder::build`.
358         self.pos.set(Position { offset: 0, line: 1, column: 1 });
359         self.ignore_whitespace.set(self.initial_ignore_whitespace);
360         self.comments.borrow_mut().clear();
361         self.stack_group.borrow_mut().clear();
362         self.stack_class.borrow_mut().clear();
363     }
364 }
365 
366 impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
367     /// Build an internal parser from a parser configuration and a pattern.
new(parser: P, pattern: &'s str) -> ParserI<'s, P>368     fn new(parser: P, pattern: &'s str) -> ParserI<'s, P> {
369         ParserI { parser: parser, pattern: pattern }
370     }
371 
372     /// Return a reference to the parser state.
parser(&self) -> &Parser373     fn parser(&self) -> &Parser {
374         self.parser.borrow()
375     }
376 
377     /// Return a reference to the pattern being parsed.
pattern(&self) -> &str378     fn pattern(&self) -> &str {
379         self.pattern.borrow()
380     }
381 
382     /// Create a new error with the given span and error type.
error(&self, span: Span, kind: ast::ErrorKind) -> ast::Error383     fn error(&self, span: Span, kind: ast::ErrorKind) -> ast::Error {
384         ast::Error {
385             kind: kind,
386             pattern: self.pattern().to_string(),
387             span: span,
388         }
389     }
390 
391     /// Return the current offset of the parser.
392     ///
393     /// The offset starts at `0` from the beginning of the regular expression
394     /// pattern string.
offset(&self) -> usize395     fn offset(&self) -> usize {
396         self.parser().pos.get().offset
397     }
398 
399     /// Return the current line number of the parser.
400     ///
401     /// The line number starts at `1`.
line(&self) -> usize402     fn line(&self) -> usize {
403         self.parser().pos.get().line
404     }
405 
406     /// Return the current column of the parser.
407     ///
408     /// The column number starts at `1` and is reset whenever a `\n` is seen.
column(&self) -> usize409     fn column(&self) -> usize {
410         self.parser().pos.get().column
411     }
412 
413     /// Return the next capturing index. Each subsequent call increments the
414     /// internal index.
415     ///
416     /// The span given should correspond to the location of the opening
417     /// parenthesis.
418     ///
419     /// If the capture limit is exceeded, then an error is returned.
next_capture_index(&self, span: Span) -> Result<u32>420     fn next_capture_index(&self, span: Span) -> Result<u32> {
421         let current = self.parser().capture_index.get();
422         let i = current.checked_add(1).ok_or_else(|| {
423             self.error(span, ast::ErrorKind::CaptureLimitExceeded)
424         })?;
425         self.parser().capture_index.set(i);
426         Ok(i)
427     }
428 
429     /// Adds the given capture name to this parser. If this capture name has
430     /// already been used, then an error is returned.
add_capture_name(&self, cap: &ast::CaptureName) -> Result<()>431     fn add_capture_name(&self, cap: &ast::CaptureName) -> Result<()> {
432         let mut names = self.parser().capture_names.borrow_mut();
433         match names
434             .binary_search_by_key(&cap.name.as_str(), |c| c.name.as_str())
435         {
436             Err(i) => {
437                 names.insert(i, cap.clone());
438                 Ok(())
439             }
440             Ok(i) => Err(self.error(
441                 cap.span,
442                 ast::ErrorKind::GroupNameDuplicate { original: names[i].span },
443             )),
444         }
445     }
446 
447     /// Return whether the parser should ignore whitespace or not.
ignore_whitespace(&self) -> bool448     fn ignore_whitespace(&self) -> bool {
449         self.parser().ignore_whitespace.get()
450     }
451 
452     /// Return the character at the current position of the parser.
453     ///
454     /// This panics if the current position does not point to a valid char.
char(&self) -> char455     fn char(&self) -> char {
456         self.char_at(self.offset())
457     }
458 
459     /// Return the character at the given position.
460     ///
461     /// This panics if the given position does not point to a valid char.
char_at(&self, i: usize) -> char462     fn char_at(&self, i: usize) -> char {
463         self.pattern()[i..]
464             .chars()
465             .next()
466             .unwrap_or_else(|| panic!("expected char at offset {}", i))
467     }
468 
469     /// Bump the parser to the next Unicode scalar value.
470     ///
471     /// If the end of the input has been reached, then `false` is returned.
bump(&self) -> bool472     fn bump(&self) -> bool {
473         if self.is_eof() {
474             return false;
475         }
476         let Position { mut offset, mut line, mut column } = self.pos();
477         if self.char() == '\n' {
478             line = line.checked_add(1).unwrap();
479             column = 1;
480         } else {
481             column = column.checked_add(1).unwrap();
482         }
483         offset += self.char().len_utf8();
484         self.parser().pos.set(Position {
485             offset: offset,
486             line: line,
487             column: column,
488         });
489         self.pattern()[self.offset()..].chars().next().is_some()
490     }
491 
492     /// If the substring starting at the current position of the parser has
493     /// the given prefix, then bump the parser to the character immediately
494     /// following the prefix and return true. Otherwise, don't bump the parser
495     /// and return false.
bump_if(&self, prefix: &str) -> bool496     fn bump_if(&self, prefix: &str) -> bool {
497         if self.pattern()[self.offset()..].starts_with(prefix) {
498             for _ in 0..prefix.chars().count() {
499                 self.bump();
500             }
501             true
502         } else {
503             false
504         }
505     }
506 
507     /// Returns true if and only if the parser is positioned at a look-around
508     /// prefix. The conditions under which this returns true must always
509     /// correspond to a regular expression that would otherwise be consider
510     /// invalid.
511     ///
512     /// This should only be called immediately after parsing the opening of
513     /// a group or a set of flags.
is_lookaround_prefix(&self) -> bool514     fn is_lookaround_prefix(&self) -> bool {
515         self.bump_if("?=")
516             || self.bump_if("?!")
517             || self.bump_if("?<=")
518             || self.bump_if("?<!")
519     }
520 
521     /// Bump the parser, and if the `x` flag is enabled, bump through any
522     /// subsequent spaces. Return true if and only if the parser is not at
523     /// EOF.
bump_and_bump_space(&self) -> bool524     fn bump_and_bump_space(&self) -> bool {
525         if !self.bump() {
526             return false;
527         }
528         self.bump_space();
529         !self.is_eof()
530     }
531 
532     /// If the `x` flag is enabled (i.e., whitespace insensitivity with
533     /// comments), then this will advance the parser through all whitespace
534     /// and comments to the next non-whitespace non-comment byte.
535     ///
536     /// If the `x` flag is disabled, then this is a no-op.
537     ///
538     /// This should be used selectively throughout the parser where
539     /// arbitrary whitespace is permitted when the `x` flag is enabled. For
540     /// example, `{   5  , 6}` is equivalent to `{5,6}`.
bump_space(&self)541     fn bump_space(&self) {
542         if !self.ignore_whitespace() {
543             return;
544         }
545         while !self.is_eof() {
546             if self.char().is_whitespace() {
547                 self.bump();
548             } else if self.char() == '#' {
549                 let start = self.pos();
550                 let mut comment_text = String::new();
551                 self.bump();
552                 while !self.is_eof() {
553                     let c = self.char();
554                     self.bump();
555                     if c == '\n' {
556                         break;
557                     }
558                     comment_text.push(c);
559                 }
560                 let comment = ast::Comment {
561                     span: Span::new(start, self.pos()),
562                     comment: comment_text,
563                 };
564                 self.parser().comments.borrow_mut().push(comment);
565             } else {
566                 break;
567             }
568         }
569     }
570 
571     /// Peek at the next character in the input without advancing the parser.
572     ///
573     /// If the input has been exhausted, then this returns `None`.
peek(&self) -> Option<char>574     fn peek(&self) -> Option<char> {
575         if self.is_eof() {
576             return None;
577         }
578         self.pattern()[self.offset() + self.char().len_utf8()..].chars().next()
579     }
580 
581     /// Like peek, but will ignore spaces when the parser is in whitespace
582     /// insensitive mode.
peek_space(&self) -> Option<char>583     fn peek_space(&self) -> Option<char> {
584         if !self.ignore_whitespace() {
585             return self.peek();
586         }
587         if self.is_eof() {
588             return None;
589         }
590         let mut start = self.offset() + self.char().len_utf8();
591         let mut in_comment = false;
592         for (i, c) in self.pattern()[start..].char_indices() {
593             if c.is_whitespace() {
594                 continue;
595             } else if !in_comment && c == '#' {
596                 in_comment = true;
597             } else if in_comment && c == '\n' {
598                 in_comment = false;
599             } else {
600                 start += i;
601                 break;
602             }
603         }
604         self.pattern()[start..].chars().next()
605     }
606 
607     /// Returns true if the next call to `bump` would return false.
is_eof(&self) -> bool608     fn is_eof(&self) -> bool {
609         self.offset() == self.pattern().len()
610     }
611 
612     /// Return the current position of the parser, which includes the offset,
613     /// line and column.
pos(&self) -> Position614     fn pos(&self) -> Position {
615         self.parser().pos.get()
616     }
617 
618     /// Create a span at the current position of the parser. Both the start
619     /// and end of the span are set.
span(&self) -> Span620     fn span(&self) -> Span {
621         Span::splat(self.pos())
622     }
623 
624     /// Create a span that covers the current character.
span_char(&self) -> Span625     fn span_char(&self) -> Span {
626         let mut next = Position {
627             offset: self.offset().checked_add(self.char().len_utf8()).unwrap(),
628             line: self.line(),
629             column: self.column().checked_add(1).unwrap(),
630         };
631         if self.char() == '\n' {
632             next.line += 1;
633             next.column = 1;
634         }
635         Span::new(self.pos(), next)
636     }
637 
638     /// Parse and push a single alternation on to the parser's internal stack.
639     /// If the top of the stack already has an alternation, then add to that
640     /// instead of pushing a new one.
641     ///
642     /// The concatenation given corresponds to a single alternation branch.
643     /// The concatenation returned starts the next branch and is empty.
644     ///
645     /// This assumes the parser is currently positioned at `|` and will advance
646     /// the parser to the character following `|`.
647     #[inline(never)]
push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat>648     fn push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
649         assert_eq!(self.char(), '|');
650         concat.span.end = self.pos();
651         self.push_or_add_alternation(concat);
652         self.bump();
653         Ok(ast::Concat { span: self.span(), asts: vec![] })
654     }
655 
656     /// Pushes or adds the given branch of an alternation to the parser's
657     /// internal stack of state.
push_or_add_alternation(&self, concat: ast::Concat)658     fn push_or_add_alternation(&self, concat: ast::Concat) {
659         use self::GroupState::*;
660 
661         let mut stack = self.parser().stack_group.borrow_mut();
662         if let Some(&mut Alternation(ref mut alts)) = stack.last_mut() {
663             alts.asts.push(concat.into_ast());
664             return;
665         }
666         stack.push(Alternation(ast::Alternation {
667             span: Span::new(concat.span.start, self.pos()),
668             asts: vec![concat.into_ast()],
669         }));
670     }
671 
672     /// Parse and push a group AST (and its parent concatenation) on to the
673     /// parser's internal stack. Return a fresh concatenation corresponding
674     /// to the group's sub-AST.
675     ///
676     /// If a set of flags was found (with no group), then the concatenation
677     /// is returned with that set of flags added.
678     ///
679     /// This assumes that the parser is currently positioned on the opening
680     /// parenthesis. It advances the parser to the character at the start
681     /// of the sub-expression (or adjoining expression).
682     ///
683     /// If there was a problem parsing the start of the group, then an error
684     /// is returned.
685     #[inline(never)]
push_group(&self, mut concat: ast::Concat) -> Result<ast::Concat>686     fn push_group(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
687         assert_eq!(self.char(), '(');
688         match self.parse_group()? {
689             Either::Left(set) => {
690                 let ignore = set.flags.flag_state(ast::Flag::IgnoreWhitespace);
691                 if let Some(v) = ignore {
692                     self.parser().ignore_whitespace.set(v);
693                 }
694 
695                 concat.asts.push(Ast::Flags(set));
696                 Ok(concat)
697             }
698             Either::Right(group) => {
699                 let old_ignore_whitespace = self.ignore_whitespace();
700                 let new_ignore_whitespace = group
701                     .flags()
702                     .and_then(|f| f.flag_state(ast::Flag::IgnoreWhitespace))
703                     .unwrap_or(old_ignore_whitespace);
704                 self.parser().stack_group.borrow_mut().push(
705                     GroupState::Group {
706                         concat: concat,
707                         group: group,
708                         ignore_whitespace: old_ignore_whitespace,
709                     },
710                 );
711                 self.parser().ignore_whitespace.set(new_ignore_whitespace);
712                 Ok(ast::Concat { span: self.span(), asts: vec![] })
713             }
714         }
715     }
716 
717     /// Pop a group AST from the parser's internal stack and set the group's
718     /// AST to the given concatenation. Return the concatenation containing
719     /// the group.
720     ///
721     /// This assumes that the parser is currently positioned on the closing
722     /// parenthesis and advances the parser to the character following the `)`.
723     ///
724     /// If no such group could be popped, then an unopened group error is
725     /// returned.
726     #[inline(never)]
pop_group(&self, mut group_concat: ast::Concat) -> Result<ast::Concat>727     fn pop_group(&self, mut group_concat: ast::Concat) -> Result<ast::Concat> {
728         use self::GroupState::*;
729 
730         assert_eq!(self.char(), ')');
731         let mut stack = self.parser().stack_group.borrow_mut();
732         let (mut prior_concat, mut group, ignore_whitespace, alt) = match stack
733             .pop()
734         {
735             Some(Group { concat, group, ignore_whitespace }) => {
736                 (concat, group, ignore_whitespace, None)
737             }
738             Some(Alternation(alt)) => match stack.pop() {
739                 Some(Group { concat, group, ignore_whitespace }) => {
740                     (concat, group, ignore_whitespace, Some(alt))
741                 }
742                 None | Some(Alternation(_)) => {
743                     return Err(self.error(
744                         self.span_char(),
745                         ast::ErrorKind::GroupUnopened,
746                     ));
747                 }
748             },
749             None => {
750                 return Err(self
751                     .error(self.span_char(), ast::ErrorKind::GroupUnopened));
752             }
753         };
754         self.parser().ignore_whitespace.set(ignore_whitespace);
755         group_concat.span.end = self.pos();
756         self.bump();
757         group.span.end = self.pos();
758         match alt {
759             Some(mut alt) => {
760                 alt.span.end = group_concat.span.end;
761                 alt.asts.push(group_concat.into_ast());
762                 group.ast = Box::new(alt.into_ast());
763             }
764             None => {
765                 group.ast = Box::new(group_concat.into_ast());
766             }
767         }
768         prior_concat.asts.push(Ast::Group(group));
769         Ok(prior_concat)
770     }
771 
772     /// Pop the last state from the parser's internal stack, if it exists, and
773     /// add the given concatenation to it. There either must be no state or a
774     /// single alternation item on the stack. Any other scenario produces an
775     /// error.
776     ///
777     /// This assumes that the parser has advanced to the end.
778     #[inline(never)]
pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast>779     fn pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast> {
780         concat.span.end = self.pos();
781         let mut stack = self.parser().stack_group.borrow_mut();
782         let ast = match stack.pop() {
783             None => Ok(concat.into_ast()),
784             Some(GroupState::Alternation(mut alt)) => {
785                 alt.span.end = self.pos();
786                 alt.asts.push(concat.into_ast());
787                 Ok(Ast::Alternation(alt))
788             }
789             Some(GroupState::Group { group, .. }) => {
790                 return Err(
791                     self.error(group.span, ast::ErrorKind::GroupUnclosed)
792                 );
793             }
794         };
795         // If we try to pop again, there should be nothing.
796         match stack.pop() {
797             None => ast,
798             Some(GroupState::Alternation(_)) => {
799                 // This unreachable is unfortunate. This case can't happen
800                 // because the only way we can be here is if there were two
801                 // `GroupState::Alternation`s adjacent in the parser's stack,
802                 // which we guarantee to never happen because we never push a
803                 // `GroupState::Alternation` if one is already at the top of
804                 // the stack.
805                 unreachable!()
806             }
807             Some(GroupState::Group { group, .. }) => {
808                 Err(self.error(group.span, ast::ErrorKind::GroupUnclosed))
809             }
810         }
811     }
812 
813     /// Parse the opening of a character class and push the current class
814     /// parsing context onto the parser's stack. This assumes that the parser
815     /// is positioned at an opening `[`. The given union should correspond to
816     /// the union of set items built up before seeing the `[`.
817     ///
818     /// If there was a problem parsing the opening of the class, then an error
819     /// is returned. Otherwise, a new union of set items for the class is
820     /// returned (which may be populated with either a `]` or a `-`).
821     #[inline(never)]
push_class_open( &self, parent_union: ast::ClassSetUnion, ) -> Result<ast::ClassSetUnion>822     fn push_class_open(
823         &self,
824         parent_union: ast::ClassSetUnion,
825     ) -> Result<ast::ClassSetUnion> {
826         assert_eq!(self.char(), '[');
827 
828         let (nested_set, nested_union) = self.parse_set_class_open()?;
829         self.parser()
830             .stack_class
831             .borrow_mut()
832             .push(ClassState::Open { union: parent_union, set: nested_set });
833         Ok(nested_union)
834     }
835 
836     /// Parse the end of a character class set and pop the character class
837     /// parser stack. The union given corresponds to the last union built
838     /// before seeing the closing `]`. The union returned corresponds to the
839     /// parent character class set with the nested class added to it.
840     ///
841     /// This assumes that the parser is positioned at a `]` and will advance
842     /// the parser to the byte immediately following the `]`.
843     ///
844     /// If the stack is empty after popping, then this returns the final
845     /// "top-level" character class AST (where a "top-level" character class
846     /// is one that is not nested inside any other character class).
847     ///
848     /// If there is no corresponding opening bracket on the parser's stack,
849     /// then an error is returned.
850     #[inline(never)]
pop_class( &self, nested_union: ast::ClassSetUnion, ) -> Result<Either<ast::ClassSetUnion, ast::Class>>851     fn pop_class(
852         &self,
853         nested_union: ast::ClassSetUnion,
854     ) -> Result<Either<ast::ClassSetUnion, ast::Class>> {
855         assert_eq!(self.char(), ']');
856 
857         let item = ast::ClassSet::Item(nested_union.into_item());
858         let prevset = self.pop_class_op(item);
859         let mut stack = self.parser().stack_class.borrow_mut();
860         match stack.pop() {
861             None => {
862                 // We can never observe an empty stack:
863                 //
864                 // 1) We are guaranteed to start with a non-empty stack since
865                 //    the character class parser is only initiated when it sees
866                 //    a `[`.
867                 // 2) If we ever observe an empty stack while popping after
868                 //    seeing a `]`, then we signal the character class parser
869                 //    to terminate.
870                 panic!("unexpected empty character class stack")
871             }
872             Some(ClassState::Op { .. }) => {
873                 // This panic is unfortunate, but this case is impossible
874                 // since we already popped the Op state if one exists above.
875                 // Namely, every push to the class parser stack is guarded by
876                 // whether an existing Op is already on the top of the stack.
877                 // If it is, the existing Op is modified. That is, the stack
878                 // can never have consecutive Op states.
879                 panic!("unexpected ClassState::Op")
880             }
881             Some(ClassState::Open { mut union, mut set }) => {
882                 self.bump();
883                 set.span.end = self.pos();
884                 set.kind = prevset;
885                 if stack.is_empty() {
886                     Ok(Either::Right(ast::Class::Bracketed(set)))
887                 } else {
888                     union.push(ast::ClassSetItem::Bracketed(Box::new(set)));
889                     Ok(Either::Left(union))
890                 }
891             }
892         }
893     }
894 
895     /// Return an "unclosed class" error whose span points to the most
896     /// recently opened class.
897     ///
898     /// This should only be called while parsing a character class.
899     #[inline(never)]
unclosed_class_error(&self) -> ast::Error900     fn unclosed_class_error(&self) -> ast::Error {
901         for state in self.parser().stack_class.borrow().iter().rev() {
902             match *state {
903                 ClassState::Open { ref set, .. } => {
904                     return self
905                         .error(set.span, ast::ErrorKind::ClassUnclosed);
906                 }
907                 _ => {}
908             }
909         }
910         // We are guaranteed to have a non-empty stack with at least
911         // one open bracket, so we should never get here.
912         panic!("no open character class found")
913     }
914 
915     /// Push the current set of class items on to the class parser's stack as
916     /// the left hand side of the given operator.
917     ///
918     /// A fresh set union is returned, which should be used to build the right
919     /// hand side of this operator.
920     #[inline(never)]
push_class_op( &self, next_kind: ast::ClassSetBinaryOpKind, next_union: ast::ClassSetUnion, ) -> ast::ClassSetUnion921     fn push_class_op(
922         &self,
923         next_kind: ast::ClassSetBinaryOpKind,
924         next_union: ast::ClassSetUnion,
925     ) -> ast::ClassSetUnion {
926         let item = ast::ClassSet::Item(next_union.into_item());
927         let new_lhs = self.pop_class_op(item);
928         self.parser()
929             .stack_class
930             .borrow_mut()
931             .push(ClassState::Op { kind: next_kind, lhs: new_lhs });
932         ast::ClassSetUnion { span: self.span(), items: vec![] }
933     }
934 
935     /// Pop a character class set from the character class parser stack. If the
936     /// top of the stack is just an item (not an operation), then return the
937     /// given set unchanged. If the top of the stack is an operation, then the
938     /// given set will be used as the rhs of the operation on the top of the
939     /// stack. In that case, the binary operation is returned as a set.
940     #[inline(never)]
pop_class_op(&self, rhs: ast::ClassSet) -> ast::ClassSet941     fn pop_class_op(&self, rhs: ast::ClassSet) -> ast::ClassSet {
942         let mut stack = self.parser().stack_class.borrow_mut();
943         let (kind, lhs) = match stack.pop() {
944             Some(ClassState::Op { kind, lhs }) => (kind, lhs),
945             Some(state @ ClassState::Open { .. }) => {
946                 stack.push(state);
947                 return rhs;
948             }
949             None => unreachable!(),
950         };
951         let span = Span::new(lhs.span().start, rhs.span().end);
952         ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
953             span: span,
954             kind: kind,
955             lhs: Box::new(lhs),
956             rhs: Box::new(rhs),
957         })
958     }
959 }
960 
961 impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
962     /// Parse the regular expression into an abstract syntax tree.
parse(&self) -> Result<Ast>963     fn parse(&self) -> Result<Ast> {
964         self.parse_with_comments().map(|astc| astc.ast)
965     }
966 
967     /// Parse the regular expression and return an abstract syntax tree with
968     /// all of the comments found in the pattern.
parse_with_comments(&self) -> Result<ast::WithComments>969     fn parse_with_comments(&self) -> Result<ast::WithComments> {
970         assert_eq!(self.offset(), 0, "parser can only be used once");
971         self.parser().reset();
972         let mut concat = ast::Concat { span: self.span(), asts: vec![] };
973         loop {
974             self.bump_space();
975             if self.is_eof() {
976                 break;
977             }
978             match self.char() {
979                 '(' => concat = self.push_group(concat)?,
980                 ')' => concat = self.pop_group(concat)?,
981                 '|' => concat = self.push_alternate(concat)?,
982                 '[' => {
983                     let class = self.parse_set_class()?;
984                     concat.asts.push(Ast::Class(class));
985                 }
986                 '?' => {
987                     concat = self.parse_uncounted_repetition(
988                         concat,
989                         ast::RepetitionKind::ZeroOrOne,
990                     )?;
991                 }
992                 '*' => {
993                     concat = self.parse_uncounted_repetition(
994                         concat,
995                         ast::RepetitionKind::ZeroOrMore,
996                     )?;
997                 }
998                 '+' => {
999                     concat = self.parse_uncounted_repetition(
1000                         concat,
1001                         ast::RepetitionKind::OneOrMore,
1002                     )?;
1003                 }
1004                 '{' => {
1005                     concat = self.parse_counted_repetition(concat)?;
1006                 }
1007                 _ => concat.asts.push(self.parse_primitive()?.into_ast()),
1008             }
1009         }
1010         let ast = self.pop_group_end(concat)?;
1011         NestLimiter::new(self).check(&ast)?;
1012         Ok(ast::WithComments {
1013             ast: ast,
1014             comments: mem::replace(
1015                 &mut *self.parser().comments.borrow_mut(),
1016                 vec![],
1017             ),
1018         })
1019     }
1020 
1021     /// Parses an uncounted repetition operation. An uncounted repetition
1022     /// operator includes ?, * and +, but does not include the {m,n} syntax.
1023     /// The given `kind` should correspond to the operator observed by the
1024     /// caller.
1025     ///
1026     /// This assumes that the paser is currently positioned at the repetition
1027     /// operator and advances the parser to the first character after the
1028     /// operator. (Note that the operator may include a single additional `?`,
1029     /// which makes the operator ungreedy.)
1030     ///
1031     /// The caller should include the concatenation that is being built. The
1032     /// concatenation returned includes the repetition operator applied to the
1033     /// last expression in the given concatenation.
1034     #[inline(never)]
parse_uncounted_repetition( &self, mut concat: ast::Concat, kind: ast::RepetitionKind, ) -> Result<ast::Concat>1035     fn parse_uncounted_repetition(
1036         &self,
1037         mut concat: ast::Concat,
1038         kind: ast::RepetitionKind,
1039     ) -> Result<ast::Concat> {
1040         assert!(
1041             self.char() == '?' || self.char() == '*' || self.char() == '+'
1042         );
1043         let op_start = self.pos();
1044         let ast = match concat.asts.pop() {
1045             Some(ast) => ast,
1046             None => {
1047                 return Err(
1048                     self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1049                 )
1050             }
1051         };
1052         match ast {
1053             Ast::Empty(_) | Ast::Flags(_) => {
1054                 return Err(
1055                     self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1056                 )
1057             }
1058             _ => {}
1059         }
1060         let mut greedy = true;
1061         if self.bump() && self.char() == '?' {
1062             greedy = false;
1063             self.bump();
1064         }
1065         concat.asts.push(Ast::Repetition(ast::Repetition {
1066             span: ast.span().with_end(self.pos()),
1067             op: ast::RepetitionOp {
1068                 span: Span::new(op_start, self.pos()),
1069                 kind: kind,
1070             },
1071             greedy: greedy,
1072             ast: Box::new(ast),
1073         }));
1074         Ok(concat)
1075     }
1076 
1077     /// Parses a counted repetition operation. A counted repetition operator
1078     /// corresponds to the {m,n} syntax, and does not include the ?, * or +
1079     /// operators.
1080     ///
1081     /// This assumes that the paser is currently positioned at the opening `{`
1082     /// and advances the parser to the first character after the operator.
1083     /// (Note that the operator may include a single additional `?`, which
1084     /// makes the operator ungreedy.)
1085     ///
1086     /// The caller should include the concatenation that is being built. The
1087     /// concatenation returned includes the repetition operator applied to the
1088     /// last expression in the given concatenation.
1089     #[inline(never)]
parse_counted_repetition( &self, mut concat: ast::Concat, ) -> Result<ast::Concat>1090     fn parse_counted_repetition(
1091         &self,
1092         mut concat: ast::Concat,
1093     ) -> Result<ast::Concat> {
1094         assert!(self.char() == '{');
1095         let start = self.pos();
1096         let ast = match concat.asts.pop() {
1097             Some(ast) => ast,
1098             None => {
1099                 return Err(
1100                     self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1101                 )
1102             }
1103         };
1104         match ast {
1105             Ast::Empty(_) | Ast::Flags(_) => {
1106                 return Err(
1107                     self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1108                 )
1109             }
1110             _ => {}
1111         }
1112         if !self.bump_and_bump_space() {
1113             return Err(self.error(
1114                 Span::new(start, self.pos()),
1115                 ast::ErrorKind::RepetitionCountUnclosed,
1116             ));
1117         }
1118         let count_start = specialize_err(
1119             self.parse_decimal(),
1120             ast::ErrorKind::DecimalEmpty,
1121             ast::ErrorKind::RepetitionCountDecimalEmpty,
1122         )?;
1123         let mut range = ast::RepetitionRange::Exactly(count_start);
1124         if self.is_eof() {
1125             return Err(self.error(
1126                 Span::new(start, self.pos()),
1127                 ast::ErrorKind::RepetitionCountUnclosed,
1128             ));
1129         }
1130         if self.char() == ',' {
1131             if !self.bump_and_bump_space() {
1132                 return Err(self.error(
1133                     Span::new(start, self.pos()),
1134                     ast::ErrorKind::RepetitionCountUnclosed,
1135                 ));
1136             }
1137             if self.char() != '}' {
1138                 let count_end = specialize_err(
1139                     self.parse_decimal(),
1140                     ast::ErrorKind::DecimalEmpty,
1141                     ast::ErrorKind::RepetitionCountDecimalEmpty,
1142                 )?;
1143                 range = ast::RepetitionRange::Bounded(count_start, count_end);
1144             } else {
1145                 range = ast::RepetitionRange::AtLeast(count_start);
1146             }
1147         }
1148         if self.is_eof() || self.char() != '}' {
1149             return Err(self.error(
1150                 Span::new(start, self.pos()),
1151                 ast::ErrorKind::RepetitionCountUnclosed,
1152             ));
1153         }
1154 
1155         let mut greedy = true;
1156         if self.bump_and_bump_space() && self.char() == '?' {
1157             greedy = false;
1158             self.bump();
1159         }
1160 
1161         let op_span = Span::new(start, self.pos());
1162         if !range.is_valid() {
1163             return Err(
1164                 self.error(op_span, ast::ErrorKind::RepetitionCountInvalid)
1165             );
1166         }
1167         concat.asts.push(Ast::Repetition(ast::Repetition {
1168             span: ast.span().with_end(self.pos()),
1169             op: ast::RepetitionOp {
1170                 span: op_span,
1171                 kind: ast::RepetitionKind::Range(range),
1172             },
1173             greedy: greedy,
1174             ast: Box::new(ast),
1175         }));
1176         Ok(concat)
1177     }
1178 
1179     /// Parse a group (which contains a sub-expression) or a set of flags.
1180     ///
1181     /// If a group was found, then it is returned with an empty AST. If a set
1182     /// of flags is found, then that set is returned.
1183     ///
1184     /// The parser should be positioned at the opening parenthesis.
1185     ///
1186     /// This advances the parser to the character before the start of the
1187     /// sub-expression (in the case of a group) or to the closing parenthesis
1188     /// immediately following the set of flags.
1189     ///
1190     /// # Errors
1191     ///
1192     /// If flags are given and incorrectly specified, then a corresponding
1193     /// error is returned.
1194     ///
1195     /// If a capture name is given and it is incorrectly specified, then a
1196     /// corresponding error is returned.
1197     #[inline(never)]
parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>>1198     fn parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>> {
1199         assert_eq!(self.char(), '(');
1200         let open_span = self.span_char();
1201         self.bump();
1202         self.bump_space();
1203         if self.is_lookaround_prefix() {
1204             return Err(self.error(
1205                 Span::new(open_span.start, self.span().end),
1206                 ast::ErrorKind::UnsupportedLookAround,
1207             ));
1208         }
1209         let inner_span = self.span();
1210         if self.bump_if("?P<") {
1211             let capture_index = self.next_capture_index(open_span)?;
1212             let cap = self.parse_capture_name(capture_index)?;
1213             Ok(Either::Right(ast::Group {
1214                 span: open_span,
1215                 kind: ast::GroupKind::CaptureName(cap),
1216                 ast: Box::new(Ast::Empty(self.span())),
1217             }))
1218         } else if self.bump_if("?") {
1219             if self.is_eof() {
1220                 return Err(
1221                     self.error(open_span, ast::ErrorKind::GroupUnclosed)
1222                 );
1223             }
1224             let flags = self.parse_flags()?;
1225             let char_end = self.char();
1226             self.bump();
1227             if char_end == ')' {
1228                 // We don't allow empty flags, e.g., `(?)`. We instead
1229                 // interpret it as a repetition operator missing its argument.
1230                 if flags.items.is_empty() {
1231                     return Err(self.error(
1232                         inner_span,
1233                         ast::ErrorKind::RepetitionMissing,
1234                     ));
1235                 }
1236                 Ok(Either::Left(ast::SetFlags {
1237                     span: Span { end: self.pos(), ..open_span },
1238                     flags: flags,
1239                 }))
1240             } else {
1241                 assert_eq!(char_end, ':');
1242                 Ok(Either::Right(ast::Group {
1243                     span: open_span,
1244                     kind: ast::GroupKind::NonCapturing(flags),
1245                     ast: Box::new(Ast::Empty(self.span())),
1246                 }))
1247             }
1248         } else {
1249             let capture_index = self.next_capture_index(open_span)?;
1250             Ok(Either::Right(ast::Group {
1251                 span: open_span,
1252                 kind: ast::GroupKind::CaptureIndex(capture_index),
1253                 ast: Box::new(Ast::Empty(self.span())),
1254             }))
1255         }
1256     }
1257 
1258     /// Parses a capture group name. Assumes that the parser is positioned at
1259     /// the first character in the name following the opening `<` (and may
1260     /// possibly be EOF). This advances the parser to the first character
1261     /// following the closing `>`.
1262     ///
1263     /// The caller must provide the capture index of the group for this name.
1264     #[inline(never)]
parse_capture_name( &self, capture_index: u32, ) -> Result<ast::CaptureName>1265     fn parse_capture_name(
1266         &self,
1267         capture_index: u32,
1268     ) -> Result<ast::CaptureName> {
1269         if self.is_eof() {
1270             return Err(self
1271                 .error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1272         }
1273         let start = self.pos();
1274         loop {
1275             if self.char() == '>' {
1276                 break;
1277             }
1278             if !is_capture_char(self.char(), self.pos() == start) {
1279                 return Err(self.error(
1280                     self.span_char(),
1281                     ast::ErrorKind::GroupNameInvalid,
1282                 ));
1283             }
1284             if !self.bump() {
1285                 break;
1286             }
1287         }
1288         let end = self.pos();
1289         if self.is_eof() {
1290             return Err(self
1291                 .error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1292         }
1293         assert_eq!(self.char(), '>');
1294         self.bump();
1295         let name = &self.pattern()[start.offset..end.offset];
1296         if name.is_empty() {
1297             return Err(self.error(
1298                 Span::new(start, start),
1299                 ast::ErrorKind::GroupNameEmpty,
1300             ));
1301         }
1302         let capname = ast::CaptureName {
1303             span: Span::new(start, end),
1304             name: name.to_string(),
1305             index: capture_index,
1306         };
1307         self.add_capture_name(&capname)?;
1308         Ok(capname)
1309     }
1310 
1311     /// Parse a sequence of flags starting at the current character.
1312     ///
1313     /// This advances the parser to the character immediately following the
1314     /// flags, which is guaranteed to be either `:` or `)`.
1315     ///
1316     /// # Errors
1317     ///
1318     /// If any flags are duplicated, then an error is returned.
1319     ///
1320     /// If the negation operator is used more than once, then an error is
1321     /// returned.
1322     ///
1323     /// If no flags could be found or if the negation operation is not followed
1324     /// by any flags, then an error is returned.
1325     #[inline(never)]
parse_flags(&self) -> Result<ast::Flags>1326     fn parse_flags(&self) -> Result<ast::Flags> {
1327         let mut flags = ast::Flags { span: self.span(), items: vec![] };
1328         let mut last_was_negation = None;
1329         while self.char() != ':' && self.char() != ')' {
1330             if self.char() == '-' {
1331                 last_was_negation = Some(self.span_char());
1332                 let item = ast::FlagsItem {
1333                     span: self.span_char(),
1334                     kind: ast::FlagsItemKind::Negation,
1335                 };
1336                 if let Some(i) = flags.add_item(item) {
1337                     return Err(self.error(
1338                         self.span_char(),
1339                         ast::ErrorKind::FlagRepeatedNegation {
1340                             original: flags.items[i].span,
1341                         },
1342                     ));
1343                 }
1344             } else {
1345                 last_was_negation = None;
1346                 let item = ast::FlagsItem {
1347                     span: self.span_char(),
1348                     kind: ast::FlagsItemKind::Flag(self.parse_flag()?),
1349                 };
1350                 if let Some(i) = flags.add_item(item) {
1351                     return Err(self.error(
1352                         self.span_char(),
1353                         ast::ErrorKind::FlagDuplicate {
1354                             original: flags.items[i].span,
1355                         },
1356                     ));
1357                 }
1358             }
1359             if !self.bump() {
1360                 return Err(
1361                     self.error(self.span(), ast::ErrorKind::FlagUnexpectedEof)
1362                 );
1363             }
1364         }
1365         if let Some(span) = last_was_negation {
1366             return Err(self.error(span, ast::ErrorKind::FlagDanglingNegation));
1367         }
1368         flags.span.end = self.pos();
1369         Ok(flags)
1370     }
1371 
1372     /// Parse the current character as a flag. Do not advance the parser.
1373     ///
1374     /// # Errors
1375     ///
1376     /// If the flag is not recognized, then an error is returned.
1377     #[inline(never)]
parse_flag(&self) -> Result<ast::Flag>1378     fn parse_flag(&self) -> Result<ast::Flag> {
1379         match self.char() {
1380             'i' => Ok(ast::Flag::CaseInsensitive),
1381             'm' => Ok(ast::Flag::MultiLine),
1382             's' => Ok(ast::Flag::DotMatchesNewLine),
1383             'U' => Ok(ast::Flag::SwapGreed),
1384             'u' => Ok(ast::Flag::Unicode),
1385             'x' => Ok(ast::Flag::IgnoreWhitespace),
1386             _ => {
1387                 Err(self
1388                     .error(self.span_char(), ast::ErrorKind::FlagUnrecognized))
1389             }
1390         }
1391     }
1392 
1393     /// Parse a primitive AST. e.g., A literal, non-set character class or
1394     /// assertion.
1395     ///
1396     /// This assumes that the parser expects a primitive at the current
1397     /// location. i.e., All other non-primitive cases have been handled.
1398     /// For example, if the parser's position is at `|`, then `|` will be
1399     /// treated as a literal (e.g., inside a character class).
1400     ///
1401     /// This advances the parser to the first character immediately following
1402     /// the primitive.
parse_primitive(&self) -> Result<Primitive>1403     fn parse_primitive(&self) -> Result<Primitive> {
1404         match self.char() {
1405             '\\' => self.parse_escape(),
1406             '.' => {
1407                 let ast = Primitive::Dot(self.span_char());
1408                 self.bump();
1409                 Ok(ast)
1410             }
1411             '^' => {
1412                 let ast = Primitive::Assertion(ast::Assertion {
1413                     span: self.span_char(),
1414                     kind: ast::AssertionKind::StartLine,
1415                 });
1416                 self.bump();
1417                 Ok(ast)
1418             }
1419             '$' => {
1420                 let ast = Primitive::Assertion(ast::Assertion {
1421                     span: self.span_char(),
1422                     kind: ast::AssertionKind::EndLine,
1423                 });
1424                 self.bump();
1425                 Ok(ast)
1426             }
1427             c => {
1428                 let ast = Primitive::Literal(ast::Literal {
1429                     span: self.span_char(),
1430                     kind: ast::LiteralKind::Verbatim,
1431                     c: c,
1432                 });
1433                 self.bump();
1434                 Ok(ast)
1435             }
1436         }
1437     }
1438 
1439     /// Parse an escape sequence as a primitive AST.
1440     ///
1441     /// This assumes the parser is positioned at the start of the escape
1442     /// sequence, i.e., `\`. It advances the parser to the first position
1443     /// immediately following the escape sequence.
1444     #[inline(never)]
parse_escape(&self) -> Result<Primitive>1445     fn parse_escape(&self) -> Result<Primitive> {
1446         assert_eq!(self.char(), '\\');
1447         let start = self.pos();
1448         if !self.bump() {
1449             return Err(self.error(
1450                 Span::new(start, self.pos()),
1451                 ast::ErrorKind::EscapeUnexpectedEof,
1452             ));
1453         }
1454         let c = self.char();
1455         // Put some of the more complicated routines into helpers.
1456         match c {
1457             '0'..='7' => {
1458                 if !self.parser().octal {
1459                     return Err(self.error(
1460                         Span::new(start, self.span_char().end),
1461                         ast::ErrorKind::UnsupportedBackreference,
1462                     ));
1463                 }
1464                 let mut lit = self.parse_octal();
1465                 lit.span.start = start;
1466                 return Ok(Primitive::Literal(lit));
1467             }
1468             '8'..='9' if !self.parser().octal => {
1469                 return Err(self.error(
1470                     Span::new(start, self.span_char().end),
1471                     ast::ErrorKind::UnsupportedBackreference,
1472                 ));
1473             }
1474             'x' | 'u' | 'U' => {
1475                 let mut lit = self.parse_hex()?;
1476                 lit.span.start = start;
1477                 return Ok(Primitive::Literal(lit));
1478             }
1479             'p' | 'P' => {
1480                 let mut cls = self.parse_unicode_class()?;
1481                 cls.span.start = start;
1482                 return Ok(Primitive::Unicode(cls));
1483             }
1484             'd' | 's' | 'w' | 'D' | 'S' | 'W' => {
1485                 let mut cls = self.parse_perl_class();
1486                 cls.span.start = start;
1487                 return Ok(Primitive::Perl(cls));
1488             }
1489             _ => {}
1490         }
1491 
1492         // Handle all of the one letter sequences inline.
1493         self.bump();
1494         let span = Span::new(start, self.pos());
1495         if is_meta_character(c) {
1496             return Ok(Primitive::Literal(ast::Literal {
1497                 span: span,
1498                 kind: ast::LiteralKind::Punctuation,
1499                 c: c,
1500             }));
1501         }
1502         let special = |kind, c| {
1503             Ok(Primitive::Literal(ast::Literal {
1504                 span: span,
1505                 kind: ast::LiteralKind::Special(kind),
1506                 c: c,
1507             }))
1508         };
1509         match c {
1510             'a' => special(ast::SpecialLiteralKind::Bell, '\x07'),
1511             'f' => special(ast::SpecialLiteralKind::FormFeed, '\x0C'),
1512             't' => special(ast::SpecialLiteralKind::Tab, '\t'),
1513             'n' => special(ast::SpecialLiteralKind::LineFeed, '\n'),
1514             'r' => special(ast::SpecialLiteralKind::CarriageReturn, '\r'),
1515             'v' => special(ast::SpecialLiteralKind::VerticalTab, '\x0B'),
1516             ' ' if self.ignore_whitespace() => {
1517                 special(ast::SpecialLiteralKind::Space, ' ')
1518             }
1519             'A' => Ok(Primitive::Assertion(ast::Assertion {
1520                 span: span,
1521                 kind: ast::AssertionKind::StartText,
1522             })),
1523             'z' => Ok(Primitive::Assertion(ast::Assertion {
1524                 span: span,
1525                 kind: ast::AssertionKind::EndText,
1526             })),
1527             'b' => Ok(Primitive::Assertion(ast::Assertion {
1528                 span: span,
1529                 kind: ast::AssertionKind::WordBoundary,
1530             })),
1531             'B' => Ok(Primitive::Assertion(ast::Assertion {
1532                 span: span,
1533                 kind: ast::AssertionKind::NotWordBoundary,
1534             })),
1535             _ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)),
1536         }
1537     }
1538 
1539     /// Parse an octal representation of a Unicode codepoint up to 3 digits
1540     /// long. This expects the parser to be positioned at the first octal
1541     /// digit and advances the parser to the first character immediately
1542     /// following the octal number. This also assumes that parsing octal
1543     /// escapes is enabled.
1544     ///
1545     /// Assuming the preconditions are met, this routine can never fail.
1546     #[inline(never)]
parse_octal(&self) -> ast::Literal1547     fn parse_octal(&self) -> ast::Literal {
1548         use std::char;
1549         use std::u32;
1550 
1551         assert!(self.parser().octal);
1552         assert!('0' <= self.char() && self.char() <= '7');
1553         let start = self.pos();
1554         // Parse up to two more digits.
1555         while self.bump()
1556             && '0' <= self.char()
1557             && self.char() <= '7'
1558             && self.pos().offset - start.offset <= 2
1559         {}
1560         let end = self.pos();
1561         let octal = &self.pattern()[start.offset..end.offset];
1562         // Parsing the octal should never fail since the above guarantees a
1563         // valid number.
1564         let codepoint =
1565             u32::from_str_radix(octal, 8).expect("valid octal number");
1566         // The max value for 3 digit octal is 0777 = 511 and [0, 511] has no
1567         // invalid Unicode scalar values.
1568         let c = char::from_u32(codepoint).expect("Unicode scalar value");
1569         ast::Literal {
1570             span: Span::new(start, end),
1571             kind: ast::LiteralKind::Octal,
1572             c: c,
1573         }
1574     }
1575 
1576     /// Parse a hex representation of a Unicode codepoint. This handles both
1577     /// hex notations, i.e., `\xFF` and `\x{FFFF}`. This expects the parser to
1578     /// be positioned at the `x`, `u` or `U` prefix. The parser is advanced to
1579     /// the first character immediately following the hexadecimal literal.
1580     #[inline(never)]
parse_hex(&self) -> Result<ast::Literal>1581     fn parse_hex(&self) -> Result<ast::Literal> {
1582         assert!(
1583             self.char() == 'x' || self.char() == 'u' || self.char() == 'U'
1584         );
1585 
1586         let hex_kind = match self.char() {
1587             'x' => ast::HexLiteralKind::X,
1588             'u' => ast::HexLiteralKind::UnicodeShort,
1589             _ => ast::HexLiteralKind::UnicodeLong,
1590         };
1591         if !self.bump_and_bump_space() {
1592             return Err(
1593                 self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)
1594             );
1595         }
1596         if self.char() == '{' {
1597             self.parse_hex_brace(hex_kind)
1598         } else {
1599             self.parse_hex_digits(hex_kind)
1600         }
1601     }
1602 
1603     /// Parse an N-digit hex representation of a Unicode codepoint. This
1604     /// expects the parser to be positioned at the first digit and will advance
1605     /// the parser to the first character immediately following the escape
1606     /// sequence.
1607     ///
1608     /// The number of digits given must be 2 (for `\xNN`), 4 (for `\uNNNN`)
1609     /// or 8 (for `\UNNNNNNNN`).
1610     #[inline(never)]
parse_hex_digits( &self, kind: ast::HexLiteralKind, ) -> Result<ast::Literal>1611     fn parse_hex_digits(
1612         &self,
1613         kind: ast::HexLiteralKind,
1614     ) -> Result<ast::Literal> {
1615         use std::char;
1616         use std::u32;
1617 
1618         let mut scratch = self.parser().scratch.borrow_mut();
1619         scratch.clear();
1620 
1621         let start = self.pos();
1622         for i in 0..kind.digits() {
1623             if i > 0 && !self.bump_and_bump_space() {
1624                 return Err(self
1625                     .error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
1626             }
1627             if !is_hex(self.char()) {
1628                 return Err(self.error(
1629                     self.span_char(),
1630                     ast::ErrorKind::EscapeHexInvalidDigit,
1631                 ));
1632             }
1633             scratch.push(self.char());
1634         }
1635         // The final bump just moves the parser past the literal, which may
1636         // be EOF.
1637         self.bump_and_bump_space();
1638         let end = self.pos();
1639         let hex = scratch.as_str();
1640         match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
1641             None => Err(self.error(
1642                 Span::new(start, end),
1643                 ast::ErrorKind::EscapeHexInvalid,
1644             )),
1645             Some(c) => Ok(ast::Literal {
1646                 span: Span::new(start, end),
1647                 kind: ast::LiteralKind::HexFixed(kind),
1648                 c: c,
1649             }),
1650         }
1651     }
1652 
1653     /// Parse a hex representation of any Unicode scalar value. This expects
1654     /// the parser to be positioned at the opening brace `{` and will advance
1655     /// the parser to the first character following the closing brace `}`.
1656     #[inline(never)]
parse_hex_brace( &self, kind: ast::HexLiteralKind, ) -> Result<ast::Literal>1657     fn parse_hex_brace(
1658         &self,
1659         kind: ast::HexLiteralKind,
1660     ) -> Result<ast::Literal> {
1661         use std::char;
1662         use std::u32;
1663 
1664         let mut scratch = self.parser().scratch.borrow_mut();
1665         scratch.clear();
1666 
1667         let brace_pos = self.pos();
1668         let start = self.span_char().end;
1669         while self.bump_and_bump_space() && self.char() != '}' {
1670             if !is_hex(self.char()) {
1671                 return Err(self.error(
1672                     self.span_char(),
1673                     ast::ErrorKind::EscapeHexInvalidDigit,
1674                 ));
1675             }
1676             scratch.push(self.char());
1677         }
1678         if self.is_eof() {
1679             return Err(self.error(
1680                 Span::new(brace_pos, self.pos()),
1681                 ast::ErrorKind::EscapeUnexpectedEof,
1682             ));
1683         }
1684         let end = self.pos();
1685         let hex = scratch.as_str();
1686         assert_eq!(self.char(), '}');
1687         self.bump_and_bump_space();
1688 
1689         if hex.is_empty() {
1690             return Err(self.error(
1691                 Span::new(brace_pos, self.pos()),
1692                 ast::ErrorKind::EscapeHexEmpty,
1693             ));
1694         }
1695         match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
1696             None => Err(self.error(
1697                 Span::new(start, end),
1698                 ast::ErrorKind::EscapeHexInvalid,
1699             )),
1700             Some(c) => Ok(ast::Literal {
1701                 span: Span::new(start, self.pos()),
1702                 kind: ast::LiteralKind::HexBrace(kind),
1703                 c: c,
1704             }),
1705         }
1706     }
1707 
1708     /// Parse a decimal number into a u32 while trimming leading and trailing
1709     /// whitespace.
1710     ///
1711     /// This expects the parser to be positioned at the first position where
1712     /// a decimal digit could occur. This will advance the parser to the byte
1713     /// immediately following the last contiguous decimal digit.
1714     ///
1715     /// If no decimal digit could be found or if there was a problem parsing
1716     /// the complete set of digits into a u32, then an error is returned.
parse_decimal(&self) -> Result<u32>1717     fn parse_decimal(&self) -> Result<u32> {
1718         let mut scratch = self.parser().scratch.borrow_mut();
1719         scratch.clear();
1720 
1721         while !self.is_eof() && self.char().is_whitespace() {
1722             self.bump();
1723         }
1724         let start = self.pos();
1725         while !self.is_eof() && '0' <= self.char() && self.char() <= '9' {
1726             scratch.push(self.char());
1727             self.bump_and_bump_space();
1728         }
1729         let span = Span::new(start, self.pos());
1730         while !self.is_eof() && self.char().is_whitespace() {
1731             self.bump_and_bump_space();
1732         }
1733         let digits = scratch.as_str();
1734         if digits.is_empty() {
1735             return Err(self.error(span, ast::ErrorKind::DecimalEmpty));
1736         }
1737         match u32::from_str_radix(digits, 10).ok() {
1738             Some(n) => Ok(n),
1739             None => Err(self.error(span, ast::ErrorKind::DecimalInvalid)),
1740         }
1741     }
1742 
1743     /// Parse a standard character class consisting primarily of characters or
1744     /// character ranges, but can also contain nested character classes of
1745     /// any type (sans `.`).
1746     ///
1747     /// This assumes the parser is positioned at the opening `[`. If parsing
1748     /// is successful, then the parser is advanced to the position immediately
1749     /// following the closing `]`.
1750     #[inline(never)]
parse_set_class(&self) -> Result<ast::Class>1751     fn parse_set_class(&self) -> Result<ast::Class> {
1752         assert_eq!(self.char(), '[');
1753 
1754         let mut union =
1755             ast::ClassSetUnion { span: self.span(), items: vec![] };
1756         loop {
1757             self.bump_space();
1758             if self.is_eof() {
1759                 return Err(self.unclosed_class_error());
1760             }
1761             match self.char() {
1762                 '[' => {
1763                     // If we've already parsed the opening bracket, then
1764                     // attempt to treat this as the beginning of an ASCII
1765                     // class. If ASCII class parsing fails, then the parser
1766                     // backs up to `[`.
1767                     if !self.parser().stack_class.borrow().is_empty() {
1768                         if let Some(cls) = self.maybe_parse_ascii_class() {
1769                             union.push(ast::ClassSetItem::Ascii(cls));
1770                             continue;
1771                         }
1772                     }
1773                     union = self.push_class_open(union)?;
1774                 }
1775                 ']' => match self.pop_class(union)? {
1776                     Either::Left(nested_union) => {
1777                         union = nested_union;
1778                     }
1779                     Either::Right(class) => return Ok(class),
1780                 },
1781                 '&' if self.peek() == Some('&') => {
1782                     assert!(self.bump_if("&&"));
1783                     union = self.push_class_op(
1784                         ast::ClassSetBinaryOpKind::Intersection,
1785                         union,
1786                     );
1787                 }
1788                 '-' if self.peek() == Some('-') => {
1789                     assert!(self.bump_if("--"));
1790                     union = self.push_class_op(
1791                         ast::ClassSetBinaryOpKind::Difference,
1792                         union,
1793                     );
1794                 }
1795                 '~' if self.peek() == Some('~') => {
1796                     assert!(self.bump_if("~~"));
1797                     union = self.push_class_op(
1798                         ast::ClassSetBinaryOpKind::SymmetricDifference,
1799                         union,
1800                     );
1801                 }
1802                 _ => {
1803                     union.push(self.parse_set_class_range()?);
1804                 }
1805             }
1806         }
1807     }
1808 
1809     /// Parse a single primitive item in a character class set. The item to
1810     /// be parsed can either be one of a simple literal character, a range
1811     /// between two simple literal characters or a "primitive" character
1812     /// class like \w or \p{Greek}.
1813     ///
1814     /// If an invalid escape is found, or if a character class is found where
1815     /// a simple literal is expected (e.g., in a range), then an error is
1816     /// returned.
1817     #[inline(never)]
parse_set_class_range(&self) -> Result<ast::ClassSetItem>1818     fn parse_set_class_range(&self) -> Result<ast::ClassSetItem> {
1819         let prim1 = self.parse_set_class_item()?;
1820         self.bump_space();
1821         if self.is_eof() {
1822             return Err(self.unclosed_class_error());
1823         }
1824         // If the next char isn't a `-`, then we don't have a range.
1825         // There are two exceptions. If the char after a `-` is a `]`, then
1826         // `-` is interpreted as a literal `-`. Alternatively, if the char
1827         // after a `-` is a `-`, then `--` corresponds to a "difference"
1828         // operation.
1829         if self.char() != '-'
1830             || self.peek_space() == Some(']')
1831             || self.peek_space() == Some('-')
1832         {
1833             return prim1.into_class_set_item(self);
1834         }
1835         // OK, now we're parsing a range, so bump past the `-` and parse the
1836         // second half of the range.
1837         if !self.bump_and_bump_space() {
1838             return Err(self.unclosed_class_error());
1839         }
1840         let prim2 = self.parse_set_class_item()?;
1841         let range = ast::ClassSetRange {
1842             span: Span::new(prim1.span().start, prim2.span().end),
1843             start: prim1.into_class_literal(self)?,
1844             end: prim2.into_class_literal(self)?,
1845         };
1846         if !range.is_valid() {
1847             return Err(
1848                 self.error(range.span, ast::ErrorKind::ClassRangeInvalid)
1849             );
1850         }
1851         Ok(ast::ClassSetItem::Range(range))
1852     }
1853 
1854     /// Parse a single item in a character class as a primitive, where the
1855     /// primitive either consists of a verbatim literal or a single escape
1856     /// sequence.
1857     ///
1858     /// This assumes the parser is positioned at the beginning of a primitive,
1859     /// and advances the parser to the first position after the primitive if
1860     /// successful.
1861     ///
1862     /// Note that it is the caller's responsibility to report an error if an
1863     /// illegal primitive was parsed.
1864     #[inline(never)]
parse_set_class_item(&self) -> Result<Primitive>1865     fn parse_set_class_item(&self) -> Result<Primitive> {
1866         if self.char() == '\\' {
1867             self.parse_escape()
1868         } else {
1869             let x = Primitive::Literal(ast::Literal {
1870                 span: self.span_char(),
1871                 kind: ast::LiteralKind::Verbatim,
1872                 c: self.char(),
1873             });
1874             self.bump();
1875             Ok(x)
1876         }
1877     }
1878 
1879     /// Parses the opening of a character class set. This includes the opening
1880     /// bracket along with `^` if present to indicate negation. This also
1881     /// starts parsing the opening set of unioned items if applicable, since
1882     /// there are special rules applied to certain characters in the opening
1883     /// of a character class. For example, `[^]]` is the class of all
1884     /// characters not equal to `]`. (`]` would need to be escaped in any other
1885     /// position.) Similarly for `-`.
1886     ///
1887     /// In all cases, the op inside the returned `ast::ClassBracketed` is an
1888     /// empty union. This empty union should be replaced with the actual item
1889     /// when it is popped from the parser's stack.
1890     ///
1891     /// This assumes the parser is positioned at the opening `[` and advances
1892     /// the parser to the first non-special byte of the character class.
1893     ///
1894     /// An error is returned if EOF is found.
1895     #[inline(never)]
parse_set_class_open( &self, ) -> Result<(ast::ClassBracketed, ast::ClassSetUnion)>1896     fn parse_set_class_open(
1897         &self,
1898     ) -> Result<(ast::ClassBracketed, ast::ClassSetUnion)> {
1899         assert_eq!(self.char(), '[');
1900         let start = self.pos();
1901         if !self.bump_and_bump_space() {
1902             return Err(self.error(
1903                 Span::new(start, self.pos()),
1904                 ast::ErrorKind::ClassUnclosed,
1905             ));
1906         }
1907 
1908         let negated = if self.char() != '^' {
1909             false
1910         } else {
1911             if !self.bump_and_bump_space() {
1912                 return Err(self.error(
1913                     Span::new(start, self.pos()),
1914                     ast::ErrorKind::ClassUnclosed,
1915                 ));
1916             }
1917             true
1918         };
1919         // Accept any number of `-` as literal `-`.
1920         let mut union =
1921             ast::ClassSetUnion { span: self.span(), items: vec![] };
1922         while self.char() == '-' {
1923             union.push(ast::ClassSetItem::Literal(ast::Literal {
1924                 span: self.span_char(),
1925                 kind: ast::LiteralKind::Verbatim,
1926                 c: '-',
1927             }));
1928             if !self.bump_and_bump_space() {
1929                 return Err(self.error(
1930                     Span::new(start, self.pos()),
1931                     ast::ErrorKind::ClassUnclosed,
1932                 ));
1933             }
1934         }
1935         // If `]` is the *first* char in a set, then interpret it as a literal
1936         // `]`. That is, an empty class is impossible to write.
1937         if union.items.is_empty() && self.char() == ']' {
1938             union.push(ast::ClassSetItem::Literal(ast::Literal {
1939                 span: self.span_char(),
1940                 kind: ast::LiteralKind::Verbatim,
1941                 c: ']',
1942             }));
1943             if !self.bump_and_bump_space() {
1944                 return Err(self.error(
1945                     Span::new(start, self.pos()),
1946                     ast::ErrorKind::ClassUnclosed,
1947                 ));
1948             }
1949         }
1950         let set = ast::ClassBracketed {
1951             span: Span::new(start, self.pos()),
1952             negated: negated,
1953             kind: ast::ClassSet::union(ast::ClassSetUnion {
1954                 span: Span::new(union.span.start, union.span.start),
1955                 items: vec![],
1956             }),
1957         };
1958         Ok((set, union))
1959     }
1960 
1961     /// Attempt to parse an ASCII character class, e.g., `[:alnum:]`.
1962     ///
1963     /// This assumes the parser is positioned at the opening `[`.
1964     ///
1965     /// If no valid ASCII character class could be found, then this does not
1966     /// advance the parser and `None` is returned. Otherwise, the parser is
1967     /// advanced to the first byte following the closing `]` and the
1968     /// corresponding ASCII class is returned.
1969     #[inline(never)]
maybe_parse_ascii_class(&self) -> Option<ast::ClassAscii>1970     fn maybe_parse_ascii_class(&self) -> Option<ast::ClassAscii> {
1971         // ASCII character classes are interesting from a parsing perspective
1972         // because parsing cannot fail with any interesting error. For example,
1973         // in order to use an ASCII character class, it must be enclosed in
1974         // double brackets, e.g., `[[:alnum:]]`. Alternatively, you might think
1975         // of it as "ASCII character characters have the syntax `[:NAME:]`
1976         // which can only appear within character brackets." This means that
1977         // things like `[[:lower:]A]` are legal constructs.
1978         //
1979         // However, if one types an incorrect ASCII character class, e.g.,
1980         // `[[:loower:]]`, then we treat that as a normal nested character
1981         // class containing the characters `:elorw`. One might argue that we
1982         // should return an error instead since the repeated colons give away
1983         // the intent to write an ASCII class. But what if the user typed
1984         // `[[:lower]]` instead? How can we tell that was intended to be an
1985         // ASCII class and not just a normal nested class?
1986         //
1987         // Reasonable people can probably disagree over this, but for better
1988         // or worse, we implement semantics that never fails at the expense
1989         // of better failure modes.
1990         assert_eq!(self.char(), '[');
1991         // If parsing fails, then we back up the parser to this starting point.
1992         let start = self.pos();
1993         let mut negated = false;
1994         if !self.bump() || self.char() != ':' {
1995             self.parser().pos.set(start);
1996             return None;
1997         }
1998         if !self.bump() {
1999             self.parser().pos.set(start);
2000             return None;
2001         }
2002         if self.char() == '^' {
2003             negated = true;
2004             if !self.bump() {
2005                 self.parser().pos.set(start);
2006                 return None;
2007             }
2008         }
2009         let name_start = self.offset();
2010         while self.char() != ':' && self.bump() {}
2011         if self.is_eof() {
2012             self.parser().pos.set(start);
2013             return None;
2014         }
2015         let name = &self.pattern()[name_start..self.offset()];
2016         if !self.bump_if(":]") {
2017             self.parser().pos.set(start);
2018             return None;
2019         }
2020         let kind = match ast::ClassAsciiKind::from_name(name) {
2021             Some(kind) => kind,
2022             None => {
2023                 self.parser().pos.set(start);
2024                 return None;
2025             }
2026         };
2027         Some(ast::ClassAscii {
2028             span: Span::new(start, self.pos()),
2029             kind: kind,
2030             negated: negated,
2031         })
2032     }
2033 
2034     /// Parse a Unicode class in either the single character notation, `\pN`
2035     /// or the multi-character bracketed notation, `\p{Greek}`. This assumes
2036     /// the parser is positioned at the `p` (or `P` for negation) and will
2037     /// advance the parser to the character immediately following the class.
2038     ///
2039     /// Note that this does not check whether the class name is valid or not.
2040     #[inline(never)]
parse_unicode_class(&self) -> Result<ast::ClassUnicode>2041     fn parse_unicode_class(&self) -> Result<ast::ClassUnicode> {
2042         assert!(self.char() == 'p' || self.char() == 'P');
2043 
2044         let mut scratch = self.parser().scratch.borrow_mut();
2045         scratch.clear();
2046 
2047         let negated = self.char() == 'P';
2048         if !self.bump_and_bump_space() {
2049             return Err(
2050                 self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)
2051             );
2052         }
2053         let (start, kind) = if self.char() == '{' {
2054             let start = self.span_char().end;
2055             while self.bump_and_bump_space() && self.char() != '}' {
2056                 scratch.push(self.char());
2057             }
2058             if self.is_eof() {
2059                 return Err(self
2060                     .error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2061             }
2062             assert_eq!(self.char(), '}');
2063             self.bump();
2064 
2065             let name = scratch.as_str();
2066             if let Some(i) = name.find("!=") {
2067                 (
2068                     start,
2069                     ast::ClassUnicodeKind::NamedValue {
2070                         op: ast::ClassUnicodeOpKind::NotEqual,
2071                         name: name[..i].to_string(),
2072                         value: name[i + 2..].to_string(),
2073                     },
2074                 )
2075             } else if let Some(i) = name.find(':') {
2076                 (
2077                     start,
2078                     ast::ClassUnicodeKind::NamedValue {
2079                         op: ast::ClassUnicodeOpKind::Colon,
2080                         name: name[..i].to_string(),
2081                         value: name[i + 1..].to_string(),
2082                     },
2083                 )
2084             } else if let Some(i) = name.find('=') {
2085                 (
2086                     start,
2087                     ast::ClassUnicodeKind::NamedValue {
2088                         op: ast::ClassUnicodeOpKind::Equal,
2089                         name: name[..i].to_string(),
2090                         value: name[i + 1..].to_string(),
2091                     },
2092                 )
2093             } else {
2094                 (start, ast::ClassUnicodeKind::Named(name.to_string()))
2095             }
2096         } else {
2097             let start = self.pos();
2098             let c = self.char();
2099             if c == '\\' {
2100                 return Err(self.error(
2101                     self.span_char(),
2102                     ast::ErrorKind::UnicodeClassInvalid,
2103                 ));
2104             }
2105             self.bump_and_bump_space();
2106             let kind = ast::ClassUnicodeKind::OneLetter(c);
2107             (start, kind)
2108         };
2109         Ok(ast::ClassUnicode {
2110             span: Span::new(start, self.pos()),
2111             negated: negated,
2112             kind: kind,
2113         })
2114     }
2115 
2116     /// Parse a Perl character class, e.g., `\d` or `\W`. This assumes the
2117     /// parser is currently at a valid character class name and will be
2118     /// advanced to the character immediately following the class.
2119     #[inline(never)]
parse_perl_class(&self) -> ast::ClassPerl2120     fn parse_perl_class(&self) -> ast::ClassPerl {
2121         let c = self.char();
2122         let span = self.span_char();
2123         self.bump();
2124         let (negated, kind) = match c {
2125             'd' => (false, ast::ClassPerlKind::Digit),
2126             'D' => (true, ast::ClassPerlKind::Digit),
2127             's' => (false, ast::ClassPerlKind::Space),
2128             'S' => (true, ast::ClassPerlKind::Space),
2129             'w' => (false, ast::ClassPerlKind::Word),
2130             'W' => (true, ast::ClassPerlKind::Word),
2131             c => panic!("expected valid Perl class but got '{}'", c),
2132         };
2133         ast::ClassPerl { span: span, kind: kind, negated: negated }
2134     }
2135 }
2136 
2137 /// A type that traverses a fully parsed Ast and checks whether its depth
2138 /// exceeds the specified nesting limit. If it does, then an error is returned.
2139 #[derive(Debug)]
2140 struct NestLimiter<'p, 's: 'p, P: 'p + 's> {
2141     /// The parser that is checking the nest limit.
2142     p: &'p ParserI<'s, P>,
2143     /// The current depth while walking an Ast.
2144     depth: u32,
2145 }
2146 
2147 impl<'p, 's, P: Borrow<Parser>> NestLimiter<'p, 's, P> {
new(p: &'p ParserI<'s, P>) -> NestLimiter<'p, 's, P>2148     fn new(p: &'p ParserI<'s, P>) -> NestLimiter<'p, 's, P> {
2149         NestLimiter { p: p, depth: 0 }
2150     }
2151 
2152     #[inline(never)]
check(self, ast: &Ast) -> Result<()>2153     fn check(self, ast: &Ast) -> Result<()> {
2154         ast::visit(ast, self)
2155     }
2156 
increment_depth(&mut self, span: &Span) -> Result<()>2157     fn increment_depth(&mut self, span: &Span) -> Result<()> {
2158         let new = self.depth.checked_add(1).ok_or_else(|| {
2159             self.p.error(
2160                 span.clone(),
2161                 ast::ErrorKind::NestLimitExceeded(::std::u32::MAX),
2162             )
2163         })?;
2164         let limit = self.p.parser().nest_limit;
2165         if new > limit {
2166             return Err(self.p.error(
2167                 span.clone(),
2168                 ast::ErrorKind::NestLimitExceeded(limit),
2169             ));
2170         }
2171         self.depth = new;
2172         Ok(())
2173     }
2174 
decrement_depth(&mut self)2175     fn decrement_depth(&mut self) {
2176         // Assuming the correctness of the visitor, this should never drop
2177         // below 0.
2178         self.depth = self.depth.checked_sub(1).unwrap();
2179     }
2180 }
2181 
2182 impl<'p, 's, P: Borrow<Parser>> ast::Visitor for NestLimiter<'p, 's, P> {
2183     type Output = ();
2184     type Err = ast::Error;
2185 
finish(self) -> Result<()>2186     fn finish(self) -> Result<()> {
2187         Ok(())
2188     }
2189 
visit_pre(&mut self, ast: &Ast) -> Result<()>2190     fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
2191         let span = match *ast {
2192             Ast::Empty(_)
2193             | Ast::Flags(_)
2194             | Ast::Literal(_)
2195             | Ast::Dot(_)
2196             | Ast::Assertion(_)
2197             | Ast::Class(ast::Class::Unicode(_))
2198             | Ast::Class(ast::Class::Perl(_)) => {
2199                 // These are all base cases, so we don't increment depth.
2200                 return Ok(());
2201             }
2202             Ast::Class(ast::Class::Bracketed(ref x)) => &x.span,
2203             Ast::Repetition(ref x) => &x.span,
2204             Ast::Group(ref x) => &x.span,
2205             Ast::Alternation(ref x) => &x.span,
2206             Ast::Concat(ref x) => &x.span,
2207         };
2208         self.increment_depth(span)
2209     }
2210 
visit_post(&mut self, ast: &Ast) -> Result<()>2211     fn visit_post(&mut self, ast: &Ast) -> Result<()> {
2212         match *ast {
2213             Ast::Empty(_)
2214             | Ast::Flags(_)
2215             | Ast::Literal(_)
2216             | Ast::Dot(_)
2217             | Ast::Assertion(_)
2218             | Ast::Class(ast::Class::Unicode(_))
2219             | Ast::Class(ast::Class::Perl(_)) => {
2220                 // These are all base cases, so we don't decrement depth.
2221                 Ok(())
2222             }
2223             Ast::Class(ast::Class::Bracketed(_))
2224             | Ast::Repetition(_)
2225             | Ast::Group(_)
2226             | Ast::Alternation(_)
2227             | Ast::Concat(_) => {
2228                 self.decrement_depth();
2229                 Ok(())
2230             }
2231         }
2232     }
2233 
visit_class_set_item_pre( &mut self, ast: &ast::ClassSetItem, ) -> Result<()>2234     fn visit_class_set_item_pre(
2235         &mut self,
2236         ast: &ast::ClassSetItem,
2237     ) -> Result<()> {
2238         let span = match *ast {
2239             ast::ClassSetItem::Empty(_)
2240             | ast::ClassSetItem::Literal(_)
2241             | ast::ClassSetItem::Range(_)
2242             | ast::ClassSetItem::Ascii(_)
2243             | ast::ClassSetItem::Unicode(_)
2244             | ast::ClassSetItem::Perl(_) => {
2245                 // These are all base cases, so we don't increment depth.
2246                 return Ok(());
2247             }
2248             ast::ClassSetItem::Bracketed(ref x) => &x.span,
2249             ast::ClassSetItem::Union(ref x) => &x.span,
2250         };
2251         self.increment_depth(span)
2252     }
2253 
visit_class_set_item_post( &mut self, ast: &ast::ClassSetItem, ) -> Result<()>2254     fn visit_class_set_item_post(
2255         &mut self,
2256         ast: &ast::ClassSetItem,
2257     ) -> Result<()> {
2258         match *ast {
2259             ast::ClassSetItem::Empty(_)
2260             | ast::ClassSetItem::Literal(_)
2261             | ast::ClassSetItem::Range(_)
2262             | ast::ClassSetItem::Ascii(_)
2263             | ast::ClassSetItem::Unicode(_)
2264             | ast::ClassSetItem::Perl(_) => {
2265                 // These are all base cases, so we don't decrement depth.
2266                 Ok(())
2267             }
2268             ast::ClassSetItem::Bracketed(_) | ast::ClassSetItem::Union(_) => {
2269                 self.decrement_depth();
2270                 Ok(())
2271             }
2272         }
2273     }
2274 
visit_class_set_binary_op_pre( &mut self, ast: &ast::ClassSetBinaryOp, ) -> Result<()>2275     fn visit_class_set_binary_op_pre(
2276         &mut self,
2277         ast: &ast::ClassSetBinaryOp,
2278     ) -> Result<()> {
2279         self.increment_depth(&ast.span)
2280     }
2281 
visit_class_set_binary_op_post( &mut self, _ast: &ast::ClassSetBinaryOp, ) -> Result<()>2282     fn visit_class_set_binary_op_post(
2283         &mut self,
2284         _ast: &ast::ClassSetBinaryOp,
2285     ) -> Result<()> {
2286         self.decrement_depth();
2287         Ok(())
2288     }
2289 }
2290 
2291 /// When the result is an error, transforms the ast::ErrorKind from the source
2292 /// Result into another one. This function is used to return clearer error
2293 /// messages when possible.
specialize_err<T>( result: Result<T>, from: ast::ErrorKind, to: ast::ErrorKind, ) -> Result<T>2294 fn specialize_err<T>(
2295     result: Result<T>,
2296     from: ast::ErrorKind,
2297     to: ast::ErrorKind,
2298 ) -> Result<T> {
2299     if let Err(e) = result {
2300         if e.kind == from {
2301             Err(ast::Error { kind: to, pattern: e.pattern, span: e.span })
2302         } else {
2303             Err(e)
2304         }
2305     } else {
2306         result
2307     }
2308 }
2309 
2310 #[cfg(test)]
2311 mod tests {
2312     use std::ops::Range;
2313 
2314     use super::{Parser, ParserBuilder, ParserI, Primitive};
2315     use ast::{self, Ast, Position, Span};
2316 
2317     // Our own assert_eq, which has slightly better formatting (but honestly
2318     // still kind of crappy).
2319     macro_rules! assert_eq {
2320         ($left:expr, $right:expr) => {{
2321             match (&$left, &$right) {
2322                 (left_val, right_val) => {
2323                     if !(*left_val == *right_val) {
2324                         panic!(
2325                             "assertion failed: `(left == right)`\n\n\
2326                              left:  `{:?}`\nright: `{:?}`\n\n",
2327                             left_val, right_val
2328                         )
2329                     }
2330                 }
2331             }
2332         }};
2333     }
2334 
2335     // We create these errors to compare with real ast::Errors in the tests.
2336     // We define equality between TestError and ast::Error to disregard the
2337     // pattern string in ast::Error, which is annoying to provide in tests.
2338     #[derive(Clone, Debug)]
2339     struct TestError {
2340         span: Span,
2341         kind: ast::ErrorKind,
2342     }
2343 
2344     impl PartialEq<ast::Error> for TestError {
eq(&self, other: &ast::Error) -> bool2345         fn eq(&self, other: &ast::Error) -> bool {
2346             self.span == other.span && self.kind == other.kind
2347         }
2348     }
2349 
2350     impl PartialEq<TestError> for ast::Error {
eq(&self, other: &TestError) -> bool2351         fn eq(&self, other: &TestError) -> bool {
2352             self.span == other.span && self.kind == other.kind
2353         }
2354     }
2355 
s(str: &str) -> String2356     fn s(str: &str) -> String {
2357         str.to_string()
2358     }
2359 
parser(pattern: &str) -> ParserI<Parser>2360     fn parser(pattern: &str) -> ParserI<Parser> {
2361         ParserI::new(Parser::new(), pattern)
2362     }
2363 
parser_octal(pattern: &str) -> ParserI<Parser>2364     fn parser_octal(pattern: &str) -> ParserI<Parser> {
2365         let parser = ParserBuilder::new().octal(true).build();
2366         ParserI::new(parser, pattern)
2367     }
2368 
parser_nest_limit(pattern: &str, nest_limit: u32) -> ParserI<Parser>2369     fn parser_nest_limit(pattern: &str, nest_limit: u32) -> ParserI<Parser> {
2370         let p = ParserBuilder::new().nest_limit(nest_limit).build();
2371         ParserI::new(p, pattern)
2372     }
2373 
parser_ignore_whitespace(pattern: &str) -> ParserI<Parser>2374     fn parser_ignore_whitespace(pattern: &str) -> ParserI<Parser> {
2375         let p = ParserBuilder::new().ignore_whitespace(true).build();
2376         ParserI::new(p, pattern)
2377     }
2378 
2379     /// Short alias for creating a new span.
nspan(start: Position, end: Position) -> Span2380     fn nspan(start: Position, end: Position) -> Span {
2381         Span::new(start, end)
2382     }
2383 
2384     /// Short alias for creating a new position.
npos(offset: usize, line: usize, column: usize) -> Position2385     fn npos(offset: usize, line: usize, column: usize) -> Position {
2386         Position::new(offset, line, column)
2387     }
2388 
2389     /// Create a new span from the given offset range. This assumes a single
2390     /// line and sets the columns based on the offsets. i.e., This only works
2391     /// out of the box for ASCII, which is fine for most tests.
span(range: Range<usize>) -> Span2392     fn span(range: Range<usize>) -> Span {
2393         let start = Position::new(range.start, 1, range.start + 1);
2394         let end = Position::new(range.end, 1, range.end + 1);
2395         Span::new(start, end)
2396     }
2397 
2398     /// Create a new span for the corresponding byte range in the given string.
span_range(subject: &str, range: Range<usize>) -> Span2399     fn span_range(subject: &str, range: Range<usize>) -> Span {
2400         let start = Position {
2401             offset: range.start,
2402             line: 1 + subject[..range.start].matches('\n').count(),
2403             column: 1 + subject[..range.start]
2404                 .chars()
2405                 .rev()
2406                 .position(|c| c == '\n')
2407                 .unwrap_or(subject[..range.start].chars().count()),
2408         };
2409         let end = Position {
2410             offset: range.end,
2411             line: 1 + subject[..range.end].matches('\n').count(),
2412             column: 1 + subject[..range.end]
2413                 .chars()
2414                 .rev()
2415                 .position(|c| c == '\n')
2416                 .unwrap_or(subject[..range.end].chars().count()),
2417         };
2418         Span::new(start, end)
2419     }
2420 
2421     /// Create a verbatim literal starting at the given position.
lit(c: char, start: usize) -> Ast2422     fn lit(c: char, start: usize) -> Ast {
2423         lit_with(c, span(start..start + c.len_utf8()))
2424     }
2425 
2426     /// Create a punctuation literal starting at the given position.
punct_lit(c: char, span: Span) -> Ast2427     fn punct_lit(c: char, span: Span) -> Ast {
2428         Ast::Literal(ast::Literal {
2429             span: span,
2430             kind: ast::LiteralKind::Punctuation,
2431             c: c,
2432         })
2433     }
2434 
2435     /// Create a verbatim literal with the given span.
lit_with(c: char, span: Span) -> Ast2436     fn lit_with(c: char, span: Span) -> Ast {
2437         Ast::Literal(ast::Literal {
2438             span: span,
2439             kind: ast::LiteralKind::Verbatim,
2440             c: c,
2441         })
2442     }
2443 
2444     /// Create a concatenation with the given range.
concat(range: Range<usize>, asts: Vec<Ast>) -> Ast2445     fn concat(range: Range<usize>, asts: Vec<Ast>) -> Ast {
2446         concat_with(span(range), asts)
2447     }
2448 
2449     /// Create a concatenation with the given span.
concat_with(span: Span, asts: Vec<Ast>) -> Ast2450     fn concat_with(span: Span, asts: Vec<Ast>) -> Ast {
2451         Ast::Concat(ast::Concat { span: span, asts: asts })
2452     }
2453 
2454     /// Create an alternation with the given span.
alt(range: Range<usize>, asts: Vec<Ast>) -> Ast2455     fn alt(range: Range<usize>, asts: Vec<Ast>) -> Ast {
2456         Ast::Alternation(ast::Alternation { span: span(range), asts: asts })
2457     }
2458 
2459     /// Create a capturing group with the given span.
group(range: Range<usize>, index: u32, ast: Ast) -> Ast2460     fn group(range: Range<usize>, index: u32, ast: Ast) -> Ast {
2461         Ast::Group(ast::Group {
2462             span: span(range),
2463             kind: ast::GroupKind::CaptureIndex(index),
2464             ast: Box::new(ast),
2465         })
2466     }
2467 
2468     /// Create an ast::SetFlags.
2469     ///
2470     /// The given pattern should be the full pattern string. The range given
2471     /// should correspond to the byte offsets where the flag set occurs.
2472     ///
2473     /// If negated is true, then the set is interpreted as beginning with a
2474     /// negation.
flag_set( pat: &str, range: Range<usize>, flag: ast::Flag, negated: bool, ) -> Ast2475     fn flag_set(
2476         pat: &str,
2477         range: Range<usize>,
2478         flag: ast::Flag,
2479         negated: bool,
2480     ) -> Ast {
2481         let mut items = vec![ast::FlagsItem {
2482             span: span_range(pat, (range.end - 2)..(range.end - 1)),
2483             kind: ast::FlagsItemKind::Flag(flag),
2484         }];
2485         if negated {
2486             items.insert(
2487                 0,
2488                 ast::FlagsItem {
2489                     span: span_range(pat, (range.start + 2)..(range.end - 2)),
2490                     kind: ast::FlagsItemKind::Negation,
2491                 },
2492             );
2493         }
2494         Ast::Flags(ast::SetFlags {
2495             span: span_range(pat, range.clone()),
2496             flags: ast::Flags {
2497                 span: span_range(pat, (range.start + 2)..(range.end - 1)),
2498                 items: items,
2499             },
2500         })
2501     }
2502 
2503     #[test]
parse_nest_limit()2504     fn parse_nest_limit() {
2505         // A nest limit of 0 still allows some types of regexes.
2506         assert_eq!(
2507             parser_nest_limit("", 0).parse(),
2508             Ok(Ast::Empty(span(0..0)))
2509         );
2510         assert_eq!(parser_nest_limit("a", 0).parse(), Ok(lit('a', 0)));
2511 
2512         // Test repetition operations, which require one level of nesting.
2513         assert_eq!(
2514             parser_nest_limit("a+", 0).parse().unwrap_err(),
2515             TestError {
2516                 span: span(0..2),
2517                 kind: ast::ErrorKind::NestLimitExceeded(0),
2518             }
2519         );
2520         assert_eq!(
2521             parser_nest_limit("a+", 1).parse(),
2522             Ok(Ast::Repetition(ast::Repetition {
2523                 span: span(0..2),
2524                 op: ast::RepetitionOp {
2525                     span: span(1..2),
2526                     kind: ast::RepetitionKind::OneOrMore,
2527                 },
2528                 greedy: true,
2529                 ast: Box::new(lit('a', 0)),
2530             }))
2531         );
2532         assert_eq!(
2533             parser_nest_limit("(a)+", 1).parse().unwrap_err(),
2534             TestError {
2535                 span: span(0..3),
2536                 kind: ast::ErrorKind::NestLimitExceeded(1),
2537             }
2538         );
2539         assert_eq!(
2540             parser_nest_limit("a+*", 1).parse().unwrap_err(),
2541             TestError {
2542                 span: span(0..2),
2543                 kind: ast::ErrorKind::NestLimitExceeded(1),
2544             }
2545         );
2546         assert_eq!(
2547             parser_nest_limit("a+*", 2).parse(),
2548             Ok(Ast::Repetition(ast::Repetition {
2549                 span: span(0..3),
2550                 op: ast::RepetitionOp {
2551                     span: span(2..3),
2552                     kind: ast::RepetitionKind::ZeroOrMore,
2553                 },
2554                 greedy: true,
2555                 ast: Box::new(Ast::Repetition(ast::Repetition {
2556                     span: span(0..2),
2557                     op: ast::RepetitionOp {
2558                         span: span(1..2),
2559                         kind: ast::RepetitionKind::OneOrMore,
2560                     },
2561                     greedy: true,
2562                     ast: Box::new(lit('a', 0)),
2563                 })),
2564             }))
2565         );
2566 
2567         // Test concatenations. A concatenation requires one level of nesting.
2568         assert_eq!(
2569             parser_nest_limit("ab", 0).parse().unwrap_err(),
2570             TestError {
2571                 span: span(0..2),
2572                 kind: ast::ErrorKind::NestLimitExceeded(0),
2573             }
2574         );
2575         assert_eq!(
2576             parser_nest_limit("ab", 1).parse(),
2577             Ok(concat(0..2, vec![lit('a', 0), lit('b', 1)]))
2578         );
2579         assert_eq!(
2580             parser_nest_limit("abc", 1).parse(),
2581             Ok(concat(0..3, vec![lit('a', 0), lit('b', 1), lit('c', 2)]))
2582         );
2583 
2584         // Test alternations. An alternation requires one level of nesting.
2585         assert_eq!(
2586             parser_nest_limit("a|b", 0).parse().unwrap_err(),
2587             TestError {
2588                 span: span(0..3),
2589                 kind: ast::ErrorKind::NestLimitExceeded(0),
2590             }
2591         );
2592         assert_eq!(
2593             parser_nest_limit("a|b", 1).parse(),
2594             Ok(alt(0..3, vec![lit('a', 0), lit('b', 2)]))
2595         );
2596         assert_eq!(
2597             parser_nest_limit("a|b|c", 1).parse(),
2598             Ok(alt(0..5, vec![lit('a', 0), lit('b', 2), lit('c', 4)]))
2599         );
2600 
2601         // Test character classes. Classes form their own mini-recursive
2602         // syntax!
2603         assert_eq!(
2604             parser_nest_limit("[a]", 0).parse().unwrap_err(),
2605             TestError {
2606                 span: span(0..3),
2607                 kind: ast::ErrorKind::NestLimitExceeded(0),
2608             }
2609         );
2610         assert_eq!(
2611             parser_nest_limit("[a]", 1).parse(),
2612             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
2613                 span: span(0..3),
2614                 negated: false,
2615                 kind: ast::ClassSet::Item(ast::ClassSetItem::Literal(
2616                     ast::Literal {
2617                         span: span(1..2),
2618                         kind: ast::LiteralKind::Verbatim,
2619                         c: 'a',
2620                     }
2621                 )),
2622             })))
2623         );
2624         assert_eq!(
2625             parser_nest_limit("[ab]", 1).parse().unwrap_err(),
2626             TestError {
2627                 span: span(1..3),
2628                 kind: ast::ErrorKind::NestLimitExceeded(1),
2629             }
2630         );
2631         assert_eq!(
2632             parser_nest_limit("[ab[cd]]", 2).parse().unwrap_err(),
2633             TestError {
2634                 span: span(3..7),
2635                 kind: ast::ErrorKind::NestLimitExceeded(2),
2636             }
2637         );
2638         assert_eq!(
2639             parser_nest_limit("[ab[cd]]", 3).parse().unwrap_err(),
2640             TestError {
2641                 span: span(4..6),
2642                 kind: ast::ErrorKind::NestLimitExceeded(3),
2643             }
2644         );
2645         assert_eq!(
2646             parser_nest_limit("[a--b]", 1).parse().unwrap_err(),
2647             TestError {
2648                 span: span(1..5),
2649                 kind: ast::ErrorKind::NestLimitExceeded(1),
2650             }
2651         );
2652         assert_eq!(
2653             parser_nest_limit("[a--bc]", 2).parse().unwrap_err(),
2654             TestError {
2655                 span: span(4..6),
2656                 kind: ast::ErrorKind::NestLimitExceeded(2),
2657             }
2658         );
2659     }
2660 
2661     #[test]
parse_comments()2662     fn parse_comments() {
2663         let pat = "(?x)
2664 # This is comment 1.
2665 foo # This is comment 2.
2666   # This is comment 3.
2667 bar
2668 # This is comment 4.";
2669         let astc = parser(pat).parse_with_comments().unwrap();
2670         assert_eq!(
2671             astc.ast,
2672             concat_with(
2673                 span_range(pat, 0..pat.len()),
2674                 vec![
2675                     flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2676                     lit_with('f', span_range(pat, 26..27)),
2677                     lit_with('o', span_range(pat, 27..28)),
2678                     lit_with('o', span_range(pat, 28..29)),
2679                     lit_with('b', span_range(pat, 74..75)),
2680                     lit_with('a', span_range(pat, 75..76)),
2681                     lit_with('r', span_range(pat, 76..77)),
2682                 ]
2683             )
2684         );
2685         assert_eq!(
2686             astc.comments,
2687             vec![
2688                 ast::Comment {
2689                     span: span_range(pat, 5..26),
2690                     comment: s(" This is comment 1."),
2691                 },
2692                 ast::Comment {
2693                     span: span_range(pat, 30..51),
2694                     comment: s(" This is comment 2."),
2695                 },
2696                 ast::Comment {
2697                     span: span_range(pat, 53..74),
2698                     comment: s(" This is comment 3."),
2699                 },
2700                 ast::Comment {
2701                     span: span_range(pat, 78..98),
2702                     comment: s(" This is comment 4."),
2703                 },
2704             ]
2705         );
2706     }
2707 
2708     #[test]
parse_holistic()2709     fn parse_holistic() {
2710         assert_eq!(parser("]").parse(), Ok(lit(']', 0)));
2711         assert_eq!(
2712             parser(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#\&\-\~").parse(),
2713             Ok(concat(
2714                 0..36,
2715                 vec![
2716                     punct_lit('\\', span(0..2)),
2717                     punct_lit('.', span(2..4)),
2718                     punct_lit('+', span(4..6)),
2719                     punct_lit('*', span(6..8)),
2720                     punct_lit('?', span(8..10)),
2721                     punct_lit('(', span(10..12)),
2722                     punct_lit(')', span(12..14)),
2723                     punct_lit('|', span(14..16)),
2724                     punct_lit('[', span(16..18)),
2725                     punct_lit(']', span(18..20)),
2726                     punct_lit('{', span(20..22)),
2727                     punct_lit('}', span(22..24)),
2728                     punct_lit('^', span(24..26)),
2729                     punct_lit('$', span(26..28)),
2730                     punct_lit('#', span(28..30)),
2731                     punct_lit('&', span(30..32)),
2732                     punct_lit('-', span(32..34)),
2733                     punct_lit('~', span(34..36)),
2734                 ]
2735             ))
2736         );
2737     }
2738 
2739     #[test]
parse_ignore_whitespace()2740     fn parse_ignore_whitespace() {
2741         // Test that basic whitespace insensitivity works.
2742         let pat = "(?x)a b";
2743         assert_eq!(
2744             parser(pat).parse(),
2745             Ok(concat_with(
2746                 nspan(npos(0, 1, 1), npos(7, 1, 8)),
2747                 vec![
2748                     flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2749                     lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
2750                     lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
2751                 ]
2752             ))
2753         );
2754 
2755         // Test that we can toggle whitespace insensitivity.
2756         let pat = "(?x)a b(?-x)a b";
2757         assert_eq!(
2758             parser(pat).parse(),
2759             Ok(concat_with(
2760                 nspan(npos(0, 1, 1), npos(15, 1, 16)),
2761                 vec![
2762                     flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2763                     lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
2764                     lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
2765                     flag_set(pat, 7..12, ast::Flag::IgnoreWhitespace, true),
2766                     lit_with('a', nspan(npos(12, 1, 13), npos(13, 1, 14))),
2767                     lit_with(' ', nspan(npos(13, 1, 14), npos(14, 1, 15))),
2768                     lit_with('b', nspan(npos(14, 1, 15), npos(15, 1, 16))),
2769                 ]
2770             ))
2771         );
2772 
2773         // Test that nesting whitespace insensitive flags works.
2774         let pat = "a (?x:a )a ";
2775         assert_eq!(
2776             parser(pat).parse(),
2777             Ok(concat_with(
2778                 span_range(pat, 0..11),
2779                 vec![
2780                     lit_with('a', span_range(pat, 0..1)),
2781                     lit_with(' ', span_range(pat, 1..2)),
2782                     Ast::Group(ast::Group {
2783                         span: span_range(pat, 2..9),
2784                         kind: ast::GroupKind::NonCapturing(ast::Flags {
2785                             span: span_range(pat, 4..5),
2786                             items: vec![ast::FlagsItem {
2787                                 span: span_range(pat, 4..5),
2788                                 kind: ast::FlagsItemKind::Flag(
2789                                     ast::Flag::IgnoreWhitespace
2790                                 ),
2791                             },],
2792                         }),
2793                         ast: Box::new(lit_with('a', span_range(pat, 6..7))),
2794                     }),
2795                     lit_with('a', span_range(pat, 9..10)),
2796                     lit_with(' ', span_range(pat, 10..11)),
2797                 ]
2798             ))
2799         );
2800 
2801         // Test that whitespace after an opening paren is insignificant.
2802         let pat = "(?x)( ?P<foo> a )";
2803         assert_eq!(
2804             parser(pat).parse(),
2805             Ok(concat_with(
2806                 span_range(pat, 0..pat.len()),
2807                 vec![
2808                     flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2809                     Ast::Group(ast::Group {
2810                         span: span_range(pat, 4..pat.len()),
2811                         kind: ast::GroupKind::CaptureName(ast::CaptureName {
2812                             span: span_range(pat, 9..12),
2813                             name: s("foo"),
2814                             index: 1,
2815                         }),
2816                         ast: Box::new(lit_with('a', span_range(pat, 14..15))),
2817                     }),
2818                 ]
2819             ))
2820         );
2821         let pat = "(?x)(  a )";
2822         assert_eq!(
2823             parser(pat).parse(),
2824             Ok(concat_with(
2825                 span_range(pat, 0..pat.len()),
2826                 vec![
2827                     flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2828                     Ast::Group(ast::Group {
2829                         span: span_range(pat, 4..pat.len()),
2830                         kind: ast::GroupKind::CaptureIndex(1),
2831                         ast: Box::new(lit_with('a', span_range(pat, 7..8))),
2832                     }),
2833                 ]
2834             ))
2835         );
2836         let pat = "(?x)(  ?:  a )";
2837         assert_eq!(
2838             parser(pat).parse(),
2839             Ok(concat_with(
2840                 span_range(pat, 0..pat.len()),
2841                 vec![
2842                     flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2843                     Ast::Group(ast::Group {
2844                         span: span_range(pat, 4..pat.len()),
2845                         kind: ast::GroupKind::NonCapturing(ast::Flags {
2846                             span: span_range(pat, 8..8),
2847                             items: vec![],
2848                         }),
2849                         ast: Box::new(lit_with('a', span_range(pat, 11..12))),
2850                     }),
2851                 ]
2852             ))
2853         );
2854         let pat = r"(?x)\x { 53 }";
2855         assert_eq!(
2856             parser(pat).parse(),
2857             Ok(concat_with(
2858                 span_range(pat, 0..pat.len()),
2859                 vec![
2860                     flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2861                     Ast::Literal(ast::Literal {
2862                         span: span(4..13),
2863                         kind: ast::LiteralKind::HexBrace(
2864                             ast::HexLiteralKind::X
2865                         ),
2866                         c: 'S',
2867                     }),
2868                 ]
2869             ))
2870         );
2871 
2872         // Test that whitespace after an escape is OK.
2873         let pat = r"(?x)\ ";
2874         assert_eq!(
2875             parser(pat).parse(),
2876             Ok(concat_with(
2877                 span_range(pat, 0..pat.len()),
2878                 vec![
2879                     flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2880                     Ast::Literal(ast::Literal {
2881                         span: span_range(pat, 4..6),
2882                         kind: ast::LiteralKind::Special(
2883                             ast::SpecialLiteralKind::Space
2884                         ),
2885                         c: ' ',
2886                     }),
2887                 ]
2888             ))
2889         );
2890         // ... but only when `x` mode is enabled.
2891         let pat = r"\ ";
2892         assert_eq!(
2893             parser(pat).parse().unwrap_err(),
2894             TestError {
2895                 span: span_range(pat, 0..2),
2896                 kind: ast::ErrorKind::EscapeUnrecognized,
2897             }
2898         );
2899     }
2900 
2901     #[test]
parse_newlines()2902     fn parse_newlines() {
2903         let pat = ".\n.";
2904         assert_eq!(
2905             parser(pat).parse(),
2906             Ok(concat_with(
2907                 span_range(pat, 0..3),
2908                 vec![
2909                     Ast::Dot(span_range(pat, 0..1)),
2910                     lit_with('\n', span_range(pat, 1..2)),
2911                     Ast::Dot(span_range(pat, 2..3)),
2912                 ]
2913             ))
2914         );
2915 
2916         let pat = "foobar\nbaz\nquux\n";
2917         assert_eq!(
2918             parser(pat).parse(),
2919             Ok(concat_with(
2920                 span_range(pat, 0..pat.len()),
2921                 vec![
2922                     lit_with('f', nspan(npos(0, 1, 1), npos(1, 1, 2))),
2923                     lit_with('o', nspan(npos(1, 1, 2), npos(2, 1, 3))),
2924                     lit_with('o', nspan(npos(2, 1, 3), npos(3, 1, 4))),
2925                     lit_with('b', nspan(npos(3, 1, 4), npos(4, 1, 5))),
2926                     lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
2927                     lit_with('r', nspan(npos(5, 1, 6), npos(6, 1, 7))),
2928                     lit_with('\n', nspan(npos(6, 1, 7), npos(7, 2, 1))),
2929                     lit_with('b', nspan(npos(7, 2, 1), npos(8, 2, 2))),
2930                     lit_with('a', nspan(npos(8, 2, 2), npos(9, 2, 3))),
2931                     lit_with('z', nspan(npos(9, 2, 3), npos(10, 2, 4))),
2932                     lit_with('\n', nspan(npos(10, 2, 4), npos(11, 3, 1))),
2933                     lit_with('q', nspan(npos(11, 3, 1), npos(12, 3, 2))),
2934                     lit_with('u', nspan(npos(12, 3, 2), npos(13, 3, 3))),
2935                     lit_with('u', nspan(npos(13, 3, 3), npos(14, 3, 4))),
2936                     lit_with('x', nspan(npos(14, 3, 4), npos(15, 3, 5))),
2937                     lit_with('\n', nspan(npos(15, 3, 5), npos(16, 4, 1))),
2938                 ]
2939             ))
2940         );
2941     }
2942 
2943     #[test]
parse_uncounted_repetition()2944     fn parse_uncounted_repetition() {
2945         assert_eq!(
2946             parser(r"a*").parse(),
2947             Ok(Ast::Repetition(ast::Repetition {
2948                 span: span(0..2),
2949                 op: ast::RepetitionOp {
2950                     span: span(1..2),
2951                     kind: ast::RepetitionKind::ZeroOrMore,
2952                 },
2953                 greedy: true,
2954                 ast: Box::new(lit('a', 0)),
2955             }))
2956         );
2957         assert_eq!(
2958             parser(r"a+").parse(),
2959             Ok(Ast::Repetition(ast::Repetition {
2960                 span: span(0..2),
2961                 op: ast::RepetitionOp {
2962                     span: span(1..2),
2963                     kind: ast::RepetitionKind::OneOrMore,
2964                 },
2965                 greedy: true,
2966                 ast: Box::new(lit('a', 0)),
2967             }))
2968         );
2969 
2970         assert_eq!(
2971             parser(r"a?").parse(),
2972             Ok(Ast::Repetition(ast::Repetition {
2973                 span: span(0..2),
2974                 op: ast::RepetitionOp {
2975                     span: span(1..2),
2976                     kind: ast::RepetitionKind::ZeroOrOne,
2977                 },
2978                 greedy: true,
2979                 ast: Box::new(lit('a', 0)),
2980             }))
2981         );
2982         assert_eq!(
2983             parser(r"a??").parse(),
2984             Ok(Ast::Repetition(ast::Repetition {
2985                 span: span(0..3),
2986                 op: ast::RepetitionOp {
2987                     span: span(1..3),
2988                     kind: ast::RepetitionKind::ZeroOrOne,
2989                 },
2990                 greedy: false,
2991                 ast: Box::new(lit('a', 0)),
2992             }))
2993         );
2994         assert_eq!(
2995             parser(r"a?").parse(),
2996             Ok(Ast::Repetition(ast::Repetition {
2997                 span: span(0..2),
2998                 op: ast::RepetitionOp {
2999                     span: span(1..2),
3000                     kind: ast::RepetitionKind::ZeroOrOne,
3001                 },
3002                 greedy: true,
3003                 ast: Box::new(lit('a', 0)),
3004             }))
3005         );
3006         assert_eq!(
3007             parser(r"a?b").parse(),
3008             Ok(concat(
3009                 0..3,
3010                 vec![
3011                     Ast::Repetition(ast::Repetition {
3012                         span: span(0..2),
3013                         op: ast::RepetitionOp {
3014                             span: span(1..2),
3015                             kind: ast::RepetitionKind::ZeroOrOne,
3016                         },
3017                         greedy: true,
3018                         ast: Box::new(lit('a', 0)),
3019                     }),
3020                     lit('b', 2),
3021                 ]
3022             ))
3023         );
3024         assert_eq!(
3025             parser(r"a??b").parse(),
3026             Ok(concat(
3027                 0..4,
3028                 vec![
3029                     Ast::Repetition(ast::Repetition {
3030                         span: span(0..3),
3031                         op: ast::RepetitionOp {
3032                             span: span(1..3),
3033                             kind: ast::RepetitionKind::ZeroOrOne,
3034                         },
3035                         greedy: false,
3036                         ast: Box::new(lit('a', 0)),
3037                     }),
3038                     lit('b', 3),
3039                 ]
3040             ))
3041         );
3042         assert_eq!(
3043             parser(r"ab?").parse(),
3044             Ok(concat(
3045                 0..3,
3046                 vec![
3047                     lit('a', 0),
3048                     Ast::Repetition(ast::Repetition {
3049                         span: span(1..3),
3050                         op: ast::RepetitionOp {
3051                             span: span(2..3),
3052                             kind: ast::RepetitionKind::ZeroOrOne,
3053                         },
3054                         greedy: true,
3055                         ast: Box::new(lit('b', 1)),
3056                     }),
3057                 ]
3058             ))
3059         );
3060         assert_eq!(
3061             parser(r"(ab)?").parse(),
3062             Ok(Ast::Repetition(ast::Repetition {
3063                 span: span(0..5),
3064                 op: ast::RepetitionOp {
3065                     span: span(4..5),
3066                     kind: ast::RepetitionKind::ZeroOrOne,
3067                 },
3068                 greedy: true,
3069                 ast: Box::new(group(
3070                     0..4,
3071                     1,
3072                     concat(1..3, vec![lit('a', 1), lit('b', 2),])
3073                 )),
3074             }))
3075         );
3076         assert_eq!(
3077             parser(r"|a?").parse(),
3078             Ok(alt(
3079                 0..3,
3080                 vec![
3081                     Ast::Empty(span(0..0)),
3082                     Ast::Repetition(ast::Repetition {
3083                         span: span(1..3),
3084                         op: ast::RepetitionOp {
3085                             span: span(2..3),
3086                             kind: ast::RepetitionKind::ZeroOrOne,
3087                         },
3088                         greedy: true,
3089                         ast: Box::new(lit('a', 1)),
3090                     }),
3091                 ]
3092             ))
3093         );
3094 
3095         assert_eq!(
3096             parser(r"*").parse().unwrap_err(),
3097             TestError {
3098                 span: span(0..0),
3099                 kind: ast::ErrorKind::RepetitionMissing,
3100             }
3101         );
3102         assert_eq!(
3103             parser(r"(?i)*").parse().unwrap_err(),
3104             TestError {
3105                 span: span(4..4),
3106                 kind: ast::ErrorKind::RepetitionMissing,
3107             }
3108         );
3109         assert_eq!(
3110             parser(r"(*)").parse().unwrap_err(),
3111             TestError {
3112                 span: span(1..1),
3113                 kind: ast::ErrorKind::RepetitionMissing,
3114             }
3115         );
3116         assert_eq!(
3117             parser(r"(?:?)").parse().unwrap_err(),
3118             TestError {
3119                 span: span(3..3),
3120                 kind: ast::ErrorKind::RepetitionMissing,
3121             }
3122         );
3123         assert_eq!(
3124             parser(r"+").parse().unwrap_err(),
3125             TestError {
3126                 span: span(0..0),
3127                 kind: ast::ErrorKind::RepetitionMissing,
3128             }
3129         );
3130         assert_eq!(
3131             parser(r"?").parse().unwrap_err(),
3132             TestError {
3133                 span: span(0..0),
3134                 kind: ast::ErrorKind::RepetitionMissing,
3135             }
3136         );
3137         assert_eq!(
3138             parser(r"(?)").parse().unwrap_err(),
3139             TestError {
3140                 span: span(1..1),
3141                 kind: ast::ErrorKind::RepetitionMissing,
3142             }
3143         );
3144         assert_eq!(
3145             parser(r"|*").parse().unwrap_err(),
3146             TestError {
3147                 span: span(1..1),
3148                 kind: ast::ErrorKind::RepetitionMissing,
3149             }
3150         );
3151         assert_eq!(
3152             parser(r"|+").parse().unwrap_err(),
3153             TestError {
3154                 span: span(1..1),
3155                 kind: ast::ErrorKind::RepetitionMissing,
3156             }
3157         );
3158         assert_eq!(
3159             parser(r"|?").parse().unwrap_err(),
3160             TestError {
3161                 span: span(1..1),
3162                 kind: ast::ErrorKind::RepetitionMissing,
3163             }
3164         );
3165     }
3166 
3167     #[test]
parse_counted_repetition()3168     fn parse_counted_repetition() {
3169         assert_eq!(
3170             parser(r"a{5}").parse(),
3171             Ok(Ast::Repetition(ast::Repetition {
3172                 span: span(0..4),
3173                 op: ast::RepetitionOp {
3174                     span: span(1..4),
3175                     kind: ast::RepetitionKind::Range(
3176                         ast::RepetitionRange::Exactly(5)
3177                     ),
3178                 },
3179                 greedy: true,
3180                 ast: Box::new(lit('a', 0)),
3181             }))
3182         );
3183         assert_eq!(
3184             parser(r"a{5,}").parse(),
3185             Ok(Ast::Repetition(ast::Repetition {
3186                 span: span(0..5),
3187                 op: ast::RepetitionOp {
3188                     span: span(1..5),
3189                     kind: ast::RepetitionKind::Range(
3190                         ast::RepetitionRange::AtLeast(5)
3191                     ),
3192                 },
3193                 greedy: true,
3194                 ast: Box::new(lit('a', 0)),
3195             }))
3196         );
3197         assert_eq!(
3198             parser(r"a{5,9}").parse(),
3199             Ok(Ast::Repetition(ast::Repetition {
3200                 span: span(0..6),
3201                 op: ast::RepetitionOp {
3202                     span: span(1..6),
3203                     kind: ast::RepetitionKind::Range(
3204                         ast::RepetitionRange::Bounded(5, 9)
3205                     ),
3206                 },
3207                 greedy: true,
3208                 ast: Box::new(lit('a', 0)),
3209             }))
3210         );
3211         assert_eq!(
3212             parser(r"a{5}?").parse(),
3213             Ok(Ast::Repetition(ast::Repetition {
3214                 span: span(0..5),
3215                 op: ast::RepetitionOp {
3216                     span: span(1..5),
3217                     kind: ast::RepetitionKind::Range(
3218                         ast::RepetitionRange::Exactly(5)
3219                     ),
3220                 },
3221                 greedy: false,
3222                 ast: Box::new(lit('a', 0)),
3223             }))
3224         );
3225         assert_eq!(
3226             parser(r"ab{5}").parse(),
3227             Ok(concat(
3228                 0..5,
3229                 vec![
3230                     lit('a', 0),
3231                     Ast::Repetition(ast::Repetition {
3232                         span: span(1..5),
3233                         op: ast::RepetitionOp {
3234                             span: span(2..5),
3235                             kind: ast::RepetitionKind::Range(
3236                                 ast::RepetitionRange::Exactly(5)
3237                             ),
3238                         },
3239                         greedy: true,
3240                         ast: Box::new(lit('b', 1)),
3241                     }),
3242                 ]
3243             ))
3244         );
3245         assert_eq!(
3246             parser(r"ab{5}c").parse(),
3247             Ok(concat(
3248                 0..6,
3249                 vec![
3250                     lit('a', 0),
3251                     Ast::Repetition(ast::Repetition {
3252                         span: span(1..5),
3253                         op: ast::RepetitionOp {
3254                             span: span(2..5),
3255                             kind: ast::RepetitionKind::Range(
3256                                 ast::RepetitionRange::Exactly(5)
3257                             ),
3258                         },
3259                         greedy: true,
3260                         ast: Box::new(lit('b', 1)),
3261                     }),
3262                     lit('c', 5),
3263                 ]
3264             ))
3265         );
3266 
3267         assert_eq!(
3268             parser(r"a{ 5 }").parse(),
3269             Ok(Ast::Repetition(ast::Repetition {
3270                 span: span(0..6),
3271                 op: ast::RepetitionOp {
3272                     span: span(1..6),
3273                     kind: ast::RepetitionKind::Range(
3274                         ast::RepetitionRange::Exactly(5)
3275                     ),
3276                 },
3277                 greedy: true,
3278                 ast: Box::new(lit('a', 0)),
3279             }))
3280         );
3281         assert_eq!(
3282             parser(r"a{ 5 , 9 }").parse(),
3283             Ok(Ast::Repetition(ast::Repetition {
3284                 span: span(0..10),
3285                 op: ast::RepetitionOp {
3286                     span: span(1..10),
3287                     kind: ast::RepetitionKind::Range(
3288                         ast::RepetitionRange::Bounded(5, 9)
3289                     ),
3290                 },
3291                 greedy: true,
3292                 ast: Box::new(lit('a', 0)),
3293             }))
3294         );
3295         assert_eq!(
3296             parser_ignore_whitespace(r"a{5,9} ?").parse(),
3297             Ok(Ast::Repetition(ast::Repetition {
3298                 span: span(0..8),
3299                 op: ast::RepetitionOp {
3300                     span: span(1..8),
3301                     kind: ast::RepetitionKind::Range(
3302                         ast::RepetitionRange::Bounded(5, 9)
3303                     ),
3304                 },
3305                 greedy: false,
3306                 ast: Box::new(lit('a', 0)),
3307             }))
3308         );
3309 
3310         assert_eq!(
3311             parser(r"(?i){0}").parse().unwrap_err(),
3312             TestError {
3313                 span: span(4..4),
3314                 kind: ast::ErrorKind::RepetitionMissing,
3315             }
3316         );
3317         assert_eq!(
3318             parser(r"(?m){1,1}").parse().unwrap_err(),
3319             TestError {
3320                 span: span(4..4),
3321                 kind: ast::ErrorKind::RepetitionMissing,
3322             }
3323         );
3324         assert_eq!(
3325             parser(r"a{]}").parse().unwrap_err(),
3326             TestError {
3327                 span: span(2..2),
3328                 kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3329             }
3330         );
3331         assert_eq!(
3332             parser(r"a{1,]}").parse().unwrap_err(),
3333             TestError {
3334                 span: span(4..4),
3335                 kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3336             }
3337         );
3338         assert_eq!(
3339             parser(r"a{").parse().unwrap_err(),
3340             TestError {
3341                 span: span(1..2),
3342                 kind: ast::ErrorKind::RepetitionCountUnclosed,
3343             }
3344         );
3345         assert_eq!(
3346             parser(r"a{}").parse().unwrap_err(),
3347             TestError {
3348                 span: span(2..2),
3349                 kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3350             }
3351         );
3352         assert_eq!(
3353             parser(r"a{a").parse().unwrap_err(),
3354             TestError {
3355                 span: span(2..2),
3356                 kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3357             }
3358         );
3359         assert_eq!(
3360             parser(r"a{9999999999}").parse().unwrap_err(),
3361             TestError {
3362                 span: span(2..12),
3363                 kind: ast::ErrorKind::DecimalInvalid,
3364             }
3365         );
3366         assert_eq!(
3367             parser(r"a{9").parse().unwrap_err(),
3368             TestError {
3369                 span: span(1..3),
3370                 kind: ast::ErrorKind::RepetitionCountUnclosed,
3371             }
3372         );
3373         assert_eq!(
3374             parser(r"a{9,a").parse().unwrap_err(),
3375             TestError {
3376                 span: span(4..4),
3377                 kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3378             }
3379         );
3380         assert_eq!(
3381             parser(r"a{9,9999999999}").parse().unwrap_err(),
3382             TestError {
3383                 span: span(4..14),
3384                 kind: ast::ErrorKind::DecimalInvalid,
3385             }
3386         );
3387         assert_eq!(
3388             parser(r"a{9,").parse().unwrap_err(),
3389             TestError {
3390                 span: span(1..4),
3391                 kind: ast::ErrorKind::RepetitionCountUnclosed,
3392             }
3393         );
3394         assert_eq!(
3395             parser(r"a{9,11").parse().unwrap_err(),
3396             TestError {
3397                 span: span(1..6),
3398                 kind: ast::ErrorKind::RepetitionCountUnclosed,
3399             }
3400         );
3401         assert_eq!(
3402             parser(r"a{2,1}").parse().unwrap_err(),
3403             TestError {
3404                 span: span(1..6),
3405                 kind: ast::ErrorKind::RepetitionCountInvalid,
3406             }
3407         );
3408         assert_eq!(
3409             parser(r"{5}").parse().unwrap_err(),
3410             TestError {
3411                 span: span(0..0),
3412                 kind: ast::ErrorKind::RepetitionMissing,
3413             }
3414         );
3415         assert_eq!(
3416             parser(r"|{5}").parse().unwrap_err(),
3417             TestError {
3418                 span: span(1..1),
3419                 kind: ast::ErrorKind::RepetitionMissing,
3420             }
3421         );
3422     }
3423 
3424     #[test]
parse_alternate()3425     fn parse_alternate() {
3426         assert_eq!(
3427             parser(r"a|b").parse(),
3428             Ok(Ast::Alternation(ast::Alternation {
3429                 span: span(0..3),
3430                 asts: vec![lit('a', 0), lit('b', 2)],
3431             }))
3432         );
3433         assert_eq!(
3434             parser(r"(a|b)").parse(),
3435             Ok(group(
3436                 0..5,
3437                 1,
3438                 Ast::Alternation(ast::Alternation {
3439                     span: span(1..4),
3440                     asts: vec![lit('a', 1), lit('b', 3)],
3441                 })
3442             ))
3443         );
3444 
3445         assert_eq!(
3446             parser(r"a|b|c").parse(),
3447             Ok(Ast::Alternation(ast::Alternation {
3448                 span: span(0..5),
3449                 asts: vec![lit('a', 0), lit('b', 2), lit('c', 4)],
3450             }))
3451         );
3452         assert_eq!(
3453             parser(r"ax|by|cz").parse(),
3454             Ok(Ast::Alternation(ast::Alternation {
3455                 span: span(0..8),
3456                 asts: vec![
3457                     concat(0..2, vec![lit('a', 0), lit('x', 1)]),
3458                     concat(3..5, vec![lit('b', 3), lit('y', 4)]),
3459                     concat(6..8, vec![lit('c', 6), lit('z', 7)]),
3460                 ],
3461             }))
3462         );
3463         assert_eq!(
3464             parser(r"(ax|by|cz)").parse(),
3465             Ok(group(
3466                 0..10,
3467                 1,
3468                 Ast::Alternation(ast::Alternation {
3469                     span: span(1..9),
3470                     asts: vec![
3471                         concat(1..3, vec![lit('a', 1), lit('x', 2)]),
3472                         concat(4..6, vec![lit('b', 4), lit('y', 5)]),
3473                         concat(7..9, vec![lit('c', 7), lit('z', 8)]),
3474                     ],
3475                 })
3476             ))
3477         );
3478         assert_eq!(
3479             parser(r"(ax|(by|(cz)))").parse(),
3480             Ok(group(
3481                 0..14,
3482                 1,
3483                 alt(
3484                     1..13,
3485                     vec![
3486                         concat(1..3, vec![lit('a', 1), lit('x', 2)]),
3487                         group(
3488                             4..13,
3489                             2,
3490                             alt(
3491                                 5..12,
3492                                 vec![
3493                                     concat(
3494                                         5..7,
3495                                         vec![lit('b', 5), lit('y', 6)]
3496                                     ),
3497                                     group(
3498                                         8..12,
3499                                         3,
3500                                         concat(
3501                                             9..11,
3502                                             vec![lit('c', 9), lit('z', 10),]
3503                                         )
3504                                     ),
3505                                 ]
3506                             )
3507                         ),
3508                     ]
3509                 )
3510             ))
3511         );
3512 
3513         assert_eq!(
3514             parser(r"|").parse(),
3515             Ok(alt(
3516                 0..1,
3517                 vec![Ast::Empty(span(0..0)), Ast::Empty(span(1..1)),]
3518             ))
3519         );
3520         assert_eq!(
3521             parser(r"||").parse(),
3522             Ok(alt(
3523                 0..2,
3524                 vec![
3525                     Ast::Empty(span(0..0)),
3526                     Ast::Empty(span(1..1)),
3527                     Ast::Empty(span(2..2)),
3528                 ]
3529             ))
3530         );
3531         assert_eq!(
3532             parser(r"a|").parse(),
3533             Ok(alt(0..2, vec![lit('a', 0), Ast::Empty(span(2..2)),]))
3534         );
3535         assert_eq!(
3536             parser(r"|a").parse(),
3537             Ok(alt(0..2, vec![Ast::Empty(span(0..0)), lit('a', 1),]))
3538         );
3539 
3540         assert_eq!(
3541             parser(r"(|)").parse(),
3542             Ok(group(
3543                 0..3,
3544                 1,
3545                 alt(
3546                     1..2,
3547                     vec![Ast::Empty(span(1..1)), Ast::Empty(span(2..2)),]
3548                 )
3549             ))
3550         );
3551         assert_eq!(
3552             parser(r"(a|)").parse(),
3553             Ok(group(
3554                 0..4,
3555                 1,
3556                 alt(1..3, vec![lit('a', 1), Ast::Empty(span(3..3)),])
3557             ))
3558         );
3559         assert_eq!(
3560             parser(r"(|a)").parse(),
3561             Ok(group(
3562                 0..4,
3563                 1,
3564                 alt(1..3, vec![Ast::Empty(span(1..1)), lit('a', 2),])
3565             ))
3566         );
3567 
3568         assert_eq!(
3569             parser(r"a|b)").parse().unwrap_err(),
3570             TestError {
3571                 span: span(3..4),
3572                 kind: ast::ErrorKind::GroupUnopened,
3573             }
3574         );
3575         assert_eq!(
3576             parser(r"(a|b").parse().unwrap_err(),
3577             TestError {
3578                 span: span(0..1),
3579                 kind: ast::ErrorKind::GroupUnclosed,
3580             }
3581         );
3582     }
3583 
3584     #[test]
parse_unsupported_lookaround()3585     fn parse_unsupported_lookaround() {
3586         assert_eq!(
3587             parser(r"(?=a)").parse().unwrap_err(),
3588             TestError {
3589                 span: span(0..3),
3590                 kind: ast::ErrorKind::UnsupportedLookAround,
3591             }
3592         );
3593         assert_eq!(
3594             parser(r"(?!a)").parse().unwrap_err(),
3595             TestError {
3596                 span: span(0..3),
3597                 kind: ast::ErrorKind::UnsupportedLookAround,
3598             }
3599         );
3600         assert_eq!(
3601             parser(r"(?<=a)").parse().unwrap_err(),
3602             TestError {
3603                 span: span(0..4),
3604                 kind: ast::ErrorKind::UnsupportedLookAround,
3605             }
3606         );
3607         assert_eq!(
3608             parser(r"(?<!a)").parse().unwrap_err(),
3609             TestError {
3610                 span: span(0..4),
3611                 kind: ast::ErrorKind::UnsupportedLookAround,
3612             }
3613         );
3614     }
3615 
3616     #[test]
parse_group()3617     fn parse_group() {
3618         assert_eq!(
3619             parser("(?i)").parse(),
3620             Ok(Ast::Flags(ast::SetFlags {
3621                 span: span(0..4),
3622                 flags: ast::Flags {
3623                     span: span(2..3),
3624                     items: vec![ast::FlagsItem {
3625                         span: span(2..3),
3626                         kind: ast::FlagsItemKind::Flag(
3627                             ast::Flag::CaseInsensitive
3628                         ),
3629                     }],
3630                 },
3631             }))
3632         );
3633         assert_eq!(
3634             parser("(?iU)").parse(),
3635             Ok(Ast::Flags(ast::SetFlags {
3636                 span: span(0..5),
3637                 flags: ast::Flags {
3638                     span: span(2..4),
3639                     items: vec![
3640                         ast::FlagsItem {
3641                             span: span(2..3),
3642                             kind: ast::FlagsItemKind::Flag(
3643                                 ast::Flag::CaseInsensitive
3644                             ),
3645                         },
3646                         ast::FlagsItem {
3647                             span: span(3..4),
3648                             kind: ast::FlagsItemKind::Flag(
3649                                 ast::Flag::SwapGreed
3650                             ),
3651                         },
3652                     ],
3653                 },
3654             }))
3655         );
3656         assert_eq!(
3657             parser("(?i-U)").parse(),
3658             Ok(Ast::Flags(ast::SetFlags {
3659                 span: span(0..6),
3660                 flags: ast::Flags {
3661                     span: span(2..5),
3662                     items: vec![
3663                         ast::FlagsItem {
3664                             span: span(2..3),
3665                             kind: ast::FlagsItemKind::Flag(
3666                                 ast::Flag::CaseInsensitive
3667                             ),
3668                         },
3669                         ast::FlagsItem {
3670                             span: span(3..4),
3671                             kind: ast::FlagsItemKind::Negation,
3672                         },
3673                         ast::FlagsItem {
3674                             span: span(4..5),
3675                             kind: ast::FlagsItemKind::Flag(
3676                                 ast::Flag::SwapGreed
3677                             ),
3678                         },
3679                     ],
3680                 },
3681             }))
3682         );
3683 
3684         assert_eq!(
3685             parser("()").parse(),
3686             Ok(Ast::Group(ast::Group {
3687                 span: span(0..2),
3688                 kind: ast::GroupKind::CaptureIndex(1),
3689                 ast: Box::new(Ast::Empty(span(1..1))),
3690             }))
3691         );
3692         assert_eq!(
3693             parser("(a)").parse(),
3694             Ok(Ast::Group(ast::Group {
3695                 span: span(0..3),
3696                 kind: ast::GroupKind::CaptureIndex(1),
3697                 ast: Box::new(lit('a', 1)),
3698             }))
3699         );
3700         assert_eq!(
3701             parser("(())").parse(),
3702             Ok(Ast::Group(ast::Group {
3703                 span: span(0..4),
3704                 kind: ast::GroupKind::CaptureIndex(1),
3705                 ast: Box::new(Ast::Group(ast::Group {
3706                     span: span(1..3),
3707                     kind: ast::GroupKind::CaptureIndex(2),
3708                     ast: Box::new(Ast::Empty(span(2..2))),
3709                 })),
3710             }))
3711         );
3712 
3713         assert_eq!(
3714             parser("(?:a)").parse(),
3715             Ok(Ast::Group(ast::Group {
3716                 span: span(0..5),
3717                 kind: ast::GroupKind::NonCapturing(ast::Flags {
3718                     span: span(2..2),
3719                     items: vec![],
3720                 }),
3721                 ast: Box::new(lit('a', 3)),
3722             }))
3723         );
3724 
3725         assert_eq!(
3726             parser("(?i:a)").parse(),
3727             Ok(Ast::Group(ast::Group {
3728                 span: span(0..6),
3729                 kind: ast::GroupKind::NonCapturing(ast::Flags {
3730                     span: span(2..3),
3731                     items: vec![ast::FlagsItem {
3732                         span: span(2..3),
3733                         kind: ast::FlagsItemKind::Flag(
3734                             ast::Flag::CaseInsensitive
3735                         ),
3736                     },],
3737                 }),
3738                 ast: Box::new(lit('a', 4)),
3739             }))
3740         );
3741         assert_eq!(
3742             parser("(?i-U:a)").parse(),
3743             Ok(Ast::Group(ast::Group {
3744                 span: span(0..8),
3745                 kind: ast::GroupKind::NonCapturing(ast::Flags {
3746                     span: span(2..5),
3747                     items: vec![
3748                         ast::FlagsItem {
3749                             span: span(2..3),
3750                             kind: ast::FlagsItemKind::Flag(
3751                                 ast::Flag::CaseInsensitive
3752                             ),
3753                         },
3754                         ast::FlagsItem {
3755                             span: span(3..4),
3756                             kind: ast::FlagsItemKind::Negation,
3757                         },
3758                         ast::FlagsItem {
3759                             span: span(4..5),
3760                             kind: ast::FlagsItemKind::Flag(
3761                                 ast::Flag::SwapGreed
3762                             ),
3763                         },
3764                     ],
3765                 }),
3766                 ast: Box::new(lit('a', 6)),
3767             }))
3768         );
3769 
3770         assert_eq!(
3771             parser("(").parse().unwrap_err(),
3772             TestError {
3773                 span: span(0..1),
3774                 kind: ast::ErrorKind::GroupUnclosed,
3775             }
3776         );
3777         assert_eq!(
3778             parser("(?").parse().unwrap_err(),
3779             TestError {
3780                 span: span(0..1),
3781                 kind: ast::ErrorKind::GroupUnclosed,
3782             }
3783         );
3784         assert_eq!(
3785             parser("(?P").parse().unwrap_err(),
3786             TestError {
3787                 span: span(2..3),
3788                 kind: ast::ErrorKind::FlagUnrecognized,
3789             }
3790         );
3791         assert_eq!(
3792             parser("(?P<").parse().unwrap_err(),
3793             TestError {
3794                 span: span(4..4),
3795                 kind: ast::ErrorKind::GroupNameUnexpectedEof,
3796             }
3797         );
3798         assert_eq!(
3799             parser("(a").parse().unwrap_err(),
3800             TestError {
3801                 span: span(0..1),
3802                 kind: ast::ErrorKind::GroupUnclosed,
3803             }
3804         );
3805         assert_eq!(
3806             parser("(()").parse().unwrap_err(),
3807             TestError {
3808                 span: span(0..1),
3809                 kind: ast::ErrorKind::GroupUnclosed,
3810             }
3811         );
3812         assert_eq!(
3813             parser(")").parse().unwrap_err(),
3814             TestError {
3815                 span: span(0..1),
3816                 kind: ast::ErrorKind::GroupUnopened,
3817             }
3818         );
3819         assert_eq!(
3820             parser("a)").parse().unwrap_err(),
3821             TestError {
3822                 span: span(1..2),
3823                 kind: ast::ErrorKind::GroupUnopened,
3824             }
3825         );
3826     }
3827 
3828     #[test]
parse_capture_name()3829     fn parse_capture_name() {
3830         assert_eq!(
3831             parser("(?P<a>z)").parse(),
3832             Ok(Ast::Group(ast::Group {
3833                 span: span(0..8),
3834                 kind: ast::GroupKind::CaptureName(ast::CaptureName {
3835                     span: span(4..5),
3836                     name: s("a"),
3837                     index: 1,
3838                 }),
3839                 ast: Box::new(lit('z', 6)),
3840             }))
3841         );
3842         assert_eq!(
3843             parser("(?P<abc>z)").parse(),
3844             Ok(Ast::Group(ast::Group {
3845                 span: span(0..10),
3846                 kind: ast::GroupKind::CaptureName(ast::CaptureName {
3847                     span: span(4..7),
3848                     name: s("abc"),
3849                     index: 1,
3850                 }),
3851                 ast: Box::new(lit('z', 8)),
3852             }))
3853         );
3854 
3855         assert_eq!(
3856             parser("(?P<a_1>z)").parse(),
3857             Ok(Ast::Group(ast::Group {
3858                 span: span(0..10),
3859                 kind: ast::GroupKind::CaptureName(ast::CaptureName {
3860                     span: span(4..7),
3861                     name: s("a_1"),
3862                     index: 1,
3863                 }),
3864                 ast: Box::new(lit('z', 8)),
3865             }))
3866         );
3867 
3868         assert_eq!(
3869             parser("(?P<a.1>z)").parse(),
3870             Ok(Ast::Group(ast::Group {
3871                 span: span(0..10),
3872                 kind: ast::GroupKind::CaptureName(ast::CaptureName {
3873                     span: span(4..7),
3874                     name: s("a.1"),
3875                     index: 1,
3876                 }),
3877                 ast: Box::new(lit('z', 8)),
3878             }))
3879         );
3880 
3881         assert_eq!(
3882             parser("(?P<a[1]>z)").parse(),
3883             Ok(Ast::Group(ast::Group {
3884                 span: span(0..11),
3885                 kind: ast::GroupKind::CaptureName(ast::CaptureName {
3886                     span: span(4..8),
3887                     name: s("a[1]"),
3888                     index: 1,
3889                 }),
3890                 ast: Box::new(lit('z', 9)),
3891             }))
3892         );
3893 
3894         assert_eq!(
3895             parser("(?P<").parse().unwrap_err(),
3896             TestError {
3897                 span: span(4..4),
3898                 kind: ast::ErrorKind::GroupNameUnexpectedEof,
3899             }
3900         );
3901         assert_eq!(
3902             parser("(?P<>z)").parse().unwrap_err(),
3903             TestError {
3904                 span: span(4..4),
3905                 kind: ast::ErrorKind::GroupNameEmpty,
3906             }
3907         );
3908         assert_eq!(
3909             parser("(?P<a").parse().unwrap_err(),
3910             TestError {
3911                 span: span(5..5),
3912                 kind: ast::ErrorKind::GroupNameUnexpectedEof,
3913             }
3914         );
3915         assert_eq!(
3916             parser("(?P<ab").parse().unwrap_err(),
3917             TestError {
3918                 span: span(6..6),
3919                 kind: ast::ErrorKind::GroupNameUnexpectedEof,
3920             }
3921         );
3922         assert_eq!(
3923             parser("(?P<0a").parse().unwrap_err(),
3924             TestError {
3925                 span: span(4..5),
3926                 kind: ast::ErrorKind::GroupNameInvalid,
3927             }
3928         );
3929         assert_eq!(
3930             parser("(?P<~").parse().unwrap_err(),
3931             TestError {
3932                 span: span(4..5),
3933                 kind: ast::ErrorKind::GroupNameInvalid,
3934             }
3935         );
3936         assert_eq!(
3937             parser("(?P<abc~").parse().unwrap_err(),
3938             TestError {
3939                 span: span(7..8),
3940                 kind: ast::ErrorKind::GroupNameInvalid,
3941             }
3942         );
3943         assert_eq!(
3944             parser("(?P<a>y)(?P<a>z)").parse().unwrap_err(),
3945             TestError {
3946                 span: span(12..13),
3947                 kind: ast::ErrorKind::GroupNameDuplicate {
3948                     original: span(4..5),
3949                 },
3950             }
3951         );
3952     }
3953 
3954     #[test]
parse_flags()3955     fn parse_flags() {
3956         assert_eq!(
3957             parser("i:").parse_flags(),
3958             Ok(ast::Flags {
3959                 span: span(0..1),
3960                 items: vec![ast::FlagsItem {
3961                     span: span(0..1),
3962                     kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
3963                 }],
3964             })
3965         );
3966         assert_eq!(
3967             parser("i)").parse_flags(),
3968             Ok(ast::Flags {
3969                 span: span(0..1),
3970                 items: vec![ast::FlagsItem {
3971                     span: span(0..1),
3972                     kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
3973                 }],
3974             })
3975         );
3976 
3977         assert_eq!(
3978             parser("isU:").parse_flags(),
3979             Ok(ast::Flags {
3980                 span: span(0..3),
3981                 items: vec![
3982                     ast::FlagsItem {
3983                         span: span(0..1),
3984                         kind: ast::FlagsItemKind::Flag(
3985                             ast::Flag::CaseInsensitive
3986                         ),
3987                     },
3988                     ast::FlagsItem {
3989                         span: span(1..2),
3990                         kind: ast::FlagsItemKind::Flag(
3991                             ast::Flag::DotMatchesNewLine
3992                         ),
3993                     },
3994                     ast::FlagsItem {
3995                         span: span(2..3),
3996                         kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
3997                     },
3998                 ],
3999             })
4000         );
4001 
4002         assert_eq!(
4003             parser("-isU:").parse_flags(),
4004             Ok(ast::Flags {
4005                 span: span(0..4),
4006                 items: vec![
4007                     ast::FlagsItem {
4008                         span: span(0..1),
4009                         kind: ast::FlagsItemKind::Negation,
4010                     },
4011                     ast::FlagsItem {
4012                         span: span(1..2),
4013                         kind: ast::FlagsItemKind::Flag(
4014                             ast::Flag::CaseInsensitive
4015                         ),
4016                     },
4017                     ast::FlagsItem {
4018                         span: span(2..3),
4019                         kind: ast::FlagsItemKind::Flag(
4020                             ast::Flag::DotMatchesNewLine
4021                         ),
4022                     },
4023                     ast::FlagsItem {
4024                         span: span(3..4),
4025                         kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
4026                     },
4027                 ],
4028             })
4029         );
4030         assert_eq!(
4031             parser("i-sU:").parse_flags(),
4032             Ok(ast::Flags {
4033                 span: span(0..4),
4034                 items: vec![
4035                     ast::FlagsItem {
4036                         span: span(0..1),
4037                         kind: ast::FlagsItemKind::Flag(
4038                             ast::Flag::CaseInsensitive
4039                         ),
4040                     },
4041                     ast::FlagsItem {
4042                         span: span(1..2),
4043                         kind: ast::FlagsItemKind::Negation,
4044                     },
4045                     ast::FlagsItem {
4046                         span: span(2..3),
4047                         kind: ast::FlagsItemKind::Flag(
4048                             ast::Flag::DotMatchesNewLine
4049                         ),
4050                     },
4051                     ast::FlagsItem {
4052                         span: span(3..4),
4053                         kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
4054                     },
4055                 ],
4056             })
4057         );
4058 
4059         assert_eq!(
4060             parser("isU").parse_flags().unwrap_err(),
4061             TestError {
4062                 span: span(3..3),
4063                 kind: ast::ErrorKind::FlagUnexpectedEof,
4064             }
4065         );
4066         assert_eq!(
4067             parser("isUa:").parse_flags().unwrap_err(),
4068             TestError {
4069                 span: span(3..4),
4070                 kind: ast::ErrorKind::FlagUnrecognized,
4071             }
4072         );
4073         assert_eq!(
4074             parser("isUi:").parse_flags().unwrap_err(),
4075             TestError {
4076                 span: span(3..4),
4077                 kind: ast::ErrorKind::FlagDuplicate { original: span(0..1) },
4078             }
4079         );
4080         assert_eq!(
4081             parser("i-sU-i:").parse_flags().unwrap_err(),
4082             TestError {
4083                 span: span(4..5),
4084                 kind: ast::ErrorKind::FlagRepeatedNegation {
4085                     original: span(1..2),
4086                 },
4087             }
4088         );
4089         assert_eq!(
4090             parser("-)").parse_flags().unwrap_err(),
4091             TestError {
4092                 span: span(0..1),
4093                 kind: ast::ErrorKind::FlagDanglingNegation,
4094             }
4095         );
4096         assert_eq!(
4097             parser("i-)").parse_flags().unwrap_err(),
4098             TestError {
4099                 span: span(1..2),
4100                 kind: ast::ErrorKind::FlagDanglingNegation,
4101             }
4102         );
4103         assert_eq!(
4104             parser("iU-)").parse_flags().unwrap_err(),
4105             TestError {
4106                 span: span(2..3),
4107                 kind: ast::ErrorKind::FlagDanglingNegation,
4108             }
4109         );
4110     }
4111 
4112     #[test]
parse_flag()4113     fn parse_flag() {
4114         assert_eq!(parser("i").parse_flag(), Ok(ast::Flag::CaseInsensitive));
4115         assert_eq!(parser("m").parse_flag(), Ok(ast::Flag::MultiLine));
4116         assert_eq!(parser("s").parse_flag(), Ok(ast::Flag::DotMatchesNewLine));
4117         assert_eq!(parser("U").parse_flag(), Ok(ast::Flag::SwapGreed));
4118         assert_eq!(parser("u").parse_flag(), Ok(ast::Flag::Unicode));
4119         assert_eq!(parser("x").parse_flag(), Ok(ast::Flag::IgnoreWhitespace));
4120 
4121         assert_eq!(
4122             parser("a").parse_flag().unwrap_err(),
4123             TestError {
4124                 span: span(0..1),
4125                 kind: ast::ErrorKind::FlagUnrecognized,
4126             }
4127         );
4128         assert_eq!(
4129             parser("☃").parse_flag().unwrap_err(),
4130             TestError {
4131                 span: span_range("☃", 0..3),
4132                 kind: ast::ErrorKind::FlagUnrecognized,
4133             }
4134         );
4135     }
4136 
4137     #[test]
parse_primitive_non_escape()4138     fn parse_primitive_non_escape() {
4139         assert_eq!(
4140             parser(r".").parse_primitive(),
4141             Ok(Primitive::Dot(span(0..1)))
4142         );
4143         assert_eq!(
4144             parser(r"^").parse_primitive(),
4145             Ok(Primitive::Assertion(ast::Assertion {
4146                 span: span(0..1),
4147                 kind: ast::AssertionKind::StartLine,
4148             }))
4149         );
4150         assert_eq!(
4151             parser(r"$").parse_primitive(),
4152             Ok(Primitive::Assertion(ast::Assertion {
4153                 span: span(0..1),
4154                 kind: ast::AssertionKind::EndLine,
4155             }))
4156         );
4157 
4158         assert_eq!(
4159             parser(r"a").parse_primitive(),
4160             Ok(Primitive::Literal(ast::Literal {
4161                 span: span(0..1),
4162                 kind: ast::LiteralKind::Verbatim,
4163                 c: 'a',
4164             }))
4165         );
4166         assert_eq!(
4167             parser(r"|").parse_primitive(),
4168             Ok(Primitive::Literal(ast::Literal {
4169                 span: span(0..1),
4170                 kind: ast::LiteralKind::Verbatim,
4171                 c: '|',
4172             }))
4173         );
4174         assert_eq!(
4175             parser(r"☃").parse_primitive(),
4176             Ok(Primitive::Literal(ast::Literal {
4177                 span: span_range("☃", 0..3),
4178                 kind: ast::LiteralKind::Verbatim,
4179                 c: '☃',
4180             }))
4181         );
4182     }
4183 
4184     #[test]
parse_escape()4185     fn parse_escape() {
4186         assert_eq!(
4187             parser(r"\|").parse_primitive(),
4188             Ok(Primitive::Literal(ast::Literal {
4189                 span: span(0..2),
4190                 kind: ast::LiteralKind::Punctuation,
4191                 c: '|',
4192             }))
4193         );
4194         let specials = &[
4195             (r"\a", '\x07', ast::SpecialLiteralKind::Bell),
4196             (r"\f", '\x0C', ast::SpecialLiteralKind::FormFeed),
4197             (r"\t", '\t', ast::SpecialLiteralKind::Tab),
4198             (r"\n", '\n', ast::SpecialLiteralKind::LineFeed),
4199             (r"\r", '\r', ast::SpecialLiteralKind::CarriageReturn),
4200             (r"\v", '\x0B', ast::SpecialLiteralKind::VerticalTab),
4201         ];
4202         for &(pat, c, ref kind) in specials {
4203             assert_eq!(
4204                 parser(pat).parse_primitive(),
4205                 Ok(Primitive::Literal(ast::Literal {
4206                     span: span(0..2),
4207                     kind: ast::LiteralKind::Special(kind.clone()),
4208                     c: c,
4209                 }))
4210             );
4211         }
4212         assert_eq!(
4213             parser(r"\A").parse_primitive(),
4214             Ok(Primitive::Assertion(ast::Assertion {
4215                 span: span(0..2),
4216                 kind: ast::AssertionKind::StartText,
4217             }))
4218         );
4219         assert_eq!(
4220             parser(r"\z").parse_primitive(),
4221             Ok(Primitive::Assertion(ast::Assertion {
4222                 span: span(0..2),
4223                 kind: ast::AssertionKind::EndText,
4224             }))
4225         );
4226         assert_eq!(
4227             parser(r"\b").parse_primitive(),
4228             Ok(Primitive::Assertion(ast::Assertion {
4229                 span: span(0..2),
4230                 kind: ast::AssertionKind::WordBoundary,
4231             }))
4232         );
4233         assert_eq!(
4234             parser(r"\B").parse_primitive(),
4235             Ok(Primitive::Assertion(ast::Assertion {
4236                 span: span(0..2),
4237                 kind: ast::AssertionKind::NotWordBoundary,
4238             }))
4239         );
4240 
4241         assert_eq!(
4242             parser(r"\").parse_escape().unwrap_err(),
4243             TestError {
4244                 span: span(0..1),
4245                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4246             }
4247         );
4248         assert_eq!(
4249             parser(r"\y").parse_escape().unwrap_err(),
4250             TestError {
4251                 span: span(0..2),
4252                 kind: ast::ErrorKind::EscapeUnrecognized,
4253             }
4254         );
4255     }
4256 
4257     #[test]
parse_unsupported_backreference()4258     fn parse_unsupported_backreference() {
4259         assert_eq!(
4260             parser(r"\0").parse_escape().unwrap_err(),
4261             TestError {
4262                 span: span(0..2),
4263                 kind: ast::ErrorKind::UnsupportedBackreference,
4264             }
4265         );
4266         assert_eq!(
4267             parser(r"\9").parse_escape().unwrap_err(),
4268             TestError {
4269                 span: span(0..2),
4270                 kind: ast::ErrorKind::UnsupportedBackreference,
4271             }
4272         );
4273     }
4274 
4275     #[test]
parse_octal()4276     fn parse_octal() {
4277         for i in 0..511 {
4278             let pat = format!(r"\{:o}", i);
4279             assert_eq!(
4280                 parser_octal(&pat).parse_escape(),
4281                 Ok(Primitive::Literal(ast::Literal {
4282                     span: span(0..pat.len()),
4283                     kind: ast::LiteralKind::Octal,
4284                     c: ::std::char::from_u32(i).unwrap(),
4285                 }))
4286             );
4287         }
4288         assert_eq!(
4289             parser_octal(r"\778").parse_escape(),
4290             Ok(Primitive::Literal(ast::Literal {
4291                 span: span(0..3),
4292                 kind: ast::LiteralKind::Octal,
4293                 c: '?',
4294             }))
4295         );
4296         assert_eq!(
4297             parser_octal(r"\7777").parse_escape(),
4298             Ok(Primitive::Literal(ast::Literal {
4299                 span: span(0..4),
4300                 kind: ast::LiteralKind::Octal,
4301                 c: '\u{01FF}',
4302             }))
4303         );
4304         assert_eq!(
4305             parser_octal(r"\778").parse(),
4306             Ok(Ast::Concat(ast::Concat {
4307                 span: span(0..4),
4308                 asts: vec![
4309                     Ast::Literal(ast::Literal {
4310                         span: span(0..3),
4311                         kind: ast::LiteralKind::Octal,
4312                         c: '?',
4313                     }),
4314                     Ast::Literal(ast::Literal {
4315                         span: span(3..4),
4316                         kind: ast::LiteralKind::Verbatim,
4317                         c: '8',
4318                     }),
4319                 ],
4320             }))
4321         );
4322         assert_eq!(
4323             parser_octal(r"\7777").parse(),
4324             Ok(Ast::Concat(ast::Concat {
4325                 span: span(0..5),
4326                 asts: vec![
4327                     Ast::Literal(ast::Literal {
4328                         span: span(0..4),
4329                         kind: ast::LiteralKind::Octal,
4330                         c: '\u{01FF}',
4331                     }),
4332                     Ast::Literal(ast::Literal {
4333                         span: span(4..5),
4334                         kind: ast::LiteralKind::Verbatim,
4335                         c: '7',
4336                     }),
4337                 ],
4338             }))
4339         );
4340 
4341         assert_eq!(
4342             parser_octal(r"\8").parse_escape().unwrap_err(),
4343             TestError {
4344                 span: span(0..2),
4345                 kind: ast::ErrorKind::EscapeUnrecognized,
4346             }
4347         );
4348     }
4349 
4350     #[test]
parse_hex_two()4351     fn parse_hex_two() {
4352         for i in 0..256 {
4353             let pat = format!(r"\x{:02x}", i);
4354             assert_eq!(
4355                 parser(&pat).parse_escape(),
4356                 Ok(Primitive::Literal(ast::Literal {
4357                     span: span(0..pat.len()),
4358                     kind: ast::LiteralKind::HexFixed(ast::HexLiteralKind::X),
4359                     c: ::std::char::from_u32(i).unwrap(),
4360                 }))
4361             );
4362         }
4363 
4364         assert_eq!(
4365             parser(r"\xF").parse_escape().unwrap_err(),
4366             TestError {
4367                 span: span(3..3),
4368                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4369             }
4370         );
4371         assert_eq!(
4372             parser(r"\xG").parse_escape().unwrap_err(),
4373             TestError {
4374                 span: span(2..3),
4375                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4376             }
4377         );
4378         assert_eq!(
4379             parser(r"\xFG").parse_escape().unwrap_err(),
4380             TestError {
4381                 span: span(3..4),
4382                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4383             }
4384         );
4385     }
4386 
4387     #[test]
parse_hex_four()4388     fn parse_hex_four() {
4389         for i in 0..65536 {
4390             let c = match ::std::char::from_u32(i) {
4391                 None => continue,
4392                 Some(c) => c,
4393             };
4394             let pat = format!(r"\u{:04x}", i);
4395             assert_eq!(
4396                 parser(&pat).parse_escape(),
4397                 Ok(Primitive::Literal(ast::Literal {
4398                     span: span(0..pat.len()),
4399                     kind: ast::LiteralKind::HexFixed(
4400                         ast::HexLiteralKind::UnicodeShort
4401                     ),
4402                     c: c,
4403                 }))
4404             );
4405         }
4406 
4407         assert_eq!(
4408             parser(r"\uF").parse_escape().unwrap_err(),
4409             TestError {
4410                 span: span(3..3),
4411                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4412             }
4413         );
4414         assert_eq!(
4415             parser(r"\uG").parse_escape().unwrap_err(),
4416             TestError {
4417                 span: span(2..3),
4418                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4419             }
4420         );
4421         assert_eq!(
4422             parser(r"\uFG").parse_escape().unwrap_err(),
4423             TestError {
4424                 span: span(3..4),
4425                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4426             }
4427         );
4428         assert_eq!(
4429             parser(r"\uFFG").parse_escape().unwrap_err(),
4430             TestError {
4431                 span: span(4..5),
4432                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4433             }
4434         );
4435         assert_eq!(
4436             parser(r"\uFFFG").parse_escape().unwrap_err(),
4437             TestError {
4438                 span: span(5..6),
4439                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4440             }
4441         );
4442         assert_eq!(
4443             parser(r"\uD800").parse_escape().unwrap_err(),
4444             TestError {
4445                 span: span(2..6),
4446                 kind: ast::ErrorKind::EscapeHexInvalid,
4447             }
4448         );
4449     }
4450 
4451     #[test]
parse_hex_eight()4452     fn parse_hex_eight() {
4453         for i in 0..65536 {
4454             let c = match ::std::char::from_u32(i) {
4455                 None => continue,
4456                 Some(c) => c,
4457             };
4458             let pat = format!(r"\U{:08x}", i);
4459             assert_eq!(
4460                 parser(&pat).parse_escape(),
4461                 Ok(Primitive::Literal(ast::Literal {
4462                     span: span(0..pat.len()),
4463                     kind: ast::LiteralKind::HexFixed(
4464                         ast::HexLiteralKind::UnicodeLong
4465                     ),
4466                     c: c,
4467                 }))
4468             );
4469         }
4470 
4471         assert_eq!(
4472             parser(r"\UF").parse_escape().unwrap_err(),
4473             TestError {
4474                 span: span(3..3),
4475                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4476             }
4477         );
4478         assert_eq!(
4479             parser(r"\UG").parse_escape().unwrap_err(),
4480             TestError {
4481                 span: span(2..3),
4482                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4483             }
4484         );
4485         assert_eq!(
4486             parser(r"\UFG").parse_escape().unwrap_err(),
4487             TestError {
4488                 span: span(3..4),
4489                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4490             }
4491         );
4492         assert_eq!(
4493             parser(r"\UFFG").parse_escape().unwrap_err(),
4494             TestError {
4495                 span: span(4..5),
4496                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4497             }
4498         );
4499         assert_eq!(
4500             parser(r"\UFFFG").parse_escape().unwrap_err(),
4501             TestError {
4502                 span: span(5..6),
4503                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4504             }
4505         );
4506         assert_eq!(
4507             parser(r"\UFFFFG").parse_escape().unwrap_err(),
4508             TestError {
4509                 span: span(6..7),
4510                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4511             }
4512         );
4513         assert_eq!(
4514             parser(r"\UFFFFFG").parse_escape().unwrap_err(),
4515             TestError {
4516                 span: span(7..8),
4517                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4518             }
4519         );
4520         assert_eq!(
4521             parser(r"\UFFFFFFG").parse_escape().unwrap_err(),
4522             TestError {
4523                 span: span(8..9),
4524                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4525             }
4526         );
4527         assert_eq!(
4528             parser(r"\UFFFFFFFG").parse_escape().unwrap_err(),
4529             TestError {
4530                 span: span(9..10),
4531                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4532             }
4533         );
4534     }
4535 
4536     #[test]
parse_hex_brace()4537     fn parse_hex_brace() {
4538         assert_eq!(
4539             parser(r"\u{26c4}").parse_escape(),
4540             Ok(Primitive::Literal(ast::Literal {
4541                 span: span(0..8),
4542                 kind: ast::LiteralKind::HexBrace(
4543                     ast::HexLiteralKind::UnicodeShort
4544                 ),
4545                 c: '⛄',
4546             }))
4547         );
4548         assert_eq!(
4549             parser(r"\U{26c4}").parse_escape(),
4550             Ok(Primitive::Literal(ast::Literal {
4551                 span: span(0..8),
4552                 kind: ast::LiteralKind::HexBrace(
4553                     ast::HexLiteralKind::UnicodeLong
4554                 ),
4555                 c: '⛄',
4556             }))
4557         );
4558         assert_eq!(
4559             parser(r"\x{26c4}").parse_escape(),
4560             Ok(Primitive::Literal(ast::Literal {
4561                 span: span(0..8),
4562                 kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
4563                 c: '⛄',
4564             }))
4565         );
4566         assert_eq!(
4567             parser(r"\x{26C4}").parse_escape(),
4568             Ok(Primitive::Literal(ast::Literal {
4569                 span: span(0..8),
4570                 kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
4571                 c: '⛄',
4572             }))
4573         );
4574         assert_eq!(
4575             parser(r"\x{10fFfF}").parse_escape(),
4576             Ok(Primitive::Literal(ast::Literal {
4577                 span: span(0..10),
4578                 kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
4579                 c: '\u{10FFFF}',
4580             }))
4581         );
4582 
4583         assert_eq!(
4584             parser(r"\x").parse_escape().unwrap_err(),
4585             TestError {
4586                 span: span(2..2),
4587                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4588             }
4589         );
4590         assert_eq!(
4591             parser(r"\x{").parse_escape().unwrap_err(),
4592             TestError {
4593                 span: span(2..3),
4594                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4595             }
4596         );
4597         assert_eq!(
4598             parser(r"\x{FF").parse_escape().unwrap_err(),
4599             TestError {
4600                 span: span(2..5),
4601                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4602             }
4603         );
4604         assert_eq!(
4605             parser(r"\x{}").parse_escape().unwrap_err(),
4606             TestError {
4607                 span: span(2..4),
4608                 kind: ast::ErrorKind::EscapeHexEmpty,
4609             }
4610         );
4611         assert_eq!(
4612             parser(r"\x{FGF}").parse_escape().unwrap_err(),
4613             TestError {
4614                 span: span(4..5),
4615                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4616             }
4617         );
4618         assert_eq!(
4619             parser(r"\x{FFFFFF}").parse_escape().unwrap_err(),
4620             TestError {
4621                 span: span(3..9),
4622                 kind: ast::ErrorKind::EscapeHexInvalid,
4623             }
4624         );
4625         assert_eq!(
4626             parser(r"\x{D800}").parse_escape().unwrap_err(),
4627             TestError {
4628                 span: span(3..7),
4629                 kind: ast::ErrorKind::EscapeHexInvalid,
4630             }
4631         );
4632         assert_eq!(
4633             parser(r"\x{FFFFFFFFF}").parse_escape().unwrap_err(),
4634             TestError {
4635                 span: span(3..12),
4636                 kind: ast::ErrorKind::EscapeHexInvalid,
4637             }
4638         );
4639     }
4640 
4641     #[test]
parse_decimal()4642     fn parse_decimal() {
4643         assert_eq!(parser("123").parse_decimal(), Ok(123));
4644         assert_eq!(parser("0").parse_decimal(), Ok(0));
4645         assert_eq!(parser("01").parse_decimal(), Ok(1));
4646 
4647         assert_eq!(
4648             parser("-1").parse_decimal().unwrap_err(),
4649             TestError { span: span(0..0), kind: ast::ErrorKind::DecimalEmpty }
4650         );
4651         assert_eq!(
4652             parser("").parse_decimal().unwrap_err(),
4653             TestError { span: span(0..0), kind: ast::ErrorKind::DecimalEmpty }
4654         );
4655         assert_eq!(
4656             parser("9999999999").parse_decimal().unwrap_err(),
4657             TestError {
4658                 span: span(0..10),
4659                 kind: ast::ErrorKind::DecimalInvalid,
4660             }
4661         );
4662     }
4663 
4664     #[test]
parse_set_class()4665     fn parse_set_class() {
4666         fn union(span: Span, items: Vec<ast::ClassSetItem>) -> ast::ClassSet {
4667             ast::ClassSet::union(ast::ClassSetUnion {
4668                 span: span,
4669                 items: items,
4670             })
4671         }
4672 
4673         fn intersection(
4674             span: Span,
4675             lhs: ast::ClassSet,
4676             rhs: ast::ClassSet,
4677         ) -> ast::ClassSet {
4678             ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
4679                 span: span,
4680                 kind: ast::ClassSetBinaryOpKind::Intersection,
4681                 lhs: Box::new(lhs),
4682                 rhs: Box::new(rhs),
4683             })
4684         }
4685 
4686         fn difference(
4687             span: Span,
4688             lhs: ast::ClassSet,
4689             rhs: ast::ClassSet,
4690         ) -> ast::ClassSet {
4691             ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
4692                 span: span,
4693                 kind: ast::ClassSetBinaryOpKind::Difference,
4694                 lhs: Box::new(lhs),
4695                 rhs: Box::new(rhs),
4696             })
4697         }
4698 
4699         fn symdifference(
4700             span: Span,
4701             lhs: ast::ClassSet,
4702             rhs: ast::ClassSet,
4703         ) -> ast::ClassSet {
4704             ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
4705                 span: span,
4706                 kind: ast::ClassSetBinaryOpKind::SymmetricDifference,
4707                 lhs: Box::new(lhs),
4708                 rhs: Box::new(rhs),
4709             })
4710         }
4711 
4712         fn itemset(item: ast::ClassSetItem) -> ast::ClassSet {
4713             ast::ClassSet::Item(item)
4714         }
4715 
4716         fn item_ascii(cls: ast::ClassAscii) -> ast::ClassSetItem {
4717             ast::ClassSetItem::Ascii(cls)
4718         }
4719 
4720         fn item_unicode(cls: ast::ClassUnicode) -> ast::ClassSetItem {
4721             ast::ClassSetItem::Unicode(cls)
4722         }
4723 
4724         fn item_perl(cls: ast::ClassPerl) -> ast::ClassSetItem {
4725             ast::ClassSetItem::Perl(cls)
4726         }
4727 
4728         fn item_bracket(cls: ast::ClassBracketed) -> ast::ClassSetItem {
4729             ast::ClassSetItem::Bracketed(Box::new(cls))
4730         }
4731 
4732         fn lit(span: Span, c: char) -> ast::ClassSetItem {
4733             ast::ClassSetItem::Literal(ast::Literal {
4734                 span: span,
4735                 kind: ast::LiteralKind::Verbatim,
4736                 c: c,
4737             })
4738         }
4739 
4740         fn empty(span: Span) -> ast::ClassSetItem {
4741             ast::ClassSetItem::Empty(span)
4742         }
4743 
4744         fn range(span: Span, start: char, end: char) -> ast::ClassSetItem {
4745             let pos1 = Position {
4746                 offset: span.start.offset + start.len_utf8(),
4747                 column: span.start.column + 1,
4748                 ..span.start
4749             };
4750             let pos2 = Position {
4751                 offset: span.end.offset - end.len_utf8(),
4752                 column: span.end.column - 1,
4753                 ..span.end
4754             };
4755             ast::ClassSetItem::Range(ast::ClassSetRange {
4756                 span: span,
4757                 start: ast::Literal {
4758                     span: Span { end: pos1, ..span },
4759                     kind: ast::LiteralKind::Verbatim,
4760                     c: start,
4761                 },
4762                 end: ast::Literal {
4763                     span: Span { start: pos2, ..span },
4764                     kind: ast::LiteralKind::Verbatim,
4765                     c: end,
4766                 },
4767             })
4768         }
4769 
4770         fn alnum(span: Span, negated: bool) -> ast::ClassAscii {
4771             ast::ClassAscii {
4772                 span: span,
4773                 kind: ast::ClassAsciiKind::Alnum,
4774                 negated: negated,
4775             }
4776         }
4777 
4778         fn lower(span: Span, negated: bool) -> ast::ClassAscii {
4779             ast::ClassAscii {
4780                 span: span,
4781                 kind: ast::ClassAsciiKind::Lower,
4782                 negated: negated,
4783             }
4784         }
4785 
4786         assert_eq!(
4787             parser("[[:alnum:]]").parse(),
4788             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4789                 span: span(0..11),
4790                 negated: false,
4791                 kind: itemset(item_ascii(alnum(span(1..10), false))),
4792             })))
4793         );
4794         assert_eq!(
4795             parser("[[[:alnum:]]]").parse(),
4796             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4797                 span: span(0..13),
4798                 negated: false,
4799                 kind: itemset(item_bracket(ast::ClassBracketed {
4800                     span: span(1..12),
4801                     negated: false,
4802                     kind: itemset(item_ascii(alnum(span(2..11), false))),
4803                 })),
4804             })))
4805         );
4806         assert_eq!(
4807             parser("[[:alnum:]&&[:lower:]]").parse(),
4808             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4809                 span: span(0..22),
4810                 negated: false,
4811                 kind: intersection(
4812                     span(1..21),
4813                     itemset(item_ascii(alnum(span(1..10), false))),
4814                     itemset(item_ascii(lower(span(12..21), false))),
4815                 ),
4816             })))
4817         );
4818         assert_eq!(
4819             parser("[[:alnum:]--[:lower:]]").parse(),
4820             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4821                 span: span(0..22),
4822                 negated: false,
4823                 kind: difference(
4824                     span(1..21),
4825                     itemset(item_ascii(alnum(span(1..10), false))),
4826                     itemset(item_ascii(lower(span(12..21), false))),
4827                 ),
4828             })))
4829         );
4830         assert_eq!(
4831             parser("[[:alnum:]~~[:lower:]]").parse(),
4832             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4833                 span: span(0..22),
4834                 negated: false,
4835                 kind: symdifference(
4836                     span(1..21),
4837                     itemset(item_ascii(alnum(span(1..10), false))),
4838                     itemset(item_ascii(lower(span(12..21), false))),
4839                 ),
4840             })))
4841         );
4842 
4843         assert_eq!(
4844             parser("[a]").parse(),
4845             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4846                 span: span(0..3),
4847                 negated: false,
4848                 kind: itemset(lit(span(1..2), 'a')),
4849             })))
4850         );
4851         assert_eq!(
4852             parser(r"[a\]]").parse(),
4853             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4854                 span: span(0..5),
4855                 negated: false,
4856                 kind: union(
4857                     span(1..4),
4858                     vec![
4859                         lit(span(1..2), 'a'),
4860                         ast::ClassSetItem::Literal(ast::Literal {
4861                             span: span(2..4),
4862                             kind: ast::LiteralKind::Punctuation,
4863                             c: ']',
4864                         }),
4865                     ]
4866                 ),
4867             })))
4868         );
4869         assert_eq!(
4870             parser(r"[a\-z]").parse(),
4871             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4872                 span: span(0..6),
4873                 negated: false,
4874                 kind: union(
4875                     span(1..5),
4876                     vec![
4877                         lit(span(1..2), 'a'),
4878                         ast::ClassSetItem::Literal(ast::Literal {
4879                             span: span(2..4),
4880                             kind: ast::LiteralKind::Punctuation,
4881                             c: '-',
4882                         }),
4883                         lit(span(4..5), 'z'),
4884                     ]
4885                 ),
4886             })))
4887         );
4888         assert_eq!(
4889             parser("[ab]").parse(),
4890             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4891                 span: span(0..4),
4892                 negated: false,
4893                 kind: union(
4894                     span(1..3),
4895                     vec![lit(span(1..2), 'a'), lit(span(2..3), 'b'),]
4896                 ),
4897             })))
4898         );
4899         assert_eq!(
4900             parser("[a-]").parse(),
4901             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4902                 span: span(0..4),
4903                 negated: false,
4904                 kind: union(
4905                     span(1..3),
4906                     vec![lit(span(1..2), 'a'), lit(span(2..3), '-'),]
4907                 ),
4908             })))
4909         );
4910         assert_eq!(
4911             parser("[-a]").parse(),
4912             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4913                 span: span(0..4),
4914                 negated: false,
4915                 kind: union(
4916                     span(1..3),
4917                     vec![lit(span(1..2), '-'), lit(span(2..3), 'a'),]
4918                 ),
4919             })))
4920         );
4921         assert_eq!(
4922             parser(r"[\pL]").parse(),
4923             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4924                 span: span(0..5),
4925                 negated: false,
4926                 kind: itemset(item_unicode(ast::ClassUnicode {
4927                     span: span(1..4),
4928                     negated: false,
4929                     kind: ast::ClassUnicodeKind::OneLetter('L'),
4930                 })),
4931             })))
4932         );
4933         assert_eq!(
4934             parser(r"[\w]").parse(),
4935             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4936                 span: span(0..4),
4937                 negated: false,
4938                 kind: itemset(item_perl(ast::ClassPerl {
4939                     span: span(1..3),
4940                     kind: ast::ClassPerlKind::Word,
4941                     negated: false,
4942                 })),
4943             })))
4944         );
4945         assert_eq!(
4946             parser(r"[a\wz]").parse(),
4947             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4948                 span: span(0..6),
4949                 negated: false,
4950                 kind: union(
4951                     span(1..5),
4952                     vec![
4953                         lit(span(1..2), 'a'),
4954                         item_perl(ast::ClassPerl {
4955                             span: span(2..4),
4956                             kind: ast::ClassPerlKind::Word,
4957                             negated: false,
4958                         }),
4959                         lit(span(4..5), 'z'),
4960                     ]
4961                 ),
4962             })))
4963         );
4964 
4965         assert_eq!(
4966             parser("[a-z]").parse(),
4967             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4968                 span: span(0..5),
4969                 negated: false,
4970                 kind: itemset(range(span(1..4), 'a', 'z')),
4971             })))
4972         );
4973         assert_eq!(
4974             parser("[a-cx-z]").parse(),
4975             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4976                 span: span(0..8),
4977                 negated: false,
4978                 kind: union(
4979                     span(1..7),
4980                     vec![
4981                         range(span(1..4), 'a', 'c'),
4982                         range(span(4..7), 'x', 'z'),
4983                     ]
4984                 ),
4985             })))
4986         );
4987         assert_eq!(
4988             parser(r"[\w&&a-cx-z]").parse(),
4989             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4990                 span: span(0..12),
4991                 negated: false,
4992                 kind: intersection(
4993                     span(1..11),
4994                     itemset(item_perl(ast::ClassPerl {
4995                         span: span(1..3),
4996                         kind: ast::ClassPerlKind::Word,
4997                         negated: false,
4998                     })),
4999                     union(
5000                         span(5..11),
5001                         vec![
5002                             range(span(5..8), 'a', 'c'),
5003                             range(span(8..11), 'x', 'z'),
5004                         ]
5005                     ),
5006                 ),
5007             })))
5008         );
5009         assert_eq!(
5010             parser(r"[a-cx-z&&\w]").parse(),
5011             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5012                 span: span(0..12),
5013                 negated: false,
5014                 kind: intersection(
5015                     span(1..11),
5016                     union(
5017                         span(1..7),
5018                         vec![
5019                             range(span(1..4), 'a', 'c'),
5020                             range(span(4..7), 'x', 'z'),
5021                         ]
5022                     ),
5023                     itemset(item_perl(ast::ClassPerl {
5024                         span: span(9..11),
5025                         kind: ast::ClassPerlKind::Word,
5026                         negated: false,
5027                     })),
5028                 ),
5029             })))
5030         );
5031         assert_eq!(
5032             parser(r"[a--b--c]").parse(),
5033             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5034                 span: span(0..9),
5035                 negated: false,
5036                 kind: difference(
5037                     span(1..8),
5038                     difference(
5039                         span(1..5),
5040                         itemset(lit(span(1..2), 'a')),
5041                         itemset(lit(span(4..5), 'b')),
5042                     ),
5043                     itemset(lit(span(7..8), 'c')),
5044                 ),
5045             })))
5046         );
5047         assert_eq!(
5048             parser(r"[a~~b~~c]").parse(),
5049             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5050                 span: span(0..9),
5051                 negated: false,
5052                 kind: symdifference(
5053                     span(1..8),
5054                     symdifference(
5055                         span(1..5),
5056                         itemset(lit(span(1..2), 'a')),
5057                         itemset(lit(span(4..5), 'b')),
5058                     ),
5059                     itemset(lit(span(7..8), 'c')),
5060                 ),
5061             })))
5062         );
5063         assert_eq!(
5064             parser(r"[\^&&^]").parse(),
5065             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5066                 span: span(0..7),
5067                 negated: false,
5068                 kind: intersection(
5069                     span(1..6),
5070                     itemset(ast::ClassSetItem::Literal(ast::Literal {
5071                         span: span(1..3),
5072                         kind: ast::LiteralKind::Punctuation,
5073                         c: '^',
5074                     })),
5075                     itemset(lit(span(5..6), '^')),
5076                 ),
5077             })))
5078         );
5079         assert_eq!(
5080             parser(r"[\&&&&]").parse(),
5081             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5082                 span: span(0..7),
5083                 negated: false,
5084                 kind: intersection(
5085                     span(1..6),
5086                     itemset(ast::ClassSetItem::Literal(ast::Literal {
5087                         span: span(1..3),
5088                         kind: ast::LiteralKind::Punctuation,
5089                         c: '&',
5090                     })),
5091                     itemset(lit(span(5..6), '&')),
5092                 ),
5093             })))
5094         );
5095         assert_eq!(
5096             parser(r"[&&&&]").parse(),
5097             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5098                 span: span(0..6),
5099                 negated: false,
5100                 kind: intersection(
5101                     span(1..5),
5102                     intersection(
5103                         span(1..3),
5104                         itemset(empty(span(1..1))),
5105                         itemset(empty(span(3..3))),
5106                     ),
5107                     itemset(empty(span(5..5))),
5108                 ),
5109             })))
5110         );
5111 
5112         let pat = "[☃-⛄]";
5113         assert_eq!(
5114             parser(pat).parse(),
5115             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5116                 span: span_range(pat, 0..9),
5117                 negated: false,
5118                 kind: itemset(ast::ClassSetItem::Range(ast::ClassSetRange {
5119                     span: span_range(pat, 1..8),
5120                     start: ast::Literal {
5121                         span: span_range(pat, 1..4),
5122                         kind: ast::LiteralKind::Verbatim,
5123                         c: '☃',
5124                     },
5125                     end: ast::Literal {
5126                         span: span_range(pat, 5..8),
5127                         kind: ast::LiteralKind::Verbatim,
5128                         c: '⛄',
5129                     },
5130                 })),
5131             })))
5132         );
5133 
5134         assert_eq!(
5135             parser(r"[]]").parse(),
5136             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5137                 span: span(0..3),
5138                 negated: false,
5139                 kind: itemset(lit(span(1..2), ']')),
5140             })))
5141         );
5142         assert_eq!(
5143             parser(r"[]\[]").parse(),
5144             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5145                 span: span(0..5),
5146                 negated: false,
5147                 kind: union(
5148                     span(1..4),
5149                     vec![
5150                         lit(span(1..2), ']'),
5151                         ast::ClassSetItem::Literal(ast::Literal {
5152                             span: span(2..4),
5153                             kind: ast::LiteralKind::Punctuation,
5154                             c: '[',
5155                         }),
5156                     ]
5157                 ),
5158             })))
5159         );
5160         assert_eq!(
5161             parser(r"[\[]]").parse(),
5162             Ok(concat(
5163                 0..5,
5164                 vec![
5165                     Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5166                         span: span(0..4),
5167                         negated: false,
5168                         kind: itemset(ast::ClassSetItem::Literal(
5169                             ast::Literal {
5170                                 span: span(1..3),
5171                                 kind: ast::LiteralKind::Punctuation,
5172                                 c: '[',
5173                             }
5174                         )),
5175                     })),
5176                     Ast::Literal(ast::Literal {
5177                         span: span(4..5),
5178                         kind: ast::LiteralKind::Verbatim,
5179                         c: ']',
5180                     }),
5181                 ]
5182             ))
5183         );
5184 
5185         assert_eq!(
5186             parser("[").parse().unwrap_err(),
5187             TestError {
5188                 span: span(0..1),
5189                 kind: ast::ErrorKind::ClassUnclosed,
5190             }
5191         );
5192         assert_eq!(
5193             parser("[[").parse().unwrap_err(),
5194             TestError {
5195                 span: span(1..2),
5196                 kind: ast::ErrorKind::ClassUnclosed,
5197             }
5198         );
5199         assert_eq!(
5200             parser("[[-]").parse().unwrap_err(),
5201             TestError {
5202                 span: span(0..1),
5203                 kind: ast::ErrorKind::ClassUnclosed,
5204             }
5205         );
5206         assert_eq!(
5207             parser("[[[:alnum:]").parse().unwrap_err(),
5208             TestError {
5209                 span: span(1..2),
5210                 kind: ast::ErrorKind::ClassUnclosed,
5211             }
5212         );
5213         assert_eq!(
5214             parser(r"[\b]").parse().unwrap_err(),
5215             TestError {
5216                 span: span(1..3),
5217                 kind: ast::ErrorKind::ClassEscapeInvalid,
5218             }
5219         );
5220         assert_eq!(
5221             parser(r"[\w-a]").parse().unwrap_err(),
5222             TestError {
5223                 span: span(1..3),
5224                 kind: ast::ErrorKind::ClassRangeLiteral,
5225             }
5226         );
5227         assert_eq!(
5228             parser(r"[a-\w]").parse().unwrap_err(),
5229             TestError {
5230                 span: span(3..5),
5231                 kind: ast::ErrorKind::ClassRangeLiteral,
5232             }
5233         );
5234         assert_eq!(
5235             parser(r"[z-a]").parse().unwrap_err(),
5236             TestError {
5237                 span: span(1..4),
5238                 kind: ast::ErrorKind::ClassRangeInvalid,
5239             }
5240         );
5241 
5242         assert_eq!(
5243             parser_ignore_whitespace("[a ").parse().unwrap_err(),
5244             TestError {
5245                 span: span(0..1),
5246                 kind: ast::ErrorKind::ClassUnclosed,
5247             }
5248         );
5249         assert_eq!(
5250             parser_ignore_whitespace("[a- ").parse().unwrap_err(),
5251             TestError {
5252                 span: span(0..1),
5253                 kind: ast::ErrorKind::ClassUnclosed,
5254             }
5255         );
5256     }
5257 
5258     #[test]
parse_set_class_open()5259     fn parse_set_class_open() {
5260         assert_eq!(parser("[a]").parse_set_class_open(), {
5261             let set = ast::ClassBracketed {
5262                 span: span(0..1),
5263                 negated: false,
5264                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5265                     span: span(1..1),
5266                     items: vec![],
5267                 }),
5268             };
5269             let union = ast::ClassSetUnion { span: span(1..1), items: vec![] };
5270             Ok((set, union))
5271         });
5272         assert_eq!(
5273             parser_ignore_whitespace("[   a]").parse_set_class_open(),
5274             {
5275                 let set = ast::ClassBracketed {
5276                     span: span(0..4),
5277                     negated: false,
5278                     kind: ast::ClassSet::union(ast::ClassSetUnion {
5279                         span: span(4..4),
5280                         items: vec![],
5281                     }),
5282                 };
5283                 let union =
5284                     ast::ClassSetUnion { span: span(4..4), items: vec![] };
5285                 Ok((set, union))
5286             }
5287         );
5288         assert_eq!(parser("[^a]").parse_set_class_open(), {
5289             let set = ast::ClassBracketed {
5290                 span: span(0..2),
5291                 negated: true,
5292                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5293                     span: span(2..2),
5294                     items: vec![],
5295                 }),
5296             };
5297             let union = ast::ClassSetUnion { span: span(2..2), items: vec![] };
5298             Ok((set, union))
5299         });
5300         assert_eq!(
5301             parser_ignore_whitespace("[ ^ a]").parse_set_class_open(),
5302             {
5303                 let set = ast::ClassBracketed {
5304                     span: span(0..4),
5305                     negated: true,
5306                     kind: ast::ClassSet::union(ast::ClassSetUnion {
5307                         span: span(4..4),
5308                         items: vec![],
5309                     }),
5310                 };
5311                 let union =
5312                     ast::ClassSetUnion { span: span(4..4), items: vec![] };
5313                 Ok((set, union))
5314             }
5315         );
5316         assert_eq!(parser("[-a]").parse_set_class_open(), {
5317             let set = ast::ClassBracketed {
5318                 span: span(0..2),
5319                 negated: false,
5320                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5321                     span: span(1..1),
5322                     items: vec![],
5323                 }),
5324             };
5325             let union = ast::ClassSetUnion {
5326                 span: span(1..2),
5327                 items: vec![ast::ClassSetItem::Literal(ast::Literal {
5328                     span: span(1..2),
5329                     kind: ast::LiteralKind::Verbatim,
5330                     c: '-',
5331                 })],
5332             };
5333             Ok((set, union))
5334         });
5335         assert_eq!(
5336             parser_ignore_whitespace("[ - a]").parse_set_class_open(),
5337             {
5338                 let set = ast::ClassBracketed {
5339                     span: span(0..4),
5340                     negated: false,
5341                     kind: ast::ClassSet::union(ast::ClassSetUnion {
5342                         span: span(2..2),
5343                         items: vec![],
5344                     }),
5345                 };
5346                 let union = ast::ClassSetUnion {
5347                     span: span(2..3),
5348                     items: vec![ast::ClassSetItem::Literal(ast::Literal {
5349                         span: span(2..3),
5350                         kind: ast::LiteralKind::Verbatim,
5351                         c: '-',
5352                     })],
5353                 };
5354                 Ok((set, union))
5355             }
5356         );
5357         assert_eq!(parser("[^-a]").parse_set_class_open(), {
5358             let set = ast::ClassBracketed {
5359                 span: span(0..3),
5360                 negated: true,
5361                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5362                     span: span(2..2),
5363                     items: vec![],
5364                 }),
5365             };
5366             let union = ast::ClassSetUnion {
5367                 span: span(2..3),
5368                 items: vec![ast::ClassSetItem::Literal(ast::Literal {
5369                     span: span(2..3),
5370                     kind: ast::LiteralKind::Verbatim,
5371                     c: '-',
5372                 })],
5373             };
5374             Ok((set, union))
5375         });
5376         assert_eq!(parser("[--a]").parse_set_class_open(), {
5377             let set = ast::ClassBracketed {
5378                 span: span(0..3),
5379                 negated: false,
5380                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5381                     span: span(1..1),
5382                     items: vec![],
5383                 }),
5384             };
5385             let union = ast::ClassSetUnion {
5386                 span: span(1..3),
5387                 items: vec![
5388                     ast::ClassSetItem::Literal(ast::Literal {
5389                         span: span(1..2),
5390                         kind: ast::LiteralKind::Verbatim,
5391                         c: '-',
5392                     }),
5393                     ast::ClassSetItem::Literal(ast::Literal {
5394                         span: span(2..3),
5395                         kind: ast::LiteralKind::Verbatim,
5396                         c: '-',
5397                     }),
5398                 ],
5399             };
5400             Ok((set, union))
5401         });
5402         assert_eq!(parser("[]a]").parse_set_class_open(), {
5403             let set = ast::ClassBracketed {
5404                 span: span(0..2),
5405                 negated: false,
5406                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5407                     span: span(1..1),
5408                     items: vec![],
5409                 }),
5410             };
5411             let union = ast::ClassSetUnion {
5412                 span: span(1..2),
5413                 items: vec![ast::ClassSetItem::Literal(ast::Literal {
5414                     span: span(1..2),
5415                     kind: ast::LiteralKind::Verbatim,
5416                     c: ']',
5417                 })],
5418             };
5419             Ok((set, union))
5420         });
5421         assert_eq!(
5422             parser_ignore_whitespace("[ ] a]").parse_set_class_open(),
5423             {
5424                 let set = ast::ClassBracketed {
5425                     span: span(0..4),
5426                     negated: false,
5427                     kind: ast::ClassSet::union(ast::ClassSetUnion {
5428                         span: span(2..2),
5429                         items: vec![],
5430                     }),
5431                 };
5432                 let union = ast::ClassSetUnion {
5433                     span: span(2..3),
5434                     items: vec![ast::ClassSetItem::Literal(ast::Literal {
5435                         span: span(2..3),
5436                         kind: ast::LiteralKind::Verbatim,
5437                         c: ']',
5438                     })],
5439                 };
5440                 Ok((set, union))
5441             }
5442         );
5443         assert_eq!(parser("[^]a]").parse_set_class_open(), {
5444             let set = ast::ClassBracketed {
5445                 span: span(0..3),
5446                 negated: true,
5447                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5448                     span: span(2..2),
5449                     items: vec![],
5450                 }),
5451             };
5452             let union = ast::ClassSetUnion {
5453                 span: span(2..3),
5454                 items: vec![ast::ClassSetItem::Literal(ast::Literal {
5455                     span: span(2..3),
5456                     kind: ast::LiteralKind::Verbatim,
5457                     c: ']',
5458                 })],
5459             };
5460             Ok((set, union))
5461         });
5462         assert_eq!(parser("[-]a]").parse_set_class_open(), {
5463             let set = ast::ClassBracketed {
5464                 span: span(0..2),
5465                 negated: false,
5466                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5467                     span: span(1..1),
5468                     items: vec![],
5469                 }),
5470             };
5471             let union = ast::ClassSetUnion {
5472                 span: span(1..2),
5473                 items: vec![ast::ClassSetItem::Literal(ast::Literal {
5474                     span: span(1..2),
5475                     kind: ast::LiteralKind::Verbatim,
5476                     c: '-',
5477                 })],
5478             };
5479             Ok((set, union))
5480         });
5481 
5482         assert_eq!(
5483             parser("[").parse_set_class_open().unwrap_err(),
5484             TestError {
5485                 span: span(0..1),
5486                 kind: ast::ErrorKind::ClassUnclosed,
5487             }
5488         );
5489         assert_eq!(
5490             parser_ignore_whitespace("[    ")
5491                 .parse_set_class_open()
5492                 .unwrap_err(),
5493             TestError {
5494                 span: span(0..5),
5495                 kind: ast::ErrorKind::ClassUnclosed,
5496             }
5497         );
5498         assert_eq!(
5499             parser("[^").parse_set_class_open().unwrap_err(),
5500             TestError {
5501                 span: span(0..2),
5502                 kind: ast::ErrorKind::ClassUnclosed,
5503             }
5504         );
5505         assert_eq!(
5506             parser("[]").parse_set_class_open().unwrap_err(),
5507             TestError {
5508                 span: span(0..2),
5509                 kind: ast::ErrorKind::ClassUnclosed,
5510             }
5511         );
5512         assert_eq!(
5513             parser("[-").parse_set_class_open().unwrap_err(),
5514             TestError {
5515                 span: span(0..2),
5516                 kind: ast::ErrorKind::ClassUnclosed,
5517             }
5518         );
5519         assert_eq!(
5520             parser("[--").parse_set_class_open().unwrap_err(),
5521             TestError {
5522                 span: span(0..3),
5523                 kind: ast::ErrorKind::ClassUnclosed,
5524             }
5525         );
5526     }
5527 
5528     #[test]
maybe_parse_ascii_class()5529     fn maybe_parse_ascii_class() {
5530         assert_eq!(
5531             parser(r"[:alnum:]").maybe_parse_ascii_class(),
5532             Some(ast::ClassAscii {
5533                 span: span(0..9),
5534                 kind: ast::ClassAsciiKind::Alnum,
5535                 negated: false,
5536             })
5537         );
5538         assert_eq!(
5539             parser(r"[:alnum:]A").maybe_parse_ascii_class(),
5540             Some(ast::ClassAscii {
5541                 span: span(0..9),
5542                 kind: ast::ClassAsciiKind::Alnum,
5543                 negated: false,
5544             })
5545         );
5546         assert_eq!(
5547             parser(r"[:^alnum:]").maybe_parse_ascii_class(),
5548             Some(ast::ClassAscii {
5549                 span: span(0..10),
5550                 kind: ast::ClassAsciiKind::Alnum,
5551                 negated: true,
5552             })
5553         );
5554 
5555         let p = parser(r"[:");
5556         assert_eq!(p.maybe_parse_ascii_class(), None);
5557         assert_eq!(p.offset(), 0);
5558 
5559         let p = parser(r"[:^");
5560         assert_eq!(p.maybe_parse_ascii_class(), None);
5561         assert_eq!(p.offset(), 0);
5562 
5563         let p = parser(r"[^:alnum:]");
5564         assert_eq!(p.maybe_parse_ascii_class(), None);
5565         assert_eq!(p.offset(), 0);
5566 
5567         let p = parser(r"[:alnnum:]");
5568         assert_eq!(p.maybe_parse_ascii_class(), None);
5569         assert_eq!(p.offset(), 0);
5570 
5571         let p = parser(r"[:alnum]");
5572         assert_eq!(p.maybe_parse_ascii_class(), None);
5573         assert_eq!(p.offset(), 0);
5574 
5575         let p = parser(r"[:alnum:");
5576         assert_eq!(p.maybe_parse_ascii_class(), None);
5577         assert_eq!(p.offset(), 0);
5578     }
5579 
5580     #[test]
parse_unicode_class()5581     fn parse_unicode_class() {
5582         assert_eq!(
5583             parser(r"\pN").parse_escape(),
5584             Ok(Primitive::Unicode(ast::ClassUnicode {
5585                 span: span(0..3),
5586                 negated: false,
5587                 kind: ast::ClassUnicodeKind::OneLetter('N'),
5588             }))
5589         );
5590         assert_eq!(
5591             parser(r"\PN").parse_escape(),
5592             Ok(Primitive::Unicode(ast::ClassUnicode {
5593                 span: span(0..3),
5594                 negated: true,
5595                 kind: ast::ClassUnicodeKind::OneLetter('N'),
5596             }))
5597         );
5598         assert_eq!(
5599             parser(r"\p{N}").parse_escape(),
5600             Ok(Primitive::Unicode(ast::ClassUnicode {
5601                 span: span(0..5),
5602                 negated: false,
5603                 kind: ast::ClassUnicodeKind::Named(s("N")),
5604             }))
5605         );
5606         assert_eq!(
5607             parser(r"\P{N}").parse_escape(),
5608             Ok(Primitive::Unicode(ast::ClassUnicode {
5609                 span: span(0..5),
5610                 negated: true,
5611                 kind: ast::ClassUnicodeKind::Named(s("N")),
5612             }))
5613         );
5614         assert_eq!(
5615             parser(r"\p{Greek}").parse_escape(),
5616             Ok(Primitive::Unicode(ast::ClassUnicode {
5617                 span: span(0..9),
5618                 negated: false,
5619                 kind: ast::ClassUnicodeKind::Named(s("Greek")),
5620             }))
5621         );
5622 
5623         assert_eq!(
5624             parser(r"\p{scx:Katakana}").parse_escape(),
5625             Ok(Primitive::Unicode(ast::ClassUnicode {
5626                 span: span(0..16),
5627                 negated: false,
5628                 kind: ast::ClassUnicodeKind::NamedValue {
5629                     op: ast::ClassUnicodeOpKind::Colon,
5630                     name: s("scx"),
5631                     value: s("Katakana"),
5632                 },
5633             }))
5634         );
5635         assert_eq!(
5636             parser(r"\p{scx=Katakana}").parse_escape(),
5637             Ok(Primitive::Unicode(ast::ClassUnicode {
5638                 span: span(0..16),
5639                 negated: false,
5640                 kind: ast::ClassUnicodeKind::NamedValue {
5641                     op: ast::ClassUnicodeOpKind::Equal,
5642                     name: s("scx"),
5643                     value: s("Katakana"),
5644                 },
5645             }))
5646         );
5647         assert_eq!(
5648             parser(r"\p{scx!=Katakana}").parse_escape(),
5649             Ok(Primitive::Unicode(ast::ClassUnicode {
5650                 span: span(0..17),
5651                 negated: false,
5652                 kind: ast::ClassUnicodeKind::NamedValue {
5653                     op: ast::ClassUnicodeOpKind::NotEqual,
5654                     name: s("scx"),
5655                     value: s("Katakana"),
5656                 },
5657             }))
5658         );
5659 
5660         assert_eq!(
5661             parser(r"\p{:}").parse_escape(),
5662             Ok(Primitive::Unicode(ast::ClassUnicode {
5663                 span: span(0..5),
5664                 negated: false,
5665                 kind: ast::ClassUnicodeKind::NamedValue {
5666                     op: ast::ClassUnicodeOpKind::Colon,
5667                     name: s(""),
5668                     value: s(""),
5669                 },
5670             }))
5671         );
5672         assert_eq!(
5673             parser(r"\p{=}").parse_escape(),
5674             Ok(Primitive::Unicode(ast::ClassUnicode {
5675                 span: span(0..5),
5676                 negated: false,
5677                 kind: ast::ClassUnicodeKind::NamedValue {
5678                     op: ast::ClassUnicodeOpKind::Equal,
5679                     name: s(""),
5680                     value: s(""),
5681                 },
5682             }))
5683         );
5684         assert_eq!(
5685             parser(r"\p{!=}").parse_escape(),
5686             Ok(Primitive::Unicode(ast::ClassUnicode {
5687                 span: span(0..6),
5688                 negated: false,
5689                 kind: ast::ClassUnicodeKind::NamedValue {
5690                     op: ast::ClassUnicodeOpKind::NotEqual,
5691                     name: s(""),
5692                     value: s(""),
5693                 },
5694             }))
5695         );
5696 
5697         assert_eq!(
5698             parser(r"\p").parse_escape().unwrap_err(),
5699             TestError {
5700                 span: span(2..2),
5701                 kind: ast::ErrorKind::EscapeUnexpectedEof,
5702             }
5703         );
5704         assert_eq!(
5705             parser(r"\p{").parse_escape().unwrap_err(),
5706             TestError {
5707                 span: span(3..3),
5708                 kind: ast::ErrorKind::EscapeUnexpectedEof,
5709             }
5710         );
5711         assert_eq!(
5712             parser(r"\p{N").parse_escape().unwrap_err(),
5713             TestError {
5714                 span: span(4..4),
5715                 kind: ast::ErrorKind::EscapeUnexpectedEof,
5716             }
5717         );
5718         assert_eq!(
5719             parser(r"\p{Greek").parse_escape().unwrap_err(),
5720             TestError {
5721                 span: span(8..8),
5722                 kind: ast::ErrorKind::EscapeUnexpectedEof,
5723             }
5724         );
5725 
5726         assert_eq!(
5727             parser(r"\pNz").parse(),
5728             Ok(Ast::Concat(ast::Concat {
5729                 span: span(0..4),
5730                 asts: vec![
5731                     Ast::Class(ast::Class::Unicode(ast::ClassUnicode {
5732                         span: span(0..3),
5733                         negated: false,
5734                         kind: ast::ClassUnicodeKind::OneLetter('N'),
5735                     })),
5736                     Ast::Literal(ast::Literal {
5737                         span: span(3..4),
5738                         kind: ast::LiteralKind::Verbatim,
5739                         c: 'z',
5740                     }),
5741                 ],
5742             }))
5743         );
5744         assert_eq!(
5745             parser(r"\p{Greek}z").parse(),
5746             Ok(Ast::Concat(ast::Concat {
5747                 span: span(0..10),
5748                 asts: vec![
5749                     Ast::Class(ast::Class::Unicode(ast::ClassUnicode {
5750                         span: span(0..9),
5751                         negated: false,
5752                         kind: ast::ClassUnicodeKind::Named(s("Greek")),
5753                     })),
5754                     Ast::Literal(ast::Literal {
5755                         span: span(9..10),
5756                         kind: ast::LiteralKind::Verbatim,
5757                         c: 'z',
5758                     }),
5759                 ],
5760             }))
5761         );
5762         assert_eq!(
5763             parser(r"\p\{").parse().unwrap_err(),
5764             TestError {
5765                 span: span(2..3),
5766                 kind: ast::ErrorKind::UnicodeClassInvalid,
5767             }
5768         );
5769         assert_eq!(
5770             parser(r"\P\{").parse().unwrap_err(),
5771             TestError {
5772                 span: span(2..3),
5773                 kind: ast::ErrorKind::UnicodeClassInvalid,
5774             }
5775         );
5776     }
5777 
5778     #[test]
parse_perl_class()5779     fn parse_perl_class() {
5780         assert_eq!(
5781             parser(r"\d").parse_escape(),
5782             Ok(Primitive::Perl(ast::ClassPerl {
5783                 span: span(0..2),
5784                 kind: ast::ClassPerlKind::Digit,
5785                 negated: false,
5786             }))
5787         );
5788         assert_eq!(
5789             parser(r"\D").parse_escape(),
5790             Ok(Primitive::Perl(ast::ClassPerl {
5791                 span: span(0..2),
5792                 kind: ast::ClassPerlKind::Digit,
5793                 negated: true,
5794             }))
5795         );
5796         assert_eq!(
5797             parser(r"\s").parse_escape(),
5798             Ok(Primitive::Perl(ast::ClassPerl {
5799                 span: span(0..2),
5800                 kind: ast::ClassPerlKind::Space,
5801                 negated: false,
5802             }))
5803         );
5804         assert_eq!(
5805             parser(r"\S").parse_escape(),
5806             Ok(Primitive::Perl(ast::ClassPerl {
5807                 span: span(0..2),
5808                 kind: ast::ClassPerlKind::Space,
5809                 negated: true,
5810             }))
5811         );
5812         assert_eq!(
5813             parser(r"\w").parse_escape(),
5814             Ok(Primitive::Perl(ast::ClassPerl {
5815                 span: span(0..2),
5816                 kind: ast::ClassPerlKind::Word,
5817                 negated: false,
5818             }))
5819         );
5820         assert_eq!(
5821             parser(r"\W").parse_escape(),
5822             Ok(Primitive::Perl(ast::ClassPerl {
5823                 span: span(0..2),
5824                 kind: ast::ClassPerlKind::Word,
5825                 negated: true,
5826             }))
5827         );
5828 
5829         assert_eq!(
5830             parser(r"\d").parse(),
5831             Ok(Ast::Class(ast::Class::Perl(ast::ClassPerl {
5832                 span: span(0..2),
5833                 kind: ast::ClassPerlKind::Digit,
5834                 negated: false,
5835             })))
5836         );
5837         assert_eq!(
5838             parser(r"\dz").parse(),
5839             Ok(Ast::Concat(ast::Concat {
5840                 span: span(0..3),
5841                 asts: vec![
5842                     Ast::Class(ast::Class::Perl(ast::ClassPerl {
5843                         span: span(0..2),
5844                         kind: ast::ClassPerlKind::Digit,
5845                         negated: false,
5846                     })),
5847                     Ast::Literal(ast::Literal {
5848                         span: span(2..3),
5849                         kind: ast::LiteralKind::Verbatim,
5850                         c: 'z',
5851                     }),
5852                 ],
5853             }))
5854         );
5855     }
5856 
5857     // This tests a bug fix where the nest limit checker wasn't decrementing
5858     // its depth during post-traversal, which causes long regexes to trip
5859     // the default limit too aggressively.
5860     #[test]
regression_454_nest_too_big()5861     fn regression_454_nest_too_big() {
5862         let pattern = r#"
5863         2(?:
5864           [45]\d{3}|
5865           7(?:
5866             1[0-267]|
5867             2[0-289]|
5868             3[0-29]|
5869             4[01]|
5870             5[1-3]|
5871             6[013]|
5872             7[0178]|
5873             91
5874           )|
5875           8(?:
5876             0[125]|
5877             [139][1-6]|
5878             2[0157-9]|
5879             41|
5880             6[1-35]|
5881             7[1-5]|
5882             8[1-8]|
5883             90
5884           )|
5885           9(?:
5886             0[0-2]|
5887             1[0-4]|
5888             2[568]|
5889             3[3-6]|
5890             5[5-7]|
5891             6[0167]|
5892             7[15]|
5893             8[0146-9]
5894           )
5895         )\d{4}
5896         "#;
5897         assert!(parser_nest_limit(pattern, 50).parse().is_ok());
5898     }
5899 
5900     // This tests that we treat a trailing `-` in a character class as a
5901     // literal `-` even when whitespace mode is enabled and there is whitespace
5902     // after the trailing `-`.
5903     #[test]
regression_455_trailing_dash_ignore_whitespace()5904     fn regression_455_trailing_dash_ignore_whitespace() {
5905         assert!(parser("(?x)[ / - ]").parse().is_ok());
5906         assert!(parser("(?x)[ a - ]").parse().is_ok());
5907         assert!(parser(
5908             "(?x)[
5909             a
5910             - ]
5911         "
5912         )
5913         .parse()
5914         .is_ok());
5915         assert!(parser(
5916             "(?x)[
5917             a # wat
5918             - ]
5919         "
5920         )
5921         .parse()
5922         .is_ok());
5923 
5924         assert!(parser("(?x)[ / -").parse().is_err());
5925         assert!(parser("(?x)[ / - ").parse().is_err());
5926         assert!(parser(
5927             "(?x)[
5928             / -
5929         "
5930         )
5931         .parse()
5932         .is_err());
5933         assert!(parser(
5934             "(?x)[
5935             / - # wat
5936         "
5937         )
5938         .parse()
5939         .is_err());
5940     }
5941 }
5942