1 /*!
2 This module provides a regular expression parser.
3 */
4 
5 use std::borrow::Borrow;
6 use std::cell::{Cell, RefCell};
7 use std::mem;
8 use std::result;
9 
10 use ast::{self, Ast, Position, Span};
11 use either::Either;
12 
13 use is_meta_character;
14 
15 type Result<T> = result::Result<T, ast::Error>;
16 
17 /// A primitive is an expression with no sub-expressions. This includes
18 /// literals, assertions and non-set character classes. This representation
19 /// is used as intermediate state in the parser.
20 ///
21 /// This does not include ASCII character classes, since they can only appear
22 /// within a set character class.
23 #[derive(Clone, Debug, Eq, PartialEq)]
24 enum Primitive {
25     Literal(ast::Literal),
26     Assertion(ast::Assertion),
27     Dot(Span),
28     Perl(ast::ClassPerl),
29     Unicode(ast::ClassUnicode),
30 }
31 
32 impl Primitive {
33     /// Return the span of this primitive.
span(&self) -> &Span34     fn span(&self) -> &Span {
35         match *self {
36             Primitive::Literal(ref x) => &x.span,
37             Primitive::Assertion(ref x) => &x.span,
38             Primitive::Dot(ref span) => span,
39             Primitive::Perl(ref x) => &x.span,
40             Primitive::Unicode(ref x) => &x.span,
41         }
42     }
43 
44     /// Convert this primitive into a proper AST.
into_ast(self) -> Ast45     fn into_ast(self) -> Ast {
46         match self {
47             Primitive::Literal(lit) => Ast::Literal(lit),
48             Primitive::Assertion(assert) => Ast::Assertion(assert),
49             Primitive::Dot(span) => Ast::Dot(span),
50             Primitive::Perl(cls) => Ast::Class(ast::Class::Perl(cls)),
51             Primitive::Unicode(cls) => Ast::Class(ast::Class::Unicode(cls)),
52         }
53     }
54 
55     /// Convert this primitive into an item in a character class.
56     ///
57     /// If this primitive is not a legal item (i.e., an assertion or a dot),
58     /// then return an error.
into_class_set_item<P: Borrow<Parser>>( self, p: &ParserI<P>, ) -> Result<ast::ClassSetItem>59     fn into_class_set_item<P: Borrow<Parser>>(
60         self,
61         p: &ParserI<P>,
62     ) -> Result<ast::ClassSetItem> {
63         use self::Primitive::*;
64         use ast::ClassSetItem;
65 
66         match self {
67             Literal(lit) => Ok(ClassSetItem::Literal(lit)),
68             Perl(cls) => Ok(ClassSetItem::Perl(cls)),
69             Unicode(cls) => Ok(ClassSetItem::Unicode(cls)),
70             x => Err(p.error(*x.span(), ast::ErrorKind::ClassEscapeInvalid)),
71         }
72     }
73 
74     /// Convert this primitive into a literal in a character class. In
75     /// particular, literals are the only valid items that can appear in
76     /// ranges.
77     ///
78     /// If this primitive is not a legal item (i.e., a class, assertion or a
79     /// dot), then return an error.
into_class_literal<P: Borrow<Parser>>( self, p: &ParserI<P>, ) -> Result<ast::Literal>80     fn into_class_literal<P: Borrow<Parser>>(
81         self,
82         p: &ParserI<P>,
83     ) -> Result<ast::Literal> {
84         use self::Primitive::*;
85 
86         match self {
87             Literal(lit) => Ok(lit),
88             x => Err(p.error(*x.span(), ast::ErrorKind::ClassRangeLiteral)),
89         }
90     }
91 }
92 
93 /// Returns true if the given character is a hexadecimal digit.
is_hex(c: char) -> bool94 fn is_hex(c: char) -> bool {
95     ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')
96 }
97 
98 /// Returns true if the given character is a valid in a capture group name.
99 ///
100 /// If `first` is true, then `c` is treated as the first character in the
101 /// group name (which is not allowed to be a digit).
is_capture_char(c: char, first: bool) -> bool102 fn is_capture_char(c: char, first: bool) -> bool {
103     c == '_'
104         || (!first && c >= '0' && c <= '9')
105         || (c >= 'a' && c <= 'z')
106         || (c >= 'A' && c <= 'Z')
107 }
108 
109 /// A builder for a regular expression parser.
110 ///
111 /// This builder permits modifying configuration options for the parser.
112 #[derive(Clone, Debug)]
113 pub struct ParserBuilder {
114     ignore_whitespace: bool,
115     nest_limit: u32,
116     octal: bool,
117 }
118 
119 impl Default for ParserBuilder {
default() -> ParserBuilder120     fn default() -> ParserBuilder {
121         ParserBuilder::new()
122     }
123 }
124 
125 impl ParserBuilder {
126     /// Create a new parser builder with a default configuration.
new() -> ParserBuilder127     pub fn new() -> ParserBuilder {
128         ParserBuilder {
129             ignore_whitespace: false,
130             nest_limit: 250,
131             octal: false,
132         }
133     }
134 
135     /// Build a parser from this configuration with the given pattern.
build(&self) -> Parser136     pub fn build(&self) -> Parser {
137         Parser {
138             pos: Cell::new(Position { offset: 0, line: 1, column: 1 }),
139             capture_index: Cell::new(0),
140             nest_limit: self.nest_limit,
141             octal: self.octal,
142             initial_ignore_whitespace: self.ignore_whitespace,
143             ignore_whitespace: Cell::new(self.ignore_whitespace),
144             comments: RefCell::new(vec![]),
145             stack_group: RefCell::new(vec![]),
146             stack_class: RefCell::new(vec![]),
147             capture_names: RefCell::new(vec![]),
148             scratch: RefCell::new(String::new()),
149         }
150     }
151 
152     /// Set the nesting limit for this parser.
153     ///
154     /// The nesting limit controls how deep the abstract syntax tree is allowed
155     /// to be. If the AST exceeds the given limit (e.g., with too many nested
156     /// groups), then an error is returned by the parser.
157     ///
158     /// The purpose of this limit is to act as a heuristic to prevent stack
159     /// overflow for consumers that do structural induction on an `Ast` using
160     /// explicit recursion. While this crate never does this (instead using
161     /// constant stack space and moving the call stack to the heap), other
162     /// crates may.
163     ///
164     /// This limit is not checked until the entire Ast is parsed. Therefore,
165     /// if callers want to put a limit on the amount of heap space used, then
166     /// they should impose a limit on the length, in bytes, of the concrete
167     /// pattern string. In particular, this is viable since this parser
168     /// implementation will limit itself to heap space proportional to the
169     /// lenth of the pattern string.
170     ///
171     /// Note that a nest limit of `0` will return a nest limit error for most
172     /// patterns but not all. For example, a nest limit of `0` permits `a` but
173     /// not `ab`, since `ab` requires a concatenation, which results in a nest
174     /// depth of `1`. In general, a nest limit is not something that manifests
175     /// in an obvious way in the concrete syntax, therefore, it should not be
176     /// used in a granular way.
nest_limit(&mut self, limit: u32) -> &mut ParserBuilder177     pub fn nest_limit(&mut self, limit: u32) -> &mut ParserBuilder {
178         self.nest_limit = limit;
179         self
180     }
181 
182     /// Whether to support octal syntax or not.
183     ///
184     /// Octal syntax is a little-known way of uttering Unicode codepoints in
185     /// a regular expression. For example, `a`, `\x61`, `\u0061` and
186     /// `\141` are all equivalent regular expressions, where the last example
187     /// shows octal syntax.
188     ///
189     /// While supporting octal syntax isn't in and of itself a problem, it does
190     /// make good error messages harder. That is, in PCRE based regex engines,
191     /// syntax like `\0` invokes a backreference, which is explicitly
192     /// unsupported in Rust's regex engine. However, many users expect it to
193     /// be supported. Therefore, when octal support is disabled, the error
194     /// message will explicitly mention that backreferences aren't supported.
195     ///
196     /// Octal syntax is disabled by default.
octal(&mut self, yes: bool) -> &mut ParserBuilder197     pub fn octal(&mut self, yes: bool) -> &mut ParserBuilder {
198         self.octal = yes;
199         self
200     }
201 
202     /// Enable verbose mode in the regular expression.
203     ///
204     /// When enabled, verbose mode permits insigificant whitespace in many
205     /// places in the regular expression, as well as comments. Comments are
206     /// started using `#` and continue until the end of the line.
207     ///
208     /// By default, this is disabled. It may be selectively enabled in the
209     /// regular expression by using the `x` flag regardless of this setting.
ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder210     pub fn ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder {
211         self.ignore_whitespace = yes;
212         self
213     }
214 }
215 
216 /// A regular expression parser.
217 ///
218 /// This parses a string representation of a regular expression into an
219 /// abstract syntax tree. The size of the tree is proportional to the length
220 /// of the regular expression pattern.
221 ///
222 /// A `Parser` can be configured in more detail via a
223 /// [`ParserBuilder`](struct.ParserBuilder.html).
224 #[derive(Clone, Debug)]
225 pub struct Parser {
226     /// The current position of the parser.
227     pos: Cell<Position>,
228     /// The current capture index.
229     capture_index: Cell<u32>,
230     /// The maximum number of open parens/brackets allowed. If the parser
231     /// exceeds this number, then an error is returned.
232     nest_limit: u32,
233     /// Whether to support octal syntax or not. When `false`, the parser will
234     /// return an error helpfully pointing out that backreferences are not
235     /// supported.
236     octal: bool,
237     /// The initial setting for `ignore_whitespace` as provided by
238     /// Th`ParserBuilder`. is is used when reseting the parser's state.
239     initial_ignore_whitespace: bool,
240     /// Whether whitespace should be ignored. When enabled, comments are
241     /// also permitted.
242     ignore_whitespace: Cell<bool>,
243     /// A list of comments, in order of appearance.
244     comments: RefCell<Vec<ast::Comment>>,
245     /// A stack of grouped sub-expressions, including alternations.
246     stack_group: RefCell<Vec<GroupState>>,
247     /// A stack of nested character classes. This is only non-empty when
248     /// parsing a class.
249     stack_class: RefCell<Vec<ClassState>>,
250     /// A sorted sequence of capture names. This is used to detect duplicate
251     /// capture names and report an error if one is detected.
252     capture_names: RefCell<Vec<ast::CaptureName>>,
253     /// A scratch buffer used in various places. Mostly this is used to
254     /// accumulate relevant characters from parts of a pattern.
255     scratch: RefCell<String>,
256 }
257 
258 /// ParserI is the internal parser implementation.
259 ///
260 /// We use this separate type so that we can carry the provided pattern string
261 /// along with us. In particular, a `Parser` internal state is not tied to any
262 /// one pattern, but `ParserI` is.
263 ///
264 /// This type also lets us use `ParserI<&Parser>` in production code while
265 /// retaining the convenience of `ParserI<Parser>` for tests, which sometimes
266 /// work against the internal interface of the parser.
267 #[derive(Clone, Debug)]
268 struct ParserI<'s, P> {
269     /// The parser state/configuration.
270     parser: P,
271     /// The full regular expression provided by the user.
272     pattern: &'s str,
273 }
274 
275 /// GroupState represents a single stack frame while parsing nested groups
276 /// and alternations. Each frame records the state up to an opening parenthesis
277 /// or a alternating bracket `|`.
278 #[derive(Clone, Debug)]
279 enum GroupState {
280     /// This state is pushed whenever an opening group is found.
281     Group {
282         /// The concatenation immediately preceding the opening group.
283         concat: ast::Concat,
284         /// The group that has been opened. Its sub-AST is always empty.
285         group: ast::Group,
286         /// Whether this group has the `x` flag enabled or not.
287         ignore_whitespace: bool,
288     },
289     /// This state is pushed whenever a new alternation branch is found. If
290     /// an alternation branch is found and this state is at the top of the
291     /// stack, then this state should be modified to include the new
292     /// alternation.
293     Alternation(ast::Alternation),
294 }
295 
296 /// ClassState represents a single stack frame while parsing character classes.
297 /// Each frame records the state up to an intersection, difference, symmetric
298 /// difference or nested class.
299 ///
300 /// Note that a parser's character class stack is only non-empty when parsing
301 /// a character class. In all other cases, it is empty.
302 #[derive(Clone, Debug)]
303 enum ClassState {
304     /// This state is pushed whenever an opening bracket is found.
305     Open {
306         /// The union of class items immediately preceding this class.
307         union: ast::ClassSetUnion,
308         /// The class that has been opened. Typically this just corresponds
309         /// to the `[`, but it can also include `[^` since `^` indicates
310         /// negation of the class.
311         set: ast::ClassBracketed,
312     },
313     /// This state is pushed when a operator is seen. When popped, the stored
314     /// set becomes the left hand side of the operator.
315     Op {
316         /// The type of the operation, i.e., &&, -- or ~~.
317         kind: ast::ClassSetBinaryOpKind,
318         /// The left-hand side of the operator.
319         lhs: ast::ClassSet,
320     },
321 }
322 
323 impl Parser {
324     /// Create a new parser with a default configuration.
325     ///
326     /// The parser can be run with either the `parse` or `parse_with_comments`
327     /// methods. The parse methods return an abstract syntax tree.
328     ///
329     /// To set configuration options on the parser, use
330     /// [`ParserBuilder`](struct.ParserBuilder.html).
new() -> Parser331     pub fn new() -> Parser {
332         ParserBuilder::new().build()
333     }
334 
335     /// Parse the regular expression into an abstract syntax tree.
parse(&mut self, pattern: &str) -> Result<Ast>336     pub fn parse(&mut self, pattern: &str) -> Result<Ast> {
337         ParserI::new(self, pattern).parse()
338     }
339 
340     /// Parse the regular expression and return an abstract syntax tree with
341     /// all of the comments found in the pattern.
parse_with_comments( &mut self, pattern: &str, ) -> Result<ast::WithComments>342     pub fn parse_with_comments(
343         &mut self,
344         pattern: &str,
345     ) -> Result<ast::WithComments> {
346         ParserI::new(self, pattern).parse_with_comments()
347     }
348 
349     /// Reset the internal state of a parser.
350     ///
351     /// This is called at the beginning of every parse. This prevents the
352     /// parser from running with inconsistent state (say, if a previous
353     /// invocation returned an error and the parser is reused).
reset(&self)354     fn reset(&self) {
355         // These settings should be in line with the construction
356         // in `ParserBuilder::build`.
357         self.pos.set(Position { offset: 0, line: 1, column: 1 });
358         self.ignore_whitespace.set(self.initial_ignore_whitespace);
359         self.comments.borrow_mut().clear();
360         self.stack_group.borrow_mut().clear();
361         self.stack_class.borrow_mut().clear();
362     }
363 }
364 
365 impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
366     /// Build an internal parser from a parser configuration and a pattern.
new(parser: P, pattern: &'s str) -> ParserI<'s, P>367     fn new(parser: P, pattern: &'s str) -> ParserI<'s, P> {
368         ParserI { parser: parser, pattern: pattern }
369     }
370 
371     /// Return a reference to the parser state.
parser(&self) -> &Parser372     fn parser(&self) -> &Parser {
373         self.parser.borrow()
374     }
375 
376     /// Return a reference to the pattern being parsed.
pattern(&self) -> &str377     fn pattern(&self) -> &str {
378         self.pattern.borrow()
379     }
380 
381     /// Create a new error with the given span and error type.
error(&self, span: Span, kind: ast::ErrorKind) -> ast::Error382     fn error(&self, span: Span, kind: ast::ErrorKind) -> ast::Error {
383         ast::Error {
384             kind: kind,
385             pattern: self.pattern().to_string(),
386             span: span,
387         }
388     }
389 
390     /// Return the current offset of the parser.
391     ///
392     /// The offset starts at `0` from the beginning of the regular expression
393     /// pattern string.
offset(&self) -> usize394     fn offset(&self) -> usize {
395         self.parser().pos.get().offset
396     }
397 
398     /// Return the current line number of the parser.
399     ///
400     /// The line number starts at `1`.
line(&self) -> usize401     fn line(&self) -> usize {
402         self.parser().pos.get().line
403     }
404 
405     /// Return the current column of the parser.
406     ///
407     /// The column number starts at `1` and is reset whenever a `\n` is seen.
column(&self) -> usize408     fn column(&self) -> usize {
409         self.parser().pos.get().column
410     }
411 
412     /// Return the next capturing index. Each subsequent call increments the
413     /// internal index.
414     ///
415     /// The span given should correspond to the location of the opening
416     /// parenthesis.
417     ///
418     /// If the capture limit is exceeded, then an error is returned.
next_capture_index(&self, span: Span) -> Result<u32>419     fn next_capture_index(&self, span: Span) -> Result<u32> {
420         let current = self.parser().capture_index.get();
421         let i = current.checked_add(1).ok_or_else(|| {
422             self.error(span, ast::ErrorKind::CaptureLimitExceeded)
423         })?;
424         self.parser().capture_index.set(i);
425         Ok(i)
426     }
427 
428     /// Adds the given capture name to this parser. If this capture name has
429     /// already been used, then an error is returned.
add_capture_name(&self, cap: &ast::CaptureName) -> Result<()>430     fn add_capture_name(&self, cap: &ast::CaptureName) -> Result<()> {
431         let mut names = self.parser().capture_names.borrow_mut();
432         match names
433             .binary_search_by_key(&cap.name.as_str(), |c| c.name.as_str())
434         {
435             Err(i) => {
436                 names.insert(i, cap.clone());
437                 Ok(())
438             }
439             Ok(i) => Err(self.error(
440                 cap.span,
441                 ast::ErrorKind::GroupNameDuplicate { original: names[i].span },
442             )),
443         }
444     }
445 
446     /// Return whether the parser should ignore whitespace or not.
ignore_whitespace(&self) -> bool447     fn ignore_whitespace(&self) -> bool {
448         self.parser().ignore_whitespace.get()
449     }
450 
451     /// Return the character at the current position of the parser.
452     ///
453     /// This panics if the current position does not point to a valid char.
char(&self) -> char454     fn char(&self) -> char {
455         self.char_at(self.offset())
456     }
457 
458     /// Return the character at the given position.
459     ///
460     /// This panics if the given position does not point to a valid char.
char_at(&self, i: usize) -> char461     fn char_at(&self, i: usize) -> char {
462         self.pattern()[i..]
463             .chars()
464             .next()
465             .unwrap_or_else(|| panic!("expected char at offset {}", i))
466     }
467 
468     /// Bump the parser to the next Unicode scalar value.
469     ///
470     /// If the end of the input has been reached, then `false` is returned.
bump(&self) -> bool471     fn bump(&self) -> bool {
472         if self.is_eof() {
473             return false;
474         }
475         let Position { mut offset, mut line, mut column } = self.pos();
476         if self.char() == '\n' {
477             line = line.checked_add(1).unwrap();
478             column = 1;
479         } else {
480             column = column.checked_add(1).unwrap();
481         }
482         offset += self.char().len_utf8();
483         self.parser().pos.set(Position {
484             offset: offset,
485             line: line,
486             column: column,
487         });
488         self.pattern()[self.offset()..].chars().next().is_some()
489     }
490 
491     /// If the substring starting at the current position of the parser has
492     /// the given prefix, then bump the parser to the character immediately
493     /// following the prefix and return true. Otherwise, don't bump the parser
494     /// and return false.
bump_if(&self, prefix: &str) -> bool495     fn bump_if(&self, prefix: &str) -> bool {
496         if self.pattern()[self.offset()..].starts_with(prefix) {
497             for _ in 0..prefix.chars().count() {
498                 self.bump();
499             }
500             true
501         } else {
502             false
503         }
504     }
505 
506     /// Returns true if and only if the parser is positioned at a look-around
507     /// prefix. The conditions under which this returns true must always
508     /// correspond to a regular expression that would otherwise be consider
509     /// invalid.
510     ///
511     /// This should only be called immediately after parsing the opening of
512     /// a group or a set of flags.
is_lookaround_prefix(&self) -> bool513     fn is_lookaround_prefix(&self) -> bool {
514         self.bump_if("?=")
515             || self.bump_if("?!")
516             || self.bump_if("?<=")
517             || self.bump_if("?<!")
518     }
519 
520     /// Bump the parser, and if the `x` flag is enabled, bump through any
521     /// subsequent spaces. Return true if and only if the parser is not at
522     /// EOF.
bump_and_bump_space(&self) -> bool523     fn bump_and_bump_space(&self) -> bool {
524         if !self.bump() {
525             return false;
526         }
527         self.bump_space();
528         !self.is_eof()
529     }
530 
531     /// If the `x` flag is enabled (i.e., whitespace insensitivity with
532     /// comments), then this will advance the parser through all whitespace
533     /// and comments to the next non-whitespace non-comment byte.
534     ///
535     /// If the `x` flag is disabled, then this is a no-op.
536     ///
537     /// This should be used selectively throughout the parser where
538     /// arbitrary whitespace is permitted when the `x` flag is enabled. For
539     /// example, `{   5  , 6}` is equivalent to `{5,6}`.
bump_space(&self)540     fn bump_space(&self) {
541         if !self.ignore_whitespace() {
542             return;
543         }
544         while !self.is_eof() {
545             if self.char().is_whitespace() {
546                 self.bump();
547             } else if self.char() == '#' {
548                 let start = self.pos();
549                 let mut comment_text = String::new();
550                 self.bump();
551                 while !self.is_eof() {
552                     let c = self.char();
553                     self.bump();
554                     if c == '\n' {
555                         break;
556                     }
557                     comment_text.push(c);
558                 }
559                 let comment = ast::Comment {
560                     span: Span::new(start, self.pos()),
561                     comment: comment_text,
562                 };
563                 self.parser().comments.borrow_mut().push(comment);
564             } else {
565                 break;
566             }
567         }
568     }
569 
570     /// Peek at the next character in the input without advancing the parser.
571     ///
572     /// If the input has been exhausted, then this returns `None`.
peek(&self) -> Option<char>573     fn peek(&self) -> Option<char> {
574         if self.is_eof() {
575             return None;
576         }
577         self.pattern()[self.offset() + self.char().len_utf8()..].chars().next()
578     }
579 
580     /// Like peek, but will ignore spaces when the parser is in whitespace
581     /// insensitive mode.
peek_space(&self) -> Option<char>582     fn peek_space(&self) -> Option<char> {
583         if !self.ignore_whitespace() {
584             return self.peek();
585         }
586         if self.is_eof() {
587             return None;
588         }
589         let mut start = self.offset() + self.char().len_utf8();
590         let mut in_comment = false;
591         for (i, c) in self.pattern()[start..].char_indices() {
592             if c.is_whitespace() {
593                 continue;
594             } else if !in_comment && c == '#' {
595                 in_comment = true;
596             } else if in_comment && c == '\n' {
597                 in_comment = false;
598             } else {
599                 start += i;
600                 break;
601             }
602         }
603         self.pattern()[start..].chars().next()
604     }
605 
606     /// Returns true if the next call to `bump` would return false.
is_eof(&self) -> bool607     fn is_eof(&self) -> bool {
608         self.offset() == self.pattern().len()
609     }
610 
611     /// Return the current position of the parser, which includes the offset,
612     /// line and column.
pos(&self) -> Position613     fn pos(&self) -> Position {
614         self.parser().pos.get()
615     }
616 
617     /// Create a span at the current position of the parser. Both the start
618     /// and end of the span are set.
span(&self) -> Span619     fn span(&self) -> Span {
620         Span::splat(self.pos())
621     }
622 
623     /// Create a span that covers the current character.
span_char(&self) -> Span624     fn span_char(&self) -> Span {
625         let mut next = Position {
626             offset: self.offset().checked_add(self.char().len_utf8()).unwrap(),
627             line: self.line(),
628             column: self.column().checked_add(1).unwrap(),
629         };
630         if self.char() == '\n' {
631             next.line += 1;
632             next.column = 1;
633         }
634         Span::new(self.pos(), next)
635     }
636 
637     /// Parse and push a single alternation on to the parser's internal stack.
638     /// If the top of the stack already has an alternation, then add to that
639     /// instead of pushing a new one.
640     ///
641     /// The concatenation given corresponds to a single alternation branch.
642     /// The concatenation returned starts the next branch and is empty.
643     ///
644     /// This assumes the parser is currently positioned at `|` and will advance
645     /// the parser to the character following `|`.
646     #[inline(never)]
push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat>647     fn push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
648         assert_eq!(self.char(), '|');
649         concat.span.end = self.pos();
650         self.push_or_add_alternation(concat);
651         self.bump();
652         Ok(ast::Concat { span: self.span(), asts: vec![] })
653     }
654 
655     /// Pushes or adds the given branch of an alternation to the parser's
656     /// internal stack of state.
push_or_add_alternation(&self, concat: ast::Concat)657     fn push_or_add_alternation(&self, concat: ast::Concat) {
658         use self::GroupState::*;
659 
660         let mut stack = self.parser().stack_group.borrow_mut();
661         if let Some(&mut Alternation(ref mut alts)) = stack.last_mut() {
662             alts.asts.push(concat.into_ast());
663             return;
664         }
665         stack.push(Alternation(ast::Alternation {
666             span: Span::new(concat.span.start, self.pos()),
667             asts: vec![concat.into_ast()],
668         }));
669     }
670 
671     /// Parse and push a group AST (and its parent concatenation) on to the
672     /// parser's internal stack. Return a fresh concatenation corresponding
673     /// to the group's sub-AST.
674     ///
675     /// If a set of flags was found (with no group), then the concatenation
676     /// is returned with that set of flags added.
677     ///
678     /// This assumes that the parser is currently positioned on the opening
679     /// parenthesis. It advances the parser to the character at the start
680     /// of the sub-expression (or adjoining expression).
681     ///
682     /// If there was a problem parsing the start of the group, then an error
683     /// is returned.
684     #[inline(never)]
push_group(&self, mut concat: ast::Concat) -> Result<ast::Concat>685     fn push_group(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
686         assert_eq!(self.char(), '(');
687         match self.parse_group()? {
688             Either::Left(set) => {
689                 let ignore = set.flags.flag_state(ast::Flag::IgnoreWhitespace);
690                 if let Some(v) = ignore {
691                     self.parser().ignore_whitespace.set(v);
692                 }
693 
694                 concat.asts.push(Ast::Flags(set));
695                 Ok(concat)
696             }
697             Either::Right(group) => {
698                 let old_ignore_whitespace = self.ignore_whitespace();
699                 let new_ignore_whitespace = group
700                     .flags()
701                     .and_then(|f| f.flag_state(ast::Flag::IgnoreWhitespace))
702                     .unwrap_or(old_ignore_whitespace);
703                 self.parser().stack_group.borrow_mut().push(
704                     GroupState::Group {
705                         concat: concat,
706                         group: group,
707                         ignore_whitespace: old_ignore_whitespace,
708                     },
709                 );
710                 self.parser().ignore_whitespace.set(new_ignore_whitespace);
711                 Ok(ast::Concat { span: self.span(), asts: vec![] })
712             }
713         }
714     }
715 
716     /// Pop a group AST from the parser's internal stack and set the group's
717     /// AST to the given concatenation. Return the concatenation containing
718     /// the group.
719     ///
720     /// This assumes that the parser is currently positioned on the closing
721     /// parenthesis and advances the parser to the character following the `)`.
722     ///
723     /// If no such group could be popped, then an unopened group error is
724     /// returned.
725     #[inline(never)]
pop_group(&self, mut group_concat: ast::Concat) -> Result<ast::Concat>726     fn pop_group(&self, mut group_concat: ast::Concat) -> Result<ast::Concat> {
727         use self::GroupState::*;
728 
729         assert_eq!(self.char(), ')');
730         let mut stack = self.parser().stack_group.borrow_mut();
731         let (mut prior_concat, mut group, ignore_whitespace, alt) = match stack
732             .pop()
733         {
734             Some(Group { concat, group, ignore_whitespace }) => {
735                 (concat, group, ignore_whitespace, None)
736             }
737             Some(Alternation(alt)) => match stack.pop() {
738                 Some(Group { concat, group, ignore_whitespace }) => {
739                     (concat, group, ignore_whitespace, Some(alt))
740                 }
741                 None | Some(Alternation(_)) => {
742                     return Err(self.error(
743                         self.span_char(),
744                         ast::ErrorKind::GroupUnopened,
745                     ));
746                 }
747             },
748             None => {
749                 return Err(self
750                     .error(self.span_char(), ast::ErrorKind::GroupUnopened));
751             }
752         };
753         self.parser().ignore_whitespace.set(ignore_whitespace);
754         group_concat.span.end = self.pos();
755         self.bump();
756         group.span.end = self.pos();
757         match alt {
758             Some(mut alt) => {
759                 alt.span.end = group_concat.span.end;
760                 alt.asts.push(group_concat.into_ast());
761                 group.ast = Box::new(alt.into_ast());
762             }
763             None => {
764                 group.ast = Box::new(group_concat.into_ast());
765             }
766         }
767         prior_concat.asts.push(Ast::Group(group));
768         Ok(prior_concat)
769     }
770 
771     /// Pop the last state from the parser's internal stack, if it exists, and
772     /// add the given concatenation to it. There either must be no state or a
773     /// single alternation item on the stack. Any other scenario produces an
774     /// error.
775     ///
776     /// This assumes that the parser has advanced to the end.
777     #[inline(never)]
pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast>778     fn pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast> {
779         concat.span.end = self.pos();
780         let mut stack = self.parser().stack_group.borrow_mut();
781         let ast = match stack.pop() {
782             None => Ok(concat.into_ast()),
783             Some(GroupState::Alternation(mut alt)) => {
784                 alt.span.end = self.pos();
785                 alt.asts.push(concat.into_ast());
786                 Ok(Ast::Alternation(alt))
787             }
788             Some(GroupState::Group { group, .. }) => {
789                 return Err(
790                     self.error(group.span, ast::ErrorKind::GroupUnclosed)
791                 );
792             }
793         };
794         // If we try to pop again, there should be nothing.
795         match stack.pop() {
796             None => ast,
797             Some(GroupState::Alternation(_)) => {
798                 // This unreachable is unfortunate. This case can't happen
799                 // because the only way we can be here is if there were two
800                 // `GroupState::Alternation`s adjacent in the parser's stack,
801                 // which we guarantee to never happen because we never push a
802                 // `GroupState::Alternation` if one is already at the top of
803                 // the stack.
804                 unreachable!()
805             }
806             Some(GroupState::Group { group, .. }) => {
807                 Err(self.error(group.span, ast::ErrorKind::GroupUnclosed))
808             }
809         }
810     }
811 
812     /// Parse the opening of a character class and push the current class
813     /// parsing context onto the parser's stack. This assumes that the parser
814     /// is positioned at an opening `[`. The given union should correspond to
815     /// the union of set items built up before seeing the `[`.
816     ///
817     /// If there was a problem parsing the opening of the class, then an error
818     /// is returned. Otherwise, a new union of set items for the class is
819     /// returned (which may be populated with either a `]` or a `-`).
820     #[inline(never)]
push_class_open( &self, parent_union: ast::ClassSetUnion, ) -> Result<ast::ClassSetUnion>821     fn push_class_open(
822         &self,
823         parent_union: ast::ClassSetUnion,
824     ) -> Result<ast::ClassSetUnion> {
825         assert_eq!(self.char(), '[');
826 
827         let (nested_set, nested_union) = self.parse_set_class_open()?;
828         self.parser()
829             .stack_class
830             .borrow_mut()
831             .push(ClassState::Open { union: parent_union, set: nested_set });
832         Ok(nested_union)
833     }
834 
835     /// Parse the end of a character class set and pop the character class
836     /// parser stack. The union given corresponds to the last union built
837     /// before seeing the closing `]`. The union returned corresponds to the
838     /// parent character class set with the nested class added to it.
839     ///
840     /// This assumes that the parser is positioned at a `]` and will advance
841     /// the parser to the byte immediately following the `]`.
842     ///
843     /// If the stack is empty after popping, then this returns the final
844     /// "top-level" character class AST (where a "top-level" character class
845     /// is one that is not nested inside any other character class).
846     ///
847     /// If there is no corresponding opening bracket on the parser's stack,
848     /// then an error is returned.
849     #[inline(never)]
pop_class( &self, nested_union: ast::ClassSetUnion, ) -> Result<Either<ast::ClassSetUnion, ast::Class>>850     fn pop_class(
851         &self,
852         nested_union: ast::ClassSetUnion,
853     ) -> Result<Either<ast::ClassSetUnion, ast::Class>> {
854         assert_eq!(self.char(), ']');
855 
856         let item = ast::ClassSet::Item(nested_union.into_item());
857         let prevset = self.pop_class_op(item);
858         let mut stack = self.parser().stack_class.borrow_mut();
859         match stack.pop() {
860             None => {
861                 // We can never observe an empty stack:
862                 //
863                 // 1) We are guaranteed to start with a non-empty stack since
864                 //    the character class parser is only initiated when it sees
865                 //    a `[`.
866                 // 2) If we ever observe an empty stack while popping after
867                 //    seeing a `]`, then we signal the character class parser
868                 //    to terminate.
869                 panic!("unexpected empty character class stack")
870             }
871             Some(ClassState::Op { .. }) => {
872                 // This panic is unfortunate, but this case is impossible
873                 // since we already popped the Op state if one exists above.
874                 // Namely, every push to the class parser stack is guarded by
875                 // whether an existing Op is already on the top of the stack.
876                 // If it is, the existing Op is modified. That is, the stack
877                 // can never have consecutive Op states.
878                 panic!("unexpected ClassState::Op")
879             }
880             Some(ClassState::Open { mut union, mut set }) => {
881                 self.bump();
882                 set.span.end = self.pos();
883                 set.kind = prevset;
884                 if stack.is_empty() {
885                     Ok(Either::Right(ast::Class::Bracketed(set)))
886                 } else {
887                     union.push(ast::ClassSetItem::Bracketed(Box::new(set)));
888                     Ok(Either::Left(union))
889                 }
890             }
891         }
892     }
893 
894     /// Return an "unclosed class" error whose span points to the most
895     /// recently opened class.
896     ///
897     /// This should only be called while parsing a character class.
898     #[inline(never)]
unclosed_class_error(&self) -> ast::Error899     fn unclosed_class_error(&self) -> ast::Error {
900         for state in self.parser().stack_class.borrow().iter().rev() {
901             match *state {
902                 ClassState::Open { ref set, .. } => {
903                     return self
904                         .error(set.span, ast::ErrorKind::ClassUnclosed);
905                 }
906                 _ => {}
907             }
908         }
909         // We are guaranteed to have a non-empty stack with at least
910         // one open bracket, so we should never get here.
911         panic!("no open character class found")
912     }
913 
914     /// Push the current set of class items on to the class parser's stack as
915     /// the left hand side of the given operator.
916     ///
917     /// A fresh set union is returned, which should be used to build the right
918     /// hand side of this operator.
919     #[inline(never)]
push_class_op( &self, next_kind: ast::ClassSetBinaryOpKind, next_union: ast::ClassSetUnion, ) -> ast::ClassSetUnion920     fn push_class_op(
921         &self,
922         next_kind: ast::ClassSetBinaryOpKind,
923         next_union: ast::ClassSetUnion,
924     ) -> ast::ClassSetUnion {
925         let item = ast::ClassSet::Item(next_union.into_item());
926         let new_lhs = self.pop_class_op(item);
927         self.parser()
928             .stack_class
929             .borrow_mut()
930             .push(ClassState::Op { kind: next_kind, lhs: new_lhs });
931         ast::ClassSetUnion { span: self.span(), items: vec![] }
932     }
933 
934     /// Pop a character class set from the character class parser stack. If the
935     /// top of the stack is just an item (not an operation), then return the
936     /// given set unchanged. If the top of the stack is an operation, then the
937     /// given set will be used as the rhs of the operation on the top of the
938     /// stack. In that case, the binary operation is returned as a set.
939     #[inline(never)]
pop_class_op(&self, rhs: ast::ClassSet) -> ast::ClassSet940     fn pop_class_op(&self, rhs: ast::ClassSet) -> ast::ClassSet {
941         let mut stack = self.parser().stack_class.borrow_mut();
942         let (kind, lhs) = match stack.pop() {
943             Some(ClassState::Op { kind, lhs }) => (kind, lhs),
944             Some(state @ ClassState::Open { .. }) => {
945                 stack.push(state);
946                 return rhs;
947             }
948             None => unreachable!(),
949         };
950         let span = Span::new(lhs.span().start, rhs.span().end);
951         ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
952             span: span,
953             kind: kind,
954             lhs: Box::new(lhs),
955             rhs: Box::new(rhs),
956         })
957     }
958 }
959 
960 impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
961     /// Parse the regular expression into an abstract syntax tree.
parse(&self) -> Result<Ast>962     fn parse(&self) -> Result<Ast> {
963         self.parse_with_comments().map(|astc| astc.ast)
964     }
965 
966     /// Parse the regular expression and return an abstract syntax tree with
967     /// all of the comments found in the pattern.
parse_with_comments(&self) -> Result<ast::WithComments>968     fn parse_with_comments(&self) -> Result<ast::WithComments> {
969         assert_eq!(self.offset(), 0, "parser can only be used once");
970         self.parser().reset();
971         let mut concat = ast::Concat { span: self.span(), asts: vec![] };
972         loop {
973             self.bump_space();
974             if self.is_eof() {
975                 break;
976             }
977             match self.char() {
978                 '(' => concat = self.push_group(concat)?,
979                 ')' => concat = self.pop_group(concat)?,
980                 '|' => concat = self.push_alternate(concat)?,
981                 '[' => {
982                     let class = self.parse_set_class()?;
983                     concat.asts.push(Ast::Class(class));
984                 }
985                 '?' => {
986                     concat = self.parse_uncounted_repetition(
987                         concat,
988                         ast::RepetitionKind::ZeroOrOne,
989                     )?;
990                 }
991                 '*' => {
992                     concat = self.parse_uncounted_repetition(
993                         concat,
994                         ast::RepetitionKind::ZeroOrMore,
995                     )?;
996                 }
997                 '+' => {
998                     concat = self.parse_uncounted_repetition(
999                         concat,
1000                         ast::RepetitionKind::OneOrMore,
1001                     )?;
1002                 }
1003                 '{' => {
1004                     concat = self.parse_counted_repetition(concat)?;
1005                 }
1006                 _ => concat.asts.push(self.parse_primitive()?.into_ast()),
1007             }
1008         }
1009         let ast = self.pop_group_end(concat)?;
1010         NestLimiter::new(self).check(&ast)?;
1011         Ok(ast::WithComments {
1012             ast: ast,
1013             comments: mem::replace(
1014                 &mut *self.parser().comments.borrow_mut(),
1015                 vec![],
1016             ),
1017         })
1018     }
1019 
1020     /// Parses an uncounted repetition operation. An uncounted repetition
1021     /// operator includes ?, * and +, but does not include the {m,n} syntax.
1022     /// The given `kind` should correspond to the operator observed by the
1023     /// caller.
1024     ///
1025     /// This assumes that the paser is currently positioned at the repetition
1026     /// operator and advances the parser to the first character after the
1027     /// operator. (Note that the operator may include a single additional `?`,
1028     /// which makes the operator ungreedy.)
1029     ///
1030     /// The caller should include the concatenation that is being built. The
1031     /// concatenation returned includes the repetition operator applied to the
1032     /// last expression in the given concatenation.
1033     #[inline(never)]
parse_uncounted_repetition( &self, mut concat: ast::Concat, kind: ast::RepetitionKind, ) -> Result<ast::Concat>1034     fn parse_uncounted_repetition(
1035         &self,
1036         mut concat: ast::Concat,
1037         kind: ast::RepetitionKind,
1038     ) -> Result<ast::Concat> {
1039         assert!(
1040             self.char() == '?' || self.char() == '*' || self.char() == '+'
1041         );
1042         let op_start = self.pos();
1043         let ast = match concat.asts.pop() {
1044             Some(ast) => ast,
1045             None => {
1046                 return Err(
1047                     self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1048                 )
1049             }
1050         };
1051         match ast {
1052             Ast::Empty(_) | Ast::Flags(_) => {
1053                 return Err(
1054                     self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1055                 )
1056             }
1057             _ => {}
1058         }
1059         let mut greedy = true;
1060         if self.bump() && self.char() == '?' {
1061             greedy = false;
1062             self.bump();
1063         }
1064         concat.asts.push(Ast::Repetition(ast::Repetition {
1065             span: ast.span().with_end(self.pos()),
1066             op: ast::RepetitionOp {
1067                 span: Span::new(op_start, self.pos()),
1068                 kind: kind,
1069             },
1070             greedy: greedy,
1071             ast: Box::new(ast),
1072         }));
1073         Ok(concat)
1074     }
1075 
1076     /// Parses a counted repetition operation. A counted repetition operator
1077     /// corresponds to the {m,n} syntax, and does not include the ?, * or +
1078     /// operators.
1079     ///
1080     /// This assumes that the paser is currently positioned at the opening `{`
1081     /// and advances the parser to the first character after the operator.
1082     /// (Note that the operator may include a single additional `?`, which
1083     /// makes the operator ungreedy.)
1084     ///
1085     /// The caller should include the concatenation that is being built. The
1086     /// concatenation returned includes the repetition operator applied to the
1087     /// last expression in the given concatenation.
1088     #[inline(never)]
parse_counted_repetition( &self, mut concat: ast::Concat, ) -> Result<ast::Concat>1089     fn parse_counted_repetition(
1090         &self,
1091         mut concat: ast::Concat,
1092     ) -> Result<ast::Concat> {
1093         assert!(self.char() == '{');
1094         let start = self.pos();
1095         let ast = match concat.asts.pop() {
1096             Some(ast) => ast,
1097             None => {
1098                 return Err(
1099                     self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1100                 )
1101             }
1102         };
1103         match ast {
1104             Ast::Empty(_) | Ast::Flags(_) => {
1105                 return Err(
1106                     self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1107                 )
1108             }
1109             _ => {}
1110         }
1111         if !self.bump_and_bump_space() {
1112             return Err(self.error(
1113                 Span::new(start, self.pos()),
1114                 ast::ErrorKind::RepetitionCountUnclosed,
1115             ));
1116         }
1117         let count_start = specialize_err(
1118             self.parse_decimal(),
1119             ast::ErrorKind::DecimalEmpty,
1120             ast::ErrorKind::RepetitionCountDecimalEmpty,
1121         )?;
1122         let mut range = ast::RepetitionRange::Exactly(count_start);
1123         if self.is_eof() {
1124             return Err(self.error(
1125                 Span::new(start, self.pos()),
1126                 ast::ErrorKind::RepetitionCountUnclosed,
1127             ));
1128         }
1129         if self.char() == ',' {
1130             if !self.bump_and_bump_space() {
1131                 return Err(self.error(
1132                     Span::new(start, self.pos()),
1133                     ast::ErrorKind::RepetitionCountUnclosed,
1134                 ));
1135             }
1136             if self.char() != '}' {
1137                 let count_end = specialize_err(
1138                     self.parse_decimal(),
1139                     ast::ErrorKind::DecimalEmpty,
1140                     ast::ErrorKind::RepetitionCountDecimalEmpty,
1141                 )?;
1142                 range = ast::RepetitionRange::Bounded(count_start, count_end);
1143             } else {
1144                 range = ast::RepetitionRange::AtLeast(count_start);
1145             }
1146         }
1147         if self.is_eof() || self.char() != '}' {
1148             return Err(self.error(
1149                 Span::new(start, self.pos()),
1150                 ast::ErrorKind::RepetitionCountUnclosed,
1151             ));
1152         }
1153 
1154         let mut greedy = true;
1155         if self.bump_and_bump_space() && self.char() == '?' {
1156             greedy = false;
1157             self.bump();
1158         }
1159 
1160         let op_span = Span::new(start, self.pos());
1161         if !range.is_valid() {
1162             return Err(
1163                 self.error(op_span, ast::ErrorKind::RepetitionCountInvalid)
1164             );
1165         }
1166         concat.asts.push(Ast::Repetition(ast::Repetition {
1167             span: ast.span().with_end(self.pos()),
1168             op: ast::RepetitionOp {
1169                 span: op_span,
1170                 kind: ast::RepetitionKind::Range(range),
1171             },
1172             greedy: greedy,
1173             ast: Box::new(ast),
1174         }));
1175         Ok(concat)
1176     }
1177 
1178     /// Parse a group (which contains a sub-expression) or a set of flags.
1179     ///
1180     /// If a group was found, then it is returned with an empty AST. If a set
1181     /// of flags is found, then that set is returned.
1182     ///
1183     /// The parser should be positioned at the opening parenthesis.
1184     ///
1185     /// This advances the parser to the character before the start of the
1186     /// sub-expression (in the case of a group) or to the closing parenthesis
1187     /// immediately following the set of flags.
1188     ///
1189     /// # Errors
1190     ///
1191     /// If flags are given and incorrectly specified, then a corresponding
1192     /// error is returned.
1193     ///
1194     /// If a capture name is given and it is incorrectly specified, then a
1195     /// corresponding error is returned.
1196     #[inline(never)]
parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>>1197     fn parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>> {
1198         assert_eq!(self.char(), '(');
1199         let open_span = self.span_char();
1200         self.bump();
1201         self.bump_space();
1202         if self.is_lookaround_prefix() {
1203             return Err(self.error(
1204                 Span::new(open_span.start, self.span().end),
1205                 ast::ErrorKind::UnsupportedLookAround,
1206             ));
1207         }
1208         let inner_span = self.span();
1209         if self.bump_if("?P<") {
1210             let capture_index = self.next_capture_index(open_span)?;
1211             let cap = self.parse_capture_name(capture_index)?;
1212             Ok(Either::Right(ast::Group {
1213                 span: open_span,
1214                 kind: ast::GroupKind::CaptureName(cap),
1215                 ast: Box::new(Ast::Empty(self.span())),
1216             }))
1217         } else if self.bump_if("?") {
1218             if self.is_eof() {
1219                 return Err(
1220                     self.error(open_span, ast::ErrorKind::GroupUnclosed)
1221                 );
1222             }
1223             let flags = self.parse_flags()?;
1224             let char_end = self.char();
1225             self.bump();
1226             if char_end == ')' {
1227                 // We don't allow empty flags, e.g., `(?)`. We instead
1228                 // interpret it as a repetition operator missing its argument.
1229                 if flags.items.is_empty() {
1230                     return Err(self.error(
1231                         inner_span,
1232                         ast::ErrorKind::RepetitionMissing,
1233                     ));
1234                 }
1235                 Ok(Either::Left(ast::SetFlags {
1236                     span: Span { end: self.pos(), ..open_span },
1237                     flags: flags,
1238                 }))
1239             } else {
1240                 assert_eq!(char_end, ':');
1241                 Ok(Either::Right(ast::Group {
1242                     span: open_span,
1243                     kind: ast::GroupKind::NonCapturing(flags),
1244                     ast: Box::new(Ast::Empty(self.span())),
1245                 }))
1246             }
1247         } else {
1248             let capture_index = self.next_capture_index(open_span)?;
1249             Ok(Either::Right(ast::Group {
1250                 span: open_span,
1251                 kind: ast::GroupKind::CaptureIndex(capture_index),
1252                 ast: Box::new(Ast::Empty(self.span())),
1253             }))
1254         }
1255     }
1256 
1257     /// Parses a capture group name. Assumes that the parser is positioned at
1258     /// the first character in the name following the opening `<` (and may
1259     /// possibly be EOF). This advances the parser to the first character
1260     /// following the closing `>`.
1261     ///
1262     /// The caller must provide the capture index of the group for this name.
1263     #[inline(never)]
parse_capture_name( &self, capture_index: u32, ) -> Result<ast::CaptureName>1264     fn parse_capture_name(
1265         &self,
1266         capture_index: u32,
1267     ) -> Result<ast::CaptureName> {
1268         if self.is_eof() {
1269             return Err(self
1270                 .error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1271         }
1272         let start = self.pos();
1273         loop {
1274             if self.char() == '>' {
1275                 break;
1276             }
1277             if !is_capture_char(self.char(), self.pos() == start) {
1278                 return Err(self.error(
1279                     self.span_char(),
1280                     ast::ErrorKind::GroupNameInvalid,
1281                 ));
1282             }
1283             if !self.bump() {
1284                 break;
1285             }
1286         }
1287         let end = self.pos();
1288         if self.is_eof() {
1289             return Err(self
1290                 .error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1291         }
1292         assert_eq!(self.char(), '>');
1293         self.bump();
1294         let name = &self.pattern()[start.offset..end.offset];
1295         if name.is_empty() {
1296             return Err(self.error(
1297                 Span::new(start, start),
1298                 ast::ErrorKind::GroupNameEmpty,
1299             ));
1300         }
1301         let capname = ast::CaptureName {
1302             span: Span::new(start, end),
1303             name: name.to_string(),
1304             index: capture_index,
1305         };
1306         self.add_capture_name(&capname)?;
1307         Ok(capname)
1308     }
1309 
1310     /// Parse a sequence of flags starting at the current character.
1311     ///
1312     /// This advances the parser to the character immediately following the
1313     /// flags, which is guaranteed to be either `:` or `)`.
1314     ///
1315     /// # Errors
1316     ///
1317     /// If any flags are duplicated, then an error is returned.
1318     ///
1319     /// If the negation operator is used more than once, then an error is
1320     /// returned.
1321     ///
1322     /// If no flags could be found or if the negation operation is not followed
1323     /// by any flags, then an error is returned.
1324     #[inline(never)]
parse_flags(&self) -> Result<ast::Flags>1325     fn parse_flags(&self) -> Result<ast::Flags> {
1326         let mut flags = ast::Flags { span: self.span(), items: vec![] };
1327         let mut last_was_negation = None;
1328         while self.char() != ':' && self.char() != ')' {
1329             if self.char() == '-' {
1330                 last_was_negation = Some(self.span_char());
1331                 let item = ast::FlagsItem {
1332                     span: self.span_char(),
1333                     kind: ast::FlagsItemKind::Negation,
1334                 };
1335                 if let Some(i) = flags.add_item(item) {
1336                     return Err(self.error(
1337                         self.span_char(),
1338                         ast::ErrorKind::FlagRepeatedNegation {
1339                             original: flags.items[i].span,
1340                         },
1341                     ));
1342                 }
1343             } else {
1344                 last_was_negation = None;
1345                 let item = ast::FlagsItem {
1346                     span: self.span_char(),
1347                     kind: ast::FlagsItemKind::Flag(self.parse_flag()?),
1348                 };
1349                 if let Some(i) = flags.add_item(item) {
1350                     return Err(self.error(
1351                         self.span_char(),
1352                         ast::ErrorKind::FlagDuplicate {
1353                             original: flags.items[i].span,
1354                         },
1355                     ));
1356                 }
1357             }
1358             if !self.bump() {
1359                 return Err(
1360                     self.error(self.span(), ast::ErrorKind::FlagUnexpectedEof)
1361                 );
1362             }
1363         }
1364         if let Some(span) = last_was_negation {
1365             return Err(self.error(span, ast::ErrorKind::FlagDanglingNegation));
1366         }
1367         flags.span.end = self.pos();
1368         Ok(flags)
1369     }
1370 
1371     /// Parse the current character as a flag. Do not advance the parser.
1372     ///
1373     /// # Errors
1374     ///
1375     /// If the flag is not recognized, then an error is returned.
1376     #[inline(never)]
parse_flag(&self) -> Result<ast::Flag>1377     fn parse_flag(&self) -> Result<ast::Flag> {
1378         match self.char() {
1379             'i' => Ok(ast::Flag::CaseInsensitive),
1380             'm' => Ok(ast::Flag::MultiLine),
1381             's' => Ok(ast::Flag::DotMatchesNewLine),
1382             'U' => Ok(ast::Flag::SwapGreed),
1383             'u' => Ok(ast::Flag::Unicode),
1384             'x' => Ok(ast::Flag::IgnoreWhitespace),
1385             _ => {
1386                 Err(self
1387                     .error(self.span_char(), ast::ErrorKind::FlagUnrecognized))
1388             }
1389         }
1390     }
1391 
1392     /// Parse a primitive AST. e.g., A literal, non-set character class or
1393     /// assertion.
1394     ///
1395     /// This assumes that the parser expects a primitive at the current
1396     /// location. i.e., All other non-primitive cases have been handled.
1397     /// For example, if the parser's position is at `|`, then `|` will be
1398     /// treated as a literal (e.g., inside a character class).
1399     ///
1400     /// This advances the parser to the first character immediately following
1401     /// the primitive.
parse_primitive(&self) -> Result<Primitive>1402     fn parse_primitive(&self) -> Result<Primitive> {
1403         match self.char() {
1404             '\\' => self.parse_escape(),
1405             '.' => {
1406                 let ast = Primitive::Dot(self.span_char());
1407                 self.bump();
1408                 Ok(ast)
1409             }
1410             '^' => {
1411                 let ast = Primitive::Assertion(ast::Assertion {
1412                     span: self.span_char(),
1413                     kind: ast::AssertionKind::StartLine,
1414                 });
1415                 self.bump();
1416                 Ok(ast)
1417             }
1418             '$' => {
1419                 let ast = Primitive::Assertion(ast::Assertion {
1420                     span: self.span_char(),
1421                     kind: ast::AssertionKind::EndLine,
1422                 });
1423                 self.bump();
1424                 Ok(ast)
1425             }
1426             c => {
1427                 let ast = Primitive::Literal(ast::Literal {
1428                     span: self.span_char(),
1429                     kind: ast::LiteralKind::Verbatim,
1430                     c: c,
1431                 });
1432                 self.bump();
1433                 Ok(ast)
1434             }
1435         }
1436     }
1437 
1438     /// Parse an escape sequence as a primitive AST.
1439     ///
1440     /// This assumes the parser is positioned at the start of the escape
1441     /// sequence, i.e., `\`. It advances the parser to the first position
1442     /// immediately following the escape sequence.
1443     #[inline(never)]
parse_escape(&self) -> Result<Primitive>1444     fn parse_escape(&self) -> Result<Primitive> {
1445         assert_eq!(self.char(), '\\');
1446         let start = self.pos();
1447         if !self.bump() {
1448             return Err(self.error(
1449                 Span::new(start, self.pos()),
1450                 ast::ErrorKind::EscapeUnexpectedEof,
1451             ));
1452         }
1453         let c = self.char();
1454         // Put some of the more complicated routines into helpers.
1455         match c {
1456             '0'..='7' => {
1457                 if !self.parser().octal {
1458                     return Err(self.error(
1459                         Span::new(start, self.span_char().end),
1460                         ast::ErrorKind::UnsupportedBackreference,
1461                     ));
1462                 }
1463                 let mut lit = self.parse_octal();
1464                 lit.span.start = start;
1465                 return Ok(Primitive::Literal(lit));
1466             }
1467             '8'..='9' if !self.parser().octal => {
1468                 return Err(self.error(
1469                     Span::new(start, self.span_char().end),
1470                     ast::ErrorKind::UnsupportedBackreference,
1471                 ));
1472             }
1473             'x' | 'u' | 'U' => {
1474                 let mut lit = self.parse_hex()?;
1475                 lit.span.start = start;
1476                 return Ok(Primitive::Literal(lit));
1477             }
1478             'p' | 'P' => {
1479                 let mut cls = self.parse_unicode_class()?;
1480                 cls.span.start = start;
1481                 return Ok(Primitive::Unicode(cls));
1482             }
1483             'd' | 's' | 'w' | 'D' | 'S' | 'W' => {
1484                 let mut cls = self.parse_perl_class();
1485                 cls.span.start = start;
1486                 return Ok(Primitive::Perl(cls));
1487             }
1488             _ => {}
1489         }
1490 
1491         // Handle all of the one letter sequences inline.
1492         self.bump();
1493         let span = Span::new(start, self.pos());
1494         if is_meta_character(c) {
1495             return Ok(Primitive::Literal(ast::Literal {
1496                 span: span,
1497                 kind: ast::LiteralKind::Punctuation,
1498                 c: c,
1499             }));
1500         }
1501         let special = |kind, c| {
1502             Ok(Primitive::Literal(ast::Literal {
1503                 span: span,
1504                 kind: ast::LiteralKind::Special(kind),
1505                 c: c,
1506             }))
1507         };
1508         match c {
1509             'a' => special(ast::SpecialLiteralKind::Bell, '\x07'),
1510             'f' => special(ast::SpecialLiteralKind::FormFeed, '\x0C'),
1511             't' => special(ast::SpecialLiteralKind::Tab, '\t'),
1512             'n' => special(ast::SpecialLiteralKind::LineFeed, '\n'),
1513             'r' => special(ast::SpecialLiteralKind::CarriageReturn, '\r'),
1514             'v' => special(ast::SpecialLiteralKind::VerticalTab, '\x0B'),
1515             ' ' if self.ignore_whitespace() => {
1516                 special(ast::SpecialLiteralKind::Space, ' ')
1517             }
1518             'A' => Ok(Primitive::Assertion(ast::Assertion {
1519                 span: span,
1520                 kind: ast::AssertionKind::StartText,
1521             })),
1522             'z' => Ok(Primitive::Assertion(ast::Assertion {
1523                 span: span,
1524                 kind: ast::AssertionKind::EndText,
1525             })),
1526             'b' => Ok(Primitive::Assertion(ast::Assertion {
1527                 span: span,
1528                 kind: ast::AssertionKind::WordBoundary,
1529             })),
1530             'B' => Ok(Primitive::Assertion(ast::Assertion {
1531                 span: span,
1532                 kind: ast::AssertionKind::NotWordBoundary,
1533             })),
1534             _ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)),
1535         }
1536     }
1537 
1538     /// Parse an octal representation of a Unicode codepoint up to 3 digits
1539     /// long. This expects the parser to be positioned at the first octal
1540     /// digit and advances the parser to the first character immediately
1541     /// following the octal number. This also assumes that parsing octal
1542     /// escapes is enabled.
1543     ///
1544     /// Assuming the preconditions are met, this routine can never fail.
1545     #[inline(never)]
parse_octal(&self) -> ast::Literal1546     fn parse_octal(&self) -> ast::Literal {
1547         use std::char;
1548         use std::u32;
1549 
1550         assert!(self.parser().octal);
1551         assert!('0' <= self.char() && self.char() <= '7');
1552         let start = self.pos();
1553         // Parse up to two more digits.
1554         while self.bump()
1555             && '0' <= self.char()
1556             && self.char() <= '7'
1557             && self.pos().offset - start.offset <= 2
1558         {}
1559         let end = self.pos();
1560         let octal = &self.pattern()[start.offset..end.offset];
1561         // Parsing the octal should never fail since the above guarantees a
1562         // valid number.
1563         let codepoint =
1564             u32::from_str_radix(octal, 8).expect("valid octal number");
1565         // The max value for 3 digit octal is 0777 = 511 and [0, 511] has no
1566         // invalid Unicode scalar values.
1567         let c = char::from_u32(codepoint).expect("Unicode scalar value");
1568         ast::Literal {
1569             span: Span::new(start, end),
1570             kind: ast::LiteralKind::Octal,
1571             c: c,
1572         }
1573     }
1574 
1575     /// Parse a hex representation of a Unicode codepoint. This handles both
1576     /// hex notations, i.e., `\xFF` and `\x{FFFF}`. This expects the parser to
1577     /// be positioned at the `x`, `u` or `U` prefix. The parser is advanced to
1578     /// the first character immediately following the hexadecimal literal.
1579     #[inline(never)]
parse_hex(&self) -> Result<ast::Literal>1580     fn parse_hex(&self) -> Result<ast::Literal> {
1581         assert!(
1582             self.char() == 'x' || self.char() == 'u' || self.char() == 'U'
1583         );
1584 
1585         let hex_kind = match self.char() {
1586             'x' => ast::HexLiteralKind::X,
1587             'u' => ast::HexLiteralKind::UnicodeShort,
1588             _ => ast::HexLiteralKind::UnicodeLong,
1589         };
1590         if !self.bump_and_bump_space() {
1591             return Err(
1592                 self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)
1593             );
1594         }
1595         if self.char() == '{' {
1596             self.parse_hex_brace(hex_kind)
1597         } else {
1598             self.parse_hex_digits(hex_kind)
1599         }
1600     }
1601 
1602     /// Parse an N-digit hex representation of a Unicode codepoint. This
1603     /// expects the parser to be positioned at the first digit and will advance
1604     /// the parser to the first character immediately following the escape
1605     /// sequence.
1606     ///
1607     /// The number of digits given must be 2 (for `\xNN`), 4 (for `\uNNNN`)
1608     /// or 8 (for `\UNNNNNNNN`).
1609     #[inline(never)]
parse_hex_digits( &self, kind: ast::HexLiteralKind, ) -> Result<ast::Literal>1610     fn parse_hex_digits(
1611         &self,
1612         kind: ast::HexLiteralKind,
1613     ) -> Result<ast::Literal> {
1614         use std::char;
1615         use std::u32;
1616 
1617         let mut scratch = self.parser().scratch.borrow_mut();
1618         scratch.clear();
1619 
1620         let start = self.pos();
1621         for i in 0..kind.digits() {
1622             if i > 0 && !self.bump_and_bump_space() {
1623                 return Err(self
1624                     .error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
1625             }
1626             if !is_hex(self.char()) {
1627                 return Err(self.error(
1628                     self.span_char(),
1629                     ast::ErrorKind::EscapeHexInvalidDigit,
1630                 ));
1631             }
1632             scratch.push(self.char());
1633         }
1634         // The final bump just moves the parser past the literal, which may
1635         // be EOF.
1636         self.bump_and_bump_space();
1637         let end = self.pos();
1638         let hex = scratch.as_str();
1639         match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
1640             None => Err(self.error(
1641                 Span::new(start, end),
1642                 ast::ErrorKind::EscapeHexInvalid,
1643             )),
1644             Some(c) => Ok(ast::Literal {
1645                 span: Span::new(start, end),
1646                 kind: ast::LiteralKind::HexFixed(kind),
1647                 c: c,
1648             }),
1649         }
1650     }
1651 
1652     /// Parse a hex representation of any Unicode scalar value. This expects
1653     /// the parser to be positioned at the opening brace `{` and will advance
1654     /// the parser to the first character following the closing brace `}`.
1655     #[inline(never)]
parse_hex_brace( &self, kind: ast::HexLiteralKind, ) -> Result<ast::Literal>1656     fn parse_hex_brace(
1657         &self,
1658         kind: ast::HexLiteralKind,
1659     ) -> Result<ast::Literal> {
1660         use std::char;
1661         use std::u32;
1662 
1663         let mut scratch = self.parser().scratch.borrow_mut();
1664         scratch.clear();
1665 
1666         let brace_pos = self.pos();
1667         let start = self.span_char().end;
1668         while self.bump_and_bump_space() && self.char() != '}' {
1669             if !is_hex(self.char()) {
1670                 return Err(self.error(
1671                     self.span_char(),
1672                     ast::ErrorKind::EscapeHexInvalidDigit,
1673                 ));
1674             }
1675             scratch.push(self.char());
1676         }
1677         if self.is_eof() {
1678             return Err(self.error(
1679                 Span::new(brace_pos, self.pos()),
1680                 ast::ErrorKind::EscapeUnexpectedEof,
1681             ));
1682         }
1683         let end = self.pos();
1684         let hex = scratch.as_str();
1685         assert_eq!(self.char(), '}');
1686         self.bump_and_bump_space();
1687 
1688         if hex.is_empty() {
1689             return Err(self.error(
1690                 Span::new(brace_pos, self.pos()),
1691                 ast::ErrorKind::EscapeHexEmpty,
1692             ));
1693         }
1694         match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
1695             None => Err(self.error(
1696                 Span::new(start, end),
1697                 ast::ErrorKind::EscapeHexInvalid,
1698             )),
1699             Some(c) => Ok(ast::Literal {
1700                 span: Span::new(start, self.pos()),
1701                 kind: ast::LiteralKind::HexBrace(kind),
1702                 c: c,
1703             }),
1704         }
1705     }
1706 
1707     /// Parse a decimal number into a u32 while trimming leading and trailing
1708     /// whitespace.
1709     ///
1710     /// This expects the parser to be positioned at the first position where
1711     /// a decimal digit could occur. This will advance the parser to the byte
1712     /// immediately following the last contiguous decimal digit.
1713     ///
1714     /// If no decimal digit could be found or if there was a problem parsing
1715     /// the complete set of digits into a u32, then an error is returned.
parse_decimal(&self) -> Result<u32>1716     fn parse_decimal(&self) -> Result<u32> {
1717         let mut scratch = self.parser().scratch.borrow_mut();
1718         scratch.clear();
1719 
1720         while !self.is_eof() && self.char().is_whitespace() {
1721             self.bump();
1722         }
1723         let start = self.pos();
1724         while !self.is_eof() && '0' <= self.char() && self.char() <= '9' {
1725             scratch.push(self.char());
1726             self.bump_and_bump_space();
1727         }
1728         let span = Span::new(start, self.pos());
1729         while !self.is_eof() && self.char().is_whitespace() {
1730             self.bump_and_bump_space();
1731         }
1732         let digits = scratch.as_str();
1733         if digits.is_empty() {
1734             return Err(self.error(span, ast::ErrorKind::DecimalEmpty));
1735         }
1736         match u32::from_str_radix(digits, 10).ok() {
1737             Some(n) => Ok(n),
1738             None => Err(self.error(span, ast::ErrorKind::DecimalInvalid)),
1739         }
1740     }
1741 
1742     /// Parse a standard character class consisting primarily of characters or
1743     /// character ranges, but can also contain nested character classes of
1744     /// any type (sans `.`).
1745     ///
1746     /// This assumes the parser is positioned at the opening `[`. If parsing
1747     /// is successful, then the parser is advanced to the position immediately
1748     /// following the closing `]`.
1749     #[inline(never)]
parse_set_class(&self) -> Result<ast::Class>1750     fn parse_set_class(&self) -> Result<ast::Class> {
1751         assert_eq!(self.char(), '[');
1752 
1753         let mut union =
1754             ast::ClassSetUnion { span: self.span(), items: vec![] };
1755         loop {
1756             self.bump_space();
1757             if self.is_eof() {
1758                 return Err(self.unclosed_class_error());
1759             }
1760             match self.char() {
1761                 '[' => {
1762                     // If we've already parsed the opening bracket, then
1763                     // attempt to treat this as the beginning of an ASCII
1764                     // class. If ASCII class parsing fails, then the parser
1765                     // backs up to `[`.
1766                     if !self.parser().stack_class.borrow().is_empty() {
1767                         if let Some(cls) = self.maybe_parse_ascii_class() {
1768                             union.push(ast::ClassSetItem::Ascii(cls));
1769                             continue;
1770                         }
1771                     }
1772                     union = self.push_class_open(union)?;
1773                 }
1774                 ']' => match self.pop_class(union)? {
1775                     Either::Left(nested_union) => {
1776                         union = nested_union;
1777                     }
1778                     Either::Right(class) => return Ok(class),
1779                 },
1780                 '&' if self.peek() == Some('&') => {
1781                     assert!(self.bump_if("&&"));
1782                     union = self.push_class_op(
1783                         ast::ClassSetBinaryOpKind::Intersection,
1784                         union,
1785                     );
1786                 }
1787                 '-' if self.peek() == Some('-') => {
1788                     assert!(self.bump_if("--"));
1789                     union = self.push_class_op(
1790                         ast::ClassSetBinaryOpKind::Difference,
1791                         union,
1792                     );
1793                 }
1794                 '~' if self.peek() == Some('~') => {
1795                     assert!(self.bump_if("~~"));
1796                     union = self.push_class_op(
1797                         ast::ClassSetBinaryOpKind::SymmetricDifference,
1798                         union,
1799                     );
1800                 }
1801                 _ => {
1802                     union.push(self.parse_set_class_range()?);
1803                 }
1804             }
1805         }
1806     }
1807 
1808     /// Parse a single primitive item in a character class set. The item to
1809     /// be parsed can either be one of a simple literal character, a range
1810     /// between two simple literal characters or a "primitive" character
1811     /// class like \w or \p{Greek}.
1812     ///
1813     /// If an invalid escape is found, or if a character class is found where
1814     /// a simple literal is expected (e.g., in a range), then an error is
1815     /// returned.
1816     #[inline(never)]
parse_set_class_range(&self) -> Result<ast::ClassSetItem>1817     fn parse_set_class_range(&self) -> Result<ast::ClassSetItem> {
1818         let prim1 = self.parse_set_class_item()?;
1819         self.bump_space();
1820         if self.is_eof() {
1821             return Err(self.unclosed_class_error());
1822         }
1823         // If the next char isn't a `-`, then we don't have a range.
1824         // There are two exceptions. If the char after a `-` is a `]`, then
1825         // `-` is interpreted as a literal `-`. Alternatively, if the char
1826         // after a `-` is a `-`, then `--` corresponds to a "difference"
1827         // operation.
1828         if self.char() != '-'
1829             || self.peek_space() == Some(']')
1830             || self.peek_space() == Some('-')
1831         {
1832             return prim1.into_class_set_item(self);
1833         }
1834         // OK, now we're parsing a range, so bump past the `-` and parse the
1835         // second half of the range.
1836         if !self.bump_and_bump_space() {
1837             return Err(self.unclosed_class_error());
1838         }
1839         let prim2 = self.parse_set_class_item()?;
1840         let range = ast::ClassSetRange {
1841             span: Span::new(prim1.span().start, prim2.span().end),
1842             start: prim1.into_class_literal(self)?,
1843             end: prim2.into_class_literal(self)?,
1844         };
1845         if !range.is_valid() {
1846             return Err(
1847                 self.error(range.span, ast::ErrorKind::ClassRangeInvalid)
1848             );
1849         }
1850         Ok(ast::ClassSetItem::Range(range))
1851     }
1852 
1853     /// Parse a single item in a character class as a primitive, where the
1854     /// primitive either consists of a verbatim literal or a single escape
1855     /// sequence.
1856     ///
1857     /// This assumes the parser is positioned at the beginning of a primitive,
1858     /// and advances the parser to the first position after the primitive if
1859     /// successful.
1860     ///
1861     /// Note that it is the caller's responsibility to report an error if an
1862     /// illegal primitive was parsed.
1863     #[inline(never)]
parse_set_class_item(&self) -> Result<Primitive>1864     fn parse_set_class_item(&self) -> Result<Primitive> {
1865         if self.char() == '\\' {
1866             self.parse_escape()
1867         } else {
1868             let x = Primitive::Literal(ast::Literal {
1869                 span: self.span_char(),
1870                 kind: ast::LiteralKind::Verbatim,
1871                 c: self.char(),
1872             });
1873             self.bump();
1874             Ok(x)
1875         }
1876     }
1877 
1878     /// Parses the opening of a character class set. This includes the opening
1879     /// bracket along with `^` if present to indicate negation. This also
1880     /// starts parsing the opening set of unioned items if applicable, since
1881     /// there are special rules applied to certain characters in the opening
1882     /// of a character class. For example, `[^]]` is the class of all
1883     /// characters not equal to `]`. (`]` would need to be escaped in any other
1884     /// position.) Similarly for `-`.
1885     ///
1886     /// In all cases, the op inside the returned `ast::ClassBracketed` is an
1887     /// empty union. This empty union should be replaced with the actual item
1888     /// when it is popped from the parser's stack.
1889     ///
1890     /// This assumes the parser is positioned at the opening `[` and advances
1891     /// the parser to the first non-special byte of the character class.
1892     ///
1893     /// An error is returned if EOF is found.
1894     #[inline(never)]
parse_set_class_open( &self, ) -> Result<(ast::ClassBracketed, ast::ClassSetUnion)>1895     fn parse_set_class_open(
1896         &self,
1897     ) -> Result<(ast::ClassBracketed, ast::ClassSetUnion)> {
1898         assert_eq!(self.char(), '[');
1899         let start = self.pos();
1900         if !self.bump_and_bump_space() {
1901             return Err(self.error(
1902                 Span::new(start, self.pos()),
1903                 ast::ErrorKind::ClassUnclosed,
1904             ));
1905         }
1906 
1907         let negated = if self.char() != '^' {
1908             false
1909         } else {
1910             if !self.bump_and_bump_space() {
1911                 return Err(self.error(
1912                     Span::new(start, self.pos()),
1913                     ast::ErrorKind::ClassUnclosed,
1914                 ));
1915             }
1916             true
1917         };
1918         // Accept any number of `-` as literal `-`.
1919         let mut union =
1920             ast::ClassSetUnion { span: self.span(), items: vec![] };
1921         while self.char() == '-' {
1922             union.push(ast::ClassSetItem::Literal(ast::Literal {
1923                 span: self.span_char(),
1924                 kind: ast::LiteralKind::Verbatim,
1925                 c: '-',
1926             }));
1927             if !self.bump_and_bump_space() {
1928                 return Err(self.error(
1929                     Span::new(start, self.pos()),
1930                     ast::ErrorKind::ClassUnclosed,
1931                 ));
1932             }
1933         }
1934         // If `]` is the *first* char in a set, then interpret it as a literal
1935         // `]`. That is, an empty class is impossible to write.
1936         if union.items.is_empty() && self.char() == ']' {
1937             union.push(ast::ClassSetItem::Literal(ast::Literal {
1938                 span: self.span_char(),
1939                 kind: ast::LiteralKind::Verbatim,
1940                 c: ']',
1941             }));
1942             if !self.bump_and_bump_space() {
1943                 return Err(self.error(
1944                     Span::new(start, self.pos()),
1945                     ast::ErrorKind::ClassUnclosed,
1946                 ));
1947             }
1948         }
1949         let set = ast::ClassBracketed {
1950             span: Span::new(start, self.pos()),
1951             negated: negated,
1952             kind: ast::ClassSet::union(ast::ClassSetUnion {
1953                 span: Span::new(union.span.start, union.span.start),
1954                 items: vec![],
1955             }),
1956         };
1957         Ok((set, union))
1958     }
1959 
1960     /// Attempt to parse an ASCII character class, e.g., `[:alnum:]`.
1961     ///
1962     /// This assumes the parser is positioned at the opening `[`.
1963     ///
1964     /// If no valid ASCII character class could be found, then this does not
1965     /// advance the parser and `None` is returned. Otherwise, the parser is
1966     /// advanced to the first byte following the closing `]` and the
1967     /// corresponding ASCII class is returned.
1968     #[inline(never)]
maybe_parse_ascii_class(&self) -> Option<ast::ClassAscii>1969     fn maybe_parse_ascii_class(&self) -> Option<ast::ClassAscii> {
1970         // ASCII character classes are interesting from a parsing perspective
1971         // because parsing cannot fail with any interesting error. For example,
1972         // in order to use an ASCII character class, it must be enclosed in
1973         // double brackets, e.g., `[[:alnum:]]`. Alternatively, you might think
1974         // of it as "ASCII character characters have the syntax `[:NAME:]`
1975         // which can only appear within character brackets." This means that
1976         // things like `[[:lower:]A]` are legal constructs.
1977         //
1978         // However, if one types an incorrect ASCII character class, e.g.,
1979         // `[[:loower:]]`, then we treat that as a normal nested character
1980         // class containing the characters `:elorw`. One might argue that we
1981         // should return an error instead since the repeated colons give away
1982         // the intent to write an ASCII class. But what if the user typed
1983         // `[[:lower]]` instead? How can we tell that was intended to be an
1984         // ASCII class and not just a normal nested class?
1985         //
1986         // Reasonable people can probably disagree over this, but for better
1987         // or worse, we implement semantics that never fails at the expense
1988         // of better failure modes.
1989         assert_eq!(self.char(), '[');
1990         // If parsing fails, then we back up the parser to this starting point.
1991         let start = self.pos();
1992         let mut negated = false;
1993         if !self.bump() || self.char() != ':' {
1994             self.parser().pos.set(start);
1995             return None;
1996         }
1997         if !self.bump() {
1998             self.parser().pos.set(start);
1999             return None;
2000         }
2001         if self.char() == '^' {
2002             negated = true;
2003             if !self.bump() {
2004                 self.parser().pos.set(start);
2005                 return None;
2006             }
2007         }
2008         let name_start = self.offset();
2009         while self.char() != ':' && self.bump() {}
2010         if self.is_eof() {
2011             self.parser().pos.set(start);
2012             return None;
2013         }
2014         let name = &self.pattern()[name_start..self.offset()];
2015         if !self.bump_if(":]") {
2016             self.parser().pos.set(start);
2017             return None;
2018         }
2019         let kind = match ast::ClassAsciiKind::from_name(name) {
2020             Some(kind) => kind,
2021             None => {
2022                 self.parser().pos.set(start);
2023                 return None;
2024             }
2025         };
2026         Some(ast::ClassAscii {
2027             span: Span::new(start, self.pos()),
2028             kind: kind,
2029             negated: negated,
2030         })
2031     }
2032 
2033     /// Parse a Unicode class in either the single character notation, `\pN`
2034     /// or the multi-character bracketed notation, `\p{Greek}`. This assumes
2035     /// the parser is positioned at the `p` (or `P` for negation) and will
2036     /// advance the parser to the character immediately following the class.
2037     ///
2038     /// Note that this does not check whether the class name is valid or not.
2039     #[inline(never)]
parse_unicode_class(&self) -> Result<ast::ClassUnicode>2040     fn parse_unicode_class(&self) -> Result<ast::ClassUnicode> {
2041         assert!(self.char() == 'p' || self.char() == 'P');
2042 
2043         let mut scratch = self.parser().scratch.borrow_mut();
2044         scratch.clear();
2045 
2046         let negated = self.char() == 'P';
2047         if !self.bump_and_bump_space() {
2048             return Err(
2049                 self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)
2050             );
2051         }
2052         let (start, kind) = if self.char() == '{' {
2053             let start = self.span_char().end;
2054             while self.bump_and_bump_space() && self.char() != '}' {
2055                 scratch.push(self.char());
2056             }
2057             if self.is_eof() {
2058                 return Err(self
2059                     .error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2060             }
2061             assert_eq!(self.char(), '}');
2062             self.bump();
2063 
2064             let name = scratch.as_str();
2065             if let Some(i) = name.find("!=") {
2066                 (
2067                     start,
2068                     ast::ClassUnicodeKind::NamedValue {
2069                         op: ast::ClassUnicodeOpKind::NotEqual,
2070                         name: name[..i].to_string(),
2071                         value: name[i + 2..].to_string(),
2072                     },
2073                 )
2074             } else if let Some(i) = name.find(':') {
2075                 (
2076                     start,
2077                     ast::ClassUnicodeKind::NamedValue {
2078                         op: ast::ClassUnicodeOpKind::Colon,
2079                         name: name[..i].to_string(),
2080                         value: name[i + 1..].to_string(),
2081                     },
2082                 )
2083             } else if let Some(i) = name.find('=') {
2084                 (
2085                     start,
2086                     ast::ClassUnicodeKind::NamedValue {
2087                         op: ast::ClassUnicodeOpKind::Equal,
2088                         name: name[..i].to_string(),
2089                         value: name[i + 1..].to_string(),
2090                     },
2091                 )
2092             } else {
2093                 (start, ast::ClassUnicodeKind::Named(name.to_string()))
2094             }
2095         } else {
2096             let start = self.pos();
2097             let c = self.char();
2098             if c == '\\' {
2099                 return Err(self.error(
2100                     self.span_char(),
2101                     ast::ErrorKind::UnicodeClassInvalid,
2102                 ));
2103             }
2104             self.bump_and_bump_space();
2105             let kind = ast::ClassUnicodeKind::OneLetter(c);
2106             (start, kind)
2107         };
2108         Ok(ast::ClassUnicode {
2109             span: Span::new(start, self.pos()),
2110             negated: negated,
2111             kind: kind,
2112         })
2113     }
2114 
2115     /// Parse a Perl character class, e.g., `\d` or `\W`. This assumes the
2116     /// parser is currently at a valid character class name and will be
2117     /// advanced to the character immediately following the class.
2118     #[inline(never)]
parse_perl_class(&self) -> ast::ClassPerl2119     fn parse_perl_class(&self) -> ast::ClassPerl {
2120         let c = self.char();
2121         let span = self.span_char();
2122         self.bump();
2123         let (negated, kind) = match c {
2124             'd' => (false, ast::ClassPerlKind::Digit),
2125             'D' => (true, ast::ClassPerlKind::Digit),
2126             's' => (false, ast::ClassPerlKind::Space),
2127             'S' => (true, ast::ClassPerlKind::Space),
2128             'w' => (false, ast::ClassPerlKind::Word),
2129             'W' => (true, ast::ClassPerlKind::Word),
2130             c => panic!("expected valid Perl class but got '{}'", c),
2131         };
2132         ast::ClassPerl { span: span, kind: kind, negated: negated }
2133     }
2134 }
2135 
2136 /// A type that traverses a fully parsed Ast and checks whether its depth
2137 /// exceeds the specified nesting limit. If it does, then an error is returned.
2138 #[derive(Debug)]
2139 struct NestLimiter<'p, 's: 'p, P: 'p + 's> {
2140     /// The parser that is checking the nest limit.
2141     p: &'p ParserI<'s, P>,
2142     /// The current depth while walking an Ast.
2143     depth: u32,
2144 }
2145 
2146 impl<'p, 's, P: Borrow<Parser>> NestLimiter<'p, 's, P> {
new(p: &'p ParserI<'s, P>) -> NestLimiter<'p, 's, P>2147     fn new(p: &'p ParserI<'s, P>) -> NestLimiter<'p, 's, P> {
2148         NestLimiter { p: p, depth: 0 }
2149     }
2150 
2151     #[inline(never)]
check(self, ast: &Ast) -> Result<()>2152     fn check(self, ast: &Ast) -> Result<()> {
2153         ast::visit(ast, self)
2154     }
2155 
increment_depth(&mut self, span: &Span) -> Result<()>2156     fn increment_depth(&mut self, span: &Span) -> Result<()> {
2157         let new = self.depth.checked_add(1).ok_or_else(|| {
2158             self.p.error(
2159                 span.clone(),
2160                 ast::ErrorKind::NestLimitExceeded(::std::u32::MAX),
2161             )
2162         })?;
2163         let limit = self.p.parser().nest_limit;
2164         if new > limit {
2165             return Err(self.p.error(
2166                 span.clone(),
2167                 ast::ErrorKind::NestLimitExceeded(limit),
2168             ));
2169         }
2170         self.depth = new;
2171         Ok(())
2172     }
2173 
decrement_depth(&mut self)2174     fn decrement_depth(&mut self) {
2175         // Assuming the correctness of the visitor, this should never drop
2176         // below 0.
2177         self.depth = self.depth.checked_sub(1).unwrap();
2178     }
2179 }
2180 
2181 impl<'p, 's, P: Borrow<Parser>> ast::Visitor for NestLimiter<'p, 's, P> {
2182     type Output = ();
2183     type Err = ast::Error;
2184 
finish(self) -> Result<()>2185     fn finish(self) -> Result<()> {
2186         Ok(())
2187     }
2188 
visit_pre(&mut self, ast: &Ast) -> Result<()>2189     fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
2190         let span = match *ast {
2191             Ast::Empty(_)
2192             | Ast::Flags(_)
2193             | Ast::Literal(_)
2194             | Ast::Dot(_)
2195             | Ast::Assertion(_)
2196             | Ast::Class(ast::Class::Unicode(_))
2197             | Ast::Class(ast::Class::Perl(_)) => {
2198                 // These are all base cases, so we don't increment depth.
2199                 return Ok(());
2200             }
2201             Ast::Class(ast::Class::Bracketed(ref x)) => &x.span,
2202             Ast::Repetition(ref x) => &x.span,
2203             Ast::Group(ref x) => &x.span,
2204             Ast::Alternation(ref x) => &x.span,
2205             Ast::Concat(ref x) => &x.span,
2206         };
2207         self.increment_depth(span)
2208     }
2209 
visit_post(&mut self, ast: &Ast) -> Result<()>2210     fn visit_post(&mut self, ast: &Ast) -> Result<()> {
2211         match *ast {
2212             Ast::Empty(_)
2213             | Ast::Flags(_)
2214             | Ast::Literal(_)
2215             | Ast::Dot(_)
2216             | Ast::Assertion(_)
2217             | Ast::Class(ast::Class::Unicode(_))
2218             | Ast::Class(ast::Class::Perl(_)) => {
2219                 // These are all base cases, so we don't decrement depth.
2220                 Ok(())
2221             }
2222             Ast::Class(ast::Class::Bracketed(_))
2223             | Ast::Repetition(_)
2224             | Ast::Group(_)
2225             | Ast::Alternation(_)
2226             | Ast::Concat(_) => {
2227                 self.decrement_depth();
2228                 Ok(())
2229             }
2230         }
2231     }
2232 
visit_class_set_item_pre( &mut self, ast: &ast::ClassSetItem, ) -> Result<()>2233     fn visit_class_set_item_pre(
2234         &mut self,
2235         ast: &ast::ClassSetItem,
2236     ) -> Result<()> {
2237         let span = match *ast {
2238             ast::ClassSetItem::Empty(_)
2239             | ast::ClassSetItem::Literal(_)
2240             | ast::ClassSetItem::Range(_)
2241             | ast::ClassSetItem::Ascii(_)
2242             | ast::ClassSetItem::Unicode(_)
2243             | ast::ClassSetItem::Perl(_) => {
2244                 // These are all base cases, so we don't increment depth.
2245                 return Ok(());
2246             }
2247             ast::ClassSetItem::Bracketed(ref x) => &x.span,
2248             ast::ClassSetItem::Union(ref x) => &x.span,
2249         };
2250         self.increment_depth(span)
2251     }
2252 
visit_class_set_item_post( &mut self, ast: &ast::ClassSetItem, ) -> Result<()>2253     fn visit_class_set_item_post(
2254         &mut self,
2255         ast: &ast::ClassSetItem,
2256     ) -> Result<()> {
2257         match *ast {
2258             ast::ClassSetItem::Empty(_)
2259             | ast::ClassSetItem::Literal(_)
2260             | ast::ClassSetItem::Range(_)
2261             | ast::ClassSetItem::Ascii(_)
2262             | ast::ClassSetItem::Unicode(_)
2263             | ast::ClassSetItem::Perl(_) => {
2264                 // These are all base cases, so we don't decrement depth.
2265                 Ok(())
2266             }
2267             ast::ClassSetItem::Bracketed(_) | ast::ClassSetItem::Union(_) => {
2268                 self.decrement_depth();
2269                 Ok(())
2270             }
2271         }
2272     }
2273 
visit_class_set_binary_op_pre( &mut self, ast: &ast::ClassSetBinaryOp, ) -> Result<()>2274     fn visit_class_set_binary_op_pre(
2275         &mut self,
2276         ast: &ast::ClassSetBinaryOp,
2277     ) -> Result<()> {
2278         self.increment_depth(&ast.span)
2279     }
2280 
visit_class_set_binary_op_post( &mut self, _ast: &ast::ClassSetBinaryOp, ) -> Result<()>2281     fn visit_class_set_binary_op_post(
2282         &mut self,
2283         _ast: &ast::ClassSetBinaryOp,
2284     ) -> Result<()> {
2285         self.decrement_depth();
2286         Ok(())
2287     }
2288 }
2289 
2290 /// When the result is an error, transforms the ast::ErrorKind from the source
2291 /// Result into another one. This function is used to return clearer error
2292 /// messages when possible.
specialize_err<T>( result: Result<T>, from: ast::ErrorKind, to: ast::ErrorKind, ) -> Result<T>2293 fn specialize_err<T>(
2294     result: Result<T>,
2295     from: ast::ErrorKind,
2296     to: ast::ErrorKind,
2297 ) -> Result<T> {
2298     if let Err(e) = result {
2299         if e.kind == from {
2300             Err(ast::Error { kind: to, pattern: e.pattern, span: e.span })
2301         } else {
2302             Err(e)
2303         }
2304     } else {
2305         result
2306     }
2307 }
2308 
2309 #[cfg(test)]
2310 mod tests {
2311     use std::ops::Range;
2312 
2313     use super::{Parser, ParserBuilder, ParserI, Primitive};
2314     use ast::{self, Ast, Position, Span};
2315 
2316     // Our own assert_eq, which has slightly better formatting (but honestly
2317     // still kind of crappy).
2318     macro_rules! assert_eq {
2319         ($left:expr, $right:expr) => {{
2320             match (&$left, &$right) {
2321                 (left_val, right_val) => {
2322                     if !(*left_val == *right_val) {
2323                         panic!(
2324                             "assertion failed: `(left == right)`\n\n\
2325                              left:  `{:?}`\nright: `{:?}`\n\n",
2326                             left_val, right_val
2327                         )
2328                     }
2329                 }
2330             }
2331         }};
2332     }
2333 
2334     // We create these errors to compare with real ast::Errors in the tests.
2335     // We define equality between TestError and ast::Error to disregard the
2336     // pattern string in ast::Error, which is annoying to provide in tests.
2337     #[derive(Clone, Debug)]
2338     struct TestError {
2339         span: Span,
2340         kind: ast::ErrorKind,
2341     }
2342 
2343     impl PartialEq<ast::Error> for TestError {
eq(&self, other: &ast::Error) -> bool2344         fn eq(&self, other: &ast::Error) -> bool {
2345             self.span == other.span && self.kind == other.kind
2346         }
2347     }
2348 
2349     impl PartialEq<TestError> for ast::Error {
eq(&self, other: &TestError) -> bool2350         fn eq(&self, other: &TestError) -> bool {
2351             self.span == other.span && self.kind == other.kind
2352         }
2353     }
2354 
s(str: &str) -> String2355     fn s(str: &str) -> String {
2356         str.to_string()
2357     }
2358 
parser(pattern: &str) -> ParserI<Parser>2359     fn parser(pattern: &str) -> ParserI<Parser> {
2360         ParserI::new(Parser::new(), pattern)
2361     }
2362 
parser_octal(pattern: &str) -> ParserI<Parser>2363     fn parser_octal(pattern: &str) -> ParserI<Parser> {
2364         let parser = ParserBuilder::new().octal(true).build();
2365         ParserI::new(parser, pattern)
2366     }
2367 
parser_nest_limit(pattern: &str, nest_limit: u32) -> ParserI<Parser>2368     fn parser_nest_limit(pattern: &str, nest_limit: u32) -> ParserI<Parser> {
2369         let p = ParserBuilder::new().nest_limit(nest_limit).build();
2370         ParserI::new(p, pattern)
2371     }
2372 
parser_ignore_whitespace(pattern: &str) -> ParserI<Parser>2373     fn parser_ignore_whitespace(pattern: &str) -> ParserI<Parser> {
2374         let p = ParserBuilder::new().ignore_whitespace(true).build();
2375         ParserI::new(p, pattern)
2376     }
2377 
2378     /// Short alias for creating a new span.
nspan(start: Position, end: Position) -> Span2379     fn nspan(start: Position, end: Position) -> Span {
2380         Span::new(start, end)
2381     }
2382 
2383     /// Short alias for creating a new position.
npos(offset: usize, line: usize, column: usize) -> Position2384     fn npos(offset: usize, line: usize, column: usize) -> Position {
2385         Position::new(offset, line, column)
2386     }
2387 
2388     /// Create a new span from the given offset range. This assumes a single
2389     /// line and sets the columns based on the offsets. i.e., This only works
2390     /// out of the box for ASCII, which is fine for most tests.
span(range: Range<usize>) -> Span2391     fn span(range: Range<usize>) -> Span {
2392         let start = Position::new(range.start, 1, range.start + 1);
2393         let end = Position::new(range.end, 1, range.end + 1);
2394         Span::new(start, end)
2395     }
2396 
2397     /// Create a new span for the corresponding byte range in the given string.
span_range(subject: &str, range: Range<usize>) -> Span2398     fn span_range(subject: &str, range: Range<usize>) -> Span {
2399         let start = Position {
2400             offset: range.start,
2401             line: 1 + subject[..range.start].matches('\n').count(),
2402             column: 1 + subject[..range.start]
2403                 .chars()
2404                 .rev()
2405                 .position(|c| c == '\n')
2406                 .unwrap_or(subject[..range.start].chars().count()),
2407         };
2408         let end = Position {
2409             offset: range.end,
2410             line: 1 + subject[..range.end].matches('\n').count(),
2411             column: 1 + subject[..range.end]
2412                 .chars()
2413                 .rev()
2414                 .position(|c| c == '\n')
2415                 .unwrap_or(subject[..range.end].chars().count()),
2416         };
2417         Span::new(start, end)
2418     }
2419 
2420     /// Create a verbatim literal starting at the given position.
lit(c: char, start: usize) -> Ast2421     fn lit(c: char, start: usize) -> Ast {
2422         lit_with(c, span(start..start + c.len_utf8()))
2423     }
2424 
2425     /// Create a punctuation literal starting at the given position.
punct_lit(c: char, span: Span) -> Ast2426     fn punct_lit(c: char, span: Span) -> Ast {
2427         Ast::Literal(ast::Literal {
2428             span: span,
2429             kind: ast::LiteralKind::Punctuation,
2430             c: c,
2431         })
2432     }
2433 
2434     /// Create a verbatim literal with the given span.
lit_with(c: char, span: Span) -> Ast2435     fn lit_with(c: char, span: Span) -> Ast {
2436         Ast::Literal(ast::Literal {
2437             span: span,
2438             kind: ast::LiteralKind::Verbatim,
2439             c: c,
2440         })
2441     }
2442 
2443     /// Create a concatenation with the given range.
concat(range: Range<usize>, asts: Vec<Ast>) -> Ast2444     fn concat(range: Range<usize>, asts: Vec<Ast>) -> Ast {
2445         concat_with(span(range), asts)
2446     }
2447 
2448     /// Create a concatenation with the given span.
concat_with(span: Span, asts: Vec<Ast>) -> Ast2449     fn concat_with(span: Span, asts: Vec<Ast>) -> Ast {
2450         Ast::Concat(ast::Concat { span: span, asts: asts })
2451     }
2452 
2453     /// Create an alternation with the given span.
alt(range: Range<usize>, asts: Vec<Ast>) -> Ast2454     fn alt(range: Range<usize>, asts: Vec<Ast>) -> Ast {
2455         Ast::Alternation(ast::Alternation { span: span(range), asts: asts })
2456     }
2457 
2458     /// Create a capturing group with the given span.
group(range: Range<usize>, index: u32, ast: Ast) -> Ast2459     fn group(range: Range<usize>, index: u32, ast: Ast) -> Ast {
2460         Ast::Group(ast::Group {
2461             span: span(range),
2462             kind: ast::GroupKind::CaptureIndex(index),
2463             ast: Box::new(ast),
2464         })
2465     }
2466 
2467     /// Create an ast::SetFlags.
2468     ///
2469     /// The given pattern should be the full pattern string. The range given
2470     /// should correspond to the byte offsets where the flag set occurs.
2471     ///
2472     /// If negated is true, then the set is interpreted as beginning with a
2473     /// negation.
flag_set( pat: &str, range: Range<usize>, flag: ast::Flag, negated: bool, ) -> Ast2474     fn flag_set(
2475         pat: &str,
2476         range: Range<usize>,
2477         flag: ast::Flag,
2478         negated: bool,
2479     ) -> Ast {
2480         let mut items = vec![ast::FlagsItem {
2481             span: span_range(pat, (range.end - 2)..(range.end - 1)),
2482             kind: ast::FlagsItemKind::Flag(flag),
2483         }];
2484         if negated {
2485             items.insert(
2486                 0,
2487                 ast::FlagsItem {
2488                     span: span_range(pat, (range.start + 2)..(range.end - 2)),
2489                     kind: ast::FlagsItemKind::Negation,
2490                 },
2491             );
2492         }
2493         Ast::Flags(ast::SetFlags {
2494             span: span_range(pat, range.clone()),
2495             flags: ast::Flags {
2496                 span: span_range(pat, (range.start + 2)..(range.end - 1)),
2497                 items: items,
2498             },
2499         })
2500     }
2501 
2502     #[test]
parse_nest_limit()2503     fn parse_nest_limit() {
2504         // A nest limit of 0 still allows some types of regexes.
2505         assert_eq!(
2506             parser_nest_limit("", 0).parse(),
2507             Ok(Ast::Empty(span(0..0)))
2508         );
2509         assert_eq!(parser_nest_limit("a", 0).parse(), Ok(lit('a', 0)));
2510 
2511         // Test repetition operations, which require one level of nesting.
2512         assert_eq!(
2513             parser_nest_limit("a+", 0).parse().unwrap_err(),
2514             TestError {
2515                 span: span(0..2),
2516                 kind: ast::ErrorKind::NestLimitExceeded(0),
2517             }
2518         );
2519         assert_eq!(
2520             parser_nest_limit("a+", 1).parse(),
2521             Ok(Ast::Repetition(ast::Repetition {
2522                 span: span(0..2),
2523                 op: ast::RepetitionOp {
2524                     span: span(1..2),
2525                     kind: ast::RepetitionKind::OneOrMore,
2526                 },
2527                 greedy: true,
2528                 ast: Box::new(lit('a', 0)),
2529             }))
2530         );
2531         assert_eq!(
2532             parser_nest_limit("(a)+", 1).parse().unwrap_err(),
2533             TestError {
2534                 span: span(0..3),
2535                 kind: ast::ErrorKind::NestLimitExceeded(1),
2536             }
2537         );
2538         assert_eq!(
2539             parser_nest_limit("a+*", 1).parse().unwrap_err(),
2540             TestError {
2541                 span: span(0..2),
2542                 kind: ast::ErrorKind::NestLimitExceeded(1),
2543             }
2544         );
2545         assert_eq!(
2546             parser_nest_limit("a+*", 2).parse(),
2547             Ok(Ast::Repetition(ast::Repetition {
2548                 span: span(0..3),
2549                 op: ast::RepetitionOp {
2550                     span: span(2..3),
2551                     kind: ast::RepetitionKind::ZeroOrMore,
2552                 },
2553                 greedy: true,
2554                 ast: Box::new(Ast::Repetition(ast::Repetition {
2555                     span: span(0..2),
2556                     op: ast::RepetitionOp {
2557                         span: span(1..2),
2558                         kind: ast::RepetitionKind::OneOrMore,
2559                     },
2560                     greedy: true,
2561                     ast: Box::new(lit('a', 0)),
2562                 })),
2563             }))
2564         );
2565 
2566         // Test concatenations. A concatenation requires one level of nesting.
2567         assert_eq!(
2568             parser_nest_limit("ab", 0).parse().unwrap_err(),
2569             TestError {
2570                 span: span(0..2),
2571                 kind: ast::ErrorKind::NestLimitExceeded(0),
2572             }
2573         );
2574         assert_eq!(
2575             parser_nest_limit("ab", 1).parse(),
2576             Ok(concat(0..2, vec![lit('a', 0), lit('b', 1)]))
2577         );
2578         assert_eq!(
2579             parser_nest_limit("abc", 1).parse(),
2580             Ok(concat(0..3, vec![lit('a', 0), lit('b', 1), lit('c', 2)]))
2581         );
2582 
2583         // Test alternations. An alternation requires one level of nesting.
2584         assert_eq!(
2585             parser_nest_limit("a|b", 0).parse().unwrap_err(),
2586             TestError {
2587                 span: span(0..3),
2588                 kind: ast::ErrorKind::NestLimitExceeded(0),
2589             }
2590         );
2591         assert_eq!(
2592             parser_nest_limit("a|b", 1).parse(),
2593             Ok(alt(0..3, vec![lit('a', 0), lit('b', 2)]))
2594         );
2595         assert_eq!(
2596             parser_nest_limit("a|b|c", 1).parse(),
2597             Ok(alt(0..5, vec![lit('a', 0), lit('b', 2), lit('c', 4)]))
2598         );
2599 
2600         // Test character classes. Classes form their own mini-recursive
2601         // syntax!
2602         assert_eq!(
2603             parser_nest_limit("[a]", 0).parse().unwrap_err(),
2604             TestError {
2605                 span: span(0..3),
2606                 kind: ast::ErrorKind::NestLimitExceeded(0),
2607             }
2608         );
2609         assert_eq!(
2610             parser_nest_limit("[a]", 1).parse(),
2611             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
2612                 span: span(0..3),
2613                 negated: false,
2614                 kind: ast::ClassSet::Item(ast::ClassSetItem::Literal(
2615                     ast::Literal {
2616                         span: span(1..2),
2617                         kind: ast::LiteralKind::Verbatim,
2618                         c: 'a',
2619                     }
2620                 )),
2621             })))
2622         );
2623         assert_eq!(
2624             parser_nest_limit("[ab]", 1).parse().unwrap_err(),
2625             TestError {
2626                 span: span(1..3),
2627                 kind: ast::ErrorKind::NestLimitExceeded(1),
2628             }
2629         );
2630         assert_eq!(
2631             parser_nest_limit("[ab[cd]]", 2).parse().unwrap_err(),
2632             TestError {
2633                 span: span(3..7),
2634                 kind: ast::ErrorKind::NestLimitExceeded(2),
2635             }
2636         );
2637         assert_eq!(
2638             parser_nest_limit("[ab[cd]]", 3).parse().unwrap_err(),
2639             TestError {
2640                 span: span(4..6),
2641                 kind: ast::ErrorKind::NestLimitExceeded(3),
2642             }
2643         );
2644         assert_eq!(
2645             parser_nest_limit("[a--b]", 1).parse().unwrap_err(),
2646             TestError {
2647                 span: span(1..5),
2648                 kind: ast::ErrorKind::NestLimitExceeded(1),
2649             }
2650         );
2651         assert_eq!(
2652             parser_nest_limit("[a--bc]", 2).parse().unwrap_err(),
2653             TestError {
2654                 span: span(4..6),
2655                 kind: ast::ErrorKind::NestLimitExceeded(2),
2656             }
2657         );
2658     }
2659 
2660     #[test]
parse_comments()2661     fn parse_comments() {
2662         let pat = "(?x)
2663 # This is comment 1.
2664 foo # This is comment 2.
2665   # This is comment 3.
2666 bar
2667 # This is comment 4.";
2668         let astc = parser(pat).parse_with_comments().unwrap();
2669         assert_eq!(
2670             astc.ast,
2671             concat_with(
2672                 span_range(pat, 0..pat.len()),
2673                 vec![
2674                     flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2675                     lit_with('f', span_range(pat, 26..27)),
2676                     lit_with('o', span_range(pat, 27..28)),
2677                     lit_with('o', span_range(pat, 28..29)),
2678                     lit_with('b', span_range(pat, 74..75)),
2679                     lit_with('a', span_range(pat, 75..76)),
2680                     lit_with('r', span_range(pat, 76..77)),
2681                 ]
2682             )
2683         );
2684         assert_eq!(
2685             astc.comments,
2686             vec![
2687                 ast::Comment {
2688                     span: span_range(pat, 5..26),
2689                     comment: s(" This is comment 1."),
2690                 },
2691                 ast::Comment {
2692                     span: span_range(pat, 30..51),
2693                     comment: s(" This is comment 2."),
2694                 },
2695                 ast::Comment {
2696                     span: span_range(pat, 53..74),
2697                     comment: s(" This is comment 3."),
2698                 },
2699                 ast::Comment {
2700                     span: span_range(pat, 78..98),
2701                     comment: s(" This is comment 4."),
2702                 },
2703             ]
2704         );
2705     }
2706 
2707     #[test]
parse_holistic()2708     fn parse_holistic() {
2709         assert_eq!(parser("]").parse(), Ok(lit(']', 0)));
2710         assert_eq!(
2711             parser(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#\&\-\~").parse(),
2712             Ok(concat(
2713                 0..36,
2714                 vec![
2715                     punct_lit('\\', span(0..2)),
2716                     punct_lit('.', span(2..4)),
2717                     punct_lit('+', span(4..6)),
2718                     punct_lit('*', span(6..8)),
2719                     punct_lit('?', span(8..10)),
2720                     punct_lit('(', span(10..12)),
2721                     punct_lit(')', span(12..14)),
2722                     punct_lit('|', span(14..16)),
2723                     punct_lit('[', span(16..18)),
2724                     punct_lit(']', span(18..20)),
2725                     punct_lit('{', span(20..22)),
2726                     punct_lit('}', span(22..24)),
2727                     punct_lit('^', span(24..26)),
2728                     punct_lit('$', span(26..28)),
2729                     punct_lit('#', span(28..30)),
2730                     punct_lit('&', span(30..32)),
2731                     punct_lit('-', span(32..34)),
2732                     punct_lit('~', span(34..36)),
2733                 ]
2734             ))
2735         );
2736     }
2737 
2738     #[test]
parse_ignore_whitespace()2739     fn parse_ignore_whitespace() {
2740         // Test that basic whitespace insensitivity works.
2741         let pat = "(?x)a b";
2742         assert_eq!(
2743             parser(pat).parse(),
2744             Ok(concat_with(
2745                 nspan(npos(0, 1, 1), npos(7, 1, 8)),
2746                 vec![
2747                     flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2748                     lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
2749                     lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
2750                 ]
2751             ))
2752         );
2753 
2754         // Test that we can toggle whitespace insensitivity.
2755         let pat = "(?x)a b(?-x)a b";
2756         assert_eq!(
2757             parser(pat).parse(),
2758             Ok(concat_with(
2759                 nspan(npos(0, 1, 1), npos(15, 1, 16)),
2760                 vec![
2761                     flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2762                     lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
2763                     lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
2764                     flag_set(pat, 7..12, ast::Flag::IgnoreWhitespace, true),
2765                     lit_with('a', nspan(npos(12, 1, 13), npos(13, 1, 14))),
2766                     lit_with(' ', nspan(npos(13, 1, 14), npos(14, 1, 15))),
2767                     lit_with('b', nspan(npos(14, 1, 15), npos(15, 1, 16))),
2768                 ]
2769             ))
2770         );
2771 
2772         // Test that nesting whitespace insensitive flags works.
2773         let pat = "a (?x:a )a ";
2774         assert_eq!(
2775             parser(pat).parse(),
2776             Ok(concat_with(
2777                 span_range(pat, 0..11),
2778                 vec![
2779                     lit_with('a', span_range(pat, 0..1)),
2780                     lit_with(' ', span_range(pat, 1..2)),
2781                     Ast::Group(ast::Group {
2782                         span: span_range(pat, 2..9),
2783                         kind: ast::GroupKind::NonCapturing(ast::Flags {
2784                             span: span_range(pat, 4..5),
2785                             items: vec![ast::FlagsItem {
2786                                 span: span_range(pat, 4..5),
2787                                 kind: ast::FlagsItemKind::Flag(
2788                                     ast::Flag::IgnoreWhitespace
2789                                 ),
2790                             },],
2791                         }),
2792                         ast: Box::new(lit_with('a', span_range(pat, 6..7))),
2793                     }),
2794                     lit_with('a', span_range(pat, 9..10)),
2795                     lit_with(' ', span_range(pat, 10..11)),
2796                 ]
2797             ))
2798         );
2799 
2800         // Test that whitespace after an opening paren is insignificant.
2801         let pat = "(?x)( ?P<foo> a )";
2802         assert_eq!(
2803             parser(pat).parse(),
2804             Ok(concat_with(
2805                 span_range(pat, 0..pat.len()),
2806                 vec![
2807                     flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2808                     Ast::Group(ast::Group {
2809                         span: span_range(pat, 4..pat.len()),
2810                         kind: ast::GroupKind::CaptureName(ast::CaptureName {
2811                             span: span_range(pat, 9..12),
2812                             name: s("foo"),
2813                             index: 1,
2814                         }),
2815                         ast: Box::new(lit_with('a', span_range(pat, 14..15))),
2816                     }),
2817                 ]
2818             ))
2819         );
2820         let pat = "(?x)(  a )";
2821         assert_eq!(
2822             parser(pat).parse(),
2823             Ok(concat_with(
2824                 span_range(pat, 0..pat.len()),
2825                 vec![
2826                     flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2827                     Ast::Group(ast::Group {
2828                         span: span_range(pat, 4..pat.len()),
2829                         kind: ast::GroupKind::CaptureIndex(1),
2830                         ast: Box::new(lit_with('a', span_range(pat, 7..8))),
2831                     }),
2832                 ]
2833             ))
2834         );
2835         let pat = "(?x)(  ?:  a )";
2836         assert_eq!(
2837             parser(pat).parse(),
2838             Ok(concat_with(
2839                 span_range(pat, 0..pat.len()),
2840                 vec![
2841                     flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2842                     Ast::Group(ast::Group {
2843                         span: span_range(pat, 4..pat.len()),
2844                         kind: ast::GroupKind::NonCapturing(ast::Flags {
2845                             span: span_range(pat, 8..8),
2846                             items: vec![],
2847                         }),
2848                         ast: Box::new(lit_with('a', span_range(pat, 11..12))),
2849                     }),
2850                 ]
2851             ))
2852         );
2853         let pat = r"(?x)\x { 53 }";
2854         assert_eq!(
2855             parser(pat).parse(),
2856             Ok(concat_with(
2857                 span_range(pat, 0..pat.len()),
2858                 vec![
2859                     flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2860                     Ast::Literal(ast::Literal {
2861                         span: span(4..13),
2862                         kind: ast::LiteralKind::HexBrace(
2863                             ast::HexLiteralKind::X
2864                         ),
2865                         c: 'S',
2866                     }),
2867                 ]
2868             ))
2869         );
2870 
2871         // Test that whitespace after an escape is OK.
2872         let pat = r"(?x)\ ";
2873         assert_eq!(
2874             parser(pat).parse(),
2875             Ok(concat_with(
2876                 span_range(pat, 0..pat.len()),
2877                 vec![
2878                     flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2879                     Ast::Literal(ast::Literal {
2880                         span: span_range(pat, 4..6),
2881                         kind: ast::LiteralKind::Special(
2882                             ast::SpecialLiteralKind::Space
2883                         ),
2884                         c: ' ',
2885                     }),
2886                 ]
2887             ))
2888         );
2889         // ... but only when `x` mode is enabled.
2890         let pat = r"\ ";
2891         assert_eq!(
2892             parser(pat).parse().unwrap_err(),
2893             TestError {
2894                 span: span_range(pat, 0..2),
2895                 kind: ast::ErrorKind::EscapeUnrecognized,
2896             }
2897         );
2898     }
2899 
2900     #[test]
parse_newlines()2901     fn parse_newlines() {
2902         let pat = ".\n.";
2903         assert_eq!(
2904             parser(pat).parse(),
2905             Ok(concat_with(
2906                 span_range(pat, 0..3),
2907                 vec![
2908                     Ast::Dot(span_range(pat, 0..1)),
2909                     lit_with('\n', span_range(pat, 1..2)),
2910                     Ast::Dot(span_range(pat, 2..3)),
2911                 ]
2912             ))
2913         );
2914 
2915         let pat = "foobar\nbaz\nquux\n";
2916         assert_eq!(
2917             parser(pat).parse(),
2918             Ok(concat_with(
2919                 span_range(pat, 0..pat.len()),
2920                 vec![
2921                     lit_with('f', nspan(npos(0, 1, 1), npos(1, 1, 2))),
2922                     lit_with('o', nspan(npos(1, 1, 2), npos(2, 1, 3))),
2923                     lit_with('o', nspan(npos(2, 1, 3), npos(3, 1, 4))),
2924                     lit_with('b', nspan(npos(3, 1, 4), npos(4, 1, 5))),
2925                     lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
2926                     lit_with('r', nspan(npos(5, 1, 6), npos(6, 1, 7))),
2927                     lit_with('\n', nspan(npos(6, 1, 7), npos(7, 2, 1))),
2928                     lit_with('b', nspan(npos(7, 2, 1), npos(8, 2, 2))),
2929                     lit_with('a', nspan(npos(8, 2, 2), npos(9, 2, 3))),
2930                     lit_with('z', nspan(npos(9, 2, 3), npos(10, 2, 4))),
2931                     lit_with('\n', nspan(npos(10, 2, 4), npos(11, 3, 1))),
2932                     lit_with('q', nspan(npos(11, 3, 1), npos(12, 3, 2))),
2933                     lit_with('u', nspan(npos(12, 3, 2), npos(13, 3, 3))),
2934                     lit_with('u', nspan(npos(13, 3, 3), npos(14, 3, 4))),
2935                     lit_with('x', nspan(npos(14, 3, 4), npos(15, 3, 5))),
2936                     lit_with('\n', nspan(npos(15, 3, 5), npos(16, 4, 1))),
2937                 ]
2938             ))
2939         );
2940     }
2941 
2942     #[test]
parse_uncounted_repetition()2943     fn parse_uncounted_repetition() {
2944         assert_eq!(
2945             parser(r"a*").parse(),
2946             Ok(Ast::Repetition(ast::Repetition {
2947                 span: span(0..2),
2948                 op: ast::RepetitionOp {
2949                     span: span(1..2),
2950                     kind: ast::RepetitionKind::ZeroOrMore,
2951                 },
2952                 greedy: true,
2953                 ast: Box::new(lit('a', 0)),
2954             }))
2955         );
2956         assert_eq!(
2957             parser(r"a+").parse(),
2958             Ok(Ast::Repetition(ast::Repetition {
2959                 span: span(0..2),
2960                 op: ast::RepetitionOp {
2961                     span: span(1..2),
2962                     kind: ast::RepetitionKind::OneOrMore,
2963                 },
2964                 greedy: true,
2965                 ast: Box::new(lit('a', 0)),
2966             }))
2967         );
2968 
2969         assert_eq!(
2970             parser(r"a?").parse(),
2971             Ok(Ast::Repetition(ast::Repetition {
2972                 span: span(0..2),
2973                 op: ast::RepetitionOp {
2974                     span: span(1..2),
2975                     kind: ast::RepetitionKind::ZeroOrOne,
2976                 },
2977                 greedy: true,
2978                 ast: Box::new(lit('a', 0)),
2979             }))
2980         );
2981         assert_eq!(
2982             parser(r"a??").parse(),
2983             Ok(Ast::Repetition(ast::Repetition {
2984                 span: span(0..3),
2985                 op: ast::RepetitionOp {
2986                     span: span(1..3),
2987                     kind: ast::RepetitionKind::ZeroOrOne,
2988                 },
2989                 greedy: false,
2990                 ast: Box::new(lit('a', 0)),
2991             }))
2992         );
2993         assert_eq!(
2994             parser(r"a?").parse(),
2995             Ok(Ast::Repetition(ast::Repetition {
2996                 span: span(0..2),
2997                 op: ast::RepetitionOp {
2998                     span: span(1..2),
2999                     kind: ast::RepetitionKind::ZeroOrOne,
3000                 },
3001                 greedy: true,
3002                 ast: Box::new(lit('a', 0)),
3003             }))
3004         );
3005         assert_eq!(
3006             parser(r"a?b").parse(),
3007             Ok(concat(
3008                 0..3,
3009                 vec![
3010                     Ast::Repetition(ast::Repetition {
3011                         span: span(0..2),
3012                         op: ast::RepetitionOp {
3013                             span: span(1..2),
3014                             kind: ast::RepetitionKind::ZeroOrOne,
3015                         },
3016                         greedy: true,
3017                         ast: Box::new(lit('a', 0)),
3018                     }),
3019                     lit('b', 2),
3020                 ]
3021             ))
3022         );
3023         assert_eq!(
3024             parser(r"a??b").parse(),
3025             Ok(concat(
3026                 0..4,
3027                 vec![
3028                     Ast::Repetition(ast::Repetition {
3029                         span: span(0..3),
3030                         op: ast::RepetitionOp {
3031                             span: span(1..3),
3032                             kind: ast::RepetitionKind::ZeroOrOne,
3033                         },
3034                         greedy: false,
3035                         ast: Box::new(lit('a', 0)),
3036                     }),
3037                     lit('b', 3),
3038                 ]
3039             ))
3040         );
3041         assert_eq!(
3042             parser(r"ab?").parse(),
3043             Ok(concat(
3044                 0..3,
3045                 vec![
3046                     lit('a', 0),
3047                     Ast::Repetition(ast::Repetition {
3048                         span: span(1..3),
3049                         op: ast::RepetitionOp {
3050                             span: span(2..3),
3051                             kind: ast::RepetitionKind::ZeroOrOne,
3052                         },
3053                         greedy: true,
3054                         ast: Box::new(lit('b', 1)),
3055                     }),
3056                 ]
3057             ))
3058         );
3059         assert_eq!(
3060             parser(r"(ab)?").parse(),
3061             Ok(Ast::Repetition(ast::Repetition {
3062                 span: span(0..5),
3063                 op: ast::RepetitionOp {
3064                     span: span(4..5),
3065                     kind: ast::RepetitionKind::ZeroOrOne,
3066                 },
3067                 greedy: true,
3068                 ast: Box::new(group(
3069                     0..4,
3070                     1,
3071                     concat(1..3, vec![lit('a', 1), lit('b', 2),])
3072                 )),
3073             }))
3074         );
3075         assert_eq!(
3076             parser(r"|a?").parse(),
3077             Ok(alt(
3078                 0..3,
3079                 vec![
3080                     Ast::Empty(span(0..0)),
3081                     Ast::Repetition(ast::Repetition {
3082                         span: span(1..3),
3083                         op: ast::RepetitionOp {
3084                             span: span(2..3),
3085                             kind: ast::RepetitionKind::ZeroOrOne,
3086                         },
3087                         greedy: true,
3088                         ast: Box::new(lit('a', 1)),
3089                     }),
3090                 ]
3091             ))
3092         );
3093 
3094         assert_eq!(
3095             parser(r"*").parse().unwrap_err(),
3096             TestError {
3097                 span: span(0..0),
3098                 kind: ast::ErrorKind::RepetitionMissing,
3099             }
3100         );
3101         assert_eq!(
3102             parser(r"(?i)*").parse().unwrap_err(),
3103             TestError {
3104                 span: span(4..4),
3105                 kind: ast::ErrorKind::RepetitionMissing,
3106             }
3107         );
3108         assert_eq!(
3109             parser(r"(*)").parse().unwrap_err(),
3110             TestError {
3111                 span: span(1..1),
3112                 kind: ast::ErrorKind::RepetitionMissing,
3113             }
3114         );
3115         assert_eq!(
3116             parser(r"(?:?)").parse().unwrap_err(),
3117             TestError {
3118                 span: span(3..3),
3119                 kind: ast::ErrorKind::RepetitionMissing,
3120             }
3121         );
3122         assert_eq!(
3123             parser(r"+").parse().unwrap_err(),
3124             TestError {
3125                 span: span(0..0),
3126                 kind: ast::ErrorKind::RepetitionMissing,
3127             }
3128         );
3129         assert_eq!(
3130             parser(r"?").parse().unwrap_err(),
3131             TestError {
3132                 span: span(0..0),
3133                 kind: ast::ErrorKind::RepetitionMissing,
3134             }
3135         );
3136         assert_eq!(
3137             parser(r"(?)").parse().unwrap_err(),
3138             TestError {
3139                 span: span(1..1),
3140                 kind: ast::ErrorKind::RepetitionMissing,
3141             }
3142         );
3143         assert_eq!(
3144             parser(r"|*").parse().unwrap_err(),
3145             TestError {
3146                 span: span(1..1),
3147                 kind: ast::ErrorKind::RepetitionMissing,
3148             }
3149         );
3150         assert_eq!(
3151             parser(r"|+").parse().unwrap_err(),
3152             TestError {
3153                 span: span(1..1),
3154                 kind: ast::ErrorKind::RepetitionMissing,
3155             }
3156         );
3157         assert_eq!(
3158             parser(r"|?").parse().unwrap_err(),
3159             TestError {
3160                 span: span(1..1),
3161                 kind: ast::ErrorKind::RepetitionMissing,
3162             }
3163         );
3164     }
3165 
3166     #[test]
parse_counted_repetition()3167     fn parse_counted_repetition() {
3168         assert_eq!(
3169             parser(r"a{5}").parse(),
3170             Ok(Ast::Repetition(ast::Repetition {
3171                 span: span(0..4),
3172                 op: ast::RepetitionOp {
3173                     span: span(1..4),
3174                     kind: ast::RepetitionKind::Range(
3175                         ast::RepetitionRange::Exactly(5)
3176                     ),
3177                 },
3178                 greedy: true,
3179                 ast: Box::new(lit('a', 0)),
3180             }))
3181         );
3182         assert_eq!(
3183             parser(r"a{5,}").parse(),
3184             Ok(Ast::Repetition(ast::Repetition {
3185                 span: span(0..5),
3186                 op: ast::RepetitionOp {
3187                     span: span(1..5),
3188                     kind: ast::RepetitionKind::Range(
3189                         ast::RepetitionRange::AtLeast(5)
3190                     ),
3191                 },
3192                 greedy: true,
3193                 ast: Box::new(lit('a', 0)),
3194             }))
3195         );
3196         assert_eq!(
3197             parser(r"a{5,9}").parse(),
3198             Ok(Ast::Repetition(ast::Repetition {
3199                 span: span(0..6),
3200                 op: ast::RepetitionOp {
3201                     span: span(1..6),
3202                     kind: ast::RepetitionKind::Range(
3203                         ast::RepetitionRange::Bounded(5, 9)
3204                     ),
3205                 },
3206                 greedy: true,
3207                 ast: Box::new(lit('a', 0)),
3208             }))
3209         );
3210         assert_eq!(
3211             parser(r"a{5}?").parse(),
3212             Ok(Ast::Repetition(ast::Repetition {
3213                 span: span(0..5),
3214                 op: ast::RepetitionOp {
3215                     span: span(1..5),
3216                     kind: ast::RepetitionKind::Range(
3217                         ast::RepetitionRange::Exactly(5)
3218                     ),
3219                 },
3220                 greedy: false,
3221                 ast: Box::new(lit('a', 0)),
3222             }))
3223         );
3224         assert_eq!(
3225             parser(r"ab{5}").parse(),
3226             Ok(concat(
3227                 0..5,
3228                 vec![
3229                     lit('a', 0),
3230                     Ast::Repetition(ast::Repetition {
3231                         span: span(1..5),
3232                         op: ast::RepetitionOp {
3233                             span: span(2..5),
3234                             kind: ast::RepetitionKind::Range(
3235                                 ast::RepetitionRange::Exactly(5)
3236                             ),
3237                         },
3238                         greedy: true,
3239                         ast: Box::new(lit('b', 1)),
3240                     }),
3241                 ]
3242             ))
3243         );
3244         assert_eq!(
3245             parser(r"ab{5}c").parse(),
3246             Ok(concat(
3247                 0..6,
3248                 vec![
3249                     lit('a', 0),
3250                     Ast::Repetition(ast::Repetition {
3251                         span: span(1..5),
3252                         op: ast::RepetitionOp {
3253                             span: span(2..5),
3254                             kind: ast::RepetitionKind::Range(
3255                                 ast::RepetitionRange::Exactly(5)
3256                             ),
3257                         },
3258                         greedy: true,
3259                         ast: Box::new(lit('b', 1)),
3260                     }),
3261                     lit('c', 5),
3262                 ]
3263             ))
3264         );
3265 
3266         assert_eq!(
3267             parser(r"a{ 5 }").parse(),
3268             Ok(Ast::Repetition(ast::Repetition {
3269                 span: span(0..6),
3270                 op: ast::RepetitionOp {
3271                     span: span(1..6),
3272                     kind: ast::RepetitionKind::Range(
3273                         ast::RepetitionRange::Exactly(5)
3274                     ),
3275                 },
3276                 greedy: true,
3277                 ast: Box::new(lit('a', 0)),
3278             }))
3279         );
3280         assert_eq!(
3281             parser(r"a{ 5 , 9 }").parse(),
3282             Ok(Ast::Repetition(ast::Repetition {
3283                 span: span(0..10),
3284                 op: ast::RepetitionOp {
3285                     span: span(1..10),
3286                     kind: ast::RepetitionKind::Range(
3287                         ast::RepetitionRange::Bounded(5, 9)
3288                     ),
3289                 },
3290                 greedy: true,
3291                 ast: Box::new(lit('a', 0)),
3292             }))
3293         );
3294         assert_eq!(
3295             parser_ignore_whitespace(r"a{5,9} ?").parse(),
3296             Ok(Ast::Repetition(ast::Repetition {
3297                 span: span(0..8),
3298                 op: ast::RepetitionOp {
3299                     span: span(1..8),
3300                     kind: ast::RepetitionKind::Range(
3301                         ast::RepetitionRange::Bounded(5, 9)
3302                     ),
3303                 },
3304                 greedy: false,
3305                 ast: Box::new(lit('a', 0)),
3306             }))
3307         );
3308 
3309         assert_eq!(
3310             parser(r"(?i){0}").parse().unwrap_err(),
3311             TestError {
3312                 span: span(4..4),
3313                 kind: ast::ErrorKind::RepetitionMissing,
3314             }
3315         );
3316         assert_eq!(
3317             parser(r"(?m){1,1}").parse().unwrap_err(),
3318             TestError {
3319                 span: span(4..4),
3320                 kind: ast::ErrorKind::RepetitionMissing,
3321             }
3322         );
3323         assert_eq!(
3324             parser(r"a{]}").parse().unwrap_err(),
3325             TestError {
3326                 span: span(2..2),
3327                 kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3328             }
3329         );
3330         assert_eq!(
3331             parser(r"a{1,]}").parse().unwrap_err(),
3332             TestError {
3333                 span: span(4..4),
3334                 kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3335             }
3336         );
3337         assert_eq!(
3338             parser(r"a{").parse().unwrap_err(),
3339             TestError {
3340                 span: span(1..2),
3341                 kind: ast::ErrorKind::RepetitionCountUnclosed,
3342             }
3343         );
3344         assert_eq!(
3345             parser(r"a{}").parse().unwrap_err(),
3346             TestError {
3347                 span: span(2..2),
3348                 kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3349             }
3350         );
3351         assert_eq!(
3352             parser(r"a{a").parse().unwrap_err(),
3353             TestError {
3354                 span: span(2..2),
3355                 kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3356             }
3357         );
3358         assert_eq!(
3359             parser(r"a{9999999999}").parse().unwrap_err(),
3360             TestError {
3361                 span: span(2..12),
3362                 kind: ast::ErrorKind::DecimalInvalid,
3363             }
3364         );
3365         assert_eq!(
3366             parser(r"a{9").parse().unwrap_err(),
3367             TestError {
3368                 span: span(1..3),
3369                 kind: ast::ErrorKind::RepetitionCountUnclosed,
3370             }
3371         );
3372         assert_eq!(
3373             parser(r"a{9,a").parse().unwrap_err(),
3374             TestError {
3375                 span: span(4..4),
3376                 kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3377             }
3378         );
3379         assert_eq!(
3380             parser(r"a{9,9999999999}").parse().unwrap_err(),
3381             TestError {
3382                 span: span(4..14),
3383                 kind: ast::ErrorKind::DecimalInvalid,
3384             }
3385         );
3386         assert_eq!(
3387             parser(r"a{9,").parse().unwrap_err(),
3388             TestError {
3389                 span: span(1..4),
3390                 kind: ast::ErrorKind::RepetitionCountUnclosed,
3391             }
3392         );
3393         assert_eq!(
3394             parser(r"a{9,11").parse().unwrap_err(),
3395             TestError {
3396                 span: span(1..6),
3397                 kind: ast::ErrorKind::RepetitionCountUnclosed,
3398             }
3399         );
3400         assert_eq!(
3401             parser(r"a{2,1}").parse().unwrap_err(),
3402             TestError {
3403                 span: span(1..6),
3404                 kind: ast::ErrorKind::RepetitionCountInvalid,
3405             }
3406         );
3407         assert_eq!(
3408             parser(r"{5}").parse().unwrap_err(),
3409             TestError {
3410                 span: span(0..0),
3411                 kind: ast::ErrorKind::RepetitionMissing,
3412             }
3413         );
3414         assert_eq!(
3415             parser(r"|{5}").parse().unwrap_err(),
3416             TestError {
3417                 span: span(1..1),
3418                 kind: ast::ErrorKind::RepetitionMissing,
3419             }
3420         );
3421     }
3422 
3423     #[test]
parse_alternate()3424     fn parse_alternate() {
3425         assert_eq!(
3426             parser(r"a|b").parse(),
3427             Ok(Ast::Alternation(ast::Alternation {
3428                 span: span(0..3),
3429                 asts: vec![lit('a', 0), lit('b', 2)],
3430             }))
3431         );
3432         assert_eq!(
3433             parser(r"(a|b)").parse(),
3434             Ok(group(
3435                 0..5,
3436                 1,
3437                 Ast::Alternation(ast::Alternation {
3438                     span: span(1..4),
3439                     asts: vec![lit('a', 1), lit('b', 3)],
3440                 })
3441             ))
3442         );
3443 
3444         assert_eq!(
3445             parser(r"a|b|c").parse(),
3446             Ok(Ast::Alternation(ast::Alternation {
3447                 span: span(0..5),
3448                 asts: vec![lit('a', 0), lit('b', 2), lit('c', 4)],
3449             }))
3450         );
3451         assert_eq!(
3452             parser(r"ax|by|cz").parse(),
3453             Ok(Ast::Alternation(ast::Alternation {
3454                 span: span(0..8),
3455                 asts: vec![
3456                     concat(0..2, vec![lit('a', 0), lit('x', 1)]),
3457                     concat(3..5, vec![lit('b', 3), lit('y', 4)]),
3458                     concat(6..8, vec![lit('c', 6), lit('z', 7)]),
3459                 ],
3460             }))
3461         );
3462         assert_eq!(
3463             parser(r"(ax|by|cz)").parse(),
3464             Ok(group(
3465                 0..10,
3466                 1,
3467                 Ast::Alternation(ast::Alternation {
3468                     span: span(1..9),
3469                     asts: vec![
3470                         concat(1..3, vec![lit('a', 1), lit('x', 2)]),
3471                         concat(4..6, vec![lit('b', 4), lit('y', 5)]),
3472                         concat(7..9, vec![lit('c', 7), lit('z', 8)]),
3473                     ],
3474                 })
3475             ))
3476         );
3477         assert_eq!(
3478             parser(r"(ax|(by|(cz)))").parse(),
3479             Ok(group(
3480                 0..14,
3481                 1,
3482                 alt(
3483                     1..13,
3484                     vec![
3485                         concat(1..3, vec![lit('a', 1), lit('x', 2)]),
3486                         group(
3487                             4..13,
3488                             2,
3489                             alt(
3490                                 5..12,
3491                                 vec![
3492                                     concat(
3493                                         5..7,
3494                                         vec![lit('b', 5), lit('y', 6)]
3495                                     ),
3496                                     group(
3497                                         8..12,
3498                                         3,
3499                                         concat(
3500                                             9..11,
3501                                             vec![lit('c', 9), lit('z', 10),]
3502                                         )
3503                                     ),
3504                                 ]
3505                             )
3506                         ),
3507                     ]
3508                 )
3509             ))
3510         );
3511 
3512         assert_eq!(
3513             parser(r"|").parse(),
3514             Ok(alt(
3515                 0..1,
3516                 vec![Ast::Empty(span(0..0)), Ast::Empty(span(1..1)),]
3517             ))
3518         );
3519         assert_eq!(
3520             parser(r"||").parse(),
3521             Ok(alt(
3522                 0..2,
3523                 vec![
3524                     Ast::Empty(span(0..0)),
3525                     Ast::Empty(span(1..1)),
3526                     Ast::Empty(span(2..2)),
3527                 ]
3528             ))
3529         );
3530         assert_eq!(
3531             parser(r"a|").parse(),
3532             Ok(alt(0..2, vec![lit('a', 0), Ast::Empty(span(2..2)),]))
3533         );
3534         assert_eq!(
3535             parser(r"|a").parse(),
3536             Ok(alt(0..2, vec![Ast::Empty(span(0..0)), lit('a', 1),]))
3537         );
3538 
3539         assert_eq!(
3540             parser(r"(|)").parse(),
3541             Ok(group(
3542                 0..3,
3543                 1,
3544                 alt(
3545                     1..2,
3546                     vec![Ast::Empty(span(1..1)), Ast::Empty(span(2..2)),]
3547                 )
3548             ))
3549         );
3550         assert_eq!(
3551             parser(r"(a|)").parse(),
3552             Ok(group(
3553                 0..4,
3554                 1,
3555                 alt(1..3, vec![lit('a', 1), Ast::Empty(span(3..3)),])
3556             ))
3557         );
3558         assert_eq!(
3559             parser(r"(|a)").parse(),
3560             Ok(group(
3561                 0..4,
3562                 1,
3563                 alt(1..3, vec![Ast::Empty(span(1..1)), lit('a', 2),])
3564             ))
3565         );
3566 
3567         assert_eq!(
3568             parser(r"a|b)").parse().unwrap_err(),
3569             TestError {
3570                 span: span(3..4),
3571                 kind: ast::ErrorKind::GroupUnopened,
3572             }
3573         );
3574         assert_eq!(
3575             parser(r"(a|b").parse().unwrap_err(),
3576             TestError {
3577                 span: span(0..1),
3578                 kind: ast::ErrorKind::GroupUnclosed,
3579             }
3580         );
3581     }
3582 
3583     #[test]
parse_unsupported_lookaround()3584     fn parse_unsupported_lookaround() {
3585         assert_eq!(
3586             parser(r"(?=a)").parse().unwrap_err(),
3587             TestError {
3588                 span: span(0..3),
3589                 kind: ast::ErrorKind::UnsupportedLookAround,
3590             }
3591         );
3592         assert_eq!(
3593             parser(r"(?!a)").parse().unwrap_err(),
3594             TestError {
3595                 span: span(0..3),
3596                 kind: ast::ErrorKind::UnsupportedLookAround,
3597             }
3598         );
3599         assert_eq!(
3600             parser(r"(?<=a)").parse().unwrap_err(),
3601             TestError {
3602                 span: span(0..4),
3603                 kind: ast::ErrorKind::UnsupportedLookAround,
3604             }
3605         );
3606         assert_eq!(
3607             parser(r"(?<!a)").parse().unwrap_err(),
3608             TestError {
3609                 span: span(0..4),
3610                 kind: ast::ErrorKind::UnsupportedLookAround,
3611             }
3612         );
3613     }
3614 
3615     #[test]
parse_group()3616     fn parse_group() {
3617         assert_eq!(
3618             parser("(?i)").parse(),
3619             Ok(Ast::Flags(ast::SetFlags {
3620                 span: span(0..4),
3621                 flags: ast::Flags {
3622                     span: span(2..3),
3623                     items: vec![ast::FlagsItem {
3624                         span: span(2..3),
3625                         kind: ast::FlagsItemKind::Flag(
3626                             ast::Flag::CaseInsensitive
3627                         ),
3628                     }],
3629                 },
3630             }))
3631         );
3632         assert_eq!(
3633             parser("(?iU)").parse(),
3634             Ok(Ast::Flags(ast::SetFlags {
3635                 span: span(0..5),
3636                 flags: ast::Flags {
3637                     span: span(2..4),
3638                     items: vec![
3639                         ast::FlagsItem {
3640                             span: span(2..3),
3641                             kind: ast::FlagsItemKind::Flag(
3642                                 ast::Flag::CaseInsensitive
3643                             ),
3644                         },
3645                         ast::FlagsItem {
3646                             span: span(3..4),
3647                             kind: ast::FlagsItemKind::Flag(
3648                                 ast::Flag::SwapGreed
3649                             ),
3650                         },
3651                     ],
3652                 },
3653             }))
3654         );
3655         assert_eq!(
3656             parser("(?i-U)").parse(),
3657             Ok(Ast::Flags(ast::SetFlags {
3658                 span: span(0..6),
3659                 flags: ast::Flags {
3660                     span: span(2..5),
3661                     items: vec![
3662                         ast::FlagsItem {
3663                             span: span(2..3),
3664                             kind: ast::FlagsItemKind::Flag(
3665                                 ast::Flag::CaseInsensitive
3666                             ),
3667                         },
3668                         ast::FlagsItem {
3669                             span: span(3..4),
3670                             kind: ast::FlagsItemKind::Negation,
3671                         },
3672                         ast::FlagsItem {
3673                             span: span(4..5),
3674                             kind: ast::FlagsItemKind::Flag(
3675                                 ast::Flag::SwapGreed
3676                             ),
3677                         },
3678                     ],
3679                 },
3680             }))
3681         );
3682 
3683         assert_eq!(
3684             parser("()").parse(),
3685             Ok(Ast::Group(ast::Group {
3686                 span: span(0..2),
3687                 kind: ast::GroupKind::CaptureIndex(1),
3688                 ast: Box::new(Ast::Empty(span(1..1))),
3689             }))
3690         );
3691         assert_eq!(
3692             parser("(a)").parse(),
3693             Ok(Ast::Group(ast::Group {
3694                 span: span(0..3),
3695                 kind: ast::GroupKind::CaptureIndex(1),
3696                 ast: Box::new(lit('a', 1)),
3697             }))
3698         );
3699         assert_eq!(
3700             parser("(())").parse(),
3701             Ok(Ast::Group(ast::Group {
3702                 span: span(0..4),
3703                 kind: ast::GroupKind::CaptureIndex(1),
3704                 ast: Box::new(Ast::Group(ast::Group {
3705                     span: span(1..3),
3706                     kind: ast::GroupKind::CaptureIndex(2),
3707                     ast: Box::new(Ast::Empty(span(2..2))),
3708                 })),
3709             }))
3710         );
3711 
3712         assert_eq!(
3713             parser("(?:a)").parse(),
3714             Ok(Ast::Group(ast::Group {
3715                 span: span(0..5),
3716                 kind: ast::GroupKind::NonCapturing(ast::Flags {
3717                     span: span(2..2),
3718                     items: vec![],
3719                 }),
3720                 ast: Box::new(lit('a', 3)),
3721             }))
3722         );
3723 
3724         assert_eq!(
3725             parser("(?i:a)").parse(),
3726             Ok(Ast::Group(ast::Group {
3727                 span: span(0..6),
3728                 kind: ast::GroupKind::NonCapturing(ast::Flags {
3729                     span: span(2..3),
3730                     items: vec![ast::FlagsItem {
3731                         span: span(2..3),
3732                         kind: ast::FlagsItemKind::Flag(
3733                             ast::Flag::CaseInsensitive
3734                         ),
3735                     },],
3736                 }),
3737                 ast: Box::new(lit('a', 4)),
3738             }))
3739         );
3740         assert_eq!(
3741             parser("(?i-U:a)").parse(),
3742             Ok(Ast::Group(ast::Group {
3743                 span: span(0..8),
3744                 kind: ast::GroupKind::NonCapturing(ast::Flags {
3745                     span: span(2..5),
3746                     items: vec![
3747                         ast::FlagsItem {
3748                             span: span(2..3),
3749                             kind: ast::FlagsItemKind::Flag(
3750                                 ast::Flag::CaseInsensitive
3751                             ),
3752                         },
3753                         ast::FlagsItem {
3754                             span: span(3..4),
3755                             kind: ast::FlagsItemKind::Negation,
3756                         },
3757                         ast::FlagsItem {
3758                             span: span(4..5),
3759                             kind: ast::FlagsItemKind::Flag(
3760                                 ast::Flag::SwapGreed
3761                             ),
3762                         },
3763                     ],
3764                 }),
3765                 ast: Box::new(lit('a', 6)),
3766             }))
3767         );
3768 
3769         assert_eq!(
3770             parser("(").parse().unwrap_err(),
3771             TestError {
3772                 span: span(0..1),
3773                 kind: ast::ErrorKind::GroupUnclosed,
3774             }
3775         );
3776         assert_eq!(
3777             parser("(?").parse().unwrap_err(),
3778             TestError {
3779                 span: span(0..1),
3780                 kind: ast::ErrorKind::GroupUnclosed,
3781             }
3782         );
3783         assert_eq!(
3784             parser("(?P").parse().unwrap_err(),
3785             TestError {
3786                 span: span(2..3),
3787                 kind: ast::ErrorKind::FlagUnrecognized,
3788             }
3789         );
3790         assert_eq!(
3791             parser("(?P<").parse().unwrap_err(),
3792             TestError {
3793                 span: span(4..4),
3794                 kind: ast::ErrorKind::GroupNameUnexpectedEof,
3795             }
3796         );
3797         assert_eq!(
3798             parser("(a").parse().unwrap_err(),
3799             TestError {
3800                 span: span(0..1),
3801                 kind: ast::ErrorKind::GroupUnclosed,
3802             }
3803         );
3804         assert_eq!(
3805             parser("(()").parse().unwrap_err(),
3806             TestError {
3807                 span: span(0..1),
3808                 kind: ast::ErrorKind::GroupUnclosed,
3809             }
3810         );
3811         assert_eq!(
3812             parser(")").parse().unwrap_err(),
3813             TestError {
3814                 span: span(0..1),
3815                 kind: ast::ErrorKind::GroupUnopened,
3816             }
3817         );
3818         assert_eq!(
3819             parser("a)").parse().unwrap_err(),
3820             TestError {
3821                 span: span(1..2),
3822                 kind: ast::ErrorKind::GroupUnopened,
3823             }
3824         );
3825     }
3826 
3827     #[test]
parse_capture_name()3828     fn parse_capture_name() {
3829         assert_eq!(
3830             parser("(?P<a>z)").parse(),
3831             Ok(Ast::Group(ast::Group {
3832                 span: span(0..8),
3833                 kind: ast::GroupKind::CaptureName(ast::CaptureName {
3834                     span: span(4..5),
3835                     name: s("a"),
3836                     index: 1,
3837                 }),
3838                 ast: Box::new(lit('z', 6)),
3839             }))
3840         );
3841         assert_eq!(
3842             parser("(?P<abc>z)").parse(),
3843             Ok(Ast::Group(ast::Group {
3844                 span: span(0..10),
3845                 kind: ast::GroupKind::CaptureName(ast::CaptureName {
3846                     span: span(4..7),
3847                     name: s("abc"),
3848                     index: 1,
3849                 }),
3850                 ast: Box::new(lit('z', 8)),
3851             }))
3852         );
3853 
3854         assert_eq!(
3855             parser("(?P<").parse().unwrap_err(),
3856             TestError {
3857                 span: span(4..4),
3858                 kind: ast::ErrorKind::GroupNameUnexpectedEof,
3859             }
3860         );
3861         assert_eq!(
3862             parser("(?P<>z)").parse().unwrap_err(),
3863             TestError {
3864                 span: span(4..4),
3865                 kind: ast::ErrorKind::GroupNameEmpty,
3866             }
3867         );
3868         assert_eq!(
3869             parser("(?P<a").parse().unwrap_err(),
3870             TestError {
3871                 span: span(5..5),
3872                 kind: ast::ErrorKind::GroupNameUnexpectedEof,
3873             }
3874         );
3875         assert_eq!(
3876             parser("(?P<ab").parse().unwrap_err(),
3877             TestError {
3878                 span: span(6..6),
3879                 kind: ast::ErrorKind::GroupNameUnexpectedEof,
3880             }
3881         );
3882         assert_eq!(
3883             parser("(?P<0a").parse().unwrap_err(),
3884             TestError {
3885                 span: span(4..5),
3886                 kind: ast::ErrorKind::GroupNameInvalid,
3887             }
3888         );
3889         assert_eq!(
3890             parser("(?P<~").parse().unwrap_err(),
3891             TestError {
3892                 span: span(4..5),
3893                 kind: ast::ErrorKind::GroupNameInvalid,
3894             }
3895         );
3896         assert_eq!(
3897             parser("(?P<abc~").parse().unwrap_err(),
3898             TestError {
3899                 span: span(7..8),
3900                 kind: ast::ErrorKind::GroupNameInvalid,
3901             }
3902         );
3903         assert_eq!(
3904             parser("(?P<a>y)(?P<a>z)").parse().unwrap_err(),
3905             TestError {
3906                 span: span(12..13),
3907                 kind: ast::ErrorKind::GroupNameDuplicate {
3908                     original: span(4..5),
3909                 },
3910             }
3911         );
3912     }
3913 
3914     #[test]
parse_flags()3915     fn parse_flags() {
3916         assert_eq!(
3917             parser("i:").parse_flags(),
3918             Ok(ast::Flags {
3919                 span: span(0..1),
3920                 items: vec![ast::FlagsItem {
3921                     span: span(0..1),
3922                     kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
3923                 }],
3924             })
3925         );
3926         assert_eq!(
3927             parser("i)").parse_flags(),
3928             Ok(ast::Flags {
3929                 span: span(0..1),
3930                 items: vec![ast::FlagsItem {
3931                     span: span(0..1),
3932                     kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
3933                 }],
3934             })
3935         );
3936 
3937         assert_eq!(
3938             parser("isU:").parse_flags(),
3939             Ok(ast::Flags {
3940                 span: span(0..3),
3941                 items: vec![
3942                     ast::FlagsItem {
3943                         span: span(0..1),
3944                         kind: ast::FlagsItemKind::Flag(
3945                             ast::Flag::CaseInsensitive
3946                         ),
3947                     },
3948                     ast::FlagsItem {
3949                         span: span(1..2),
3950                         kind: ast::FlagsItemKind::Flag(
3951                             ast::Flag::DotMatchesNewLine
3952                         ),
3953                     },
3954                     ast::FlagsItem {
3955                         span: span(2..3),
3956                         kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
3957                     },
3958                 ],
3959             })
3960         );
3961 
3962         assert_eq!(
3963             parser("-isU:").parse_flags(),
3964             Ok(ast::Flags {
3965                 span: span(0..4),
3966                 items: vec![
3967                     ast::FlagsItem {
3968                         span: span(0..1),
3969                         kind: ast::FlagsItemKind::Negation,
3970                     },
3971                     ast::FlagsItem {
3972                         span: span(1..2),
3973                         kind: ast::FlagsItemKind::Flag(
3974                             ast::Flag::CaseInsensitive
3975                         ),
3976                     },
3977                     ast::FlagsItem {
3978                         span: span(2..3),
3979                         kind: ast::FlagsItemKind::Flag(
3980                             ast::Flag::DotMatchesNewLine
3981                         ),
3982                     },
3983                     ast::FlagsItem {
3984                         span: span(3..4),
3985                         kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
3986                     },
3987                 ],
3988             })
3989         );
3990         assert_eq!(
3991             parser("i-sU:").parse_flags(),
3992             Ok(ast::Flags {
3993                 span: span(0..4),
3994                 items: vec![
3995                     ast::FlagsItem {
3996                         span: span(0..1),
3997                         kind: ast::FlagsItemKind::Flag(
3998                             ast::Flag::CaseInsensitive
3999                         ),
4000                     },
4001                     ast::FlagsItem {
4002                         span: span(1..2),
4003                         kind: ast::FlagsItemKind::Negation,
4004                     },
4005                     ast::FlagsItem {
4006                         span: span(2..3),
4007                         kind: ast::FlagsItemKind::Flag(
4008                             ast::Flag::DotMatchesNewLine
4009                         ),
4010                     },
4011                     ast::FlagsItem {
4012                         span: span(3..4),
4013                         kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
4014                     },
4015                 ],
4016             })
4017         );
4018 
4019         assert_eq!(
4020             parser("isU").parse_flags().unwrap_err(),
4021             TestError {
4022                 span: span(3..3),
4023                 kind: ast::ErrorKind::FlagUnexpectedEof,
4024             }
4025         );
4026         assert_eq!(
4027             parser("isUa:").parse_flags().unwrap_err(),
4028             TestError {
4029                 span: span(3..4),
4030                 kind: ast::ErrorKind::FlagUnrecognized,
4031             }
4032         );
4033         assert_eq!(
4034             parser("isUi:").parse_flags().unwrap_err(),
4035             TestError {
4036                 span: span(3..4),
4037                 kind: ast::ErrorKind::FlagDuplicate { original: span(0..1) },
4038             }
4039         );
4040         assert_eq!(
4041             parser("i-sU-i:").parse_flags().unwrap_err(),
4042             TestError {
4043                 span: span(4..5),
4044                 kind: ast::ErrorKind::FlagRepeatedNegation {
4045                     original: span(1..2),
4046                 },
4047             }
4048         );
4049         assert_eq!(
4050             parser("-)").parse_flags().unwrap_err(),
4051             TestError {
4052                 span: span(0..1),
4053                 kind: ast::ErrorKind::FlagDanglingNegation,
4054             }
4055         );
4056         assert_eq!(
4057             parser("i-)").parse_flags().unwrap_err(),
4058             TestError {
4059                 span: span(1..2),
4060                 kind: ast::ErrorKind::FlagDanglingNegation,
4061             }
4062         );
4063         assert_eq!(
4064             parser("iU-)").parse_flags().unwrap_err(),
4065             TestError {
4066                 span: span(2..3),
4067                 kind: ast::ErrorKind::FlagDanglingNegation,
4068             }
4069         );
4070     }
4071 
4072     #[test]
parse_flag()4073     fn parse_flag() {
4074         assert_eq!(parser("i").parse_flag(), Ok(ast::Flag::CaseInsensitive));
4075         assert_eq!(parser("m").parse_flag(), Ok(ast::Flag::MultiLine));
4076         assert_eq!(parser("s").parse_flag(), Ok(ast::Flag::DotMatchesNewLine));
4077         assert_eq!(parser("U").parse_flag(), Ok(ast::Flag::SwapGreed));
4078         assert_eq!(parser("u").parse_flag(), Ok(ast::Flag::Unicode));
4079         assert_eq!(parser("x").parse_flag(), Ok(ast::Flag::IgnoreWhitespace));
4080 
4081         assert_eq!(
4082             parser("a").parse_flag().unwrap_err(),
4083             TestError {
4084                 span: span(0..1),
4085                 kind: ast::ErrorKind::FlagUnrecognized,
4086             }
4087         );
4088         assert_eq!(
4089             parser("☃").parse_flag().unwrap_err(),
4090             TestError {
4091                 span: span_range("☃", 0..3),
4092                 kind: ast::ErrorKind::FlagUnrecognized,
4093             }
4094         );
4095     }
4096 
4097     #[test]
parse_primitive_non_escape()4098     fn parse_primitive_non_escape() {
4099         assert_eq!(
4100             parser(r".").parse_primitive(),
4101             Ok(Primitive::Dot(span(0..1)))
4102         );
4103         assert_eq!(
4104             parser(r"^").parse_primitive(),
4105             Ok(Primitive::Assertion(ast::Assertion {
4106                 span: span(0..1),
4107                 kind: ast::AssertionKind::StartLine,
4108             }))
4109         );
4110         assert_eq!(
4111             parser(r"$").parse_primitive(),
4112             Ok(Primitive::Assertion(ast::Assertion {
4113                 span: span(0..1),
4114                 kind: ast::AssertionKind::EndLine,
4115             }))
4116         );
4117 
4118         assert_eq!(
4119             parser(r"a").parse_primitive(),
4120             Ok(Primitive::Literal(ast::Literal {
4121                 span: span(0..1),
4122                 kind: ast::LiteralKind::Verbatim,
4123                 c: 'a',
4124             }))
4125         );
4126         assert_eq!(
4127             parser(r"|").parse_primitive(),
4128             Ok(Primitive::Literal(ast::Literal {
4129                 span: span(0..1),
4130                 kind: ast::LiteralKind::Verbatim,
4131                 c: '|',
4132             }))
4133         );
4134         assert_eq!(
4135             parser(r"☃").parse_primitive(),
4136             Ok(Primitive::Literal(ast::Literal {
4137                 span: span_range("☃", 0..3),
4138                 kind: ast::LiteralKind::Verbatim,
4139                 c: '☃',
4140             }))
4141         );
4142     }
4143 
4144     #[test]
parse_escape()4145     fn parse_escape() {
4146         assert_eq!(
4147             parser(r"\|").parse_primitive(),
4148             Ok(Primitive::Literal(ast::Literal {
4149                 span: span(0..2),
4150                 kind: ast::LiteralKind::Punctuation,
4151                 c: '|',
4152             }))
4153         );
4154         let specials = &[
4155             (r"\a", '\x07', ast::SpecialLiteralKind::Bell),
4156             (r"\f", '\x0C', ast::SpecialLiteralKind::FormFeed),
4157             (r"\t", '\t', ast::SpecialLiteralKind::Tab),
4158             (r"\n", '\n', ast::SpecialLiteralKind::LineFeed),
4159             (r"\r", '\r', ast::SpecialLiteralKind::CarriageReturn),
4160             (r"\v", '\x0B', ast::SpecialLiteralKind::VerticalTab),
4161         ];
4162         for &(pat, c, ref kind) in specials {
4163             assert_eq!(
4164                 parser(pat).parse_primitive(),
4165                 Ok(Primitive::Literal(ast::Literal {
4166                     span: span(0..2),
4167                     kind: ast::LiteralKind::Special(kind.clone()),
4168                     c: c,
4169                 }))
4170             );
4171         }
4172         assert_eq!(
4173             parser(r"\A").parse_primitive(),
4174             Ok(Primitive::Assertion(ast::Assertion {
4175                 span: span(0..2),
4176                 kind: ast::AssertionKind::StartText,
4177             }))
4178         );
4179         assert_eq!(
4180             parser(r"\z").parse_primitive(),
4181             Ok(Primitive::Assertion(ast::Assertion {
4182                 span: span(0..2),
4183                 kind: ast::AssertionKind::EndText,
4184             }))
4185         );
4186         assert_eq!(
4187             parser(r"\b").parse_primitive(),
4188             Ok(Primitive::Assertion(ast::Assertion {
4189                 span: span(0..2),
4190                 kind: ast::AssertionKind::WordBoundary,
4191             }))
4192         );
4193         assert_eq!(
4194             parser(r"\B").parse_primitive(),
4195             Ok(Primitive::Assertion(ast::Assertion {
4196                 span: span(0..2),
4197                 kind: ast::AssertionKind::NotWordBoundary,
4198             }))
4199         );
4200 
4201         assert_eq!(
4202             parser(r"\").parse_escape().unwrap_err(),
4203             TestError {
4204                 span: span(0..1),
4205                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4206             }
4207         );
4208         assert_eq!(
4209             parser(r"\y").parse_escape().unwrap_err(),
4210             TestError {
4211                 span: span(0..2),
4212                 kind: ast::ErrorKind::EscapeUnrecognized,
4213             }
4214         );
4215     }
4216 
4217     #[test]
parse_unsupported_backreference()4218     fn parse_unsupported_backreference() {
4219         assert_eq!(
4220             parser(r"\0").parse_escape().unwrap_err(),
4221             TestError {
4222                 span: span(0..2),
4223                 kind: ast::ErrorKind::UnsupportedBackreference,
4224             }
4225         );
4226         assert_eq!(
4227             parser(r"\9").parse_escape().unwrap_err(),
4228             TestError {
4229                 span: span(0..2),
4230                 kind: ast::ErrorKind::UnsupportedBackreference,
4231             }
4232         );
4233     }
4234 
4235     #[test]
parse_octal()4236     fn parse_octal() {
4237         for i in 0..511 {
4238             let pat = format!(r"\{:o}", i);
4239             assert_eq!(
4240                 parser_octal(&pat).parse_escape(),
4241                 Ok(Primitive::Literal(ast::Literal {
4242                     span: span(0..pat.len()),
4243                     kind: ast::LiteralKind::Octal,
4244                     c: ::std::char::from_u32(i).unwrap(),
4245                 }))
4246             );
4247         }
4248         assert_eq!(
4249             parser_octal(r"\778").parse_escape(),
4250             Ok(Primitive::Literal(ast::Literal {
4251                 span: span(0..3),
4252                 kind: ast::LiteralKind::Octal,
4253                 c: '?',
4254             }))
4255         );
4256         assert_eq!(
4257             parser_octal(r"\7777").parse_escape(),
4258             Ok(Primitive::Literal(ast::Literal {
4259                 span: span(0..4),
4260                 kind: ast::LiteralKind::Octal,
4261                 c: '\u{01FF}',
4262             }))
4263         );
4264         assert_eq!(
4265             parser_octal(r"\778").parse(),
4266             Ok(Ast::Concat(ast::Concat {
4267                 span: span(0..4),
4268                 asts: vec![
4269                     Ast::Literal(ast::Literal {
4270                         span: span(0..3),
4271                         kind: ast::LiteralKind::Octal,
4272                         c: '?',
4273                     }),
4274                     Ast::Literal(ast::Literal {
4275                         span: span(3..4),
4276                         kind: ast::LiteralKind::Verbatim,
4277                         c: '8',
4278                     }),
4279                 ],
4280             }))
4281         );
4282         assert_eq!(
4283             parser_octal(r"\7777").parse(),
4284             Ok(Ast::Concat(ast::Concat {
4285                 span: span(0..5),
4286                 asts: vec![
4287                     Ast::Literal(ast::Literal {
4288                         span: span(0..4),
4289                         kind: ast::LiteralKind::Octal,
4290                         c: '\u{01FF}',
4291                     }),
4292                     Ast::Literal(ast::Literal {
4293                         span: span(4..5),
4294                         kind: ast::LiteralKind::Verbatim,
4295                         c: '7',
4296                     }),
4297                 ],
4298             }))
4299         );
4300 
4301         assert_eq!(
4302             parser_octal(r"\8").parse_escape().unwrap_err(),
4303             TestError {
4304                 span: span(0..2),
4305                 kind: ast::ErrorKind::EscapeUnrecognized,
4306             }
4307         );
4308     }
4309 
4310     #[test]
parse_hex_two()4311     fn parse_hex_two() {
4312         for i in 0..256 {
4313             let pat = format!(r"\x{:02x}", i);
4314             assert_eq!(
4315                 parser(&pat).parse_escape(),
4316                 Ok(Primitive::Literal(ast::Literal {
4317                     span: span(0..pat.len()),
4318                     kind: ast::LiteralKind::HexFixed(ast::HexLiteralKind::X),
4319                     c: ::std::char::from_u32(i).unwrap(),
4320                 }))
4321             );
4322         }
4323 
4324         assert_eq!(
4325             parser(r"\xF").parse_escape().unwrap_err(),
4326             TestError {
4327                 span: span(3..3),
4328                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4329             }
4330         );
4331         assert_eq!(
4332             parser(r"\xG").parse_escape().unwrap_err(),
4333             TestError {
4334                 span: span(2..3),
4335                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4336             }
4337         );
4338         assert_eq!(
4339             parser(r"\xFG").parse_escape().unwrap_err(),
4340             TestError {
4341                 span: span(3..4),
4342                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4343             }
4344         );
4345     }
4346 
4347     #[test]
parse_hex_four()4348     fn parse_hex_four() {
4349         for i in 0..65536 {
4350             let c = match ::std::char::from_u32(i) {
4351                 None => continue,
4352                 Some(c) => c,
4353             };
4354             let pat = format!(r"\u{:04x}", i);
4355             assert_eq!(
4356                 parser(&pat).parse_escape(),
4357                 Ok(Primitive::Literal(ast::Literal {
4358                     span: span(0..pat.len()),
4359                     kind: ast::LiteralKind::HexFixed(
4360                         ast::HexLiteralKind::UnicodeShort
4361                     ),
4362                     c: c,
4363                 }))
4364             );
4365         }
4366 
4367         assert_eq!(
4368             parser(r"\uF").parse_escape().unwrap_err(),
4369             TestError {
4370                 span: span(3..3),
4371                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4372             }
4373         );
4374         assert_eq!(
4375             parser(r"\uG").parse_escape().unwrap_err(),
4376             TestError {
4377                 span: span(2..3),
4378                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4379             }
4380         );
4381         assert_eq!(
4382             parser(r"\uFG").parse_escape().unwrap_err(),
4383             TestError {
4384                 span: span(3..4),
4385                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4386             }
4387         );
4388         assert_eq!(
4389             parser(r"\uFFG").parse_escape().unwrap_err(),
4390             TestError {
4391                 span: span(4..5),
4392                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4393             }
4394         );
4395         assert_eq!(
4396             parser(r"\uFFFG").parse_escape().unwrap_err(),
4397             TestError {
4398                 span: span(5..6),
4399                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4400             }
4401         );
4402         assert_eq!(
4403             parser(r"\uD800").parse_escape().unwrap_err(),
4404             TestError {
4405                 span: span(2..6),
4406                 kind: ast::ErrorKind::EscapeHexInvalid,
4407             }
4408         );
4409     }
4410 
4411     #[test]
parse_hex_eight()4412     fn parse_hex_eight() {
4413         for i in 0..65536 {
4414             let c = match ::std::char::from_u32(i) {
4415                 None => continue,
4416                 Some(c) => c,
4417             };
4418             let pat = format!(r"\U{:08x}", i);
4419             assert_eq!(
4420                 parser(&pat).parse_escape(),
4421                 Ok(Primitive::Literal(ast::Literal {
4422                     span: span(0..pat.len()),
4423                     kind: ast::LiteralKind::HexFixed(
4424                         ast::HexLiteralKind::UnicodeLong
4425                     ),
4426                     c: c,
4427                 }))
4428             );
4429         }
4430 
4431         assert_eq!(
4432             parser(r"\UF").parse_escape().unwrap_err(),
4433             TestError {
4434                 span: span(3..3),
4435                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4436             }
4437         );
4438         assert_eq!(
4439             parser(r"\UG").parse_escape().unwrap_err(),
4440             TestError {
4441                 span: span(2..3),
4442                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4443             }
4444         );
4445         assert_eq!(
4446             parser(r"\UFG").parse_escape().unwrap_err(),
4447             TestError {
4448                 span: span(3..4),
4449                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4450             }
4451         );
4452         assert_eq!(
4453             parser(r"\UFFG").parse_escape().unwrap_err(),
4454             TestError {
4455                 span: span(4..5),
4456                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4457             }
4458         );
4459         assert_eq!(
4460             parser(r"\UFFFG").parse_escape().unwrap_err(),
4461             TestError {
4462                 span: span(5..6),
4463                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4464             }
4465         );
4466         assert_eq!(
4467             parser(r"\UFFFFG").parse_escape().unwrap_err(),
4468             TestError {
4469                 span: span(6..7),
4470                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4471             }
4472         );
4473         assert_eq!(
4474             parser(r"\UFFFFFG").parse_escape().unwrap_err(),
4475             TestError {
4476                 span: span(7..8),
4477                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4478             }
4479         );
4480         assert_eq!(
4481             parser(r"\UFFFFFFG").parse_escape().unwrap_err(),
4482             TestError {
4483                 span: span(8..9),
4484                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4485             }
4486         );
4487         assert_eq!(
4488             parser(r"\UFFFFFFFG").parse_escape().unwrap_err(),
4489             TestError {
4490                 span: span(9..10),
4491                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4492             }
4493         );
4494     }
4495 
4496     #[test]
parse_hex_brace()4497     fn parse_hex_brace() {
4498         assert_eq!(
4499             parser(r"\u{26c4}").parse_escape(),
4500             Ok(Primitive::Literal(ast::Literal {
4501                 span: span(0..8),
4502                 kind: ast::LiteralKind::HexBrace(
4503                     ast::HexLiteralKind::UnicodeShort
4504                 ),
4505                 c: '⛄',
4506             }))
4507         );
4508         assert_eq!(
4509             parser(r"\U{26c4}").parse_escape(),
4510             Ok(Primitive::Literal(ast::Literal {
4511                 span: span(0..8),
4512                 kind: ast::LiteralKind::HexBrace(
4513                     ast::HexLiteralKind::UnicodeLong
4514                 ),
4515                 c: '⛄',
4516             }))
4517         );
4518         assert_eq!(
4519             parser(r"\x{26c4}").parse_escape(),
4520             Ok(Primitive::Literal(ast::Literal {
4521                 span: span(0..8),
4522                 kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
4523                 c: '⛄',
4524             }))
4525         );
4526         assert_eq!(
4527             parser(r"\x{26C4}").parse_escape(),
4528             Ok(Primitive::Literal(ast::Literal {
4529                 span: span(0..8),
4530                 kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
4531                 c: '⛄',
4532             }))
4533         );
4534         assert_eq!(
4535             parser(r"\x{10fFfF}").parse_escape(),
4536             Ok(Primitive::Literal(ast::Literal {
4537                 span: span(0..10),
4538                 kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
4539                 c: '\u{10FFFF}',
4540             }))
4541         );
4542 
4543         assert_eq!(
4544             parser(r"\x").parse_escape().unwrap_err(),
4545             TestError {
4546                 span: span(2..2),
4547                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4548             }
4549         );
4550         assert_eq!(
4551             parser(r"\x{").parse_escape().unwrap_err(),
4552             TestError {
4553                 span: span(2..3),
4554                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4555             }
4556         );
4557         assert_eq!(
4558             parser(r"\x{FF").parse_escape().unwrap_err(),
4559             TestError {
4560                 span: span(2..5),
4561                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4562             }
4563         );
4564         assert_eq!(
4565             parser(r"\x{}").parse_escape().unwrap_err(),
4566             TestError {
4567                 span: span(2..4),
4568                 kind: ast::ErrorKind::EscapeHexEmpty,
4569             }
4570         );
4571         assert_eq!(
4572             parser(r"\x{FGF}").parse_escape().unwrap_err(),
4573             TestError {
4574                 span: span(4..5),
4575                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4576             }
4577         );
4578         assert_eq!(
4579             parser(r"\x{FFFFFF}").parse_escape().unwrap_err(),
4580             TestError {
4581                 span: span(3..9),
4582                 kind: ast::ErrorKind::EscapeHexInvalid,
4583             }
4584         );
4585         assert_eq!(
4586             parser(r"\x{D800}").parse_escape().unwrap_err(),
4587             TestError {
4588                 span: span(3..7),
4589                 kind: ast::ErrorKind::EscapeHexInvalid,
4590             }
4591         );
4592         assert_eq!(
4593             parser(r"\x{FFFFFFFFF}").parse_escape().unwrap_err(),
4594             TestError {
4595                 span: span(3..12),
4596                 kind: ast::ErrorKind::EscapeHexInvalid,
4597             }
4598         );
4599     }
4600 
4601     #[test]
parse_decimal()4602     fn parse_decimal() {
4603         assert_eq!(parser("123").parse_decimal(), Ok(123));
4604         assert_eq!(parser("0").parse_decimal(), Ok(0));
4605         assert_eq!(parser("01").parse_decimal(), Ok(1));
4606 
4607         assert_eq!(
4608             parser("-1").parse_decimal().unwrap_err(),
4609             TestError { span: span(0..0), kind: ast::ErrorKind::DecimalEmpty }
4610         );
4611         assert_eq!(
4612             parser("").parse_decimal().unwrap_err(),
4613             TestError { span: span(0..0), kind: ast::ErrorKind::DecimalEmpty }
4614         );
4615         assert_eq!(
4616             parser("9999999999").parse_decimal().unwrap_err(),
4617             TestError {
4618                 span: span(0..10),
4619                 kind: ast::ErrorKind::DecimalInvalid,
4620             }
4621         );
4622     }
4623 
4624     #[test]
parse_set_class()4625     fn parse_set_class() {
4626         fn union(span: Span, items: Vec<ast::ClassSetItem>) -> ast::ClassSet {
4627             ast::ClassSet::union(ast::ClassSetUnion {
4628                 span: span,
4629                 items: items,
4630             })
4631         }
4632 
4633         fn intersection(
4634             span: Span,
4635             lhs: ast::ClassSet,
4636             rhs: ast::ClassSet,
4637         ) -> ast::ClassSet {
4638             ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
4639                 span: span,
4640                 kind: ast::ClassSetBinaryOpKind::Intersection,
4641                 lhs: Box::new(lhs),
4642                 rhs: Box::new(rhs),
4643             })
4644         }
4645 
4646         fn difference(
4647             span: Span,
4648             lhs: ast::ClassSet,
4649             rhs: ast::ClassSet,
4650         ) -> ast::ClassSet {
4651             ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
4652                 span: span,
4653                 kind: ast::ClassSetBinaryOpKind::Difference,
4654                 lhs: Box::new(lhs),
4655                 rhs: Box::new(rhs),
4656             })
4657         }
4658 
4659         fn symdifference(
4660             span: Span,
4661             lhs: ast::ClassSet,
4662             rhs: ast::ClassSet,
4663         ) -> ast::ClassSet {
4664             ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
4665                 span: span,
4666                 kind: ast::ClassSetBinaryOpKind::SymmetricDifference,
4667                 lhs: Box::new(lhs),
4668                 rhs: Box::new(rhs),
4669             })
4670         }
4671 
4672         fn itemset(item: ast::ClassSetItem) -> ast::ClassSet {
4673             ast::ClassSet::Item(item)
4674         }
4675 
4676         fn item_ascii(cls: ast::ClassAscii) -> ast::ClassSetItem {
4677             ast::ClassSetItem::Ascii(cls)
4678         }
4679 
4680         fn item_unicode(cls: ast::ClassUnicode) -> ast::ClassSetItem {
4681             ast::ClassSetItem::Unicode(cls)
4682         }
4683 
4684         fn item_perl(cls: ast::ClassPerl) -> ast::ClassSetItem {
4685             ast::ClassSetItem::Perl(cls)
4686         }
4687 
4688         fn item_bracket(cls: ast::ClassBracketed) -> ast::ClassSetItem {
4689             ast::ClassSetItem::Bracketed(Box::new(cls))
4690         }
4691 
4692         fn lit(span: Span, c: char) -> ast::ClassSetItem {
4693             ast::ClassSetItem::Literal(ast::Literal {
4694                 span: span,
4695                 kind: ast::LiteralKind::Verbatim,
4696                 c: c,
4697             })
4698         }
4699 
4700         fn empty(span: Span) -> ast::ClassSetItem {
4701             ast::ClassSetItem::Empty(span)
4702         }
4703 
4704         fn range(span: Span, start: char, end: char) -> ast::ClassSetItem {
4705             let pos1 = Position {
4706                 offset: span.start.offset + start.len_utf8(),
4707                 column: span.start.column + 1,
4708                 ..span.start
4709             };
4710             let pos2 = Position {
4711                 offset: span.end.offset - end.len_utf8(),
4712                 column: span.end.column - 1,
4713                 ..span.end
4714             };
4715             ast::ClassSetItem::Range(ast::ClassSetRange {
4716                 span: span,
4717                 start: ast::Literal {
4718                     span: Span { end: pos1, ..span },
4719                     kind: ast::LiteralKind::Verbatim,
4720                     c: start,
4721                 },
4722                 end: ast::Literal {
4723                     span: Span { start: pos2, ..span },
4724                     kind: ast::LiteralKind::Verbatim,
4725                     c: end,
4726                 },
4727             })
4728         }
4729 
4730         fn alnum(span: Span, negated: bool) -> ast::ClassAscii {
4731             ast::ClassAscii {
4732                 span: span,
4733                 kind: ast::ClassAsciiKind::Alnum,
4734                 negated: negated,
4735             }
4736         }
4737 
4738         fn lower(span: Span, negated: bool) -> ast::ClassAscii {
4739             ast::ClassAscii {
4740                 span: span,
4741                 kind: ast::ClassAsciiKind::Lower,
4742                 negated: negated,
4743             }
4744         }
4745 
4746         assert_eq!(
4747             parser("[[:alnum:]]").parse(),
4748             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4749                 span: span(0..11),
4750                 negated: false,
4751                 kind: itemset(item_ascii(alnum(span(1..10), false))),
4752             })))
4753         );
4754         assert_eq!(
4755             parser("[[[:alnum:]]]").parse(),
4756             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4757                 span: span(0..13),
4758                 negated: false,
4759                 kind: itemset(item_bracket(ast::ClassBracketed {
4760                     span: span(1..12),
4761                     negated: false,
4762                     kind: itemset(item_ascii(alnum(span(2..11), false))),
4763                 })),
4764             })))
4765         );
4766         assert_eq!(
4767             parser("[[:alnum:]&&[:lower:]]").parse(),
4768             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4769                 span: span(0..22),
4770                 negated: false,
4771                 kind: intersection(
4772                     span(1..21),
4773                     itemset(item_ascii(alnum(span(1..10), false))),
4774                     itemset(item_ascii(lower(span(12..21), false))),
4775                 ),
4776             })))
4777         );
4778         assert_eq!(
4779             parser("[[:alnum:]--[:lower:]]").parse(),
4780             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4781                 span: span(0..22),
4782                 negated: false,
4783                 kind: difference(
4784                     span(1..21),
4785                     itemset(item_ascii(alnum(span(1..10), false))),
4786                     itemset(item_ascii(lower(span(12..21), false))),
4787                 ),
4788             })))
4789         );
4790         assert_eq!(
4791             parser("[[:alnum:]~~[:lower:]]").parse(),
4792             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4793                 span: span(0..22),
4794                 negated: false,
4795                 kind: symdifference(
4796                     span(1..21),
4797                     itemset(item_ascii(alnum(span(1..10), false))),
4798                     itemset(item_ascii(lower(span(12..21), false))),
4799                 ),
4800             })))
4801         );
4802 
4803         assert_eq!(
4804             parser("[a]").parse(),
4805             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4806                 span: span(0..3),
4807                 negated: false,
4808                 kind: itemset(lit(span(1..2), 'a')),
4809             })))
4810         );
4811         assert_eq!(
4812             parser(r"[a\]]").parse(),
4813             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4814                 span: span(0..5),
4815                 negated: false,
4816                 kind: union(
4817                     span(1..4),
4818                     vec![
4819                         lit(span(1..2), 'a'),
4820                         ast::ClassSetItem::Literal(ast::Literal {
4821                             span: span(2..4),
4822                             kind: ast::LiteralKind::Punctuation,
4823                             c: ']',
4824                         }),
4825                     ]
4826                 ),
4827             })))
4828         );
4829         assert_eq!(
4830             parser(r"[a\-z]").parse(),
4831             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4832                 span: span(0..6),
4833                 negated: false,
4834                 kind: union(
4835                     span(1..5),
4836                     vec![
4837                         lit(span(1..2), 'a'),
4838                         ast::ClassSetItem::Literal(ast::Literal {
4839                             span: span(2..4),
4840                             kind: ast::LiteralKind::Punctuation,
4841                             c: '-',
4842                         }),
4843                         lit(span(4..5), 'z'),
4844                     ]
4845                 ),
4846             })))
4847         );
4848         assert_eq!(
4849             parser("[ab]").parse(),
4850             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4851                 span: span(0..4),
4852                 negated: false,
4853                 kind: union(
4854                     span(1..3),
4855                     vec![lit(span(1..2), 'a'), lit(span(2..3), 'b'),]
4856                 ),
4857             })))
4858         );
4859         assert_eq!(
4860             parser("[a-]").parse(),
4861             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4862                 span: span(0..4),
4863                 negated: false,
4864                 kind: union(
4865                     span(1..3),
4866                     vec![lit(span(1..2), 'a'), lit(span(2..3), '-'),]
4867                 ),
4868             })))
4869         );
4870         assert_eq!(
4871             parser("[-a]").parse(),
4872             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4873                 span: span(0..4),
4874                 negated: false,
4875                 kind: union(
4876                     span(1..3),
4877                     vec![lit(span(1..2), '-'), lit(span(2..3), 'a'),]
4878                 ),
4879             })))
4880         );
4881         assert_eq!(
4882             parser(r"[\pL]").parse(),
4883             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4884                 span: span(0..5),
4885                 negated: false,
4886                 kind: itemset(item_unicode(ast::ClassUnicode {
4887                     span: span(1..4),
4888                     negated: false,
4889                     kind: ast::ClassUnicodeKind::OneLetter('L'),
4890                 })),
4891             })))
4892         );
4893         assert_eq!(
4894             parser(r"[\w]").parse(),
4895             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4896                 span: span(0..4),
4897                 negated: false,
4898                 kind: itemset(item_perl(ast::ClassPerl {
4899                     span: span(1..3),
4900                     kind: ast::ClassPerlKind::Word,
4901                     negated: false,
4902                 })),
4903             })))
4904         );
4905         assert_eq!(
4906             parser(r"[a\wz]").parse(),
4907             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4908                 span: span(0..6),
4909                 negated: false,
4910                 kind: union(
4911                     span(1..5),
4912                     vec![
4913                         lit(span(1..2), 'a'),
4914                         item_perl(ast::ClassPerl {
4915                             span: span(2..4),
4916                             kind: ast::ClassPerlKind::Word,
4917                             negated: false,
4918                         }),
4919                         lit(span(4..5), 'z'),
4920                     ]
4921                 ),
4922             })))
4923         );
4924 
4925         assert_eq!(
4926             parser("[a-z]").parse(),
4927             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4928                 span: span(0..5),
4929                 negated: false,
4930                 kind: itemset(range(span(1..4), 'a', 'z')),
4931             })))
4932         );
4933         assert_eq!(
4934             parser("[a-cx-z]").parse(),
4935             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4936                 span: span(0..8),
4937                 negated: false,
4938                 kind: union(
4939                     span(1..7),
4940                     vec![
4941                         range(span(1..4), 'a', 'c'),
4942                         range(span(4..7), 'x', 'z'),
4943                     ]
4944                 ),
4945             })))
4946         );
4947         assert_eq!(
4948             parser(r"[\w&&a-cx-z]").parse(),
4949             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4950                 span: span(0..12),
4951                 negated: false,
4952                 kind: intersection(
4953                     span(1..11),
4954                     itemset(item_perl(ast::ClassPerl {
4955                         span: span(1..3),
4956                         kind: ast::ClassPerlKind::Word,
4957                         negated: false,
4958                     })),
4959                     union(
4960                         span(5..11),
4961                         vec![
4962                             range(span(5..8), 'a', 'c'),
4963                             range(span(8..11), 'x', 'z'),
4964                         ]
4965                     ),
4966                 ),
4967             })))
4968         );
4969         assert_eq!(
4970             parser(r"[a-cx-z&&\w]").parse(),
4971             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4972                 span: span(0..12),
4973                 negated: false,
4974                 kind: intersection(
4975                     span(1..11),
4976                     union(
4977                         span(1..7),
4978                         vec![
4979                             range(span(1..4), 'a', 'c'),
4980                             range(span(4..7), 'x', 'z'),
4981                         ]
4982                     ),
4983                     itemset(item_perl(ast::ClassPerl {
4984                         span: span(9..11),
4985                         kind: ast::ClassPerlKind::Word,
4986                         negated: false,
4987                     })),
4988                 ),
4989             })))
4990         );
4991         assert_eq!(
4992             parser(r"[a--b--c]").parse(),
4993             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4994                 span: span(0..9),
4995                 negated: false,
4996                 kind: difference(
4997                     span(1..8),
4998                     difference(
4999                         span(1..5),
5000                         itemset(lit(span(1..2), 'a')),
5001                         itemset(lit(span(4..5), 'b')),
5002                     ),
5003                     itemset(lit(span(7..8), 'c')),
5004                 ),
5005             })))
5006         );
5007         assert_eq!(
5008             parser(r"[a~~b~~c]").parse(),
5009             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5010                 span: span(0..9),
5011                 negated: false,
5012                 kind: symdifference(
5013                     span(1..8),
5014                     symdifference(
5015                         span(1..5),
5016                         itemset(lit(span(1..2), 'a')),
5017                         itemset(lit(span(4..5), 'b')),
5018                     ),
5019                     itemset(lit(span(7..8), 'c')),
5020                 ),
5021             })))
5022         );
5023         assert_eq!(
5024             parser(r"[\^&&^]").parse(),
5025             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5026                 span: span(0..7),
5027                 negated: false,
5028                 kind: intersection(
5029                     span(1..6),
5030                     itemset(ast::ClassSetItem::Literal(ast::Literal {
5031                         span: span(1..3),
5032                         kind: ast::LiteralKind::Punctuation,
5033                         c: '^',
5034                     })),
5035                     itemset(lit(span(5..6), '^')),
5036                 ),
5037             })))
5038         );
5039         assert_eq!(
5040             parser(r"[\&&&&]").parse(),
5041             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5042                 span: span(0..7),
5043                 negated: false,
5044                 kind: intersection(
5045                     span(1..6),
5046                     itemset(ast::ClassSetItem::Literal(ast::Literal {
5047                         span: span(1..3),
5048                         kind: ast::LiteralKind::Punctuation,
5049                         c: '&',
5050                     })),
5051                     itemset(lit(span(5..6), '&')),
5052                 ),
5053             })))
5054         );
5055         assert_eq!(
5056             parser(r"[&&&&]").parse(),
5057             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5058                 span: span(0..6),
5059                 negated: false,
5060                 kind: intersection(
5061                     span(1..5),
5062                     intersection(
5063                         span(1..3),
5064                         itemset(empty(span(1..1))),
5065                         itemset(empty(span(3..3))),
5066                     ),
5067                     itemset(empty(span(5..5))),
5068                 ),
5069             })))
5070         );
5071 
5072         let pat = "[☃-⛄]";
5073         assert_eq!(
5074             parser(pat).parse(),
5075             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5076                 span: span_range(pat, 0..9),
5077                 negated: false,
5078                 kind: itemset(ast::ClassSetItem::Range(ast::ClassSetRange {
5079                     span: span_range(pat, 1..8),
5080                     start: ast::Literal {
5081                         span: span_range(pat, 1..4),
5082                         kind: ast::LiteralKind::Verbatim,
5083                         c: '☃',
5084                     },
5085                     end: ast::Literal {
5086                         span: span_range(pat, 5..8),
5087                         kind: ast::LiteralKind::Verbatim,
5088                         c: '⛄',
5089                     },
5090                 })),
5091             })))
5092         );
5093 
5094         assert_eq!(
5095             parser(r"[]]").parse(),
5096             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5097                 span: span(0..3),
5098                 negated: false,
5099                 kind: itemset(lit(span(1..2), ']')),
5100             })))
5101         );
5102         assert_eq!(
5103             parser(r"[]\[]").parse(),
5104             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5105                 span: span(0..5),
5106                 negated: false,
5107                 kind: union(
5108                     span(1..4),
5109                     vec![
5110                         lit(span(1..2), ']'),
5111                         ast::ClassSetItem::Literal(ast::Literal {
5112                             span: span(2..4),
5113                             kind: ast::LiteralKind::Punctuation,
5114                             c: '[',
5115                         }),
5116                     ]
5117                 ),
5118             })))
5119         );
5120         assert_eq!(
5121             parser(r"[\[]]").parse(),
5122             Ok(concat(
5123                 0..5,
5124                 vec![
5125                     Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5126                         span: span(0..4),
5127                         negated: false,
5128                         kind: itemset(ast::ClassSetItem::Literal(
5129                             ast::Literal {
5130                                 span: span(1..3),
5131                                 kind: ast::LiteralKind::Punctuation,
5132                                 c: '[',
5133                             }
5134                         )),
5135                     })),
5136                     Ast::Literal(ast::Literal {
5137                         span: span(4..5),
5138                         kind: ast::LiteralKind::Verbatim,
5139                         c: ']',
5140                     }),
5141                 ]
5142             ))
5143         );
5144 
5145         assert_eq!(
5146             parser("[").parse().unwrap_err(),
5147             TestError {
5148                 span: span(0..1),
5149                 kind: ast::ErrorKind::ClassUnclosed,
5150             }
5151         );
5152         assert_eq!(
5153             parser("[[").parse().unwrap_err(),
5154             TestError {
5155                 span: span(1..2),
5156                 kind: ast::ErrorKind::ClassUnclosed,
5157             }
5158         );
5159         assert_eq!(
5160             parser("[[-]").parse().unwrap_err(),
5161             TestError {
5162                 span: span(0..1),
5163                 kind: ast::ErrorKind::ClassUnclosed,
5164             }
5165         );
5166         assert_eq!(
5167             parser("[[[:alnum:]").parse().unwrap_err(),
5168             TestError {
5169                 span: span(1..2),
5170                 kind: ast::ErrorKind::ClassUnclosed,
5171             }
5172         );
5173         assert_eq!(
5174             parser(r"[\b]").parse().unwrap_err(),
5175             TestError {
5176                 span: span(1..3),
5177                 kind: ast::ErrorKind::ClassEscapeInvalid,
5178             }
5179         );
5180         assert_eq!(
5181             parser(r"[\w-a]").parse().unwrap_err(),
5182             TestError {
5183                 span: span(1..3),
5184                 kind: ast::ErrorKind::ClassRangeLiteral,
5185             }
5186         );
5187         assert_eq!(
5188             parser(r"[a-\w]").parse().unwrap_err(),
5189             TestError {
5190                 span: span(3..5),
5191                 kind: ast::ErrorKind::ClassRangeLiteral,
5192             }
5193         );
5194         assert_eq!(
5195             parser(r"[z-a]").parse().unwrap_err(),
5196             TestError {
5197                 span: span(1..4),
5198                 kind: ast::ErrorKind::ClassRangeInvalid,
5199             }
5200         );
5201 
5202         assert_eq!(
5203             parser_ignore_whitespace("[a ").parse().unwrap_err(),
5204             TestError {
5205                 span: span(0..1),
5206                 kind: ast::ErrorKind::ClassUnclosed,
5207             }
5208         );
5209         assert_eq!(
5210             parser_ignore_whitespace("[a- ").parse().unwrap_err(),
5211             TestError {
5212                 span: span(0..1),
5213                 kind: ast::ErrorKind::ClassUnclosed,
5214             }
5215         );
5216     }
5217 
5218     #[test]
parse_set_class_open()5219     fn parse_set_class_open() {
5220         assert_eq!(parser("[a]").parse_set_class_open(), {
5221             let set = ast::ClassBracketed {
5222                 span: span(0..1),
5223                 negated: false,
5224                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5225                     span: span(1..1),
5226                     items: vec![],
5227                 }),
5228             };
5229             let union = ast::ClassSetUnion { span: span(1..1), items: vec![] };
5230             Ok((set, union))
5231         });
5232         assert_eq!(
5233             parser_ignore_whitespace("[   a]").parse_set_class_open(),
5234             {
5235                 let set = ast::ClassBracketed {
5236                     span: span(0..4),
5237                     negated: false,
5238                     kind: ast::ClassSet::union(ast::ClassSetUnion {
5239                         span: span(4..4),
5240                         items: vec![],
5241                     }),
5242                 };
5243                 let union =
5244                     ast::ClassSetUnion { span: span(4..4), items: vec![] };
5245                 Ok((set, union))
5246             }
5247         );
5248         assert_eq!(parser("[^a]").parse_set_class_open(), {
5249             let set = ast::ClassBracketed {
5250                 span: span(0..2),
5251                 negated: true,
5252                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5253                     span: span(2..2),
5254                     items: vec![],
5255                 }),
5256             };
5257             let union = ast::ClassSetUnion { span: span(2..2), items: vec![] };
5258             Ok((set, union))
5259         });
5260         assert_eq!(
5261             parser_ignore_whitespace("[ ^ a]").parse_set_class_open(),
5262             {
5263                 let set = ast::ClassBracketed {
5264                     span: span(0..4),
5265                     negated: true,
5266                     kind: ast::ClassSet::union(ast::ClassSetUnion {
5267                         span: span(4..4),
5268                         items: vec![],
5269                     }),
5270                 };
5271                 let union =
5272                     ast::ClassSetUnion { span: span(4..4), items: vec![] };
5273                 Ok((set, union))
5274             }
5275         );
5276         assert_eq!(parser("[-a]").parse_set_class_open(), {
5277             let set = ast::ClassBracketed {
5278                 span: span(0..2),
5279                 negated: false,
5280                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5281                     span: span(1..1),
5282                     items: vec![],
5283                 }),
5284             };
5285             let union = ast::ClassSetUnion {
5286                 span: span(1..2),
5287                 items: vec![ast::ClassSetItem::Literal(ast::Literal {
5288                     span: span(1..2),
5289                     kind: ast::LiteralKind::Verbatim,
5290                     c: '-',
5291                 })],
5292             };
5293             Ok((set, union))
5294         });
5295         assert_eq!(
5296             parser_ignore_whitespace("[ - a]").parse_set_class_open(),
5297             {
5298                 let set = ast::ClassBracketed {
5299                     span: span(0..4),
5300                     negated: false,
5301                     kind: ast::ClassSet::union(ast::ClassSetUnion {
5302                         span: span(2..2),
5303                         items: vec![],
5304                     }),
5305                 };
5306                 let union = ast::ClassSetUnion {
5307                     span: span(2..3),
5308                     items: vec![ast::ClassSetItem::Literal(ast::Literal {
5309                         span: span(2..3),
5310                         kind: ast::LiteralKind::Verbatim,
5311                         c: '-',
5312                     })],
5313                 };
5314                 Ok((set, union))
5315             }
5316         );
5317         assert_eq!(parser("[^-a]").parse_set_class_open(), {
5318             let set = ast::ClassBracketed {
5319                 span: span(0..3),
5320                 negated: true,
5321                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5322                     span: span(2..2),
5323                     items: vec![],
5324                 }),
5325             };
5326             let union = ast::ClassSetUnion {
5327                 span: span(2..3),
5328                 items: vec![ast::ClassSetItem::Literal(ast::Literal {
5329                     span: span(2..3),
5330                     kind: ast::LiteralKind::Verbatim,
5331                     c: '-',
5332                 })],
5333             };
5334             Ok((set, union))
5335         });
5336         assert_eq!(parser("[--a]").parse_set_class_open(), {
5337             let set = ast::ClassBracketed {
5338                 span: span(0..3),
5339                 negated: false,
5340                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5341                     span: span(1..1),
5342                     items: vec![],
5343                 }),
5344             };
5345             let union = ast::ClassSetUnion {
5346                 span: span(1..3),
5347                 items: vec![
5348                     ast::ClassSetItem::Literal(ast::Literal {
5349                         span: span(1..2),
5350                         kind: ast::LiteralKind::Verbatim,
5351                         c: '-',
5352                     }),
5353                     ast::ClassSetItem::Literal(ast::Literal {
5354                         span: span(2..3),
5355                         kind: ast::LiteralKind::Verbatim,
5356                         c: '-',
5357                     }),
5358                 ],
5359             };
5360             Ok((set, union))
5361         });
5362         assert_eq!(parser("[]a]").parse_set_class_open(), {
5363             let set = ast::ClassBracketed {
5364                 span: span(0..2),
5365                 negated: false,
5366                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5367                     span: span(1..1),
5368                     items: vec![],
5369                 }),
5370             };
5371             let union = ast::ClassSetUnion {
5372                 span: span(1..2),
5373                 items: vec![ast::ClassSetItem::Literal(ast::Literal {
5374                     span: span(1..2),
5375                     kind: ast::LiteralKind::Verbatim,
5376                     c: ']',
5377                 })],
5378             };
5379             Ok((set, union))
5380         });
5381         assert_eq!(
5382             parser_ignore_whitespace("[ ] a]").parse_set_class_open(),
5383             {
5384                 let set = ast::ClassBracketed {
5385                     span: span(0..4),
5386                     negated: false,
5387                     kind: ast::ClassSet::union(ast::ClassSetUnion {
5388                         span: span(2..2),
5389                         items: vec![],
5390                     }),
5391                 };
5392                 let union = ast::ClassSetUnion {
5393                     span: span(2..3),
5394                     items: vec![ast::ClassSetItem::Literal(ast::Literal {
5395                         span: span(2..3),
5396                         kind: ast::LiteralKind::Verbatim,
5397                         c: ']',
5398                     })],
5399                 };
5400                 Ok((set, union))
5401             }
5402         );
5403         assert_eq!(parser("[^]a]").parse_set_class_open(), {
5404             let set = ast::ClassBracketed {
5405                 span: span(0..3),
5406                 negated: true,
5407                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5408                     span: span(2..2),
5409                     items: vec![],
5410                 }),
5411             };
5412             let union = ast::ClassSetUnion {
5413                 span: span(2..3),
5414                 items: vec![ast::ClassSetItem::Literal(ast::Literal {
5415                     span: span(2..3),
5416                     kind: ast::LiteralKind::Verbatim,
5417                     c: ']',
5418                 })],
5419             };
5420             Ok((set, union))
5421         });
5422         assert_eq!(parser("[-]a]").parse_set_class_open(), {
5423             let set = ast::ClassBracketed {
5424                 span: span(0..2),
5425                 negated: false,
5426                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5427                     span: span(1..1),
5428                     items: vec![],
5429                 }),
5430             };
5431             let union = ast::ClassSetUnion {
5432                 span: span(1..2),
5433                 items: vec![ast::ClassSetItem::Literal(ast::Literal {
5434                     span: span(1..2),
5435                     kind: ast::LiteralKind::Verbatim,
5436                     c: '-',
5437                 })],
5438             };
5439             Ok((set, union))
5440         });
5441 
5442         assert_eq!(
5443             parser("[").parse_set_class_open().unwrap_err(),
5444             TestError {
5445                 span: span(0..1),
5446                 kind: ast::ErrorKind::ClassUnclosed,
5447             }
5448         );
5449         assert_eq!(
5450             parser_ignore_whitespace("[    ")
5451                 .parse_set_class_open()
5452                 .unwrap_err(),
5453             TestError {
5454                 span: span(0..5),
5455                 kind: ast::ErrorKind::ClassUnclosed,
5456             }
5457         );
5458         assert_eq!(
5459             parser("[^").parse_set_class_open().unwrap_err(),
5460             TestError {
5461                 span: span(0..2),
5462                 kind: ast::ErrorKind::ClassUnclosed,
5463             }
5464         );
5465         assert_eq!(
5466             parser("[]").parse_set_class_open().unwrap_err(),
5467             TestError {
5468                 span: span(0..2),
5469                 kind: ast::ErrorKind::ClassUnclosed,
5470             }
5471         );
5472         assert_eq!(
5473             parser("[-").parse_set_class_open().unwrap_err(),
5474             TestError {
5475                 span: span(0..2),
5476                 kind: ast::ErrorKind::ClassUnclosed,
5477             }
5478         );
5479         assert_eq!(
5480             parser("[--").parse_set_class_open().unwrap_err(),
5481             TestError {
5482                 span: span(0..3),
5483                 kind: ast::ErrorKind::ClassUnclosed,
5484             }
5485         );
5486     }
5487 
5488     #[test]
maybe_parse_ascii_class()5489     fn maybe_parse_ascii_class() {
5490         assert_eq!(
5491             parser(r"[:alnum:]").maybe_parse_ascii_class(),
5492             Some(ast::ClassAscii {
5493                 span: span(0..9),
5494                 kind: ast::ClassAsciiKind::Alnum,
5495                 negated: false,
5496             })
5497         );
5498         assert_eq!(
5499             parser(r"[:alnum:]A").maybe_parse_ascii_class(),
5500             Some(ast::ClassAscii {
5501                 span: span(0..9),
5502                 kind: ast::ClassAsciiKind::Alnum,
5503                 negated: false,
5504             })
5505         );
5506         assert_eq!(
5507             parser(r"[:^alnum:]").maybe_parse_ascii_class(),
5508             Some(ast::ClassAscii {
5509                 span: span(0..10),
5510                 kind: ast::ClassAsciiKind::Alnum,
5511                 negated: true,
5512             })
5513         );
5514 
5515         let p = parser(r"[:");
5516         assert_eq!(p.maybe_parse_ascii_class(), None);
5517         assert_eq!(p.offset(), 0);
5518 
5519         let p = parser(r"[:^");
5520         assert_eq!(p.maybe_parse_ascii_class(), None);
5521         assert_eq!(p.offset(), 0);
5522 
5523         let p = parser(r"[^:alnum:]");
5524         assert_eq!(p.maybe_parse_ascii_class(), None);
5525         assert_eq!(p.offset(), 0);
5526 
5527         let p = parser(r"[:alnnum:]");
5528         assert_eq!(p.maybe_parse_ascii_class(), None);
5529         assert_eq!(p.offset(), 0);
5530 
5531         let p = parser(r"[:alnum]");
5532         assert_eq!(p.maybe_parse_ascii_class(), None);
5533         assert_eq!(p.offset(), 0);
5534 
5535         let p = parser(r"[:alnum:");
5536         assert_eq!(p.maybe_parse_ascii_class(), None);
5537         assert_eq!(p.offset(), 0);
5538     }
5539 
5540     #[test]
parse_unicode_class()5541     fn parse_unicode_class() {
5542         assert_eq!(
5543             parser(r"\pN").parse_escape(),
5544             Ok(Primitive::Unicode(ast::ClassUnicode {
5545                 span: span(0..3),
5546                 negated: false,
5547                 kind: ast::ClassUnicodeKind::OneLetter('N'),
5548             }))
5549         );
5550         assert_eq!(
5551             parser(r"\PN").parse_escape(),
5552             Ok(Primitive::Unicode(ast::ClassUnicode {
5553                 span: span(0..3),
5554                 negated: true,
5555                 kind: ast::ClassUnicodeKind::OneLetter('N'),
5556             }))
5557         );
5558         assert_eq!(
5559             parser(r"\p{N}").parse_escape(),
5560             Ok(Primitive::Unicode(ast::ClassUnicode {
5561                 span: span(0..5),
5562                 negated: false,
5563                 kind: ast::ClassUnicodeKind::Named(s("N")),
5564             }))
5565         );
5566         assert_eq!(
5567             parser(r"\P{N}").parse_escape(),
5568             Ok(Primitive::Unicode(ast::ClassUnicode {
5569                 span: span(0..5),
5570                 negated: true,
5571                 kind: ast::ClassUnicodeKind::Named(s("N")),
5572             }))
5573         );
5574         assert_eq!(
5575             parser(r"\p{Greek}").parse_escape(),
5576             Ok(Primitive::Unicode(ast::ClassUnicode {
5577                 span: span(0..9),
5578                 negated: false,
5579                 kind: ast::ClassUnicodeKind::Named(s("Greek")),
5580             }))
5581         );
5582 
5583         assert_eq!(
5584             parser(r"\p{scx:Katakana}").parse_escape(),
5585             Ok(Primitive::Unicode(ast::ClassUnicode {
5586                 span: span(0..16),
5587                 negated: false,
5588                 kind: ast::ClassUnicodeKind::NamedValue {
5589                     op: ast::ClassUnicodeOpKind::Colon,
5590                     name: s("scx"),
5591                     value: s("Katakana"),
5592                 },
5593             }))
5594         );
5595         assert_eq!(
5596             parser(r"\p{scx=Katakana}").parse_escape(),
5597             Ok(Primitive::Unicode(ast::ClassUnicode {
5598                 span: span(0..16),
5599                 negated: false,
5600                 kind: ast::ClassUnicodeKind::NamedValue {
5601                     op: ast::ClassUnicodeOpKind::Equal,
5602                     name: s("scx"),
5603                     value: s("Katakana"),
5604                 },
5605             }))
5606         );
5607         assert_eq!(
5608             parser(r"\p{scx!=Katakana}").parse_escape(),
5609             Ok(Primitive::Unicode(ast::ClassUnicode {
5610                 span: span(0..17),
5611                 negated: false,
5612                 kind: ast::ClassUnicodeKind::NamedValue {
5613                     op: ast::ClassUnicodeOpKind::NotEqual,
5614                     name: s("scx"),
5615                     value: s("Katakana"),
5616                 },
5617             }))
5618         );
5619 
5620         assert_eq!(
5621             parser(r"\p{:}").parse_escape(),
5622             Ok(Primitive::Unicode(ast::ClassUnicode {
5623                 span: span(0..5),
5624                 negated: false,
5625                 kind: ast::ClassUnicodeKind::NamedValue {
5626                     op: ast::ClassUnicodeOpKind::Colon,
5627                     name: s(""),
5628                     value: s(""),
5629                 },
5630             }))
5631         );
5632         assert_eq!(
5633             parser(r"\p{=}").parse_escape(),
5634             Ok(Primitive::Unicode(ast::ClassUnicode {
5635                 span: span(0..5),
5636                 negated: false,
5637                 kind: ast::ClassUnicodeKind::NamedValue {
5638                     op: ast::ClassUnicodeOpKind::Equal,
5639                     name: s(""),
5640                     value: s(""),
5641                 },
5642             }))
5643         );
5644         assert_eq!(
5645             parser(r"\p{!=}").parse_escape(),
5646             Ok(Primitive::Unicode(ast::ClassUnicode {
5647                 span: span(0..6),
5648                 negated: false,
5649                 kind: ast::ClassUnicodeKind::NamedValue {
5650                     op: ast::ClassUnicodeOpKind::NotEqual,
5651                     name: s(""),
5652                     value: s(""),
5653                 },
5654             }))
5655         );
5656 
5657         assert_eq!(
5658             parser(r"\p").parse_escape().unwrap_err(),
5659             TestError {
5660                 span: span(2..2),
5661                 kind: ast::ErrorKind::EscapeUnexpectedEof,
5662             }
5663         );
5664         assert_eq!(
5665             parser(r"\p{").parse_escape().unwrap_err(),
5666             TestError {
5667                 span: span(3..3),
5668                 kind: ast::ErrorKind::EscapeUnexpectedEof,
5669             }
5670         );
5671         assert_eq!(
5672             parser(r"\p{N").parse_escape().unwrap_err(),
5673             TestError {
5674                 span: span(4..4),
5675                 kind: ast::ErrorKind::EscapeUnexpectedEof,
5676             }
5677         );
5678         assert_eq!(
5679             parser(r"\p{Greek").parse_escape().unwrap_err(),
5680             TestError {
5681                 span: span(8..8),
5682                 kind: ast::ErrorKind::EscapeUnexpectedEof,
5683             }
5684         );
5685 
5686         assert_eq!(
5687             parser(r"\pNz").parse(),
5688             Ok(Ast::Concat(ast::Concat {
5689                 span: span(0..4),
5690                 asts: vec![
5691                     Ast::Class(ast::Class::Unicode(ast::ClassUnicode {
5692                         span: span(0..3),
5693                         negated: false,
5694                         kind: ast::ClassUnicodeKind::OneLetter('N'),
5695                     })),
5696                     Ast::Literal(ast::Literal {
5697                         span: span(3..4),
5698                         kind: ast::LiteralKind::Verbatim,
5699                         c: 'z',
5700                     }),
5701                 ],
5702             }))
5703         );
5704         assert_eq!(
5705             parser(r"\p{Greek}z").parse(),
5706             Ok(Ast::Concat(ast::Concat {
5707                 span: span(0..10),
5708                 asts: vec![
5709                     Ast::Class(ast::Class::Unicode(ast::ClassUnicode {
5710                         span: span(0..9),
5711                         negated: false,
5712                         kind: ast::ClassUnicodeKind::Named(s("Greek")),
5713                     })),
5714                     Ast::Literal(ast::Literal {
5715                         span: span(9..10),
5716                         kind: ast::LiteralKind::Verbatim,
5717                         c: 'z',
5718                     }),
5719                 ],
5720             }))
5721         );
5722         assert_eq!(
5723             parser(r"\p\{").parse().unwrap_err(),
5724             TestError {
5725                 span: span(2..3),
5726                 kind: ast::ErrorKind::UnicodeClassInvalid,
5727             }
5728         );
5729         assert_eq!(
5730             parser(r"\P\{").parse().unwrap_err(),
5731             TestError {
5732                 span: span(2..3),
5733                 kind: ast::ErrorKind::UnicodeClassInvalid,
5734             }
5735         );
5736     }
5737 
5738     #[test]
parse_perl_class()5739     fn parse_perl_class() {
5740         assert_eq!(
5741             parser(r"\d").parse_escape(),
5742             Ok(Primitive::Perl(ast::ClassPerl {
5743                 span: span(0..2),
5744                 kind: ast::ClassPerlKind::Digit,
5745                 negated: false,
5746             }))
5747         );
5748         assert_eq!(
5749             parser(r"\D").parse_escape(),
5750             Ok(Primitive::Perl(ast::ClassPerl {
5751                 span: span(0..2),
5752                 kind: ast::ClassPerlKind::Digit,
5753                 negated: true,
5754             }))
5755         );
5756         assert_eq!(
5757             parser(r"\s").parse_escape(),
5758             Ok(Primitive::Perl(ast::ClassPerl {
5759                 span: span(0..2),
5760                 kind: ast::ClassPerlKind::Space,
5761                 negated: false,
5762             }))
5763         );
5764         assert_eq!(
5765             parser(r"\S").parse_escape(),
5766             Ok(Primitive::Perl(ast::ClassPerl {
5767                 span: span(0..2),
5768                 kind: ast::ClassPerlKind::Space,
5769                 negated: true,
5770             }))
5771         );
5772         assert_eq!(
5773             parser(r"\w").parse_escape(),
5774             Ok(Primitive::Perl(ast::ClassPerl {
5775                 span: span(0..2),
5776                 kind: ast::ClassPerlKind::Word,
5777                 negated: false,
5778             }))
5779         );
5780         assert_eq!(
5781             parser(r"\W").parse_escape(),
5782             Ok(Primitive::Perl(ast::ClassPerl {
5783                 span: span(0..2),
5784                 kind: ast::ClassPerlKind::Word,
5785                 negated: true,
5786             }))
5787         );
5788 
5789         assert_eq!(
5790             parser(r"\d").parse(),
5791             Ok(Ast::Class(ast::Class::Perl(ast::ClassPerl {
5792                 span: span(0..2),
5793                 kind: ast::ClassPerlKind::Digit,
5794                 negated: false,
5795             })))
5796         );
5797         assert_eq!(
5798             parser(r"\dz").parse(),
5799             Ok(Ast::Concat(ast::Concat {
5800                 span: span(0..3),
5801                 asts: vec![
5802                     Ast::Class(ast::Class::Perl(ast::ClassPerl {
5803                         span: span(0..2),
5804                         kind: ast::ClassPerlKind::Digit,
5805                         negated: false,
5806                     })),
5807                     Ast::Literal(ast::Literal {
5808                         span: span(2..3),
5809                         kind: ast::LiteralKind::Verbatim,
5810                         c: 'z',
5811                     }),
5812                 ],
5813             }))
5814         );
5815     }
5816 
5817     // This tests a bug fix where the nest limit checker wasn't decrementing
5818     // its depth during post-traversal, which causes long regexes to trip
5819     // the default limit too aggressively.
5820     #[test]
regression_454_nest_too_big()5821     fn regression_454_nest_too_big() {
5822         let pattern = r#"
5823         2(?:
5824           [45]\d{3}|
5825           7(?:
5826             1[0-267]|
5827             2[0-289]|
5828             3[0-29]|
5829             4[01]|
5830             5[1-3]|
5831             6[013]|
5832             7[0178]|
5833             91
5834           )|
5835           8(?:
5836             0[125]|
5837             [139][1-6]|
5838             2[0157-9]|
5839             41|
5840             6[1-35]|
5841             7[1-5]|
5842             8[1-8]|
5843             90
5844           )|
5845           9(?:
5846             0[0-2]|
5847             1[0-4]|
5848             2[568]|
5849             3[3-6]|
5850             5[5-7]|
5851             6[0167]|
5852             7[15]|
5853             8[0146-9]
5854           )
5855         )\d{4}
5856         "#;
5857         assert!(parser_nest_limit(pattern, 50).parse().is_ok());
5858     }
5859 
5860     // This tests that we treat a trailing `-` in a character class as a
5861     // literal `-` even when whitespace mode is enabled and there is whitespace
5862     // after the trailing `-`.
5863     #[test]
regression_455_trailing_dash_ignore_whitespace()5864     fn regression_455_trailing_dash_ignore_whitespace() {
5865         assert!(parser("(?x)[ / - ]").parse().is_ok());
5866         assert!(parser("(?x)[ a - ]").parse().is_ok());
5867         assert!(parser(
5868             "(?x)[
5869             a
5870             - ]
5871         "
5872         )
5873         .parse()
5874         .is_ok());
5875         assert!(parser(
5876             "(?x)[
5877             a # wat
5878             - ]
5879         "
5880         )
5881         .parse()
5882         .is_ok());
5883 
5884         assert!(parser("(?x)[ / -").parse().is_err());
5885         assert!(parser("(?x)[ / - ").parse().is_err());
5886         assert!(parser(
5887             "(?x)[
5888             / -
5889         "
5890         )
5891         .parse()
5892         .is_err());
5893         assert!(parser(
5894             "(?x)[
5895             / - # wat
5896         "
5897         )
5898         .parse()
5899         .is_err());
5900     }
5901 }
5902