1 /*!
2 This module provides a regular expression parser.
3 */
4 
5 use std::borrow::Borrow;
6 use std::cell::{Cell, RefCell};
7 use std::mem;
8 use std::result;
9 
10 use ast::{self, Ast, Position, Span};
11 use either::Either;
12 
13 use is_meta_character;
14 
15 type Result<T> = result::Result<T, ast::Error>;
16 
17 /// A primitive is an expression with no sub-expressions. This includes
18 /// literals, assertions and non-set character classes. This representation
19 /// is used as intermediate state in the parser.
20 ///
21 /// This does not include ASCII character classes, since they can only appear
22 /// within a set character class.
23 #[derive(Clone, Debug, Eq, PartialEq)]
24 enum Primitive {
25     Literal(ast::Literal),
26     Assertion(ast::Assertion),
27     Dot(Span),
28     Perl(ast::ClassPerl),
29     Unicode(ast::ClassUnicode),
30 }
31 
32 impl Primitive {
33     /// Return the span of this primitive.
span(&self) -> &Span34     fn span(&self) -> &Span {
35         match *self {
36             Primitive::Literal(ref x) => &x.span,
37             Primitive::Assertion(ref x) => &x.span,
38             Primitive::Dot(ref span) => span,
39             Primitive::Perl(ref x) => &x.span,
40             Primitive::Unicode(ref x) => &x.span,
41         }
42     }
43 
44     /// Convert this primitive into a proper AST.
into_ast(self) -> Ast45     fn into_ast(self) -> Ast {
46         match self {
47             Primitive::Literal(lit) => Ast::Literal(lit),
48             Primitive::Assertion(assert) => Ast::Assertion(assert),
49             Primitive::Dot(span) => Ast::Dot(span),
50             Primitive::Perl(cls) => Ast::Class(ast::Class::Perl(cls)),
51             Primitive::Unicode(cls) => Ast::Class(ast::Class::Unicode(cls)),
52         }
53     }
54 
55     /// Convert this primitive into an item in a character class.
56     ///
57     /// If this primitive is not a legal item (i.e., an assertion or a dot),
58     /// then return an error.
into_class_set_item<P: Borrow<Parser>>( self, p: &ParserI<P>, ) -> Result<ast::ClassSetItem>59     fn into_class_set_item<P: Borrow<Parser>>(
60         self,
61         p: &ParserI<P>,
62     ) -> Result<ast::ClassSetItem> {
63         use self::Primitive::*;
64         use ast::ClassSetItem;
65 
66         match self {
67             Literal(lit) => Ok(ClassSetItem::Literal(lit)),
68             Perl(cls) => Ok(ClassSetItem::Perl(cls)),
69             Unicode(cls) => Ok(ClassSetItem::Unicode(cls)),
70             x => Err(p.error(*x.span(), ast::ErrorKind::ClassEscapeInvalid)),
71         }
72     }
73 
74     /// Convert this primitive into a literal in a character class. In
75     /// particular, literals are the only valid items that can appear in
76     /// ranges.
77     ///
78     /// If this primitive is not a legal item (i.e., a class, assertion or a
79     /// dot), then return an error.
into_class_literal<P: Borrow<Parser>>( self, p: &ParserI<P>, ) -> Result<ast::Literal>80     fn into_class_literal<P: Borrow<Parser>>(
81         self,
82         p: &ParserI<P>,
83     ) -> Result<ast::Literal> {
84         use self::Primitive::*;
85 
86         match self {
87             Literal(lit) => Ok(lit),
88             x => Err(p.error(*x.span(), ast::ErrorKind::ClassRangeLiteral)),
89         }
90     }
91 }
92 
93 /// Returns true if the given character is a hexadecimal digit.
is_hex(c: char) -> bool94 fn is_hex(c: char) -> bool {
95     ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')
96 }
97 
98 /// Returns true if the given character is a valid in a capture group name.
99 ///
100 /// If `first` is true, then `c` is treated as the first character in the
101 /// group name (which is not allowed to be a digit).
is_capture_char(c: char, first: bool) -> bool102 fn is_capture_char(c: char, first: bool) -> bool {
103     c == '_'
104         || (!first && c >= '0' && c <= '9')
105         || (c >= 'a' && c <= 'z')
106         || (c >= 'A' && c <= 'Z')
107 }
108 
109 /// A builder for a regular expression parser.
110 ///
111 /// This builder permits modifying configuration options for the parser.
112 #[derive(Clone, Debug)]
113 pub struct ParserBuilder {
114     ignore_whitespace: bool,
115     nest_limit: u32,
116     octal: bool,
117 }
118 
119 impl Default for ParserBuilder {
default() -> ParserBuilder120     fn default() -> ParserBuilder {
121         ParserBuilder::new()
122     }
123 }
124 
125 impl ParserBuilder {
126     /// Create a new parser builder with a default configuration.
new() -> ParserBuilder127     pub fn new() -> ParserBuilder {
128         ParserBuilder {
129             ignore_whitespace: false,
130             nest_limit: 250,
131             octal: false,
132         }
133     }
134 
135     /// Build a parser from this configuration with the given pattern.
build(&self) -> Parser136     pub fn build(&self) -> Parser {
137         Parser {
138             pos: Cell::new(Position { offset: 0, line: 1, column: 1 }),
139             capture_index: Cell::new(0),
140             nest_limit: self.nest_limit,
141             octal: self.octal,
142             initial_ignore_whitespace: self.ignore_whitespace,
143             ignore_whitespace: Cell::new(self.ignore_whitespace),
144             comments: RefCell::new(vec![]),
145             stack_group: RefCell::new(vec![]),
146             stack_class: RefCell::new(vec![]),
147             capture_names: RefCell::new(vec![]),
148             scratch: RefCell::new(String::new()),
149         }
150     }
151 
152     /// Set the nesting limit for this parser.
153     ///
154     /// The nesting limit controls how deep the abstract syntax tree is allowed
155     /// to be. If the AST exceeds the given limit (e.g., with too many nested
156     /// groups), then an error is returned by the parser.
157     ///
158     /// The purpose of this limit is to act as a heuristic to prevent stack
159     /// overflow for consumers that do structural induction on an `Ast` using
160     /// explicit recursion. While this crate never does this (instead using
161     /// constant stack space and moving the call stack to the heap), other
162     /// crates may.
163     ///
164     /// This limit is not checked until the entire Ast is parsed. Therefore,
165     /// if callers want to put a limit on the amount of heap space used, then
166     /// they should impose a limit on the length, in bytes, of the concrete
167     /// pattern string. In particular, this is viable since this parser
168     /// implementation will limit itself to heap space proportional to the
169     /// lenth of the pattern string.
170     ///
171     /// Note that a nest limit of `0` will return a nest limit error for most
172     /// patterns but not all. For example, a nest limit of `0` permits `a` but
173     /// not `ab`, since `ab` requires a concatenation, which results in a nest
174     /// depth of `1`. In general, a nest limit is not something that manifests
175     /// in an obvious way in the concrete syntax, therefore, it should not be
176     /// used in a granular way.
nest_limit(&mut self, limit: u32) -> &mut ParserBuilder177     pub fn nest_limit(&mut self, limit: u32) -> &mut ParserBuilder {
178         self.nest_limit = limit;
179         self
180     }
181 
182     /// Whether to support octal syntax or not.
183     ///
184     /// Octal syntax is a little-known way of uttering Unicode codepoints in
185     /// a regular expression. For example, `a`, `\x61`, `\u0061` and
186     /// `\141` are all equivalent regular expressions, where the last example
187     /// shows octal syntax.
188     ///
189     /// While supporting octal syntax isn't in and of itself a problem, it does
190     /// make good error messages harder. That is, in PCRE based regex engines,
191     /// syntax like `\0` invokes a backreference, which is explicitly
192     /// unsupported in Rust's regex engine. However, many users expect it to
193     /// be supported. Therefore, when octal support is disabled, the error
194     /// message will explicitly mention that backreferences aren't supported.
195     ///
196     /// Octal syntax is disabled by default.
octal(&mut self, yes: bool) -> &mut ParserBuilder197     pub fn octal(&mut self, yes: bool) -> &mut ParserBuilder {
198         self.octal = yes;
199         self
200     }
201 
202     /// Enable verbose mode in the regular expression.
203     ///
204     /// When enabled, verbose mode permits insigificant whitespace in many
205     /// places in the regular expression, as well as comments. Comments are
206     /// started using `#` and continue until the end of the line.
207     ///
208     /// By default, this is disabled. It may be selectively enabled in the
209     /// regular expression by using the `x` flag regardless of this setting.
ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder210     pub fn ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder {
211         self.ignore_whitespace = yes;
212         self
213     }
214 }
215 
216 /// A regular expression parser.
217 ///
218 /// This parses a string representation of a regular expression into an
219 /// abstract syntax tree. The size of the tree is proportional to the length
220 /// of the regular expression pattern.
221 ///
222 /// A `Parser` can be configured in more detail via a
223 /// [`ParserBuilder`](struct.ParserBuilder.html).
224 #[derive(Clone, Debug)]
225 pub struct Parser {
226     /// The current position of the parser.
227     pos: Cell<Position>,
228     /// The current capture index.
229     capture_index: Cell<u32>,
230     /// The maximum number of open parens/brackets allowed. If the parser
231     /// exceeds this number, then an error is returned.
232     nest_limit: u32,
233     /// Whether to support octal syntax or not. When `false`, the parser will
234     /// return an error helpfully pointing out that backreferences are not
235     /// supported.
236     octal: bool,
237     /// The initial setting for `ignore_whitespace` as provided by
238     /// Th`ParserBuilder`. is is used when reseting the parser's state.
239     initial_ignore_whitespace: bool,
240     /// Whether whitespace should be ignored. When enabled, comments are
241     /// also permitted.
242     ignore_whitespace: Cell<bool>,
243     /// A list of comments, in order of appearance.
244     comments: RefCell<Vec<ast::Comment>>,
245     /// A stack of grouped sub-expressions, including alternations.
246     stack_group: RefCell<Vec<GroupState>>,
247     /// A stack of nested character classes. This is only non-empty when
248     /// parsing a class.
249     stack_class: RefCell<Vec<ClassState>>,
250     /// A sorted sequence of capture names. This is used to detect duplicate
251     /// capture names and report an error if one is detected.
252     capture_names: RefCell<Vec<ast::CaptureName>>,
253     /// A scratch buffer used in various places. Mostly this is used to
254     /// accumulate relevant characters from parts of a pattern.
255     scratch: RefCell<String>,
256 }
257 
258 /// ParserI is the internal parser implementation.
259 ///
260 /// We use this separate type so that we can carry the provided pattern string
261 /// along with us. In particular, a `Parser` internal state is not tied to any
262 /// one pattern, but `ParserI` is.
263 ///
264 /// This type also lets us use `ParserI<&Parser>` in production code while
265 /// retaining the convenience of `ParserI<Parser>` for tests, which sometimes
266 /// work against the internal interface of the parser.
267 #[derive(Clone, Debug)]
268 struct ParserI<'s, P> {
269     /// The parser state/configuration.
270     parser: P,
271     /// The full regular expression provided by the user.
272     pattern: &'s str,
273 }
274 
275 /// GroupState represents a single stack frame while parsing nested groups
276 /// and alternations. Each frame records the state up to an opening parenthesis
277 /// or a alternating bracket `|`.
278 #[derive(Clone, Debug)]
279 enum GroupState {
280     /// This state is pushed whenever an opening group is found.
281     Group {
282         /// The concatenation immediately preceding the opening group.
283         concat: ast::Concat,
284         /// The group that has been opened. Its sub-AST is always empty.
285         group: ast::Group,
286         /// Whether this group has the `x` flag enabled or not.
287         ignore_whitespace: bool,
288     },
289     /// This state is pushed whenever a new alternation branch is found. If
290     /// an alternation branch is found and this state is at the top of the
291     /// stack, then this state should be modified to include the new
292     /// alternation.
293     Alternation(ast::Alternation),
294 }
295 
296 /// ClassState represents a single stack frame while parsing character classes.
297 /// Each frame records the state up to an intersection, difference, symmetric
298 /// difference or nested class.
299 ///
300 /// Note that a parser's character class stack is only non-empty when parsing
301 /// a character class. In all other cases, it is empty.
302 #[derive(Clone, Debug)]
303 enum ClassState {
304     /// This state is pushed whenever an opening bracket is found.
305     Open {
306         /// The union of class items immediately preceding this class.
307         union: ast::ClassSetUnion,
308         /// The class that has been opened. Typically this just corresponds
309         /// to the `[`, but it can also include `[^` since `^` indicates
310         /// negation of the class.
311         set: ast::ClassBracketed,
312     },
313     /// This state is pushed when a operator is seen. When popped, the stored
314     /// set becomes the left hand side of the operator.
315     Op {
316         /// The type of the operation, i.e., &&, -- or ~~.
317         kind: ast::ClassSetBinaryOpKind,
318         /// The left-hand side of the operator.
319         lhs: ast::ClassSet,
320     },
321 }
322 
323 impl Parser {
324     /// Create a new parser with a default configuration.
325     ///
326     /// The parser can be run with either the `parse` or `parse_with_comments`
327     /// methods. The parse methods return an abstract syntax tree.
328     ///
329     /// To set configuration options on the parser, use
330     /// [`ParserBuilder`](struct.ParserBuilder.html).
new() -> Parser331     pub fn new() -> Parser {
332         ParserBuilder::new().build()
333     }
334 
335     /// Parse the regular expression into an abstract syntax tree.
parse(&mut self, pattern: &str) -> Result<Ast>336     pub fn parse(&mut self, pattern: &str) -> Result<Ast> {
337         ParserI::new(self, pattern).parse()
338     }
339 
340     /// Parse the regular expression and return an abstract syntax tree with
341     /// all of the comments found in the pattern.
parse_with_comments( &mut self, pattern: &str, ) -> Result<ast::WithComments>342     pub fn parse_with_comments(
343         &mut self,
344         pattern: &str,
345     ) -> Result<ast::WithComments> {
346         ParserI::new(self, pattern).parse_with_comments()
347     }
348 
349     /// Reset the internal state of a parser.
350     ///
351     /// This is called at the beginning of every parse. This prevents the
352     /// parser from running with inconsistent state (say, if a previous
353     /// invocation returned an error and the parser is reused).
reset(&self)354     fn reset(&self) {
355         // These settings should be in line with the construction
356         // in `ParserBuilder::build`.
357         self.pos.set(Position { offset: 0, line: 1, column: 1 });
358         self.ignore_whitespace.set(self.initial_ignore_whitespace);
359         self.comments.borrow_mut().clear();
360         self.stack_group.borrow_mut().clear();
361         self.stack_class.borrow_mut().clear();
362     }
363 }
364 
365 impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
366     /// Build an internal parser from a parser configuration and a pattern.
new(parser: P, pattern: &'s str) -> ParserI<'s, P>367     fn new(parser: P, pattern: &'s str) -> ParserI<'s, P> {
368         ParserI { parser: parser, pattern: pattern }
369     }
370 
371     /// Return a reference to the parser state.
parser(&self) -> &Parser372     fn parser(&self) -> &Parser {
373         self.parser.borrow()
374     }
375 
376     /// Return a reference to the pattern being parsed.
pattern(&self) -> &str377     fn pattern(&self) -> &str {
378         self.pattern.borrow()
379     }
380 
381     /// Create a new error with the given span and error type.
error(&self, span: Span, kind: ast::ErrorKind) -> ast::Error382     fn error(&self, span: Span, kind: ast::ErrorKind) -> ast::Error {
383         ast::Error {
384             kind: kind,
385             pattern: self.pattern().to_string(),
386             span: span,
387         }
388     }
389 
390     /// Return the current offset of the parser.
391     ///
392     /// The offset starts at `0` from the beginning of the regular expression
393     /// pattern string.
offset(&self) -> usize394     fn offset(&self) -> usize {
395         self.parser().pos.get().offset
396     }
397 
398     /// Return the current line number of the parser.
399     ///
400     /// The line number starts at `1`.
line(&self) -> usize401     fn line(&self) -> usize {
402         self.parser().pos.get().line
403     }
404 
405     /// Return the current column of the parser.
406     ///
407     /// The column number starts at `1` and is reset whenever a `\n` is seen.
column(&self) -> usize408     fn column(&self) -> usize {
409         self.parser().pos.get().column
410     }
411 
412     /// Return the next capturing index. Each subsequent call increments the
413     /// internal index.
414     ///
415     /// The span given should correspond to the location of the opening
416     /// parenthesis.
417     ///
418     /// If the capture limit is exceeded, then an error is returned.
next_capture_index(&self, span: Span) -> Result<u32>419     fn next_capture_index(&self, span: Span) -> Result<u32> {
420         let current = self.parser().capture_index.get();
421         let i = current.checked_add(1).ok_or_else(|| {
422             self.error(span, ast::ErrorKind::CaptureLimitExceeded)
423         })?;
424         self.parser().capture_index.set(i);
425         Ok(i)
426     }
427 
428     /// Adds the given capture name to this parser. If this capture name has
429     /// already been used, then an error is returned.
add_capture_name(&self, cap: &ast::CaptureName) -> Result<()>430     fn add_capture_name(&self, cap: &ast::CaptureName) -> Result<()> {
431         let mut names = self.parser().capture_names.borrow_mut();
432         match names
433             .binary_search_by_key(&cap.name.as_str(), |c| c.name.as_str())
434         {
435             Err(i) => {
436                 names.insert(i, cap.clone());
437                 Ok(())
438             }
439             Ok(i) => Err(self.error(
440                 cap.span,
441                 ast::ErrorKind::GroupNameDuplicate { original: names[i].span },
442             )),
443         }
444     }
445 
446     /// Return whether the parser should ignore whitespace or not.
ignore_whitespace(&self) -> bool447     fn ignore_whitespace(&self) -> bool {
448         self.parser().ignore_whitespace.get()
449     }
450 
451     /// Return the character at the current position of the parser.
452     ///
453     /// This panics if the current position does not point to a valid char.
char(&self) -> char454     fn char(&self) -> char {
455         self.char_at(self.offset())
456     }
457 
458     /// Return the character at the given position.
459     ///
460     /// This panics if the given position does not point to a valid char.
char_at(&self, i: usize) -> char461     fn char_at(&self, i: usize) -> char {
462         self.pattern()[i..]
463             .chars()
464             .next()
465             .unwrap_or_else(|| panic!("expected char at offset {}", i))
466     }
467 
468     /// Bump the parser to the next Unicode scalar value.
469     ///
470     /// If the end of the input has been reached, then `false` is returned.
bump(&self) -> bool471     fn bump(&self) -> bool {
472         if self.is_eof() {
473             return false;
474         }
475         let Position { mut offset, mut line, mut column } = self.pos();
476         if self.char() == '\n' {
477             line = line.checked_add(1).unwrap();
478             column = 1;
479         } else {
480             column = column.checked_add(1).unwrap();
481         }
482         offset += self.char().len_utf8();
483         self.parser().pos.set(Position {
484             offset: offset,
485             line: line,
486             column: column,
487         });
488         self.pattern()[self.offset()..].chars().next().is_some()
489     }
490 
491     /// If the substring starting at the current position of the parser has
492     /// the given prefix, then bump the parser to the character immediately
493     /// following the prefix and return true. Otherwise, don't bump the parser
494     /// and return false.
bump_if(&self, prefix: &str) -> bool495     fn bump_if(&self, prefix: &str) -> bool {
496         if self.pattern()[self.offset()..].starts_with(prefix) {
497             for _ in 0..prefix.chars().count() {
498                 self.bump();
499             }
500             true
501         } else {
502             false
503         }
504     }
505 
506     /// Returns true if and only if the parser is positioned at a look-around
507     /// prefix. The conditions under which this returns true must always
508     /// correspond to a regular expression that would otherwise be consider
509     /// invalid.
510     ///
511     /// This should only be called immediately after parsing the opening of
512     /// a group or a set of flags.
is_lookaround_prefix(&self) -> bool513     fn is_lookaround_prefix(&self) -> bool {
514         self.bump_if("?=")
515             || self.bump_if("?!")
516             || self.bump_if("?<=")
517             || self.bump_if("?<!")
518     }
519 
520     /// Bump the parser, and if the `x` flag is enabled, bump through any
521     /// subsequent spaces. Return true if and only if the parser is not at
522     /// EOF.
bump_and_bump_space(&self) -> bool523     fn bump_and_bump_space(&self) -> bool {
524         if !self.bump() {
525             return false;
526         }
527         self.bump_space();
528         !self.is_eof()
529     }
530 
531     /// If the `x` flag is enabled (i.e., whitespace insensitivity with
532     /// comments), then this will advance the parser through all whitespace
533     /// and comments to the next non-whitespace non-comment byte.
534     ///
535     /// If the `x` flag is disabled, then this is a no-op.
536     ///
537     /// This should be used selectively throughout the parser where
538     /// arbitrary whitespace is permitted when the `x` flag is enabled. For
539     /// example, `{   5  , 6}` is equivalent to `{5,6}`.
bump_space(&self)540     fn bump_space(&self) {
541         if !self.ignore_whitespace() {
542             return;
543         }
544         while !self.is_eof() {
545             if self.char().is_whitespace() {
546                 self.bump();
547             } else if self.char() == '#' {
548                 let start = self.pos();
549                 let mut comment_text = String::new();
550                 self.bump();
551                 while !self.is_eof() {
552                     let c = self.char();
553                     self.bump();
554                     if c == '\n' {
555                         break;
556                     }
557                     comment_text.push(c);
558                 }
559                 let comment = ast::Comment {
560                     span: Span::new(start, self.pos()),
561                     comment: comment_text,
562                 };
563                 self.parser().comments.borrow_mut().push(comment);
564             } else {
565                 break;
566             }
567         }
568     }
569 
570     /// Peek at the next character in the input without advancing the parser.
571     ///
572     /// If the input has been exhausted, then this returns `None`.
peek(&self) -> Option<char>573     fn peek(&self) -> Option<char> {
574         if self.is_eof() {
575             return None;
576         }
577         self.pattern()[self.offset() + self.char().len_utf8()..].chars().next()
578     }
579 
580     /// Like peek, but will ignore spaces when the parser is in whitespace
581     /// insensitive mode.
peek_space(&self) -> Option<char>582     fn peek_space(&self) -> Option<char> {
583         if !self.ignore_whitespace() {
584             return self.peek();
585         }
586         if self.is_eof() {
587             return None;
588         }
589         let mut start = self.offset() + self.char().len_utf8();
590         let mut in_comment = false;
591         for (i, c) in self.pattern()[start..].char_indices() {
592             if c.is_whitespace() {
593                 continue;
594             } else if !in_comment && c == '#' {
595                 in_comment = true;
596             } else if in_comment && c == '\n' {
597                 in_comment = false;
598             } else {
599                 start += i;
600                 break;
601             }
602         }
603         self.pattern()[start..].chars().next()
604     }
605 
606     /// Returns true if the next call to `bump` would return false.
is_eof(&self) -> bool607     fn is_eof(&self) -> bool {
608         self.offset() == self.pattern().len()
609     }
610 
611     /// Return the current position of the parser, which includes the offset,
612     /// line and column.
pos(&self) -> Position613     fn pos(&self) -> Position {
614         self.parser().pos.get()
615     }
616 
617     /// Create a span at the current position of the parser. Both the start
618     /// and end of the span are set.
span(&self) -> Span619     fn span(&self) -> Span {
620         Span::splat(self.pos())
621     }
622 
623     /// Create a span that covers the current character.
span_char(&self) -> Span624     fn span_char(&self) -> Span {
625         let mut next = Position {
626             offset: self.offset().checked_add(self.char().len_utf8()).unwrap(),
627             line: self.line(),
628             column: self.column().checked_add(1).unwrap(),
629         };
630         if self.char() == '\n' {
631             next.line += 1;
632             next.column = 1;
633         }
634         Span::new(self.pos(), next)
635     }
636 
637     /// Parse and push a single alternation on to the parser's internal stack.
638     /// If the top of the stack already has an alternation, then add to that
639     /// instead of pushing a new one.
640     ///
641     /// The concatenation given corresponds to a single alternation branch.
642     /// The concatenation returned starts the next branch and is empty.
643     ///
644     /// This assumes the parser is currently positioned at `|` and will advance
645     /// the parser to the character following `|`.
646     #[inline(never)]
push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat>647     fn push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
648         assert_eq!(self.char(), '|');
649         concat.span.end = self.pos();
650         self.push_or_add_alternation(concat);
651         self.bump();
652         Ok(ast::Concat { span: self.span(), asts: vec![] })
653     }
654 
655     /// Pushes or adds the given branch of an alternation to the parser's
656     /// internal stack of state.
push_or_add_alternation(&self, concat: ast::Concat)657     fn push_or_add_alternation(&self, concat: ast::Concat) {
658         use self::GroupState::*;
659 
660         let mut stack = self.parser().stack_group.borrow_mut();
661         if let Some(&mut Alternation(ref mut alts)) = stack.last_mut() {
662             alts.asts.push(concat.into_ast());
663             return;
664         }
665         stack.push(Alternation(ast::Alternation {
666             span: Span::new(concat.span.start, self.pos()),
667             asts: vec![concat.into_ast()],
668         }));
669     }
670 
671     /// Parse and push a group AST (and its parent concatenation) on to the
672     /// parser's internal stack. Return a fresh concatenation corresponding
673     /// to the group's sub-AST.
674     ///
675     /// If a set of flags was found (with no group), then the concatenation
676     /// is returned with that set of flags added.
677     ///
678     /// This assumes that the parser is currently positioned on the opening
679     /// parenthesis. It advances the parser to the character at the start
680     /// of the sub-expression (or adjoining expression).
681     ///
682     /// If there was a problem parsing the start of the group, then an error
683     /// is returned.
684     #[inline(never)]
push_group(&self, mut concat: ast::Concat) -> Result<ast::Concat>685     fn push_group(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
686         assert_eq!(self.char(), '(');
687         match self.parse_group()? {
688             Either::Left(set) => {
689                 let ignore = set.flags.flag_state(ast::Flag::IgnoreWhitespace);
690                 if let Some(v) = ignore {
691                     self.parser().ignore_whitespace.set(v);
692                 }
693 
694                 concat.asts.push(Ast::Flags(set));
695                 Ok(concat)
696             }
697             Either::Right(group) => {
698                 let old_ignore_whitespace = self.ignore_whitespace();
699                 let new_ignore_whitespace = group
700                     .flags()
701                     .and_then(|f| f.flag_state(ast::Flag::IgnoreWhitespace))
702                     .unwrap_or(old_ignore_whitespace);
703                 self.parser().stack_group.borrow_mut().push(
704                     GroupState::Group {
705                         concat: concat,
706                         group: group,
707                         ignore_whitespace: old_ignore_whitespace,
708                     },
709                 );
710                 self.parser().ignore_whitespace.set(new_ignore_whitespace);
711                 Ok(ast::Concat { span: self.span(), asts: vec![] })
712             }
713         }
714     }
715 
716     /// Pop a group AST from the parser's internal stack and set the group's
717     /// AST to the given concatenation. Return the concatenation containing
718     /// the group.
719     ///
720     /// This assumes that the parser is currently positioned on the closing
721     /// parenthesis and advances the parser to the character following the `)`.
722     ///
723     /// If no such group could be popped, then an unopened group error is
724     /// returned.
725     #[inline(never)]
pop_group(&self, mut group_concat: ast::Concat) -> Result<ast::Concat>726     fn pop_group(&self, mut group_concat: ast::Concat) -> Result<ast::Concat> {
727         use self::GroupState::*;
728 
729         assert_eq!(self.char(), ')');
730         let mut stack = self.parser().stack_group.borrow_mut();
731         let (mut prior_concat, mut group, ignore_whitespace, alt) = match stack
732             .pop()
733         {
734             Some(Group { concat, group, ignore_whitespace }) => {
735                 (concat, group, ignore_whitespace, None)
736             }
737             Some(Alternation(alt)) => match stack.pop() {
738                 Some(Group { concat, group, ignore_whitespace }) => {
739                     (concat, group, ignore_whitespace, Some(alt))
740                 }
741                 None | Some(Alternation(_)) => {
742                     return Err(self.error(
743                         self.span_char(),
744                         ast::ErrorKind::GroupUnopened,
745                     ));
746                 }
747             },
748             None => {
749                 return Err(self
750                     .error(self.span_char(), ast::ErrorKind::GroupUnopened));
751             }
752         };
753         self.parser().ignore_whitespace.set(ignore_whitespace);
754         group_concat.span.end = self.pos();
755         self.bump();
756         group.span.end = self.pos();
757         match alt {
758             Some(mut alt) => {
759                 alt.span.end = group_concat.span.end;
760                 alt.asts.push(group_concat.into_ast());
761                 group.ast = Box::new(alt.into_ast());
762             }
763             None => {
764                 group.ast = Box::new(group_concat.into_ast());
765             }
766         }
767         prior_concat.asts.push(Ast::Group(group));
768         Ok(prior_concat)
769     }
770 
771     /// Pop the last state from the parser's internal stack, if it exists, and
772     /// add the given concatenation to it. There either must be no state or a
773     /// single alternation item on the stack. Any other scenario produces an
774     /// error.
775     ///
776     /// This assumes that the parser has advanced to the end.
777     #[inline(never)]
pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast>778     fn pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast> {
779         concat.span.end = self.pos();
780         let mut stack = self.parser().stack_group.borrow_mut();
781         let ast = match stack.pop() {
782             None => Ok(concat.into_ast()),
783             Some(GroupState::Alternation(mut alt)) => {
784                 alt.span.end = self.pos();
785                 alt.asts.push(concat.into_ast());
786                 Ok(Ast::Alternation(alt))
787             }
788             Some(GroupState::Group { group, .. }) => {
789                 return Err(
790                     self.error(group.span, ast::ErrorKind::GroupUnclosed)
791                 );
792             }
793         };
794         // If we try to pop again, there should be nothing.
795         match stack.pop() {
796             None => ast,
797             Some(GroupState::Alternation(_)) => {
798                 // This unreachable is unfortunate. This case can't happen
799                 // because the only way we can be here is if there were two
800                 // `GroupState::Alternation`s adjacent in the parser's stack,
801                 // which we guarantee to never happen because we never push a
802                 // `GroupState::Alternation` if one is already at the top of
803                 // the stack.
804                 unreachable!()
805             }
806             Some(GroupState::Group { group, .. }) => {
807                 Err(self.error(group.span, ast::ErrorKind::GroupUnclosed))
808             }
809         }
810     }
811 
812     /// Parse the opening of a character class and push the current class
813     /// parsing context onto the parser's stack. This assumes that the parser
814     /// is positioned at an opening `[`. The given union should correspond to
815     /// the union of set items built up before seeing the `[`.
816     ///
817     /// If there was a problem parsing the opening of the class, then an error
818     /// is returned. Otherwise, a new union of set items for the class is
819     /// returned (which may be populated with either a `]` or a `-`).
820     #[inline(never)]
push_class_open( &self, parent_union: ast::ClassSetUnion, ) -> Result<ast::ClassSetUnion>821     fn push_class_open(
822         &self,
823         parent_union: ast::ClassSetUnion,
824     ) -> Result<ast::ClassSetUnion> {
825         assert_eq!(self.char(), '[');
826 
827         let (nested_set, nested_union) = self.parse_set_class_open()?;
828         self.parser()
829             .stack_class
830             .borrow_mut()
831             .push(ClassState::Open { union: parent_union, set: nested_set });
832         Ok(nested_union)
833     }
834 
835     /// Parse the end of a character class set and pop the character class
836     /// parser stack. The union given corresponds to the last union built
837     /// before seeing the closing `]`. The union returned corresponds to the
838     /// parent character class set with the nested class added to it.
839     ///
840     /// This assumes that the parser is positioned at a `]` and will advance
841     /// the parser to the byte immediately following the `]`.
842     ///
843     /// If the stack is empty after popping, then this returns the final
844     /// "top-level" character class AST (where a "top-level" character class
845     /// is one that is not nested inside any other character class).
846     ///
847     /// If there is no corresponding opening bracket on the parser's stack,
848     /// then an error is returned.
849     #[inline(never)]
pop_class( &self, nested_union: ast::ClassSetUnion, ) -> Result<Either<ast::ClassSetUnion, ast::Class>>850     fn pop_class(
851         &self,
852         nested_union: ast::ClassSetUnion,
853     ) -> Result<Either<ast::ClassSetUnion, ast::Class>> {
854         assert_eq!(self.char(), ']');
855 
856         let item = ast::ClassSet::Item(nested_union.into_item());
857         let prevset = self.pop_class_op(item);
858         let mut stack = self.parser().stack_class.borrow_mut();
859         match stack.pop() {
860             None => {
861                 // We can never observe an empty stack:
862                 //
863                 // 1) We are guaranteed to start with a non-empty stack since
864                 //    the character class parser is only initiated when it sees
865                 //    a `[`.
866                 // 2) If we ever observe an empty stack while popping after
867                 //    seeing a `]`, then we signal the character class parser
868                 //    to terminate.
869                 panic!("unexpected empty character class stack")
870             }
871             Some(ClassState::Op { .. }) => {
872                 // This panic is unfortunate, but this case is impossible
873                 // since we already popped the Op state if one exists above.
874                 // Namely, every push to the class parser stack is guarded by
875                 // whether an existing Op is already on the top of the stack.
876                 // If it is, the existing Op is modified. That is, the stack
877                 // can never have consecutive Op states.
878                 panic!("unexpected ClassState::Op")
879             }
880             Some(ClassState::Open { mut union, mut set }) => {
881                 self.bump();
882                 set.span.end = self.pos();
883                 set.kind = prevset;
884                 if stack.is_empty() {
885                     Ok(Either::Right(ast::Class::Bracketed(set)))
886                 } else {
887                     union.push(ast::ClassSetItem::Bracketed(Box::new(set)));
888                     Ok(Either::Left(union))
889                 }
890             }
891         }
892     }
893 
894     /// Return an "unclosed class" error whose span points to the most
895     /// recently opened class.
896     ///
897     /// This should only be called while parsing a character class.
898     #[inline(never)]
unclosed_class_error(&self) -> ast::Error899     fn unclosed_class_error(&self) -> ast::Error {
900         for state in self.parser().stack_class.borrow().iter().rev() {
901             match *state {
902                 ClassState::Open { ref set, .. } => {
903                     return self
904                         .error(set.span, ast::ErrorKind::ClassUnclosed);
905                 }
906                 _ => {}
907             }
908         }
909         // We are guaranteed to have a non-empty stack with at least
910         // one open bracket, so we should never get here.
911         panic!("no open character class found")
912     }
913 
914     /// Push the current set of class items on to the class parser's stack as
915     /// the left hand side of the given operator.
916     ///
917     /// A fresh set union is returned, which should be used to build the right
918     /// hand side of this operator.
919     #[inline(never)]
push_class_op( &self, next_kind: ast::ClassSetBinaryOpKind, next_union: ast::ClassSetUnion, ) -> ast::ClassSetUnion920     fn push_class_op(
921         &self,
922         next_kind: ast::ClassSetBinaryOpKind,
923         next_union: ast::ClassSetUnion,
924     ) -> ast::ClassSetUnion {
925         let item = ast::ClassSet::Item(next_union.into_item());
926         let new_lhs = self.pop_class_op(item);
927         self.parser()
928             .stack_class
929             .borrow_mut()
930             .push(ClassState::Op { kind: next_kind, lhs: new_lhs });
931         ast::ClassSetUnion { span: self.span(), items: vec![] }
932     }
933 
934     /// Pop a character class set from the character class parser stack. If the
935     /// top of the stack is just an item (not an operation), then return the
936     /// given set unchanged. If the top of the stack is an operation, then the
937     /// given set will be used as the rhs of the operation on the top of the
938     /// stack. In that case, the binary operation is returned as a set.
939     #[inline(never)]
pop_class_op(&self, rhs: ast::ClassSet) -> ast::ClassSet940     fn pop_class_op(&self, rhs: ast::ClassSet) -> ast::ClassSet {
941         let mut stack = self.parser().stack_class.borrow_mut();
942         let (kind, lhs) = match stack.pop() {
943             Some(ClassState::Op { kind, lhs }) => (kind, lhs),
944             Some(state @ ClassState::Open { .. }) => {
945                 stack.push(state);
946                 return rhs;
947             }
948             None => unreachable!(),
949         };
950         let span = Span::new(lhs.span().start, rhs.span().end);
951         ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
952             span: span,
953             kind: kind,
954             lhs: Box::new(lhs),
955             rhs: Box::new(rhs),
956         })
957     }
958 }
959 
960 impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
961     /// Parse the regular expression into an abstract syntax tree.
parse(&self) -> Result<Ast>962     fn parse(&self) -> Result<Ast> {
963         self.parse_with_comments().map(|astc| astc.ast)
964     }
965 
966     /// Parse the regular expression and return an abstract syntax tree with
967     /// all of the comments found in the pattern.
parse_with_comments(&self) -> Result<ast::WithComments>968     fn parse_with_comments(&self) -> Result<ast::WithComments> {
969         assert_eq!(self.offset(), 0, "parser can only be used once");
970         self.parser().reset();
971         let mut concat = ast::Concat { span: self.span(), asts: vec![] };
972         loop {
973             self.bump_space();
974             if self.is_eof() {
975                 break;
976             }
977             match self.char() {
978                 '(' => concat = self.push_group(concat)?,
979                 ')' => concat = self.pop_group(concat)?,
980                 '|' => concat = self.push_alternate(concat)?,
981                 '[' => {
982                     let class = self.parse_set_class()?;
983                     concat.asts.push(Ast::Class(class));
984                 }
985                 '?' => {
986                     concat = self.parse_uncounted_repetition(
987                         concat,
988                         ast::RepetitionKind::ZeroOrOne,
989                     )?;
990                 }
991                 '*' => {
992                     concat = self.parse_uncounted_repetition(
993                         concat,
994                         ast::RepetitionKind::ZeroOrMore,
995                     )?;
996                 }
997                 '+' => {
998                     concat = self.parse_uncounted_repetition(
999                         concat,
1000                         ast::RepetitionKind::OneOrMore,
1001                     )?;
1002                 }
1003                 '{' => {
1004                     concat = self.parse_counted_repetition(concat)?;
1005                 }
1006                 _ => concat.asts.push(self.parse_primitive()?.into_ast()),
1007             }
1008         }
1009         let ast = self.pop_group_end(concat)?;
1010         NestLimiter::new(self).check(&ast)?;
1011         Ok(ast::WithComments {
1012             ast: ast,
1013             comments: mem::replace(
1014                 &mut *self.parser().comments.borrow_mut(),
1015                 vec![],
1016             ),
1017         })
1018     }
1019 
1020     /// Parses an uncounted repetition operation. An uncounted repetition
1021     /// operator includes ?, * and +, but does not include the {m,n} syntax.
1022     /// The given `kind` should correspond to the operator observed by the
1023     /// caller.
1024     ///
1025     /// This assumes that the paser is currently positioned at the repetition
1026     /// operator and advances the parser to the first character after the
1027     /// operator. (Note that the operator may include a single additional `?`,
1028     /// which makes the operator ungreedy.)
1029     ///
1030     /// The caller should include the concatenation that is being built. The
1031     /// concatenation returned includes the repetition operator applied to the
1032     /// last expression in the given concatenation.
1033     #[inline(never)]
parse_uncounted_repetition( &self, mut concat: ast::Concat, kind: ast::RepetitionKind, ) -> Result<ast::Concat>1034     fn parse_uncounted_repetition(
1035         &self,
1036         mut concat: ast::Concat,
1037         kind: ast::RepetitionKind,
1038     ) -> Result<ast::Concat> {
1039         assert!(
1040             self.char() == '?' || self.char() == '*' || self.char() == '+'
1041         );
1042         let op_start = self.pos();
1043         let ast = match concat.asts.pop() {
1044             Some(ast) => ast,
1045             None => {
1046                 return Err(
1047                     self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1048                 )
1049             }
1050         };
1051         match ast {
1052             Ast::Empty(_) | Ast::Flags(_) => {
1053                 return Err(
1054                     self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1055                 )
1056             }
1057             _ => {}
1058         }
1059         let mut greedy = true;
1060         if self.bump() && self.char() == '?' {
1061             greedy = false;
1062             self.bump();
1063         }
1064         concat.asts.push(Ast::Repetition(ast::Repetition {
1065             span: ast.span().with_end(self.pos()),
1066             op: ast::RepetitionOp {
1067                 span: Span::new(op_start, self.pos()),
1068                 kind: kind,
1069             },
1070             greedy: greedy,
1071             ast: Box::new(ast),
1072         }));
1073         Ok(concat)
1074     }
1075 
1076     /// Parses a counted repetition operation. A counted repetition operator
1077     /// corresponds to the {m,n} syntax, and does not include the ?, * or +
1078     /// operators.
1079     ///
1080     /// This assumes that the paser is currently positioned at the opening `{`
1081     /// and advances the parser to the first character after the operator.
1082     /// (Note that the operator may include a single additional `?`, which
1083     /// makes the operator ungreedy.)
1084     ///
1085     /// The caller should include the concatenation that is being built. The
1086     /// concatenation returned includes the repetition operator applied to the
1087     /// last expression in the given concatenation.
1088     #[inline(never)]
parse_counted_repetition( &self, mut concat: ast::Concat, ) -> Result<ast::Concat>1089     fn parse_counted_repetition(
1090         &self,
1091         mut concat: ast::Concat,
1092     ) -> Result<ast::Concat> {
1093         assert!(self.char() == '{');
1094         let start = self.pos();
1095         let ast = match concat.asts.pop() {
1096             Some(ast) => ast,
1097             None => {
1098                 return Err(
1099                     self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1100                 )
1101             }
1102         };
1103         match ast {
1104             Ast::Empty(_) | Ast::Flags(_) => {
1105                 return Err(
1106                     self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1107                 )
1108             }
1109             _ => {}
1110         }
1111         if !self.bump_and_bump_space() {
1112             return Err(self.error(
1113                 Span::new(start, self.pos()),
1114                 ast::ErrorKind::RepetitionCountUnclosed,
1115             ));
1116         }
1117         let count_start = specialize_err(
1118             self.parse_decimal(),
1119             ast::ErrorKind::DecimalEmpty,
1120             ast::ErrorKind::RepetitionCountDecimalEmpty,
1121         )?;
1122         let mut range = ast::RepetitionRange::Exactly(count_start);
1123         if self.is_eof() {
1124             return Err(self.error(
1125                 Span::new(start, self.pos()),
1126                 ast::ErrorKind::RepetitionCountUnclosed,
1127             ));
1128         }
1129         if self.char() == ',' {
1130             if !self.bump_and_bump_space() {
1131                 return Err(self.error(
1132                     Span::new(start, self.pos()),
1133                     ast::ErrorKind::RepetitionCountUnclosed,
1134                 ));
1135             }
1136             if self.char() != '}' {
1137                 let count_end = specialize_err(
1138                     self.parse_decimal(),
1139                     ast::ErrorKind::DecimalEmpty,
1140                     ast::ErrorKind::RepetitionCountDecimalEmpty,
1141                 )?;
1142                 range = ast::RepetitionRange::Bounded(count_start, count_end);
1143             } else {
1144                 range = ast::RepetitionRange::AtLeast(count_start);
1145             }
1146         }
1147         if self.is_eof() || self.char() != '}' {
1148             return Err(self.error(
1149                 Span::new(start, self.pos()),
1150                 ast::ErrorKind::RepetitionCountUnclosed,
1151             ));
1152         }
1153 
1154         let mut greedy = true;
1155         if self.bump_and_bump_space() && self.char() == '?' {
1156             greedy = false;
1157             self.bump();
1158         }
1159 
1160         let op_span = Span::new(start, self.pos());
1161         if !range.is_valid() {
1162             return Err(
1163                 self.error(op_span, ast::ErrorKind::RepetitionCountInvalid)
1164             );
1165         }
1166         concat.asts.push(Ast::Repetition(ast::Repetition {
1167             span: ast.span().with_end(self.pos()),
1168             op: ast::RepetitionOp {
1169                 span: op_span,
1170                 kind: ast::RepetitionKind::Range(range),
1171             },
1172             greedy: greedy,
1173             ast: Box::new(ast),
1174         }));
1175         Ok(concat)
1176     }
1177 
1178     /// Parse a group (which contains a sub-expression) or a set of flags.
1179     ///
1180     /// If a group was found, then it is returned with an empty AST. If a set
1181     /// of flags is found, then that set is returned.
1182     ///
1183     /// The parser should be positioned at the opening parenthesis.
1184     ///
1185     /// This advances the parser to the character before the start of the
1186     /// sub-expression (in the case of a group) or to the closing parenthesis
1187     /// immediately following the set of flags.
1188     ///
1189     /// # Errors
1190     ///
1191     /// If flags are given and incorrectly specified, then a corresponding
1192     /// error is returned.
1193     ///
1194     /// If a capture name is given and it is incorrectly specified, then a
1195     /// corresponding error is returned.
1196     #[inline(never)]
parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>>1197     fn parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>> {
1198         assert_eq!(self.char(), '(');
1199         let open_span = self.span_char();
1200         self.bump();
1201         self.bump_space();
1202         if self.is_lookaround_prefix() {
1203             return Err(self.error(
1204                 Span::new(open_span.start, self.span().end),
1205                 ast::ErrorKind::UnsupportedLookAround,
1206             ));
1207         }
1208         let inner_span = self.span();
1209         if self.bump_if("?P<") {
1210             let capture_index = self.next_capture_index(open_span)?;
1211             let cap = self.parse_capture_name(capture_index)?;
1212             Ok(Either::Right(ast::Group {
1213                 span: open_span,
1214                 kind: ast::GroupKind::CaptureName(cap),
1215                 ast: Box::new(Ast::Empty(self.span())),
1216             }))
1217         } else if self.bump_if("?") {
1218             if self.is_eof() {
1219                 return Err(
1220                     self.error(open_span, ast::ErrorKind::GroupUnclosed)
1221                 );
1222             }
1223             let flags = self.parse_flags()?;
1224             let char_end = self.char();
1225             self.bump();
1226             if char_end == ')' {
1227                 // We don't allow empty flags, e.g., `(?)`. We instead
1228                 // interpret it as a repetition operator missing its argument.
1229                 if flags.items.is_empty() {
1230                     return Err(self.error(
1231                         inner_span,
1232                         ast::ErrorKind::RepetitionMissing,
1233                     ));
1234                 }
1235                 Ok(Either::Left(ast::SetFlags {
1236                     span: Span { end: self.pos(), ..open_span },
1237                     flags: flags,
1238                 }))
1239             } else {
1240                 assert_eq!(char_end, ':');
1241                 Ok(Either::Right(ast::Group {
1242                     span: open_span,
1243                     kind: ast::GroupKind::NonCapturing(flags),
1244                     ast: Box::new(Ast::Empty(self.span())),
1245                 }))
1246             }
1247         } else {
1248             let capture_index = self.next_capture_index(open_span)?;
1249             Ok(Either::Right(ast::Group {
1250                 span: open_span,
1251                 kind: ast::GroupKind::CaptureIndex(capture_index),
1252                 ast: Box::new(Ast::Empty(self.span())),
1253             }))
1254         }
1255     }
1256 
1257     /// Parses a capture group name. Assumes that the parser is positioned at
1258     /// the first character in the name following the opening `<` (and may
1259     /// possibly be EOF). This advances the parser to the first character
1260     /// following the closing `>`.
1261     ///
1262     /// The caller must provide the capture index of the group for this name.
1263     #[inline(never)]
parse_capture_name( &self, capture_index: u32, ) -> Result<ast::CaptureName>1264     fn parse_capture_name(
1265         &self,
1266         capture_index: u32,
1267     ) -> Result<ast::CaptureName> {
1268         if self.is_eof() {
1269             return Err(self
1270                 .error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1271         }
1272         let start = self.pos();
1273         loop {
1274             if self.char() == '>' {
1275                 break;
1276             }
1277             if !is_capture_char(self.char(), self.pos() == start) {
1278                 return Err(self.error(
1279                     self.span_char(),
1280                     ast::ErrorKind::GroupNameInvalid,
1281                 ));
1282             }
1283             if !self.bump() {
1284                 break;
1285             }
1286         }
1287         let end = self.pos();
1288         if self.is_eof() {
1289             return Err(self
1290                 .error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1291         }
1292         assert_eq!(self.char(), '>');
1293         self.bump();
1294         let name = &self.pattern()[start.offset..end.offset];
1295         if name.is_empty() {
1296             return Err(self.error(
1297                 Span::new(start, start),
1298                 ast::ErrorKind::GroupNameEmpty,
1299             ));
1300         }
1301         let capname = ast::CaptureName {
1302             span: Span::new(start, end),
1303             name: name.to_string(),
1304             index: capture_index,
1305         };
1306         self.add_capture_name(&capname)?;
1307         Ok(capname)
1308     }
1309 
1310     /// Parse a sequence of flags starting at the current character.
1311     ///
1312     /// This advances the parser to the character immediately following the
1313     /// flags, which is guaranteed to be either `:` or `)`.
1314     ///
1315     /// # Errors
1316     ///
1317     /// If any flags are duplicated, then an error is returned.
1318     ///
1319     /// If the negation operator is used more than once, then an error is
1320     /// returned.
1321     ///
1322     /// If no flags could be found or if the negation operation is not followed
1323     /// by any flags, then an error is returned.
1324     #[inline(never)]
parse_flags(&self) -> Result<ast::Flags>1325     fn parse_flags(&self) -> Result<ast::Flags> {
1326         let mut flags = ast::Flags { span: self.span(), items: vec![] };
1327         let mut last_was_negation = None;
1328         while self.char() != ':' && self.char() != ')' {
1329             if self.char() == '-' {
1330                 last_was_negation = Some(self.span_char());
1331                 let item = ast::FlagsItem {
1332                     span: self.span_char(),
1333                     kind: ast::FlagsItemKind::Negation,
1334                 };
1335                 if let Some(i) = flags.add_item(item) {
1336                     return Err(self.error(
1337                         self.span_char(),
1338                         ast::ErrorKind::FlagRepeatedNegation {
1339                             original: flags.items[i].span,
1340                         },
1341                     ));
1342                 }
1343             } else {
1344                 last_was_negation = None;
1345                 let item = ast::FlagsItem {
1346                     span: self.span_char(),
1347                     kind: ast::FlagsItemKind::Flag(self.parse_flag()?),
1348                 };
1349                 if let Some(i) = flags.add_item(item) {
1350                     return Err(self.error(
1351                         self.span_char(),
1352                         ast::ErrorKind::FlagDuplicate {
1353                             original: flags.items[i].span,
1354                         },
1355                     ));
1356                 }
1357             }
1358             if !self.bump() {
1359                 return Err(
1360                     self.error(self.span(), ast::ErrorKind::FlagUnexpectedEof)
1361                 );
1362             }
1363         }
1364         if let Some(span) = last_was_negation {
1365             return Err(self.error(span, ast::ErrorKind::FlagDanglingNegation));
1366         }
1367         flags.span.end = self.pos();
1368         Ok(flags)
1369     }
1370 
1371     /// Parse the current character as a flag. Do not advance the parser.
1372     ///
1373     /// # Errors
1374     ///
1375     /// If the flag is not recognized, then an error is returned.
1376     #[inline(never)]
parse_flag(&self) -> Result<ast::Flag>1377     fn parse_flag(&self) -> Result<ast::Flag> {
1378         match self.char() {
1379             'i' => Ok(ast::Flag::CaseInsensitive),
1380             'm' => Ok(ast::Flag::MultiLine),
1381             's' => Ok(ast::Flag::DotMatchesNewLine),
1382             'U' => Ok(ast::Flag::SwapGreed),
1383             'u' => Ok(ast::Flag::Unicode),
1384             'x' => Ok(ast::Flag::IgnoreWhitespace),
1385             _ => {
1386                 Err(self
1387                     .error(self.span_char(), ast::ErrorKind::FlagUnrecognized))
1388             }
1389         }
1390     }
1391 
1392     /// Parse a primitive AST. e.g., A literal, non-set character class or
1393     /// assertion.
1394     ///
1395     /// This assumes that the parser expects a primitive at the current
1396     /// location. i.e., All other non-primitive cases have been handled.
1397     /// For example, if the parser's position is at `|`, then `|` will be
1398     /// treated as a literal (e.g., inside a character class).
1399     ///
1400     /// This advances the parser to the first character immediately following
1401     /// the primitive.
parse_primitive(&self) -> Result<Primitive>1402     fn parse_primitive(&self) -> Result<Primitive> {
1403         match self.char() {
1404             '\\' => self.parse_escape(),
1405             '.' => {
1406                 let ast = Primitive::Dot(self.span_char());
1407                 self.bump();
1408                 Ok(ast)
1409             }
1410             '^' => {
1411                 let ast = Primitive::Assertion(ast::Assertion {
1412                     span: self.span_char(),
1413                     kind: ast::AssertionKind::StartLine,
1414                 });
1415                 self.bump();
1416                 Ok(ast)
1417             }
1418             '$' => {
1419                 let ast = Primitive::Assertion(ast::Assertion {
1420                     span: self.span_char(),
1421                     kind: ast::AssertionKind::EndLine,
1422                 });
1423                 self.bump();
1424                 Ok(ast)
1425             }
1426             c => {
1427                 let ast = Primitive::Literal(ast::Literal {
1428                     span: self.span_char(),
1429                     kind: ast::LiteralKind::Verbatim,
1430                     c: c,
1431                 });
1432                 self.bump();
1433                 Ok(ast)
1434             }
1435         }
1436     }
1437 
1438     /// Parse an escape sequence as a primitive AST.
1439     ///
1440     /// This assumes the parser is positioned at the start of the escape
1441     /// sequence, i.e., `\`. It advances the parser to the first position
1442     /// immediately following the escape sequence.
1443     #[inline(never)]
parse_escape(&self) -> Result<Primitive>1444     fn parse_escape(&self) -> Result<Primitive> {
1445         assert_eq!(self.char(), '\\');
1446         let start = self.pos();
1447         if !self.bump() {
1448             return Err(self.error(
1449                 Span::new(start, self.pos()),
1450                 ast::ErrorKind::EscapeUnexpectedEof,
1451             ));
1452         }
1453         let c = self.char();
1454         // Put some of the more complicated routines into helpers.
1455         match c {
1456             '0'..='7' => {
1457                 if !self.parser().octal {
1458                     return Err(self.error(
1459                         Span::new(start, self.span_char().end),
1460                         ast::ErrorKind::UnsupportedBackreference,
1461                     ));
1462                 }
1463                 let mut lit = self.parse_octal();
1464                 lit.span.start = start;
1465                 return Ok(Primitive::Literal(lit));
1466             }
1467             '8'..='9' if !self.parser().octal => {
1468                 return Err(self.error(
1469                     Span::new(start, self.span_char().end),
1470                     ast::ErrorKind::UnsupportedBackreference,
1471                 ));
1472             }
1473             'x' | 'u' | 'U' => {
1474                 let mut lit = self.parse_hex()?;
1475                 lit.span.start = start;
1476                 return Ok(Primitive::Literal(lit));
1477             }
1478             'p' | 'P' => {
1479                 let mut cls = self.parse_unicode_class()?;
1480                 cls.span.start = start;
1481                 return Ok(Primitive::Unicode(cls));
1482             }
1483             'd' | 's' | 'w' | 'D' | 'S' | 'W' => {
1484                 let mut cls = self.parse_perl_class();
1485                 cls.span.start = start;
1486                 return Ok(Primitive::Perl(cls));
1487             }
1488             _ => {}
1489         }
1490 
1491         // Handle all of the one letter sequences inline.
1492         self.bump();
1493         let span = Span::new(start, self.pos());
1494         if is_meta_character(c) {
1495             return Ok(Primitive::Literal(ast::Literal {
1496                 span: span,
1497                 kind: ast::LiteralKind::Punctuation,
1498                 c: c,
1499             }));
1500         }
1501         let special = |kind, c| {
1502             Ok(Primitive::Literal(ast::Literal {
1503                 span: span,
1504                 kind: ast::LiteralKind::Special(kind),
1505                 c: c,
1506             }))
1507         };
1508         match c {
1509             'a' => special(ast::SpecialLiteralKind::Bell, '\x07'),
1510             'f' => special(ast::SpecialLiteralKind::FormFeed, '\x0C'),
1511             't' => special(ast::SpecialLiteralKind::Tab, '\t'),
1512             'n' => special(ast::SpecialLiteralKind::LineFeed, '\n'),
1513             'r' => special(ast::SpecialLiteralKind::CarriageReturn, '\r'),
1514             'v' => special(ast::SpecialLiteralKind::VerticalTab, '\x0B'),
1515             ' ' if self.ignore_whitespace() => {
1516                 special(ast::SpecialLiteralKind::Space, ' ')
1517             }
1518             'A' => Ok(Primitive::Assertion(ast::Assertion {
1519                 span: span,
1520                 kind: ast::AssertionKind::StartText,
1521             })),
1522             'z' => Ok(Primitive::Assertion(ast::Assertion {
1523                 span: span,
1524                 kind: ast::AssertionKind::EndText,
1525             })),
1526             'b' => Ok(Primitive::Assertion(ast::Assertion {
1527                 span: span,
1528                 kind: ast::AssertionKind::WordBoundary,
1529             })),
1530             'B' => Ok(Primitive::Assertion(ast::Assertion {
1531                 span: span,
1532                 kind: ast::AssertionKind::NotWordBoundary,
1533             })),
1534             _ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)),
1535         }
1536     }
1537 
1538     /// Parse an octal representation of a Unicode codepoint up to 3 digits
1539     /// long. This expects the parser to be positioned at the first octal
1540     /// digit and advances the parser to the first character immediately
1541     /// following the octal number. This also assumes that parsing octal
1542     /// escapes is enabled.
1543     ///
1544     /// Assuming the preconditions are met, this routine can never fail.
1545     #[inline(never)]
parse_octal(&self) -> ast::Literal1546     fn parse_octal(&self) -> ast::Literal {
1547         use std::char;
1548         use std::u32;
1549 
1550         assert!(self.parser().octal);
1551         assert!('0' <= self.char() && self.char() <= '7');
1552         let start = self.pos();
1553         // Parse up to two more digits.
1554         while self.bump()
1555             && '0' <= self.char()
1556             && self.char() <= '7'
1557             && self.pos().offset - start.offset <= 2
1558         {}
1559         let end = self.pos();
1560         let octal = &self.pattern()[start.offset..end.offset];
1561         // Parsing the octal should never fail since the above guarantees a
1562         // valid number.
1563         let codepoint =
1564             u32::from_str_radix(octal, 8).expect("valid octal number");
1565         // The max value for 3 digit octal is 0777 = 511 and [0, 511] has no
1566         // invalid Unicode scalar values.
1567         let c = char::from_u32(codepoint).expect("Unicode scalar value");
1568         ast::Literal {
1569             span: Span::new(start, end),
1570             kind: ast::LiteralKind::Octal,
1571             c: c,
1572         }
1573     }
1574 
1575     /// Parse a hex representation of a Unicode codepoint. This handles both
1576     /// hex notations, i.e., `\xFF` and `\x{FFFF}`. This expects the parser to
1577     /// be positioned at the `x`, `u` or `U` prefix. The parser is advanced to
1578     /// the first character immediately following the hexadecimal literal.
1579     #[inline(never)]
parse_hex(&self) -> Result<ast::Literal>1580     fn parse_hex(&self) -> Result<ast::Literal> {
1581         assert!(
1582             self.char() == 'x' || self.char() == 'u' || self.char() == 'U'
1583         );
1584 
1585         let hex_kind = match self.char() {
1586             'x' => ast::HexLiteralKind::X,
1587             'u' => ast::HexLiteralKind::UnicodeShort,
1588             _ => ast::HexLiteralKind::UnicodeLong,
1589         };
1590         if !self.bump_and_bump_space() {
1591             return Err(
1592                 self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)
1593             );
1594         }
1595         if self.char() == '{' {
1596             self.parse_hex_brace(hex_kind)
1597         } else {
1598             self.parse_hex_digits(hex_kind)
1599         }
1600     }
1601 
1602     /// Parse an N-digit hex representation of a Unicode codepoint. This
1603     /// expects the parser to be positioned at the first digit and will advance
1604     /// the parser to the first character immediately following the escape
1605     /// sequence.
1606     ///
1607     /// The number of digits given must be 2 (for `\xNN`), 4 (for `\uNNNN`)
1608     /// or 8 (for `\UNNNNNNNN`).
1609     #[inline(never)]
parse_hex_digits( &self, kind: ast::HexLiteralKind, ) -> Result<ast::Literal>1610     fn parse_hex_digits(
1611         &self,
1612         kind: ast::HexLiteralKind,
1613     ) -> Result<ast::Literal> {
1614         use std::char;
1615         use std::u32;
1616 
1617         let mut scratch = self.parser().scratch.borrow_mut();
1618         scratch.clear();
1619 
1620         let start = self.pos();
1621         for i in 0..kind.digits() {
1622             if i > 0 && !self.bump_and_bump_space() {
1623                 return Err(self
1624                     .error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
1625             }
1626             if !is_hex(self.char()) {
1627                 return Err(self.error(
1628                     self.span_char(),
1629                     ast::ErrorKind::EscapeHexInvalidDigit,
1630                 ));
1631             }
1632             scratch.push(self.char());
1633         }
1634         // The final bump just moves the parser past the literal, which may
1635         // be EOF.
1636         self.bump_and_bump_space();
1637         let end = self.pos();
1638         let hex = scratch.as_str();
1639         match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
1640             None => Err(self.error(
1641                 Span::new(start, end),
1642                 ast::ErrorKind::EscapeHexInvalid,
1643             )),
1644             Some(c) => Ok(ast::Literal {
1645                 span: Span::new(start, end),
1646                 kind: ast::LiteralKind::HexFixed(kind),
1647                 c: c,
1648             }),
1649         }
1650     }
1651 
1652     /// Parse a hex representation of any Unicode scalar value. This expects
1653     /// the parser to be positioned at the opening brace `{` and will advance
1654     /// the parser to the first character following the closing brace `}`.
1655     #[inline(never)]
parse_hex_brace( &self, kind: ast::HexLiteralKind, ) -> Result<ast::Literal>1656     fn parse_hex_brace(
1657         &self,
1658         kind: ast::HexLiteralKind,
1659     ) -> Result<ast::Literal> {
1660         use std::char;
1661         use std::u32;
1662 
1663         let mut scratch = self.parser().scratch.borrow_mut();
1664         scratch.clear();
1665 
1666         let brace_pos = self.pos();
1667         let start = self.span_char().end;
1668         while self.bump_and_bump_space() && self.char() != '}' {
1669             if !is_hex(self.char()) {
1670                 return Err(self.error(
1671                     self.span_char(),
1672                     ast::ErrorKind::EscapeHexInvalidDigit,
1673                 ));
1674             }
1675             scratch.push(self.char());
1676         }
1677         if self.is_eof() {
1678             return Err(self.error(
1679                 Span::new(brace_pos, self.pos()),
1680                 ast::ErrorKind::EscapeUnexpectedEof,
1681             ));
1682         }
1683         let end = self.pos();
1684         let hex = scratch.as_str();
1685         assert_eq!(self.char(), '}');
1686         self.bump_and_bump_space();
1687 
1688         if hex.is_empty() {
1689             return Err(self.error(
1690                 Span::new(brace_pos, self.pos()),
1691                 ast::ErrorKind::EscapeHexEmpty,
1692             ));
1693         }
1694         match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
1695             None => Err(self.error(
1696                 Span::new(start, end),
1697                 ast::ErrorKind::EscapeHexInvalid,
1698             )),
1699             Some(c) => Ok(ast::Literal {
1700                 span: Span::new(start, self.pos()),
1701                 kind: ast::LiteralKind::HexBrace(kind),
1702                 c: c,
1703             }),
1704         }
1705     }
1706 
1707     /// Parse a decimal number into a u32 while trimming leading and trailing
1708     /// whitespace.
1709     ///
1710     /// This expects the parser to be positioned at the first position where
1711     /// a decimal digit could occur. This will advance the parser to the byte
1712     /// immediately following the last contiguous decimal digit.
1713     ///
1714     /// If no decimal digit could be found or if there was a problem parsing
1715     /// the complete set of digits into a u32, then an error is returned.
parse_decimal(&self) -> Result<u32>1716     fn parse_decimal(&self) -> Result<u32> {
1717         let mut scratch = self.parser().scratch.borrow_mut();
1718         scratch.clear();
1719 
1720         while !self.is_eof() && self.char().is_whitespace() {
1721             self.bump();
1722         }
1723         let start = self.pos();
1724         while !self.is_eof() && '0' <= self.char() && self.char() <= '9' {
1725             scratch.push(self.char());
1726             self.bump_and_bump_space();
1727         }
1728         let span = Span::new(start, self.pos());
1729         while !self.is_eof() && self.char().is_whitespace() {
1730             self.bump_and_bump_space();
1731         }
1732         let digits = scratch.as_str();
1733         if digits.is_empty() {
1734             return Err(self.error(span, ast::ErrorKind::DecimalEmpty));
1735         }
1736         match u32::from_str_radix(digits, 10).ok() {
1737             Some(n) => Ok(n),
1738             None => Err(self.error(span, ast::ErrorKind::DecimalInvalid)),
1739         }
1740     }
1741 
1742     /// Parse a standard character class consisting primarily of characters or
1743     /// character ranges, but can also contain nested character classes of
1744     /// any type (sans `.`).
1745     ///
1746     /// This assumes the parser is positioned at the opening `[`. If parsing
1747     /// is successful, then the parser is advanced to the position immediately
1748     /// following the closing `]`.
1749     #[inline(never)]
parse_set_class(&self) -> Result<ast::Class>1750     fn parse_set_class(&self) -> Result<ast::Class> {
1751         assert_eq!(self.char(), '[');
1752 
1753         let mut union =
1754             ast::ClassSetUnion { span: self.span(), items: vec![] };
1755         loop {
1756             self.bump_space();
1757             if self.is_eof() {
1758                 return Err(self.unclosed_class_error());
1759             }
1760             match self.char() {
1761                 '[' => {
1762                     // If we've already parsed the opening bracket, then
1763                     // attempt to treat this as the beginning of an ASCII
1764                     // class. If ASCII class parsing fails, then the parser
1765                     // backs up to `[`.
1766                     if !self.parser().stack_class.borrow().is_empty() {
1767                         if let Some(cls) = self.maybe_parse_ascii_class() {
1768                             union.push(ast::ClassSetItem::Ascii(cls));
1769                             continue;
1770                         }
1771                     }
1772                     union = self.push_class_open(union)?;
1773                 }
1774                 ']' => match self.pop_class(union)? {
1775                     Either::Left(nested_union) => {
1776                         union = nested_union;
1777                     }
1778                     Either::Right(class) => return Ok(class),
1779                 },
1780                 '&' if self.peek() == Some('&') => {
1781                     assert!(self.bump_if("&&"));
1782                     union = self.push_class_op(
1783                         ast::ClassSetBinaryOpKind::Intersection,
1784                         union,
1785                     );
1786                 }
1787                 '-' if self.peek() == Some('-') => {
1788                     assert!(self.bump_if("--"));
1789                     union = self.push_class_op(
1790                         ast::ClassSetBinaryOpKind::Difference,
1791                         union,
1792                     );
1793                 }
1794                 '~' if self.peek() == Some('~') => {
1795                     assert!(self.bump_if("~~"));
1796                     union = self.push_class_op(
1797                         ast::ClassSetBinaryOpKind::SymmetricDifference,
1798                         union,
1799                     );
1800                 }
1801                 _ => {
1802                     union.push(self.parse_set_class_range()?);
1803                 }
1804             }
1805         }
1806     }
1807 
1808     /// Parse a single primitive item in a character class set. The item to
1809     /// be parsed can either be one of a simple literal character, a range
1810     /// between two simple literal characters or a "primitive" character
1811     /// class like \w or \p{Greek}.
1812     ///
1813     /// If an invalid escape is found, or if a character class is found where
1814     /// a simple literal is expected (e.g., in a range), then an error is
1815     /// returned.
1816     #[inline(never)]
parse_set_class_range(&self) -> Result<ast::ClassSetItem>1817     fn parse_set_class_range(&self) -> Result<ast::ClassSetItem> {
1818         let prim1 = self.parse_set_class_item()?;
1819         self.bump_space();
1820         if self.is_eof() {
1821             return Err(self.unclosed_class_error());
1822         }
1823         // If the next char isn't a `-`, then we don't have a range.
1824         // There are two exceptions. If the char after a `-` is a `]`, then
1825         // `-` is interpreted as a literal `-`. Alternatively, if the char
1826         // after a `-` is a `-`, then `--` corresponds to a "difference"
1827         // operation.
1828         if self.char() != '-'
1829             || self.peek_space() == Some(']')
1830             || self.peek_space() == Some('-')
1831         {
1832             return prim1.into_class_set_item(self);
1833         }
1834         // OK, now we're parsing a range, so bump past the `-` and parse the
1835         // second half of the range.
1836         if !self.bump_and_bump_space() {
1837             return Err(self.unclosed_class_error());
1838         }
1839         let prim2 = self.parse_set_class_item()?;
1840         let range = ast::ClassSetRange {
1841             span: Span::new(prim1.span().start, prim2.span().end),
1842             start: prim1.into_class_literal(self)?,
1843             end: prim2.into_class_literal(self)?,
1844         };
1845         if !range.is_valid() {
1846             return Err(
1847                 self.error(range.span, ast::ErrorKind::ClassRangeInvalid)
1848             );
1849         }
1850         Ok(ast::ClassSetItem::Range(range))
1851     }
1852 
1853     /// Parse a single item in a character class as a primitive, where the
1854     /// primitive either consists of a verbatim literal or a single escape
1855     /// sequence.
1856     ///
1857     /// This assumes the parser is positioned at the beginning of a primitive,
1858     /// and advances the parser to the first position after the primitive if
1859     /// successful.
1860     ///
1861     /// Note that it is the caller's responsibility to report an error if an
1862     /// illegal primitive was parsed.
1863     #[inline(never)]
parse_set_class_item(&self) -> Result<Primitive>1864     fn parse_set_class_item(&self) -> Result<Primitive> {
1865         if self.char() == '\\' {
1866             self.parse_escape()
1867         } else {
1868             let x = Primitive::Literal(ast::Literal {
1869                 span: self.span_char(),
1870                 kind: ast::LiteralKind::Verbatim,
1871                 c: self.char(),
1872             });
1873             self.bump();
1874             Ok(x)
1875         }
1876     }
1877 
1878     /// Parses the opening of a character class set. This includes the opening
1879     /// bracket along with `^` if present to indicate negation. This also
1880     /// starts parsing the opening set of unioned items if applicable, since
1881     /// there are special rules applied to certain characters in the opening
1882     /// of a character class. For example, `[^]]` is the class of all
1883     /// characters not equal to `]`. (`]` would need to be escaped in any other
1884     /// position.) Similarly for `-`.
1885     ///
1886     /// In all cases, the op inside the returned `ast::ClassBracketed` is an
1887     /// empty union. This empty union should be replaced with the actual item
1888     /// when it is popped from the parser's stack.
1889     ///
1890     /// This assumes the parser is positioned at the opening `[` and advances
1891     /// the parser to the first non-special byte of the character class.
1892     ///
1893     /// An error is returned if EOF is found.
1894     #[inline(never)]
parse_set_class_open( &self, ) -> Result<(ast::ClassBracketed, ast::ClassSetUnion)>1895     fn parse_set_class_open(
1896         &self,
1897     ) -> Result<(ast::ClassBracketed, ast::ClassSetUnion)> {
1898         assert_eq!(self.char(), '[');
1899         let start = self.pos();
1900         if !self.bump_and_bump_space() {
1901             return Err(self.error(
1902                 Span::new(start, self.pos()),
1903                 ast::ErrorKind::ClassUnclosed,
1904             ));
1905         }
1906 
1907         let negated = if self.char() != '^' {
1908             false
1909         } else {
1910             if !self.bump_and_bump_space() {
1911                 return Err(self.error(
1912                     Span::new(start, self.pos()),
1913                     ast::ErrorKind::ClassUnclosed,
1914                 ));
1915             }
1916             true
1917         };
1918         // Accept any number of `-` as literal `-`.
1919         let mut union =
1920             ast::ClassSetUnion { span: self.span(), items: vec![] };
1921         while self.char() == '-' {
1922             union.push(ast::ClassSetItem::Literal(ast::Literal {
1923                 span: self.span_char(),
1924                 kind: ast::LiteralKind::Verbatim,
1925                 c: '-',
1926             }));
1927             if !self.bump_and_bump_space() {
1928                 return Err(self.error(
1929                     Span::new(start, self.pos()),
1930                     ast::ErrorKind::ClassUnclosed,
1931                 ));
1932             }
1933         }
1934         // If `]` is the *first* char in a set, then interpret it as a literal
1935         // `]`. That is, an empty class is impossible to write.
1936         if union.items.is_empty() && self.char() == ']' {
1937             union.push(ast::ClassSetItem::Literal(ast::Literal {
1938                 span: self.span_char(),
1939                 kind: ast::LiteralKind::Verbatim,
1940                 c: ']',
1941             }));
1942             if !self.bump_and_bump_space() {
1943                 return Err(self.error(
1944                     Span::new(start, self.pos()),
1945                     ast::ErrorKind::ClassUnclosed,
1946                 ));
1947             }
1948         }
1949         let set = ast::ClassBracketed {
1950             span: Span::new(start, self.pos()),
1951             negated: negated,
1952             kind: ast::ClassSet::union(ast::ClassSetUnion {
1953                 span: Span::new(union.span.start, union.span.start),
1954                 items: vec![],
1955             }),
1956         };
1957         Ok((set, union))
1958     }
1959 
1960     /// Attempt to parse an ASCII character class, e.g., `[:alnum:]`.
1961     ///
1962     /// This assumes the parser is positioned at the opening `[`.
1963     ///
1964     /// If no valid ASCII character class could be found, then this does not
1965     /// advance the parser and `None` is returned. Otherwise, the parser is
1966     /// advanced to the first byte following the closing `]` and the
1967     /// corresponding ASCII class is returned.
1968     #[inline(never)]
maybe_parse_ascii_class(&self) -> Option<ast::ClassAscii>1969     fn maybe_parse_ascii_class(&self) -> Option<ast::ClassAscii> {
1970         // ASCII character classes are interesting from a parsing perspective
1971         // because parsing cannot fail with any interesting error. For example,
1972         // in order to use an ASCII character class, it must be enclosed in
1973         // double brackets, e.g., `[[:alnum:]]`. Alternatively, you might think
1974         // of it as "ASCII character characters have the syntax `[:NAME:]`
1975         // which can only appear within character brackets." This means that
1976         // things like `[[:lower:]A]` are legal constructs.
1977         //
1978         // However, if one types an incorrect ASCII character class, e.g.,
1979         // `[[:loower:]]`, then we treat that as a normal nested character
1980         // class containing the characters `:elorw`. One might argue that we
1981         // should return an error instead since the repeated colons give away
1982         // the intent to write an ASCII class. But what if the user typed
1983         // `[[:lower]]` instead? How can we tell that was intended to be an
1984         // ASCII class and not just a normal nested class?
1985         //
1986         // Reasonable people can probably disagree over this, but for better
1987         // or worse, we implement semantics that never fails at the expense
1988         // of better failure modes.
1989         assert_eq!(self.char(), '[');
1990         // If parsing fails, then we back up the parser to this starting point.
1991         let start = self.pos();
1992         let mut negated = false;
1993         if !self.bump() || self.char() != ':' {
1994             self.parser().pos.set(start);
1995             return None;
1996         }
1997         if !self.bump() {
1998             self.parser().pos.set(start);
1999             return None;
2000         }
2001         if self.char() == '^' {
2002             negated = true;
2003             if !self.bump() {
2004                 self.parser().pos.set(start);
2005                 return None;
2006             }
2007         }
2008         let name_start = self.offset();
2009         while self.char() != ':' && self.bump() {}
2010         if self.is_eof() {
2011             self.parser().pos.set(start);
2012             return None;
2013         }
2014         let name = &self.pattern()[name_start..self.offset()];
2015         if !self.bump_if(":]") {
2016             self.parser().pos.set(start);
2017             return None;
2018         }
2019         let kind = match ast::ClassAsciiKind::from_name(name) {
2020             Some(kind) => kind,
2021             None => {
2022                 self.parser().pos.set(start);
2023                 return None;
2024             }
2025         };
2026         Some(ast::ClassAscii {
2027             span: Span::new(start, self.pos()),
2028             kind: kind,
2029             negated: negated,
2030         })
2031     }
2032 
2033     /// Parse a Unicode class in either the single character notation, `\pN`
2034     /// or the multi-character bracketed notation, `\p{Greek}`. This assumes
2035     /// the parser is positioned at the `p` (or `P` for negation) and will
2036     /// advance the parser to the character immediately following the class.
2037     ///
2038     /// Note that this does not check whether the class name is valid or not.
2039     #[inline(never)]
parse_unicode_class(&self) -> Result<ast::ClassUnicode>2040     fn parse_unicode_class(&self) -> Result<ast::ClassUnicode> {
2041         assert!(self.char() == 'p' || self.char() == 'P');
2042 
2043         let mut scratch = self.parser().scratch.borrow_mut();
2044         scratch.clear();
2045 
2046         let negated = self.char() == 'P';
2047         if !self.bump_and_bump_space() {
2048             return Err(
2049                 self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)
2050             );
2051         }
2052         let (start, kind) = if self.char() == '{' {
2053             let start = self.span_char().end;
2054             while self.bump_and_bump_space() && self.char() != '}' {
2055                 scratch.push(self.char());
2056             }
2057             if self.is_eof() {
2058                 return Err(self
2059                     .error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2060             }
2061             assert_eq!(self.char(), '}');
2062             self.bump();
2063 
2064             let name = scratch.as_str();
2065             if let Some(i) = name.find("!=") {
2066                 (
2067                     start,
2068                     ast::ClassUnicodeKind::NamedValue {
2069                         op: ast::ClassUnicodeOpKind::NotEqual,
2070                         name: name[..i].to_string(),
2071                         value: name[i + 2..].to_string(),
2072                     },
2073                 )
2074             } else if let Some(i) = name.find(':') {
2075                 (
2076                     start,
2077                     ast::ClassUnicodeKind::NamedValue {
2078                         op: ast::ClassUnicodeOpKind::Colon,
2079                         name: name[..i].to_string(),
2080                         value: name[i + 1..].to_string(),
2081                     },
2082                 )
2083             } else if let Some(i) = name.find('=') {
2084                 (
2085                     start,
2086                     ast::ClassUnicodeKind::NamedValue {
2087                         op: ast::ClassUnicodeOpKind::Equal,
2088                         name: name[..i].to_string(),
2089                         value: name[i + 1..].to_string(),
2090                     },
2091                 )
2092             } else {
2093                 (start, ast::ClassUnicodeKind::Named(name.to_string()))
2094             }
2095         } else {
2096             let start = self.pos();
2097             let c = self.char();
2098             self.bump_and_bump_space();
2099             let kind = ast::ClassUnicodeKind::OneLetter(c);
2100             (start, kind)
2101         };
2102         Ok(ast::ClassUnicode {
2103             span: Span::new(start, self.pos()),
2104             negated: negated,
2105             kind: kind,
2106         })
2107     }
2108 
2109     /// Parse a Perl character class, e.g., `\d` or `\W`. This assumes the
2110     /// parser is currently at a valid character class name and will be
2111     /// advanced to the character immediately following the class.
2112     #[inline(never)]
parse_perl_class(&self) -> ast::ClassPerl2113     fn parse_perl_class(&self) -> ast::ClassPerl {
2114         let c = self.char();
2115         let span = self.span_char();
2116         self.bump();
2117         let (negated, kind) = match c {
2118             'd' => (false, ast::ClassPerlKind::Digit),
2119             'D' => (true, ast::ClassPerlKind::Digit),
2120             's' => (false, ast::ClassPerlKind::Space),
2121             'S' => (true, ast::ClassPerlKind::Space),
2122             'w' => (false, ast::ClassPerlKind::Word),
2123             'W' => (true, ast::ClassPerlKind::Word),
2124             c => panic!("expected valid Perl class but got '{}'", c),
2125         };
2126         ast::ClassPerl { span: span, kind: kind, negated: negated }
2127     }
2128 }
2129 
2130 /// A type that traverses a fully parsed Ast and checks whether its depth
2131 /// exceeds the specified nesting limit. If it does, then an error is returned.
2132 #[derive(Debug)]
2133 struct NestLimiter<'p, 's: 'p, P: 'p + 's> {
2134     /// The parser that is checking the nest limit.
2135     p: &'p ParserI<'s, P>,
2136     /// The current depth while walking an Ast.
2137     depth: u32,
2138 }
2139 
2140 impl<'p, 's, P: Borrow<Parser>> NestLimiter<'p, 's, P> {
new(p: &'p ParserI<'s, P>) -> NestLimiter<'p, 's, P>2141     fn new(p: &'p ParserI<'s, P>) -> NestLimiter<'p, 's, P> {
2142         NestLimiter { p: p, depth: 0 }
2143     }
2144 
2145     #[inline(never)]
check(self, ast: &Ast) -> Result<()>2146     fn check(self, ast: &Ast) -> Result<()> {
2147         ast::visit(ast, self)
2148     }
2149 
increment_depth(&mut self, span: &Span) -> Result<()>2150     fn increment_depth(&mut self, span: &Span) -> Result<()> {
2151         let new = self.depth.checked_add(1).ok_or_else(|| {
2152             self.p.error(
2153                 span.clone(),
2154                 ast::ErrorKind::NestLimitExceeded(::std::u32::MAX),
2155             )
2156         })?;
2157         let limit = self.p.parser().nest_limit;
2158         if new > limit {
2159             return Err(self.p.error(
2160                 span.clone(),
2161                 ast::ErrorKind::NestLimitExceeded(limit),
2162             ));
2163         }
2164         self.depth = new;
2165         Ok(())
2166     }
2167 
decrement_depth(&mut self)2168     fn decrement_depth(&mut self) {
2169         // Assuming the correctness of the visitor, this should never drop
2170         // below 0.
2171         self.depth = self.depth.checked_sub(1).unwrap();
2172     }
2173 }
2174 
2175 impl<'p, 's, P: Borrow<Parser>> ast::Visitor for NestLimiter<'p, 's, P> {
2176     type Output = ();
2177     type Err = ast::Error;
2178 
finish(self) -> Result<()>2179     fn finish(self) -> Result<()> {
2180         Ok(())
2181     }
2182 
visit_pre(&mut self, ast: &Ast) -> Result<()>2183     fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
2184         let span = match *ast {
2185             Ast::Empty(_)
2186             | Ast::Flags(_)
2187             | Ast::Literal(_)
2188             | Ast::Dot(_)
2189             | Ast::Assertion(_)
2190             | Ast::Class(ast::Class::Unicode(_))
2191             | Ast::Class(ast::Class::Perl(_)) => {
2192                 // These are all base cases, so we don't increment depth.
2193                 return Ok(());
2194             }
2195             Ast::Class(ast::Class::Bracketed(ref x)) => &x.span,
2196             Ast::Repetition(ref x) => &x.span,
2197             Ast::Group(ref x) => &x.span,
2198             Ast::Alternation(ref x) => &x.span,
2199             Ast::Concat(ref x) => &x.span,
2200         };
2201         self.increment_depth(span)
2202     }
2203 
visit_post(&mut self, ast: &Ast) -> Result<()>2204     fn visit_post(&mut self, ast: &Ast) -> Result<()> {
2205         match *ast {
2206             Ast::Empty(_)
2207             | Ast::Flags(_)
2208             | Ast::Literal(_)
2209             | Ast::Dot(_)
2210             | Ast::Assertion(_)
2211             | Ast::Class(ast::Class::Unicode(_))
2212             | Ast::Class(ast::Class::Perl(_)) => {
2213                 // These are all base cases, so we don't decrement depth.
2214                 Ok(())
2215             }
2216             Ast::Class(ast::Class::Bracketed(_))
2217             | Ast::Repetition(_)
2218             | Ast::Group(_)
2219             | Ast::Alternation(_)
2220             | Ast::Concat(_) => {
2221                 self.decrement_depth();
2222                 Ok(())
2223             }
2224         }
2225     }
2226 
visit_class_set_item_pre( &mut self, ast: &ast::ClassSetItem, ) -> Result<()>2227     fn visit_class_set_item_pre(
2228         &mut self,
2229         ast: &ast::ClassSetItem,
2230     ) -> Result<()> {
2231         let span = match *ast {
2232             ast::ClassSetItem::Empty(_)
2233             | ast::ClassSetItem::Literal(_)
2234             | ast::ClassSetItem::Range(_)
2235             | ast::ClassSetItem::Ascii(_)
2236             | ast::ClassSetItem::Unicode(_)
2237             | ast::ClassSetItem::Perl(_) => {
2238                 // These are all base cases, so we don't increment depth.
2239                 return Ok(());
2240             }
2241             ast::ClassSetItem::Bracketed(ref x) => &x.span,
2242             ast::ClassSetItem::Union(ref x) => &x.span,
2243         };
2244         self.increment_depth(span)
2245     }
2246 
visit_class_set_item_post( &mut self, ast: &ast::ClassSetItem, ) -> Result<()>2247     fn visit_class_set_item_post(
2248         &mut self,
2249         ast: &ast::ClassSetItem,
2250     ) -> Result<()> {
2251         match *ast {
2252             ast::ClassSetItem::Empty(_)
2253             | ast::ClassSetItem::Literal(_)
2254             | ast::ClassSetItem::Range(_)
2255             | ast::ClassSetItem::Ascii(_)
2256             | ast::ClassSetItem::Unicode(_)
2257             | ast::ClassSetItem::Perl(_) => {
2258                 // These are all base cases, so we don't decrement depth.
2259                 Ok(())
2260             }
2261             ast::ClassSetItem::Bracketed(_) | ast::ClassSetItem::Union(_) => {
2262                 self.decrement_depth();
2263                 Ok(())
2264             }
2265         }
2266     }
2267 
visit_class_set_binary_op_pre( &mut self, ast: &ast::ClassSetBinaryOp, ) -> Result<()>2268     fn visit_class_set_binary_op_pre(
2269         &mut self,
2270         ast: &ast::ClassSetBinaryOp,
2271     ) -> Result<()> {
2272         self.increment_depth(&ast.span)
2273     }
2274 
visit_class_set_binary_op_post( &mut self, _ast: &ast::ClassSetBinaryOp, ) -> Result<()>2275     fn visit_class_set_binary_op_post(
2276         &mut self,
2277         _ast: &ast::ClassSetBinaryOp,
2278     ) -> Result<()> {
2279         self.decrement_depth();
2280         Ok(())
2281     }
2282 }
2283 
2284 /// When the result is an error, transforms the ast::ErrorKind from the source
2285 /// Result into another one. This function is used to return clearer error
2286 /// messages when possible.
specialize_err<T>( result: Result<T>, from: ast::ErrorKind, to: ast::ErrorKind, ) -> Result<T>2287 fn specialize_err<T>(
2288     result: Result<T>,
2289     from: ast::ErrorKind,
2290     to: ast::ErrorKind,
2291 ) -> Result<T> {
2292     if let Err(e) = result {
2293         if e.kind == from {
2294             Err(ast::Error { kind: to, pattern: e.pattern, span: e.span })
2295         } else {
2296             Err(e)
2297         }
2298     } else {
2299         result
2300     }
2301 }
2302 
2303 #[cfg(test)]
2304 mod tests {
2305     use std::ops::Range;
2306 
2307     use super::{Parser, ParserBuilder, ParserI, Primitive};
2308     use ast::{self, Ast, Position, Span};
2309 
2310     // Our own assert_eq, which has slightly better formatting (but honestly
2311     // still kind of crappy).
2312     macro_rules! assert_eq {
2313         ($left:expr, $right:expr) => {{
2314             match (&$left, &$right) {
2315                 (left_val, right_val) => {
2316                     if !(*left_val == *right_val) {
2317                         panic!(
2318                             "assertion failed: `(left == right)`\n\n\
2319                              left:  `{:?}`\nright: `{:?}`\n\n",
2320                             left_val, right_val
2321                         )
2322                     }
2323                 }
2324             }
2325         }};
2326     }
2327 
2328     // We create these errors to compare with real ast::Errors in the tests.
2329     // We define equality between TestError and ast::Error to disregard the
2330     // pattern string in ast::Error, which is annoying to provide in tests.
2331     #[derive(Clone, Debug)]
2332     struct TestError {
2333         span: Span,
2334         kind: ast::ErrorKind,
2335     }
2336 
2337     impl PartialEq<ast::Error> for TestError {
eq(&self, other: &ast::Error) -> bool2338         fn eq(&self, other: &ast::Error) -> bool {
2339             self.span == other.span && self.kind == other.kind
2340         }
2341     }
2342 
2343     impl PartialEq<TestError> for ast::Error {
eq(&self, other: &TestError) -> bool2344         fn eq(&self, other: &TestError) -> bool {
2345             self.span == other.span && self.kind == other.kind
2346         }
2347     }
2348 
s(str: &str) -> String2349     fn s(str: &str) -> String {
2350         str.to_string()
2351     }
2352 
parser(pattern: &str) -> ParserI<Parser>2353     fn parser(pattern: &str) -> ParserI<Parser> {
2354         ParserI::new(Parser::new(), pattern)
2355     }
2356 
parser_octal(pattern: &str) -> ParserI<Parser>2357     fn parser_octal(pattern: &str) -> ParserI<Parser> {
2358         let parser = ParserBuilder::new().octal(true).build();
2359         ParserI::new(parser, pattern)
2360     }
2361 
parser_nest_limit(pattern: &str, nest_limit: u32) -> ParserI<Parser>2362     fn parser_nest_limit(pattern: &str, nest_limit: u32) -> ParserI<Parser> {
2363         let p = ParserBuilder::new().nest_limit(nest_limit).build();
2364         ParserI::new(p, pattern)
2365     }
2366 
parser_ignore_whitespace(pattern: &str) -> ParserI<Parser>2367     fn parser_ignore_whitespace(pattern: &str) -> ParserI<Parser> {
2368         let p = ParserBuilder::new().ignore_whitespace(true).build();
2369         ParserI::new(p, pattern)
2370     }
2371 
2372     /// Short alias for creating a new span.
nspan(start: Position, end: Position) -> Span2373     fn nspan(start: Position, end: Position) -> Span {
2374         Span::new(start, end)
2375     }
2376 
2377     /// Short alias for creating a new position.
npos(offset: usize, line: usize, column: usize) -> Position2378     fn npos(offset: usize, line: usize, column: usize) -> Position {
2379         Position::new(offset, line, column)
2380     }
2381 
2382     /// Create a new span from the given offset range. This assumes a single
2383     /// line and sets the columns based on the offsets. i.e., This only works
2384     /// out of the box for ASCII, which is fine for most tests.
span(range: Range<usize>) -> Span2385     fn span(range: Range<usize>) -> Span {
2386         let start = Position::new(range.start, 1, range.start + 1);
2387         let end = Position::new(range.end, 1, range.end + 1);
2388         Span::new(start, end)
2389     }
2390 
2391     /// Create a new span for the corresponding byte range in the given string.
span_range(subject: &str, range: Range<usize>) -> Span2392     fn span_range(subject: &str, range: Range<usize>) -> Span {
2393         let start = Position {
2394             offset: range.start,
2395             line: 1 + subject[..range.start].matches('\n').count(),
2396             column: 1 + subject[..range.start]
2397                 .chars()
2398                 .rev()
2399                 .position(|c| c == '\n')
2400                 .unwrap_or(subject[..range.start].chars().count()),
2401         };
2402         let end = Position {
2403             offset: range.end,
2404             line: 1 + subject[..range.end].matches('\n').count(),
2405             column: 1 + subject[..range.end]
2406                 .chars()
2407                 .rev()
2408                 .position(|c| c == '\n')
2409                 .unwrap_or(subject[..range.end].chars().count()),
2410         };
2411         Span::new(start, end)
2412     }
2413 
2414     /// Create a verbatim literal starting at the given position.
lit(c: char, start: usize) -> Ast2415     fn lit(c: char, start: usize) -> Ast {
2416         lit_with(c, span(start..start + c.len_utf8()))
2417     }
2418 
2419     /// Create a punctuation literal starting at the given position.
punct_lit(c: char, span: Span) -> Ast2420     fn punct_lit(c: char, span: Span) -> Ast {
2421         Ast::Literal(ast::Literal {
2422             span: span,
2423             kind: ast::LiteralKind::Punctuation,
2424             c: c,
2425         })
2426     }
2427 
2428     /// Create a verbatim literal with the given span.
lit_with(c: char, span: Span) -> Ast2429     fn lit_with(c: char, span: Span) -> Ast {
2430         Ast::Literal(ast::Literal {
2431             span: span,
2432             kind: ast::LiteralKind::Verbatim,
2433             c: c,
2434         })
2435     }
2436 
2437     /// Create a concatenation with the given range.
concat(range: Range<usize>, asts: Vec<Ast>) -> Ast2438     fn concat(range: Range<usize>, asts: Vec<Ast>) -> Ast {
2439         concat_with(span(range), asts)
2440     }
2441 
2442     /// Create a concatenation with the given span.
concat_with(span: Span, asts: Vec<Ast>) -> Ast2443     fn concat_with(span: Span, asts: Vec<Ast>) -> Ast {
2444         Ast::Concat(ast::Concat { span: span, asts: asts })
2445     }
2446 
2447     /// Create an alternation with the given span.
alt(range: Range<usize>, asts: Vec<Ast>) -> Ast2448     fn alt(range: Range<usize>, asts: Vec<Ast>) -> Ast {
2449         Ast::Alternation(ast::Alternation { span: span(range), asts: asts })
2450     }
2451 
2452     /// Create a capturing group with the given span.
group(range: Range<usize>, index: u32, ast: Ast) -> Ast2453     fn group(range: Range<usize>, index: u32, ast: Ast) -> Ast {
2454         Ast::Group(ast::Group {
2455             span: span(range),
2456             kind: ast::GroupKind::CaptureIndex(index),
2457             ast: Box::new(ast),
2458         })
2459     }
2460 
2461     /// Create an ast::SetFlags.
2462     ///
2463     /// The given pattern should be the full pattern string. The range given
2464     /// should correspond to the byte offsets where the flag set occurs.
2465     ///
2466     /// If negated is true, then the set is interpreted as beginning with a
2467     /// negation.
flag_set( pat: &str, range: Range<usize>, flag: ast::Flag, negated: bool, ) -> Ast2468     fn flag_set(
2469         pat: &str,
2470         range: Range<usize>,
2471         flag: ast::Flag,
2472         negated: bool,
2473     ) -> Ast {
2474         let mut items = vec![ast::FlagsItem {
2475             span: span_range(pat, (range.end - 2)..(range.end - 1)),
2476             kind: ast::FlagsItemKind::Flag(flag),
2477         }];
2478         if negated {
2479             items.insert(
2480                 0,
2481                 ast::FlagsItem {
2482                     span: span_range(pat, (range.start + 2)..(range.end - 2)),
2483                     kind: ast::FlagsItemKind::Negation,
2484                 },
2485             );
2486         }
2487         Ast::Flags(ast::SetFlags {
2488             span: span_range(pat, range.clone()),
2489             flags: ast::Flags {
2490                 span: span_range(pat, (range.start + 2)..(range.end - 1)),
2491                 items: items,
2492             },
2493         })
2494     }
2495 
2496     #[test]
parse_nest_limit()2497     fn parse_nest_limit() {
2498         // A nest limit of 0 still allows some types of regexes.
2499         assert_eq!(
2500             parser_nest_limit("", 0).parse(),
2501             Ok(Ast::Empty(span(0..0)))
2502         );
2503         assert_eq!(parser_nest_limit("a", 0).parse(), Ok(lit('a', 0)));
2504 
2505         // Test repetition operations, which require one level of nesting.
2506         assert_eq!(
2507             parser_nest_limit("a+", 0).parse().unwrap_err(),
2508             TestError {
2509                 span: span(0..2),
2510                 kind: ast::ErrorKind::NestLimitExceeded(0),
2511             }
2512         );
2513         assert_eq!(
2514             parser_nest_limit("a+", 1).parse(),
2515             Ok(Ast::Repetition(ast::Repetition {
2516                 span: span(0..2),
2517                 op: ast::RepetitionOp {
2518                     span: span(1..2),
2519                     kind: ast::RepetitionKind::OneOrMore,
2520                 },
2521                 greedy: true,
2522                 ast: Box::new(lit('a', 0)),
2523             }))
2524         );
2525         assert_eq!(
2526             parser_nest_limit("(a)+", 1).parse().unwrap_err(),
2527             TestError {
2528                 span: span(0..3),
2529                 kind: ast::ErrorKind::NestLimitExceeded(1),
2530             }
2531         );
2532         assert_eq!(
2533             parser_nest_limit("a+*", 1).parse().unwrap_err(),
2534             TestError {
2535                 span: span(0..2),
2536                 kind: ast::ErrorKind::NestLimitExceeded(1),
2537             }
2538         );
2539         assert_eq!(
2540             parser_nest_limit("a+*", 2).parse(),
2541             Ok(Ast::Repetition(ast::Repetition {
2542                 span: span(0..3),
2543                 op: ast::RepetitionOp {
2544                     span: span(2..3),
2545                     kind: ast::RepetitionKind::ZeroOrMore,
2546                 },
2547                 greedy: true,
2548                 ast: Box::new(Ast::Repetition(ast::Repetition {
2549                     span: span(0..2),
2550                     op: ast::RepetitionOp {
2551                         span: span(1..2),
2552                         kind: ast::RepetitionKind::OneOrMore,
2553                     },
2554                     greedy: true,
2555                     ast: Box::new(lit('a', 0)),
2556                 })),
2557             }))
2558         );
2559 
2560         // Test concatenations. A concatenation requires one level of nesting.
2561         assert_eq!(
2562             parser_nest_limit("ab", 0).parse().unwrap_err(),
2563             TestError {
2564                 span: span(0..2),
2565                 kind: ast::ErrorKind::NestLimitExceeded(0),
2566             }
2567         );
2568         assert_eq!(
2569             parser_nest_limit("ab", 1).parse(),
2570             Ok(concat(0..2, vec![lit('a', 0), lit('b', 1)]))
2571         );
2572         assert_eq!(
2573             parser_nest_limit("abc", 1).parse(),
2574             Ok(concat(0..3, vec![lit('a', 0), lit('b', 1), lit('c', 2)]))
2575         );
2576 
2577         // Test alternations. An alternation requires one level of nesting.
2578         assert_eq!(
2579             parser_nest_limit("a|b", 0).parse().unwrap_err(),
2580             TestError {
2581                 span: span(0..3),
2582                 kind: ast::ErrorKind::NestLimitExceeded(0),
2583             }
2584         );
2585         assert_eq!(
2586             parser_nest_limit("a|b", 1).parse(),
2587             Ok(alt(0..3, vec![lit('a', 0), lit('b', 2)]))
2588         );
2589         assert_eq!(
2590             parser_nest_limit("a|b|c", 1).parse(),
2591             Ok(alt(0..5, vec![lit('a', 0), lit('b', 2), lit('c', 4)]))
2592         );
2593 
2594         // Test character classes. Classes form their own mini-recursive
2595         // syntax!
2596         assert_eq!(
2597             parser_nest_limit("[a]", 0).parse().unwrap_err(),
2598             TestError {
2599                 span: span(0..3),
2600                 kind: ast::ErrorKind::NestLimitExceeded(0),
2601             }
2602         );
2603         assert_eq!(
2604             parser_nest_limit("[a]", 1).parse(),
2605             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
2606                 span: span(0..3),
2607                 negated: false,
2608                 kind: ast::ClassSet::Item(ast::ClassSetItem::Literal(
2609                     ast::Literal {
2610                         span: span(1..2),
2611                         kind: ast::LiteralKind::Verbatim,
2612                         c: 'a',
2613                     }
2614                 )),
2615             })))
2616         );
2617         assert_eq!(
2618             parser_nest_limit("[ab]", 1).parse().unwrap_err(),
2619             TestError {
2620                 span: span(1..3),
2621                 kind: ast::ErrorKind::NestLimitExceeded(1),
2622             }
2623         );
2624         assert_eq!(
2625             parser_nest_limit("[ab[cd]]", 2).parse().unwrap_err(),
2626             TestError {
2627                 span: span(3..7),
2628                 kind: ast::ErrorKind::NestLimitExceeded(2),
2629             }
2630         );
2631         assert_eq!(
2632             parser_nest_limit("[ab[cd]]", 3).parse().unwrap_err(),
2633             TestError {
2634                 span: span(4..6),
2635                 kind: ast::ErrorKind::NestLimitExceeded(3),
2636             }
2637         );
2638         assert_eq!(
2639             parser_nest_limit("[a--b]", 1).parse().unwrap_err(),
2640             TestError {
2641                 span: span(1..5),
2642                 kind: ast::ErrorKind::NestLimitExceeded(1),
2643             }
2644         );
2645         assert_eq!(
2646             parser_nest_limit("[a--bc]", 2).parse().unwrap_err(),
2647             TestError {
2648                 span: span(4..6),
2649                 kind: ast::ErrorKind::NestLimitExceeded(2),
2650             }
2651         );
2652     }
2653 
2654     #[test]
parse_comments()2655     fn parse_comments() {
2656         let pat = "(?x)
2657 # This is comment 1.
2658 foo # This is comment 2.
2659   # This is comment 3.
2660 bar
2661 # This is comment 4.";
2662         let astc = parser(pat).parse_with_comments().unwrap();
2663         assert_eq!(
2664             astc.ast,
2665             concat_with(
2666                 span_range(pat, 0..pat.len()),
2667                 vec![
2668                     flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2669                     lit_with('f', span_range(pat, 26..27)),
2670                     lit_with('o', span_range(pat, 27..28)),
2671                     lit_with('o', span_range(pat, 28..29)),
2672                     lit_with('b', span_range(pat, 74..75)),
2673                     lit_with('a', span_range(pat, 75..76)),
2674                     lit_with('r', span_range(pat, 76..77)),
2675                 ]
2676             )
2677         );
2678         assert_eq!(
2679             astc.comments,
2680             vec![
2681                 ast::Comment {
2682                     span: span_range(pat, 5..26),
2683                     comment: s(" This is comment 1."),
2684                 },
2685                 ast::Comment {
2686                     span: span_range(pat, 30..51),
2687                     comment: s(" This is comment 2."),
2688                 },
2689                 ast::Comment {
2690                     span: span_range(pat, 53..74),
2691                     comment: s(" This is comment 3."),
2692                 },
2693                 ast::Comment {
2694                     span: span_range(pat, 78..98),
2695                     comment: s(" This is comment 4."),
2696                 },
2697             ]
2698         );
2699     }
2700 
2701     #[test]
parse_holistic()2702     fn parse_holistic() {
2703         assert_eq!(parser("]").parse(), Ok(lit(']', 0)));
2704         assert_eq!(
2705             parser(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#\&\-\~").parse(),
2706             Ok(concat(
2707                 0..36,
2708                 vec![
2709                     punct_lit('\\', span(0..2)),
2710                     punct_lit('.', span(2..4)),
2711                     punct_lit('+', span(4..6)),
2712                     punct_lit('*', span(6..8)),
2713                     punct_lit('?', span(8..10)),
2714                     punct_lit('(', span(10..12)),
2715                     punct_lit(')', span(12..14)),
2716                     punct_lit('|', span(14..16)),
2717                     punct_lit('[', span(16..18)),
2718                     punct_lit(']', span(18..20)),
2719                     punct_lit('{', span(20..22)),
2720                     punct_lit('}', span(22..24)),
2721                     punct_lit('^', span(24..26)),
2722                     punct_lit('$', span(26..28)),
2723                     punct_lit('#', span(28..30)),
2724                     punct_lit('&', span(30..32)),
2725                     punct_lit('-', span(32..34)),
2726                     punct_lit('~', span(34..36)),
2727                 ]
2728             ))
2729         );
2730     }
2731 
2732     #[test]
parse_ignore_whitespace()2733     fn parse_ignore_whitespace() {
2734         // Test that basic whitespace insensitivity works.
2735         let pat = "(?x)a b";
2736         assert_eq!(
2737             parser(pat).parse(),
2738             Ok(concat_with(
2739                 nspan(npos(0, 1, 1), npos(7, 1, 8)),
2740                 vec![
2741                     flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2742                     lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
2743                     lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
2744                 ]
2745             ))
2746         );
2747 
2748         // Test that we can toggle whitespace insensitivity.
2749         let pat = "(?x)a b(?-x)a b";
2750         assert_eq!(
2751             parser(pat).parse(),
2752             Ok(concat_with(
2753                 nspan(npos(0, 1, 1), npos(15, 1, 16)),
2754                 vec![
2755                     flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2756                     lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
2757                     lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
2758                     flag_set(pat, 7..12, ast::Flag::IgnoreWhitespace, true),
2759                     lit_with('a', nspan(npos(12, 1, 13), npos(13, 1, 14))),
2760                     lit_with(' ', nspan(npos(13, 1, 14), npos(14, 1, 15))),
2761                     lit_with('b', nspan(npos(14, 1, 15), npos(15, 1, 16))),
2762                 ]
2763             ))
2764         );
2765 
2766         // Test that nesting whitespace insensitive flags works.
2767         let pat = "a (?x:a )a ";
2768         assert_eq!(
2769             parser(pat).parse(),
2770             Ok(concat_with(
2771                 span_range(pat, 0..11),
2772                 vec![
2773                     lit_with('a', span_range(pat, 0..1)),
2774                     lit_with(' ', span_range(pat, 1..2)),
2775                     Ast::Group(ast::Group {
2776                         span: span_range(pat, 2..9),
2777                         kind: ast::GroupKind::NonCapturing(ast::Flags {
2778                             span: span_range(pat, 4..5),
2779                             items: vec![ast::FlagsItem {
2780                                 span: span_range(pat, 4..5),
2781                                 kind: ast::FlagsItemKind::Flag(
2782                                     ast::Flag::IgnoreWhitespace
2783                                 ),
2784                             },],
2785                         }),
2786                         ast: Box::new(lit_with('a', span_range(pat, 6..7))),
2787                     }),
2788                     lit_with('a', span_range(pat, 9..10)),
2789                     lit_with(' ', span_range(pat, 10..11)),
2790                 ]
2791             ))
2792         );
2793 
2794         // Test that whitespace after an opening paren is insignificant.
2795         let pat = "(?x)( ?P<foo> a )";
2796         assert_eq!(
2797             parser(pat).parse(),
2798             Ok(concat_with(
2799                 span_range(pat, 0..pat.len()),
2800                 vec![
2801                     flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2802                     Ast::Group(ast::Group {
2803                         span: span_range(pat, 4..pat.len()),
2804                         kind: ast::GroupKind::CaptureName(ast::CaptureName {
2805                             span: span_range(pat, 9..12),
2806                             name: s("foo"),
2807                             index: 1,
2808                         }),
2809                         ast: Box::new(lit_with('a', span_range(pat, 14..15))),
2810                     }),
2811                 ]
2812             ))
2813         );
2814         let pat = "(?x)(  a )";
2815         assert_eq!(
2816             parser(pat).parse(),
2817             Ok(concat_with(
2818                 span_range(pat, 0..pat.len()),
2819                 vec![
2820                     flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2821                     Ast::Group(ast::Group {
2822                         span: span_range(pat, 4..pat.len()),
2823                         kind: ast::GroupKind::CaptureIndex(1),
2824                         ast: Box::new(lit_with('a', span_range(pat, 7..8))),
2825                     }),
2826                 ]
2827             ))
2828         );
2829         let pat = "(?x)(  ?:  a )";
2830         assert_eq!(
2831             parser(pat).parse(),
2832             Ok(concat_with(
2833                 span_range(pat, 0..pat.len()),
2834                 vec![
2835                     flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2836                     Ast::Group(ast::Group {
2837                         span: span_range(pat, 4..pat.len()),
2838                         kind: ast::GroupKind::NonCapturing(ast::Flags {
2839                             span: span_range(pat, 8..8),
2840                             items: vec![],
2841                         }),
2842                         ast: Box::new(lit_with('a', span_range(pat, 11..12))),
2843                     }),
2844                 ]
2845             ))
2846         );
2847         let pat = r"(?x)\x { 53 }";
2848         assert_eq!(
2849             parser(pat).parse(),
2850             Ok(concat_with(
2851                 span_range(pat, 0..pat.len()),
2852                 vec![
2853                     flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2854                     Ast::Literal(ast::Literal {
2855                         span: span(4..13),
2856                         kind: ast::LiteralKind::HexBrace(
2857                             ast::HexLiteralKind::X
2858                         ),
2859                         c: 'S',
2860                     }),
2861                 ]
2862             ))
2863         );
2864 
2865         // Test that whitespace after an escape is OK.
2866         let pat = r"(?x)\ ";
2867         assert_eq!(
2868             parser(pat).parse(),
2869             Ok(concat_with(
2870                 span_range(pat, 0..pat.len()),
2871                 vec![
2872                     flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2873                     Ast::Literal(ast::Literal {
2874                         span: span_range(pat, 4..6),
2875                         kind: ast::LiteralKind::Special(
2876                             ast::SpecialLiteralKind::Space
2877                         ),
2878                         c: ' ',
2879                     }),
2880                 ]
2881             ))
2882         );
2883         // ... but only when `x` mode is enabled.
2884         let pat = r"\ ";
2885         assert_eq!(
2886             parser(pat).parse().unwrap_err(),
2887             TestError {
2888                 span: span_range(pat, 0..2),
2889                 kind: ast::ErrorKind::EscapeUnrecognized,
2890             }
2891         );
2892     }
2893 
2894     #[test]
parse_newlines()2895     fn parse_newlines() {
2896         let pat = ".\n.";
2897         assert_eq!(
2898             parser(pat).parse(),
2899             Ok(concat_with(
2900                 span_range(pat, 0..3),
2901                 vec![
2902                     Ast::Dot(span_range(pat, 0..1)),
2903                     lit_with('\n', span_range(pat, 1..2)),
2904                     Ast::Dot(span_range(pat, 2..3)),
2905                 ]
2906             ))
2907         );
2908 
2909         let pat = "foobar\nbaz\nquux\n";
2910         assert_eq!(
2911             parser(pat).parse(),
2912             Ok(concat_with(
2913                 span_range(pat, 0..pat.len()),
2914                 vec![
2915                     lit_with('f', nspan(npos(0, 1, 1), npos(1, 1, 2))),
2916                     lit_with('o', nspan(npos(1, 1, 2), npos(2, 1, 3))),
2917                     lit_with('o', nspan(npos(2, 1, 3), npos(3, 1, 4))),
2918                     lit_with('b', nspan(npos(3, 1, 4), npos(4, 1, 5))),
2919                     lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
2920                     lit_with('r', nspan(npos(5, 1, 6), npos(6, 1, 7))),
2921                     lit_with('\n', nspan(npos(6, 1, 7), npos(7, 2, 1))),
2922                     lit_with('b', nspan(npos(7, 2, 1), npos(8, 2, 2))),
2923                     lit_with('a', nspan(npos(8, 2, 2), npos(9, 2, 3))),
2924                     lit_with('z', nspan(npos(9, 2, 3), npos(10, 2, 4))),
2925                     lit_with('\n', nspan(npos(10, 2, 4), npos(11, 3, 1))),
2926                     lit_with('q', nspan(npos(11, 3, 1), npos(12, 3, 2))),
2927                     lit_with('u', nspan(npos(12, 3, 2), npos(13, 3, 3))),
2928                     lit_with('u', nspan(npos(13, 3, 3), npos(14, 3, 4))),
2929                     lit_with('x', nspan(npos(14, 3, 4), npos(15, 3, 5))),
2930                     lit_with('\n', nspan(npos(15, 3, 5), npos(16, 4, 1))),
2931                 ]
2932             ))
2933         );
2934     }
2935 
2936     #[test]
parse_uncounted_repetition()2937     fn parse_uncounted_repetition() {
2938         assert_eq!(
2939             parser(r"a*").parse(),
2940             Ok(Ast::Repetition(ast::Repetition {
2941                 span: span(0..2),
2942                 op: ast::RepetitionOp {
2943                     span: span(1..2),
2944                     kind: ast::RepetitionKind::ZeroOrMore,
2945                 },
2946                 greedy: true,
2947                 ast: Box::new(lit('a', 0)),
2948             }))
2949         );
2950         assert_eq!(
2951             parser(r"a+").parse(),
2952             Ok(Ast::Repetition(ast::Repetition {
2953                 span: span(0..2),
2954                 op: ast::RepetitionOp {
2955                     span: span(1..2),
2956                     kind: ast::RepetitionKind::OneOrMore,
2957                 },
2958                 greedy: true,
2959                 ast: Box::new(lit('a', 0)),
2960             }))
2961         );
2962 
2963         assert_eq!(
2964             parser(r"a?").parse(),
2965             Ok(Ast::Repetition(ast::Repetition {
2966                 span: span(0..2),
2967                 op: ast::RepetitionOp {
2968                     span: span(1..2),
2969                     kind: ast::RepetitionKind::ZeroOrOne,
2970                 },
2971                 greedy: true,
2972                 ast: Box::new(lit('a', 0)),
2973             }))
2974         );
2975         assert_eq!(
2976             parser(r"a??").parse(),
2977             Ok(Ast::Repetition(ast::Repetition {
2978                 span: span(0..3),
2979                 op: ast::RepetitionOp {
2980                     span: span(1..3),
2981                     kind: ast::RepetitionKind::ZeroOrOne,
2982                 },
2983                 greedy: false,
2984                 ast: Box::new(lit('a', 0)),
2985             }))
2986         );
2987         assert_eq!(
2988             parser(r"a?").parse(),
2989             Ok(Ast::Repetition(ast::Repetition {
2990                 span: span(0..2),
2991                 op: ast::RepetitionOp {
2992                     span: span(1..2),
2993                     kind: ast::RepetitionKind::ZeroOrOne,
2994                 },
2995                 greedy: true,
2996                 ast: Box::new(lit('a', 0)),
2997             }))
2998         );
2999         assert_eq!(
3000             parser(r"a?b").parse(),
3001             Ok(concat(
3002                 0..3,
3003                 vec![
3004                     Ast::Repetition(ast::Repetition {
3005                         span: span(0..2),
3006                         op: ast::RepetitionOp {
3007                             span: span(1..2),
3008                             kind: ast::RepetitionKind::ZeroOrOne,
3009                         },
3010                         greedy: true,
3011                         ast: Box::new(lit('a', 0)),
3012                     }),
3013                     lit('b', 2),
3014                 ]
3015             ))
3016         );
3017         assert_eq!(
3018             parser(r"a??b").parse(),
3019             Ok(concat(
3020                 0..4,
3021                 vec![
3022                     Ast::Repetition(ast::Repetition {
3023                         span: span(0..3),
3024                         op: ast::RepetitionOp {
3025                             span: span(1..3),
3026                             kind: ast::RepetitionKind::ZeroOrOne,
3027                         },
3028                         greedy: false,
3029                         ast: Box::new(lit('a', 0)),
3030                     }),
3031                     lit('b', 3),
3032                 ]
3033             ))
3034         );
3035         assert_eq!(
3036             parser(r"ab?").parse(),
3037             Ok(concat(
3038                 0..3,
3039                 vec![
3040                     lit('a', 0),
3041                     Ast::Repetition(ast::Repetition {
3042                         span: span(1..3),
3043                         op: ast::RepetitionOp {
3044                             span: span(2..3),
3045                             kind: ast::RepetitionKind::ZeroOrOne,
3046                         },
3047                         greedy: true,
3048                         ast: Box::new(lit('b', 1)),
3049                     }),
3050                 ]
3051             ))
3052         );
3053         assert_eq!(
3054             parser(r"(ab)?").parse(),
3055             Ok(Ast::Repetition(ast::Repetition {
3056                 span: span(0..5),
3057                 op: ast::RepetitionOp {
3058                     span: span(4..5),
3059                     kind: ast::RepetitionKind::ZeroOrOne,
3060                 },
3061                 greedy: true,
3062                 ast: Box::new(group(
3063                     0..4,
3064                     1,
3065                     concat(1..3, vec![lit('a', 1), lit('b', 2),])
3066                 )),
3067             }))
3068         );
3069         assert_eq!(
3070             parser(r"|a?").parse(),
3071             Ok(alt(
3072                 0..3,
3073                 vec![
3074                     Ast::Empty(span(0..0)),
3075                     Ast::Repetition(ast::Repetition {
3076                         span: span(1..3),
3077                         op: ast::RepetitionOp {
3078                             span: span(2..3),
3079                             kind: ast::RepetitionKind::ZeroOrOne,
3080                         },
3081                         greedy: true,
3082                         ast: Box::new(lit('a', 1)),
3083                     }),
3084                 ]
3085             ))
3086         );
3087 
3088         assert_eq!(
3089             parser(r"*").parse().unwrap_err(),
3090             TestError {
3091                 span: span(0..0),
3092                 kind: ast::ErrorKind::RepetitionMissing,
3093             }
3094         );
3095         assert_eq!(
3096             parser(r"(?i)*").parse().unwrap_err(),
3097             TestError {
3098                 span: span(4..4),
3099                 kind: ast::ErrorKind::RepetitionMissing,
3100             }
3101         );
3102         assert_eq!(
3103             parser(r"(*)").parse().unwrap_err(),
3104             TestError {
3105                 span: span(1..1),
3106                 kind: ast::ErrorKind::RepetitionMissing,
3107             }
3108         );
3109         assert_eq!(
3110             parser(r"(?:?)").parse().unwrap_err(),
3111             TestError {
3112                 span: span(3..3),
3113                 kind: ast::ErrorKind::RepetitionMissing,
3114             }
3115         );
3116         assert_eq!(
3117             parser(r"+").parse().unwrap_err(),
3118             TestError {
3119                 span: span(0..0),
3120                 kind: ast::ErrorKind::RepetitionMissing,
3121             }
3122         );
3123         assert_eq!(
3124             parser(r"?").parse().unwrap_err(),
3125             TestError {
3126                 span: span(0..0),
3127                 kind: ast::ErrorKind::RepetitionMissing,
3128             }
3129         );
3130         assert_eq!(
3131             parser(r"(?)").parse().unwrap_err(),
3132             TestError {
3133                 span: span(1..1),
3134                 kind: ast::ErrorKind::RepetitionMissing,
3135             }
3136         );
3137         assert_eq!(
3138             parser(r"|*").parse().unwrap_err(),
3139             TestError {
3140                 span: span(1..1),
3141                 kind: ast::ErrorKind::RepetitionMissing,
3142             }
3143         );
3144         assert_eq!(
3145             parser(r"|+").parse().unwrap_err(),
3146             TestError {
3147                 span: span(1..1),
3148                 kind: ast::ErrorKind::RepetitionMissing,
3149             }
3150         );
3151         assert_eq!(
3152             parser(r"|?").parse().unwrap_err(),
3153             TestError {
3154                 span: span(1..1),
3155                 kind: ast::ErrorKind::RepetitionMissing,
3156             }
3157         );
3158     }
3159 
3160     #[test]
parse_counted_repetition()3161     fn parse_counted_repetition() {
3162         assert_eq!(
3163             parser(r"a{5}").parse(),
3164             Ok(Ast::Repetition(ast::Repetition {
3165                 span: span(0..4),
3166                 op: ast::RepetitionOp {
3167                     span: span(1..4),
3168                     kind: ast::RepetitionKind::Range(
3169                         ast::RepetitionRange::Exactly(5)
3170                     ),
3171                 },
3172                 greedy: true,
3173                 ast: Box::new(lit('a', 0)),
3174             }))
3175         );
3176         assert_eq!(
3177             parser(r"a{5,}").parse(),
3178             Ok(Ast::Repetition(ast::Repetition {
3179                 span: span(0..5),
3180                 op: ast::RepetitionOp {
3181                     span: span(1..5),
3182                     kind: ast::RepetitionKind::Range(
3183                         ast::RepetitionRange::AtLeast(5)
3184                     ),
3185                 },
3186                 greedy: true,
3187                 ast: Box::new(lit('a', 0)),
3188             }))
3189         );
3190         assert_eq!(
3191             parser(r"a{5,9}").parse(),
3192             Ok(Ast::Repetition(ast::Repetition {
3193                 span: span(0..6),
3194                 op: ast::RepetitionOp {
3195                     span: span(1..6),
3196                     kind: ast::RepetitionKind::Range(
3197                         ast::RepetitionRange::Bounded(5, 9)
3198                     ),
3199                 },
3200                 greedy: true,
3201                 ast: Box::new(lit('a', 0)),
3202             }))
3203         );
3204         assert_eq!(
3205             parser(r"a{5}?").parse(),
3206             Ok(Ast::Repetition(ast::Repetition {
3207                 span: span(0..5),
3208                 op: ast::RepetitionOp {
3209                     span: span(1..5),
3210                     kind: ast::RepetitionKind::Range(
3211                         ast::RepetitionRange::Exactly(5)
3212                     ),
3213                 },
3214                 greedy: false,
3215                 ast: Box::new(lit('a', 0)),
3216             }))
3217         );
3218         assert_eq!(
3219             parser(r"ab{5}").parse(),
3220             Ok(concat(
3221                 0..5,
3222                 vec![
3223                     lit('a', 0),
3224                     Ast::Repetition(ast::Repetition {
3225                         span: span(1..5),
3226                         op: ast::RepetitionOp {
3227                             span: span(2..5),
3228                             kind: ast::RepetitionKind::Range(
3229                                 ast::RepetitionRange::Exactly(5)
3230                             ),
3231                         },
3232                         greedy: true,
3233                         ast: Box::new(lit('b', 1)),
3234                     }),
3235                 ]
3236             ))
3237         );
3238         assert_eq!(
3239             parser(r"ab{5}c").parse(),
3240             Ok(concat(
3241                 0..6,
3242                 vec![
3243                     lit('a', 0),
3244                     Ast::Repetition(ast::Repetition {
3245                         span: span(1..5),
3246                         op: ast::RepetitionOp {
3247                             span: span(2..5),
3248                             kind: ast::RepetitionKind::Range(
3249                                 ast::RepetitionRange::Exactly(5)
3250                             ),
3251                         },
3252                         greedy: true,
3253                         ast: Box::new(lit('b', 1)),
3254                     }),
3255                     lit('c', 5),
3256                 ]
3257             ))
3258         );
3259 
3260         assert_eq!(
3261             parser(r"a{ 5 }").parse(),
3262             Ok(Ast::Repetition(ast::Repetition {
3263                 span: span(0..6),
3264                 op: ast::RepetitionOp {
3265                     span: span(1..6),
3266                     kind: ast::RepetitionKind::Range(
3267                         ast::RepetitionRange::Exactly(5)
3268                     ),
3269                 },
3270                 greedy: true,
3271                 ast: Box::new(lit('a', 0)),
3272             }))
3273         );
3274         assert_eq!(
3275             parser(r"a{ 5 , 9 }").parse(),
3276             Ok(Ast::Repetition(ast::Repetition {
3277                 span: span(0..10),
3278                 op: ast::RepetitionOp {
3279                     span: span(1..10),
3280                     kind: ast::RepetitionKind::Range(
3281                         ast::RepetitionRange::Bounded(5, 9)
3282                     ),
3283                 },
3284                 greedy: true,
3285                 ast: Box::new(lit('a', 0)),
3286             }))
3287         );
3288         assert_eq!(
3289             parser_ignore_whitespace(r"a{5,9} ?").parse(),
3290             Ok(Ast::Repetition(ast::Repetition {
3291                 span: span(0..8),
3292                 op: ast::RepetitionOp {
3293                     span: span(1..8),
3294                     kind: ast::RepetitionKind::Range(
3295                         ast::RepetitionRange::Bounded(5, 9)
3296                     ),
3297                 },
3298                 greedy: false,
3299                 ast: Box::new(lit('a', 0)),
3300             }))
3301         );
3302 
3303         assert_eq!(
3304             parser(r"(?i){0}").parse().unwrap_err(),
3305             TestError {
3306                 span: span(4..4),
3307                 kind: ast::ErrorKind::RepetitionMissing,
3308             }
3309         );
3310         assert_eq!(
3311             parser(r"(?m){1,1}").parse().unwrap_err(),
3312             TestError {
3313                 span: span(4..4),
3314                 kind: ast::ErrorKind::RepetitionMissing,
3315             }
3316         );
3317         assert_eq!(
3318             parser(r"a{]}").parse().unwrap_err(),
3319             TestError {
3320                 span: span(2..2),
3321                 kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3322             }
3323         );
3324         assert_eq!(
3325             parser(r"a{1,]}").parse().unwrap_err(),
3326             TestError {
3327                 span: span(4..4),
3328                 kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3329             }
3330         );
3331         assert_eq!(
3332             parser(r"a{").parse().unwrap_err(),
3333             TestError {
3334                 span: span(1..2),
3335                 kind: ast::ErrorKind::RepetitionCountUnclosed,
3336             }
3337         );
3338         assert_eq!(
3339             parser(r"a{}").parse().unwrap_err(),
3340             TestError {
3341                 span: span(2..2),
3342                 kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3343             }
3344         );
3345         assert_eq!(
3346             parser(r"a{a").parse().unwrap_err(),
3347             TestError {
3348                 span: span(2..2),
3349                 kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3350             }
3351         );
3352         assert_eq!(
3353             parser(r"a{9999999999}").parse().unwrap_err(),
3354             TestError {
3355                 span: span(2..12),
3356                 kind: ast::ErrorKind::DecimalInvalid,
3357             }
3358         );
3359         assert_eq!(
3360             parser(r"a{9").parse().unwrap_err(),
3361             TestError {
3362                 span: span(1..3),
3363                 kind: ast::ErrorKind::RepetitionCountUnclosed,
3364             }
3365         );
3366         assert_eq!(
3367             parser(r"a{9,a").parse().unwrap_err(),
3368             TestError {
3369                 span: span(4..4),
3370                 kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3371             }
3372         );
3373         assert_eq!(
3374             parser(r"a{9,9999999999}").parse().unwrap_err(),
3375             TestError {
3376                 span: span(4..14),
3377                 kind: ast::ErrorKind::DecimalInvalid,
3378             }
3379         );
3380         assert_eq!(
3381             parser(r"a{9,").parse().unwrap_err(),
3382             TestError {
3383                 span: span(1..4),
3384                 kind: ast::ErrorKind::RepetitionCountUnclosed,
3385             }
3386         );
3387         assert_eq!(
3388             parser(r"a{9,11").parse().unwrap_err(),
3389             TestError {
3390                 span: span(1..6),
3391                 kind: ast::ErrorKind::RepetitionCountUnclosed,
3392             }
3393         );
3394         assert_eq!(
3395             parser(r"a{2,1}").parse().unwrap_err(),
3396             TestError {
3397                 span: span(1..6),
3398                 kind: ast::ErrorKind::RepetitionCountInvalid,
3399             }
3400         );
3401         assert_eq!(
3402             parser(r"{5}").parse().unwrap_err(),
3403             TestError {
3404                 span: span(0..0),
3405                 kind: ast::ErrorKind::RepetitionMissing,
3406             }
3407         );
3408         assert_eq!(
3409             parser(r"|{5}").parse().unwrap_err(),
3410             TestError {
3411                 span: span(1..1),
3412                 kind: ast::ErrorKind::RepetitionMissing,
3413             }
3414         );
3415     }
3416 
3417     #[test]
parse_alternate()3418     fn parse_alternate() {
3419         assert_eq!(
3420             parser(r"a|b").parse(),
3421             Ok(Ast::Alternation(ast::Alternation {
3422                 span: span(0..3),
3423                 asts: vec![lit('a', 0), lit('b', 2)],
3424             }))
3425         );
3426         assert_eq!(
3427             parser(r"(a|b)").parse(),
3428             Ok(group(
3429                 0..5,
3430                 1,
3431                 Ast::Alternation(ast::Alternation {
3432                     span: span(1..4),
3433                     asts: vec![lit('a', 1), lit('b', 3)],
3434                 })
3435             ))
3436         );
3437 
3438         assert_eq!(
3439             parser(r"a|b|c").parse(),
3440             Ok(Ast::Alternation(ast::Alternation {
3441                 span: span(0..5),
3442                 asts: vec![lit('a', 0), lit('b', 2), lit('c', 4)],
3443             }))
3444         );
3445         assert_eq!(
3446             parser(r"ax|by|cz").parse(),
3447             Ok(Ast::Alternation(ast::Alternation {
3448                 span: span(0..8),
3449                 asts: vec![
3450                     concat(0..2, vec![lit('a', 0), lit('x', 1)]),
3451                     concat(3..5, vec![lit('b', 3), lit('y', 4)]),
3452                     concat(6..8, vec![lit('c', 6), lit('z', 7)]),
3453                 ],
3454             }))
3455         );
3456         assert_eq!(
3457             parser(r"(ax|by|cz)").parse(),
3458             Ok(group(
3459                 0..10,
3460                 1,
3461                 Ast::Alternation(ast::Alternation {
3462                     span: span(1..9),
3463                     asts: vec![
3464                         concat(1..3, vec![lit('a', 1), lit('x', 2)]),
3465                         concat(4..6, vec![lit('b', 4), lit('y', 5)]),
3466                         concat(7..9, vec![lit('c', 7), lit('z', 8)]),
3467                     ],
3468                 })
3469             ))
3470         );
3471         assert_eq!(
3472             parser(r"(ax|(by|(cz)))").parse(),
3473             Ok(group(
3474                 0..14,
3475                 1,
3476                 alt(
3477                     1..13,
3478                     vec![
3479                         concat(1..3, vec![lit('a', 1), lit('x', 2)]),
3480                         group(
3481                             4..13,
3482                             2,
3483                             alt(
3484                                 5..12,
3485                                 vec![
3486                                     concat(
3487                                         5..7,
3488                                         vec![lit('b', 5), lit('y', 6)]
3489                                     ),
3490                                     group(
3491                                         8..12,
3492                                         3,
3493                                         concat(
3494                                             9..11,
3495                                             vec![lit('c', 9), lit('z', 10),]
3496                                         )
3497                                     ),
3498                                 ]
3499                             )
3500                         ),
3501                     ]
3502                 )
3503             ))
3504         );
3505 
3506         assert_eq!(
3507             parser(r"|").parse(),
3508             Ok(alt(
3509                 0..1,
3510                 vec![Ast::Empty(span(0..0)), Ast::Empty(span(1..1)),]
3511             ))
3512         );
3513         assert_eq!(
3514             parser(r"||").parse(),
3515             Ok(alt(
3516                 0..2,
3517                 vec![
3518                     Ast::Empty(span(0..0)),
3519                     Ast::Empty(span(1..1)),
3520                     Ast::Empty(span(2..2)),
3521                 ]
3522             ))
3523         );
3524         assert_eq!(
3525             parser(r"a|").parse(),
3526             Ok(alt(0..2, vec![lit('a', 0), Ast::Empty(span(2..2)),]))
3527         );
3528         assert_eq!(
3529             parser(r"|a").parse(),
3530             Ok(alt(0..2, vec![Ast::Empty(span(0..0)), lit('a', 1),]))
3531         );
3532 
3533         assert_eq!(
3534             parser(r"(|)").parse(),
3535             Ok(group(
3536                 0..3,
3537                 1,
3538                 alt(
3539                     1..2,
3540                     vec![Ast::Empty(span(1..1)), Ast::Empty(span(2..2)),]
3541                 )
3542             ))
3543         );
3544         assert_eq!(
3545             parser(r"(a|)").parse(),
3546             Ok(group(
3547                 0..4,
3548                 1,
3549                 alt(1..3, vec![lit('a', 1), Ast::Empty(span(3..3)),])
3550             ))
3551         );
3552         assert_eq!(
3553             parser(r"(|a)").parse(),
3554             Ok(group(
3555                 0..4,
3556                 1,
3557                 alt(1..3, vec![Ast::Empty(span(1..1)), lit('a', 2),])
3558             ))
3559         );
3560 
3561         assert_eq!(
3562             parser(r"a|b)").parse().unwrap_err(),
3563             TestError {
3564                 span: span(3..4),
3565                 kind: ast::ErrorKind::GroupUnopened,
3566             }
3567         );
3568         assert_eq!(
3569             parser(r"(a|b").parse().unwrap_err(),
3570             TestError {
3571                 span: span(0..1),
3572                 kind: ast::ErrorKind::GroupUnclosed,
3573             }
3574         );
3575     }
3576 
3577     #[test]
parse_unsupported_lookaround()3578     fn parse_unsupported_lookaround() {
3579         assert_eq!(
3580             parser(r"(?=a)").parse().unwrap_err(),
3581             TestError {
3582                 span: span(0..3),
3583                 kind: ast::ErrorKind::UnsupportedLookAround,
3584             }
3585         );
3586         assert_eq!(
3587             parser(r"(?!a)").parse().unwrap_err(),
3588             TestError {
3589                 span: span(0..3),
3590                 kind: ast::ErrorKind::UnsupportedLookAround,
3591             }
3592         );
3593         assert_eq!(
3594             parser(r"(?<=a)").parse().unwrap_err(),
3595             TestError {
3596                 span: span(0..4),
3597                 kind: ast::ErrorKind::UnsupportedLookAround,
3598             }
3599         );
3600         assert_eq!(
3601             parser(r"(?<!a)").parse().unwrap_err(),
3602             TestError {
3603                 span: span(0..4),
3604                 kind: ast::ErrorKind::UnsupportedLookAround,
3605             }
3606         );
3607     }
3608 
3609     #[test]
parse_group()3610     fn parse_group() {
3611         assert_eq!(
3612             parser("(?i)").parse(),
3613             Ok(Ast::Flags(ast::SetFlags {
3614                 span: span(0..4),
3615                 flags: ast::Flags {
3616                     span: span(2..3),
3617                     items: vec![ast::FlagsItem {
3618                         span: span(2..3),
3619                         kind: ast::FlagsItemKind::Flag(
3620                             ast::Flag::CaseInsensitive
3621                         ),
3622                     }],
3623                 },
3624             }))
3625         );
3626         assert_eq!(
3627             parser("(?iU)").parse(),
3628             Ok(Ast::Flags(ast::SetFlags {
3629                 span: span(0..5),
3630                 flags: ast::Flags {
3631                     span: span(2..4),
3632                     items: vec![
3633                         ast::FlagsItem {
3634                             span: span(2..3),
3635                             kind: ast::FlagsItemKind::Flag(
3636                                 ast::Flag::CaseInsensitive
3637                             ),
3638                         },
3639                         ast::FlagsItem {
3640                             span: span(3..4),
3641                             kind: ast::FlagsItemKind::Flag(
3642                                 ast::Flag::SwapGreed
3643                             ),
3644                         },
3645                     ],
3646                 },
3647             }))
3648         );
3649         assert_eq!(
3650             parser("(?i-U)").parse(),
3651             Ok(Ast::Flags(ast::SetFlags {
3652                 span: span(0..6),
3653                 flags: ast::Flags {
3654                     span: span(2..5),
3655                     items: vec![
3656                         ast::FlagsItem {
3657                             span: span(2..3),
3658                             kind: ast::FlagsItemKind::Flag(
3659                                 ast::Flag::CaseInsensitive
3660                             ),
3661                         },
3662                         ast::FlagsItem {
3663                             span: span(3..4),
3664                             kind: ast::FlagsItemKind::Negation,
3665                         },
3666                         ast::FlagsItem {
3667                             span: span(4..5),
3668                             kind: ast::FlagsItemKind::Flag(
3669                                 ast::Flag::SwapGreed
3670                             ),
3671                         },
3672                     ],
3673                 },
3674             }))
3675         );
3676 
3677         assert_eq!(
3678             parser("()").parse(),
3679             Ok(Ast::Group(ast::Group {
3680                 span: span(0..2),
3681                 kind: ast::GroupKind::CaptureIndex(1),
3682                 ast: Box::new(Ast::Empty(span(1..1))),
3683             }))
3684         );
3685         assert_eq!(
3686             parser("(a)").parse(),
3687             Ok(Ast::Group(ast::Group {
3688                 span: span(0..3),
3689                 kind: ast::GroupKind::CaptureIndex(1),
3690                 ast: Box::new(lit('a', 1)),
3691             }))
3692         );
3693         assert_eq!(
3694             parser("(())").parse(),
3695             Ok(Ast::Group(ast::Group {
3696                 span: span(0..4),
3697                 kind: ast::GroupKind::CaptureIndex(1),
3698                 ast: Box::new(Ast::Group(ast::Group {
3699                     span: span(1..3),
3700                     kind: ast::GroupKind::CaptureIndex(2),
3701                     ast: Box::new(Ast::Empty(span(2..2))),
3702                 })),
3703             }))
3704         );
3705 
3706         assert_eq!(
3707             parser("(?:a)").parse(),
3708             Ok(Ast::Group(ast::Group {
3709                 span: span(0..5),
3710                 kind: ast::GroupKind::NonCapturing(ast::Flags {
3711                     span: span(2..2),
3712                     items: vec![],
3713                 }),
3714                 ast: Box::new(lit('a', 3)),
3715             }))
3716         );
3717 
3718         assert_eq!(
3719             parser("(?i:a)").parse(),
3720             Ok(Ast::Group(ast::Group {
3721                 span: span(0..6),
3722                 kind: ast::GroupKind::NonCapturing(ast::Flags {
3723                     span: span(2..3),
3724                     items: vec![ast::FlagsItem {
3725                         span: span(2..3),
3726                         kind: ast::FlagsItemKind::Flag(
3727                             ast::Flag::CaseInsensitive
3728                         ),
3729                     },],
3730                 }),
3731                 ast: Box::new(lit('a', 4)),
3732             }))
3733         );
3734         assert_eq!(
3735             parser("(?i-U:a)").parse(),
3736             Ok(Ast::Group(ast::Group {
3737                 span: span(0..8),
3738                 kind: ast::GroupKind::NonCapturing(ast::Flags {
3739                     span: span(2..5),
3740                     items: vec![
3741                         ast::FlagsItem {
3742                             span: span(2..3),
3743                             kind: ast::FlagsItemKind::Flag(
3744                                 ast::Flag::CaseInsensitive
3745                             ),
3746                         },
3747                         ast::FlagsItem {
3748                             span: span(3..4),
3749                             kind: ast::FlagsItemKind::Negation,
3750                         },
3751                         ast::FlagsItem {
3752                             span: span(4..5),
3753                             kind: ast::FlagsItemKind::Flag(
3754                                 ast::Flag::SwapGreed
3755                             ),
3756                         },
3757                     ],
3758                 }),
3759                 ast: Box::new(lit('a', 6)),
3760             }))
3761         );
3762 
3763         assert_eq!(
3764             parser("(").parse().unwrap_err(),
3765             TestError {
3766                 span: span(0..1),
3767                 kind: ast::ErrorKind::GroupUnclosed,
3768             }
3769         );
3770         assert_eq!(
3771             parser("(?").parse().unwrap_err(),
3772             TestError {
3773                 span: span(0..1),
3774                 kind: ast::ErrorKind::GroupUnclosed,
3775             }
3776         );
3777         assert_eq!(
3778             parser("(?P").parse().unwrap_err(),
3779             TestError {
3780                 span: span(2..3),
3781                 kind: ast::ErrorKind::FlagUnrecognized,
3782             }
3783         );
3784         assert_eq!(
3785             parser("(?P<").parse().unwrap_err(),
3786             TestError {
3787                 span: span(4..4),
3788                 kind: ast::ErrorKind::GroupNameUnexpectedEof,
3789             }
3790         );
3791         assert_eq!(
3792             parser("(a").parse().unwrap_err(),
3793             TestError {
3794                 span: span(0..1),
3795                 kind: ast::ErrorKind::GroupUnclosed,
3796             }
3797         );
3798         assert_eq!(
3799             parser("(()").parse().unwrap_err(),
3800             TestError {
3801                 span: span(0..1),
3802                 kind: ast::ErrorKind::GroupUnclosed,
3803             }
3804         );
3805         assert_eq!(
3806             parser(")").parse().unwrap_err(),
3807             TestError {
3808                 span: span(0..1),
3809                 kind: ast::ErrorKind::GroupUnopened,
3810             }
3811         );
3812         assert_eq!(
3813             parser("a)").parse().unwrap_err(),
3814             TestError {
3815                 span: span(1..2),
3816                 kind: ast::ErrorKind::GroupUnopened,
3817             }
3818         );
3819     }
3820 
3821     #[test]
parse_capture_name()3822     fn parse_capture_name() {
3823         assert_eq!(
3824             parser("(?P<a>z)").parse(),
3825             Ok(Ast::Group(ast::Group {
3826                 span: span(0..8),
3827                 kind: ast::GroupKind::CaptureName(ast::CaptureName {
3828                     span: span(4..5),
3829                     name: s("a"),
3830                     index: 1,
3831                 }),
3832                 ast: Box::new(lit('z', 6)),
3833             }))
3834         );
3835         assert_eq!(
3836             parser("(?P<abc>z)").parse(),
3837             Ok(Ast::Group(ast::Group {
3838                 span: span(0..10),
3839                 kind: ast::GroupKind::CaptureName(ast::CaptureName {
3840                     span: span(4..7),
3841                     name: s("abc"),
3842                     index: 1,
3843                 }),
3844                 ast: Box::new(lit('z', 8)),
3845             }))
3846         );
3847 
3848         assert_eq!(
3849             parser("(?P<").parse().unwrap_err(),
3850             TestError {
3851                 span: span(4..4),
3852                 kind: ast::ErrorKind::GroupNameUnexpectedEof,
3853             }
3854         );
3855         assert_eq!(
3856             parser("(?P<>z)").parse().unwrap_err(),
3857             TestError {
3858                 span: span(4..4),
3859                 kind: ast::ErrorKind::GroupNameEmpty,
3860             }
3861         );
3862         assert_eq!(
3863             parser("(?P<a").parse().unwrap_err(),
3864             TestError {
3865                 span: span(5..5),
3866                 kind: ast::ErrorKind::GroupNameUnexpectedEof,
3867             }
3868         );
3869         assert_eq!(
3870             parser("(?P<ab").parse().unwrap_err(),
3871             TestError {
3872                 span: span(6..6),
3873                 kind: ast::ErrorKind::GroupNameUnexpectedEof,
3874             }
3875         );
3876         assert_eq!(
3877             parser("(?P<0a").parse().unwrap_err(),
3878             TestError {
3879                 span: span(4..5),
3880                 kind: ast::ErrorKind::GroupNameInvalid,
3881             }
3882         );
3883         assert_eq!(
3884             parser("(?P<~").parse().unwrap_err(),
3885             TestError {
3886                 span: span(4..5),
3887                 kind: ast::ErrorKind::GroupNameInvalid,
3888             }
3889         );
3890         assert_eq!(
3891             parser("(?P<abc~").parse().unwrap_err(),
3892             TestError {
3893                 span: span(7..8),
3894                 kind: ast::ErrorKind::GroupNameInvalid,
3895             }
3896         );
3897         assert_eq!(
3898             parser("(?P<a>y)(?P<a>z)").parse().unwrap_err(),
3899             TestError {
3900                 span: span(12..13),
3901                 kind: ast::ErrorKind::GroupNameDuplicate {
3902                     original: span(4..5),
3903                 },
3904             }
3905         );
3906     }
3907 
3908     #[test]
parse_flags()3909     fn parse_flags() {
3910         assert_eq!(
3911             parser("i:").parse_flags(),
3912             Ok(ast::Flags {
3913                 span: span(0..1),
3914                 items: vec![ast::FlagsItem {
3915                     span: span(0..1),
3916                     kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
3917                 }],
3918             })
3919         );
3920         assert_eq!(
3921             parser("i)").parse_flags(),
3922             Ok(ast::Flags {
3923                 span: span(0..1),
3924                 items: vec![ast::FlagsItem {
3925                     span: span(0..1),
3926                     kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
3927                 }],
3928             })
3929         );
3930 
3931         assert_eq!(
3932             parser("isU:").parse_flags(),
3933             Ok(ast::Flags {
3934                 span: span(0..3),
3935                 items: vec![
3936                     ast::FlagsItem {
3937                         span: span(0..1),
3938                         kind: ast::FlagsItemKind::Flag(
3939                             ast::Flag::CaseInsensitive
3940                         ),
3941                     },
3942                     ast::FlagsItem {
3943                         span: span(1..2),
3944                         kind: ast::FlagsItemKind::Flag(
3945                             ast::Flag::DotMatchesNewLine
3946                         ),
3947                     },
3948                     ast::FlagsItem {
3949                         span: span(2..3),
3950                         kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
3951                     },
3952                 ],
3953             })
3954         );
3955 
3956         assert_eq!(
3957             parser("-isU:").parse_flags(),
3958             Ok(ast::Flags {
3959                 span: span(0..4),
3960                 items: vec![
3961                     ast::FlagsItem {
3962                         span: span(0..1),
3963                         kind: ast::FlagsItemKind::Negation,
3964                     },
3965                     ast::FlagsItem {
3966                         span: span(1..2),
3967                         kind: ast::FlagsItemKind::Flag(
3968                             ast::Flag::CaseInsensitive
3969                         ),
3970                     },
3971                     ast::FlagsItem {
3972                         span: span(2..3),
3973                         kind: ast::FlagsItemKind::Flag(
3974                             ast::Flag::DotMatchesNewLine
3975                         ),
3976                     },
3977                     ast::FlagsItem {
3978                         span: span(3..4),
3979                         kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
3980                     },
3981                 ],
3982             })
3983         );
3984         assert_eq!(
3985             parser("i-sU:").parse_flags(),
3986             Ok(ast::Flags {
3987                 span: span(0..4),
3988                 items: vec![
3989                     ast::FlagsItem {
3990                         span: span(0..1),
3991                         kind: ast::FlagsItemKind::Flag(
3992                             ast::Flag::CaseInsensitive
3993                         ),
3994                     },
3995                     ast::FlagsItem {
3996                         span: span(1..2),
3997                         kind: ast::FlagsItemKind::Negation,
3998                     },
3999                     ast::FlagsItem {
4000                         span: span(2..3),
4001                         kind: ast::FlagsItemKind::Flag(
4002                             ast::Flag::DotMatchesNewLine
4003                         ),
4004                     },
4005                     ast::FlagsItem {
4006                         span: span(3..4),
4007                         kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
4008                     },
4009                 ],
4010             })
4011         );
4012 
4013         assert_eq!(
4014             parser("isU").parse_flags().unwrap_err(),
4015             TestError {
4016                 span: span(3..3),
4017                 kind: ast::ErrorKind::FlagUnexpectedEof,
4018             }
4019         );
4020         assert_eq!(
4021             parser("isUa:").parse_flags().unwrap_err(),
4022             TestError {
4023                 span: span(3..4),
4024                 kind: ast::ErrorKind::FlagUnrecognized,
4025             }
4026         );
4027         assert_eq!(
4028             parser("isUi:").parse_flags().unwrap_err(),
4029             TestError {
4030                 span: span(3..4),
4031                 kind: ast::ErrorKind::FlagDuplicate { original: span(0..1) },
4032             }
4033         );
4034         assert_eq!(
4035             parser("i-sU-i:").parse_flags().unwrap_err(),
4036             TestError {
4037                 span: span(4..5),
4038                 kind: ast::ErrorKind::FlagRepeatedNegation {
4039                     original: span(1..2),
4040                 },
4041             }
4042         );
4043         assert_eq!(
4044             parser("-)").parse_flags().unwrap_err(),
4045             TestError {
4046                 span: span(0..1),
4047                 kind: ast::ErrorKind::FlagDanglingNegation,
4048             }
4049         );
4050         assert_eq!(
4051             parser("i-)").parse_flags().unwrap_err(),
4052             TestError {
4053                 span: span(1..2),
4054                 kind: ast::ErrorKind::FlagDanglingNegation,
4055             }
4056         );
4057         assert_eq!(
4058             parser("iU-)").parse_flags().unwrap_err(),
4059             TestError {
4060                 span: span(2..3),
4061                 kind: ast::ErrorKind::FlagDanglingNegation,
4062             }
4063         );
4064     }
4065 
4066     #[test]
parse_flag()4067     fn parse_flag() {
4068         assert_eq!(parser("i").parse_flag(), Ok(ast::Flag::CaseInsensitive));
4069         assert_eq!(parser("m").parse_flag(), Ok(ast::Flag::MultiLine));
4070         assert_eq!(parser("s").parse_flag(), Ok(ast::Flag::DotMatchesNewLine));
4071         assert_eq!(parser("U").parse_flag(), Ok(ast::Flag::SwapGreed));
4072         assert_eq!(parser("u").parse_flag(), Ok(ast::Flag::Unicode));
4073         assert_eq!(parser("x").parse_flag(), Ok(ast::Flag::IgnoreWhitespace));
4074 
4075         assert_eq!(
4076             parser("a").parse_flag().unwrap_err(),
4077             TestError {
4078                 span: span(0..1),
4079                 kind: ast::ErrorKind::FlagUnrecognized,
4080             }
4081         );
4082         assert_eq!(
4083             parser("☃").parse_flag().unwrap_err(),
4084             TestError {
4085                 span: span_range("☃", 0..3),
4086                 kind: ast::ErrorKind::FlagUnrecognized,
4087             }
4088         );
4089     }
4090 
4091     #[test]
parse_primitive_non_escape()4092     fn parse_primitive_non_escape() {
4093         assert_eq!(
4094             parser(r".").parse_primitive(),
4095             Ok(Primitive::Dot(span(0..1)))
4096         );
4097         assert_eq!(
4098             parser(r"^").parse_primitive(),
4099             Ok(Primitive::Assertion(ast::Assertion {
4100                 span: span(0..1),
4101                 kind: ast::AssertionKind::StartLine,
4102             }))
4103         );
4104         assert_eq!(
4105             parser(r"$").parse_primitive(),
4106             Ok(Primitive::Assertion(ast::Assertion {
4107                 span: span(0..1),
4108                 kind: ast::AssertionKind::EndLine,
4109             }))
4110         );
4111 
4112         assert_eq!(
4113             parser(r"a").parse_primitive(),
4114             Ok(Primitive::Literal(ast::Literal {
4115                 span: span(0..1),
4116                 kind: ast::LiteralKind::Verbatim,
4117                 c: 'a',
4118             }))
4119         );
4120         assert_eq!(
4121             parser(r"|").parse_primitive(),
4122             Ok(Primitive::Literal(ast::Literal {
4123                 span: span(0..1),
4124                 kind: ast::LiteralKind::Verbatim,
4125                 c: '|',
4126             }))
4127         );
4128         assert_eq!(
4129             parser(r"☃").parse_primitive(),
4130             Ok(Primitive::Literal(ast::Literal {
4131                 span: span_range("☃", 0..3),
4132                 kind: ast::LiteralKind::Verbatim,
4133                 c: '☃',
4134             }))
4135         );
4136     }
4137 
4138     #[test]
parse_escape()4139     fn parse_escape() {
4140         assert_eq!(
4141             parser(r"\|").parse_primitive(),
4142             Ok(Primitive::Literal(ast::Literal {
4143                 span: span(0..2),
4144                 kind: ast::LiteralKind::Punctuation,
4145                 c: '|',
4146             }))
4147         );
4148         let specials = &[
4149             (r"\a", '\x07', ast::SpecialLiteralKind::Bell),
4150             (r"\f", '\x0C', ast::SpecialLiteralKind::FormFeed),
4151             (r"\t", '\t', ast::SpecialLiteralKind::Tab),
4152             (r"\n", '\n', ast::SpecialLiteralKind::LineFeed),
4153             (r"\r", '\r', ast::SpecialLiteralKind::CarriageReturn),
4154             (r"\v", '\x0B', ast::SpecialLiteralKind::VerticalTab),
4155         ];
4156         for &(pat, c, ref kind) in specials {
4157             assert_eq!(
4158                 parser(pat).parse_primitive(),
4159                 Ok(Primitive::Literal(ast::Literal {
4160                     span: span(0..2),
4161                     kind: ast::LiteralKind::Special(kind.clone()),
4162                     c: c,
4163                 }))
4164             );
4165         }
4166         assert_eq!(
4167             parser(r"\A").parse_primitive(),
4168             Ok(Primitive::Assertion(ast::Assertion {
4169                 span: span(0..2),
4170                 kind: ast::AssertionKind::StartText,
4171             }))
4172         );
4173         assert_eq!(
4174             parser(r"\z").parse_primitive(),
4175             Ok(Primitive::Assertion(ast::Assertion {
4176                 span: span(0..2),
4177                 kind: ast::AssertionKind::EndText,
4178             }))
4179         );
4180         assert_eq!(
4181             parser(r"\b").parse_primitive(),
4182             Ok(Primitive::Assertion(ast::Assertion {
4183                 span: span(0..2),
4184                 kind: ast::AssertionKind::WordBoundary,
4185             }))
4186         );
4187         assert_eq!(
4188             parser(r"\B").parse_primitive(),
4189             Ok(Primitive::Assertion(ast::Assertion {
4190                 span: span(0..2),
4191                 kind: ast::AssertionKind::NotWordBoundary,
4192             }))
4193         );
4194 
4195         assert_eq!(
4196             parser(r"\").parse_escape().unwrap_err(),
4197             TestError {
4198                 span: span(0..1),
4199                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4200             }
4201         );
4202         assert_eq!(
4203             parser(r"\y").parse_escape().unwrap_err(),
4204             TestError {
4205                 span: span(0..2),
4206                 kind: ast::ErrorKind::EscapeUnrecognized,
4207             }
4208         );
4209     }
4210 
4211     #[test]
parse_unsupported_backreference()4212     fn parse_unsupported_backreference() {
4213         assert_eq!(
4214             parser(r"\0").parse_escape().unwrap_err(),
4215             TestError {
4216                 span: span(0..2),
4217                 kind: ast::ErrorKind::UnsupportedBackreference,
4218             }
4219         );
4220         assert_eq!(
4221             parser(r"\9").parse_escape().unwrap_err(),
4222             TestError {
4223                 span: span(0..2),
4224                 kind: ast::ErrorKind::UnsupportedBackreference,
4225             }
4226         );
4227     }
4228 
4229     #[test]
parse_octal()4230     fn parse_octal() {
4231         for i in 0..511 {
4232             let pat = format!(r"\{:o}", i);
4233             assert_eq!(
4234                 parser_octal(&pat).parse_escape(),
4235                 Ok(Primitive::Literal(ast::Literal {
4236                     span: span(0..pat.len()),
4237                     kind: ast::LiteralKind::Octal,
4238                     c: ::std::char::from_u32(i).unwrap(),
4239                 }))
4240             );
4241         }
4242         assert_eq!(
4243             parser_octal(r"\778").parse_escape(),
4244             Ok(Primitive::Literal(ast::Literal {
4245                 span: span(0..3),
4246                 kind: ast::LiteralKind::Octal,
4247                 c: '?',
4248             }))
4249         );
4250         assert_eq!(
4251             parser_octal(r"\7777").parse_escape(),
4252             Ok(Primitive::Literal(ast::Literal {
4253                 span: span(0..4),
4254                 kind: ast::LiteralKind::Octal,
4255                 c: '\u{01FF}',
4256             }))
4257         );
4258         assert_eq!(
4259             parser_octal(r"\778").parse(),
4260             Ok(Ast::Concat(ast::Concat {
4261                 span: span(0..4),
4262                 asts: vec![
4263                     Ast::Literal(ast::Literal {
4264                         span: span(0..3),
4265                         kind: ast::LiteralKind::Octal,
4266                         c: '?',
4267                     }),
4268                     Ast::Literal(ast::Literal {
4269                         span: span(3..4),
4270                         kind: ast::LiteralKind::Verbatim,
4271                         c: '8',
4272                     }),
4273                 ],
4274             }))
4275         );
4276         assert_eq!(
4277             parser_octal(r"\7777").parse(),
4278             Ok(Ast::Concat(ast::Concat {
4279                 span: span(0..5),
4280                 asts: vec![
4281                     Ast::Literal(ast::Literal {
4282                         span: span(0..4),
4283                         kind: ast::LiteralKind::Octal,
4284                         c: '\u{01FF}',
4285                     }),
4286                     Ast::Literal(ast::Literal {
4287                         span: span(4..5),
4288                         kind: ast::LiteralKind::Verbatim,
4289                         c: '7',
4290                     }),
4291                 ],
4292             }))
4293         );
4294 
4295         assert_eq!(
4296             parser_octal(r"\8").parse_escape().unwrap_err(),
4297             TestError {
4298                 span: span(0..2),
4299                 kind: ast::ErrorKind::EscapeUnrecognized,
4300             }
4301         );
4302     }
4303 
4304     #[test]
parse_hex_two()4305     fn parse_hex_two() {
4306         for i in 0..256 {
4307             let pat = format!(r"\x{:02x}", i);
4308             assert_eq!(
4309                 parser(&pat).parse_escape(),
4310                 Ok(Primitive::Literal(ast::Literal {
4311                     span: span(0..pat.len()),
4312                     kind: ast::LiteralKind::HexFixed(ast::HexLiteralKind::X),
4313                     c: ::std::char::from_u32(i).unwrap(),
4314                 }))
4315             );
4316         }
4317 
4318         assert_eq!(
4319             parser(r"\xF").parse_escape().unwrap_err(),
4320             TestError {
4321                 span: span(3..3),
4322                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4323             }
4324         );
4325         assert_eq!(
4326             parser(r"\xG").parse_escape().unwrap_err(),
4327             TestError {
4328                 span: span(2..3),
4329                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4330             }
4331         );
4332         assert_eq!(
4333             parser(r"\xFG").parse_escape().unwrap_err(),
4334             TestError {
4335                 span: span(3..4),
4336                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4337             }
4338         );
4339     }
4340 
4341     #[test]
parse_hex_four()4342     fn parse_hex_four() {
4343         for i in 0..65536 {
4344             let c = match ::std::char::from_u32(i) {
4345                 None => continue,
4346                 Some(c) => c,
4347             };
4348             let pat = format!(r"\u{:04x}", i);
4349             assert_eq!(
4350                 parser(&pat).parse_escape(),
4351                 Ok(Primitive::Literal(ast::Literal {
4352                     span: span(0..pat.len()),
4353                     kind: ast::LiteralKind::HexFixed(
4354                         ast::HexLiteralKind::UnicodeShort
4355                     ),
4356                     c: c,
4357                 }))
4358             );
4359         }
4360 
4361         assert_eq!(
4362             parser(r"\uF").parse_escape().unwrap_err(),
4363             TestError {
4364                 span: span(3..3),
4365                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4366             }
4367         );
4368         assert_eq!(
4369             parser(r"\uG").parse_escape().unwrap_err(),
4370             TestError {
4371                 span: span(2..3),
4372                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4373             }
4374         );
4375         assert_eq!(
4376             parser(r"\uFG").parse_escape().unwrap_err(),
4377             TestError {
4378                 span: span(3..4),
4379                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4380             }
4381         );
4382         assert_eq!(
4383             parser(r"\uFFG").parse_escape().unwrap_err(),
4384             TestError {
4385                 span: span(4..5),
4386                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4387             }
4388         );
4389         assert_eq!(
4390             parser(r"\uFFFG").parse_escape().unwrap_err(),
4391             TestError {
4392                 span: span(5..6),
4393                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4394             }
4395         );
4396         assert_eq!(
4397             parser(r"\uD800").parse_escape().unwrap_err(),
4398             TestError {
4399                 span: span(2..6),
4400                 kind: ast::ErrorKind::EscapeHexInvalid,
4401             }
4402         );
4403     }
4404 
4405     #[test]
parse_hex_eight()4406     fn parse_hex_eight() {
4407         for i in 0..65536 {
4408             let c = match ::std::char::from_u32(i) {
4409                 None => continue,
4410                 Some(c) => c,
4411             };
4412             let pat = format!(r"\U{:08x}", i);
4413             assert_eq!(
4414                 parser(&pat).parse_escape(),
4415                 Ok(Primitive::Literal(ast::Literal {
4416                     span: span(0..pat.len()),
4417                     kind: ast::LiteralKind::HexFixed(
4418                         ast::HexLiteralKind::UnicodeLong
4419                     ),
4420                     c: c,
4421                 }))
4422             );
4423         }
4424 
4425         assert_eq!(
4426             parser(r"\UF").parse_escape().unwrap_err(),
4427             TestError {
4428                 span: span(3..3),
4429                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4430             }
4431         );
4432         assert_eq!(
4433             parser(r"\UG").parse_escape().unwrap_err(),
4434             TestError {
4435                 span: span(2..3),
4436                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4437             }
4438         );
4439         assert_eq!(
4440             parser(r"\UFG").parse_escape().unwrap_err(),
4441             TestError {
4442                 span: span(3..4),
4443                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4444             }
4445         );
4446         assert_eq!(
4447             parser(r"\UFFG").parse_escape().unwrap_err(),
4448             TestError {
4449                 span: span(4..5),
4450                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4451             }
4452         );
4453         assert_eq!(
4454             parser(r"\UFFFG").parse_escape().unwrap_err(),
4455             TestError {
4456                 span: span(5..6),
4457                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4458             }
4459         );
4460         assert_eq!(
4461             parser(r"\UFFFFG").parse_escape().unwrap_err(),
4462             TestError {
4463                 span: span(6..7),
4464                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4465             }
4466         );
4467         assert_eq!(
4468             parser(r"\UFFFFFG").parse_escape().unwrap_err(),
4469             TestError {
4470                 span: span(7..8),
4471                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4472             }
4473         );
4474         assert_eq!(
4475             parser(r"\UFFFFFFG").parse_escape().unwrap_err(),
4476             TestError {
4477                 span: span(8..9),
4478                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4479             }
4480         );
4481         assert_eq!(
4482             parser(r"\UFFFFFFFG").parse_escape().unwrap_err(),
4483             TestError {
4484                 span: span(9..10),
4485                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4486             }
4487         );
4488     }
4489 
4490     #[test]
parse_hex_brace()4491     fn parse_hex_brace() {
4492         assert_eq!(
4493             parser(r"\u{26c4}").parse_escape(),
4494             Ok(Primitive::Literal(ast::Literal {
4495                 span: span(0..8),
4496                 kind: ast::LiteralKind::HexBrace(
4497                     ast::HexLiteralKind::UnicodeShort
4498                 ),
4499                 c: '⛄',
4500             }))
4501         );
4502         assert_eq!(
4503             parser(r"\U{26c4}").parse_escape(),
4504             Ok(Primitive::Literal(ast::Literal {
4505                 span: span(0..8),
4506                 kind: ast::LiteralKind::HexBrace(
4507                     ast::HexLiteralKind::UnicodeLong
4508                 ),
4509                 c: '⛄',
4510             }))
4511         );
4512         assert_eq!(
4513             parser(r"\x{26c4}").parse_escape(),
4514             Ok(Primitive::Literal(ast::Literal {
4515                 span: span(0..8),
4516                 kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
4517                 c: '⛄',
4518             }))
4519         );
4520         assert_eq!(
4521             parser(r"\x{26C4}").parse_escape(),
4522             Ok(Primitive::Literal(ast::Literal {
4523                 span: span(0..8),
4524                 kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
4525                 c: '⛄',
4526             }))
4527         );
4528         assert_eq!(
4529             parser(r"\x{10fFfF}").parse_escape(),
4530             Ok(Primitive::Literal(ast::Literal {
4531                 span: span(0..10),
4532                 kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
4533                 c: '\u{10FFFF}',
4534             }))
4535         );
4536 
4537         assert_eq!(
4538             parser(r"\x").parse_escape().unwrap_err(),
4539             TestError {
4540                 span: span(2..2),
4541                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4542             }
4543         );
4544         assert_eq!(
4545             parser(r"\x{").parse_escape().unwrap_err(),
4546             TestError {
4547                 span: span(2..3),
4548                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4549             }
4550         );
4551         assert_eq!(
4552             parser(r"\x{FF").parse_escape().unwrap_err(),
4553             TestError {
4554                 span: span(2..5),
4555                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4556             }
4557         );
4558         assert_eq!(
4559             parser(r"\x{}").parse_escape().unwrap_err(),
4560             TestError {
4561                 span: span(2..4),
4562                 kind: ast::ErrorKind::EscapeHexEmpty,
4563             }
4564         );
4565         assert_eq!(
4566             parser(r"\x{FGF}").parse_escape().unwrap_err(),
4567             TestError {
4568                 span: span(4..5),
4569                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4570             }
4571         );
4572         assert_eq!(
4573             parser(r"\x{FFFFFF}").parse_escape().unwrap_err(),
4574             TestError {
4575                 span: span(3..9),
4576                 kind: ast::ErrorKind::EscapeHexInvalid,
4577             }
4578         );
4579         assert_eq!(
4580             parser(r"\x{D800}").parse_escape().unwrap_err(),
4581             TestError {
4582                 span: span(3..7),
4583                 kind: ast::ErrorKind::EscapeHexInvalid,
4584             }
4585         );
4586         assert_eq!(
4587             parser(r"\x{FFFFFFFFF}").parse_escape().unwrap_err(),
4588             TestError {
4589                 span: span(3..12),
4590                 kind: ast::ErrorKind::EscapeHexInvalid,
4591             }
4592         );
4593     }
4594 
4595     #[test]
parse_decimal()4596     fn parse_decimal() {
4597         assert_eq!(parser("123").parse_decimal(), Ok(123));
4598         assert_eq!(parser("0").parse_decimal(), Ok(0));
4599         assert_eq!(parser("01").parse_decimal(), Ok(1));
4600 
4601         assert_eq!(
4602             parser("-1").parse_decimal().unwrap_err(),
4603             TestError { span: span(0..0), kind: ast::ErrorKind::DecimalEmpty }
4604         );
4605         assert_eq!(
4606             parser("").parse_decimal().unwrap_err(),
4607             TestError { span: span(0..0), kind: ast::ErrorKind::DecimalEmpty }
4608         );
4609         assert_eq!(
4610             parser("9999999999").parse_decimal().unwrap_err(),
4611             TestError {
4612                 span: span(0..10),
4613                 kind: ast::ErrorKind::DecimalInvalid,
4614             }
4615         );
4616     }
4617 
4618     #[test]
parse_set_class()4619     fn parse_set_class() {
4620         fn union(span: Span, items: Vec<ast::ClassSetItem>) -> ast::ClassSet {
4621             ast::ClassSet::union(ast::ClassSetUnion {
4622                 span: span,
4623                 items: items,
4624             })
4625         }
4626 
4627         fn intersection(
4628             span: Span,
4629             lhs: ast::ClassSet,
4630             rhs: ast::ClassSet,
4631         ) -> ast::ClassSet {
4632             ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
4633                 span: span,
4634                 kind: ast::ClassSetBinaryOpKind::Intersection,
4635                 lhs: Box::new(lhs),
4636                 rhs: Box::new(rhs),
4637             })
4638         }
4639 
4640         fn difference(
4641             span: Span,
4642             lhs: ast::ClassSet,
4643             rhs: ast::ClassSet,
4644         ) -> ast::ClassSet {
4645             ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
4646                 span: span,
4647                 kind: ast::ClassSetBinaryOpKind::Difference,
4648                 lhs: Box::new(lhs),
4649                 rhs: Box::new(rhs),
4650             })
4651         }
4652 
4653         fn symdifference(
4654             span: Span,
4655             lhs: ast::ClassSet,
4656             rhs: ast::ClassSet,
4657         ) -> ast::ClassSet {
4658             ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
4659                 span: span,
4660                 kind: ast::ClassSetBinaryOpKind::SymmetricDifference,
4661                 lhs: Box::new(lhs),
4662                 rhs: Box::new(rhs),
4663             })
4664         }
4665 
4666         fn itemset(item: ast::ClassSetItem) -> ast::ClassSet {
4667             ast::ClassSet::Item(item)
4668         }
4669 
4670         fn item_ascii(cls: ast::ClassAscii) -> ast::ClassSetItem {
4671             ast::ClassSetItem::Ascii(cls)
4672         }
4673 
4674         fn item_unicode(cls: ast::ClassUnicode) -> ast::ClassSetItem {
4675             ast::ClassSetItem::Unicode(cls)
4676         }
4677 
4678         fn item_perl(cls: ast::ClassPerl) -> ast::ClassSetItem {
4679             ast::ClassSetItem::Perl(cls)
4680         }
4681 
4682         fn item_bracket(cls: ast::ClassBracketed) -> ast::ClassSetItem {
4683             ast::ClassSetItem::Bracketed(Box::new(cls))
4684         }
4685 
4686         fn lit(span: Span, c: char) -> ast::ClassSetItem {
4687             ast::ClassSetItem::Literal(ast::Literal {
4688                 span: span,
4689                 kind: ast::LiteralKind::Verbatim,
4690                 c: c,
4691             })
4692         }
4693 
4694         fn empty(span: Span) -> ast::ClassSetItem {
4695             ast::ClassSetItem::Empty(span)
4696         }
4697 
4698         fn range(span: Span, start: char, end: char) -> ast::ClassSetItem {
4699             let pos1 = Position {
4700                 offset: span.start.offset + start.len_utf8(),
4701                 column: span.start.column + 1,
4702                 ..span.start
4703             };
4704             let pos2 = Position {
4705                 offset: span.end.offset - end.len_utf8(),
4706                 column: span.end.column - 1,
4707                 ..span.end
4708             };
4709             ast::ClassSetItem::Range(ast::ClassSetRange {
4710                 span: span,
4711                 start: ast::Literal {
4712                     span: Span { end: pos1, ..span },
4713                     kind: ast::LiteralKind::Verbatim,
4714                     c: start,
4715                 },
4716                 end: ast::Literal {
4717                     span: Span { start: pos2, ..span },
4718                     kind: ast::LiteralKind::Verbatim,
4719                     c: end,
4720                 },
4721             })
4722         }
4723 
4724         fn alnum(span: Span, negated: bool) -> ast::ClassAscii {
4725             ast::ClassAscii {
4726                 span: span,
4727                 kind: ast::ClassAsciiKind::Alnum,
4728                 negated: negated,
4729             }
4730         }
4731 
4732         fn lower(span: Span, negated: bool) -> ast::ClassAscii {
4733             ast::ClassAscii {
4734                 span: span,
4735                 kind: ast::ClassAsciiKind::Lower,
4736                 negated: negated,
4737             }
4738         }
4739 
4740         assert_eq!(
4741             parser("[[:alnum:]]").parse(),
4742             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4743                 span: span(0..11),
4744                 negated: false,
4745                 kind: itemset(item_ascii(alnum(span(1..10), false))),
4746             })))
4747         );
4748         assert_eq!(
4749             parser("[[[:alnum:]]]").parse(),
4750             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4751                 span: span(0..13),
4752                 negated: false,
4753                 kind: itemset(item_bracket(ast::ClassBracketed {
4754                     span: span(1..12),
4755                     negated: false,
4756                     kind: itemset(item_ascii(alnum(span(2..11), false))),
4757                 })),
4758             })))
4759         );
4760         assert_eq!(
4761             parser("[[:alnum:]&&[:lower:]]").parse(),
4762             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4763                 span: span(0..22),
4764                 negated: false,
4765                 kind: intersection(
4766                     span(1..21),
4767                     itemset(item_ascii(alnum(span(1..10), false))),
4768                     itemset(item_ascii(lower(span(12..21), false))),
4769                 ),
4770             })))
4771         );
4772         assert_eq!(
4773             parser("[[:alnum:]--[:lower:]]").parse(),
4774             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4775                 span: span(0..22),
4776                 negated: false,
4777                 kind: difference(
4778                     span(1..21),
4779                     itemset(item_ascii(alnum(span(1..10), false))),
4780                     itemset(item_ascii(lower(span(12..21), false))),
4781                 ),
4782             })))
4783         );
4784         assert_eq!(
4785             parser("[[:alnum:]~~[:lower:]]").parse(),
4786             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4787                 span: span(0..22),
4788                 negated: false,
4789                 kind: symdifference(
4790                     span(1..21),
4791                     itemset(item_ascii(alnum(span(1..10), false))),
4792                     itemset(item_ascii(lower(span(12..21), false))),
4793                 ),
4794             })))
4795         );
4796 
4797         assert_eq!(
4798             parser("[a]").parse(),
4799             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4800                 span: span(0..3),
4801                 negated: false,
4802                 kind: itemset(lit(span(1..2), 'a')),
4803             })))
4804         );
4805         assert_eq!(
4806             parser(r"[a\]]").parse(),
4807             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4808                 span: span(0..5),
4809                 negated: false,
4810                 kind: union(
4811                     span(1..4),
4812                     vec![
4813                         lit(span(1..2), 'a'),
4814                         ast::ClassSetItem::Literal(ast::Literal {
4815                             span: span(2..4),
4816                             kind: ast::LiteralKind::Punctuation,
4817                             c: ']',
4818                         }),
4819                     ]
4820                 ),
4821             })))
4822         );
4823         assert_eq!(
4824             parser(r"[a\-z]").parse(),
4825             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4826                 span: span(0..6),
4827                 negated: false,
4828                 kind: union(
4829                     span(1..5),
4830                     vec![
4831                         lit(span(1..2), 'a'),
4832                         ast::ClassSetItem::Literal(ast::Literal {
4833                             span: span(2..4),
4834                             kind: ast::LiteralKind::Punctuation,
4835                             c: '-',
4836                         }),
4837                         lit(span(4..5), 'z'),
4838                     ]
4839                 ),
4840             })))
4841         );
4842         assert_eq!(
4843             parser("[ab]").parse(),
4844             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4845                 span: span(0..4),
4846                 negated: false,
4847                 kind: union(
4848                     span(1..3),
4849                     vec![lit(span(1..2), 'a'), lit(span(2..3), 'b'),]
4850                 ),
4851             })))
4852         );
4853         assert_eq!(
4854             parser("[a-]").parse(),
4855             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4856                 span: span(0..4),
4857                 negated: false,
4858                 kind: union(
4859                     span(1..3),
4860                     vec![lit(span(1..2), 'a'), lit(span(2..3), '-'),]
4861                 ),
4862             })))
4863         );
4864         assert_eq!(
4865             parser("[-a]").parse(),
4866             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4867                 span: span(0..4),
4868                 negated: false,
4869                 kind: union(
4870                     span(1..3),
4871                     vec![lit(span(1..2), '-'), lit(span(2..3), 'a'),]
4872                 ),
4873             })))
4874         );
4875         assert_eq!(
4876             parser(r"[\pL]").parse(),
4877             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4878                 span: span(0..5),
4879                 negated: false,
4880                 kind: itemset(item_unicode(ast::ClassUnicode {
4881                     span: span(1..4),
4882                     negated: false,
4883                     kind: ast::ClassUnicodeKind::OneLetter('L'),
4884                 })),
4885             })))
4886         );
4887         assert_eq!(
4888             parser(r"[\w]").parse(),
4889             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4890                 span: span(0..4),
4891                 negated: false,
4892                 kind: itemset(item_perl(ast::ClassPerl {
4893                     span: span(1..3),
4894                     kind: ast::ClassPerlKind::Word,
4895                     negated: false,
4896                 })),
4897             })))
4898         );
4899         assert_eq!(
4900             parser(r"[a\wz]").parse(),
4901             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4902                 span: span(0..6),
4903                 negated: false,
4904                 kind: union(
4905                     span(1..5),
4906                     vec![
4907                         lit(span(1..2), 'a'),
4908                         item_perl(ast::ClassPerl {
4909                             span: span(2..4),
4910                             kind: ast::ClassPerlKind::Word,
4911                             negated: false,
4912                         }),
4913                         lit(span(4..5), 'z'),
4914                     ]
4915                 ),
4916             })))
4917         );
4918 
4919         assert_eq!(
4920             parser("[a-z]").parse(),
4921             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4922                 span: span(0..5),
4923                 negated: false,
4924                 kind: itemset(range(span(1..4), 'a', 'z')),
4925             })))
4926         );
4927         assert_eq!(
4928             parser("[a-cx-z]").parse(),
4929             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4930                 span: span(0..8),
4931                 negated: false,
4932                 kind: union(
4933                     span(1..7),
4934                     vec![
4935                         range(span(1..4), 'a', 'c'),
4936                         range(span(4..7), 'x', 'z'),
4937                     ]
4938                 ),
4939             })))
4940         );
4941         assert_eq!(
4942             parser(r"[\w&&a-cx-z]").parse(),
4943             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4944                 span: span(0..12),
4945                 negated: false,
4946                 kind: intersection(
4947                     span(1..11),
4948                     itemset(item_perl(ast::ClassPerl {
4949                         span: span(1..3),
4950                         kind: ast::ClassPerlKind::Word,
4951                         negated: false,
4952                     })),
4953                     union(
4954                         span(5..11),
4955                         vec![
4956                             range(span(5..8), 'a', 'c'),
4957                             range(span(8..11), 'x', 'z'),
4958                         ]
4959                     ),
4960                 ),
4961             })))
4962         );
4963         assert_eq!(
4964             parser(r"[a-cx-z&&\w]").parse(),
4965             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4966                 span: span(0..12),
4967                 negated: false,
4968                 kind: intersection(
4969                     span(1..11),
4970                     union(
4971                         span(1..7),
4972                         vec![
4973                             range(span(1..4), 'a', 'c'),
4974                             range(span(4..7), 'x', 'z'),
4975                         ]
4976                     ),
4977                     itemset(item_perl(ast::ClassPerl {
4978                         span: span(9..11),
4979                         kind: ast::ClassPerlKind::Word,
4980                         negated: false,
4981                     })),
4982                 ),
4983             })))
4984         );
4985         assert_eq!(
4986             parser(r"[a--b--c]").parse(),
4987             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4988                 span: span(0..9),
4989                 negated: false,
4990                 kind: difference(
4991                     span(1..8),
4992                     difference(
4993                         span(1..5),
4994                         itemset(lit(span(1..2), 'a')),
4995                         itemset(lit(span(4..5), 'b')),
4996                     ),
4997                     itemset(lit(span(7..8), 'c')),
4998                 ),
4999             })))
5000         );
5001         assert_eq!(
5002             parser(r"[a~~b~~c]").parse(),
5003             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5004                 span: span(0..9),
5005                 negated: false,
5006                 kind: symdifference(
5007                     span(1..8),
5008                     symdifference(
5009                         span(1..5),
5010                         itemset(lit(span(1..2), 'a')),
5011                         itemset(lit(span(4..5), 'b')),
5012                     ),
5013                     itemset(lit(span(7..8), 'c')),
5014                 ),
5015             })))
5016         );
5017         assert_eq!(
5018             parser(r"[\^&&^]").parse(),
5019             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5020                 span: span(0..7),
5021                 negated: false,
5022                 kind: intersection(
5023                     span(1..6),
5024                     itemset(ast::ClassSetItem::Literal(ast::Literal {
5025                         span: span(1..3),
5026                         kind: ast::LiteralKind::Punctuation,
5027                         c: '^',
5028                     })),
5029                     itemset(lit(span(5..6), '^')),
5030                 ),
5031             })))
5032         );
5033         assert_eq!(
5034             parser(r"[\&&&&]").parse(),
5035             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5036                 span: span(0..7),
5037                 negated: false,
5038                 kind: intersection(
5039                     span(1..6),
5040                     itemset(ast::ClassSetItem::Literal(ast::Literal {
5041                         span: span(1..3),
5042                         kind: ast::LiteralKind::Punctuation,
5043                         c: '&',
5044                     })),
5045                     itemset(lit(span(5..6), '&')),
5046                 ),
5047             })))
5048         );
5049         assert_eq!(
5050             parser(r"[&&&&]").parse(),
5051             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5052                 span: span(0..6),
5053                 negated: false,
5054                 kind: intersection(
5055                     span(1..5),
5056                     intersection(
5057                         span(1..3),
5058                         itemset(empty(span(1..1))),
5059                         itemset(empty(span(3..3))),
5060                     ),
5061                     itemset(empty(span(5..5))),
5062                 ),
5063             })))
5064         );
5065 
5066         let pat = "[☃-⛄]";
5067         assert_eq!(
5068             parser(pat).parse(),
5069             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5070                 span: span_range(pat, 0..9),
5071                 negated: false,
5072                 kind: itemset(ast::ClassSetItem::Range(ast::ClassSetRange {
5073                     span: span_range(pat, 1..8),
5074                     start: ast::Literal {
5075                         span: span_range(pat, 1..4),
5076                         kind: ast::LiteralKind::Verbatim,
5077                         c: '☃',
5078                     },
5079                     end: ast::Literal {
5080                         span: span_range(pat, 5..8),
5081                         kind: ast::LiteralKind::Verbatim,
5082                         c: '⛄',
5083                     },
5084                 })),
5085             })))
5086         );
5087 
5088         assert_eq!(
5089             parser(r"[]]").parse(),
5090             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5091                 span: span(0..3),
5092                 negated: false,
5093                 kind: itemset(lit(span(1..2), ']')),
5094             })))
5095         );
5096         assert_eq!(
5097             parser(r"[]\[]").parse(),
5098             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5099                 span: span(0..5),
5100                 negated: false,
5101                 kind: union(
5102                     span(1..4),
5103                     vec![
5104                         lit(span(1..2), ']'),
5105                         ast::ClassSetItem::Literal(ast::Literal {
5106                             span: span(2..4),
5107                             kind: ast::LiteralKind::Punctuation,
5108                             c: '[',
5109                         }),
5110                     ]
5111                 ),
5112             })))
5113         );
5114         assert_eq!(
5115             parser(r"[\[]]").parse(),
5116             Ok(concat(
5117                 0..5,
5118                 vec![
5119                     Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5120                         span: span(0..4),
5121                         negated: false,
5122                         kind: itemset(ast::ClassSetItem::Literal(
5123                             ast::Literal {
5124                                 span: span(1..3),
5125                                 kind: ast::LiteralKind::Punctuation,
5126                                 c: '[',
5127                             }
5128                         )),
5129                     })),
5130                     Ast::Literal(ast::Literal {
5131                         span: span(4..5),
5132                         kind: ast::LiteralKind::Verbatim,
5133                         c: ']',
5134                     }),
5135                 ]
5136             ))
5137         );
5138 
5139         assert_eq!(
5140             parser("[").parse().unwrap_err(),
5141             TestError {
5142                 span: span(0..1),
5143                 kind: ast::ErrorKind::ClassUnclosed,
5144             }
5145         );
5146         assert_eq!(
5147             parser("[[").parse().unwrap_err(),
5148             TestError {
5149                 span: span(1..2),
5150                 kind: ast::ErrorKind::ClassUnclosed,
5151             }
5152         );
5153         assert_eq!(
5154             parser("[[-]").parse().unwrap_err(),
5155             TestError {
5156                 span: span(0..1),
5157                 kind: ast::ErrorKind::ClassUnclosed,
5158             }
5159         );
5160         assert_eq!(
5161             parser("[[[:alnum:]").parse().unwrap_err(),
5162             TestError {
5163                 span: span(1..2),
5164                 kind: ast::ErrorKind::ClassUnclosed,
5165             }
5166         );
5167         assert_eq!(
5168             parser(r"[\b]").parse().unwrap_err(),
5169             TestError {
5170                 span: span(1..3),
5171                 kind: ast::ErrorKind::ClassEscapeInvalid,
5172             }
5173         );
5174         assert_eq!(
5175             parser(r"[\w-a]").parse().unwrap_err(),
5176             TestError {
5177                 span: span(1..3),
5178                 kind: ast::ErrorKind::ClassRangeLiteral,
5179             }
5180         );
5181         assert_eq!(
5182             parser(r"[a-\w]").parse().unwrap_err(),
5183             TestError {
5184                 span: span(3..5),
5185                 kind: ast::ErrorKind::ClassRangeLiteral,
5186             }
5187         );
5188         assert_eq!(
5189             parser(r"[z-a]").parse().unwrap_err(),
5190             TestError {
5191                 span: span(1..4),
5192                 kind: ast::ErrorKind::ClassRangeInvalid,
5193             }
5194         );
5195 
5196         assert_eq!(
5197             parser_ignore_whitespace("[a ").parse().unwrap_err(),
5198             TestError {
5199                 span: span(0..1),
5200                 kind: ast::ErrorKind::ClassUnclosed,
5201             }
5202         );
5203         assert_eq!(
5204             parser_ignore_whitespace("[a- ").parse().unwrap_err(),
5205             TestError {
5206                 span: span(0..1),
5207                 kind: ast::ErrorKind::ClassUnclosed,
5208             }
5209         );
5210     }
5211 
5212     #[test]
parse_set_class_open()5213     fn parse_set_class_open() {
5214         assert_eq!(parser("[a]").parse_set_class_open(), {
5215             let set = ast::ClassBracketed {
5216                 span: span(0..1),
5217                 negated: false,
5218                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5219                     span: span(1..1),
5220                     items: vec![],
5221                 }),
5222             };
5223             let union = ast::ClassSetUnion { span: span(1..1), items: vec![] };
5224             Ok((set, union))
5225         });
5226         assert_eq!(
5227             parser_ignore_whitespace("[   a]").parse_set_class_open(),
5228             {
5229                 let set = ast::ClassBracketed {
5230                     span: span(0..4),
5231                     negated: false,
5232                     kind: ast::ClassSet::union(ast::ClassSetUnion {
5233                         span: span(4..4),
5234                         items: vec![],
5235                     }),
5236                 };
5237                 let union =
5238                     ast::ClassSetUnion { span: span(4..4), items: vec![] };
5239                 Ok((set, union))
5240             }
5241         );
5242         assert_eq!(parser("[^a]").parse_set_class_open(), {
5243             let set = ast::ClassBracketed {
5244                 span: span(0..2),
5245                 negated: true,
5246                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5247                     span: span(2..2),
5248                     items: vec![],
5249                 }),
5250             };
5251             let union = ast::ClassSetUnion { span: span(2..2), items: vec![] };
5252             Ok((set, union))
5253         });
5254         assert_eq!(
5255             parser_ignore_whitespace("[ ^ a]").parse_set_class_open(),
5256             {
5257                 let set = ast::ClassBracketed {
5258                     span: span(0..4),
5259                     negated: true,
5260                     kind: ast::ClassSet::union(ast::ClassSetUnion {
5261                         span: span(4..4),
5262                         items: vec![],
5263                     }),
5264                 };
5265                 let union =
5266                     ast::ClassSetUnion { span: span(4..4), items: vec![] };
5267                 Ok((set, union))
5268             }
5269         );
5270         assert_eq!(parser("[-a]").parse_set_class_open(), {
5271             let set = ast::ClassBracketed {
5272                 span: span(0..2),
5273                 negated: false,
5274                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5275                     span: span(1..1),
5276                     items: vec![],
5277                 }),
5278             };
5279             let union = ast::ClassSetUnion {
5280                 span: span(1..2),
5281                 items: vec![ast::ClassSetItem::Literal(ast::Literal {
5282                     span: span(1..2),
5283                     kind: ast::LiteralKind::Verbatim,
5284                     c: '-',
5285                 })],
5286             };
5287             Ok((set, union))
5288         });
5289         assert_eq!(
5290             parser_ignore_whitespace("[ - a]").parse_set_class_open(),
5291             {
5292                 let set = ast::ClassBracketed {
5293                     span: span(0..4),
5294                     negated: false,
5295                     kind: ast::ClassSet::union(ast::ClassSetUnion {
5296                         span: span(2..2),
5297                         items: vec![],
5298                     }),
5299                 };
5300                 let union = ast::ClassSetUnion {
5301                     span: span(2..3),
5302                     items: vec![ast::ClassSetItem::Literal(ast::Literal {
5303                         span: span(2..3),
5304                         kind: ast::LiteralKind::Verbatim,
5305                         c: '-',
5306                     })],
5307                 };
5308                 Ok((set, union))
5309             }
5310         );
5311         assert_eq!(parser("[^-a]").parse_set_class_open(), {
5312             let set = ast::ClassBracketed {
5313                 span: span(0..3),
5314                 negated: true,
5315                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5316                     span: span(2..2),
5317                     items: vec![],
5318                 }),
5319             };
5320             let union = ast::ClassSetUnion {
5321                 span: span(2..3),
5322                 items: vec![ast::ClassSetItem::Literal(ast::Literal {
5323                     span: span(2..3),
5324                     kind: ast::LiteralKind::Verbatim,
5325                     c: '-',
5326                 })],
5327             };
5328             Ok((set, union))
5329         });
5330         assert_eq!(parser("[--a]").parse_set_class_open(), {
5331             let set = ast::ClassBracketed {
5332                 span: span(0..3),
5333                 negated: false,
5334                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5335                     span: span(1..1),
5336                     items: vec![],
5337                 }),
5338             };
5339             let union = ast::ClassSetUnion {
5340                 span: span(1..3),
5341                 items: vec![
5342                     ast::ClassSetItem::Literal(ast::Literal {
5343                         span: span(1..2),
5344                         kind: ast::LiteralKind::Verbatim,
5345                         c: '-',
5346                     }),
5347                     ast::ClassSetItem::Literal(ast::Literal {
5348                         span: span(2..3),
5349                         kind: ast::LiteralKind::Verbatim,
5350                         c: '-',
5351                     }),
5352                 ],
5353             };
5354             Ok((set, union))
5355         });
5356         assert_eq!(parser("[]a]").parse_set_class_open(), {
5357             let set = ast::ClassBracketed {
5358                 span: span(0..2),
5359                 negated: false,
5360                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5361                     span: span(1..1),
5362                     items: vec![],
5363                 }),
5364             };
5365             let union = ast::ClassSetUnion {
5366                 span: span(1..2),
5367                 items: vec![ast::ClassSetItem::Literal(ast::Literal {
5368                     span: span(1..2),
5369                     kind: ast::LiteralKind::Verbatim,
5370                     c: ']',
5371                 })],
5372             };
5373             Ok((set, union))
5374         });
5375         assert_eq!(
5376             parser_ignore_whitespace("[ ] a]").parse_set_class_open(),
5377             {
5378                 let set = ast::ClassBracketed {
5379                     span: span(0..4),
5380                     negated: false,
5381                     kind: ast::ClassSet::union(ast::ClassSetUnion {
5382                         span: span(2..2),
5383                         items: vec![],
5384                     }),
5385                 };
5386                 let union = ast::ClassSetUnion {
5387                     span: span(2..3),
5388                     items: vec![ast::ClassSetItem::Literal(ast::Literal {
5389                         span: span(2..3),
5390                         kind: ast::LiteralKind::Verbatim,
5391                         c: ']',
5392                     })],
5393                 };
5394                 Ok((set, union))
5395             }
5396         );
5397         assert_eq!(parser("[^]a]").parse_set_class_open(), {
5398             let set = ast::ClassBracketed {
5399                 span: span(0..3),
5400                 negated: true,
5401                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5402                     span: span(2..2),
5403                     items: vec![],
5404                 }),
5405             };
5406             let union = ast::ClassSetUnion {
5407                 span: span(2..3),
5408                 items: vec![ast::ClassSetItem::Literal(ast::Literal {
5409                     span: span(2..3),
5410                     kind: ast::LiteralKind::Verbatim,
5411                     c: ']',
5412                 })],
5413             };
5414             Ok((set, union))
5415         });
5416         assert_eq!(parser("[-]a]").parse_set_class_open(), {
5417             let set = ast::ClassBracketed {
5418                 span: span(0..2),
5419                 negated: false,
5420                 kind: ast::ClassSet::union(ast::ClassSetUnion {
5421                     span: span(1..1),
5422                     items: vec![],
5423                 }),
5424             };
5425             let union = ast::ClassSetUnion {
5426                 span: span(1..2),
5427                 items: vec![ast::ClassSetItem::Literal(ast::Literal {
5428                     span: span(1..2),
5429                     kind: ast::LiteralKind::Verbatim,
5430                     c: '-',
5431                 })],
5432             };
5433             Ok((set, union))
5434         });
5435 
5436         assert_eq!(
5437             parser("[").parse_set_class_open().unwrap_err(),
5438             TestError {
5439                 span: span(0..1),
5440                 kind: ast::ErrorKind::ClassUnclosed,
5441             }
5442         );
5443         assert_eq!(
5444             parser_ignore_whitespace("[    ")
5445                 .parse_set_class_open()
5446                 .unwrap_err(),
5447             TestError {
5448                 span: span(0..5),
5449                 kind: ast::ErrorKind::ClassUnclosed,
5450             }
5451         );
5452         assert_eq!(
5453             parser("[^").parse_set_class_open().unwrap_err(),
5454             TestError {
5455                 span: span(0..2),
5456                 kind: ast::ErrorKind::ClassUnclosed,
5457             }
5458         );
5459         assert_eq!(
5460             parser("[]").parse_set_class_open().unwrap_err(),
5461             TestError {
5462                 span: span(0..2),
5463                 kind: ast::ErrorKind::ClassUnclosed,
5464             }
5465         );
5466         assert_eq!(
5467             parser("[-").parse_set_class_open().unwrap_err(),
5468             TestError {
5469                 span: span(0..2),
5470                 kind: ast::ErrorKind::ClassUnclosed,
5471             }
5472         );
5473         assert_eq!(
5474             parser("[--").parse_set_class_open().unwrap_err(),
5475             TestError {
5476                 span: span(0..3),
5477                 kind: ast::ErrorKind::ClassUnclosed,
5478             }
5479         );
5480     }
5481 
5482     #[test]
maybe_parse_ascii_class()5483     fn maybe_parse_ascii_class() {
5484         assert_eq!(
5485             parser(r"[:alnum:]").maybe_parse_ascii_class(),
5486             Some(ast::ClassAscii {
5487                 span: span(0..9),
5488                 kind: ast::ClassAsciiKind::Alnum,
5489                 negated: false,
5490             })
5491         );
5492         assert_eq!(
5493             parser(r"[:alnum:]A").maybe_parse_ascii_class(),
5494             Some(ast::ClassAscii {
5495                 span: span(0..9),
5496                 kind: ast::ClassAsciiKind::Alnum,
5497                 negated: false,
5498             })
5499         );
5500         assert_eq!(
5501             parser(r"[:^alnum:]").maybe_parse_ascii_class(),
5502             Some(ast::ClassAscii {
5503                 span: span(0..10),
5504                 kind: ast::ClassAsciiKind::Alnum,
5505                 negated: true,
5506             })
5507         );
5508 
5509         let p = parser(r"[:");
5510         assert_eq!(p.maybe_parse_ascii_class(), None);
5511         assert_eq!(p.offset(), 0);
5512 
5513         let p = parser(r"[:^");
5514         assert_eq!(p.maybe_parse_ascii_class(), None);
5515         assert_eq!(p.offset(), 0);
5516 
5517         let p = parser(r"[^:alnum:]");
5518         assert_eq!(p.maybe_parse_ascii_class(), None);
5519         assert_eq!(p.offset(), 0);
5520 
5521         let p = parser(r"[:alnnum:]");
5522         assert_eq!(p.maybe_parse_ascii_class(), None);
5523         assert_eq!(p.offset(), 0);
5524 
5525         let p = parser(r"[:alnum]");
5526         assert_eq!(p.maybe_parse_ascii_class(), None);
5527         assert_eq!(p.offset(), 0);
5528 
5529         let p = parser(r"[:alnum:");
5530         assert_eq!(p.maybe_parse_ascii_class(), None);
5531         assert_eq!(p.offset(), 0);
5532     }
5533 
5534     #[test]
parse_unicode_class()5535     fn parse_unicode_class() {
5536         assert_eq!(
5537             parser(r"\pN").parse_escape(),
5538             Ok(Primitive::Unicode(ast::ClassUnicode {
5539                 span: span(0..3),
5540                 negated: false,
5541                 kind: ast::ClassUnicodeKind::OneLetter('N'),
5542             }))
5543         );
5544         assert_eq!(
5545             parser(r"\PN").parse_escape(),
5546             Ok(Primitive::Unicode(ast::ClassUnicode {
5547                 span: span(0..3),
5548                 negated: true,
5549                 kind: ast::ClassUnicodeKind::OneLetter('N'),
5550             }))
5551         );
5552         assert_eq!(
5553             parser(r"\p{N}").parse_escape(),
5554             Ok(Primitive::Unicode(ast::ClassUnicode {
5555                 span: span(0..5),
5556                 negated: false,
5557                 kind: ast::ClassUnicodeKind::Named(s("N")),
5558             }))
5559         );
5560         assert_eq!(
5561             parser(r"\P{N}").parse_escape(),
5562             Ok(Primitive::Unicode(ast::ClassUnicode {
5563                 span: span(0..5),
5564                 negated: true,
5565                 kind: ast::ClassUnicodeKind::Named(s("N")),
5566             }))
5567         );
5568         assert_eq!(
5569             parser(r"\p{Greek}").parse_escape(),
5570             Ok(Primitive::Unicode(ast::ClassUnicode {
5571                 span: span(0..9),
5572                 negated: false,
5573                 kind: ast::ClassUnicodeKind::Named(s("Greek")),
5574             }))
5575         );
5576 
5577         assert_eq!(
5578             parser(r"\p{scx:Katakana}").parse_escape(),
5579             Ok(Primitive::Unicode(ast::ClassUnicode {
5580                 span: span(0..16),
5581                 negated: false,
5582                 kind: ast::ClassUnicodeKind::NamedValue {
5583                     op: ast::ClassUnicodeOpKind::Colon,
5584                     name: s("scx"),
5585                     value: s("Katakana"),
5586                 },
5587             }))
5588         );
5589         assert_eq!(
5590             parser(r"\p{scx=Katakana}").parse_escape(),
5591             Ok(Primitive::Unicode(ast::ClassUnicode {
5592                 span: span(0..16),
5593                 negated: false,
5594                 kind: ast::ClassUnicodeKind::NamedValue {
5595                     op: ast::ClassUnicodeOpKind::Equal,
5596                     name: s("scx"),
5597                     value: s("Katakana"),
5598                 },
5599             }))
5600         );
5601         assert_eq!(
5602             parser(r"\p{scx!=Katakana}").parse_escape(),
5603             Ok(Primitive::Unicode(ast::ClassUnicode {
5604                 span: span(0..17),
5605                 negated: false,
5606                 kind: ast::ClassUnicodeKind::NamedValue {
5607                     op: ast::ClassUnicodeOpKind::NotEqual,
5608                     name: s("scx"),
5609                     value: s("Katakana"),
5610                 },
5611             }))
5612         );
5613 
5614         assert_eq!(
5615             parser(r"\p{:}").parse_escape(),
5616             Ok(Primitive::Unicode(ast::ClassUnicode {
5617                 span: span(0..5),
5618                 negated: false,
5619                 kind: ast::ClassUnicodeKind::NamedValue {
5620                     op: ast::ClassUnicodeOpKind::Colon,
5621                     name: s(""),
5622                     value: s(""),
5623                 },
5624             }))
5625         );
5626         assert_eq!(
5627             parser(r"\p{=}").parse_escape(),
5628             Ok(Primitive::Unicode(ast::ClassUnicode {
5629                 span: span(0..5),
5630                 negated: false,
5631                 kind: ast::ClassUnicodeKind::NamedValue {
5632                     op: ast::ClassUnicodeOpKind::Equal,
5633                     name: s(""),
5634                     value: s(""),
5635                 },
5636             }))
5637         );
5638         assert_eq!(
5639             parser(r"\p{!=}").parse_escape(),
5640             Ok(Primitive::Unicode(ast::ClassUnicode {
5641                 span: span(0..6),
5642                 negated: false,
5643                 kind: ast::ClassUnicodeKind::NamedValue {
5644                     op: ast::ClassUnicodeOpKind::NotEqual,
5645                     name: s(""),
5646                     value: s(""),
5647                 },
5648             }))
5649         );
5650 
5651         assert_eq!(
5652             parser(r"\p").parse_escape().unwrap_err(),
5653             TestError {
5654                 span: span(2..2),
5655                 kind: ast::ErrorKind::EscapeUnexpectedEof,
5656             }
5657         );
5658         assert_eq!(
5659             parser(r"\p{").parse_escape().unwrap_err(),
5660             TestError {
5661                 span: span(3..3),
5662                 kind: ast::ErrorKind::EscapeUnexpectedEof,
5663             }
5664         );
5665         assert_eq!(
5666             parser(r"\p{N").parse_escape().unwrap_err(),
5667             TestError {
5668                 span: span(4..4),
5669                 kind: ast::ErrorKind::EscapeUnexpectedEof,
5670             }
5671         );
5672         assert_eq!(
5673             parser(r"\p{Greek").parse_escape().unwrap_err(),
5674             TestError {
5675                 span: span(8..8),
5676                 kind: ast::ErrorKind::EscapeUnexpectedEof,
5677             }
5678         );
5679 
5680         assert_eq!(
5681             parser(r"\pNz").parse(),
5682             Ok(Ast::Concat(ast::Concat {
5683                 span: span(0..4),
5684                 asts: vec![
5685                     Ast::Class(ast::Class::Unicode(ast::ClassUnicode {
5686                         span: span(0..3),
5687                         negated: false,
5688                         kind: ast::ClassUnicodeKind::OneLetter('N'),
5689                     })),
5690                     Ast::Literal(ast::Literal {
5691                         span: span(3..4),
5692                         kind: ast::LiteralKind::Verbatim,
5693                         c: 'z',
5694                     }),
5695                 ],
5696             }))
5697         );
5698         assert_eq!(
5699             parser(r"\p{Greek}z").parse(),
5700             Ok(Ast::Concat(ast::Concat {
5701                 span: span(0..10),
5702                 asts: vec![
5703                     Ast::Class(ast::Class::Unicode(ast::ClassUnicode {
5704                         span: span(0..9),
5705                         negated: false,
5706                         kind: ast::ClassUnicodeKind::Named(s("Greek")),
5707                     })),
5708                     Ast::Literal(ast::Literal {
5709                         span: span(9..10),
5710                         kind: ast::LiteralKind::Verbatim,
5711                         c: 'z',
5712                     }),
5713                 ],
5714             }))
5715         );
5716     }
5717 
5718     #[test]
parse_perl_class()5719     fn parse_perl_class() {
5720         assert_eq!(
5721             parser(r"\d").parse_escape(),
5722             Ok(Primitive::Perl(ast::ClassPerl {
5723                 span: span(0..2),
5724                 kind: ast::ClassPerlKind::Digit,
5725                 negated: false,
5726             }))
5727         );
5728         assert_eq!(
5729             parser(r"\D").parse_escape(),
5730             Ok(Primitive::Perl(ast::ClassPerl {
5731                 span: span(0..2),
5732                 kind: ast::ClassPerlKind::Digit,
5733                 negated: true,
5734             }))
5735         );
5736         assert_eq!(
5737             parser(r"\s").parse_escape(),
5738             Ok(Primitive::Perl(ast::ClassPerl {
5739                 span: span(0..2),
5740                 kind: ast::ClassPerlKind::Space,
5741                 negated: false,
5742             }))
5743         );
5744         assert_eq!(
5745             parser(r"\S").parse_escape(),
5746             Ok(Primitive::Perl(ast::ClassPerl {
5747                 span: span(0..2),
5748                 kind: ast::ClassPerlKind::Space,
5749                 negated: true,
5750             }))
5751         );
5752         assert_eq!(
5753             parser(r"\w").parse_escape(),
5754             Ok(Primitive::Perl(ast::ClassPerl {
5755                 span: span(0..2),
5756                 kind: ast::ClassPerlKind::Word,
5757                 negated: false,
5758             }))
5759         );
5760         assert_eq!(
5761             parser(r"\W").parse_escape(),
5762             Ok(Primitive::Perl(ast::ClassPerl {
5763                 span: span(0..2),
5764                 kind: ast::ClassPerlKind::Word,
5765                 negated: true,
5766             }))
5767         );
5768 
5769         assert_eq!(
5770             parser(r"\d").parse(),
5771             Ok(Ast::Class(ast::Class::Perl(ast::ClassPerl {
5772                 span: span(0..2),
5773                 kind: ast::ClassPerlKind::Digit,
5774                 negated: false,
5775             })))
5776         );
5777         assert_eq!(
5778             parser(r"\dz").parse(),
5779             Ok(Ast::Concat(ast::Concat {
5780                 span: span(0..3),
5781                 asts: vec![
5782                     Ast::Class(ast::Class::Perl(ast::ClassPerl {
5783                         span: span(0..2),
5784                         kind: ast::ClassPerlKind::Digit,
5785                         negated: false,
5786                     })),
5787                     Ast::Literal(ast::Literal {
5788                         span: span(2..3),
5789                         kind: ast::LiteralKind::Verbatim,
5790                         c: 'z',
5791                     }),
5792                 ],
5793             }))
5794         );
5795     }
5796 
5797     // This tests a bug fix where the nest limit checker wasn't decrementing
5798     // its depth during post-traversal, which causes long regexes to trip
5799     // the default limit too aggressively.
5800     #[test]
regression_454_nest_too_big()5801     fn regression_454_nest_too_big() {
5802         let pattern = r#"
5803         2(?:
5804           [45]\d{3}|
5805           7(?:
5806             1[0-267]|
5807             2[0-289]|
5808             3[0-29]|
5809             4[01]|
5810             5[1-3]|
5811             6[013]|
5812             7[0178]|
5813             91
5814           )|
5815           8(?:
5816             0[125]|
5817             [139][1-6]|
5818             2[0157-9]|
5819             41|
5820             6[1-35]|
5821             7[1-5]|
5822             8[1-8]|
5823             90
5824           )|
5825           9(?:
5826             0[0-2]|
5827             1[0-4]|
5828             2[568]|
5829             3[3-6]|
5830             5[5-7]|
5831             6[0167]|
5832             7[15]|
5833             8[0146-9]
5834           )
5835         )\d{4}
5836         "#;
5837         assert!(parser_nest_limit(pattern, 50).parse().is_ok());
5838     }
5839 
5840     // This tests that we treat a trailing `-` in a character class as a
5841     // literal `-` even when whitespace mode is enabled and there is whitespace
5842     // after the trailing `-`.
5843     #[test]
regression_455_trailing_dash_ignore_whitespace()5844     fn regression_455_trailing_dash_ignore_whitespace() {
5845         assert!(parser("(?x)[ / - ]").parse().is_ok());
5846         assert!(parser("(?x)[ a - ]").parse().is_ok());
5847         assert!(parser(
5848             "(?x)[
5849             a
5850             - ]
5851         "
5852         )
5853         .parse()
5854         .is_ok());
5855         assert!(parser(
5856             "(?x)[
5857             a # wat
5858             - ]
5859         "
5860         )
5861         .parse()
5862         .is_ok());
5863 
5864         assert!(parser("(?x)[ / -").parse().is_err());
5865         assert!(parser("(?x)[ / - ").parse().is_err());
5866         assert!(parser(
5867             "(?x)[
5868             / -
5869         "
5870         )
5871         .parse()
5872         .is_err());
5873         assert!(parser(
5874             "(?x)[
5875             / - # wat
5876         "
5877         )
5878         .parse()
5879         .is_err());
5880     }
5881 }
5882