1 // Copyright 2018 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10 
11 /*!
12 This module provides a regular expression parser.
13 */
14 
15 use std::borrow::Borrow;
16 use std::cell::{Cell, RefCell};
17 use std::mem;
18 use std::result;
19 
20 use ast::{self, Ast, Position, Span};
21 use either::Either;
22 
23 use is_meta_character;
24 
25 type Result<T> = result::Result<T, ast::Error>;
26 
27 /// A primitive is an expression with no sub-expressions. This includes
28 /// literals, assertions and non-set character classes. This representation
29 /// is used as intermediate state in the parser.
30 ///
31 /// This does not include ASCII character classes, since they can only appear
32 /// within a set character class.
33 #[derive(Clone, Debug, Eq, PartialEq)]
34 enum Primitive {
35     Literal(ast::Literal),
36     Assertion(ast::Assertion),
37     Dot(Span),
38     Perl(ast::ClassPerl),
39     Unicode(ast::ClassUnicode),
40 }
41 
42 impl Primitive {
43     /// Return the span of this primitive.
span(&self) -> &Span44     fn span(&self) -> &Span {
45         match *self {
46             Primitive::Literal(ref x) => &x.span,
47             Primitive::Assertion(ref x) => &x.span,
48             Primitive::Dot(ref span) => span,
49             Primitive::Perl(ref x) => &x.span,
50             Primitive::Unicode(ref x) => &x.span,
51         }
52     }
53 
54     /// Convert this primitive into a proper AST.
into_ast(self) -> Ast55     fn into_ast(self) -> Ast {
56         match self {
57             Primitive::Literal(lit) => Ast::Literal(lit),
58             Primitive::Assertion(assert) => Ast::Assertion(assert),
59             Primitive::Dot(span) => Ast::Dot(span),
60             Primitive::Perl(cls) => Ast::Class(ast::Class::Perl(cls)),
61             Primitive::Unicode(cls) => Ast::Class(ast::Class::Unicode(cls)),
62         }
63     }
64 
65     /// Convert this primitive into an item in a character class.
66     ///
67     /// If this primitive is not a legal item (i.e., an assertion or a dot),
68     /// then return an error.
into_class_set_item<P: Borrow<Parser>>( self, p: &ParserI<P>, ) -> Result<ast::ClassSetItem>69     fn into_class_set_item<P: Borrow<Parser>>(
70         self,
71         p: &ParserI<P>,
72     ) -> Result<ast::ClassSetItem> {
73         use ast::ClassSetItem;
74         use self::Primitive::*;
75 
76         match self {
77             Literal(lit) => Ok(ClassSetItem::Literal(lit)),
78             Perl(cls) => Ok(ClassSetItem::Perl(cls)),
79             Unicode(cls) => Ok(ClassSetItem::Unicode(cls)),
80             x => Err(p.error(*x.span(), ast::ErrorKind::ClassEscapeInvalid)),
81         }
82     }
83 
84     /// Convert this primitive into a literal in a character class. In
85     /// particular, literals are the only valid items that can appear in
86     /// ranges.
87     ///
88     /// If this primitive is not a legal item (i.e., a class, assertion or a
89     /// dot), then return an error.
into_class_literal<P: Borrow<Parser>>( self, p: &ParserI<P>, ) -> Result<ast::Literal>90     fn into_class_literal<P: Borrow<Parser>>(
91         self,
92         p: &ParserI<P>,
93     ) -> Result<ast::Literal> {
94         use self::Primitive::*;
95 
96         match self {
97             Literal(lit) => Ok(lit),
98             x => Err(p.error(*x.span(), ast::ErrorKind::ClassRangeLiteral)),
99         }
100     }
101 }
102 
103 /// Returns true if the given character is a hexadecimal digit.
is_hex(c: char) -> bool104 fn is_hex(c: char) -> bool {
105     ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')
106 }
107 
108 /// Returns true if the given character is a valid in a capture group name.
109 ///
110 /// If `first` is true, then `c` is treated as the first character in the
111 /// group name (which is not allowed to be a digit).
is_capture_char(c: char, first: bool) -> bool112 fn is_capture_char(c: char, first: bool) -> bool {
113     c == '_' || (!first && c >= '0' && c <= '9')
114     || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
115 }
116 
117 /// A builder for a regular expression parser.
118 ///
119 /// This builder permits modifying configuration options for the parser.
120 #[derive(Clone, Debug)]
121 pub struct ParserBuilder {
122     ignore_whitespace: bool,
123     nest_limit: u32,
124     octal: bool,
125 }
126 
127 impl Default for ParserBuilder {
default() -> ParserBuilder128     fn default() -> ParserBuilder {
129         ParserBuilder::new()
130     }
131 }
132 
133 impl ParserBuilder {
134     /// Create a new parser builder with a default configuration.
new() -> ParserBuilder135     pub fn new() -> ParserBuilder {
136         ParserBuilder {
137             ignore_whitespace: false,
138             nest_limit: 250,
139             octal: false,
140         }
141     }
142 
143     /// Build a parser from this configuration with the given pattern.
build(&self) -> Parser144     pub fn build(&self) -> Parser {
145         Parser {
146             pos: Cell::new(Position { offset: 0, line: 1, column: 1 }),
147             capture_index: Cell::new(0),
148             nest_limit: self.nest_limit,
149             octal: self.octal,
150             initial_ignore_whitespace: self.ignore_whitespace,
151             ignore_whitespace: Cell::new(self.ignore_whitespace),
152             comments: RefCell::new(vec![]),
153             stack_group: RefCell::new(vec![]),
154             stack_class: RefCell::new(vec![]),
155             capture_names: RefCell::new(vec![]),
156             scratch: RefCell::new(String::new()),
157         }
158     }
159 
160     /// Set the nesting limit for this parser.
161     ///
162     /// The nesting limit controls how deep the abstract syntax tree is allowed
163     /// to be. If the AST exceeds the given limit (e.g., with too many nested
164     /// groups), then an error is returned by the parser.
165     ///
166     /// The purpose of this limit is to act as a heuristic to prevent stack
167     /// overflow for consumers that do structural induction on an `Ast` using
168     /// explicit recursion. While this crate never does this (instead using
169     /// constant stack space and moving the call stack to the heap), other
170     /// crates may.
171     ///
172     /// This limit is not checked until the entire Ast is parsed. Therefore,
173     /// if callers want to put a limit on the amount of heap space used, then
174     /// they should impose a limit on the length, in bytes, of the concrete
175     /// pattern string. In particular, this is viable since this parser
176     /// implementation will limit itself to heap space proportional to the
177     /// lenth of the pattern string.
178     ///
179     /// Note that a nest limit of `0` will return a nest limit error for most
180     /// patterns but not all. For example, a nest limit of `0` permits `a` but
181     /// not `ab`, since `ab` requires a concatenation, which results in a nest
182     /// depth of `1`. In general, a nest limit is not something that manifests
183     /// in an obvious way in the concrete syntax, therefore, it should not be
184     /// used in a granular way.
nest_limit(&mut self, limit: u32) -> &mut ParserBuilder185     pub fn nest_limit(&mut self, limit: u32) -> &mut ParserBuilder {
186         self.nest_limit = limit;
187         self
188     }
189 
190     /// Whether to support octal syntax or not.
191     ///
192     /// Octal syntax is a little-known way of uttering Unicode codepoints in
193     /// a regular expression. For example, `a`, `\x61`, `\u0061` and
194     /// `\141` are all equivalent regular expressions, where the last example
195     /// shows octal syntax.
196     ///
197     /// While supporting octal syntax isn't in and of itself a problem, it does
198     /// make good error messages harder. That is, in PCRE based regex engines,
199     /// syntax like `\0` invokes a backreference, which is explicitly
200     /// unsupported in Rust's regex engine. However, many users expect it to
201     /// be supported. Therefore, when octal support is disabled, the error
202     /// message will explicitly mention that backreferences aren't supported.
203     ///
204     /// Octal syntax is disabled by default.
octal(&mut self, yes: bool) -> &mut ParserBuilder205     pub fn octal(&mut self, yes: bool) -> &mut ParserBuilder {
206         self.octal = yes;
207         self
208     }
209 
210     /// Enable verbose mode in the regular expression.
211     ///
212     /// When enabled, verbose mode permits insigificant whitespace in many
213     /// places in the regular expression, as well as comments. Comments are
214     /// started using `#` and continue until the end of the line.
215     ///
216     /// By default, this is disabled. It may be selectively enabled in the
217     /// regular expression by using the `x` flag regardless of this setting.
ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder218     pub fn ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder {
219         self.ignore_whitespace = yes;
220         self
221     }
222 }
223 
224 /// A regular expression parser.
225 ///
226 /// This parses a string representation of a regular expression into an
227 /// abstract syntax tree. The size of the tree is proportional to the length
228 /// of the regular expression pattern.
229 ///
230 /// A `Parser` can be configured in more detail via a
231 /// [`ParserBuilder`](struct.ParserBuilder.html).
232 #[derive(Clone, Debug)]
233 pub struct Parser {
234     /// The current position of the parser.
235     pos: Cell<Position>,
236     /// The current capture index.
237     capture_index: Cell<u32>,
238     /// The maximum number of open parens/brackets allowed. If the parser
239     /// exceeds this number, then an error is returned.
240     nest_limit: u32,
241     /// Whether to support octal syntax or not. When `false`, the parser will
242     /// return an error helpfully pointing out that backreferences are not
243     /// supported.
244     octal: bool,
245     /// The initial setting for `ignore_whitespace` as provided by
246     /// Th`ParserBuilder`. is is used when reseting the parser's state.
247     initial_ignore_whitespace: bool,
248     /// Whether whitespace should be ignored. When enabled, comments are
249     /// also permitted.
250     ignore_whitespace: Cell<bool>,
251     /// A list of comments, in order of appearance.
252     comments: RefCell<Vec<ast::Comment>>,
253     /// A stack of grouped sub-expressions, including alternations.
254     stack_group: RefCell<Vec<GroupState>>,
255     /// A stack of nested character classes. This is only non-empty when
256     /// parsing a class.
257     stack_class: RefCell<Vec<ClassState>>,
258     /// A sorted sequence of capture names. This is used to detect duplicate
259     /// capture names and report an error if one is detected.
260     capture_names: RefCell<Vec<ast::CaptureName>>,
261     /// A scratch buffer used in various places. Mostly this is used to
262     /// accumulate relevant characters from parts of a pattern.
263     scratch: RefCell<String>,
264 }
265 
266 /// ParserI is the internal parser implementation.
267 ///
268 /// We use this separate type so that we can carry the provided pattern string
269 /// along with us. In particular, a `Parser` internal state is not tied to any
270 /// one pattern, but `ParserI` is.
271 ///
272 /// This type also lets us use `ParserI<&Parser>` in production code while
273 /// retaining the convenience of `ParserI<Parser>` for tests, which sometimes
274 /// work against the internal interface of the parser.
275 #[derive(Clone, Debug)]
276 struct ParserI<'s, P> {
277     /// The parser state/configuration.
278     parser: P,
279     /// The full regular expression provided by the user.
280     pattern: &'s str,
281 }
282 
283 /// GroupState represents a single stack frame while parsing nested groups
284 /// and alternations. Each frame records the state up to an opening parenthesis
285 /// or a alternating bracket `|`.
286 #[derive(Clone, Debug)]
287 enum GroupState {
288     /// This state is pushed whenever an opening group is found.
289     Group {
290         /// The concatenation immediately preceding the opening group.
291         concat: ast::Concat,
292         /// The group that has been opened. Its sub-AST is always empty.
293         group: ast::Group,
294         /// Whether this group has the `x` flag enabled or not.
295         ignore_whitespace: bool,
296     },
297     /// This state is pushed whenever a new alternation branch is found. If
298     /// an alternation branch is found and this state is at the top of the
299     /// stack, then this state should be modified to include the new
300     /// alternation.
301     Alternation(ast::Alternation),
302 }
303 
304 /// ClassState represents a single stack frame while parsing character classes.
305 /// Each frame records the state up to an intersection, difference, symmetric
306 /// difference or nested class.
307 ///
308 /// Note that a parser's character class stack is only non-empty when parsing
309 /// a character class. In all other cases, it is empty.
310 #[derive(Clone, Debug)]
311 enum ClassState {
312     /// This state is pushed whenever an opening bracket is found.
313     Open {
314         /// The union of class items immediately preceding this class.
315         union: ast::ClassSetUnion,
316         /// The class that has been opened. Typically this just corresponds
317         /// to the `[`, but it can also include `[^` since `^` indicates
318         /// negation of the class.
319         set: ast::ClassBracketed,
320     },
321     /// This state is pushed when a operator is seen. When popped, the stored
322     /// set becomes the left hand side of the operator.
323     Op {
324         /// The type of the operation, i.e., &&, -- or ~~.
325         kind: ast::ClassSetBinaryOpKind,
326         /// The left-hand side of the operator.
327         lhs: ast::ClassSet,
328     },
329 }
330 
331 impl Parser {
332     /// Create a new parser with a default configuration.
333     ///
334     /// The parser can be run with either the `parse` or `parse_with_comments`
335     /// methods. The parse methods return an abstract syntax tree.
336     ///
337     /// To set configuration options on the parser, use
338     /// [`ParserBuilder`](struct.ParserBuilder.html).
new() -> Parser339     pub fn new() -> Parser {
340         ParserBuilder::new().build()
341     }
342 
343     /// Parse the regular expression into an abstract syntax tree.
parse(&mut self, pattern: &str) -> Result<Ast>344     pub fn parse(&mut self, pattern: &str) -> Result<Ast> {
345         ParserI::new(self, pattern).parse()
346     }
347 
348     /// Parse the regular expression and return an abstract syntax tree with
349     /// all of the comments found in the pattern.
parse_with_comments( &mut self, pattern: &str, ) -> Result<ast::WithComments>350     pub fn parse_with_comments(
351         &mut self,
352         pattern: &str,
353     ) -> Result<ast::WithComments> {
354         ParserI::new(self, pattern).parse_with_comments()
355     }
356 
357     /// Reset the internal state of a parser.
358     ///
359     /// This is called at the beginning of every parse. This prevents the
360     /// parser from running with inconsistent state (say, if a previous
361     /// invocation returned an error and the parser is reused).
reset(&self)362     fn reset(&self) {
363         // These settings should be in line with the construction
364         // in `ParserBuilder::build`.
365         self.pos.set(Position { offset: 0, line: 1, column: 1});
366         self.ignore_whitespace.set(self.initial_ignore_whitespace);
367         self.comments.borrow_mut().clear();
368         self.stack_group.borrow_mut().clear();
369         self.stack_class.borrow_mut().clear();
370     }
371 }
372 
373 impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
374     /// Build an internal parser from a parser configuration and a pattern.
new(parser: P, pattern: &'s str) -> ParserI<'s, P>375     fn new(parser: P, pattern: &'s str) -> ParserI<'s, P> {
376         ParserI { parser: parser, pattern: pattern }
377     }
378 
379     /// Return a reference to the parser state.
parser(&self) -> &Parser380     fn parser(&self) -> &Parser {
381         self.parser.borrow()
382     }
383 
384     /// Return a reference to the pattern being parsed.
pattern(&self) -> &str385     fn pattern(&self) -> &str {
386         self.pattern.borrow()
387     }
388 
389     /// Create a new error with the given span and error type.
error(&self, span: Span, kind: ast::ErrorKind) -> ast::Error390     fn error(&self, span: Span, kind: ast::ErrorKind) -> ast::Error {
391         ast::Error {
392             kind: kind,
393             pattern: self.pattern().to_string(),
394             span: span,
395         }
396     }
397 
398     /// Return the current offset of the parser.
399     ///
400     /// The offset starts at `0` from the beginning of the regular expression
401     /// pattern string.
offset(&self) -> usize402     fn offset(&self) -> usize {
403         self.parser().pos.get().offset
404     }
405 
406     /// Return the current line number of the parser.
407     ///
408     /// The line number starts at `1`.
line(&self) -> usize409     fn line(&self) -> usize {
410         self.parser().pos.get().line
411     }
412 
413     /// Return the current column of the parser.
414     ///
415     /// The column number starts at `1` and is reset whenever a `\n` is seen.
column(&self) -> usize416     fn column(&self) -> usize {
417         self.parser().pos.get().column
418     }
419 
420     /// Return the next capturing index. Each subsequent call increments the
421     /// internal index.
422     ///
423     /// The span given should correspond to the location of the opening
424     /// parenthesis.
425     ///
426     /// If the capture limit is exceeded, then an error is returned.
next_capture_index(&self, span: Span) -> Result<u32>427     fn next_capture_index(&self, span: Span) -> Result<u32> {
428         let current = self.parser().capture_index.get();
429         let i = current.checked_add(1).ok_or_else(|| {
430             self.error(span, ast::ErrorKind::CaptureLimitExceeded)
431         })?;
432         self.parser().capture_index.set(i);
433         Ok(i)
434     }
435 
436     /// Adds the given capture name to this parser. If this capture name has
437     /// already been used, then an error is returned.
add_capture_name(&self, cap: &ast::CaptureName) -> Result<()>438     fn add_capture_name(&self, cap: &ast::CaptureName) -> Result<()> {
439         let mut names = self.parser().capture_names.borrow_mut();
440         match names.binary_search_by_key(
441             &cap.name.as_str(),
442             |c| c.name.as_str(),
443         ) {
444             Err(i) => {
445                 names.insert(i, cap.clone());
446                 Ok(())
447             }
448             Ok(i) => {
449                 Err(self.error(cap.span, ast::ErrorKind::GroupNameDuplicate {
450                     original: names[i].span,
451                 }))
452             }
453         }
454     }
455 
456     /// Return whether the parser should ignore whitespace or not.
ignore_whitespace(&self) -> bool457     fn ignore_whitespace(&self) -> bool {
458         self.parser().ignore_whitespace.get()
459     }
460 
461     /// Return the character at the current position of the parser.
462     ///
463     /// This panics if the current position does not point to a valid char.
char(&self) -> char464     fn char(&self) -> char {
465         self.char_at(self.offset())
466     }
467 
468     /// Return the character at the given position.
469     ///
470     /// This panics if the given position does not point to a valid char.
char_at(&self, i: usize) -> char471     fn char_at(&self, i: usize) -> char {
472         self.pattern()[i..].chars().next()
473             .unwrap_or_else(|| {
474                 panic!("expected char at offset {}", i)
475             })
476     }
477 
478     /// Bump the parser to the next Unicode scalar value.
479     ///
480     /// If the end of the input has been reached, then `false` is returned.
bump(&self) -> bool481     fn bump(&self) -> bool {
482         if self.is_eof() {
483             return false;
484         }
485         let Position { mut offset, mut line, mut column } = self.pos();
486         if self.char() == '\n' {
487             line = line.checked_add(1).unwrap();
488             column = 1;
489         } else {
490             column = column.checked_add(1).unwrap();
491         }
492         offset += self.char().len_utf8();
493         self.parser().pos.set(Position {
494             offset: offset,
495             line: line,
496             column: column,
497         });
498         self.pattern()[self.offset()..].chars().next().is_some()
499     }
500 
501     /// If the substring starting at the current position of the parser has
502     /// the given prefix, then bump the parser to the character immediately
503     /// following the prefix and return true. Otherwise, don't bump the parser
504     /// and return false.
bump_if(&self, prefix: &str) -> bool505     fn bump_if(&self, prefix: &str) -> bool {
506         if self.pattern()[self.offset()..].starts_with(prefix) {
507             for _ in 0..prefix.chars().count() {
508                 self.bump();
509             }
510             true
511         } else {
512             false
513         }
514     }
515 
516     /// Returns true if and only if the parser is positioned at a look-around
517     /// prefix. The conditions under which this returns true must always
518     /// correspond to a regular expression that would otherwise be consider
519     /// invalid.
520     ///
521     /// This should only be called immediately after parsing the opening of
522     /// a group or a set of flags.
is_lookaround_prefix(&self) -> bool523     fn is_lookaround_prefix(&self) -> bool {
524         self.bump_if("?=")
525         || self.bump_if("?!")
526         || self.bump_if("?<=")
527         || self.bump_if("?<!")
528     }
529 
530     /// Bump the parser, and if the `x` flag is enabled, bump through any
531     /// subsequent spaces. Return true if and only if the parser is not at
532     /// EOF.
bump_and_bump_space(&self) -> bool533     fn bump_and_bump_space(&self) -> bool {
534         if !self.bump() {
535             return false;
536         }
537         self.bump_space();
538         !self.is_eof()
539     }
540 
541     /// If the `x` flag is enabled (i.e., whitespace insensitivity with
542     /// comments), then this will advance the parser through all whitespace
543     /// and comments to the next non-whitespace non-comment byte.
544     ///
545     /// If the `x` flag is disabled, then this is a no-op.
546     ///
547     /// This should be used selectively throughout the parser where
548     /// arbitrary whitespace is permitted when the `x` flag is enabled. For
549     /// example, `{   5  , 6}` is equivalent to `{5,6}`.
bump_space(&self)550     fn bump_space(&self) {
551         if !self.ignore_whitespace() {
552             return;
553         }
554         while !self.is_eof() {
555             if self.char().is_whitespace() {
556                 self.bump();
557             } else if self.char() == '#' {
558                 let start = self.pos();
559                 let mut comment_text = String::new();
560                 self.bump();
561                 while !self.is_eof() {
562                     let c = self.char();
563                     self.bump();
564                     if c == '\n' {
565                         break;
566                     }
567                     comment_text.push(c);
568                 }
569                 let comment = ast::Comment {
570                     span: Span::new(start, self.pos()),
571                     comment: comment_text,
572                 };
573                 self.parser().comments.borrow_mut().push(comment);
574             } else {
575                 break;
576             }
577         }
578     }
579 
580     /// Peek at the next character in the input without advancing the parser.
581     ///
582     /// If the input has been exhausted, then this returns `None`.
peek(&self) -> Option<char>583     fn peek(&self) -> Option<char> {
584         if self.is_eof() {
585             return None;
586         }
587         self.pattern()[self.offset() + self.char().len_utf8()..].chars().next()
588     }
589 
590     /// Like peek, but will ignore spaces when the parser is in whitespace
591     /// insensitive mode.
peek_space(&self) -> Option<char>592     fn peek_space(&self) -> Option<char> {
593         if !self.ignore_whitespace() {
594             return self.peek();
595         }
596         if self.is_eof() {
597             return None;
598         }
599         let mut start = self.offset() + self.char().len_utf8();
600         let mut in_comment = false;
601         for (i, c) in self.pattern()[start..].char_indices() {
602             if c.is_whitespace() {
603                 continue;
604             } else if !in_comment && c == '#' {
605                 in_comment = true;
606             } else if in_comment && c == '\n' {
607                 in_comment = false;
608             } else {
609                 start += i;
610                 break;
611             }
612         }
613         self.pattern()[start..].chars().next()
614     }
615 
616     /// Returns true if the next call to `bump` would return false.
is_eof(&self) -> bool617     fn is_eof(&self) -> bool {
618         self.offset() == self.pattern().len()
619     }
620 
621     /// Return the current position of the parser, which includes the offset,
622     /// line and column.
pos(&self) -> Position623     fn pos(&self) -> Position {
624         self.parser().pos.get()
625     }
626 
627     /// Create a span at the current position of the parser. Both the start
628     /// and end of the span are set.
span(&self) -> Span629     fn span(&self) -> Span {
630         Span::splat(self.pos())
631     }
632 
633     /// Create a span that covers the current character.
span_char(&self) -> Span634     fn span_char(&self) -> Span {
635         let mut next = Position {
636             offset: self.offset().checked_add(self.char().len_utf8()).unwrap(),
637             line: self.line(),
638             column: self.column().checked_add(1).unwrap(),
639         };
640         if self.char() == '\n' {
641             next.line += 1;
642             next.column = 1;
643         }
644         Span::new(self.pos(), next)
645     }
646 
647     /// Parse and push a single alternation on to the parser's internal stack.
648     /// If the top of the stack already has an alternation, then add to that
649     /// instead of pushing a new one.
650     ///
651     /// The concatenation given corresponds to a single alternation branch.
652     /// The concatenation returned starts the next branch and is empty.
653     ///
654     /// This assumes the parser is currently positioned at `|` and will advance
655     /// the parser to the character following `|`.
push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat>656     fn push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
657         assert_eq!(self.char(), '|');
658         concat.span.end = self.pos();
659         self.push_or_add_alternation(concat);
660         self.bump();
661         Ok(ast::Concat {
662             span: self.span(),
663             asts: vec![],
664         })
665     }
666 
667     /// Pushes or adds the given branch of an alternation to the parser's
668     /// internal stack of state.
push_or_add_alternation(&self, concat: ast::Concat)669     fn push_or_add_alternation(&self, concat: ast::Concat) {
670         use self::GroupState::*;
671 
672         let mut stack = self.parser().stack_group.borrow_mut();
673         if let Some(&mut Alternation(ref mut alts)) = stack.last_mut() {
674             alts.asts.push(concat.into_ast());
675             return;
676         }
677         stack.push(Alternation(ast::Alternation {
678             span: Span::new(concat.span.start, self.pos()),
679             asts: vec![concat.into_ast()],
680         }));
681     }
682 
683     /// Parse and push a group AST (and its parent concatenation) on to the
684     /// parser's internal stack. Return a fresh concatenation corresponding
685     /// to the group's sub-AST.
686     ///
687     /// If a set of flags was found (with no group), then the concatenation
688     /// is returned with that set of flags added.
689     ///
690     /// This assumes that the parser is currently positioned on the opening
691     /// parenthesis. It advances the parser to the character at the start
692     /// of the sub-expression (or adjoining expression).
693     ///
694     /// If there was a problem parsing the start of the group, then an error
695     /// is returned.
push_group(&self, mut concat: ast::Concat) -> Result<ast::Concat>696     fn push_group(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
697         assert_eq!(self.char(), '(');
698         match self.parse_group()? {
699             Either::Left(set) => {
700                 let ignore = set.flags.flag_state(ast::Flag::IgnoreWhitespace);
701                 if let Some(v) = ignore {
702                     self.parser().ignore_whitespace.set(v);
703                 }
704 
705                 concat.asts.push(Ast::Flags(set));
706                 Ok(concat)
707             }
708             Either::Right(group) => {
709                 let old_ignore_whitespace = self.ignore_whitespace();
710                 let new_ignore_whitespace = group
711                     .flags()
712                     .and_then(|f| f.flag_state(ast::Flag::IgnoreWhitespace))
713                     .unwrap_or(old_ignore_whitespace);
714                 self.parser().stack_group.borrow_mut().push(GroupState::Group {
715                     concat: concat,
716                     group: group,
717                     ignore_whitespace: old_ignore_whitespace,
718                 });
719                 self.parser().ignore_whitespace.set(new_ignore_whitespace);
720                 Ok(ast::Concat {
721                     span: self.span(),
722                     asts: vec![],
723                 })
724             }
725         }
726     }
727 
728     /// Pop a group AST from the parser's internal stack and set the group's
729     /// AST to the given concatenation. Return the concatenation containing
730     /// the group.
731     ///
732     /// This assumes that the parser is currently positioned on the closing
733     /// parenthesis and advances the parser to the character following the `)`.
734     ///
735     /// If no such group could be popped, then an unopened group error is
736     /// returned.
pop_group(&self, mut group_concat: ast::Concat) -> Result<ast::Concat>737     fn pop_group(&self, mut group_concat: ast::Concat) -> Result<ast::Concat> {
738         use self::GroupState::*;
739 
740         assert_eq!(self.char(), ')');
741         let mut stack = self.parser().stack_group.borrow_mut();
742         let (mut prior_concat, mut group, ignore_whitespace, alt) =
743             match stack.pop() {
744                 Some(Group { concat, group, ignore_whitespace }) => {
745                     (concat, group, ignore_whitespace, None)
746                 }
747                 Some(Alternation(alt)) => {
748                     match stack.pop() {
749                         Some(Group { concat, group, ignore_whitespace }) => {
750                             (concat, group, ignore_whitespace, Some(alt))
751                         }
752                         None | Some(Alternation(_)) => {
753                             return Err(self.error(
754                                 self.span_char(),
755                                 ast::ErrorKind::GroupUnopened,
756                             ));
757                         }
758                     }
759                 }
760                 None => {
761                     return Err(self.error(
762                         self.span_char(),
763                         ast::ErrorKind::GroupUnopened,
764                     ));
765                 }
766             };
767         self.parser().ignore_whitespace.set(ignore_whitespace);
768         group_concat.span.end = self.pos();
769         self.bump();
770         group.span.end = self.pos();
771         match alt {
772             Some(mut alt) => {
773                 alt.span.end = group_concat.span.end;
774                 alt.asts.push(group_concat.into_ast());
775                 group.ast = Box::new(alt.into_ast());
776             }
777             None => {
778                 group.ast = Box::new(group_concat.into_ast());
779             }
780         }
781         prior_concat.asts.push(Ast::Group(group));
782         Ok(prior_concat)
783     }
784 
785     /// Pop the last state from the parser's internal stack, if it exists, and
786     /// add the given concatenation to it. There either must be no state or a
787     /// single alternation item on the stack. Any other scenario produces an
788     /// error.
789     ///
790     /// This assumes that the parser has advanced to the end.
pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast>791     fn pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast> {
792         concat.span.end = self.pos();
793         let mut stack = self.parser().stack_group.borrow_mut();
794         let ast = match stack.pop() {
795             None => Ok(concat.into_ast()),
796             Some(GroupState::Alternation(mut alt)) => {
797                 alt.span.end = self.pos();
798                 alt.asts.push(concat.into_ast());
799                 Ok(Ast::Alternation(alt))
800             }
801             Some(GroupState::Group { group, .. }) => {
802                 return Err(self.error(
803                     group.span,
804                     ast::ErrorKind::GroupUnclosed,
805                 ));
806             }
807         };
808         // If we try to pop again, there should be nothing.
809         match stack.pop() {
810             None => ast,
811             Some(GroupState::Alternation(_)) => {
812                 // This unreachable is unfortunate. This case can't happen
813                 // because the only way we can be here is if there were two
814                 // `GroupState::Alternation`s adjacent in the parser's stack,
815                 // which we guarantee to never happen because we never push a
816                 // `GroupState::Alternation` if one is already at the top of
817                 // the stack.
818                 unreachable!()
819             }
820             Some(GroupState::Group { group, .. }) => {
821                 Err(self.error(group.span, ast::ErrorKind::GroupUnclosed))
822             }
823         }
824     }
825 
826     /// Parse the opening of a character class and push the current class
827     /// parsing context onto the parser's stack. This assumes that the parser
828     /// is positioned at an opening `[`. The given union should correspond to
829     /// the union of set items built up before seeing the `[`.
830     ///
831     /// If there was a problem parsing the opening of the class, then an error
832     /// is returned. Otherwise, a new union of set items for the class is
833     /// returned (which may be populated with either a `]` or a `-`).
push_class_open( &self, parent_union: ast::ClassSetUnion, ) -> Result<ast::ClassSetUnion>834     fn push_class_open(
835         &self,
836         parent_union: ast::ClassSetUnion,
837     ) -> Result<ast::ClassSetUnion> {
838         assert_eq!(self.char(), '[');
839 
840         let (nested_set, nested_union) = self.parse_set_class_open()?;
841         self.parser().stack_class.borrow_mut().push(ClassState::Open {
842             union: parent_union,
843             set: nested_set,
844         });
845         Ok(nested_union)
846     }
847 
848     /// Parse the end of a character class set and pop the character class
849     /// parser stack. The union given corresponds to the last union built
850     /// before seeing the closing `]`. The union returned corresponds to the
851     /// parent character class set with the nested class added to it.
852     ///
853     /// This assumes that the parser is positioned at a `]` and will advance
854     /// the parser to the byte immediately following the `]`.
855     ///
856     /// If the stack is empty after popping, then this returns the final
857     /// "top-level" character class AST (where a "top-level" character class
858     /// is one that is not nested inside any other character class).
859     ///
860     /// If there is no corresponding opening bracket on the parser's stack,
861     /// then an error is returned.
pop_class( &self, nested_union: ast::ClassSetUnion, ) -> Result<Either<ast::ClassSetUnion, ast::Class>>862     fn pop_class(
863         &self,
864         nested_union: ast::ClassSetUnion,
865     ) -> Result<Either<ast::ClassSetUnion, ast::Class>> {
866         assert_eq!(self.char(), ']');
867 
868         let item = ast::ClassSet::Item(nested_union.into_item());
869         let prevset = self.pop_class_op(item);
870         let mut stack = self.parser().stack_class.borrow_mut();
871         match stack.pop() {
872             None => {
873                 // We can never observe an empty stack:
874                 //
875                 // 1) We are guaranteed to start with a non-empty stack since
876                 //    the character class parser is only initiated when it sees
877                 //    a `[`.
878                 // 2) If we ever observe an empty stack while popping after
879                 //    seeing a `]`, then we signal the character class parser
880                 //    to terminate.
881                 panic!("unexpected empty character class stack")
882             },
883             Some(ClassState::Op { .. }) => {
884                 // This panic is unfortunate, but this case is impossible
885                 // since we already popped the Op state if one exists above.
886                 // Namely, every push to the class parser stack is guarded by
887                 // whether an existing Op is already on the top of the stack.
888                 // If it is, the existing Op is modified. That is, the stack
889                 // can never have consecutive Op states.
890                 panic!("unexpected ClassState::Op")
891             }
892             Some(ClassState::Open { mut union, mut set }) => {
893                 self.bump();
894                 set.span.end = self.pos();
895                 set.kind = prevset;
896                 if stack.is_empty() {
897                     Ok(Either::Right(ast::Class::Bracketed(set)))
898                 } else {
899                     union.push(ast::ClassSetItem::Bracketed(Box::new(set)));
900                     Ok(Either::Left(union))
901                 }
902             }
903         }
904     }
905 
906     /// Return an "unclosed class" error whose span points to the most
907     /// recently opened class.
908     ///
909     /// This should only be called while parsing a character class.
unclosed_class_error(&self) -> ast::Error910     fn unclosed_class_error(&self) -> ast::Error {
911         for state in self.parser().stack_class.borrow().iter().rev() {
912             match *state {
913                 ClassState::Open { ref set, .. } => {
914                     return self.error(set.span, ast::ErrorKind::ClassUnclosed);
915                 }
916                 _ => {}
917             }
918         }
919         // We are guaranteed to have a non-empty stack with at least
920         // one open bracket, so we should never get here.
921         panic!("no open character class found")
922     }
923 
924     /// Push the current set of class items on to the class parser's stack as
925     /// the left hand side of the given operator.
926     ///
927     /// A fresh set union is returned, which should be used to build the right
928     /// hand side of this operator.
push_class_op( &self, next_kind: ast::ClassSetBinaryOpKind, next_union: ast::ClassSetUnion, ) -> ast::ClassSetUnion929     fn push_class_op(
930         &self,
931         next_kind: ast::ClassSetBinaryOpKind,
932         next_union: ast::ClassSetUnion,
933     ) -> ast::ClassSetUnion {
934 
935         let item = ast::ClassSet::Item(next_union.into_item());
936         let new_lhs = self.pop_class_op(item);
937         self.parser().stack_class.borrow_mut().push(ClassState::Op {
938             kind: next_kind,
939             lhs: new_lhs,
940         });
941         ast::ClassSetUnion { span: self.span(), items: vec![] }
942     }
943 
944     /// Pop a character class set from the character class parser stack. If the
945     /// top of the stack is just an item (not an operation), then return the
946     /// given set unchanged. If the top of the stack is an operation, then the
947     /// given set will be used as the rhs of the operation on the top of the
948     /// stack. In that case, the binary operation is returned as a set.
pop_class_op(&self, rhs: ast::ClassSet) -> ast::ClassSet949     fn pop_class_op(&self, rhs: ast::ClassSet) -> ast::ClassSet {
950         let mut stack = self.parser().stack_class.borrow_mut();
951         let (kind, lhs) = match stack.pop() {
952             Some(ClassState::Op { kind, lhs }) => (kind, lhs),
953             Some(state @ ClassState::Open { .. }) => {
954                 stack.push(state);
955                 return rhs;
956             }
957             None => unreachable!(),
958         };
959         let span = Span::new(lhs.span().start, rhs.span().end);
960         ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
961             span: span,
962             kind: kind,
963             lhs: Box::new(lhs),
964             rhs: Box::new(rhs),
965         })
966     }
967 }
968 
969 impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
970     /// Parse the regular expression into an abstract syntax tree.
parse(&self) -> Result<Ast>971     fn parse(&self) -> Result<Ast> {
972         self.parse_with_comments().map(|astc| astc.ast)
973     }
974 
975     /// Parse the regular expression and return an abstract syntax tree with
976     /// all of the comments found in the pattern.
parse_with_comments(&self) -> Result<ast::WithComments>977     fn parse_with_comments(&self) -> Result<ast::WithComments> {
978         assert_eq!(self.offset(), 0, "parser can only be used once");
979         self.parser().reset();
980         let mut concat = ast::Concat {
981             span: self.span(),
982             asts: vec![],
983         };
984         loop {
985             self.bump_space();
986             if self.is_eof() {
987                 break;
988             }
989             match self.char() {
990                 '(' => concat = self.push_group(concat)?,
991                 ')' => concat = self.pop_group(concat)?,
992                 '|' => concat = self.push_alternate(concat)?,
993                 '[' => {
994                     let class = self.parse_set_class()?;
995                     concat.asts.push(Ast::Class(class));
996                 }
997                 '?' => {
998                     concat = self.parse_uncounted_repetition(
999                         concat, ast::RepetitionKind::ZeroOrOne)?;
1000                 }
1001                 '*' => {
1002                     concat = self.parse_uncounted_repetition(
1003                         concat, ast::RepetitionKind::ZeroOrMore)?;
1004                 }
1005                 '+' => {
1006                     concat = self.parse_uncounted_repetition(
1007                         concat, ast::RepetitionKind::OneOrMore)?;
1008                 }
1009                 '{' => {
1010                     concat = self.parse_counted_repetition(concat)?;
1011                 }
1012                 _ => concat.asts.push(self.parse_primitive()?.into_ast()),
1013             }
1014         }
1015         let ast = self.pop_group_end(concat)?;
1016         NestLimiter::new(self).check(&ast)?;
1017         Ok(ast::WithComments {
1018             ast: ast,
1019             comments: mem::replace(
1020                 &mut *self.parser().comments.borrow_mut(),
1021                 vec![],
1022             ),
1023         })
1024     }
1025 
1026     /// Parses an uncounted repetition operation. An uncounted repetition
1027     /// operator includes ?, * and +, but does not include the {m,n} syntax.
1028     /// The given `kind` should correspond to the operator observed by the
1029     /// caller.
1030     ///
1031     /// This assumes that the paser is currently positioned at the repetition
1032     /// operator and advances the parser to the first character after the
1033     /// operator. (Note that the operator may include a single additional `?`,
1034     /// which makes the operator ungreedy.)
1035     ///
1036     /// The caller should include the concatenation that is being built. The
1037     /// concatenation returned includes the repetition operator applied to the
1038     /// last expression in the given concatenation.
parse_uncounted_repetition( &self, mut concat: ast::Concat, kind: ast::RepetitionKind, ) -> Result<ast::Concat>1039     fn parse_uncounted_repetition(
1040         &self,
1041         mut concat: ast::Concat,
1042         kind: ast::RepetitionKind,
1043     ) -> Result<ast::Concat> {
1044         assert!(
1045             self.char() == '?' || self.char() == '*' || self.char() == '+');
1046         let op_start = self.pos();
1047         let ast = match concat.asts.pop() {
1048             Some(ast) => ast,
1049             None => return Err(self.error(
1050                 self.span(),
1051                 ast::ErrorKind::RepetitionMissing,
1052             )),
1053         };
1054         match ast {
1055             Ast::Empty(_) | Ast::Flags(_) => return Err(self.error(
1056                 self.span(),
1057                 ast::ErrorKind::RepetitionMissing,
1058             )),
1059             _ => {}
1060         }
1061         let mut greedy = true;
1062         if self.bump() && self.char() == '?' {
1063             greedy = false;
1064             self.bump();
1065         }
1066         concat.asts.push(Ast::Repetition(ast::Repetition {
1067             span: ast.span().with_end(self.pos()),
1068             op: ast::RepetitionOp {
1069                 span: Span::new(op_start, self.pos()),
1070                 kind: kind,
1071             },
1072             greedy: greedy,
1073             ast: Box::new(ast),
1074         }));
1075         Ok(concat)
1076     }
1077 
1078     /// Parses a counted repetition operation. A counted repetition operator
1079     /// corresponds to the {m,n} syntax, and does not include the ?, * or +
1080     /// operators.
1081     ///
1082     /// This assumes that the paser is currently positioned at the opening `{`
1083     /// and advances the parser to the first character after the operator.
1084     /// (Note that the operator may include a single additional `?`, which
1085     /// makes the operator ungreedy.)
1086     ///
1087     /// The caller should include the concatenation that is being built. The
1088     /// concatenation returned includes the repetition operator applied to the
1089     /// last expression in the given concatenation.
parse_counted_repetition( &self, mut concat: ast::Concat, ) -> Result<ast::Concat>1090     fn parse_counted_repetition(
1091         &self,
1092         mut concat: ast::Concat,
1093     ) -> Result<ast::Concat> {
1094         assert!(self.char() == '{');
1095         let start = self.pos();
1096         let ast = match concat.asts.pop() {
1097             Some(ast) => ast,
1098             None => return Err(self.error(
1099                 self.span(),
1100                 ast::ErrorKind::RepetitionMissing,
1101             )),
1102         };
1103         if !self.bump_and_bump_space() {
1104             return Err(self.error(
1105                 Span::new(start, self.pos()),
1106                 ast::ErrorKind::RepetitionCountUnclosed,
1107             ));
1108         }
1109         let count_start = self.parse_decimal()?;
1110         let mut range = ast::RepetitionRange::Exactly(count_start);
1111         if self.is_eof() {
1112             return Err(self.error(
1113                 Span::new(start, self.pos()),
1114                 ast::ErrorKind::RepetitionCountUnclosed,
1115             ));
1116         }
1117         if self.char() == ',' {
1118             if !self.bump_and_bump_space() {
1119                 return Err(self.error(
1120                     Span::new(start, self.pos()),
1121                     ast::ErrorKind::RepetitionCountUnclosed,
1122                 ));
1123             }
1124             if self.char() != '}' {
1125                 let count_end = self.parse_decimal()?;
1126                 range = ast::RepetitionRange::Bounded(count_start, count_end);
1127             } else {
1128                 range = ast::RepetitionRange::AtLeast(count_start);
1129             }
1130         }
1131         if self.is_eof() || self.char() != '}' {
1132             return Err(self.error(
1133                 Span::new(start, self.pos()),
1134                 ast::ErrorKind::RepetitionCountUnclosed,
1135             ));
1136         }
1137 
1138         let mut greedy = true;
1139         if self.bump_and_bump_space() && self.char() == '?' {
1140             greedy = false;
1141             self.bump();
1142         }
1143 
1144         let op_span = Span::new(start, self.pos());
1145         if !range.is_valid() {
1146             return Err(self.error(
1147                 op_span,
1148                 ast::ErrorKind::RepetitionCountInvalid,
1149             ));
1150         }
1151         concat.asts.push(Ast::Repetition(ast::Repetition {
1152             span: ast.span().with_end(self.pos()),
1153             op: ast::RepetitionOp {
1154                 span: op_span,
1155                 kind: ast::RepetitionKind::Range(range),
1156             },
1157             greedy: greedy,
1158             ast: Box::new(ast),
1159         }));
1160         Ok(concat)
1161     }
1162 
1163     /// Parse a group (which contains a sub-expression) or a set of flags.
1164     ///
1165     /// If a group was found, then it is returned with an empty AST. If a set
1166     /// of flags is found, then that set is returned.
1167     ///
1168     /// The parser should be positioned at the opening parenthesis.
1169     ///
1170     /// This advances the parser to the character before the start of the
1171     /// sub-expression (in the case of a group) or to the closing parenthesis
1172     /// immediately following the set of flags.
1173     ///
1174     /// # Errors
1175     ///
1176     /// If flags are given and incorrectly specified, then a corresponding
1177     /// error is returned.
1178     ///
1179     /// If a capture name is given and it is incorrectly specified, then a
1180     /// corresponding error is returned.
parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>>1181     fn parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>> {
1182         assert_eq!(self.char(), '(');
1183         let open_span = self.span_char();
1184         self.bump();
1185         self.bump_space();
1186         if self.is_lookaround_prefix() {
1187             return Err(self.error(
1188                 Span::new(open_span.start, self.span().end),
1189                 ast::ErrorKind::UnsupportedLookAround,
1190             ));
1191         }
1192         let inner_span = self.span();
1193         if self.bump_if("?P<") {
1194             let capture_index = self.next_capture_index(open_span)?;
1195             let cap = self.parse_capture_name(capture_index)?;
1196             Ok(Either::Right(ast::Group {
1197                 span: open_span,
1198                 kind: ast::GroupKind::CaptureName(cap),
1199                 ast: Box::new(Ast::Empty(self.span())),
1200             }))
1201         } else if self.bump_if("?") {
1202             if self.is_eof() {
1203                 return Err(self.error(
1204                     open_span,
1205                     ast::ErrorKind::GroupUnclosed,
1206                 ));
1207             }
1208             let flags = self.parse_flags()?;
1209             let char_end = self.char();
1210             self.bump();
1211             if char_end == ')' {
1212                 // We don't allow empty flags, e.g., `(?)`. We instead
1213                 // interpret it as a repetition operator missing its argument.
1214                 if flags.items.is_empty() {
1215                     return Err(self.error(
1216                         inner_span,
1217                         ast::ErrorKind::RepetitionMissing,
1218                     ));
1219                 }
1220                 Ok(Either::Left(ast::SetFlags {
1221                     span: Span { end: self.pos(), ..open_span },
1222                     flags: flags,
1223                 }))
1224             } else {
1225                 assert_eq!(char_end, ':');
1226                 Ok(Either::Right(ast::Group {
1227                     span: open_span,
1228                     kind: ast::GroupKind::NonCapturing(flags),
1229                     ast: Box::new(Ast::Empty(self.span())),
1230                 }))
1231             }
1232         } else {
1233             let capture_index = self.next_capture_index(open_span)?;
1234             Ok(Either::Right(ast::Group {
1235                 span: open_span,
1236                 kind: ast::GroupKind::CaptureIndex(capture_index),
1237                 ast: Box::new(Ast::Empty(self.span())),
1238             }))
1239         }
1240     }
1241 
1242     /// Parses a capture group name. Assumes that the parser is positioned at
1243     /// the first character in the name following the opening `<` (and may
1244     /// possibly be EOF). This advances the parser to the first character
1245     /// following the closing `>`.
1246     ///
1247     /// The caller must provide the capture index of the group for this name.
parse_capture_name( &self, capture_index: u32, ) -> Result<ast::CaptureName>1248     fn parse_capture_name(
1249         &self,
1250         capture_index: u32,
1251     ) -> Result<ast::CaptureName> {
1252         if self.is_eof() {
1253             return Err(self.error(
1254                 self.span(),
1255                 ast::ErrorKind::GroupNameUnexpectedEof,
1256             ));
1257         }
1258         let start = self.pos();
1259         loop {
1260             if self.char() == '>' {
1261                 break;
1262             }
1263             if !is_capture_char(self.char(), self.pos() == start) {
1264                 return Err(self.error(
1265                     self.span_char(),
1266                     ast::ErrorKind::GroupNameInvalid,
1267                 ));
1268             }
1269             if !self.bump() {
1270                 break;
1271             }
1272         }
1273         let end = self.pos();
1274         if self.is_eof() {
1275             return Err(self.error(
1276                 self.span(),
1277                 ast::ErrorKind::GroupNameUnexpectedEof,
1278             ));
1279         }
1280         assert_eq!(self.char(), '>');
1281         self.bump();
1282         let name = &self.pattern()[start.offset..end.offset];
1283         if name.is_empty() {
1284             return Err(self.error(
1285                 Span::new(start, start),
1286                 ast::ErrorKind::GroupNameEmpty,
1287             ));
1288         }
1289         let capname = ast::CaptureName {
1290             span: Span::new(start, end),
1291             name: name.to_string(),
1292             index: capture_index,
1293         };
1294         self.add_capture_name(&capname)?;
1295         Ok(capname)
1296     }
1297 
1298     /// Parse a sequence of flags starting at the current character.
1299     ///
1300     /// This advances the parser to the character immediately following the
1301     /// flags, which is guaranteed to be either `:` or `)`.
1302     ///
1303     /// # Errors
1304     ///
1305     /// If any flags are duplicated, then an error is returned.
1306     ///
1307     /// If the negation operator is used more than once, then an error is
1308     /// returned.
1309     ///
1310     /// If no flags could be found or if the negation operation is not followed
1311     /// by any flags, then an error is returned.
parse_flags(&self) -> Result<ast::Flags>1312     fn parse_flags(&self) -> Result<ast::Flags> {
1313         let mut flags = ast::Flags {
1314             span: self.span(),
1315             items: vec![],
1316         };
1317         let mut last_was_negation = None;
1318         while self.char() != ':' && self.char() != ')' {
1319             if self.char() == '-' {
1320                 last_was_negation = Some(self.span_char());
1321                 let item = ast::FlagsItem {
1322                     span: self.span_char(),
1323                     kind: ast::FlagsItemKind::Negation,
1324                 };
1325                 if let Some(i) = flags.add_item(item) {
1326                     return Err(self.error(
1327                         self.span_char(),
1328                         ast::ErrorKind::FlagRepeatedNegation {
1329                             original: flags.items[i].span,
1330                         },
1331                     ));
1332                 }
1333             } else {
1334                 last_was_negation = None;
1335                 let item = ast::FlagsItem {
1336                     span: self.span_char(),
1337                     kind: ast::FlagsItemKind::Flag(self.parse_flag()?),
1338                 };
1339                 if let Some(i) = flags.add_item(item) {
1340                     return Err(self.error(
1341                         self.span_char(),
1342                         ast::ErrorKind::FlagDuplicate {
1343                             original: flags.items[i].span,
1344                         },
1345                     ));
1346                 }
1347             }
1348             if !self.bump() {
1349                 return Err(self.error(
1350                     self.span(),
1351                     ast::ErrorKind::FlagUnexpectedEof,
1352                 ));
1353             }
1354         }
1355         if let Some(span) = last_was_negation {
1356             return Err(self.error(span, ast::ErrorKind::FlagDanglingNegation));
1357         }
1358         flags.span.end = self.pos();
1359         Ok(flags)
1360     }
1361 
1362     /// Parse the current character as a flag. Do not advance the parser.
1363     ///
1364     /// # Errors
1365     ///
1366     /// If the flag is not recognized, then an error is returned.
parse_flag(&self) -> Result<ast::Flag>1367     fn parse_flag(&self) -> Result<ast::Flag> {
1368         match self.char() {
1369             'i' => Ok(ast::Flag::CaseInsensitive),
1370             'm' => Ok(ast::Flag::MultiLine),
1371             's' => Ok(ast::Flag::DotMatchesNewLine),
1372             'U' => Ok(ast::Flag::SwapGreed),
1373             'u' => Ok(ast::Flag::Unicode),
1374             'x' => Ok(ast::Flag::IgnoreWhitespace),
1375             _ => Err(self.error(
1376                 self.span_char(),
1377                 ast::ErrorKind::FlagUnrecognized,
1378             )),
1379         }
1380     }
1381 
1382     /// Parse a primitive AST. e.g., A literal, non-set character class or
1383     /// assertion.
1384     ///
1385     /// This assumes that the parser expects a primitive at the current
1386     /// location. i.e., All other non-primitive cases have been handled.
1387     /// For example, if the parser's position is at `|`, then `|` will be
1388     /// treated as a literal (e.g., inside a character class).
1389     ///
1390     /// This advances the parser to the first character immediately following
1391     /// the primitive.
parse_primitive(&self) -> Result<Primitive>1392     fn parse_primitive(&self) -> Result<Primitive> {
1393         match self.char() {
1394             '\\' => self.parse_escape(),
1395             '.' => {
1396                 let ast = Primitive::Dot(self.span_char());
1397                 self.bump();
1398                 Ok(ast)
1399             }
1400             '^' => {
1401                 let ast = Primitive::Assertion(ast::Assertion {
1402                     span: self.span_char(),
1403                     kind: ast::AssertionKind::StartLine,
1404                 });
1405                 self.bump();
1406                 Ok(ast)
1407             }
1408             '$' => {
1409                 let ast = Primitive::Assertion(ast::Assertion {
1410                     span: self.span_char(),
1411                     kind: ast::AssertionKind::EndLine,
1412                 });
1413                 self.bump();
1414                 Ok(ast)
1415             }
1416             c => {
1417                 let ast = Primitive::Literal(ast::Literal {
1418                     span: self.span_char(),
1419                     kind: ast::LiteralKind::Verbatim,
1420                     c: c,
1421                 });
1422                 self.bump();
1423                 Ok(ast)
1424             }
1425         }
1426     }
1427 
1428     /// Parse an escape sequence as a primitive AST.
1429     ///
1430     /// This assumes the parser is positioned at the start of the escape
1431     /// sequence, i.e., `\`. It advances the parser to the first position
1432     /// immediately following the escape sequence.
parse_escape(&self) -> Result<Primitive>1433     fn parse_escape(&self) -> Result<Primitive> {
1434         assert_eq!(self.char(), '\\');
1435         let start = self.pos();
1436         if !self.bump() {
1437             return Err(self.error(
1438                 Span::new(start, self.pos()),
1439                 ast::ErrorKind::EscapeUnexpectedEof,
1440             ));
1441         }
1442         let c = self.char();
1443         // Put some of the more complicated routines into helpers.
1444         match c {
1445             '0'...'7' => {
1446                 if !self.parser().octal {
1447                     return Err(self.error(
1448                         Span::new(start, self.span_char().end),
1449                         ast::ErrorKind::UnsupportedBackreference,
1450                     ));
1451                 }
1452                 let mut lit = self.parse_octal();
1453                 lit.span.start = start;
1454                 return Ok(Primitive::Literal(lit));
1455             }
1456             '8'...'9' if !self.parser().octal => {
1457                 return Err(self.error(
1458                     Span::new(start, self.span_char().end),
1459                     ast::ErrorKind::UnsupportedBackreference,
1460                 ));
1461             }
1462             'x' | 'u' | 'U' => {
1463                 let mut lit = self.parse_hex()?;
1464                 lit.span.start = start;
1465                 return Ok(Primitive::Literal(lit));
1466             }
1467             'p' | 'P' => {
1468                 let mut cls = self.parse_unicode_class()?;
1469                 cls.span.start = start;
1470                 return Ok(Primitive::Unicode(cls));
1471             }
1472             'd' | 's' | 'w' | 'D' | 'S' | 'W' => {
1473                 let mut cls = self.parse_perl_class();
1474                 cls.span.start = start;
1475                 return Ok(Primitive::Perl(cls));
1476             }
1477             _ => {}
1478         }
1479 
1480         // Handle all of the one letter sequences inline.
1481         self.bump();
1482         let span = Span::new(start, self.pos());
1483         if is_meta_character(c) {
1484             return Ok(Primitive::Literal(ast::Literal {
1485                 span: span,
1486                 kind: ast::LiteralKind::Punctuation,
1487                 c: c,
1488             }));
1489         }
1490         let special = |kind, c| Ok(Primitive::Literal(ast::Literal {
1491             span: span,
1492             kind: ast::LiteralKind::Special(kind),
1493             c: c,
1494         }));
1495         match c {
1496             'a' => special(ast::SpecialLiteralKind::Bell, '\x07'),
1497             'f' => special(ast::SpecialLiteralKind::FormFeed, '\x0C'),
1498             't' => special(ast::SpecialLiteralKind::Tab, '\t'),
1499             'n' => special(ast::SpecialLiteralKind::LineFeed, '\n'),
1500             'r' => special(ast::SpecialLiteralKind::CarriageReturn, '\r'),
1501             'v' => special(ast::SpecialLiteralKind::VerticalTab, '\x0B'),
1502             ' ' if self.ignore_whitespace() => {
1503                 special(ast::SpecialLiteralKind::Space, ' ')
1504             }
1505             'A' => Ok(Primitive::Assertion(ast::Assertion {
1506                 span: span,
1507                 kind: ast::AssertionKind::StartText,
1508             })),
1509             'z' => Ok(Primitive::Assertion(ast::Assertion {
1510                 span: span,
1511                 kind: ast::AssertionKind::EndText,
1512             })),
1513             'b' => Ok(Primitive::Assertion(ast::Assertion {
1514                 span: span,
1515                 kind: ast::AssertionKind::WordBoundary,
1516             })),
1517             'B' => Ok(Primitive::Assertion(ast::Assertion {
1518                 span: span,
1519                 kind: ast::AssertionKind::NotWordBoundary,
1520             })),
1521             _ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)),
1522         }
1523     }
1524 
1525     /// Parse an octal representation of a Unicode codepoint up to 3 digits
1526     /// long. This expects the parser to be positioned at the first octal
1527     /// digit and advances the parser to the first character immediately
1528     /// following the octal number. This also assumes that parsing octal
1529     /// escapes is enabled.
1530     ///
1531     /// Assuming the preconditions are met, this routine can never fail.
parse_octal(&self) -> ast::Literal1532     fn parse_octal(&self) -> ast::Literal {
1533         use std::char;
1534         use std::u32;
1535 
1536         assert!(self.parser().octal);
1537         assert!('0' <= self.char() && self.char() <= '7');
1538         let start = self.pos();
1539         // Parse up to two more digits.
1540         while
1541             self.bump() &&
1542             '0' <= self.char() && self.char() <= '7' &&
1543             self.pos().offset - start.offset <= 2
1544         {}
1545         let end = self.pos();
1546         let octal = &self.pattern()[start.offset..end.offset];
1547         // Parsing the octal should never fail since the above guarantees a
1548         // valid number.
1549         let codepoint =
1550             u32::from_str_radix(octal, 8).expect("valid octal number");
1551         // The max value for 3 digit octal is 0777 = 511 and [0, 511] has no
1552         // invalid Unicode scalar values.
1553         let c = char::from_u32(codepoint).expect("Unicode scalar value");
1554         ast::Literal {
1555             span: Span::new(start, end),
1556             kind: ast::LiteralKind::Octal,
1557             c: c,
1558         }
1559     }
1560 
1561     /// Parse a hex representation of a Unicode codepoint. This handles both
1562     /// hex notations, i.e., `\xFF` and `\x{FFFF}`. This expects the parser to
1563     /// be positioned at the `x`, `u` or `U` prefix. The parser is advanced to
1564     /// the first character immediately following the hexadecimal literal.
parse_hex(&self) -> Result<ast::Literal>1565     fn parse_hex(&self) -> Result<ast::Literal> {
1566         assert!(self.char() == 'x'
1567                 || self.char() == 'u'
1568                 || self.char() == 'U');
1569 
1570         let hex_kind = match self.char() {
1571             'x' => ast::HexLiteralKind::X,
1572             'u' => ast::HexLiteralKind::UnicodeShort,
1573             _ => ast::HexLiteralKind::UnicodeLong,
1574         };
1575         if !self.bump_and_bump_space() {
1576             return Err(self.error(
1577                 self.span(),
1578                 ast::ErrorKind::EscapeUnexpectedEof,
1579             ));
1580         }
1581         if self.char() == '{' {
1582             self.parse_hex_brace(hex_kind)
1583         } else {
1584             self.parse_hex_digits(hex_kind)
1585         }
1586     }
1587 
1588     /// Parse an N-digit hex representation of a Unicode codepoint. This
1589     /// expects the parser to be positioned at the first digit and will advance
1590     /// the parser to the first character immediately following the escape
1591     /// sequence.
1592     ///
1593     /// The number of digits given must be 2 (for `\xNN`), 4 (for `\uNNNN`)
1594     /// or 8 (for `\UNNNNNNNN`).
parse_hex_digits( &self, kind: ast::HexLiteralKind, ) -> Result<ast::Literal>1595     fn parse_hex_digits(
1596         &self,
1597         kind: ast::HexLiteralKind,
1598     ) -> Result<ast::Literal> {
1599         use std::char;
1600         use std::u32;
1601 
1602         let mut scratch = self.parser().scratch.borrow_mut();
1603         scratch.clear();
1604 
1605         let start = self.pos();
1606         for i in 0..kind.digits() {
1607             if i > 0 && !self.bump_and_bump_space() {
1608                 return Err(self.error(
1609                     self.span(),
1610                     ast::ErrorKind::EscapeUnexpectedEof,
1611                 ));
1612             }
1613             if !is_hex(self.char()) {
1614                 return Err(self.error(
1615                     self.span_char(),
1616                     ast::ErrorKind::EscapeHexInvalidDigit,
1617                 ));
1618             }
1619             scratch.push(self.char());
1620         }
1621         // The final bump just moves the parser past the literal, which may
1622         // be EOF.
1623         self.bump_and_bump_space();
1624         let end = self.pos();
1625         let hex = scratch.as_str();
1626         match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
1627             None => Err(self.error(
1628                 Span::new(start, end),
1629                 ast::ErrorKind::EscapeHexInvalid,
1630             )),
1631             Some(c) => Ok(ast::Literal {
1632                 span: Span::new(start, end),
1633                 kind: ast::LiteralKind::HexFixed(kind),
1634                 c: c,
1635             }),
1636         }
1637     }
1638 
1639     /// Parse a hex representation of any Unicode scalar value. This expects
1640     /// the parser to be positioned at the opening brace `{` and will advance
1641     /// the parser to the first character following the closing brace `}`.
parse_hex_brace( &self, kind: ast::HexLiteralKind, ) -> Result<ast::Literal>1642     fn parse_hex_brace(
1643         &self,
1644         kind: ast::HexLiteralKind,
1645     ) -> Result<ast::Literal> {
1646         use std::char;
1647         use std::u32;
1648 
1649         let mut scratch = self.parser().scratch.borrow_mut();
1650         scratch.clear();
1651 
1652         let brace_pos = self.pos();
1653         let start = self.span_char().end;
1654         while self.bump_and_bump_space() && self.char() != '}' {
1655             if !is_hex(self.char()) {
1656                 return Err(self.error(
1657                     self.span_char(),
1658                     ast::ErrorKind::EscapeHexInvalidDigit,
1659                 ));
1660             }
1661             scratch.push(self.char());
1662         }
1663         if self.is_eof() {
1664             return Err(self.error(
1665                 Span::new(brace_pos, self.pos()),
1666                 ast::ErrorKind::EscapeUnexpectedEof,
1667             ));
1668         }
1669         let end = self.pos();
1670         let hex = scratch.as_str();
1671         assert_eq!(self.char(), '}');
1672         self.bump_and_bump_space();
1673 
1674         if hex.is_empty() {
1675             return Err(self.error(
1676                 Span::new(brace_pos, self.pos()),
1677                 ast::ErrorKind::EscapeHexEmpty,
1678             ));
1679         }
1680         match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
1681             None => Err(self.error(
1682                 Span::new(start, end),
1683                 ast::ErrorKind::EscapeHexInvalid,
1684             )),
1685             Some(c) => Ok(ast::Literal {
1686                 span: Span::new(start, self.pos()),
1687                 kind: ast::LiteralKind::HexBrace(kind),
1688                 c: c,
1689             }),
1690         }
1691     }
1692 
1693     /// Parse a decimal number into a u32 while trimming leading and trailing
1694     /// whitespace.
1695     ///
1696     /// This expects the parser to be positioned at the first position where
1697     /// a decimal digit could occur. This will advance the parser to the byte
1698     /// immediately following the last contiguous decimal digit.
1699     ///
1700     /// If no decimal digit could be found or if there was a problem parsing
1701     /// the complete set of digits into a u32, then an error is returned.
parse_decimal(&self) -> Result<u32>1702     fn parse_decimal(&self) -> Result<u32> {
1703         let mut scratch = self.parser().scratch.borrow_mut();
1704         scratch.clear();
1705 
1706         while !self.is_eof() && self.char().is_whitespace() {
1707             self.bump();
1708         }
1709         let start = self.pos();
1710         while !self.is_eof() && '0' <= self.char() && self.char() <= '9' {
1711             scratch.push(self.char());
1712             self.bump_and_bump_space();
1713         }
1714         let span = Span::new(start, self.pos());
1715         while !self.is_eof() && self.char().is_whitespace() {
1716             self.bump_and_bump_space();
1717         }
1718         let digits = scratch.as_str();
1719         if digits.is_empty() {
1720             return Err(self.error(span, ast::ErrorKind::DecimalEmpty));
1721         }
1722         match u32::from_str_radix(digits, 10).ok() {
1723             Some(n) => Ok(n),
1724             None => Err(self.error(span, ast::ErrorKind::DecimalInvalid)),
1725         }
1726     }
1727 
1728     /// Parse a standard character class consisting primarily of characters or
1729     /// character ranges, but can also contain nested character classes of
1730     /// any type (sans `.`).
1731     ///
1732     /// This assumes the parser is positioned at the opening `[`. If parsing
1733     /// is successful, then the parser is advanced to the position immediately
1734     /// following the closing `]`.
parse_set_class(&self) -> Result<ast::Class>1735     fn parse_set_class(&self) -> Result<ast::Class> {
1736         assert_eq!(self.char(), '[');
1737 
1738         let mut union = ast::ClassSetUnion {
1739             span: self.span(),
1740             items: vec![],
1741         };
1742         loop {
1743             self.bump_space();
1744             if self.is_eof() {
1745                 return Err(self.unclosed_class_error());
1746             }
1747             match self.char() {
1748                 '[' => {
1749                     // If we've already parsed the opening bracket, then
1750                     // attempt to treat this as the beginning of an ASCII
1751                     // class. If ASCII class parsing fails, then the parser
1752                     // backs up to `[`.
1753                     if !self.parser().stack_class.borrow().is_empty() {
1754                         if let Some(cls) = self.maybe_parse_ascii_class() {
1755                             union.push(ast::ClassSetItem::Ascii(cls));
1756                             continue;
1757                         }
1758                     }
1759                     union = self.push_class_open(union)?;
1760                 }
1761                 ']' => {
1762                     match self.pop_class(union)? {
1763                         Either::Left(nested_union) => { union = nested_union; }
1764                         Either::Right(class) => return Ok(class),
1765                     }
1766                 }
1767                 '&' if self.peek() == Some('&') => {
1768                     assert!(self.bump_if("&&"));
1769                     union = self.push_class_op(
1770                         ast::ClassSetBinaryOpKind::Intersection, union);
1771                 }
1772                 '-' if self.peek() == Some('-') => {
1773                     assert!(self.bump_if("--"));
1774                     union = self.push_class_op(
1775                         ast::ClassSetBinaryOpKind::Difference, union);
1776                 }
1777                 '~' if self.peek() == Some('~') => {
1778                     assert!(self.bump_if("~~"));
1779                     union = self.push_class_op(
1780                         ast::ClassSetBinaryOpKind::SymmetricDifference, union);
1781                 }
1782                 _ => {
1783                     union.push(self.parse_set_class_range()?);
1784                 }
1785             }
1786         }
1787     }
1788 
1789     /// Parse a single primitive item in a character class set. The item to
1790     /// be parsed can either be one of a simple literal character, a range
1791     /// between two simple literal characters or a "primitive" character
1792     /// class like \w or \p{Greek}.
1793     ///
1794     /// If an invalid escape is found, or if a character class is found where
1795     /// a simple literal is expected (e.g., in a range), then an error is
1796     /// returned.
parse_set_class_range(&self) -> Result<ast::ClassSetItem>1797     fn parse_set_class_range(&self) -> Result<ast::ClassSetItem> {
1798         let prim1 = self.parse_set_class_item()?;
1799         self.bump_space();
1800         if self.is_eof() {
1801             return Err(self.unclosed_class_error());
1802         }
1803         // If the next char isn't a `-`, then we don't have a range.
1804         // There are two exceptions. If the char after a `-` is a `]`, then
1805         // `-` is interpreted as a literal `-`. Alternatively, if the char
1806         // after a `-` is a `-`, then `--` corresponds to a "difference"
1807         // operation.
1808         if self.char() != '-'
1809             || self.peek_space() == Some(']')
1810             || self.peek_space() == Some('-')
1811         {
1812             return prim1.into_class_set_item(self);
1813         }
1814         // OK, now we're parsing a range, so bump past the `-` and parse the
1815         // second half of the range.
1816         if !self.bump_and_bump_space() {
1817             return Err(self.unclosed_class_error());
1818         }
1819         let prim2 = self.parse_set_class_item()?;
1820         let range = ast::ClassSetRange {
1821             span: Span::new(prim1.span().start, prim2.span().end),
1822             start: prim1.into_class_literal(self)?,
1823             end: prim2.into_class_literal(self)?,
1824         };
1825         if !range.is_valid() {
1826             return Err(self.error(
1827                 range.span,
1828                 ast::ErrorKind::ClassRangeInvalid,
1829             ));
1830         }
1831         Ok(ast::ClassSetItem::Range(range))
1832     }
1833 
1834     /// Parse a single item in a character class as a primitive, where the
1835     /// primitive either consists of a verbatim literal or a single escape
1836     /// sequence.
1837     ///
1838     /// This assumes the parser is positioned at the beginning of a primitive,
1839     /// and advances the parser to the first position after the primitive if
1840     /// successful.
1841     ///
1842     /// Note that it is the caller's responsibility to report an error if an
1843     /// illegal primitive was parsed.
parse_set_class_item(&self) -> Result<Primitive>1844     fn parse_set_class_item(&self) -> Result<Primitive> {
1845         if self.char() == '\\' {
1846             self.parse_escape()
1847         } else {
1848             let x = Primitive::Literal(ast::Literal {
1849                 span: self.span_char(),
1850                 kind: ast::LiteralKind::Verbatim,
1851                 c: self.char(),
1852             });
1853             self.bump();
1854             Ok(x)
1855         }
1856     }
1857 
1858     /// Parses the opening of a character class set. This includes the opening
1859     /// bracket along with `^` if present to indicate negation. This also
1860     /// starts parsing the opening set of unioned items if applicable, since
1861     /// there are special rules applied to certain characters in the opening
1862     /// of a character class. For example, `[^]]` is the class of all
1863     /// characters not equal to `]`. (`]` would need to be escaped in any other
1864     /// position.) Similarly for `-`.
1865     ///
1866     /// In all cases, the op inside the returned `ast::ClassBracketed` is an
1867     /// empty union. This empty union should be replaced with the actual item
1868     /// when it is popped from the parser's stack.
1869     ///
1870     /// This assumes the parser is positioned at the opening `[` and advances
1871     /// the parser to the first non-special byte of the character class.
1872     ///
1873     /// An error is returned if EOF is found.
parse_set_class_open( &self, ) -> Result<(ast::ClassBracketed, ast::ClassSetUnion)>1874     fn parse_set_class_open(
1875         &self,
1876     ) -> Result<(ast::ClassBracketed, ast::ClassSetUnion)> {
1877         assert_eq!(self.char(), '[');
1878         let start = self.pos();
1879         if !self.bump_and_bump_space() {
1880             return Err(self.error(
1881                 Span::new(start, self.pos()),
1882                 ast::ErrorKind::ClassUnclosed,
1883             ));
1884         }
1885 
1886         let negated =
1887             if self.char() != '^' {
1888                 false
1889             } else {
1890                 if !self.bump_and_bump_space() {
1891                     return Err(self.error(
1892                         Span::new(start, self.pos()),
1893                         ast::ErrorKind::ClassUnclosed,
1894                     ));
1895                 }
1896                 true
1897             };
1898         // Accept any number of `-` as literal `-`.
1899         let mut union = ast::ClassSetUnion {
1900             span: self.span(),
1901             items: vec![],
1902         };
1903         while self.char() == '-' {
1904             union.push(ast::ClassSetItem::Literal(ast::Literal {
1905                 span: self.span_char(),
1906                 kind: ast::LiteralKind::Verbatim,
1907                 c: '-',
1908             }));
1909             if !self.bump_and_bump_space() {
1910                 return Err(self.error(
1911                     Span::new(start, self.pos()),
1912                     ast::ErrorKind::ClassUnclosed,
1913                 ));
1914             }
1915         }
1916         // If `]` is the *first* char in a set, then interpret it as a literal
1917         // `]`. That is, an empty class is impossible to write.
1918         if union.items.is_empty() && self.char() == ']' {
1919             union.push(ast::ClassSetItem::Literal(ast::Literal {
1920                 span: self.span_char(),
1921                 kind: ast::LiteralKind::Verbatim,
1922                 c: ']',
1923             }));
1924             if !self.bump_and_bump_space() {
1925                 return Err(self.error(
1926                     Span::new(start, self.pos()),
1927                     ast::ErrorKind::ClassUnclosed,
1928                 ));
1929             }
1930         }
1931         let set = ast::ClassBracketed {
1932             span: Span::new(start, self.pos()),
1933             negated: negated,
1934             kind: ast::ClassSet::union(ast::ClassSetUnion {
1935                 span: Span::new(union.span.start, union.span.start),
1936                 items: vec![],
1937             }),
1938         };
1939         Ok((set, union))
1940     }
1941 
1942     /// Attempt to parse an ASCII character class, e.g., `[:alnum:]`.
1943     ///
1944     /// This assumes the parser is positioned at the opening `[`.
1945     ///
1946     /// If no valid ASCII character class could be found, then this does not
1947     /// advance the parser and `None` is returned. Otherwise, the parser is
1948     /// advanced to the first byte following the closing `]` and the
1949     /// corresponding ASCII class is returned.
maybe_parse_ascii_class(&self) -> Option<ast::ClassAscii>1950     fn maybe_parse_ascii_class(&self) -> Option<ast::ClassAscii> {
1951         // ASCII character classes are interesting from a parsing perspective
1952         // because parsing cannot fail with any interesting error. For example,
1953         // in order to use an ASCII character class, it must be enclosed in
1954         // double brackets, e.g., `[[:alnum:]]`. Alternatively, you might think
1955         // of it as "ASCII character characters have the syntax `[:NAME:]`
1956         // which can only appear within character brackets." This means that
1957         // things like `[[:lower:]A]` are legal constructs.
1958         //
1959         // However, if one types an incorrect ASCII character class, e.g.,
1960         // `[[:loower:]]`, then we treat that as a normal nested character
1961         // class containing the characters `:elorw`. One might argue that we
1962         // should return an error instead since the repeated colons give away
1963         // the intent to write an ASCII class. But what if the user typed
1964         // `[[:lower]]` instead? How can we tell that was intended to be an
1965         // ASCII class and not just a normal nested class?
1966         //
1967         // Reasonable people can probably disagree over this, but for better
1968         // or worse, we implement semantics that never fails at the expense
1969         // of better failure modes.
1970         assert_eq!(self.char(), '[');
1971         // If parsing fails, then we back up the parser to this starting point.
1972         let start = self.pos();
1973         let mut negated = false;
1974         if !self.bump() || self.char() != ':' {
1975             self.parser().pos.set(start);
1976             return None;
1977         }
1978         if !self.bump() {
1979             self.parser().pos.set(start);
1980             return None;
1981         }
1982         if self.char() == '^' {
1983             negated = true;
1984             if !self.bump() {
1985                 self.parser().pos.set(start);
1986                 return None;
1987             }
1988         }
1989         let name_start = self.offset();
1990         while self.char() != ':' && self.bump() {}
1991         if self.is_eof() {
1992             self.parser().pos.set(start);
1993             return None;
1994         }
1995         let name = &self.pattern()[name_start..self.offset()];
1996         if !self.bump_if(":]") {
1997             self.parser().pos.set(start);
1998             return None;
1999         }
2000         let kind = match ast::ClassAsciiKind::from_name(name) {
2001             Some(kind) => kind,
2002             None => {
2003                 self.parser().pos.set(start);
2004                 return None;
2005             }
2006         };
2007         Some(ast::ClassAscii {
2008             span: Span::new(start, self.pos()),
2009             kind: kind,
2010             negated: negated,
2011         })
2012     }
2013 
2014     /// Parse a Unicode class in either the single character notation, `\pN`
2015     /// or the multi-character bracketed notation, `\p{Greek}`. This assumes
2016     /// the parser is positioned at the `p` (or `P` for negation) and will
2017     /// advance the parser to the character immediately following the class.
2018     ///
2019     /// Note that this does not check whether the class name is valid or not.
parse_unicode_class(&self) -> Result<ast::ClassUnicode>2020     fn parse_unicode_class(&self) -> Result<ast::ClassUnicode> {
2021         assert!(self.char() == 'p' || self.char() == 'P');
2022 
2023         let mut scratch = self.parser().scratch.borrow_mut();
2024         scratch.clear();
2025 
2026         let negated = self.char() == 'P';
2027         if !self.bump_and_bump_space() {
2028             return Err(self.error(
2029                 self.span(),
2030                 ast::ErrorKind::EscapeUnexpectedEof,
2031             ));
2032         }
2033         let (start, kind) =
2034             if self.char() == '{' {
2035                 let start = self.span_char().end;
2036                 while self.bump_and_bump_space() && self.char() != '}' {
2037                     scratch.push(self.char());
2038                 }
2039                 if self.is_eof() {
2040                     return Err(self.error(
2041                         self.span(),
2042                         ast::ErrorKind::EscapeUnexpectedEof,
2043                     ));
2044                 }
2045                 assert_eq!(self.char(), '}');
2046                 self.bump();
2047 
2048                 let name = scratch.as_str();
2049                 if let Some(i) = name.find("!=") {
2050                     (start, ast::ClassUnicodeKind::NamedValue {
2051                         op: ast::ClassUnicodeOpKind::NotEqual,
2052                         name: name[..i].to_string(),
2053                         value: name[i+2..].to_string(),
2054                     })
2055                 } else if let Some(i) = name.find(':') {
2056                     (start, ast::ClassUnicodeKind::NamedValue {
2057                         op: ast::ClassUnicodeOpKind::Colon,
2058                         name: name[..i].to_string(),
2059                         value: name[i+1..].to_string(),
2060                     })
2061                 } else if let Some(i) = name.find('=') {
2062                     (start, ast::ClassUnicodeKind::NamedValue {
2063                         op: ast::ClassUnicodeOpKind::Equal,
2064                         name: name[..i].to_string(),
2065                         value: name[i+1..].to_string(),
2066                     })
2067                 } else {
2068                     (start, ast::ClassUnicodeKind::Named(name.to_string()))
2069                 }
2070             } else {
2071                 let start = self.pos();
2072                 let c = self.char();
2073                 self.bump_and_bump_space();
2074                 let kind = ast::ClassUnicodeKind::OneLetter(c);
2075                 (start, kind)
2076             };
2077         Ok(ast::ClassUnicode {
2078             span: Span::new(start, self.pos()),
2079             negated: negated,
2080             kind: kind,
2081         })
2082     }
2083 
2084     /// Parse a Perl character class, e.g., `\d` or `\W`. This assumes the
2085     /// parser is currently at a valid character class name and will be
2086     /// advanced to the character immediately following the class.
parse_perl_class(&self) -> ast::ClassPerl2087     fn parse_perl_class(&self) -> ast::ClassPerl {
2088         let c = self.char();
2089         let span = self.span_char();
2090         self.bump();
2091         let (negated, kind) = match c {
2092             'd' => (false, ast::ClassPerlKind::Digit),
2093             'D' => (true, ast::ClassPerlKind::Digit),
2094             's' => (false, ast::ClassPerlKind::Space),
2095             'S' => (true, ast::ClassPerlKind::Space),
2096             'w' => (false, ast::ClassPerlKind::Word),
2097             'W' => (true, ast::ClassPerlKind::Word),
2098             c => panic!("expected valid Perl class but got '{}'", c),
2099         };
2100         ast::ClassPerl { span: span, kind: kind, negated: negated }
2101     }
2102 }
2103 
2104 /// A type that traverses a fully parsed Ast and checks whether its depth
2105 /// exceeds the specified nesting limit. If it does, then an error is returned.
2106 #[derive(Debug)]
2107 struct NestLimiter<'p, 's: 'p, P: 'p + 's> {
2108     /// The parser that is checking the nest limit.
2109     p: &'p ParserI<'s, P>,
2110     /// The current depth while walking an Ast.
2111     depth: u32,
2112 }
2113 
2114 impl<'p, 's, P: Borrow<Parser>> NestLimiter<'p, 's, P> {
new(p: &'p ParserI<'s, P>) -> NestLimiter<'p, 's, P>2115     fn new(p: &'p ParserI<'s, P>) -> NestLimiter<'p, 's, P> {
2116         NestLimiter { p: p, depth: 0 }
2117     }
2118 
check(self, ast: &Ast) -> Result<()>2119     fn check(self, ast: &Ast) -> Result<()> {
2120         ast::visit(ast, self)
2121     }
2122 
increment_depth(&mut self, span: &Span) -> Result<()>2123     fn increment_depth(&mut self, span: &Span) -> Result<()> {
2124         let new = self.depth.checked_add(1).ok_or_else(|| self.p.error(
2125             span.clone(),
2126             ast::ErrorKind::NestLimitExceeded(::std::u32::MAX),
2127         ))?;
2128         let limit = self.p.parser().nest_limit;
2129         if new > limit {
2130             return Err(self.p.error(
2131                 span.clone(),
2132                 ast::ErrorKind::NestLimitExceeded(limit),
2133             ));
2134         }
2135         self.depth = new;
2136         Ok(())
2137     }
2138 
decrement_depth(&mut self)2139     fn decrement_depth(&mut self) {
2140         // Assuming the correctness of the visitor, this should never drop
2141         // below 0.
2142         self.depth = self.depth.checked_sub(1).unwrap();
2143     }
2144 }
2145 
2146 impl<'p, 's, P: Borrow<Parser>> ast::Visitor for NestLimiter<'p, 's, P> {
2147     type Output = ();
2148     type Err = ast::Error;
2149 
finish(self) -> Result<()>2150     fn finish(self) -> Result<()> {
2151         Ok(())
2152     }
2153 
visit_pre(&mut self, ast: &Ast) -> Result<()>2154     fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
2155         let span = match *ast {
2156             Ast::Empty(_)
2157             | Ast::Flags(_)
2158             | Ast::Literal(_)
2159             | Ast::Dot(_)
2160             | Ast::Assertion(_)
2161             | Ast::Class(ast::Class::Unicode(_))
2162             | Ast::Class(ast::Class::Perl(_)) => {
2163                 // These are all base cases, so we don't increment depth.
2164                 return Ok(());
2165             }
2166             Ast::Class(ast::Class::Bracketed(ref x)) => &x.span,
2167             Ast::Repetition(ref x) => &x.span,
2168             Ast::Group(ref x) => &x.span,
2169             Ast::Alternation(ref x) => &x.span,
2170             Ast::Concat(ref x) => &x.span,
2171         };
2172         self.increment_depth(span)
2173     }
2174 
visit_post(&mut self, ast: &Ast) -> Result<()>2175     fn visit_post(&mut self, ast: &Ast) -> Result<()> {
2176         match *ast {
2177             Ast::Empty(_)
2178             | Ast::Flags(_)
2179             | Ast::Literal(_)
2180             | Ast::Dot(_)
2181             | Ast::Assertion(_)
2182             | Ast::Class(ast::Class::Unicode(_))
2183             | Ast::Class(ast::Class::Perl(_)) => {
2184                 // These are all base cases, so we don't decrement depth.
2185                 Ok(())
2186             }
2187             Ast::Class(ast::Class::Bracketed(_))
2188             | Ast::Repetition(_)
2189             | Ast::Group(_)
2190             | Ast::Alternation(_)
2191             | Ast::Concat(_) => {
2192                 self.decrement_depth();
2193                 Ok(())
2194             }
2195         }
2196     }
2197 
visit_class_set_item_pre( &mut self, ast: &ast::ClassSetItem, ) -> Result<()>2198     fn visit_class_set_item_pre(
2199         &mut self,
2200         ast: &ast::ClassSetItem,
2201     ) -> Result<()> {
2202         let span = match *ast {
2203             ast::ClassSetItem::Empty(_)
2204             | ast::ClassSetItem::Literal(_)
2205             | ast::ClassSetItem::Range(_)
2206             | ast::ClassSetItem::Ascii(_)
2207             | ast::ClassSetItem::Unicode(_)
2208             | ast::ClassSetItem::Perl(_) => {
2209                 // These are all base cases, so we don't increment depth.
2210                 return Ok(());
2211             }
2212             ast::ClassSetItem::Bracketed(ref x) => &x.span,
2213             ast::ClassSetItem::Union(ref x) => &x.span,
2214         };
2215         self.increment_depth(span)
2216     }
2217 
visit_class_set_item_post( &mut self, ast: &ast::ClassSetItem, ) -> Result<()>2218     fn visit_class_set_item_post(
2219         &mut self,
2220         ast: &ast::ClassSetItem,
2221     ) -> Result<()> {
2222         match *ast {
2223             ast::ClassSetItem::Empty(_)
2224             | ast::ClassSetItem::Literal(_)
2225             | ast::ClassSetItem::Range(_)
2226             | ast::ClassSetItem::Ascii(_)
2227             | ast::ClassSetItem::Unicode(_)
2228             | ast::ClassSetItem::Perl(_) => {
2229                 // These are all base cases, so we don't decrement depth.
2230                 Ok(())
2231             }
2232             ast::ClassSetItem::Bracketed(_)
2233             | ast::ClassSetItem::Union(_) => {
2234                 self.decrement_depth();
2235                 Ok(())
2236             }
2237         }
2238     }
2239 
visit_class_set_binary_op_pre( &mut self, ast: &ast::ClassSetBinaryOp, ) -> Result<()>2240     fn visit_class_set_binary_op_pre(
2241         &mut self,
2242         ast: &ast::ClassSetBinaryOp,
2243     ) -> Result<()> {
2244         self.increment_depth(&ast.span)
2245     }
2246 
visit_class_set_binary_op_post( &mut self, _ast: &ast::ClassSetBinaryOp, ) -> Result<()>2247     fn visit_class_set_binary_op_post(
2248         &mut self,
2249         _ast: &ast::ClassSetBinaryOp,
2250     ) -> Result<()> {
2251         self.decrement_depth();
2252         Ok(())
2253     }
2254 }
2255 
2256 #[cfg(test)]
2257 mod tests {
2258     use std::ops::Range;
2259 
2260     use ast::{self, Ast, Position, Span};
2261     use super::{Parser, ParserI, ParserBuilder, Primitive};
2262 
2263     // Our own assert_eq, which has slightly better formatting (but honestly
2264     // still kind of crappy).
2265     macro_rules! assert_eq {
2266         ($left:expr, $right:expr) => ({
2267             match (&$left, &$right) {
2268                 (left_val, right_val) => {
2269                     if !(*left_val == *right_val) {
2270                         panic!("assertion failed: `(left == right)`\n\n\
2271                                left:  `{:?}`\nright: `{:?}`\n\n",
2272                                left_val, right_val)
2273                     }
2274                 }
2275             }
2276         });
2277     }
2278 
2279     // We create these errors to compare with real ast::Errors in the tests.
2280     // We define equality between TestError and ast::Error to disregard the
2281     // pattern string in ast::Error, which is annoying to provide in tests.
2282     #[derive(Clone, Debug)]
2283     struct TestError {
2284         span: Span,
2285         kind: ast::ErrorKind,
2286     }
2287 
2288     impl PartialEq<ast::Error> for TestError {
eq(&self, other: &ast::Error) -> bool2289         fn eq(&self, other: &ast::Error) -> bool {
2290             self.span == other.span && self.kind == other.kind
2291         }
2292     }
2293 
2294     impl PartialEq<TestError> for ast::Error {
eq(&self, other: &TestError) -> bool2295         fn eq(&self, other: &TestError) -> bool {
2296             self.span == other.span && self.kind == other.kind
2297         }
2298     }
2299 
s(str: &str) -> String2300     fn s(str: &str) -> String {
2301         str.to_string()
2302     }
2303 
parser(pattern: &str) -> ParserI<Parser>2304     fn parser(pattern: &str) -> ParserI<Parser> {
2305         ParserI::new(Parser::new(), pattern)
2306     }
2307 
parser_octal(pattern: &str) -> ParserI<Parser>2308     fn parser_octal(pattern: &str) -> ParserI<Parser> {
2309         let parser = ParserBuilder::new().octal(true).build();
2310         ParserI::new(parser, pattern)
2311     }
2312 
parser_nest_limit(pattern: &str, nest_limit: u32) -> ParserI<Parser>2313     fn parser_nest_limit(pattern: &str, nest_limit: u32) -> ParserI<Parser> {
2314         let p = ParserBuilder::new().nest_limit(nest_limit).build();
2315         ParserI::new(p, pattern)
2316     }
2317 
parser_ignore_whitespace(pattern: &str) -> ParserI<Parser>2318     fn parser_ignore_whitespace(pattern: &str) -> ParserI<Parser> {
2319         let p = ParserBuilder::new().ignore_whitespace(true).build();
2320         ParserI::new(p, pattern)
2321     }
2322 
2323     /// Short alias for creating a new span.
nspan(start: Position, end: Position) -> Span2324     fn nspan(start: Position, end: Position) -> Span {
2325         Span::new(start, end)
2326     }
2327 
2328     /// Short alias for creating a new position.
npos(offset: usize, line: usize, column: usize) -> Position2329     fn npos(offset: usize, line: usize, column: usize) -> Position {
2330         Position::new(offset, line, column)
2331     }
2332 
2333     /// Create a new span from the given offset range. This assumes a single
2334     /// line and sets the columns based on the offsets. i.e., This only works
2335     /// out of the box for ASCII, which is fine for most tests.
span(range: Range<usize>) -> Span2336     fn span(range: Range<usize>) -> Span {
2337         let start = Position::new(range.start, 1, range.start + 1);
2338         let end = Position::new(range.end, 1, range.end + 1);
2339         Span::new(start, end)
2340     }
2341 
2342     /// Create a new span for the corresponding byte range in the given string.
span_range(subject: &str, range: Range<usize>) -> Span2343     fn span_range(subject: &str, range: Range<usize>) -> Span {
2344         let start = Position {
2345             offset: range.start,
2346             line: 1 + subject[..range.start].matches('\n').count(),
2347             column: 1 + subject[..range.start]
2348                 .chars()
2349                 .rev()
2350                 .position(|c| c == '\n')
2351                 .unwrap_or(subject[..range.start].chars().count()),
2352         };
2353         let end = Position {
2354             offset: range.end,
2355             line: 1 + subject[..range.end].matches('\n').count(),
2356             column: 1 + subject[..range.end]
2357                 .chars()
2358                 .rev()
2359                 .position(|c| c == '\n')
2360                 .unwrap_or(subject[..range.end].chars().count()),
2361         };
2362         Span::new(start, end)
2363     }
2364 
2365     /// Create a verbatim literal starting at the given position.
lit(c: char, start: usize) -> Ast2366     fn lit(c: char, start: usize) -> Ast {
2367         lit_with(c, span(start..start + c.len_utf8()))
2368     }
2369 
2370     /// Create a punctuation literal starting at the given position.
punct_lit(c: char, span: Span) -> Ast2371     fn punct_lit(c: char, span: Span) -> Ast {
2372         Ast::Literal(ast::Literal {
2373             span: span,
2374             kind: ast::LiteralKind::Punctuation,
2375             c: c,
2376         })
2377     }
2378 
2379     /// Create a verbatim literal with the given span.
lit_with(c: char, span: Span) -> Ast2380     fn lit_with(c: char, span: Span) -> Ast {
2381         Ast::Literal(ast::Literal {
2382             span: span,
2383             kind: ast::LiteralKind::Verbatim,
2384             c: c,
2385         })
2386     }
2387 
2388     /// Create a concatenation with the given range.
concat(range: Range<usize>, asts: Vec<Ast>) -> Ast2389     fn concat(range: Range<usize>, asts: Vec<Ast>) -> Ast {
2390         concat_with(span(range), asts)
2391     }
2392 
2393     /// Create a concatenation with the given span.
concat_with(span: Span, asts: Vec<Ast>) -> Ast2394     fn concat_with(span: Span, asts: Vec<Ast>) -> Ast {
2395         Ast::Concat(ast::Concat { span: span, asts: asts })
2396     }
2397 
2398     /// Create an alternation with the given span.
alt(range: Range<usize>, asts: Vec<Ast>) -> Ast2399     fn alt(range: Range<usize>, asts: Vec<Ast>) -> Ast {
2400         Ast::Alternation(ast::Alternation { span: span(range), asts: asts })
2401     }
2402 
2403     /// Create a capturing group with the given span.
group(range: Range<usize>, index: u32, ast: Ast) -> Ast2404     fn group(range: Range<usize>, index: u32, ast: Ast) -> Ast {
2405         Ast::Group(ast::Group {
2406             span: span(range),
2407             kind: ast::GroupKind::CaptureIndex(index),
2408             ast: Box::new(ast),
2409         })
2410     }
2411 
2412     /// Create an ast::SetFlags.
2413     ///
2414     /// The given pattern should be the full pattern string. The range given
2415     /// should correspond to the byte offsets where the flag set occurs.
2416     ///
2417     /// If negated is true, then the set is interpreted as beginning with a
2418     /// negation.
flag_set( pat: &str, range: Range<usize>, flag: ast::Flag, negated: bool, ) -> Ast2419     fn flag_set(
2420         pat: &str,
2421         range: Range<usize>,
2422         flag: ast::Flag,
2423         negated: bool,
2424     ) -> Ast {
2425         let mut items = vec![
2426             ast::FlagsItem {
2427                 span: span_range(pat, (range.end - 2)..(range.end - 1)),
2428                 kind: ast::FlagsItemKind::Flag(flag),
2429             },
2430         ];
2431         if negated {
2432             items.insert(0, ast::FlagsItem {
2433                 span: span_range(pat, (range.start + 2)..(range.end - 2)),
2434                 kind: ast::FlagsItemKind::Negation,
2435             });
2436         }
2437         Ast::Flags(ast::SetFlags {
2438             span: span_range(pat, range.clone()),
2439             flags: ast::Flags {
2440                 span: span_range(pat, (range.start + 2)..(range.end - 1)),
2441                 items: items,
2442             },
2443         })
2444     }
2445 
2446     #[test]
parse_nest_limit()2447     fn parse_nest_limit() {
2448         // A nest limit of 0 still allows some types of regexes.
2449         assert_eq!(
2450             parser_nest_limit("", 0).parse(),
2451             Ok(Ast::Empty(span(0..0))));
2452         assert_eq!(
2453             parser_nest_limit("a", 0).parse(),
2454             Ok(lit('a', 0)));
2455 
2456         // Test repetition operations, which require one level of nesting.
2457         assert_eq!(
2458             parser_nest_limit("a+", 0).parse().unwrap_err(),
2459             TestError {
2460                 span: span(0..2),
2461                 kind: ast::ErrorKind::NestLimitExceeded(0),
2462             });
2463         assert_eq!(
2464             parser_nest_limit("a+", 1).parse(),
2465             Ok(Ast::Repetition(ast::Repetition {
2466                 span: span(0..2),
2467                 op: ast::RepetitionOp {
2468                     span: span(1..2),
2469                     kind: ast::RepetitionKind::OneOrMore,
2470                 },
2471                 greedy: true,
2472                 ast: Box::new(lit('a', 0)),
2473             })));
2474         assert_eq!(
2475             parser_nest_limit("(a)+", 1).parse().unwrap_err(),
2476             TestError {
2477                 span: span(0..3),
2478                 kind: ast::ErrorKind::NestLimitExceeded(1),
2479             });
2480         assert_eq!(
2481             parser_nest_limit("a+*", 1).parse().unwrap_err(),
2482             TestError {
2483                 span: span(0..2),
2484                 kind: ast::ErrorKind::NestLimitExceeded(1),
2485             });
2486         assert_eq!(
2487             parser_nest_limit("a+*", 2).parse(),
2488             Ok(Ast::Repetition(ast::Repetition {
2489                 span: span(0..3),
2490                 op: ast::RepetitionOp {
2491                     span: span(2..3),
2492                     kind: ast::RepetitionKind::ZeroOrMore,
2493                 },
2494                 greedy: true,
2495                 ast: Box::new(Ast::Repetition(ast::Repetition {
2496                     span: span(0..2),
2497                     op: ast::RepetitionOp {
2498                         span: span(1..2),
2499                         kind: ast::RepetitionKind::OneOrMore,
2500                     },
2501                     greedy: true,
2502                     ast: Box::new(lit('a', 0)),
2503                 })),
2504             })));
2505 
2506         // Test concatenations. A concatenation requires one level of nesting.
2507         assert_eq!(
2508             parser_nest_limit("ab", 0).parse().unwrap_err(),
2509             TestError {
2510                 span: span(0..2),
2511                 kind: ast::ErrorKind::NestLimitExceeded(0),
2512             });
2513         assert_eq!(
2514             parser_nest_limit("ab", 1).parse(),
2515             Ok(concat(0..2, vec![lit('a', 0), lit('b', 1)])));
2516         assert_eq!(
2517             parser_nest_limit("abc", 1).parse(),
2518             Ok(concat(0..3, vec![lit('a', 0), lit('b', 1), lit('c', 2)])));
2519 
2520         // Test alternations. An alternation requires one level of nesting.
2521         assert_eq!(
2522             parser_nest_limit("a|b", 0).parse().unwrap_err(),
2523             TestError {
2524                 span: span(0..3),
2525                 kind: ast::ErrorKind::NestLimitExceeded(0),
2526             });
2527         assert_eq!(
2528             parser_nest_limit("a|b", 1).parse(),
2529             Ok(alt(0..3, vec![lit('a', 0), lit('b', 2)])));
2530         assert_eq!(
2531             parser_nest_limit("a|b|c", 1).parse(),
2532             Ok(alt(0..5, vec![lit('a', 0), lit('b', 2), lit('c', 4)])));
2533 
2534         // Test character classes. Classes form their own mini-recursive
2535         // syntax!
2536         assert_eq!(
2537             parser_nest_limit("[a]", 0).parse().unwrap_err(),
2538             TestError {
2539                 span: span(0..3),
2540                 kind: ast::ErrorKind::NestLimitExceeded(0),
2541             });
2542         assert_eq!(
2543             parser_nest_limit("[a]", 1).parse(),
2544             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
2545                 span: span(0..3),
2546                 negated: false,
2547                 kind: ast::ClassSet::Item(
2548                     ast::ClassSetItem::Literal(ast::Literal {
2549                         span: span(1..2),
2550                         kind: ast::LiteralKind::Verbatim,
2551                         c: 'a',
2552                     })
2553                 ),
2554             }))));
2555         assert_eq!(
2556             parser_nest_limit("[ab]", 1).parse().unwrap_err(),
2557             TestError {
2558                 span: span(1..3),
2559                 kind: ast::ErrorKind::NestLimitExceeded(1),
2560             });
2561         assert_eq!(
2562             parser_nest_limit("[ab[cd]]", 2).parse().unwrap_err(),
2563             TestError {
2564                 span: span(3..7),
2565                 kind: ast::ErrorKind::NestLimitExceeded(2),
2566             });
2567         assert_eq!(
2568             parser_nest_limit("[ab[cd]]", 3).parse().unwrap_err(),
2569             TestError {
2570                 span: span(4..6),
2571                 kind: ast::ErrorKind::NestLimitExceeded(3),
2572             });
2573         assert_eq!(
2574             parser_nest_limit("[a--b]", 1).parse().unwrap_err(),
2575             TestError {
2576                 span: span(1..5),
2577                 kind: ast::ErrorKind::NestLimitExceeded(1),
2578             });
2579         assert_eq!(
2580             parser_nest_limit("[a--bc]", 2).parse().unwrap_err(),
2581             TestError {
2582                 span: span(4..6),
2583                 kind: ast::ErrorKind::NestLimitExceeded(2),
2584             });
2585     }
2586 
2587     #[test]
parse_comments()2588     fn parse_comments() {
2589         let pat = "(?x)
2590 # This is comment 1.
2591 foo # This is comment 2.
2592   # This is comment 3.
2593 bar
2594 # This is comment 4.";
2595         let astc = parser(pat).parse_with_comments().unwrap();
2596         assert_eq!(
2597             astc.ast,
2598             concat_with(span_range(pat, 0..pat.len()), vec![
2599                 flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2600                 lit_with('f', span_range(pat, 26..27)),
2601                 lit_with('o', span_range(pat, 27..28)),
2602                 lit_with('o', span_range(pat, 28..29)),
2603                 lit_with('b', span_range(pat, 74..75)),
2604                 lit_with('a', span_range(pat, 75..76)),
2605                 lit_with('r', span_range(pat, 76..77)),
2606             ]));
2607         assert_eq!(astc.comments, vec![
2608             ast::Comment {
2609                 span: span_range(pat, 5..26),
2610                 comment: s(" This is comment 1."),
2611             },
2612             ast::Comment {
2613                 span: span_range(pat, 30..51),
2614                 comment: s(" This is comment 2."),
2615             },
2616             ast::Comment {
2617                 span: span_range(pat, 53..74),
2618                 comment: s(" This is comment 3."),
2619             },
2620             ast::Comment {
2621                 span: span_range(pat, 78..98),
2622                 comment: s(" This is comment 4."),
2623             },
2624         ]);
2625     }
2626 
2627     #[test]
parse_holistic()2628     fn parse_holistic() {
2629         assert_eq!(
2630             parser("]").parse(),
2631             Ok(lit(']', 0)));
2632         assert_eq!(
2633             parser(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#\&\-\~").parse(),
2634             Ok(concat(0..36, vec![
2635                 punct_lit('\\', span(0..2)),
2636                 punct_lit('.', span(2..4)),
2637                 punct_lit('+', span(4..6)),
2638                 punct_lit('*', span(6..8)),
2639                 punct_lit('?', span(8..10)),
2640                 punct_lit('(', span(10..12)),
2641                 punct_lit(')', span(12..14)),
2642                 punct_lit('|', span(14..16)),
2643                 punct_lit('[', span(16..18)),
2644                 punct_lit(']', span(18..20)),
2645                 punct_lit('{', span(20..22)),
2646                 punct_lit('}', span(22..24)),
2647                 punct_lit('^', span(24..26)),
2648                 punct_lit('$', span(26..28)),
2649                 punct_lit('#', span(28..30)),
2650                 punct_lit('&', span(30..32)),
2651                 punct_lit('-', span(32..34)),
2652                 punct_lit('~', span(34..36)),
2653             ])));
2654     }
2655 
2656     #[test]
parse_ignore_whitespace()2657     fn parse_ignore_whitespace() {
2658         // Test that basic whitespace insensitivity works.
2659         let pat = "(?x)a b";
2660         assert_eq!(
2661             parser(pat).parse(),
2662             Ok(concat_with(nspan(npos(0, 1, 1), npos(7, 1, 8)), vec![
2663                 flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2664                 lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
2665                 lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
2666             ])));
2667 
2668         // Test that we can toggle whitespace insensitivity.
2669         let pat = "(?x)a b(?-x)a b";
2670         assert_eq!(
2671             parser(pat).parse(),
2672             Ok(concat_with(nspan(npos(0, 1, 1), npos(15, 1, 16)), vec![
2673                 flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2674                 lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
2675                 lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
2676                 flag_set(pat, 7..12, ast::Flag::IgnoreWhitespace, true),
2677                 lit_with('a', nspan(npos(12, 1, 13), npos(13, 1, 14))),
2678                 lit_with(' ', nspan(npos(13, 1, 14), npos(14, 1, 15))),
2679                 lit_with('b', nspan(npos(14, 1, 15), npos(15, 1, 16))),
2680             ])));
2681 
2682         // Test that nesting whitespace insensitive flags works.
2683         let pat = "a (?x:a )a ";
2684         assert_eq!(
2685             parser(pat).parse(),
2686             Ok(concat_with(span_range(pat, 0..11), vec![
2687                 lit_with('a', span_range(pat, 0..1)),
2688                 lit_with(' ', span_range(pat, 1..2)),
2689                 Ast::Group(ast::Group {
2690                     span: span_range(pat, 2..9),
2691                     kind: ast::GroupKind::NonCapturing(ast::Flags {
2692                         span: span_range(pat, 4..5),
2693                         items: vec![
2694                             ast::FlagsItem {
2695                                 span: span_range(pat, 4..5),
2696                                 kind: ast::FlagsItemKind::Flag(
2697                                     ast::Flag::IgnoreWhitespace),
2698                             },
2699                         ],
2700                     }),
2701                     ast: Box::new(lit_with('a', span_range(pat, 6..7))),
2702                 }),
2703                 lit_with('a', span_range(pat, 9..10)),
2704                 lit_with(' ', span_range(pat, 10..11)),
2705             ])));
2706 
2707         // Test that whitespace after an opening paren is insignificant.
2708         let pat = "(?x)( ?P<foo> a )";
2709         assert_eq!(
2710             parser(pat).parse(),
2711             Ok(concat_with(span_range(pat, 0..pat.len()), vec![
2712                 flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2713                 Ast::Group(ast::Group {
2714                     span: span_range(pat, 4..pat.len()),
2715                     kind: ast::GroupKind::CaptureName(ast::CaptureName {
2716                         span: span_range(pat, 9..12),
2717                         name: s("foo"),
2718                         index: 1,
2719                     }),
2720                     ast: Box::new(lit_with('a', span_range(pat, 14..15))),
2721                 }),
2722             ])));
2723         let pat = "(?x)(  a )";
2724         assert_eq!(
2725             parser(pat).parse(),
2726             Ok(concat_with(span_range(pat, 0..pat.len()), vec![
2727                 flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2728                 Ast::Group(ast::Group {
2729                     span: span_range(pat, 4..pat.len()),
2730                     kind: ast::GroupKind::CaptureIndex(1),
2731                     ast: Box::new(lit_with('a', span_range(pat, 7..8))),
2732                 }),
2733             ])));
2734         let pat = "(?x)(  ?:  a )";
2735         assert_eq!(
2736             parser(pat).parse(),
2737             Ok(concat_with(span_range(pat, 0..pat.len()), vec![
2738                 flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2739                 Ast::Group(ast::Group {
2740                     span: span_range(pat, 4..pat.len()),
2741                     kind: ast::GroupKind::NonCapturing(ast::Flags {
2742                         span: span_range(pat, 8..8),
2743                         items: vec![],
2744                     }),
2745                     ast: Box::new(lit_with('a', span_range(pat, 11..12))),
2746                 }),
2747             ])));
2748         let pat = r"(?x)\x { 53 }";
2749         assert_eq!(
2750             parser(pat).parse(),
2751             Ok(concat_with(span_range(pat, 0..pat.len()), vec![
2752                 flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2753                 Ast::Literal(ast::Literal {
2754                     span: span(4..13),
2755                     kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
2756                     c: 'S',
2757                 }),
2758             ])));
2759 
2760         // Test that whitespace after an escape is OK.
2761         let pat = r"(?x)\ ";
2762         assert_eq!(
2763             parser(pat).parse(),
2764             Ok(concat_with(span_range(pat, 0..pat.len()), vec![
2765                 flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2766                 Ast::Literal(ast::Literal {
2767                     span: span_range(pat, 4..6),
2768                     kind: ast::LiteralKind::Special(
2769                         ast::SpecialLiteralKind::Space),
2770                     c: ' ',
2771                 }),
2772             ])));
2773         // ... but only when `x` mode is enabled.
2774         let pat = r"\ ";
2775         assert_eq!(
2776             parser(pat).parse().unwrap_err(),
2777             TestError {
2778                 span: span_range(pat, 0..2),
2779                 kind: ast::ErrorKind::EscapeUnrecognized,
2780             });
2781     }
2782 
2783     #[test]
parse_newlines()2784     fn parse_newlines() {
2785         let pat = ".\n.";
2786         assert_eq!(
2787             parser(pat).parse(),
2788             Ok(concat_with(span_range(pat, 0..3), vec![
2789                 Ast::Dot(span_range(pat, 0..1)),
2790                 lit_with('\n', span_range(pat, 1..2)),
2791                 Ast::Dot(span_range(pat, 2..3)),
2792             ])));
2793 
2794         let pat = "foobar\nbaz\nquux\n";
2795         assert_eq!(
2796             parser(pat).parse(),
2797             Ok(concat_with(span_range(pat, 0..pat.len()), vec![
2798                 lit_with('f', nspan(npos(0, 1, 1), npos(1, 1, 2))),
2799                 lit_with('o', nspan(npos(1, 1, 2), npos(2, 1, 3))),
2800                 lit_with('o', nspan(npos(2, 1, 3), npos(3, 1, 4))),
2801                 lit_with('b', nspan(npos(3, 1, 4), npos(4, 1, 5))),
2802                 lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
2803                 lit_with('r', nspan(npos(5, 1, 6), npos(6, 1, 7))),
2804                 lit_with('\n', nspan(npos(6, 1, 7), npos(7, 2, 1))),
2805                 lit_with('b', nspan(npos(7, 2, 1), npos(8, 2, 2))),
2806                 lit_with('a', nspan(npos(8, 2, 2), npos(9, 2, 3))),
2807                 lit_with('z', nspan(npos(9, 2, 3), npos(10, 2, 4))),
2808                 lit_with('\n', nspan(npos(10, 2, 4), npos(11, 3, 1))),
2809                 lit_with('q', nspan(npos(11, 3, 1), npos(12, 3, 2))),
2810                 lit_with('u', nspan(npos(12, 3, 2), npos(13, 3, 3))),
2811                 lit_with('u', nspan(npos(13, 3, 3), npos(14, 3, 4))),
2812                 lit_with('x', nspan(npos(14, 3, 4), npos(15, 3, 5))),
2813                 lit_with('\n', nspan(npos(15, 3, 5), npos(16, 4, 1))),
2814             ])));
2815     }
2816 
2817     #[test]
parse_uncounted_repetition()2818     fn parse_uncounted_repetition() {
2819         assert_eq!(
2820             parser(r"a*").parse(),
2821             Ok(Ast::Repetition(ast::Repetition {
2822                 span: span(0..2),
2823                 op: ast::RepetitionOp {
2824                     span: span(1..2),
2825                     kind: ast::RepetitionKind::ZeroOrMore,
2826                 },
2827                 greedy: true,
2828                 ast: Box::new(lit('a', 0)),
2829             })));
2830         assert_eq!(
2831             parser(r"a+").parse(),
2832             Ok(Ast::Repetition(ast::Repetition {
2833                 span: span(0..2),
2834                 op: ast::RepetitionOp {
2835                     span: span(1..2),
2836                     kind: ast::RepetitionKind::OneOrMore,
2837                 },
2838                 greedy: true,
2839                 ast: Box::new(lit('a', 0)),
2840             })));
2841 
2842         assert_eq!(
2843             parser(r"a?").parse(),
2844             Ok(Ast::Repetition(ast::Repetition {
2845                 span: span(0..2),
2846                 op: ast::RepetitionOp {
2847                     span: span(1..2),
2848                     kind: ast::RepetitionKind::ZeroOrOne,
2849                 },
2850                 greedy: true,
2851                 ast: Box::new(lit('a', 0)),
2852             })));
2853         assert_eq!(
2854             parser(r"a??").parse(),
2855             Ok(Ast::Repetition(ast::Repetition {
2856                 span: span(0..3),
2857                 op: ast::RepetitionOp {
2858                     span: span(1..3),
2859                     kind: ast::RepetitionKind::ZeroOrOne,
2860                 },
2861                 greedy: false,
2862                 ast: Box::new(lit('a', 0)),
2863             })));
2864         assert_eq!(
2865             parser(r"a?").parse(),
2866             Ok(Ast::Repetition(ast::Repetition {
2867                 span: span(0..2),
2868                 op: ast::RepetitionOp {
2869                     span: span(1..2),
2870                     kind: ast::RepetitionKind::ZeroOrOne,
2871                 },
2872                 greedy: true,
2873                 ast: Box::new(lit('a', 0)),
2874             })));
2875         assert_eq!(
2876             parser(r"a?b").parse(),
2877             Ok(concat(0..3, vec![
2878                 Ast::Repetition(ast::Repetition {
2879                     span: span(0..2),
2880                     op: ast::RepetitionOp {
2881                         span: span(1..2),
2882                         kind: ast::RepetitionKind::ZeroOrOne,
2883                     },
2884                     greedy: true,
2885                     ast: Box::new(lit('a', 0)),
2886                 }),
2887                 lit('b', 2),
2888             ])));
2889         assert_eq!(
2890             parser(r"a??b").parse(),
2891             Ok(concat(0..4, vec![
2892                 Ast::Repetition(ast::Repetition {
2893                     span: span(0..3),
2894                     op: ast::RepetitionOp {
2895                         span: span(1..3),
2896                         kind: ast::RepetitionKind::ZeroOrOne,
2897                     },
2898                     greedy: false,
2899                     ast: Box::new(lit('a', 0)),
2900                 }),
2901                 lit('b', 3),
2902             ])));
2903         assert_eq!(
2904             parser(r"ab?").parse(),
2905             Ok(concat(0..3, vec![
2906                 lit('a', 0),
2907                 Ast::Repetition(ast::Repetition {
2908                     span: span(1..3),
2909                     op: ast::RepetitionOp {
2910                         span: span(2..3),
2911                         kind: ast::RepetitionKind::ZeroOrOne,
2912                     },
2913                     greedy: true,
2914                     ast: Box::new(lit('b', 1)),
2915                 }),
2916             ])));
2917         assert_eq!(
2918             parser(r"(ab)?").parse(),
2919             Ok(Ast::Repetition(ast::Repetition {
2920                 span: span(0..5),
2921                 op: ast::RepetitionOp {
2922                     span: span(4..5),
2923                     kind: ast::RepetitionKind::ZeroOrOne,
2924                 },
2925                 greedy: true,
2926                 ast: Box::new(group(0..4, 1, concat(1..3, vec![
2927                     lit('a', 1),
2928                     lit('b', 2),
2929                 ]))),
2930             })));
2931         assert_eq!(
2932             parser(r"|a?").parse(),
2933             Ok(alt(0..3, vec![
2934                 Ast::Empty(span(0..0)),
2935                 Ast::Repetition(ast::Repetition {
2936                     span: span(1..3),
2937                     op: ast::RepetitionOp {
2938                         span: span(2..3),
2939                         kind: ast::RepetitionKind::ZeroOrOne,
2940                     },
2941                     greedy: true,
2942                     ast: Box::new(lit('a', 1)),
2943                 }),
2944             ])));
2945 
2946         assert_eq!(
2947             parser(r"*").parse().unwrap_err(),
2948             TestError {
2949                 span: span(0..0),
2950                 kind: ast::ErrorKind::RepetitionMissing,
2951             });
2952         assert_eq!(
2953             parser(r"(?i)*").parse().unwrap_err(),
2954             TestError {
2955                 span: span(4..4),
2956                 kind: ast::ErrorKind::RepetitionMissing,
2957             });
2958         assert_eq!(
2959             parser(r"(*)").parse().unwrap_err(),
2960             TestError {
2961                 span: span(1..1),
2962                 kind: ast::ErrorKind::RepetitionMissing,
2963             });
2964         assert_eq!(
2965             parser(r"(?:?)").parse().unwrap_err(),
2966             TestError {
2967                 span: span(3..3),
2968                 kind: ast::ErrorKind::RepetitionMissing,
2969             });
2970         assert_eq!(
2971             parser(r"+").parse().unwrap_err(),
2972             TestError {
2973                 span: span(0..0),
2974                 kind: ast::ErrorKind::RepetitionMissing,
2975             });
2976         assert_eq!(
2977             parser(r"?").parse().unwrap_err(),
2978             TestError {
2979                 span: span(0..0),
2980                 kind: ast::ErrorKind::RepetitionMissing,
2981             });
2982         assert_eq!(
2983             parser(r"(?)").parse().unwrap_err(),
2984             TestError {
2985                 span: span(1..1),
2986                 kind: ast::ErrorKind::RepetitionMissing,
2987             });
2988         assert_eq!(
2989             parser(r"|*").parse().unwrap_err(),
2990             TestError {
2991                 span: span(1..1),
2992                 kind: ast::ErrorKind::RepetitionMissing,
2993             });
2994         assert_eq!(
2995             parser(r"|+").parse().unwrap_err(),
2996             TestError {
2997                 span: span(1..1),
2998                 kind: ast::ErrorKind::RepetitionMissing,
2999             });
3000         assert_eq!(
3001             parser(r"|?").parse().unwrap_err(),
3002             TestError {
3003                 span: span(1..1),
3004                 kind: ast::ErrorKind::RepetitionMissing,
3005             });
3006     }
3007 
3008     #[test]
parse_counted_repetition()3009     fn parse_counted_repetition() {
3010         assert_eq!(
3011             parser(r"a{5}").parse(),
3012             Ok(Ast::Repetition(ast::Repetition {
3013                 span: span(0..4),
3014                 op: ast::RepetitionOp {
3015                     span: span(1..4),
3016                     kind: ast::RepetitionKind::Range(
3017                         ast::RepetitionRange::Exactly(5)),
3018                 },
3019                 greedy: true,
3020                 ast: Box::new(lit('a', 0)),
3021             })));
3022         assert_eq!(
3023             parser(r"a{5,}").parse(),
3024             Ok(Ast::Repetition(ast::Repetition {
3025                 span: span(0..5),
3026                 op: ast::RepetitionOp {
3027                     span: span(1..5),
3028                     kind: ast::RepetitionKind::Range(
3029                         ast::RepetitionRange::AtLeast(5)),
3030                 },
3031                 greedy: true,
3032                 ast: Box::new(lit('a', 0)),
3033             })));
3034         assert_eq!(
3035             parser(r"a{5,9}").parse(),
3036             Ok(Ast::Repetition(ast::Repetition {
3037                 span: span(0..6),
3038                 op: ast::RepetitionOp {
3039                     span: span(1..6),
3040                     kind: ast::RepetitionKind::Range(
3041                         ast::RepetitionRange::Bounded(5, 9)),
3042                 },
3043                 greedy: true,
3044                 ast: Box::new(lit('a', 0)),
3045             })));
3046         assert_eq!(
3047             parser(r"a{5}?").parse(),
3048             Ok(Ast::Repetition(ast::Repetition {
3049                 span: span(0..5),
3050                 op: ast::RepetitionOp {
3051                     span: span(1..5),
3052                     kind: ast::RepetitionKind::Range(
3053                         ast::RepetitionRange::Exactly(5)),
3054                 },
3055                 greedy: false,
3056                 ast: Box::new(lit('a', 0)),
3057             })));
3058         assert_eq!(
3059             parser(r"ab{5}").parse(),
3060             Ok(concat(0..5, vec![
3061                 lit('a', 0),
3062                 Ast::Repetition(ast::Repetition {
3063                     span: span(1..5),
3064                     op: ast::RepetitionOp {
3065                         span: span(2..5),
3066                         kind: ast::RepetitionKind::Range(
3067                             ast::RepetitionRange::Exactly(5)),
3068                     },
3069                     greedy: true,
3070                     ast: Box::new(lit('b', 1)),
3071                 }),
3072             ])));
3073         assert_eq!(
3074             parser(r"ab{5}c").parse(),
3075             Ok(concat(0..6, vec![
3076                 lit('a', 0),
3077                 Ast::Repetition(ast::Repetition {
3078                     span: span(1..5),
3079                     op: ast::RepetitionOp {
3080                         span: span(2..5),
3081                         kind: ast::RepetitionKind::Range(
3082                             ast::RepetitionRange::Exactly(5)),
3083                     },
3084                     greedy: true,
3085                     ast: Box::new(lit('b', 1)),
3086                 }),
3087                 lit('c', 5),
3088             ])));
3089 
3090         assert_eq!(
3091             parser(r"a{ 5 }").parse(),
3092             Ok(Ast::Repetition(ast::Repetition {
3093                 span: span(0..6),
3094                 op: ast::RepetitionOp {
3095                     span: span(1..6),
3096                     kind: ast::RepetitionKind::Range(
3097                         ast::RepetitionRange::Exactly(5)),
3098                 },
3099                 greedy: true,
3100                 ast: Box::new(lit('a', 0)),
3101             })));
3102         assert_eq!(
3103             parser(r"a{ 5 , 9 }").parse(),
3104             Ok(Ast::Repetition(ast::Repetition {
3105                 span: span(0..10),
3106                 op: ast::RepetitionOp {
3107                     span: span(1..10),
3108                     kind: ast::RepetitionKind::Range(
3109                         ast::RepetitionRange::Bounded(5, 9)),
3110                 },
3111                 greedy: true,
3112                 ast: Box::new(lit('a', 0)),
3113             })));
3114         assert_eq!(
3115             parser_ignore_whitespace(r"a{5,9} ?").parse(),
3116             Ok(Ast::Repetition(ast::Repetition {
3117                 span: span(0..8),
3118                 op: ast::RepetitionOp {
3119                     span: span(1..8),
3120                     kind: ast::RepetitionKind::Range(
3121                         ast::RepetitionRange::Bounded(5, 9)),
3122                 },
3123                 greedy: false,
3124                 ast: Box::new(lit('a', 0)),
3125             })));
3126 
3127         assert_eq!(
3128             parser(r"a{").parse().unwrap_err(),
3129             TestError {
3130                 span: span(1..2),
3131                 kind: ast::ErrorKind::RepetitionCountUnclosed,
3132             });
3133         assert_eq!(
3134             parser(r"a{}").parse().unwrap_err(),
3135             TestError {
3136                 span: span(2..2),
3137                 kind: ast::ErrorKind::DecimalEmpty,
3138             });
3139         assert_eq!(
3140             parser(r"a{a").parse().unwrap_err(),
3141             TestError {
3142                 span: span(2..2),
3143                 kind: ast::ErrorKind::DecimalEmpty,
3144             });
3145         assert_eq!(
3146             parser(r"a{9999999999}").parse().unwrap_err(),
3147             TestError {
3148                 span: span(2..12),
3149                 kind: ast::ErrorKind::DecimalInvalid,
3150             });
3151         assert_eq!(
3152             parser(r"a{9").parse().unwrap_err(),
3153             TestError {
3154                 span: span(1..3),
3155                 kind: ast::ErrorKind::RepetitionCountUnclosed,
3156             });
3157         assert_eq!(
3158             parser(r"a{9,a").parse().unwrap_err(),
3159             TestError {
3160                 span: span(4..4),
3161                 kind: ast::ErrorKind::DecimalEmpty,
3162             });
3163         assert_eq!(
3164             parser(r"a{9,9999999999}").parse().unwrap_err(),
3165             TestError {
3166                 span: span(4..14),
3167                 kind: ast::ErrorKind::DecimalInvalid,
3168             });
3169         assert_eq!(
3170             parser(r"a{9,").parse().unwrap_err(),
3171             TestError {
3172                 span: span(1..4),
3173                 kind: ast::ErrorKind::RepetitionCountUnclosed,
3174             });
3175         assert_eq!(
3176             parser(r"a{9,11").parse().unwrap_err(),
3177             TestError {
3178                 span: span(1..6),
3179                 kind: ast::ErrorKind::RepetitionCountUnclosed,
3180             });
3181         assert_eq!(
3182             parser(r"a{2,1}").parse().unwrap_err(),
3183             TestError {
3184                 span: span(1..6),
3185                 kind: ast::ErrorKind::RepetitionCountInvalid,
3186             });
3187         assert_eq!(
3188             parser(r"{5}").parse().unwrap_err(),
3189             TestError {
3190                 span: span(0..0),
3191                 kind: ast::ErrorKind::RepetitionMissing,
3192             });
3193         assert_eq!(
3194             parser(r"|{5}").parse().unwrap_err(),
3195             TestError {
3196                 span: span(1..1),
3197                 kind: ast::ErrorKind::RepetitionMissing,
3198             });
3199     }
3200 
3201     #[test]
parse_alternate()3202     fn parse_alternate() {
3203         assert_eq!(
3204             parser(r"a|b").parse(),
3205             Ok(Ast::Alternation(ast::Alternation {
3206                 span: span(0..3),
3207                 asts: vec![lit('a', 0), lit('b', 2)],
3208             })));
3209         assert_eq!(
3210             parser(r"(a|b)").parse(),
3211             Ok(group(0..5, 1, Ast::Alternation(ast::Alternation {
3212                 span: span(1..4),
3213                 asts: vec![lit('a', 1), lit('b', 3)],
3214             }))));
3215 
3216         assert_eq!(
3217             parser(r"a|b|c").parse(),
3218             Ok(Ast::Alternation(ast::Alternation {
3219                 span: span(0..5),
3220                 asts: vec![lit('a', 0), lit('b', 2), lit('c', 4)],
3221             })));
3222         assert_eq!(
3223             parser(r"ax|by|cz").parse(),
3224             Ok(Ast::Alternation(ast::Alternation {
3225                 span: span(0..8),
3226                 asts: vec![
3227                     concat(0..2, vec![lit('a', 0), lit('x', 1)]),
3228                     concat(3..5, vec![lit('b', 3), lit('y', 4)]),
3229                     concat(6..8, vec![lit('c', 6), lit('z', 7)]),
3230                 ],
3231             })));
3232         assert_eq!(
3233             parser(r"(ax|by|cz)").parse(),
3234             Ok(group(0..10, 1, Ast::Alternation(ast::Alternation {
3235                 span: span(1..9),
3236                 asts: vec![
3237                     concat(1..3, vec![lit('a', 1), lit('x', 2)]),
3238                     concat(4..6, vec![lit('b', 4), lit('y', 5)]),
3239                     concat(7..9, vec![lit('c', 7), lit('z', 8)]),
3240                 ],
3241             }))));
3242         assert_eq!(
3243             parser(r"(ax|(by|(cz)))").parse(),
3244             Ok(group(0..14, 1, alt(1..13, vec![
3245                 concat(1..3, vec![lit('a', 1), lit('x', 2)]),
3246                 group(4..13, 2, alt(5..12, vec![
3247                     concat(5..7, vec![lit('b', 5), lit('y', 6)]),
3248                     group(8..12, 3, concat(9..11, vec![
3249                         lit('c', 9),
3250                         lit('z', 10),
3251                     ])),
3252                 ])),
3253             ]))));
3254 
3255         assert_eq!(
3256             parser(r"|").parse(), Ok(alt(0..1, vec![
3257                 Ast::Empty(span(0..0)), Ast::Empty(span(1..1)),
3258             ])));
3259         assert_eq!(
3260             parser(r"||").parse(), Ok(alt(0..2, vec![
3261                 Ast::Empty(span(0..0)),
3262                 Ast::Empty(span(1..1)),
3263                 Ast::Empty(span(2..2)),
3264             ])));
3265         assert_eq!(
3266             parser(r"a|").parse(), Ok(alt(0..2, vec![
3267                 lit('a', 0), Ast::Empty(span(2..2)),
3268             ])));
3269         assert_eq!(
3270             parser(r"|a").parse(), Ok(alt(0..2, vec![
3271                 Ast::Empty(span(0..0)), lit('a', 1),
3272             ])));
3273 
3274         assert_eq!(
3275             parser(r"(|)").parse(), Ok(group(0..3, 1, alt(1..2, vec![
3276                 Ast::Empty(span(1..1)), Ast::Empty(span(2..2)),
3277             ]))));
3278         assert_eq!(
3279             parser(r"(a|)").parse(), Ok(group(0..4, 1, alt(1..3, vec![
3280                 lit('a', 1), Ast::Empty(span(3..3)),
3281             ]))));
3282         assert_eq!(
3283             parser(r"(|a)").parse(), Ok(group(0..4, 1, alt(1..3, vec![
3284                 Ast::Empty(span(1..1)), lit('a', 2),
3285             ]))));
3286 
3287         assert_eq!(
3288             parser(r"a|b)").parse().unwrap_err(),
3289             TestError {
3290                 span: span(3..4),
3291                 kind: ast::ErrorKind::GroupUnopened,
3292             });
3293         assert_eq!(
3294             parser(r"(a|b").parse().unwrap_err(),
3295             TestError {
3296                 span: span(0..1),
3297                 kind: ast::ErrorKind::GroupUnclosed,
3298             });
3299     }
3300 
3301     #[test]
parse_unsupported_lookaround()3302     fn parse_unsupported_lookaround() {
3303         assert_eq!(
3304             parser(r"(?=a)").parse().unwrap_err(),
3305             TestError {
3306                 span: span(0..3),
3307                 kind: ast::ErrorKind::UnsupportedLookAround,
3308             });
3309         assert_eq!(
3310             parser(r"(?!a)").parse().unwrap_err(),
3311             TestError {
3312                 span: span(0..3),
3313                 kind: ast::ErrorKind::UnsupportedLookAround,
3314             });
3315         assert_eq!(
3316             parser(r"(?<=a)").parse().unwrap_err(),
3317             TestError {
3318                 span: span(0..4),
3319                 kind: ast::ErrorKind::UnsupportedLookAround,
3320             });
3321         assert_eq!(
3322             parser(r"(?<!a)").parse().unwrap_err(),
3323             TestError {
3324                 span: span(0..4),
3325                 kind: ast::ErrorKind::UnsupportedLookAround,
3326             });
3327     }
3328 
3329     #[test]
parse_group()3330     fn parse_group() {
3331         assert_eq!(parser("(?i)").parse(), Ok(Ast::Flags(ast::SetFlags {
3332             span: span(0..4),
3333             flags: ast::Flags {
3334                 span: span(2..3),
3335                 items: vec![ast::FlagsItem {
3336                     span: span(2..3),
3337                     kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
3338                 }],
3339             },
3340         })));
3341         assert_eq!(parser("(?iU)").parse(), Ok(Ast::Flags(ast::SetFlags {
3342             span: span(0..5),
3343             flags: ast::Flags {
3344                 span: span(2..4),
3345                 items: vec![
3346                     ast::FlagsItem {
3347                         span: span(2..3),
3348                         kind: ast::FlagsItemKind::Flag(
3349                             ast::Flag::CaseInsensitive),
3350                     },
3351                     ast::FlagsItem {
3352                         span: span(3..4),
3353                         kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
3354                     },
3355                 ],
3356             },
3357         })));
3358         assert_eq!(parser("(?i-U)").parse(), Ok(Ast::Flags(ast::SetFlags {
3359             span: span(0..6),
3360             flags: ast::Flags {
3361                 span: span(2..5),
3362                 items: vec![
3363                     ast::FlagsItem {
3364                         span: span(2..3),
3365                         kind: ast::FlagsItemKind::Flag(
3366                             ast::Flag::CaseInsensitive),
3367                     },
3368                     ast::FlagsItem {
3369                         span: span(3..4),
3370                         kind: ast::FlagsItemKind::Negation,
3371                     },
3372                     ast::FlagsItem {
3373                         span: span(4..5),
3374                         kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
3375                     },
3376                 ],
3377             },
3378         })));
3379 
3380         assert_eq!(parser("()").parse(), Ok(Ast::Group(ast::Group {
3381             span: span(0..2),
3382             kind: ast::GroupKind::CaptureIndex(1),
3383             ast: Box::new(Ast::Empty(span(1..1))),
3384         })));
3385         assert_eq!(parser("(a)").parse(), Ok(Ast::Group(ast::Group {
3386             span: span(0..3),
3387             kind: ast::GroupKind::CaptureIndex(1),
3388             ast: Box::new(lit('a', 1)),
3389         })));
3390         assert_eq!(parser("(())").parse(), Ok(Ast::Group(ast::Group {
3391             span: span(0..4),
3392             kind: ast::GroupKind::CaptureIndex(1),
3393             ast: Box::new(Ast::Group(ast::Group {
3394                 span: span(1..3),
3395                 kind: ast::GroupKind::CaptureIndex(2),
3396                 ast: Box::new(Ast::Empty(span(2..2))),
3397             })),
3398         })));
3399 
3400         assert_eq!(parser("(?:a)").parse(), Ok(Ast::Group(ast::Group {
3401             span: span(0..5),
3402             kind: ast::GroupKind::NonCapturing(ast::Flags {
3403                 span: span(2..2),
3404                 items: vec![],
3405             }),
3406             ast: Box::new(lit('a', 3)),
3407         })));
3408 
3409         assert_eq!(parser("(?i:a)").parse(), Ok(Ast::Group(ast::Group {
3410             span: span(0..6),
3411             kind: ast::GroupKind::NonCapturing(ast::Flags {
3412                 span: span(2..3),
3413                 items: vec![
3414                     ast::FlagsItem {
3415                         span: span(2..3),
3416                         kind: ast::FlagsItemKind::Flag(
3417                             ast::Flag::CaseInsensitive),
3418                     },
3419                 ],
3420             }),
3421             ast: Box::new(lit('a', 4)),
3422         })));
3423         assert_eq!(parser("(?i-U:a)").parse(), Ok(Ast::Group(ast::Group {
3424             span: span(0..8),
3425             kind: ast::GroupKind::NonCapturing(ast::Flags {
3426                 span: span(2..5),
3427                 items: vec![
3428                     ast::FlagsItem {
3429                         span: span(2..3),
3430                         kind: ast::FlagsItemKind::Flag(
3431                             ast::Flag::CaseInsensitive),
3432                     },
3433                     ast::FlagsItem {
3434                         span: span(3..4),
3435                         kind: ast::FlagsItemKind::Negation,
3436                     },
3437                     ast::FlagsItem {
3438                         span: span(4..5),
3439                         kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
3440                     },
3441                 ],
3442             }),
3443             ast: Box::new(lit('a', 6)),
3444         })));
3445 
3446         assert_eq!(
3447             parser("(").parse().unwrap_err(),
3448             TestError {
3449                 span: span(0..1),
3450                 kind: ast::ErrorKind::GroupUnclosed,
3451             });
3452         assert_eq!(
3453             parser("(?").parse().unwrap_err(),
3454             TestError {
3455                 span: span(0..1),
3456                 kind: ast::ErrorKind::GroupUnclosed,
3457             });
3458         assert_eq!(
3459             parser("(?P").parse().unwrap_err(),
3460             TestError {
3461                 span: span(2..3),
3462                 kind: ast::ErrorKind::FlagUnrecognized,
3463             });
3464         assert_eq!(
3465             parser("(?P<").parse().unwrap_err(),
3466             TestError {
3467                 span: span(4..4),
3468                 kind: ast::ErrorKind::GroupNameUnexpectedEof,
3469             });
3470         assert_eq!(
3471             parser("(a").parse().unwrap_err(),
3472             TestError {
3473                 span: span(0..1),
3474                 kind: ast::ErrorKind::GroupUnclosed,
3475             });
3476         assert_eq!(
3477             parser("(()").parse().unwrap_err(),
3478             TestError {
3479                 span: span(0..1),
3480                 kind: ast::ErrorKind::GroupUnclosed,
3481             });
3482         assert_eq!(
3483             parser(")").parse().unwrap_err(),
3484             TestError {
3485                 span: span(0..1),
3486                 kind: ast::ErrorKind::GroupUnopened,
3487             });
3488         assert_eq!(
3489             parser("a)").parse().unwrap_err(),
3490             TestError {
3491                 span: span(1..2),
3492                 kind: ast::ErrorKind::GroupUnopened,
3493             });
3494     }
3495 
3496     #[test]
parse_capture_name()3497     fn parse_capture_name() {
3498         assert_eq!(parser("(?P<a>z)").parse(), Ok(Ast::Group(ast::Group {
3499             span: span(0..8),
3500             kind: ast::GroupKind::CaptureName(ast::CaptureName {
3501                 span: span(4..5),
3502                 name: s("a"),
3503                 index: 1,
3504             }),
3505             ast: Box::new(lit('z', 6)),
3506         })));
3507         assert_eq!(parser("(?P<abc>z)").parse(), Ok(Ast::Group(ast::Group {
3508             span: span(0..10),
3509             kind: ast::GroupKind::CaptureName(ast::CaptureName {
3510                 span: span(4..7),
3511                 name: s("abc"),
3512                 index: 1,
3513             }),
3514             ast: Box::new(lit('z', 8)),
3515         })));
3516 
3517         assert_eq!(
3518             parser("(?P<").parse().unwrap_err(),
3519             TestError {
3520                 span: span(4..4),
3521                 kind: ast::ErrorKind::GroupNameUnexpectedEof,
3522             });
3523         assert_eq!(
3524             parser("(?P<>z)").parse().unwrap_err(),
3525             TestError {
3526                 span: span(4..4),
3527                 kind: ast::ErrorKind::GroupNameEmpty,
3528             });
3529         assert_eq!(
3530             parser("(?P<a").parse().unwrap_err(),
3531             TestError {
3532                 span: span(5..5),
3533                 kind: ast::ErrorKind::GroupNameUnexpectedEof,
3534             });
3535         assert_eq!(
3536             parser("(?P<ab").parse().unwrap_err(),
3537             TestError {
3538                 span: span(6..6),
3539                 kind: ast::ErrorKind::GroupNameUnexpectedEof,
3540             });
3541         assert_eq!(
3542             parser("(?P<0a").parse().unwrap_err(),
3543             TestError {
3544                 span: span(4..5),
3545                 kind: ast::ErrorKind::GroupNameInvalid,
3546             });
3547         assert_eq!(
3548             parser("(?P<~").parse().unwrap_err(),
3549             TestError {
3550                 span: span(4..5),
3551                 kind: ast::ErrorKind::GroupNameInvalid,
3552             });
3553         assert_eq!(
3554             parser("(?P<abc~").parse().unwrap_err(),
3555             TestError {
3556                 span: span(7..8),
3557                 kind: ast::ErrorKind::GroupNameInvalid,
3558             });
3559         assert_eq!(
3560             parser("(?P<a>y)(?P<a>z)").parse().unwrap_err(),
3561             TestError {
3562                 span: span(12..13),
3563                 kind: ast::ErrorKind::GroupNameDuplicate {
3564                     original: span(4..5),
3565                 },
3566             });
3567     }
3568 
3569     #[test]
parse_flags()3570     fn parse_flags() {
3571         assert_eq!(parser("i:").parse_flags(), Ok(ast::Flags {
3572             span: span(0..1),
3573             items: vec![ast::FlagsItem {
3574                 span: span(0..1),
3575                 kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
3576             }],
3577         }));
3578         assert_eq!(parser("i)").parse_flags(), Ok(ast::Flags {
3579             span: span(0..1),
3580             items: vec![ast::FlagsItem {
3581                 span: span(0..1),
3582                 kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
3583             }],
3584         }));
3585 
3586         assert_eq!(parser("isU:").parse_flags(), Ok(ast::Flags {
3587             span: span(0..3),
3588             items: vec![
3589                 ast::FlagsItem {
3590                     span: span(0..1),
3591                     kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
3592                 },
3593                 ast::FlagsItem {
3594                     span: span(1..2),
3595                     kind: ast::FlagsItemKind::Flag(
3596                         ast::Flag::DotMatchesNewLine),
3597                 },
3598                 ast::FlagsItem {
3599                     span: span(2..3),
3600                     kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
3601                 },
3602             ],
3603         }));
3604 
3605         assert_eq!(parser("-isU:").parse_flags(), Ok(ast::Flags {
3606             span: span(0..4),
3607             items: vec![
3608                 ast::FlagsItem {
3609                     span: span(0..1),
3610                     kind: ast::FlagsItemKind::Negation,
3611                 },
3612                 ast::FlagsItem {
3613                     span: span(1..2),
3614                     kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
3615                 },
3616                 ast::FlagsItem {
3617                     span: span(2..3),
3618                     kind: ast::FlagsItemKind::Flag(
3619                         ast::Flag::DotMatchesNewLine),
3620                 },
3621                 ast::FlagsItem {
3622                     span: span(3..4),
3623                     kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
3624                 },
3625             ],
3626         }));
3627         assert_eq!(parser("i-sU:").parse_flags(), Ok(ast::Flags {
3628             span: span(0..4),
3629             items: vec![
3630                 ast::FlagsItem {
3631                     span: span(0..1),
3632                     kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
3633                 },
3634                 ast::FlagsItem {
3635                     span: span(1..2),
3636                     kind: ast::FlagsItemKind::Negation,
3637                 },
3638                 ast::FlagsItem {
3639                     span: span(2..3),
3640                     kind: ast::FlagsItemKind::Flag(
3641                         ast::Flag::DotMatchesNewLine),
3642                 },
3643                 ast::FlagsItem {
3644                     span: span(3..4),
3645                     kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
3646                 },
3647             ],
3648         }));
3649 
3650         assert_eq!(
3651             parser("isU").parse_flags().unwrap_err(),
3652             TestError {
3653                 span: span(3..3),
3654                 kind: ast::ErrorKind::FlagUnexpectedEof,
3655             });
3656         assert_eq!(
3657             parser("isUa:").parse_flags().unwrap_err(),
3658             TestError {
3659                 span: span(3..4),
3660                 kind: ast::ErrorKind::FlagUnrecognized,
3661             });
3662         assert_eq!(
3663             parser("isUi:").parse_flags().unwrap_err(),
3664             TestError {
3665                 span: span(3..4),
3666                 kind: ast::ErrorKind::FlagDuplicate {
3667                     original: span(0..1),
3668                 },
3669             });
3670         assert_eq!(
3671             parser("i-sU-i:").parse_flags().unwrap_err(),
3672             TestError {
3673                 span: span(4..5),
3674                 kind: ast::ErrorKind::FlagRepeatedNegation {
3675                     original: span(1..2),
3676                 },
3677             });
3678         assert_eq!(
3679             parser("-)").parse_flags().unwrap_err(),
3680             TestError {
3681                 span: span(0..1),
3682                 kind: ast::ErrorKind::FlagDanglingNegation,
3683             });
3684         assert_eq!(
3685             parser("i-)").parse_flags().unwrap_err(),
3686             TestError {
3687                 span: span(1..2),
3688                 kind: ast::ErrorKind::FlagDanglingNegation,
3689             });
3690         assert_eq!(
3691             parser("iU-)").parse_flags().unwrap_err(),
3692             TestError {
3693                 span: span(2..3),
3694                 kind: ast::ErrorKind::FlagDanglingNegation,
3695             });
3696     }
3697 
3698     #[test]
parse_flag()3699     fn parse_flag() {
3700         assert_eq!(parser("i").parse_flag(), Ok(ast::Flag::CaseInsensitive));
3701         assert_eq!(parser("m").parse_flag(), Ok(ast::Flag::MultiLine));
3702         assert_eq!(parser("s").parse_flag(), Ok(ast::Flag::DotMatchesNewLine));
3703         assert_eq!(parser("U").parse_flag(), Ok(ast::Flag::SwapGreed));
3704         assert_eq!(parser("u").parse_flag(), Ok(ast::Flag::Unicode));
3705         assert_eq!(parser("x").parse_flag(), Ok(ast::Flag::IgnoreWhitespace));
3706 
3707         assert_eq!(
3708             parser("a").parse_flag().unwrap_err(),
3709             TestError {
3710                 span: span(0..1),
3711                 kind: ast::ErrorKind::FlagUnrecognized,
3712             });
3713         assert_eq!(
3714             parser("☃").parse_flag().unwrap_err(),
3715             TestError {
3716                 span: span_range("☃", 0..3),
3717                 kind: ast::ErrorKind::FlagUnrecognized,
3718             });
3719     }
3720 
3721     #[test]
parse_primitive_non_escape()3722     fn parse_primitive_non_escape() {
3723         assert_eq!(
3724             parser(r".").parse_primitive(),
3725             Ok(Primitive::Dot(span(0..1))));
3726         assert_eq!(
3727             parser(r"^").parse_primitive(),
3728             Ok(Primitive::Assertion(ast::Assertion {
3729                 span: span(0..1),
3730                 kind: ast::AssertionKind::StartLine,
3731             })));
3732         assert_eq!(
3733             parser(r"$").parse_primitive(),
3734             Ok(Primitive::Assertion(ast::Assertion {
3735                 span: span(0..1),
3736                 kind: ast::AssertionKind::EndLine,
3737             })));
3738 
3739         assert_eq!(
3740             parser(r"a").parse_primitive(),
3741             Ok(Primitive::Literal(ast::Literal {
3742                 span: span(0..1),
3743                 kind: ast::LiteralKind::Verbatim,
3744                 c: 'a',
3745             })));
3746         assert_eq!(
3747             parser(r"|").parse_primitive(),
3748             Ok(Primitive::Literal(ast::Literal {
3749                 span: span(0..1),
3750                 kind: ast::LiteralKind::Verbatim,
3751                 c: '|',
3752             })));
3753         assert_eq!(
3754             parser(r"☃").parse_primitive(),
3755             Ok(Primitive::Literal(ast::Literal {
3756                 span: span_range("☃", 0..3),
3757                 kind: ast::LiteralKind::Verbatim,
3758                 c: '☃',
3759             })));
3760     }
3761 
3762     #[test]
parse_escape()3763     fn parse_escape() {
3764         assert_eq!(
3765             parser(r"\|").parse_primitive(),
3766             Ok(Primitive::Literal(ast::Literal {
3767                 span: span(0..2),
3768                 kind: ast::LiteralKind::Punctuation,
3769                 c: '|',
3770             })));
3771         let specials = &[
3772             (r"\a", '\x07', ast::SpecialLiteralKind::Bell),
3773             (r"\f", '\x0C', ast::SpecialLiteralKind::FormFeed),
3774             (r"\t", '\t', ast::SpecialLiteralKind::Tab),
3775             (r"\n", '\n', ast::SpecialLiteralKind::LineFeed),
3776             (r"\r", '\r', ast::SpecialLiteralKind::CarriageReturn),
3777             (r"\v", '\x0B', ast::SpecialLiteralKind::VerticalTab),
3778         ];
3779         for &(pat, c, ref kind) in specials {
3780             assert_eq!(
3781                 parser(pat).parse_primitive(),
3782                 Ok(Primitive::Literal(ast::Literal {
3783                     span: span(0..2),
3784                     kind: ast::LiteralKind::Special(kind.clone()),
3785                     c: c,
3786                 })));
3787         }
3788         assert_eq!(
3789             parser(r"\A").parse_primitive(),
3790             Ok(Primitive::Assertion(ast::Assertion {
3791                 span: span(0..2),
3792                 kind: ast::AssertionKind::StartText,
3793             })));
3794         assert_eq!(
3795             parser(r"\z").parse_primitive(),
3796             Ok(Primitive::Assertion(ast::Assertion {
3797                 span: span(0..2),
3798                 kind: ast::AssertionKind::EndText,
3799             })));
3800         assert_eq!(
3801             parser(r"\b").parse_primitive(),
3802             Ok(Primitive::Assertion(ast::Assertion {
3803                 span: span(0..2),
3804                 kind: ast::AssertionKind::WordBoundary,
3805             })));
3806         assert_eq!(
3807             parser(r"\B").parse_primitive(),
3808             Ok(Primitive::Assertion(ast::Assertion {
3809                 span: span(0..2),
3810                 kind: ast::AssertionKind::NotWordBoundary,
3811             })));
3812 
3813         assert_eq!(
3814             parser(r"\").parse_escape().unwrap_err(),
3815             TestError {
3816                 span: span(0..1),
3817                 kind: ast::ErrorKind::EscapeUnexpectedEof,
3818             });
3819         assert_eq!(
3820             parser(r"\y").parse_escape().unwrap_err(),
3821             TestError {
3822                 span: span(0..2),
3823                 kind: ast::ErrorKind::EscapeUnrecognized,
3824             });
3825     }
3826 
3827     #[test]
parse_unsupported_backreference()3828     fn parse_unsupported_backreference() {
3829         assert_eq!(
3830             parser(r"\0").parse_escape().unwrap_err(),
3831             TestError {
3832                 span: span(0..2),
3833                 kind: ast::ErrorKind::UnsupportedBackreference,
3834             });
3835         assert_eq!(
3836             parser(r"\9").parse_escape().unwrap_err(),
3837             TestError {
3838                 span: span(0..2),
3839                 kind: ast::ErrorKind::UnsupportedBackreference,
3840             });
3841     }
3842 
3843     #[test]
parse_octal()3844     fn parse_octal() {
3845         for i in 0..511 {
3846             let pat = format!(r"\{:o}", i);
3847             assert_eq!(
3848                 parser_octal(&pat).parse_escape(),
3849                 Ok(Primitive::Literal(ast::Literal {
3850                     span: span(0..pat.len()),
3851                     kind: ast::LiteralKind::Octal,
3852                     c: ::std::char::from_u32(i).unwrap(),
3853                 })));
3854         }
3855         assert_eq!(
3856             parser_octal(r"\778").parse_escape(),
3857             Ok(Primitive::Literal(ast::Literal {
3858                 span: span(0..3),
3859                 kind: ast::LiteralKind::Octal,
3860                 c: '?',
3861             })));
3862         assert_eq!(
3863             parser_octal(r"\7777").parse_escape(),
3864             Ok(Primitive::Literal(ast::Literal {
3865                 span: span(0..4),
3866                 kind: ast::LiteralKind::Octal,
3867                 c: '\u{01FF}',
3868             })));
3869         assert_eq!(
3870             parser_octal(r"\778").parse(),
3871             Ok(Ast::Concat(ast::Concat {
3872                 span: span(0..4),
3873                 asts: vec![
3874                     Ast::Literal(ast::Literal {
3875                         span: span(0..3),
3876                         kind: ast::LiteralKind::Octal,
3877                         c: '?',
3878                     }),
3879                     Ast::Literal(ast::Literal {
3880                         span: span(3..4),
3881                         kind: ast::LiteralKind::Verbatim,
3882                         c: '8',
3883                     }),
3884                 ],
3885             })));
3886         assert_eq!(
3887             parser_octal(r"\7777").parse(),
3888             Ok(Ast::Concat(ast::Concat {
3889                 span: span(0..5),
3890                 asts: vec![
3891                     Ast::Literal(ast::Literal {
3892                         span: span(0..4),
3893                         kind: ast::LiteralKind::Octal,
3894                         c: '\u{01FF}',
3895                     }),
3896                     Ast::Literal(ast::Literal {
3897                         span: span(4..5),
3898                         kind: ast::LiteralKind::Verbatim,
3899                         c: '7',
3900                     }),
3901                 ],
3902             })));
3903 
3904         assert_eq!(
3905             parser_octal(r"\8").parse_escape().unwrap_err(),
3906             TestError {
3907                 span: span(0..2),
3908                 kind: ast::ErrorKind::EscapeUnrecognized,
3909             });
3910     }
3911 
3912     #[test]
parse_hex_two()3913     fn parse_hex_two() {
3914         for i in 0..256 {
3915             let pat = format!(r"\x{:02x}", i);
3916             assert_eq!(
3917                 parser(&pat).parse_escape(),
3918                 Ok(Primitive::Literal(ast::Literal {
3919                     span: span(0..pat.len()),
3920                     kind: ast::LiteralKind::HexFixed(ast::HexLiteralKind::X),
3921                     c: ::std::char::from_u32(i).unwrap(),
3922                 })));
3923         }
3924 
3925         assert_eq!(
3926             parser(r"\xF").parse_escape().unwrap_err(),
3927             TestError {
3928                 span: span(3..3),
3929                 kind: ast::ErrorKind::EscapeUnexpectedEof,
3930             });
3931         assert_eq!(
3932             parser(r"\xG").parse_escape().unwrap_err(),
3933             TestError {
3934                 span: span(2..3),
3935                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
3936             });
3937         assert_eq!(
3938             parser(r"\xFG").parse_escape().unwrap_err(),
3939             TestError {
3940                 span: span(3..4),
3941                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
3942             });
3943     }
3944 
3945     #[test]
parse_hex_four()3946     fn parse_hex_four() {
3947         for i in 0..65536 {
3948             let c = match ::std::char::from_u32(i) {
3949                 None => continue,
3950                 Some(c) => c,
3951             };
3952             let pat = format!(r"\u{:04x}", i);
3953             assert_eq!(
3954                 parser(&pat).parse_escape(),
3955                 Ok(Primitive::Literal(ast::Literal {
3956                     span: span(0..pat.len()),
3957                     kind: ast::LiteralKind::HexFixed(
3958                         ast::HexLiteralKind::UnicodeShort),
3959                     c: c,
3960                 })));
3961         }
3962 
3963         assert_eq!(
3964             parser(r"\uF").parse_escape().unwrap_err(),
3965             TestError {
3966                 span: span(3..3),
3967                 kind: ast::ErrorKind::EscapeUnexpectedEof,
3968             });
3969         assert_eq!(
3970             parser(r"\uG").parse_escape().unwrap_err(),
3971             TestError {
3972                 span: span(2..3),
3973                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
3974             });
3975         assert_eq!(
3976             parser(r"\uFG").parse_escape().unwrap_err(),
3977             TestError {
3978                 span: span(3..4),
3979                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
3980             });
3981         assert_eq!(
3982             parser(r"\uFFG").parse_escape().unwrap_err(),
3983             TestError {
3984                 span: span(4..5),
3985                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
3986             });
3987         assert_eq!(
3988             parser(r"\uFFFG").parse_escape().unwrap_err(),
3989             TestError {
3990                 span: span(5..6),
3991                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
3992             });
3993         assert_eq!(
3994             parser(r"\uD800").parse_escape().unwrap_err(),
3995             TestError {
3996                 span: span(2..6),
3997                 kind: ast::ErrorKind::EscapeHexInvalid,
3998             });
3999     }
4000 
4001     #[test]
parse_hex_eight()4002     fn parse_hex_eight() {
4003         for i in 0..65536 {
4004             let c = match ::std::char::from_u32(i) {
4005                 None => continue,
4006                 Some(c) => c,
4007             };
4008             let pat = format!(r"\U{:08x}", i);
4009             assert_eq!(
4010                 parser(&pat).parse_escape(),
4011                 Ok(Primitive::Literal(ast::Literal {
4012                     span: span(0..pat.len()),
4013                     kind: ast::LiteralKind::HexFixed(
4014                         ast::HexLiteralKind::UnicodeLong),
4015                     c: c,
4016                 })));
4017         }
4018 
4019         assert_eq!(
4020             parser(r"\UF").parse_escape().unwrap_err(),
4021             TestError {
4022                 span: span(3..3),
4023                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4024             });
4025         assert_eq!(
4026             parser(r"\UG").parse_escape().unwrap_err(),
4027             TestError {
4028                 span: span(2..3),
4029                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4030             });
4031         assert_eq!(
4032             parser(r"\UFG").parse_escape().unwrap_err(),
4033             TestError {
4034                 span: span(3..4),
4035                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4036             });
4037         assert_eq!(
4038             parser(r"\UFFG").parse_escape().unwrap_err(),
4039             TestError {
4040                 span: span(4..5),
4041                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4042             });
4043         assert_eq!(
4044             parser(r"\UFFFG").parse_escape().unwrap_err(),
4045             TestError {
4046                 span: span(5..6),
4047                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4048             });
4049         assert_eq!(
4050             parser(r"\UFFFFG").parse_escape().unwrap_err(),
4051             TestError {
4052                 span: span(6..7),
4053                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4054             });
4055         assert_eq!(
4056             parser(r"\UFFFFFG").parse_escape().unwrap_err(),
4057             TestError {
4058                 span: span(7..8),
4059                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4060             });
4061         assert_eq!(
4062             parser(r"\UFFFFFFG").parse_escape().unwrap_err(),
4063             TestError {
4064                 span: span(8..9),
4065                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4066             });
4067         assert_eq!(
4068             parser(r"\UFFFFFFFG").parse_escape().unwrap_err(),
4069             TestError {
4070                 span: span(9..10),
4071                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4072             });
4073     }
4074 
4075     #[test]
parse_hex_brace()4076     fn parse_hex_brace() {
4077         assert_eq!(
4078             parser(r"\u{26c4}").parse_escape(),
4079             Ok(Primitive::Literal(ast::Literal {
4080                 span: span(0..8),
4081                 kind: ast::LiteralKind::HexBrace(
4082                     ast::HexLiteralKind::UnicodeShort),
4083                 c: '⛄',
4084             })));
4085         assert_eq!(
4086             parser(r"\U{26c4}").parse_escape(),
4087             Ok(Primitive::Literal(ast::Literal {
4088                 span: span(0..8),
4089                 kind: ast::LiteralKind::HexBrace(
4090                     ast::HexLiteralKind::UnicodeLong),
4091                 c: '⛄',
4092             })));
4093         assert_eq!(
4094             parser(r"\x{26c4}").parse_escape(),
4095             Ok(Primitive::Literal(ast::Literal {
4096                 span: span(0..8),
4097                 kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
4098                 c: '⛄',
4099             })));
4100         assert_eq!(
4101             parser(r"\x{26C4}").parse_escape(),
4102             Ok(Primitive::Literal(ast::Literal {
4103                 span: span(0..8),
4104                 kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
4105                 c: '⛄',
4106             })));
4107         assert_eq!(
4108             parser(r"\x{10fFfF}").parse_escape(),
4109             Ok(Primitive::Literal(ast::Literal {
4110                 span: span(0..10),
4111                 kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
4112                 c: '\u{10FFFF}',
4113             })));
4114 
4115         assert_eq!(
4116             parser(r"\x").parse_escape().unwrap_err(),
4117             TestError {
4118                 span: span(2..2),
4119                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4120             });
4121         assert_eq!(
4122             parser(r"\x{").parse_escape().unwrap_err(),
4123             TestError {
4124                 span: span(2..3),
4125                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4126             });
4127         assert_eq!(
4128             parser(r"\x{FF").parse_escape().unwrap_err(),
4129             TestError {
4130                 span: span(2..5),
4131                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4132             });
4133         assert_eq!(
4134             parser(r"\x{}").parse_escape().unwrap_err(),
4135             TestError {
4136                 span: span(2..4),
4137                 kind: ast::ErrorKind::EscapeHexEmpty,
4138             });
4139         assert_eq!(
4140             parser(r"\x{FGF}").parse_escape().unwrap_err(),
4141             TestError {
4142                 span: span(4..5),
4143                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4144             });
4145         assert_eq!(
4146             parser(r"\x{FFFFFF}").parse_escape().unwrap_err(),
4147             TestError {
4148                 span: span(3..9),
4149                 kind: ast::ErrorKind::EscapeHexInvalid,
4150             });
4151         assert_eq!(
4152             parser(r"\x{D800}").parse_escape().unwrap_err(),
4153             TestError {
4154                 span: span(3..7),
4155                 kind: ast::ErrorKind::EscapeHexInvalid,
4156             });
4157         assert_eq!(
4158             parser(r"\x{FFFFFFFFF}").parse_escape().unwrap_err(),
4159             TestError {
4160                 span: span(3..12),
4161                 kind: ast::ErrorKind::EscapeHexInvalid,
4162             });
4163     }
4164 
4165     #[test]
parse_decimal()4166     fn parse_decimal() {
4167         assert_eq!(parser("123").parse_decimal(), Ok(123));
4168         assert_eq!(parser("0").parse_decimal(), Ok(0));
4169         assert_eq!(parser("01").parse_decimal(), Ok(1));
4170 
4171         assert_eq!(
4172             parser("-1").parse_decimal().unwrap_err(),
4173             TestError {
4174                 span: span(0..0),
4175                 kind: ast::ErrorKind::DecimalEmpty,
4176             });
4177         assert_eq!(
4178             parser("").parse_decimal().unwrap_err(),
4179             TestError {
4180                 span: span(0..0),
4181                 kind: ast::ErrorKind::DecimalEmpty,
4182             });
4183         assert_eq!(
4184             parser("9999999999").parse_decimal().unwrap_err(),
4185             TestError {
4186                 span: span(0..10),
4187                 kind: ast::ErrorKind::DecimalInvalid,
4188             });
4189     }
4190 
4191     #[test]
parse_set_class()4192     fn parse_set_class() {
4193         fn union(span: Span, items: Vec<ast::ClassSetItem>) -> ast::ClassSet {
4194             ast::ClassSet::union(ast::ClassSetUnion {
4195                 span: span,
4196                 items: items,
4197             })
4198         }
4199 
4200         fn intersection(
4201             span: Span,
4202             lhs: ast::ClassSet,
4203             rhs: ast::ClassSet,
4204         ) -> ast::ClassSet {
4205             ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
4206                 span: span,
4207                 kind: ast::ClassSetBinaryOpKind::Intersection,
4208                 lhs: Box::new(lhs),
4209                 rhs: Box::new(rhs),
4210             })
4211         }
4212 
4213         fn difference(
4214             span: Span,
4215             lhs: ast::ClassSet,
4216             rhs: ast::ClassSet,
4217         ) -> ast::ClassSet {
4218             ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
4219                 span: span,
4220                 kind: ast::ClassSetBinaryOpKind::Difference,
4221                 lhs: Box::new(lhs),
4222                 rhs: Box::new(rhs),
4223             })
4224         }
4225 
4226         fn symdifference(
4227             span: Span,
4228             lhs: ast::ClassSet,
4229             rhs: ast::ClassSet,
4230         ) -> ast::ClassSet {
4231             ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
4232                 span: span,
4233                 kind: ast::ClassSetBinaryOpKind::SymmetricDifference,
4234                 lhs: Box::new(lhs),
4235                 rhs: Box::new(rhs),
4236             })
4237         }
4238 
4239         fn itemset(item: ast::ClassSetItem) -> ast::ClassSet {
4240             ast::ClassSet::Item(item)
4241         }
4242 
4243         fn item_ascii(cls: ast::ClassAscii) -> ast::ClassSetItem {
4244             ast::ClassSetItem::Ascii(cls)
4245         }
4246 
4247         fn item_unicode(cls: ast::ClassUnicode) -> ast::ClassSetItem {
4248             ast::ClassSetItem::Unicode(cls)
4249         }
4250 
4251         fn item_perl(cls: ast::ClassPerl) -> ast::ClassSetItem {
4252             ast::ClassSetItem::Perl(cls)
4253         }
4254 
4255         fn item_bracket(cls: ast::ClassBracketed) -> ast::ClassSetItem {
4256             ast::ClassSetItem::Bracketed(Box::new(cls))
4257         }
4258 
4259         fn lit(span: Span, c: char) -> ast::ClassSetItem {
4260             ast::ClassSetItem::Literal(ast::Literal {
4261                 span: span,
4262                 kind: ast::LiteralKind::Verbatim,
4263                 c: c,
4264             })
4265         }
4266 
4267         fn empty(span: Span) -> ast::ClassSetItem {
4268             ast::ClassSetItem::Empty(span)
4269         }
4270 
4271         fn range(span: Span, start: char, end: char) -> ast::ClassSetItem {
4272             let pos1 = Position {
4273                 offset: span.start.offset + start.len_utf8(),
4274                 column: span.start.column + 1,
4275                 ..span.start
4276             };
4277             let pos2 = Position {
4278                 offset: span.end.offset - end.len_utf8(),
4279                 column: span.end.column - 1,
4280                 ..span.end
4281             };
4282             ast::ClassSetItem::Range(ast::ClassSetRange {
4283                 span: span,
4284                 start: ast::Literal {
4285                     span: Span { end: pos1, ..span },
4286                     kind: ast::LiteralKind::Verbatim,
4287                     c: start,
4288                 },
4289                 end: ast::Literal {
4290                     span: Span { start: pos2, ..span },
4291                     kind: ast::LiteralKind::Verbatim,
4292                     c: end,
4293                 },
4294             })
4295         }
4296 
4297         fn alnum(span: Span, negated: bool) -> ast::ClassAscii {
4298             ast::ClassAscii {
4299                 span: span,
4300                 kind: ast::ClassAsciiKind::Alnum,
4301                 negated: negated,
4302             }
4303         }
4304 
4305         fn lower(span: Span, negated: bool) -> ast::ClassAscii {
4306             ast::ClassAscii {
4307                 span: span,
4308                 kind: ast::ClassAsciiKind::Lower,
4309                 negated: negated,
4310             }
4311         }
4312 
4313         assert_eq!(
4314             parser("[[:alnum:]]").parse(),
4315             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4316                 span: span(0..11),
4317                 negated: false,
4318                 kind: itemset(item_ascii(alnum(span(1..10), false))),
4319             }))));
4320         assert_eq!(
4321             parser("[[[:alnum:]]]").parse(),
4322             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4323                 span: span(0..13),
4324                 negated: false,
4325                 kind: itemset(item_bracket(ast::ClassBracketed {
4326                     span: span(1..12),
4327                     negated: false,
4328                     kind: itemset(item_ascii(alnum(span(2..11), false))),
4329                 })),
4330             }))));
4331         assert_eq!(
4332             parser("[[:alnum:]&&[:lower:]]").parse(),
4333             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4334                 span: span(0..22),
4335                 negated: false,
4336                 kind: intersection(
4337                     span(1..21),
4338                     itemset(item_ascii(alnum(span(1..10), false))),
4339                     itemset(item_ascii(lower(span(12..21), false))),
4340                 ),
4341             }))));
4342         assert_eq!(
4343             parser("[[:alnum:]--[:lower:]]").parse(),
4344             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4345                 span: span(0..22),
4346                 negated: false,
4347                 kind: difference(
4348                     span(1..21),
4349                     itemset(item_ascii(alnum(span(1..10), false))),
4350                     itemset(item_ascii(lower(span(12..21), false))),
4351                 ),
4352             }))));
4353         assert_eq!(
4354             parser("[[:alnum:]~~[:lower:]]").parse(),
4355             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4356                 span: span(0..22),
4357                 negated: false,
4358                 kind: symdifference(
4359                     span(1..21),
4360                     itemset(item_ascii(alnum(span(1..10), false))),
4361                     itemset(item_ascii(lower(span(12..21), false))),
4362                 ),
4363             }))));
4364 
4365         assert_eq!(
4366             parser("[a]").parse(),
4367             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4368                 span: span(0..3),
4369                 negated: false,
4370                 kind: itemset(lit(span(1..2), 'a')),
4371             }))));
4372         assert_eq!(
4373             parser(r"[a\]]").parse(),
4374             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4375                 span: span(0..5),
4376                 negated: false,
4377                 kind: union(span(1..4), vec![
4378                     lit(span(1..2), 'a'),
4379                     ast::ClassSetItem::Literal(ast::Literal {
4380                         span: span(2..4),
4381                         kind: ast::LiteralKind::Punctuation,
4382                         c: ']',
4383                     }),
4384                 ]),
4385             }))));
4386         assert_eq!(
4387             parser(r"[a\-z]").parse(),
4388             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4389                 span: span(0..6),
4390                 negated: false,
4391                 kind: union(span(1..5), vec![
4392                     lit(span(1..2), 'a'),
4393                     ast::ClassSetItem::Literal(ast::Literal {
4394                         span: span(2..4),
4395                         kind: ast::LiteralKind::Punctuation,
4396                         c: '-',
4397                     }),
4398                     lit(span(4..5), 'z'),
4399                 ]),
4400             }))));
4401         assert_eq!(
4402             parser("[ab]").parse(),
4403             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4404                 span: span(0..4),
4405                 negated: false,
4406                 kind: union(span(1..3), vec![
4407                     lit(span(1..2), 'a'),
4408                     lit(span(2..3), 'b'),
4409                 ]),
4410             }))));
4411         assert_eq!(
4412             parser("[a-]").parse(),
4413             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4414                 span: span(0..4),
4415                 negated: false,
4416                 kind: union(span(1..3), vec![
4417                     lit(span(1..2), 'a'),
4418                     lit(span(2..3), '-'),
4419                 ]),
4420             }))));
4421         assert_eq!(
4422             parser("[-a]").parse(),
4423             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4424                 span: span(0..4),
4425                 negated: false,
4426                 kind: union(span(1..3), vec![
4427                     lit(span(1..2), '-'),
4428                     lit(span(2..3), 'a'),
4429                 ]),
4430             }))));
4431         assert_eq!(
4432             parser(r"[\pL]").parse(),
4433             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4434                 span: span(0..5),
4435                 negated: false,
4436                 kind: itemset(item_unicode(ast::ClassUnicode {
4437                     span: span(1..4),
4438                     negated: false,
4439                     kind: ast::ClassUnicodeKind::OneLetter('L'),
4440                 })),
4441             }))));
4442         assert_eq!(
4443             parser(r"[\w]").parse(),
4444             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4445                 span: span(0..4),
4446                 negated: false,
4447                 kind: itemset(item_perl(ast::ClassPerl {
4448                     span: span(1..3),
4449                     kind: ast::ClassPerlKind::Word,
4450                     negated: false,
4451                 })),
4452             }))));
4453         assert_eq!(
4454             parser(r"[a\wz]").parse(),
4455             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4456                 span: span(0..6),
4457                 negated: false,
4458                 kind: union(span(1..5), vec![
4459                     lit(span(1..2), 'a'),
4460                     item_perl(ast::ClassPerl {
4461                         span: span(2..4),
4462                         kind: ast::ClassPerlKind::Word,
4463                         negated: false,
4464                     }),
4465                     lit(span(4..5), 'z'),
4466                 ]),
4467             }))));
4468 
4469         assert_eq!(
4470             parser("[a-z]").parse(),
4471             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4472                 span: span(0..5),
4473                 negated: false,
4474                 kind: itemset(range(span(1..4), 'a', 'z')),
4475             }))));
4476         assert_eq!(
4477             parser("[a-cx-z]").parse(),
4478             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4479                 span: span(0..8),
4480                 negated: false,
4481                 kind: union(span(1..7), vec![
4482                     range(span(1..4), 'a', 'c'),
4483                     range(span(4..7), 'x', 'z'),
4484                 ]),
4485             }))));
4486         assert_eq!(
4487             parser(r"[\w&&a-cx-z]").parse(),
4488             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4489                 span: span(0..12),
4490                 negated: false,
4491                 kind: intersection(
4492                     span(1..11),
4493                     itemset(item_perl(ast::ClassPerl {
4494                         span: span(1..3),
4495                         kind: ast::ClassPerlKind::Word,
4496                         negated: false,
4497                     })),
4498                     union(span(5..11), vec![
4499                         range(span(5..8), 'a', 'c'),
4500                         range(span(8..11), 'x', 'z'),
4501                     ]),
4502                 ),
4503             }))));
4504         assert_eq!(
4505             parser(r"[a-cx-z&&\w]").parse(),
4506             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4507                 span: span(0..12),
4508                 negated: false,
4509                 kind: intersection(
4510                     span(1..11),
4511                     union(span(1..7), vec![
4512                         range(span(1..4), 'a', 'c'),
4513                         range(span(4..7), 'x', 'z'),
4514                     ]),
4515                     itemset(item_perl(ast::ClassPerl {
4516                         span: span(9..11),
4517                         kind: ast::ClassPerlKind::Word,
4518                         negated: false,
4519                     })),
4520                 ),
4521             }))));
4522         assert_eq!(
4523             parser(r"[a--b--c]").parse(),
4524             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4525                 span: span(0..9),
4526                 negated: false,
4527                 kind: difference(
4528                     span(1..8),
4529                     difference(
4530                         span(1..5),
4531                         itemset(lit(span(1..2), 'a')),
4532                         itemset(lit(span(4..5), 'b')),
4533                     ),
4534                     itemset(lit(span(7..8), 'c')),
4535                 ),
4536             }))));
4537         assert_eq!(
4538             parser(r"[a~~b~~c]").parse(),
4539             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4540                 span: span(0..9),
4541                 negated: false,
4542                 kind: symdifference(
4543                     span(1..8),
4544                     symdifference(
4545                         span(1..5),
4546                         itemset(lit(span(1..2), 'a')),
4547                         itemset(lit(span(4..5), 'b')),
4548                     ),
4549                     itemset(lit(span(7..8), 'c')),
4550                 ),
4551             }))));
4552         assert_eq!(
4553             parser(r"[\^&&^]").parse(),
4554             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4555                 span: span(0..7),
4556                 negated: false,
4557                 kind: intersection(
4558                     span(1..6),
4559                     itemset(ast::ClassSetItem::Literal(ast::Literal {
4560                         span: span(1..3),
4561                         kind: ast::LiteralKind::Punctuation,
4562                         c: '^',
4563                     })),
4564                     itemset(lit(span(5..6), '^')),
4565                 ),
4566             }))));
4567         assert_eq!(
4568             parser(r"[\&&&&]").parse(),
4569             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4570                 span: span(0..7),
4571                 negated: false,
4572                 kind: intersection(
4573                     span(1..6),
4574                     itemset(ast::ClassSetItem::Literal(ast::Literal {
4575                         span: span(1..3),
4576                         kind: ast::LiteralKind::Punctuation,
4577                         c: '&',
4578                     })),
4579                     itemset(lit(span(5..6), '&')),
4580                 ),
4581             }))));
4582         assert_eq!(
4583             parser(r"[&&&&]").parse(),
4584             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4585                 span: span(0..6),
4586                 negated: false,
4587                 kind: intersection(
4588                     span(1..5),
4589                     intersection(
4590                         span(1..3),
4591                         itemset(empty(span(1..1))),
4592                         itemset(empty(span(3..3))),
4593                     ),
4594                     itemset(empty(span(5..5))),
4595                 ),
4596             }))));
4597 
4598         let pat = "[☃-⛄]";
4599         assert_eq!(
4600             parser(pat).parse(),
4601             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4602                 span: span_range(pat, 0..9),
4603                 negated: false,
4604                 kind: itemset(ast::ClassSetItem::Range(ast::ClassSetRange {
4605                     span: span_range(pat, 1..8),
4606                     start: ast::Literal {
4607                         span: span_range(pat, 1..4),
4608                         kind: ast::LiteralKind::Verbatim,
4609                         c: '☃',
4610                     },
4611                     end: ast::Literal {
4612                         span: span_range(pat, 5..8),
4613                         kind: ast::LiteralKind::Verbatim,
4614                         c: '⛄',
4615                     },
4616                 })),
4617             }))));
4618 
4619         assert_eq!(
4620             parser(r"[]]").parse(),
4621             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4622                 span: span(0..3),
4623                 negated: false,
4624                 kind: itemset(lit(span(1..2), ']')),
4625             }))));
4626         assert_eq!(
4627             parser(r"[]\[]").parse(),
4628             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4629                 span: span(0..5),
4630                 negated: false,
4631                 kind: union(span(1..4), vec![
4632                     lit(span(1..2), ']'),
4633                     ast::ClassSetItem::Literal(ast::Literal  {
4634                         span: span(2..4),
4635                         kind: ast::LiteralKind::Punctuation,
4636                         c: '[',
4637                     }),
4638                 ]),
4639             }))));
4640         assert_eq!(
4641             parser(r"[\[]]").parse(),
4642             Ok(concat(0..5, vec![
4643                 Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4644                     span: span(0..4),
4645                     negated: false,
4646                     kind: itemset(ast::ClassSetItem::Literal(ast::Literal  {
4647                         span: span(1..3),
4648                         kind: ast::LiteralKind::Punctuation,
4649                         c: '[',
4650                     })),
4651                 })),
4652                 Ast::Literal(ast::Literal {
4653                     span: span(4..5),
4654                     kind: ast::LiteralKind::Verbatim,
4655                     c: ']',
4656                 }),
4657             ])));
4658 
4659         assert_eq!(
4660             parser("[").parse().unwrap_err(),
4661             TestError {
4662                 span: span(0..1),
4663                 kind: ast::ErrorKind::ClassUnclosed,
4664             });
4665         assert_eq!(
4666             parser("[[").parse().unwrap_err(),
4667             TestError {
4668                 span: span(1..2),
4669                 kind: ast::ErrorKind::ClassUnclosed,
4670             });
4671         assert_eq!(
4672             parser("[[-]").parse().unwrap_err(),
4673             TestError {
4674                 span: span(0..1),
4675                 kind: ast::ErrorKind::ClassUnclosed,
4676             });
4677         assert_eq!(
4678             parser("[[[:alnum:]").parse().unwrap_err(),
4679             TestError {
4680                 span: span(1..2),
4681                 kind: ast::ErrorKind::ClassUnclosed,
4682             });
4683         assert_eq!(
4684             parser(r"[\b]").parse().unwrap_err(),
4685             TestError {
4686                 span: span(1..3),
4687                 kind: ast::ErrorKind::ClassEscapeInvalid,
4688             });
4689         assert_eq!(
4690             parser(r"[\w-a]").parse().unwrap_err(),
4691             TestError {
4692                 span: span(1..3),
4693                 kind: ast::ErrorKind::ClassRangeLiteral,
4694             });
4695         assert_eq!(
4696             parser(r"[a-\w]").parse().unwrap_err(),
4697             TestError {
4698                 span: span(3..5),
4699                 kind: ast::ErrorKind::ClassRangeLiteral,
4700             });
4701         assert_eq!(
4702             parser(r"[z-a]").parse().unwrap_err(),
4703             TestError {
4704                 span: span(1..4),
4705                 kind: ast::ErrorKind::ClassRangeInvalid,
4706             });
4707 
4708         assert_eq!(
4709             parser_ignore_whitespace("[a ").parse().unwrap_err(),
4710             TestError {
4711                 span: span(0..1),
4712                 kind: ast::ErrorKind::ClassUnclosed,
4713             });
4714         assert_eq!(
4715             parser_ignore_whitespace("[a- ").parse().unwrap_err(),
4716             TestError {
4717                 span: span(0..1),
4718                 kind: ast::ErrorKind::ClassUnclosed,
4719             });
4720     }
4721 
4722     #[test]
parse_set_class_open()4723     fn parse_set_class_open() {
4724         assert_eq!(
4725             parser("[a]").parse_set_class_open(), {
4726                 let set = ast::ClassBracketed {
4727                     span: span(0..1),
4728                     negated: false,
4729                     kind: ast::ClassSet::union(ast::ClassSetUnion {
4730                         span: span(1..1),
4731                         items: vec![],
4732                     }),
4733                 };
4734                 let union = ast::ClassSetUnion {
4735                     span: span(1..1),
4736                     items: vec![],
4737                 };
4738                 Ok((set, union))
4739             });
4740         assert_eq!(
4741             parser_ignore_whitespace("[   a]").parse_set_class_open(), {
4742                 let set = ast::ClassBracketed {
4743                     span: span(0..4),
4744                     negated: false,
4745                     kind: ast::ClassSet::union(ast::ClassSetUnion {
4746                         span: span(4..4),
4747                         items: vec![],
4748                     }),
4749                 };
4750                 let union = ast::ClassSetUnion {
4751                     span: span(4..4),
4752                     items: vec![],
4753                 };
4754                 Ok((set, union))
4755             });
4756         assert_eq!(
4757             parser("[^a]").parse_set_class_open(), {
4758                 let set = ast::ClassBracketed {
4759                     span: span(0..2),
4760                     negated: true,
4761                     kind: ast::ClassSet::union(ast::ClassSetUnion {
4762                         span: span(2..2),
4763                         items: vec![],
4764                     }),
4765                 };
4766                 let union = ast::ClassSetUnion {
4767                     span: span(2..2),
4768                     items: vec![],
4769                 };
4770                 Ok((set, union))
4771             });
4772         assert_eq!(
4773             parser_ignore_whitespace("[ ^ a]").parse_set_class_open(), {
4774                 let set = ast::ClassBracketed {
4775                     span: span(0..4),
4776                     negated: true,
4777                     kind: ast::ClassSet::union(ast::ClassSetUnion {
4778                         span: span(4..4),
4779                         items: vec![],
4780                     }),
4781                 };
4782                 let union = ast::ClassSetUnion {
4783                     span: span(4..4),
4784                     items: vec![],
4785                 };
4786                 Ok((set, union))
4787             });
4788         assert_eq!(
4789             parser("[-a]").parse_set_class_open(), {
4790                 let set = ast::ClassBracketed {
4791                     span: span(0..2),
4792                     negated: false,
4793                     kind: ast::ClassSet::union(ast::ClassSetUnion {
4794                         span: span(1..1),
4795                         items: vec![],
4796                     }),
4797                 };
4798                 let union = ast::ClassSetUnion {
4799                     span: span(1..2),
4800                     items: vec![
4801                         ast::ClassSetItem::Literal(ast::Literal {
4802                             span: span(1..2),
4803                             kind: ast::LiteralKind::Verbatim,
4804                             c: '-',
4805                         }),
4806                     ],
4807                 };
4808                 Ok((set, union))
4809             });
4810         assert_eq!(
4811             parser_ignore_whitespace("[ - a]").parse_set_class_open(), {
4812                 let set = ast::ClassBracketed {
4813                     span: span(0..4),
4814                     negated: false,
4815                     kind: ast::ClassSet::union(ast::ClassSetUnion {
4816                         span: span(2..2),
4817                         items: vec![],
4818                     }),
4819                 };
4820                 let union = ast::ClassSetUnion {
4821                     span: span(2..3),
4822                     items: vec![
4823                         ast::ClassSetItem::Literal(ast::Literal {
4824                             span: span(2..3),
4825                             kind: ast::LiteralKind::Verbatim,
4826                             c: '-',
4827                         }),
4828                     ],
4829                 };
4830                 Ok((set, union))
4831             });
4832         assert_eq!(
4833             parser("[^-a]").parse_set_class_open(), {
4834                 let set = ast::ClassBracketed {
4835                     span: span(0..3),
4836                     negated: true,
4837                     kind: ast::ClassSet::union(ast::ClassSetUnion {
4838                         span: span(2..2),
4839                         items: vec![],
4840                     }),
4841                 };
4842                 let union = ast::ClassSetUnion {
4843                     span: span(2..3),
4844                     items: vec![
4845                         ast::ClassSetItem::Literal(ast::Literal {
4846                             span: span(2..3),
4847                             kind: ast::LiteralKind::Verbatim,
4848                             c: '-',
4849                         }),
4850                     ],
4851                 };
4852                 Ok((set, union))
4853             });
4854         assert_eq!(
4855             parser("[--a]").parse_set_class_open(), {
4856                 let set = ast::ClassBracketed {
4857                     span: span(0..3),
4858                     negated: false,
4859                     kind: ast::ClassSet::union(ast::ClassSetUnion {
4860                         span: span(1..1),
4861                         items: vec![],
4862                     }),
4863                 };
4864                 let union = ast::ClassSetUnion {
4865                     span: span(1..3),
4866                     items: vec![
4867                         ast::ClassSetItem::Literal(ast::Literal {
4868                             span: span(1..2),
4869                             kind: ast::LiteralKind::Verbatim,
4870                             c: '-',
4871                         }),
4872                         ast::ClassSetItem::Literal(ast::Literal {
4873                             span: span(2..3),
4874                             kind: ast::LiteralKind::Verbatim,
4875                             c: '-',
4876                         }),
4877                     ],
4878                 };
4879                 Ok((set, union))
4880             });
4881         assert_eq!(
4882             parser("[]a]").parse_set_class_open(), {
4883                 let set = ast::ClassBracketed {
4884                     span: span(0..2),
4885                     negated: false,
4886                     kind: ast::ClassSet::union(ast::ClassSetUnion {
4887                         span: span(1..1),
4888                         items: vec![],
4889                     }),
4890                 };
4891                 let union = ast::ClassSetUnion {
4892                     span: span(1..2),
4893                     items: vec![
4894                         ast::ClassSetItem::Literal(ast::Literal {
4895                             span: span(1..2),
4896                             kind: ast::LiteralKind::Verbatim,
4897                             c: ']',
4898                         }),
4899                     ],
4900                 };
4901                 Ok((set, union))
4902             });
4903         assert_eq!(
4904             parser_ignore_whitespace("[ ] a]").parse_set_class_open(), {
4905                 let set = ast::ClassBracketed {
4906                     span: span(0..4),
4907                     negated: false,
4908                     kind: ast::ClassSet::union(ast::ClassSetUnion {
4909                         span: span(2..2),
4910                         items: vec![],
4911                     }),
4912                 };
4913                 let union = ast::ClassSetUnion {
4914                     span: span(2..3),
4915                     items: vec![
4916                         ast::ClassSetItem::Literal(ast::Literal {
4917                             span: span(2..3),
4918                             kind: ast::LiteralKind::Verbatim,
4919                             c: ']',
4920                         }),
4921                     ],
4922                 };
4923                 Ok((set, union))
4924             });
4925         assert_eq!(
4926             parser("[^]a]").parse_set_class_open(), {
4927                 let set = ast::ClassBracketed {
4928                     span: span(0..3),
4929                     negated: true,
4930                     kind: ast::ClassSet::union(ast::ClassSetUnion {
4931                         span: span(2..2),
4932                         items: vec![],
4933                     }),
4934                 };
4935                 let union = ast::ClassSetUnion {
4936                     span: span(2..3),
4937                     items: vec![
4938                         ast::ClassSetItem::Literal(ast::Literal {
4939                             span: span(2..3),
4940                             kind: ast::LiteralKind::Verbatim,
4941                             c: ']',
4942                         }),
4943                     ],
4944                 };
4945                 Ok((set, union))
4946             });
4947         assert_eq!(
4948             parser("[-]a]").parse_set_class_open(), {
4949                 let set = ast::ClassBracketed {
4950                     span: span(0..2),
4951                     negated: false,
4952                     kind: ast::ClassSet::union(ast::ClassSetUnion {
4953                         span: span(1..1),
4954                         items: vec![],
4955                     }),
4956                 };
4957                 let union = ast::ClassSetUnion {
4958                     span: span(1..2),
4959                     items: vec![
4960                         ast::ClassSetItem::Literal(ast::Literal {
4961                             span: span(1..2),
4962                             kind: ast::LiteralKind::Verbatim,
4963                             c: '-',
4964                         }),
4965                     ],
4966                 };
4967                 Ok((set, union))
4968             });
4969 
4970         assert_eq!(
4971             parser("[").parse_set_class_open().unwrap_err(),
4972             TestError {
4973                 span: span(0..1),
4974                 kind: ast::ErrorKind::ClassUnclosed,
4975             });
4976         assert_eq!(
4977             parser_ignore_whitespace("[    ")
4978             .parse_set_class_open()
4979             .unwrap_err(),
4980             TestError {
4981                 span: span(0..5),
4982                 kind: ast::ErrorKind::ClassUnclosed,
4983             });
4984         assert_eq!(
4985             parser("[^").parse_set_class_open().unwrap_err(),
4986             TestError {
4987                 span: span(0..2),
4988                 kind: ast::ErrorKind::ClassUnclosed,
4989             });
4990         assert_eq!(
4991             parser("[]").parse_set_class_open().unwrap_err(),
4992             TestError {
4993                 span: span(0..2),
4994                 kind: ast::ErrorKind::ClassUnclosed,
4995             });
4996         assert_eq!(
4997             parser("[-").parse_set_class_open().unwrap_err(),
4998             TestError {
4999                 span: span(0..2),
5000                 kind: ast::ErrorKind::ClassUnclosed,
5001             });
5002         assert_eq!(
5003             parser("[--").parse_set_class_open().unwrap_err(),
5004             TestError {
5005                 span: span(0..3),
5006                 kind: ast::ErrorKind::ClassUnclosed,
5007             });
5008     }
5009 
5010     #[test]
maybe_parse_ascii_class()5011     fn maybe_parse_ascii_class() {
5012         assert_eq!(
5013             parser(r"[:alnum:]").maybe_parse_ascii_class(),
5014             Some(ast::ClassAscii {
5015                 span: span(0..9),
5016                 kind: ast::ClassAsciiKind::Alnum,
5017                 negated: false,
5018             }));
5019         assert_eq!(
5020             parser(r"[:alnum:]A").maybe_parse_ascii_class(),
5021             Some(ast::ClassAscii {
5022                 span: span(0..9),
5023                 kind: ast::ClassAsciiKind::Alnum,
5024                 negated: false,
5025             }));
5026         assert_eq!(
5027             parser(r"[:^alnum:]").maybe_parse_ascii_class(),
5028             Some(ast::ClassAscii {
5029                 span: span(0..10),
5030                 kind: ast::ClassAsciiKind::Alnum,
5031                 negated: true,
5032             }));
5033 
5034         let p = parser(r"[:");
5035         assert_eq!(p.maybe_parse_ascii_class(), None);
5036         assert_eq!(p.offset(), 0);
5037 
5038         let p = parser(r"[:^");
5039         assert_eq!(p.maybe_parse_ascii_class(), None);
5040         assert_eq!(p.offset(), 0);
5041 
5042         let p = parser(r"[^:alnum:]");
5043         assert_eq!(p.maybe_parse_ascii_class(), None);
5044         assert_eq!(p.offset(), 0);
5045 
5046         let p = parser(r"[:alnnum:]");
5047         assert_eq!(p.maybe_parse_ascii_class(), None);
5048         assert_eq!(p.offset(), 0);
5049 
5050         let p = parser(r"[:alnum]");
5051         assert_eq!(p.maybe_parse_ascii_class(), None);
5052         assert_eq!(p.offset(), 0);
5053 
5054         let p = parser(r"[:alnum:");
5055         assert_eq!(p.maybe_parse_ascii_class(), None);
5056         assert_eq!(p.offset(), 0);
5057     }
5058 
5059     #[test]
parse_unicode_class()5060     fn parse_unicode_class() {
5061         assert_eq!(
5062             parser(r"\pN").parse_escape(),
5063             Ok(Primitive::Unicode(ast::ClassUnicode {
5064                 span: span(0..3),
5065                 negated: false,
5066                 kind: ast::ClassUnicodeKind::OneLetter('N'),
5067             })));
5068         assert_eq!(
5069             parser(r"\PN").parse_escape(),
5070             Ok(Primitive::Unicode(ast::ClassUnicode {
5071                 span: span(0..3),
5072                 negated: true,
5073                 kind: ast::ClassUnicodeKind::OneLetter('N'),
5074             })));
5075         assert_eq!(
5076             parser(r"\p{N}").parse_escape(),
5077             Ok(Primitive::Unicode(ast::ClassUnicode {
5078                 span: span(0..5),
5079                 negated: false,
5080                 kind: ast::ClassUnicodeKind::Named(s("N")),
5081             })));
5082         assert_eq!(
5083             parser(r"\P{N}").parse_escape(),
5084             Ok(Primitive::Unicode(ast::ClassUnicode {
5085                 span: span(0..5),
5086                 negated: true,
5087                 kind: ast::ClassUnicodeKind::Named(s("N")),
5088             })));
5089         assert_eq!(
5090             parser(r"\p{Greek}").parse_escape(),
5091             Ok(Primitive::Unicode(ast::ClassUnicode {
5092                 span: span(0..9),
5093                 negated: false,
5094                 kind: ast::ClassUnicodeKind::Named(s("Greek")),
5095             })));
5096 
5097         assert_eq!(
5098             parser(r"\p{scx:Katakana}").parse_escape(),
5099             Ok(Primitive::Unicode(ast::ClassUnicode {
5100                 span: span(0..16),
5101                 negated: false,
5102                 kind: ast::ClassUnicodeKind::NamedValue {
5103                     op: ast::ClassUnicodeOpKind::Colon,
5104                     name: s("scx"),
5105                     value: s("Katakana"),
5106                 },
5107             })));
5108         assert_eq!(
5109             parser(r"\p{scx=Katakana}").parse_escape(),
5110             Ok(Primitive::Unicode(ast::ClassUnicode {
5111                 span: span(0..16),
5112                 negated: false,
5113                 kind: ast::ClassUnicodeKind::NamedValue {
5114                     op: ast::ClassUnicodeOpKind::Equal,
5115                     name: s("scx"),
5116                     value: s("Katakana"),
5117                 },
5118             })));
5119         assert_eq!(
5120             parser(r"\p{scx!=Katakana}").parse_escape(),
5121             Ok(Primitive::Unicode(ast::ClassUnicode {
5122                 span: span(0..17),
5123                 negated: false,
5124                 kind: ast::ClassUnicodeKind::NamedValue {
5125                     op: ast::ClassUnicodeOpKind::NotEqual,
5126                     name: s("scx"),
5127                     value: s("Katakana"),
5128                 },
5129             })));
5130 
5131         assert_eq!(
5132             parser(r"\p{:}").parse_escape(),
5133             Ok(Primitive::Unicode(ast::ClassUnicode {
5134                 span: span(0..5),
5135                 negated: false,
5136                 kind: ast::ClassUnicodeKind::NamedValue {
5137                     op: ast::ClassUnicodeOpKind::Colon,
5138                     name: s(""),
5139                     value: s(""),
5140                 },
5141             })));
5142         assert_eq!(
5143             parser(r"\p{=}").parse_escape(),
5144             Ok(Primitive::Unicode(ast::ClassUnicode {
5145                 span: span(0..5),
5146                 negated: false,
5147                 kind: ast::ClassUnicodeKind::NamedValue {
5148                     op: ast::ClassUnicodeOpKind::Equal,
5149                     name: s(""),
5150                     value: s(""),
5151                 },
5152             })));
5153         assert_eq!(
5154             parser(r"\p{!=}").parse_escape(),
5155             Ok(Primitive::Unicode(ast::ClassUnicode {
5156                 span: span(0..6),
5157                 negated: false,
5158                 kind: ast::ClassUnicodeKind::NamedValue {
5159                     op: ast::ClassUnicodeOpKind::NotEqual,
5160                     name: s(""),
5161                     value: s(""),
5162                 },
5163             })));
5164 
5165         assert_eq!(
5166             parser(r"\p").parse_escape().unwrap_err(),
5167             TestError {
5168                 span: span(2..2),
5169                 kind: ast::ErrorKind::EscapeUnexpectedEof,
5170             });
5171         assert_eq!(
5172             parser(r"\p{").parse_escape().unwrap_err(),
5173             TestError {
5174                 span: span(3..3),
5175                 kind: ast::ErrorKind::EscapeUnexpectedEof,
5176             });
5177         assert_eq!(
5178             parser(r"\p{N").parse_escape().unwrap_err(),
5179             TestError {
5180                 span: span(4..4),
5181                 kind: ast::ErrorKind::EscapeUnexpectedEof,
5182             });
5183         assert_eq!(
5184             parser(r"\p{Greek").parse_escape().unwrap_err(),
5185             TestError {
5186                 span: span(8..8),
5187                 kind: ast::ErrorKind::EscapeUnexpectedEof,
5188             });
5189 
5190         assert_eq!(
5191             parser(r"\pNz").parse(),
5192             Ok(Ast::Concat(ast::Concat {
5193                 span: span(0..4),
5194                 asts: vec![
5195                     Ast::Class(ast::Class::Unicode(ast::ClassUnicode {
5196                         span: span(0..3),
5197                         negated: false,
5198                         kind: ast::ClassUnicodeKind::OneLetter('N'),
5199                     })),
5200                     Ast::Literal(ast::Literal {
5201                         span: span(3..4),
5202                         kind: ast::LiteralKind::Verbatim,
5203                         c: 'z',
5204                     }),
5205                 ],
5206             })));
5207         assert_eq!(
5208             parser(r"\p{Greek}z").parse(),
5209             Ok(Ast::Concat(ast::Concat {
5210                 span: span(0..10),
5211                 asts: vec![
5212                     Ast::Class(ast::Class::Unicode(ast::ClassUnicode {
5213                         span: span(0..9),
5214                         negated: false,
5215                         kind: ast::ClassUnicodeKind::Named(s("Greek")),
5216                     })),
5217                     Ast::Literal(ast::Literal {
5218                         span: span(9..10),
5219                         kind: ast::LiteralKind::Verbatim,
5220                         c: 'z',
5221                     }),
5222                 ],
5223             })));
5224     }
5225 
5226     #[test]
parse_perl_class()5227     fn parse_perl_class() {
5228         assert_eq!(
5229             parser(r"\d").parse_escape(),
5230             Ok(Primitive::Perl(ast::ClassPerl {
5231                 span: span(0..2),
5232                 kind: ast::ClassPerlKind::Digit,
5233                 negated: false,
5234             })));
5235         assert_eq!(
5236             parser(r"\D").parse_escape(),
5237             Ok(Primitive::Perl(ast::ClassPerl {
5238                 span: span(0..2),
5239                 kind: ast::ClassPerlKind::Digit,
5240                 negated: true,
5241             })));
5242         assert_eq!(
5243             parser(r"\s").parse_escape(),
5244             Ok(Primitive::Perl(ast::ClassPerl {
5245                 span: span(0..2),
5246                 kind: ast::ClassPerlKind::Space,
5247                 negated: false,
5248             })));
5249         assert_eq!(
5250             parser(r"\S").parse_escape(),
5251             Ok(Primitive::Perl(ast::ClassPerl {
5252                 span: span(0..2),
5253                 kind: ast::ClassPerlKind::Space,
5254                 negated: true,
5255             })));
5256         assert_eq!(
5257             parser(r"\w").parse_escape(),
5258             Ok(Primitive::Perl(ast::ClassPerl {
5259                 span: span(0..2),
5260                 kind: ast::ClassPerlKind::Word,
5261                 negated: false,
5262             })));
5263         assert_eq!(
5264             parser(r"\W").parse_escape(),
5265             Ok(Primitive::Perl(ast::ClassPerl {
5266                 span: span(0..2),
5267                 kind: ast::ClassPerlKind::Word,
5268                 negated: true,
5269             })));
5270 
5271         assert_eq!(
5272             parser(r"\d").parse(),
5273             Ok(Ast::Class(ast::Class::Perl(ast::ClassPerl {
5274                 span: span(0..2),
5275                 kind: ast::ClassPerlKind::Digit,
5276                 negated: false,
5277             }))));
5278         assert_eq!(
5279             parser(r"\dz").parse(),
5280             Ok(Ast::Concat(ast::Concat {
5281                 span: span(0..3),
5282                 asts: vec![
5283                     Ast::Class(ast::Class::Perl(ast::ClassPerl {
5284                         span: span(0..2),
5285                         kind: ast::ClassPerlKind::Digit,
5286                         negated: false,
5287                     })),
5288                     Ast::Literal(ast::Literal {
5289                         span: span(2..3),
5290                         kind: ast::LiteralKind::Verbatim,
5291                         c: 'z',
5292                     }),
5293                 ],
5294             })));
5295     }
5296 
5297     // This tests a bug fix where the nest limit checker wasn't decrementing
5298     // its depth during post-traversal, which causes long regexes to trip
5299     // the default limit too aggressively.
5300     #[test]
regression_454_nest_too_big()5301     fn regression_454_nest_too_big() {
5302         let pattern = r#"
5303         2(?:
5304           [45]\d{3}|
5305           7(?:
5306             1[0-267]|
5307             2[0-289]|
5308             3[0-29]|
5309             4[01]|
5310             5[1-3]|
5311             6[013]|
5312             7[0178]|
5313             91
5314           )|
5315           8(?:
5316             0[125]|
5317             [139][1-6]|
5318             2[0157-9]|
5319             41|
5320             6[1-35]|
5321             7[1-5]|
5322             8[1-8]|
5323             90
5324           )|
5325           9(?:
5326             0[0-2]|
5327             1[0-4]|
5328             2[568]|
5329             3[3-6]|
5330             5[5-7]|
5331             6[0167]|
5332             7[15]|
5333             8[0146-9]
5334           )
5335         )\d{4}
5336         "#;
5337         assert!(parser_nest_limit(pattern, 50).parse().is_ok());
5338     }
5339 
5340     // This tests that we treat a trailing `-` in a character class as a
5341     // literal `-` even when whitespace mode is enabled and there is whitespace
5342     // after the trailing `-`.
5343     #[test]
regression_455_trailing_dash_ignore_whitespace()5344     fn regression_455_trailing_dash_ignore_whitespace() {
5345         assert!(parser("(?x)[ / - ]").parse().is_ok());
5346         assert!(parser("(?x)[ a - ]").parse().is_ok());
5347         assert!(parser("(?x)[
5348             a
5349             - ]
5350         ").parse().is_ok());
5351         assert!(parser("(?x)[
5352             a # wat
5353             - ]
5354         ").parse().is_ok());
5355 
5356         assert!(parser("(?x)[ / -").parse().is_err());
5357         assert!(parser("(?x)[ / - ").parse().is_err());
5358         assert!(parser("(?x)[
5359             / -
5360         ").parse().is_err());
5361         assert!(parser("(?x)[
5362             / - # wat
5363         ").parse().is_err());
5364     }
5365 }
5366