1 // Copyright 2018 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10 
11 /*!
12 This module provides a regular expression parser.
13 */
14 
15 use std::borrow::Borrow;
16 use std::cell::{Cell, RefCell};
17 use std::mem;
18 use std::result;
19 
20 use ast::{self, Ast, Position, Span};
21 use either::Either;
22 
23 use is_meta_character;
24 
25 type Result<T> = result::Result<T, ast::Error>;
26 
27 /// A primitive is an expression with no sub-expressions. This includes
28 /// literals, assertions and non-set character classes. This representation
29 /// is used as intermediate state in the parser.
30 ///
31 /// This does not include ASCII character classes, since they can only appear
32 /// within a set character class.
33 #[derive(Clone, Debug, Eq, PartialEq)]
34 enum Primitive {
35     Literal(ast::Literal),
36     Assertion(ast::Assertion),
37     Dot(Span),
38     Perl(ast::ClassPerl),
39     Unicode(ast::ClassUnicode),
40 }
41 
42 impl Primitive {
43     /// Return the span of this primitive.
span(&self) -> &Span44     fn span(&self) -> &Span {
45         match *self {
46             Primitive::Literal(ref x) => &x.span,
47             Primitive::Assertion(ref x) => &x.span,
48             Primitive::Dot(ref span) => span,
49             Primitive::Perl(ref x) => &x.span,
50             Primitive::Unicode(ref x) => &x.span,
51         }
52     }
53 
54     /// Convert this primitive into a proper AST.
into_ast(self) -> Ast55     fn into_ast(self) -> Ast {
56         match self {
57             Primitive::Literal(lit) => Ast::Literal(lit),
58             Primitive::Assertion(assert) => Ast::Assertion(assert),
59             Primitive::Dot(span) => Ast::Dot(span),
60             Primitive::Perl(cls) => Ast::Class(ast::Class::Perl(cls)),
61             Primitive::Unicode(cls) => Ast::Class(ast::Class::Unicode(cls)),
62         }
63     }
64 
65     /// Convert this primitive into an item in a character class.
66     ///
67     /// If this primitive is not a legal item (i.e., an assertion or a dot),
68     /// then return an error.
into_class_set_item<P: Borrow<Parser>>( self, p: &ParserI<P>, ) -> Result<ast::ClassSetItem>69     fn into_class_set_item<P: Borrow<Parser>>(
70         self,
71         p: &ParserI<P>,
72     ) -> Result<ast::ClassSetItem> {
73         use ast::ClassSetItem;
74         use self::Primitive::*;
75 
76         match self {
77             Literal(lit) => Ok(ClassSetItem::Literal(lit)),
78             Perl(cls) => Ok(ClassSetItem::Perl(cls)),
79             Unicode(cls) => Ok(ClassSetItem::Unicode(cls)),
80             x => Err(p.error(*x.span(), ast::ErrorKind::ClassEscapeInvalid)),
81         }
82     }
83 
84     /// Convert this primitive into a literal in a character class. In
85     /// particular, literals are the only valid items that can appear in
86     /// ranges.
87     ///
88     /// If this primitive is not a legal item (i.e., a class, assertion or a
89     /// dot), then return an error.
into_class_literal<P: Borrow<Parser>>( self, p: &ParserI<P>, ) -> Result<ast::Literal>90     fn into_class_literal<P: Borrow<Parser>>(
91         self,
92         p: &ParserI<P>,
93     ) -> Result<ast::Literal> {
94         use self::Primitive::*;
95 
96         match self {
97             Literal(lit) => Ok(lit),
98             x => Err(p.error(*x.span(), ast::ErrorKind::ClassRangeLiteral)),
99         }
100     }
101 }
102 
103 /// Returns true if the given character is a hexadecimal digit.
is_hex(c: char) -> bool104 fn is_hex(c: char) -> bool {
105     ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')
106 }
107 
108 /// Returns true if the given character is a valid in a capture group name.
109 ///
110 /// If `first` is true, then `c` is treated as the first character in the
111 /// group name (which is not allowed to be a digit).
is_capture_char(c: char, first: bool) -> bool112 fn is_capture_char(c: char, first: bool) -> bool {
113     c == '_' || (!first && c >= '0' && c <= '9')
114     || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
115 }
116 
117 /// A builder for a regular expression parser.
118 ///
119 /// This builder permits modifying configuration options for the parser.
120 #[derive(Clone, Debug)]
121 pub struct ParserBuilder {
122     ignore_whitespace: bool,
123     nest_limit: u32,
124     octal: bool,
125 }
126 
127 impl Default for ParserBuilder {
default() -> ParserBuilder128     fn default() -> ParserBuilder {
129         ParserBuilder::new()
130     }
131 }
132 
133 impl ParserBuilder {
134     /// Create a new parser builder with a default configuration.
new() -> ParserBuilder135     pub fn new() -> ParserBuilder {
136         ParserBuilder {
137             ignore_whitespace: false,
138             nest_limit: 250,
139             octal: false,
140         }
141     }
142 
143     /// Build a parser from this configuration with the given pattern.
build(&self) -> Parser144     pub fn build(&self) -> Parser {
145         Parser {
146             pos: Cell::new(Position { offset: 0, line: 1, column: 1 }),
147             capture_index: Cell::new(0),
148             nest_limit: self.nest_limit,
149             octal: self.octal,
150             initial_ignore_whitespace: self.ignore_whitespace,
151             ignore_whitespace: Cell::new(self.ignore_whitespace),
152             comments: RefCell::new(vec![]),
153             stack_group: RefCell::new(vec![]),
154             stack_class: RefCell::new(vec![]),
155             capture_names: RefCell::new(vec![]),
156             scratch: RefCell::new(String::new()),
157         }
158     }
159 
160     /// Set the nesting limit for this parser.
161     ///
162     /// The nesting limit controls how deep the abstract syntax tree is allowed
163     /// to be. If the AST exceeds the given limit (e.g., with too many nested
164     /// groups), then an error is returned by the parser.
165     ///
166     /// The purpose of this limit is to act as a heuristic to prevent stack
167     /// overflow for consumers that do structural induction on an `Ast` using
168     /// explicit recursion. While this crate never does this (instead using
169     /// constant stack space and moving the call stack to the heap), other
170     /// crates may.
171     ///
172     /// This limit is not checked until the entire Ast is parsed. Therefore,
173     /// if callers want to put a limit on the amount of heap space used, then
174     /// they should impose a limit on the length, in bytes, of the concrete
175     /// pattern string. In particular, this is viable since this parser
176     /// implementation will limit itself to heap space proportional to the
177     /// lenth of the pattern string.
178     ///
179     /// Note that a nest limit of `0` will return a nest limit error for most
180     /// patterns but not all. For example, a nest limit of `0` permits `a` but
181     /// not `ab`, since `ab` requires a concatenation, which results in a nest
182     /// depth of `1`. In general, a nest limit is not something that manifests
183     /// in an obvious way in the concrete syntax, therefore, it should not be
184     /// used in a granular way.
nest_limit(&mut self, limit: u32) -> &mut ParserBuilder185     pub fn nest_limit(&mut self, limit: u32) -> &mut ParserBuilder {
186         self.nest_limit = limit;
187         self
188     }
189 
190     /// Whether to support octal syntax or not.
191     ///
192     /// Octal syntax is a little-known way of uttering Unicode codepoints in
193     /// a regular expression. For example, `a`, `\x61`, `\u0061` and
194     /// `\141` are all equivalent regular expressions, where the last example
195     /// shows octal syntax.
196     ///
197     /// While supporting octal syntax isn't in and of itself a problem, it does
198     /// make good error messages harder. That is, in PCRE based regex engines,
199     /// syntax like `\0` invokes a backreference, which is explicitly
200     /// unsupported in Rust's regex engine. However, many users expect it to
201     /// be supported. Therefore, when octal support is disabled, the error
202     /// message will explicitly mention that backreferences aren't supported.
203     ///
204     /// Octal syntax is disabled by default.
octal(&mut self, yes: bool) -> &mut ParserBuilder205     pub fn octal(&mut self, yes: bool) -> &mut ParserBuilder {
206         self.octal = yes;
207         self
208     }
209 
210     /// Enable verbose mode in the regular expression.
211     ///
212     /// When enabled, verbose mode permits insigificant whitespace in many
213     /// places in the regular expression, as well as comments. Comments are
214     /// started using `#` and continue until the end of the line.
215     ///
216     /// By default, this is disabled. It may be selectively enabled in the
217     /// regular expression by using the `x` flag regardless of this setting.
ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder218     pub fn ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder {
219         self.ignore_whitespace = yes;
220         self
221     }
222 }
223 
224 /// A regular expression parser.
225 ///
226 /// This parses a string representation of a regular expression into an
227 /// abstract syntax tree. The size of the tree is proportional to the length
228 /// of the regular expression pattern.
229 ///
230 /// A `Parser` can be configured in more detail via a
231 /// [`ParserBuilder`](struct.ParserBuilder.html).
232 #[derive(Clone, Debug)]
233 pub struct Parser {
234     /// The current position of the parser.
235     pos: Cell<Position>,
236     /// The current capture index.
237     capture_index: Cell<u32>,
238     /// The maximum number of open parens/brackets allowed. If the parser
239     /// exceeds this number, then an error is returned.
240     nest_limit: u32,
241     /// Whether to support octal syntax or not. When `false`, the parser will
242     /// return an error helpfully pointing out that backreferences are not
243     /// supported.
244     octal: bool,
245     /// The initial setting for `ignore_whitespace` as provided by
246     /// Th`ParserBuilder`. is is used when reseting the parser's state.
247     initial_ignore_whitespace: bool,
248     /// Whether whitespace should be ignored. When enabled, comments are
249     /// also permitted.
250     ignore_whitespace: Cell<bool>,
251     /// A list of comments, in order of appearance.
252     comments: RefCell<Vec<ast::Comment>>,
253     /// A stack of grouped sub-expressions, including alternations.
254     stack_group: RefCell<Vec<GroupState>>,
255     /// A stack of nested character classes. This is only non-empty when
256     /// parsing a class.
257     stack_class: RefCell<Vec<ClassState>>,
258     /// A sorted sequence of capture names. This is used to detect duplicate
259     /// capture names and report an error if one is detected.
260     capture_names: RefCell<Vec<ast::CaptureName>>,
261     /// A scratch buffer used in various places. Mostly this is used to
262     /// accumulate relevant characters from parts of a pattern.
263     scratch: RefCell<String>,
264 }
265 
266 /// ParserI is the internal parser implementation.
267 ///
268 /// We use this separate type so that we can carry the provided pattern string
269 /// along with us. In particular, a `Parser` internal state is not tied to any
270 /// one pattern, but `ParserI` is.
271 ///
272 /// This type also lets us use `ParserI<&Parser>` in production code while
273 /// retaining the convenience of `ParserI<Parser>` for tests, which sometimes
274 /// work against the internal interface of the parser.
275 #[derive(Clone, Debug)]
276 struct ParserI<'s, P> {
277     /// The parser state/configuration.
278     parser: P,
279     /// The full regular expression provided by the user.
280     pattern: &'s str,
281 }
282 
283 /// GroupState represents a single stack frame while parsing nested groups
284 /// and alternations. Each frame records the state up to an opening parenthesis
285 /// or a alternating bracket `|`.
286 #[derive(Clone, Debug)]
287 enum GroupState {
288     /// This state is pushed whenever an opening group is found.
289     Group {
290         /// The concatenation immediately preceding the opening group.
291         concat: ast::Concat,
292         /// The group that has been opened. Its sub-AST is always empty.
293         group: ast::Group,
294         /// Whether this group has the `x` flag enabled or not.
295         ignore_whitespace: bool,
296     },
297     /// This state is pushed whenever a new alternation branch is found. If
298     /// an alternation branch is found and this state is at the top of the
299     /// stack, then this state should be modified to include the new
300     /// alternation.
301     Alternation(ast::Alternation),
302 }
303 
304 /// ClassState represents a single stack frame while parsing character classes.
305 /// Each frame records the state up to an intersection, difference, symmetric
306 /// difference or nested class.
307 ///
308 /// Note that a parser's character class stack is only non-empty when parsing
309 /// a character class. In all other cases, it is empty.
310 #[derive(Clone, Debug)]
311 enum ClassState {
312     /// This state is pushed whenever an opening bracket is found.
313     Open {
314         /// The union of class items immediately preceding this class.
315         union: ast::ClassSetUnion,
316         /// The class that has been opened. Typically this just corresponds
317         /// to the `[`, but it can also include `[^` since `^` indicates
318         /// negation of the class.
319         set: ast::ClassBracketed,
320     },
321     /// This state is pushed when a operator is seen. When popped, the stored
322     /// set becomes the left hand side of the operator.
323     Op {
324         /// The type of the operation, i.e., &&, -- or ~~.
325         kind: ast::ClassSetBinaryOpKind,
326         /// The left-hand side of the operator.
327         lhs: ast::ClassSet,
328     },
329 }
330 
331 impl Parser {
332     /// Create a new parser with a default configuration.
333     ///
334     /// The parser can be run with either the `parse` or `parse_with_comments`
335     /// methods. The parse methods return an abstract syntax tree.
336     ///
337     /// To set configuration options on the parser, use
338     /// [`ParserBuilder`](struct.ParserBuilder.html).
new() -> Parser339     pub fn new() -> Parser {
340         ParserBuilder::new().build()
341     }
342 
343     /// Parse the regular expression into an abstract syntax tree.
parse(&mut self, pattern: &str) -> Result<Ast>344     pub fn parse(&mut self, pattern: &str) -> Result<Ast> {
345         ParserI::new(self, pattern).parse()
346     }
347 
348     /// Parse the regular expression and return an abstract syntax tree with
349     /// all of the comments found in the pattern.
parse_with_comments( &mut self, pattern: &str, ) -> Result<ast::WithComments>350     pub fn parse_with_comments(
351         &mut self,
352         pattern: &str,
353     ) -> Result<ast::WithComments> {
354         ParserI::new(self, pattern).parse_with_comments()
355     }
356 
357     /// Reset the internal state of a parser.
358     ///
359     /// This is called at the beginning of every parse. This prevents the
360     /// parser from running with inconsistent state (say, if a previous
361     /// invocation returned an error and the parser is reused).
reset(&self)362     fn reset(&self) {
363         // These settings should be in line with the construction
364         // in `ParserBuilder::build`.
365         self.pos.set(Position { offset: 0, line: 1, column: 1});
366         self.ignore_whitespace.set(self.initial_ignore_whitespace);
367         self.comments.borrow_mut().clear();
368         self.stack_group.borrow_mut().clear();
369         self.stack_class.borrow_mut().clear();
370     }
371 }
372 
373 impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
374     /// Build an internal parser from a parser configuration and a pattern.
new(parser: P, pattern: &'s str) -> ParserI<'s, P>375     fn new(parser: P, pattern: &'s str) -> ParserI<'s, P> {
376         ParserI { parser: parser, pattern: pattern }
377     }
378 
379     /// Return a reference to the parser state.
parser(&self) -> &Parser380     fn parser(&self) -> &Parser {
381         self.parser.borrow()
382     }
383 
384     /// Return a reference to the pattern being parsed.
pattern(&self) -> &str385     fn pattern(&self) -> &str {
386         self.pattern.borrow()
387     }
388 
389     /// Create a new error with the given span and error type.
error(&self, span: Span, kind: ast::ErrorKind) -> ast::Error390     fn error(&self, span: Span, kind: ast::ErrorKind) -> ast::Error {
391         ast::Error {
392             kind: kind,
393             pattern: self.pattern().to_string(),
394             span: span,
395         }
396     }
397 
398     /// Return the current offset of the parser.
399     ///
400     /// The offset starts at `0` from the beginning of the regular expression
401     /// pattern string.
offset(&self) -> usize402     fn offset(&self) -> usize {
403         self.parser().pos.get().offset
404     }
405 
406     /// Return the current line number of the parser.
407     ///
408     /// The line number starts at `1`.
line(&self) -> usize409     fn line(&self) -> usize {
410         self.parser().pos.get().line
411     }
412 
413     /// Return the current column of the parser.
414     ///
415     /// The column number starts at `1` and is reset whenever a `\n` is seen.
column(&self) -> usize416     fn column(&self) -> usize {
417         self.parser().pos.get().column
418     }
419 
420     /// Return the next capturing index. Each subsequent call increments the
421     /// internal index.
422     ///
423     /// The span given should correspond to the location of the opening
424     /// parenthesis.
425     ///
426     /// If the capture limit is exceeded, then an error is returned.
next_capture_index(&self, span: Span) -> Result<u32>427     fn next_capture_index(&self, span: Span) -> Result<u32> {
428         let current = self.parser().capture_index.get();
429         let i = current.checked_add(1).ok_or_else(|| {
430             self.error(span, ast::ErrorKind::CaptureLimitExceeded)
431         })?;
432         self.parser().capture_index.set(i);
433         Ok(i)
434     }
435 
436     /// Adds the given capture name to this parser. If this capture name has
437     /// already been used, then an error is returned.
add_capture_name(&self, cap: &ast::CaptureName) -> Result<()>438     fn add_capture_name(&self, cap: &ast::CaptureName) -> Result<()> {
439         let mut names = self.parser().capture_names.borrow_mut();
440         match names.binary_search_by_key(
441             &cap.name.as_str(),
442             |c| c.name.as_str(),
443         ) {
444             Err(i) => {
445                 names.insert(i, cap.clone());
446                 Ok(())
447             }
448             Ok(i) => {
449                 Err(self.error(cap.span, ast::ErrorKind::GroupNameDuplicate {
450                     original: names[i].span,
451                 }))
452             }
453         }
454     }
455 
456     /// Return whether the parser should ignore whitespace or not.
ignore_whitespace(&self) -> bool457     fn ignore_whitespace(&self) -> bool {
458         self.parser().ignore_whitespace.get()
459     }
460 
461     /// Return the character at the current position of the parser.
462     ///
463     /// This panics if the current position does not point to a valid char.
char(&self) -> char464     fn char(&self) -> char {
465         self.char_at(self.offset())
466     }
467 
468     /// Return the character at the given position.
469     ///
470     /// This panics if the given position does not point to a valid char.
char_at(&self, i: usize) -> char471     fn char_at(&self, i: usize) -> char {
472         self.pattern()[i..].chars().next()
473             .unwrap_or_else(|| {
474                 panic!("expected char at offset {}", i)
475             })
476     }
477 
478     /// Bump the parser to the next Unicode scalar value.
479     ///
480     /// If the end of the input has been reached, then `false` is returned.
bump(&self) -> bool481     fn bump(&self) -> bool {
482         if self.is_eof() {
483             return false;
484         }
485         let Position { mut offset, mut line, mut column } = self.pos();
486         if self.char() == '\n' {
487             line = line.checked_add(1).unwrap();
488             column = 1;
489         } else {
490             column = column.checked_add(1).unwrap();
491         }
492         offset += self.char().len_utf8();
493         self.parser().pos.set(Position {
494             offset: offset,
495             line: line,
496             column: column,
497         });
498         self.pattern()[self.offset()..].chars().next().is_some()
499     }
500 
501     /// If the substring starting at the current position of the parser has
502     /// the given prefix, then bump the parser to the character immediately
503     /// following the prefix and return true. Otherwise, don't bump the parser
504     /// and return false.
bump_if(&self, prefix: &str) -> bool505     fn bump_if(&self, prefix: &str) -> bool {
506         if self.pattern()[self.offset()..].starts_with(prefix) {
507             for _ in 0..prefix.chars().count() {
508                 self.bump();
509             }
510             true
511         } else {
512             false
513         }
514     }
515 
516     /// Returns true if and only if the parser is positioned at a look-around
517     /// prefix. The conditions under which this returns true must always
518     /// correspond to a regular expression that would otherwise be consider
519     /// invalid.
520     ///
521     /// This should only be called immediately after parsing the opening of
522     /// a group or a set of flags.
is_lookaround_prefix(&self) -> bool523     fn is_lookaround_prefix(&self) -> bool {
524         self.bump_if("?=")
525         || self.bump_if("?!")
526         || self.bump_if("?<=")
527         || self.bump_if("?<!")
528     }
529 
530     /// Bump the parser, and if the `x` flag is enabled, bump through any
531     /// subsequent spaces. Return true if and only if the parser is not at
532     /// EOF.
bump_and_bump_space(&self) -> bool533     fn bump_and_bump_space(&self) -> bool {
534         if !self.bump() {
535             return false;
536         }
537         self.bump_space();
538         !self.is_eof()
539     }
540 
541     /// If the `x` flag is enabled (i.e., whitespace insensitivity with
542     /// comments), then this will advance the parser through all whitespace
543     /// and comments to the next non-whitespace non-comment byte.
544     ///
545     /// If the `x` flag is disabled, then this is a no-op.
546     ///
547     /// This should be used selectively throughout the parser where
548     /// arbitrary whitespace is permitted when the `x` flag is enabled. For
549     /// example, `{   5  , 6}` is equivalent to `{5,6}`.
bump_space(&self)550     fn bump_space(&self) {
551         if !self.ignore_whitespace() {
552             return;
553         }
554         while !self.is_eof() {
555             if self.char().is_whitespace() {
556                 self.bump();
557             } else if self.char() == '#' {
558                 let start = self.pos();
559                 let mut comment_text = String::new();
560                 self.bump();
561                 while !self.is_eof() {
562                     let c = self.char();
563                     self.bump();
564                     if c == '\n' {
565                         break;
566                     }
567                     comment_text.push(c);
568                 }
569                 let comment = ast::Comment {
570                     span: Span::new(start, self.pos()),
571                     comment: comment_text,
572                 };
573                 self.parser().comments.borrow_mut().push(comment);
574             } else {
575                 break;
576             }
577         }
578     }
579 
580     /// Peek at the next character in the input without advancing the parser.
581     ///
582     /// If the input has been exhausted, then this returns `None`.
peek(&self) -> Option<char>583     fn peek(&self) -> Option<char> {
584         if self.is_eof() {
585             return None;
586         }
587         self.pattern()[self.offset() + self.char().len_utf8()..].chars().next()
588     }
589 
590     /// Like peek, but will ignore spaces when the parser is in whitespace
591     /// insensitive mode.
peek_space(&self) -> Option<char>592     fn peek_space(&self) -> Option<char> {
593         if !self.ignore_whitespace() {
594             return self.peek();
595         }
596         if self.is_eof() {
597             return None;
598         }
599         let mut start = self.offset() + self.char().len_utf8();
600         let mut in_comment = false;
601         for (i, c) in self.pattern()[start..].char_indices() {
602             if c.is_whitespace() {
603                 continue;
604             } else if !in_comment && c == '#' {
605                 in_comment = true;
606             } else if in_comment && c == '\n' {
607                 in_comment = false;
608             } else {
609                 start += i;
610                 break;
611             }
612         }
613         self.pattern()[start..].chars().next()
614     }
615 
616     /// Returns true if the next call to `bump` would return false.
is_eof(&self) -> bool617     fn is_eof(&self) -> bool {
618         self.offset() == self.pattern().len()
619     }
620 
621     /// Return the current position of the parser, which includes the offset,
622     /// line and column.
pos(&self) -> Position623     fn pos(&self) -> Position {
624         self.parser().pos.get()
625     }
626 
627     /// Create a span at the current position of the parser. Both the start
628     /// and end of the span are set.
span(&self) -> Span629     fn span(&self) -> Span {
630         Span::splat(self.pos())
631     }
632 
633     /// Create a span that covers the current character.
span_char(&self) -> Span634     fn span_char(&self) -> Span {
635         let mut next = Position {
636             offset: self.offset().checked_add(self.char().len_utf8()).unwrap(),
637             line: self.line(),
638             column: self.column().checked_add(1).unwrap(),
639         };
640         if self.char() == '\n' {
641             next.line += 1;
642             next.column = 1;
643         }
644         Span::new(self.pos(), next)
645     }
646 
647     /// Parse and push a single alternation on to the parser's internal stack.
648     /// If the top of the stack already has an alternation, then add to that
649     /// instead of pushing a new one.
650     ///
651     /// The concatenation given corresponds to a single alternation branch.
652     /// The concatenation returned starts the next branch and is empty.
653     ///
654     /// This assumes the parser is currently positioned at `|` and will advance
655     /// the parser to the character following `|`.
push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat>656     fn push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
657         assert_eq!(self.char(), '|');
658         concat.span.end = self.pos();
659         self.push_or_add_alternation(concat);
660         self.bump();
661         Ok(ast::Concat {
662             span: self.span(),
663             asts: vec![],
664         })
665     }
666 
667     /// Pushes or adds the given branch of an alternation to the parser's
668     /// internal stack of state.
push_or_add_alternation(&self, concat: ast::Concat)669     fn push_or_add_alternation(&self, concat: ast::Concat) {
670         use self::GroupState::*;
671 
672         let mut stack = self.parser().stack_group.borrow_mut();
673         if let Some(&mut Alternation(ref mut alts)) = stack.last_mut() {
674             alts.asts.push(concat.into_ast());
675             return;
676         }
677         stack.push(Alternation(ast::Alternation {
678             span: Span::new(concat.span.start, self.pos()),
679             asts: vec![concat.into_ast()],
680         }));
681     }
682 
683     /// Parse and push a group AST (and its parent concatenation) on to the
684     /// parser's internal stack. Return a fresh concatenation corresponding
685     /// to the group's sub-AST.
686     ///
687     /// If a set of flags was found (with no group), then the concatenation
688     /// is returned with that set of flags added.
689     ///
690     /// This assumes that the parser is currently positioned on the opening
691     /// parenthesis. It advances the parser to the character at the start
692     /// of the sub-expression (or adjoining expression).
693     ///
694     /// If there was a problem parsing the start of the group, then an error
695     /// is returned.
push_group(&self, mut concat: ast::Concat) -> Result<ast::Concat>696     fn push_group(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
697         assert_eq!(self.char(), '(');
698         match self.parse_group()? {
699             Either::Left(set) => {
700                 let ignore = set.flags.flag_state(ast::Flag::IgnoreWhitespace);
701                 if let Some(v) = ignore {
702                     self.parser().ignore_whitespace.set(v);
703                 }
704 
705                 concat.asts.push(Ast::Flags(set));
706                 Ok(concat)
707             }
708             Either::Right(group) => {
709                 let old_ignore_whitespace = self.ignore_whitespace();
710                 let new_ignore_whitespace = group
711                     .flags()
712                     .and_then(|f| f.flag_state(ast::Flag::IgnoreWhitespace))
713                     .unwrap_or(old_ignore_whitespace);
714                 self.parser().stack_group.borrow_mut().push(GroupState::Group {
715                     concat: concat,
716                     group: group,
717                     ignore_whitespace: old_ignore_whitespace,
718                 });
719                 self.parser().ignore_whitespace.set(new_ignore_whitespace);
720                 Ok(ast::Concat {
721                     span: self.span(),
722                     asts: vec![],
723                 })
724             }
725         }
726     }
727 
728     /// Pop a group AST from the parser's internal stack and set the group's
729     /// AST to the given concatenation. Return the concatenation containing
730     /// the group.
731     ///
732     /// This assumes that the parser is currently positioned on the closing
733     /// parenthesis and advances the parser to the character following the `)`.
734     ///
735     /// If no such group could be popped, then an unopened group error is
736     /// returned.
pop_group(&self, mut group_concat: ast::Concat) -> Result<ast::Concat>737     fn pop_group(&self, mut group_concat: ast::Concat) -> Result<ast::Concat> {
738         use self::GroupState::*;
739 
740         assert_eq!(self.char(), ')');
741         let mut stack = self.parser().stack_group.borrow_mut();
742         let (mut prior_concat, mut group, ignore_whitespace, alt) =
743             match stack.pop() {
744                 Some(Group { concat, group, ignore_whitespace }) => {
745                     (concat, group, ignore_whitespace, None)
746                 }
747                 Some(Alternation(alt)) => {
748                     match stack.pop() {
749                         Some(Group { concat, group, ignore_whitespace }) => {
750                             (concat, group, ignore_whitespace, Some(alt))
751                         }
752                         None | Some(Alternation(_)) => {
753                             return Err(self.error(
754                                 self.span_char(),
755                                 ast::ErrorKind::GroupUnopened,
756                             ));
757                         }
758                     }
759                 }
760                 None => {
761                     return Err(self.error(
762                         self.span_char(),
763                         ast::ErrorKind::GroupUnopened,
764                     ));
765                 }
766             };
767         self.parser().ignore_whitespace.set(ignore_whitespace);
768         group_concat.span.end = self.pos();
769         self.bump();
770         group.span.end = self.pos();
771         match alt {
772             Some(mut alt) => {
773                 alt.span.end = group_concat.span.end;
774                 alt.asts.push(group_concat.into_ast());
775                 group.ast = Box::new(alt.into_ast());
776             }
777             None => {
778                 group.ast = Box::new(group_concat.into_ast());
779             }
780         }
781         prior_concat.asts.push(Ast::Group(group));
782         Ok(prior_concat)
783     }
784 
785     /// Pop the last state from the parser's internal stack, if it exists, and
786     /// add the given concatenation to it. There either must be no state or a
787     /// single alternation item on the stack. Any other scenario produces an
788     /// error.
789     ///
790     /// This assumes that the parser has advanced to the end.
pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast>791     fn pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast> {
792         concat.span.end = self.pos();
793         let mut stack = self.parser().stack_group.borrow_mut();
794         let ast = match stack.pop() {
795             None => Ok(concat.into_ast()),
796             Some(GroupState::Alternation(mut alt)) => {
797                 alt.span.end = self.pos();
798                 alt.asts.push(concat.into_ast());
799                 Ok(Ast::Alternation(alt))
800             }
801             Some(GroupState::Group { group, .. }) => {
802                 return Err(self.error(
803                     group.span,
804                     ast::ErrorKind::GroupUnclosed,
805                 ));
806             }
807         };
808         // If we try to pop again, there should be nothing.
809         match stack.pop() {
810             None => ast,
811             Some(GroupState::Alternation(_)) => {
812                 // This unreachable is unfortunate. This case can't happen
813                 // because the only way we can be here is if there were two
814                 // `GroupState::Alternation`s adjacent in the parser's stack,
815                 // which we guarantee to never happen because we never push a
816                 // `GroupState::Alternation` if one is already at the top of
817                 // the stack.
818                 unreachable!()
819             }
820             Some(GroupState::Group { group, .. }) => {
821                 Err(self.error(group.span, ast::ErrorKind::GroupUnclosed))
822             }
823         }
824     }
825 
826     /// Parse the opening of a character class and push the current class
827     /// parsing context onto the parser's stack. This assumes that the parser
828     /// is positioned at an opening `[`. The given union should correspond to
829     /// the union of set items built up before seeing the `[`.
830     ///
831     /// If there was a problem parsing the opening of the class, then an error
832     /// is returned. Otherwise, a new union of set items for the class is
833     /// returned (which may be populated with either a `]` or a `-`).
push_class_open( &self, parent_union: ast::ClassSetUnion, ) -> Result<ast::ClassSetUnion>834     fn push_class_open(
835         &self,
836         parent_union: ast::ClassSetUnion,
837     ) -> Result<ast::ClassSetUnion> {
838         assert_eq!(self.char(), '[');
839 
840         let (nested_set, nested_union) = self.parse_set_class_open()?;
841         self.parser().stack_class.borrow_mut().push(ClassState::Open {
842             union: parent_union,
843             set: nested_set,
844         });
845         Ok(nested_union)
846     }
847 
848     /// Parse the end of a character class set and pop the character class
849     /// parser stack. The union given corresponds to the last union built
850     /// before seeing the closing `]`. The union returned corresponds to the
851     /// parent character class set with the nested class added to it.
852     ///
853     /// This assumes that the parser is positioned at a `]` and will advance
854     /// the parser to the byte immediately following the `]`.
855     ///
856     /// If the stack is empty after popping, then this returns the final
857     /// "top-level" character class AST (where a "top-level" character class
858     /// is one that is not nested inside any other character class).
859     ///
860     /// If there is no corresponding opening bracket on the parser's stack,
861     /// then an error is returned.
pop_class( &self, nested_union: ast::ClassSetUnion, ) -> Result<Either<ast::ClassSetUnion, ast::Class>>862     fn pop_class(
863         &self,
864         nested_union: ast::ClassSetUnion,
865     ) -> Result<Either<ast::ClassSetUnion, ast::Class>> {
866         assert_eq!(self.char(), ']');
867 
868         let item = ast::ClassSet::Item(nested_union.into_item());
869         let prevset = self.pop_class_op(item);
870         let mut stack = self.parser().stack_class.borrow_mut();
871         match stack.pop() {
872             None => {
873                 // We can never observe an empty stack:
874                 //
875                 // 1) We are guaranteed to start with a non-empty stack since
876                 //    the character class parser is only initiated when it sees
877                 //    a `[`.
878                 // 2) If we ever observe an empty stack while popping after
879                 //    seeing a `]`, then we signal the character class parser
880                 //    to terminate.
881                 panic!("unexpected empty character class stack")
882             },
883             Some(ClassState::Op { .. }) => {
884                 // This panic is unfortunate, but this case is impossible
885                 // since we already popped the Op state if one exists above.
886                 // Namely, every push to the class parser stack is guarded by
887                 // whether an existing Op is already on the top of the stack.
888                 // If it is, the existing Op is modified. That is, the stack
889                 // can never have consecutive Op states.
890                 panic!("unexpected ClassState::Op")
891             }
892             Some(ClassState::Open { mut union, mut set }) => {
893                 self.bump();
894                 set.span.end = self.pos();
895                 set.kind = prevset;
896                 if stack.is_empty() {
897                     Ok(Either::Right(ast::Class::Bracketed(set)))
898                 } else {
899                     union.push(ast::ClassSetItem::Bracketed(Box::new(set)));
900                     Ok(Either::Left(union))
901                 }
902             }
903         }
904     }
905 
906     /// Return an "unclosed class" error whose span points to the most
907     /// recently opened class.
908     ///
909     /// This should only be called while parsing a character class.
unclosed_class_error(&self) -> ast::Error910     fn unclosed_class_error(&self) -> ast::Error {
911         for state in self.parser().stack_class.borrow().iter().rev() {
912             match *state {
913                 ClassState::Open { ref set, .. } => {
914                     return self.error(set.span, ast::ErrorKind::ClassUnclosed);
915                 }
916                 _ => {}
917             }
918         }
919         // We are guaranteed to have a non-empty stack with at least
920         // one open bracket, so we should never get here.
921         panic!("no open character class found")
922     }
923 
924     /// Push the current set of class items on to the class parser's stack as
925     /// the left hand side of the given operator.
926     ///
927     /// A fresh set union is returned, which should be used to build the right
928     /// hand side of this operator.
push_class_op( &self, next_kind: ast::ClassSetBinaryOpKind, next_union: ast::ClassSetUnion, ) -> ast::ClassSetUnion929     fn push_class_op(
930         &self,
931         next_kind: ast::ClassSetBinaryOpKind,
932         next_union: ast::ClassSetUnion,
933     ) -> ast::ClassSetUnion {
934 
935         let item = ast::ClassSet::Item(next_union.into_item());
936         let new_lhs = self.pop_class_op(item);
937         self.parser().stack_class.borrow_mut().push(ClassState::Op {
938             kind: next_kind,
939             lhs: new_lhs,
940         });
941         ast::ClassSetUnion { span: self.span(), items: vec![] }
942     }
943 
944     /// Pop a character class set from the character class parser stack. If the
945     /// top of the stack is just an item (not an operation), then return the
946     /// given set unchanged. If the top of the stack is an operation, then the
947     /// given set will be used as the rhs of the operation on the top of the
948     /// stack. In that case, the binary operation is returned as a set.
pop_class_op(&self, rhs: ast::ClassSet) -> ast::ClassSet949     fn pop_class_op(&self, rhs: ast::ClassSet) -> ast::ClassSet {
950         let mut stack = self.parser().stack_class.borrow_mut();
951         let (kind, lhs) = match stack.pop() {
952             Some(ClassState::Op { kind, lhs }) => (kind, lhs),
953             Some(state @ ClassState::Open { .. }) => {
954                 stack.push(state);
955                 return rhs;
956             }
957             None => unreachable!(),
958         };
959         let span = Span::new(lhs.span().start, rhs.span().end);
960         ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
961             span: span,
962             kind: kind,
963             lhs: Box::new(lhs),
964             rhs: Box::new(rhs),
965         })
966     }
967 }
968 
969 impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
970     /// Parse the regular expression into an abstract syntax tree.
parse(&self) -> Result<Ast>971     fn parse(&self) -> Result<Ast> {
972         self.parse_with_comments().map(|astc| astc.ast)
973     }
974 
975     /// Parse the regular expression and return an abstract syntax tree with
976     /// all of the comments found in the pattern.
parse_with_comments(&self) -> Result<ast::WithComments>977     fn parse_with_comments(&self) -> Result<ast::WithComments> {
978         assert_eq!(self.offset(), 0, "parser can only be used once");
979         self.parser().reset();
980         let mut concat = ast::Concat {
981             span: self.span(),
982             asts: vec![],
983         };
984         loop {
985             self.bump_space();
986             if self.is_eof() {
987                 break;
988             }
989             match self.char() {
990                 '(' => concat = self.push_group(concat)?,
991                 ')' => concat = self.pop_group(concat)?,
992                 '|' => concat = self.push_alternate(concat)?,
993                 '[' => {
994                     let class = self.parse_set_class()?;
995                     concat.asts.push(Ast::Class(class));
996                 }
997                 '?' => {
998                     concat = self.parse_uncounted_repetition(
999                         concat, ast::RepetitionKind::ZeroOrOne)?;
1000                 }
1001                 '*' => {
1002                     concat = self.parse_uncounted_repetition(
1003                         concat, ast::RepetitionKind::ZeroOrMore)?;
1004                 }
1005                 '+' => {
1006                     concat = self.parse_uncounted_repetition(
1007                         concat, ast::RepetitionKind::OneOrMore)?;
1008                 }
1009                 '{' => {
1010                     concat = self.parse_counted_repetition(concat)?;
1011                 }
1012                 _ => concat.asts.push(self.parse_primitive()?.into_ast()),
1013             }
1014         }
1015         let ast = self.pop_group_end(concat)?;
1016         NestLimiter::new(self).check(&ast)?;
1017         Ok(ast::WithComments {
1018             ast: ast,
1019             comments: mem::replace(
1020                 &mut *self.parser().comments.borrow_mut(),
1021                 vec![],
1022             ),
1023         })
1024     }
1025 
1026     /// Parses an uncounted repetition operation. An uncounted repetition
1027     /// operator includes ?, * and +, but does not include the {m,n} syntax.
1028     /// The given `kind` should correspond to the operator observed by the
1029     /// caller.
1030     ///
1031     /// This assumes that the paser is currently positioned at the repetition
1032     /// operator and advances the parser to the first character after the
1033     /// operator. (Note that the operator may include a single additional `?`,
1034     /// which makes the operator ungreedy.)
1035     ///
1036     /// The caller should include the concatenation that is being built. The
1037     /// concatenation returned includes the repetition operator applied to the
1038     /// last expression in the given concatenation.
parse_uncounted_repetition( &self, mut concat: ast::Concat, kind: ast::RepetitionKind, ) -> Result<ast::Concat>1039     fn parse_uncounted_repetition(
1040         &self,
1041         mut concat: ast::Concat,
1042         kind: ast::RepetitionKind,
1043     ) -> Result<ast::Concat> {
1044         assert!(
1045             self.char() == '?' || self.char() == '*' || self.char() == '+');
1046         let op_start = self.pos();
1047         let ast = match concat.asts.pop() {
1048             Some(ast) => ast,
1049             None => return Err(self.error(
1050                 self.span(),
1051                 ast::ErrorKind::RepetitionMissing,
1052             )),
1053         };
1054         match ast {
1055             Ast::Empty(_) | Ast::Flags(_) => return Err(self.error(
1056                 self.span(),
1057                 ast::ErrorKind::RepetitionMissing,
1058             )),
1059             _ => {}
1060         }
1061         let mut greedy = true;
1062         if self.bump() && self.char() == '?' {
1063             greedy = false;
1064             self.bump();
1065         }
1066         concat.asts.push(Ast::Repetition(ast::Repetition {
1067             span: ast.span().with_end(self.pos()),
1068             op: ast::RepetitionOp {
1069                 span: Span::new(op_start, self.pos()),
1070                 kind: kind,
1071             },
1072             greedy: greedy,
1073             ast: Box::new(ast),
1074         }));
1075         Ok(concat)
1076     }
1077 
1078     /// Parses a counted repetition operation. A counted repetition operator
1079     /// corresponds to the {m,n} syntax, and does not include the ?, * or +
1080     /// operators.
1081     ///
1082     /// This assumes that the paser is currently positioned at the opening `{`
1083     /// and advances the parser to the first character after the operator.
1084     /// (Note that the operator may include a single additional `?`, which
1085     /// makes the operator ungreedy.)
1086     ///
1087     /// The caller should include the concatenation that is being built. The
1088     /// concatenation returned includes the repetition operator applied to the
1089     /// last expression in the given concatenation.
parse_counted_repetition( &self, mut concat: ast::Concat, ) -> Result<ast::Concat>1090     fn parse_counted_repetition(
1091         &self,
1092         mut concat: ast::Concat,
1093     ) -> Result<ast::Concat> {
1094         assert!(self.char() == '{');
1095         let start = self.pos();
1096         let ast = match concat.asts.pop() {
1097             Some(ast) => ast,
1098             None => return Err(self.error(
1099                 self.span(),
1100                 ast::ErrorKind::RepetitionMissing,
1101             )),
1102         };
1103         match ast {
1104             Ast::Empty(_) | Ast::Flags(_) => return Err(self.error(
1105                 self.span(),
1106                 ast::ErrorKind::RepetitionMissing,
1107             )),
1108             _ => {}
1109         }
1110         if !self.bump_and_bump_space() {
1111             return Err(self.error(
1112                 Span::new(start, self.pos()),
1113                 ast::ErrorKind::RepetitionCountUnclosed,
1114             ));
1115         }
1116         let count_start = self.parse_decimal()?;
1117         let mut range = ast::RepetitionRange::Exactly(count_start);
1118         if self.is_eof() {
1119             return Err(self.error(
1120                 Span::new(start, self.pos()),
1121                 ast::ErrorKind::RepetitionCountUnclosed,
1122             ));
1123         }
1124         if self.char() == ',' {
1125             if !self.bump_and_bump_space() {
1126                 return Err(self.error(
1127                     Span::new(start, self.pos()),
1128                     ast::ErrorKind::RepetitionCountUnclosed,
1129                 ));
1130             }
1131             if self.char() != '}' {
1132                 let count_end = self.parse_decimal()?;
1133                 range = ast::RepetitionRange::Bounded(count_start, count_end);
1134             } else {
1135                 range = ast::RepetitionRange::AtLeast(count_start);
1136             }
1137         }
1138         if self.is_eof() || self.char() != '}' {
1139             return Err(self.error(
1140                 Span::new(start, self.pos()),
1141                 ast::ErrorKind::RepetitionCountUnclosed,
1142             ));
1143         }
1144 
1145         let mut greedy = true;
1146         if self.bump_and_bump_space() && self.char() == '?' {
1147             greedy = false;
1148             self.bump();
1149         }
1150 
1151         let op_span = Span::new(start, self.pos());
1152         if !range.is_valid() {
1153             return Err(self.error(
1154                 op_span,
1155                 ast::ErrorKind::RepetitionCountInvalid,
1156             ));
1157         }
1158         concat.asts.push(Ast::Repetition(ast::Repetition {
1159             span: ast.span().with_end(self.pos()),
1160             op: ast::RepetitionOp {
1161                 span: op_span,
1162                 kind: ast::RepetitionKind::Range(range),
1163             },
1164             greedy: greedy,
1165             ast: Box::new(ast),
1166         }));
1167         Ok(concat)
1168     }
1169 
1170     /// Parse a group (which contains a sub-expression) or a set of flags.
1171     ///
1172     /// If a group was found, then it is returned with an empty AST. If a set
1173     /// of flags is found, then that set is returned.
1174     ///
1175     /// The parser should be positioned at the opening parenthesis.
1176     ///
1177     /// This advances the parser to the character before the start of the
1178     /// sub-expression (in the case of a group) or to the closing parenthesis
1179     /// immediately following the set of flags.
1180     ///
1181     /// # Errors
1182     ///
1183     /// If flags are given and incorrectly specified, then a corresponding
1184     /// error is returned.
1185     ///
1186     /// If a capture name is given and it is incorrectly specified, then a
1187     /// corresponding error is returned.
parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>>1188     fn parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>> {
1189         assert_eq!(self.char(), '(');
1190         let open_span = self.span_char();
1191         self.bump();
1192         self.bump_space();
1193         if self.is_lookaround_prefix() {
1194             return Err(self.error(
1195                 Span::new(open_span.start, self.span().end),
1196                 ast::ErrorKind::UnsupportedLookAround,
1197             ));
1198         }
1199         let inner_span = self.span();
1200         if self.bump_if("?P<") {
1201             let capture_index = self.next_capture_index(open_span)?;
1202             let cap = self.parse_capture_name(capture_index)?;
1203             Ok(Either::Right(ast::Group {
1204                 span: open_span,
1205                 kind: ast::GroupKind::CaptureName(cap),
1206                 ast: Box::new(Ast::Empty(self.span())),
1207             }))
1208         } else if self.bump_if("?") {
1209             if self.is_eof() {
1210                 return Err(self.error(
1211                     open_span,
1212                     ast::ErrorKind::GroupUnclosed,
1213                 ));
1214             }
1215             let flags = self.parse_flags()?;
1216             let char_end = self.char();
1217             self.bump();
1218             if char_end == ')' {
1219                 // We don't allow empty flags, e.g., `(?)`. We instead
1220                 // interpret it as a repetition operator missing its argument.
1221                 if flags.items.is_empty() {
1222                     return Err(self.error(
1223                         inner_span,
1224                         ast::ErrorKind::RepetitionMissing,
1225                     ));
1226                 }
1227                 Ok(Either::Left(ast::SetFlags {
1228                     span: Span { end: self.pos(), ..open_span },
1229                     flags: flags,
1230                 }))
1231             } else {
1232                 assert_eq!(char_end, ':');
1233                 Ok(Either::Right(ast::Group {
1234                     span: open_span,
1235                     kind: ast::GroupKind::NonCapturing(flags),
1236                     ast: Box::new(Ast::Empty(self.span())),
1237                 }))
1238             }
1239         } else {
1240             let capture_index = self.next_capture_index(open_span)?;
1241             Ok(Either::Right(ast::Group {
1242                 span: open_span,
1243                 kind: ast::GroupKind::CaptureIndex(capture_index),
1244                 ast: Box::new(Ast::Empty(self.span())),
1245             }))
1246         }
1247     }
1248 
1249     /// Parses a capture group name. Assumes that the parser is positioned at
1250     /// the first character in the name following the opening `<` (and may
1251     /// possibly be EOF). This advances the parser to the first character
1252     /// following the closing `>`.
1253     ///
1254     /// The caller must provide the capture index of the group for this name.
parse_capture_name( &self, capture_index: u32, ) -> Result<ast::CaptureName>1255     fn parse_capture_name(
1256         &self,
1257         capture_index: u32,
1258     ) -> Result<ast::CaptureName> {
1259         if self.is_eof() {
1260             return Err(self.error(
1261                 self.span(),
1262                 ast::ErrorKind::GroupNameUnexpectedEof,
1263             ));
1264         }
1265         let start = self.pos();
1266         loop {
1267             if self.char() == '>' {
1268                 break;
1269             }
1270             if !is_capture_char(self.char(), self.pos() == start) {
1271                 return Err(self.error(
1272                     self.span_char(),
1273                     ast::ErrorKind::GroupNameInvalid,
1274                 ));
1275             }
1276             if !self.bump() {
1277                 break;
1278             }
1279         }
1280         let end = self.pos();
1281         if self.is_eof() {
1282             return Err(self.error(
1283                 self.span(),
1284                 ast::ErrorKind::GroupNameUnexpectedEof,
1285             ));
1286         }
1287         assert_eq!(self.char(), '>');
1288         self.bump();
1289         let name = &self.pattern()[start.offset..end.offset];
1290         if name.is_empty() {
1291             return Err(self.error(
1292                 Span::new(start, start),
1293                 ast::ErrorKind::GroupNameEmpty,
1294             ));
1295         }
1296         let capname = ast::CaptureName {
1297             span: Span::new(start, end),
1298             name: name.to_string(),
1299             index: capture_index,
1300         };
1301         self.add_capture_name(&capname)?;
1302         Ok(capname)
1303     }
1304 
1305     /// Parse a sequence of flags starting at the current character.
1306     ///
1307     /// This advances the parser to the character immediately following the
1308     /// flags, which is guaranteed to be either `:` or `)`.
1309     ///
1310     /// # Errors
1311     ///
1312     /// If any flags are duplicated, then an error is returned.
1313     ///
1314     /// If the negation operator is used more than once, then an error is
1315     /// returned.
1316     ///
1317     /// If no flags could be found or if the negation operation is not followed
1318     /// by any flags, then an error is returned.
parse_flags(&self) -> Result<ast::Flags>1319     fn parse_flags(&self) -> Result<ast::Flags> {
1320         let mut flags = ast::Flags {
1321             span: self.span(),
1322             items: vec![],
1323         };
1324         let mut last_was_negation = None;
1325         while self.char() != ':' && self.char() != ')' {
1326             if self.char() == '-' {
1327                 last_was_negation = Some(self.span_char());
1328                 let item = ast::FlagsItem {
1329                     span: self.span_char(),
1330                     kind: ast::FlagsItemKind::Negation,
1331                 };
1332                 if let Some(i) = flags.add_item(item) {
1333                     return Err(self.error(
1334                         self.span_char(),
1335                         ast::ErrorKind::FlagRepeatedNegation {
1336                             original: flags.items[i].span,
1337                         },
1338                     ));
1339                 }
1340             } else {
1341                 last_was_negation = None;
1342                 let item = ast::FlagsItem {
1343                     span: self.span_char(),
1344                     kind: ast::FlagsItemKind::Flag(self.parse_flag()?),
1345                 };
1346                 if let Some(i) = flags.add_item(item) {
1347                     return Err(self.error(
1348                         self.span_char(),
1349                         ast::ErrorKind::FlagDuplicate {
1350                             original: flags.items[i].span,
1351                         },
1352                     ));
1353                 }
1354             }
1355             if !self.bump() {
1356                 return Err(self.error(
1357                     self.span(),
1358                     ast::ErrorKind::FlagUnexpectedEof,
1359                 ));
1360             }
1361         }
1362         if let Some(span) = last_was_negation {
1363             return Err(self.error(span, ast::ErrorKind::FlagDanglingNegation));
1364         }
1365         flags.span.end = self.pos();
1366         Ok(flags)
1367     }
1368 
1369     /// Parse the current character as a flag. Do not advance the parser.
1370     ///
1371     /// # Errors
1372     ///
1373     /// If the flag is not recognized, then an error is returned.
parse_flag(&self) -> Result<ast::Flag>1374     fn parse_flag(&self) -> Result<ast::Flag> {
1375         match self.char() {
1376             'i' => Ok(ast::Flag::CaseInsensitive),
1377             'm' => Ok(ast::Flag::MultiLine),
1378             's' => Ok(ast::Flag::DotMatchesNewLine),
1379             'U' => Ok(ast::Flag::SwapGreed),
1380             'u' => Ok(ast::Flag::Unicode),
1381             'x' => Ok(ast::Flag::IgnoreWhitespace),
1382             _ => Err(self.error(
1383                 self.span_char(),
1384                 ast::ErrorKind::FlagUnrecognized,
1385             )),
1386         }
1387     }
1388 
1389     /// Parse a primitive AST. e.g., A literal, non-set character class or
1390     /// assertion.
1391     ///
1392     /// This assumes that the parser expects a primitive at the current
1393     /// location. i.e., All other non-primitive cases have been handled.
1394     /// For example, if the parser's position is at `|`, then `|` will be
1395     /// treated as a literal (e.g., inside a character class).
1396     ///
1397     /// This advances the parser to the first character immediately following
1398     /// the primitive.
parse_primitive(&self) -> Result<Primitive>1399     fn parse_primitive(&self) -> Result<Primitive> {
1400         match self.char() {
1401             '\\' => self.parse_escape(),
1402             '.' => {
1403                 let ast = Primitive::Dot(self.span_char());
1404                 self.bump();
1405                 Ok(ast)
1406             }
1407             '^' => {
1408                 let ast = Primitive::Assertion(ast::Assertion {
1409                     span: self.span_char(),
1410                     kind: ast::AssertionKind::StartLine,
1411                 });
1412                 self.bump();
1413                 Ok(ast)
1414             }
1415             '$' => {
1416                 let ast = Primitive::Assertion(ast::Assertion {
1417                     span: self.span_char(),
1418                     kind: ast::AssertionKind::EndLine,
1419                 });
1420                 self.bump();
1421                 Ok(ast)
1422             }
1423             c => {
1424                 let ast = Primitive::Literal(ast::Literal {
1425                     span: self.span_char(),
1426                     kind: ast::LiteralKind::Verbatim,
1427                     c: c,
1428                 });
1429                 self.bump();
1430                 Ok(ast)
1431             }
1432         }
1433     }
1434 
1435     /// Parse an escape sequence as a primitive AST.
1436     ///
1437     /// This assumes the parser is positioned at the start of the escape
1438     /// sequence, i.e., `\`. It advances the parser to the first position
1439     /// immediately following the escape sequence.
parse_escape(&self) -> Result<Primitive>1440     fn parse_escape(&self) -> Result<Primitive> {
1441         assert_eq!(self.char(), '\\');
1442         let start = self.pos();
1443         if !self.bump() {
1444             return Err(self.error(
1445                 Span::new(start, self.pos()),
1446                 ast::ErrorKind::EscapeUnexpectedEof,
1447             ));
1448         }
1449         let c = self.char();
1450         // Put some of the more complicated routines into helpers.
1451         match c {
1452             '0'...'7' => {
1453                 if !self.parser().octal {
1454                     return Err(self.error(
1455                         Span::new(start, self.span_char().end),
1456                         ast::ErrorKind::UnsupportedBackreference,
1457                     ));
1458                 }
1459                 let mut lit = self.parse_octal();
1460                 lit.span.start = start;
1461                 return Ok(Primitive::Literal(lit));
1462             }
1463             '8'...'9' if !self.parser().octal => {
1464                 return Err(self.error(
1465                     Span::new(start, self.span_char().end),
1466                     ast::ErrorKind::UnsupportedBackreference,
1467                 ));
1468             }
1469             'x' | 'u' | 'U' => {
1470                 let mut lit = self.parse_hex()?;
1471                 lit.span.start = start;
1472                 return Ok(Primitive::Literal(lit));
1473             }
1474             'p' | 'P' => {
1475                 let mut cls = self.parse_unicode_class()?;
1476                 cls.span.start = start;
1477                 return Ok(Primitive::Unicode(cls));
1478             }
1479             'd' | 's' | 'w' | 'D' | 'S' | 'W' => {
1480                 let mut cls = self.parse_perl_class();
1481                 cls.span.start = start;
1482                 return Ok(Primitive::Perl(cls));
1483             }
1484             _ => {}
1485         }
1486 
1487         // Handle all of the one letter sequences inline.
1488         self.bump();
1489         let span = Span::new(start, self.pos());
1490         if is_meta_character(c) {
1491             return Ok(Primitive::Literal(ast::Literal {
1492                 span: span,
1493                 kind: ast::LiteralKind::Punctuation,
1494                 c: c,
1495             }));
1496         }
1497         let special = |kind, c| Ok(Primitive::Literal(ast::Literal {
1498             span: span,
1499             kind: ast::LiteralKind::Special(kind),
1500             c: c,
1501         }));
1502         match c {
1503             'a' => special(ast::SpecialLiteralKind::Bell, '\x07'),
1504             'f' => special(ast::SpecialLiteralKind::FormFeed, '\x0C'),
1505             't' => special(ast::SpecialLiteralKind::Tab, '\t'),
1506             'n' => special(ast::SpecialLiteralKind::LineFeed, '\n'),
1507             'r' => special(ast::SpecialLiteralKind::CarriageReturn, '\r'),
1508             'v' => special(ast::SpecialLiteralKind::VerticalTab, '\x0B'),
1509             ' ' if self.ignore_whitespace() => {
1510                 special(ast::SpecialLiteralKind::Space, ' ')
1511             }
1512             'A' => Ok(Primitive::Assertion(ast::Assertion {
1513                 span: span,
1514                 kind: ast::AssertionKind::StartText,
1515             })),
1516             'z' => Ok(Primitive::Assertion(ast::Assertion {
1517                 span: span,
1518                 kind: ast::AssertionKind::EndText,
1519             })),
1520             'b' => Ok(Primitive::Assertion(ast::Assertion {
1521                 span: span,
1522                 kind: ast::AssertionKind::WordBoundary,
1523             })),
1524             'B' => Ok(Primitive::Assertion(ast::Assertion {
1525                 span: span,
1526                 kind: ast::AssertionKind::NotWordBoundary,
1527             })),
1528             _ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)),
1529         }
1530     }
1531 
1532     /// Parse an octal representation of a Unicode codepoint up to 3 digits
1533     /// long. This expects the parser to be positioned at the first octal
1534     /// digit and advances the parser to the first character immediately
1535     /// following the octal number. This also assumes that parsing octal
1536     /// escapes is enabled.
1537     ///
1538     /// Assuming the preconditions are met, this routine can never fail.
parse_octal(&self) -> ast::Literal1539     fn parse_octal(&self) -> ast::Literal {
1540         use std::char;
1541         use std::u32;
1542 
1543         assert!(self.parser().octal);
1544         assert!('0' <= self.char() && self.char() <= '7');
1545         let start = self.pos();
1546         // Parse up to two more digits.
1547         while
1548             self.bump() &&
1549             '0' <= self.char() && self.char() <= '7' &&
1550             self.pos().offset - start.offset <= 2
1551         {}
1552         let end = self.pos();
1553         let octal = &self.pattern()[start.offset..end.offset];
1554         // Parsing the octal should never fail since the above guarantees a
1555         // valid number.
1556         let codepoint =
1557             u32::from_str_radix(octal, 8).expect("valid octal number");
1558         // The max value for 3 digit octal is 0777 = 511 and [0, 511] has no
1559         // invalid Unicode scalar values.
1560         let c = char::from_u32(codepoint).expect("Unicode scalar value");
1561         ast::Literal {
1562             span: Span::new(start, end),
1563             kind: ast::LiteralKind::Octal,
1564             c: c,
1565         }
1566     }
1567 
1568     /// Parse a hex representation of a Unicode codepoint. This handles both
1569     /// hex notations, i.e., `\xFF` and `\x{FFFF}`. This expects the parser to
1570     /// be positioned at the `x`, `u` or `U` prefix. The parser is advanced to
1571     /// the first character immediately following the hexadecimal literal.
parse_hex(&self) -> Result<ast::Literal>1572     fn parse_hex(&self) -> Result<ast::Literal> {
1573         assert!(self.char() == 'x'
1574                 || self.char() == 'u'
1575                 || self.char() == 'U');
1576 
1577         let hex_kind = match self.char() {
1578             'x' => ast::HexLiteralKind::X,
1579             'u' => ast::HexLiteralKind::UnicodeShort,
1580             _ => ast::HexLiteralKind::UnicodeLong,
1581         };
1582         if !self.bump_and_bump_space() {
1583             return Err(self.error(
1584                 self.span(),
1585                 ast::ErrorKind::EscapeUnexpectedEof,
1586             ));
1587         }
1588         if self.char() == '{' {
1589             self.parse_hex_brace(hex_kind)
1590         } else {
1591             self.parse_hex_digits(hex_kind)
1592         }
1593     }
1594 
1595     /// Parse an N-digit hex representation of a Unicode codepoint. This
1596     /// expects the parser to be positioned at the first digit and will advance
1597     /// the parser to the first character immediately following the escape
1598     /// sequence.
1599     ///
1600     /// The number of digits given must be 2 (for `\xNN`), 4 (for `\uNNNN`)
1601     /// or 8 (for `\UNNNNNNNN`).
parse_hex_digits( &self, kind: ast::HexLiteralKind, ) -> Result<ast::Literal>1602     fn parse_hex_digits(
1603         &self,
1604         kind: ast::HexLiteralKind,
1605     ) -> Result<ast::Literal> {
1606         use std::char;
1607         use std::u32;
1608 
1609         let mut scratch = self.parser().scratch.borrow_mut();
1610         scratch.clear();
1611 
1612         let start = self.pos();
1613         for i in 0..kind.digits() {
1614             if i > 0 && !self.bump_and_bump_space() {
1615                 return Err(self.error(
1616                     self.span(),
1617                     ast::ErrorKind::EscapeUnexpectedEof,
1618                 ));
1619             }
1620             if !is_hex(self.char()) {
1621                 return Err(self.error(
1622                     self.span_char(),
1623                     ast::ErrorKind::EscapeHexInvalidDigit,
1624                 ));
1625             }
1626             scratch.push(self.char());
1627         }
1628         // The final bump just moves the parser past the literal, which may
1629         // be EOF.
1630         self.bump_and_bump_space();
1631         let end = self.pos();
1632         let hex = scratch.as_str();
1633         match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
1634             None => Err(self.error(
1635                 Span::new(start, end),
1636                 ast::ErrorKind::EscapeHexInvalid,
1637             )),
1638             Some(c) => Ok(ast::Literal {
1639                 span: Span::new(start, end),
1640                 kind: ast::LiteralKind::HexFixed(kind),
1641                 c: c,
1642             }),
1643         }
1644     }
1645 
1646     /// Parse a hex representation of any Unicode scalar value. This expects
1647     /// the parser to be positioned at the opening brace `{` and will advance
1648     /// the parser to the first character following the closing brace `}`.
parse_hex_brace( &self, kind: ast::HexLiteralKind, ) -> Result<ast::Literal>1649     fn parse_hex_brace(
1650         &self,
1651         kind: ast::HexLiteralKind,
1652     ) -> Result<ast::Literal> {
1653         use std::char;
1654         use std::u32;
1655 
1656         let mut scratch = self.parser().scratch.borrow_mut();
1657         scratch.clear();
1658 
1659         let brace_pos = self.pos();
1660         let start = self.span_char().end;
1661         while self.bump_and_bump_space() && self.char() != '}' {
1662             if !is_hex(self.char()) {
1663                 return Err(self.error(
1664                     self.span_char(),
1665                     ast::ErrorKind::EscapeHexInvalidDigit,
1666                 ));
1667             }
1668             scratch.push(self.char());
1669         }
1670         if self.is_eof() {
1671             return Err(self.error(
1672                 Span::new(brace_pos, self.pos()),
1673                 ast::ErrorKind::EscapeUnexpectedEof,
1674             ));
1675         }
1676         let end = self.pos();
1677         let hex = scratch.as_str();
1678         assert_eq!(self.char(), '}');
1679         self.bump_and_bump_space();
1680 
1681         if hex.is_empty() {
1682             return Err(self.error(
1683                 Span::new(brace_pos, self.pos()),
1684                 ast::ErrorKind::EscapeHexEmpty,
1685             ));
1686         }
1687         match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
1688             None => Err(self.error(
1689                 Span::new(start, end),
1690                 ast::ErrorKind::EscapeHexInvalid,
1691             )),
1692             Some(c) => Ok(ast::Literal {
1693                 span: Span::new(start, self.pos()),
1694                 kind: ast::LiteralKind::HexBrace(kind),
1695                 c: c,
1696             }),
1697         }
1698     }
1699 
1700     /// Parse a decimal number into a u32 while trimming leading and trailing
1701     /// whitespace.
1702     ///
1703     /// This expects the parser to be positioned at the first position where
1704     /// a decimal digit could occur. This will advance the parser to the byte
1705     /// immediately following the last contiguous decimal digit.
1706     ///
1707     /// If no decimal digit could be found or if there was a problem parsing
1708     /// the complete set of digits into a u32, then an error is returned.
parse_decimal(&self) -> Result<u32>1709     fn parse_decimal(&self) -> Result<u32> {
1710         let mut scratch = self.parser().scratch.borrow_mut();
1711         scratch.clear();
1712 
1713         while !self.is_eof() && self.char().is_whitespace() {
1714             self.bump();
1715         }
1716         let start = self.pos();
1717         while !self.is_eof() && '0' <= self.char() && self.char() <= '9' {
1718             scratch.push(self.char());
1719             self.bump_and_bump_space();
1720         }
1721         let span = Span::new(start, self.pos());
1722         while !self.is_eof() && self.char().is_whitespace() {
1723             self.bump_and_bump_space();
1724         }
1725         let digits = scratch.as_str();
1726         if digits.is_empty() {
1727             return Err(self.error(span, ast::ErrorKind::DecimalEmpty));
1728         }
1729         match u32::from_str_radix(digits, 10).ok() {
1730             Some(n) => Ok(n),
1731             None => Err(self.error(span, ast::ErrorKind::DecimalInvalid)),
1732         }
1733     }
1734 
1735     /// Parse a standard character class consisting primarily of characters or
1736     /// character ranges, but can also contain nested character classes of
1737     /// any type (sans `.`).
1738     ///
1739     /// This assumes the parser is positioned at the opening `[`. If parsing
1740     /// is successful, then the parser is advanced to the position immediately
1741     /// following the closing `]`.
parse_set_class(&self) -> Result<ast::Class>1742     fn parse_set_class(&self) -> Result<ast::Class> {
1743         assert_eq!(self.char(), '[');
1744 
1745         let mut union = ast::ClassSetUnion {
1746             span: self.span(),
1747             items: vec![],
1748         };
1749         loop {
1750             self.bump_space();
1751             if self.is_eof() {
1752                 return Err(self.unclosed_class_error());
1753             }
1754             match self.char() {
1755                 '[' => {
1756                     // If we've already parsed the opening bracket, then
1757                     // attempt to treat this as the beginning of an ASCII
1758                     // class. If ASCII class parsing fails, then the parser
1759                     // backs up to `[`.
1760                     if !self.parser().stack_class.borrow().is_empty() {
1761                         if let Some(cls) = self.maybe_parse_ascii_class() {
1762                             union.push(ast::ClassSetItem::Ascii(cls));
1763                             continue;
1764                         }
1765                     }
1766                     union = self.push_class_open(union)?;
1767                 }
1768                 ']' => {
1769                     match self.pop_class(union)? {
1770                         Either::Left(nested_union) => { union = nested_union; }
1771                         Either::Right(class) => return Ok(class),
1772                     }
1773                 }
1774                 '&' if self.peek() == Some('&') => {
1775                     assert!(self.bump_if("&&"));
1776                     union = self.push_class_op(
1777                         ast::ClassSetBinaryOpKind::Intersection, union);
1778                 }
1779                 '-' if self.peek() == Some('-') => {
1780                     assert!(self.bump_if("--"));
1781                     union = self.push_class_op(
1782                         ast::ClassSetBinaryOpKind::Difference, union);
1783                 }
1784                 '~' if self.peek() == Some('~') => {
1785                     assert!(self.bump_if("~~"));
1786                     union = self.push_class_op(
1787                         ast::ClassSetBinaryOpKind::SymmetricDifference, union);
1788                 }
1789                 _ => {
1790                     union.push(self.parse_set_class_range()?);
1791                 }
1792             }
1793         }
1794     }
1795 
1796     /// Parse a single primitive item in a character class set. The item to
1797     /// be parsed can either be one of a simple literal character, a range
1798     /// between two simple literal characters or a "primitive" character
1799     /// class like \w or \p{Greek}.
1800     ///
1801     /// If an invalid escape is found, or if a character class is found where
1802     /// a simple literal is expected (e.g., in a range), then an error is
1803     /// returned.
parse_set_class_range(&self) -> Result<ast::ClassSetItem>1804     fn parse_set_class_range(&self) -> Result<ast::ClassSetItem> {
1805         let prim1 = self.parse_set_class_item()?;
1806         self.bump_space();
1807         if self.is_eof() {
1808             return Err(self.unclosed_class_error());
1809         }
1810         // If the next char isn't a `-`, then we don't have a range.
1811         // There are two exceptions. If the char after a `-` is a `]`, then
1812         // `-` is interpreted as a literal `-`. Alternatively, if the char
1813         // after a `-` is a `-`, then `--` corresponds to a "difference"
1814         // operation.
1815         if self.char() != '-'
1816             || self.peek_space() == Some(']')
1817             || self.peek_space() == Some('-')
1818         {
1819             return prim1.into_class_set_item(self);
1820         }
1821         // OK, now we're parsing a range, so bump past the `-` and parse the
1822         // second half of the range.
1823         if !self.bump_and_bump_space() {
1824             return Err(self.unclosed_class_error());
1825         }
1826         let prim2 = self.parse_set_class_item()?;
1827         let range = ast::ClassSetRange {
1828             span: Span::new(prim1.span().start, prim2.span().end),
1829             start: prim1.into_class_literal(self)?,
1830             end: prim2.into_class_literal(self)?,
1831         };
1832         if !range.is_valid() {
1833             return Err(self.error(
1834                 range.span,
1835                 ast::ErrorKind::ClassRangeInvalid,
1836             ));
1837         }
1838         Ok(ast::ClassSetItem::Range(range))
1839     }
1840 
1841     /// Parse a single item in a character class as a primitive, where the
1842     /// primitive either consists of a verbatim literal or a single escape
1843     /// sequence.
1844     ///
1845     /// This assumes the parser is positioned at the beginning of a primitive,
1846     /// and advances the parser to the first position after the primitive if
1847     /// successful.
1848     ///
1849     /// Note that it is the caller's responsibility to report an error if an
1850     /// illegal primitive was parsed.
parse_set_class_item(&self) -> Result<Primitive>1851     fn parse_set_class_item(&self) -> Result<Primitive> {
1852         if self.char() == '\\' {
1853             self.parse_escape()
1854         } else {
1855             let x = Primitive::Literal(ast::Literal {
1856                 span: self.span_char(),
1857                 kind: ast::LiteralKind::Verbatim,
1858                 c: self.char(),
1859             });
1860             self.bump();
1861             Ok(x)
1862         }
1863     }
1864 
1865     /// Parses the opening of a character class set. This includes the opening
1866     /// bracket along with `^` if present to indicate negation. This also
1867     /// starts parsing the opening set of unioned items if applicable, since
1868     /// there are special rules applied to certain characters in the opening
1869     /// of a character class. For example, `[^]]` is the class of all
1870     /// characters not equal to `]`. (`]` would need to be escaped in any other
1871     /// position.) Similarly for `-`.
1872     ///
1873     /// In all cases, the op inside the returned `ast::ClassBracketed` is an
1874     /// empty union. This empty union should be replaced with the actual item
1875     /// when it is popped from the parser's stack.
1876     ///
1877     /// This assumes the parser is positioned at the opening `[` and advances
1878     /// the parser to the first non-special byte of the character class.
1879     ///
1880     /// An error is returned if EOF is found.
parse_set_class_open( &self, ) -> Result<(ast::ClassBracketed, ast::ClassSetUnion)>1881     fn parse_set_class_open(
1882         &self,
1883     ) -> Result<(ast::ClassBracketed, ast::ClassSetUnion)> {
1884         assert_eq!(self.char(), '[');
1885         let start = self.pos();
1886         if !self.bump_and_bump_space() {
1887             return Err(self.error(
1888                 Span::new(start, self.pos()),
1889                 ast::ErrorKind::ClassUnclosed,
1890             ));
1891         }
1892 
1893         let negated =
1894             if self.char() != '^' {
1895                 false
1896             } else {
1897                 if !self.bump_and_bump_space() {
1898                     return Err(self.error(
1899                         Span::new(start, self.pos()),
1900                         ast::ErrorKind::ClassUnclosed,
1901                     ));
1902                 }
1903                 true
1904             };
1905         // Accept any number of `-` as literal `-`.
1906         let mut union = ast::ClassSetUnion {
1907             span: self.span(),
1908             items: vec![],
1909         };
1910         while self.char() == '-' {
1911             union.push(ast::ClassSetItem::Literal(ast::Literal {
1912                 span: self.span_char(),
1913                 kind: ast::LiteralKind::Verbatim,
1914                 c: '-',
1915             }));
1916             if !self.bump_and_bump_space() {
1917                 return Err(self.error(
1918                     Span::new(start, self.pos()),
1919                     ast::ErrorKind::ClassUnclosed,
1920                 ));
1921             }
1922         }
1923         // If `]` is the *first* char in a set, then interpret it as a literal
1924         // `]`. That is, an empty class is impossible to write.
1925         if union.items.is_empty() && self.char() == ']' {
1926             union.push(ast::ClassSetItem::Literal(ast::Literal {
1927                 span: self.span_char(),
1928                 kind: ast::LiteralKind::Verbatim,
1929                 c: ']',
1930             }));
1931             if !self.bump_and_bump_space() {
1932                 return Err(self.error(
1933                     Span::new(start, self.pos()),
1934                     ast::ErrorKind::ClassUnclosed,
1935                 ));
1936             }
1937         }
1938         let set = ast::ClassBracketed {
1939             span: Span::new(start, self.pos()),
1940             negated: negated,
1941             kind: ast::ClassSet::union(ast::ClassSetUnion {
1942                 span: Span::new(union.span.start, union.span.start),
1943                 items: vec![],
1944             }),
1945         };
1946         Ok((set, union))
1947     }
1948 
1949     /// Attempt to parse an ASCII character class, e.g., `[:alnum:]`.
1950     ///
1951     /// This assumes the parser is positioned at the opening `[`.
1952     ///
1953     /// If no valid ASCII character class could be found, then this does not
1954     /// advance the parser and `None` is returned. Otherwise, the parser is
1955     /// advanced to the first byte following the closing `]` and the
1956     /// corresponding ASCII class is returned.
maybe_parse_ascii_class(&self) -> Option<ast::ClassAscii>1957     fn maybe_parse_ascii_class(&self) -> Option<ast::ClassAscii> {
1958         // ASCII character classes are interesting from a parsing perspective
1959         // because parsing cannot fail with any interesting error. For example,
1960         // in order to use an ASCII character class, it must be enclosed in
1961         // double brackets, e.g., `[[:alnum:]]`. Alternatively, you might think
1962         // of it as "ASCII character characters have the syntax `[:NAME:]`
1963         // which can only appear within character brackets." This means that
1964         // things like `[[:lower:]A]` are legal constructs.
1965         //
1966         // However, if one types an incorrect ASCII character class, e.g.,
1967         // `[[:loower:]]`, then we treat that as a normal nested character
1968         // class containing the characters `:elorw`. One might argue that we
1969         // should return an error instead since the repeated colons give away
1970         // the intent to write an ASCII class. But what if the user typed
1971         // `[[:lower]]` instead? How can we tell that was intended to be an
1972         // ASCII class and not just a normal nested class?
1973         //
1974         // Reasonable people can probably disagree over this, but for better
1975         // or worse, we implement semantics that never fails at the expense
1976         // of better failure modes.
1977         assert_eq!(self.char(), '[');
1978         // If parsing fails, then we back up the parser to this starting point.
1979         let start = self.pos();
1980         let mut negated = false;
1981         if !self.bump() || self.char() != ':' {
1982             self.parser().pos.set(start);
1983             return None;
1984         }
1985         if !self.bump() {
1986             self.parser().pos.set(start);
1987             return None;
1988         }
1989         if self.char() == '^' {
1990             negated = true;
1991             if !self.bump() {
1992                 self.parser().pos.set(start);
1993                 return None;
1994             }
1995         }
1996         let name_start = self.offset();
1997         while self.char() != ':' && self.bump() {}
1998         if self.is_eof() {
1999             self.parser().pos.set(start);
2000             return None;
2001         }
2002         let name = &self.pattern()[name_start..self.offset()];
2003         if !self.bump_if(":]") {
2004             self.parser().pos.set(start);
2005             return None;
2006         }
2007         let kind = match ast::ClassAsciiKind::from_name(name) {
2008             Some(kind) => kind,
2009             None => {
2010                 self.parser().pos.set(start);
2011                 return None;
2012             }
2013         };
2014         Some(ast::ClassAscii {
2015             span: Span::new(start, self.pos()),
2016             kind: kind,
2017             negated: negated,
2018         })
2019     }
2020 
2021     /// Parse a Unicode class in either the single character notation, `\pN`
2022     /// or the multi-character bracketed notation, `\p{Greek}`. This assumes
2023     /// the parser is positioned at the `p` (or `P` for negation) and will
2024     /// advance the parser to the character immediately following the class.
2025     ///
2026     /// Note that this does not check whether the class name is valid or not.
parse_unicode_class(&self) -> Result<ast::ClassUnicode>2027     fn parse_unicode_class(&self) -> Result<ast::ClassUnicode> {
2028         assert!(self.char() == 'p' || self.char() == 'P');
2029 
2030         let mut scratch = self.parser().scratch.borrow_mut();
2031         scratch.clear();
2032 
2033         let negated = self.char() == 'P';
2034         if !self.bump_and_bump_space() {
2035             return Err(self.error(
2036                 self.span(),
2037                 ast::ErrorKind::EscapeUnexpectedEof,
2038             ));
2039         }
2040         let (start, kind) =
2041             if self.char() == '{' {
2042                 let start = self.span_char().end;
2043                 while self.bump_and_bump_space() && self.char() != '}' {
2044                     scratch.push(self.char());
2045                 }
2046                 if self.is_eof() {
2047                     return Err(self.error(
2048                         self.span(),
2049                         ast::ErrorKind::EscapeUnexpectedEof,
2050                     ));
2051                 }
2052                 assert_eq!(self.char(), '}');
2053                 self.bump();
2054 
2055                 let name = scratch.as_str();
2056                 if let Some(i) = name.find("!=") {
2057                     (start, ast::ClassUnicodeKind::NamedValue {
2058                         op: ast::ClassUnicodeOpKind::NotEqual,
2059                         name: name[..i].to_string(),
2060                         value: name[i+2..].to_string(),
2061                     })
2062                 } else if let Some(i) = name.find(':') {
2063                     (start, ast::ClassUnicodeKind::NamedValue {
2064                         op: ast::ClassUnicodeOpKind::Colon,
2065                         name: name[..i].to_string(),
2066                         value: name[i+1..].to_string(),
2067                     })
2068                 } else if let Some(i) = name.find('=') {
2069                     (start, ast::ClassUnicodeKind::NamedValue {
2070                         op: ast::ClassUnicodeOpKind::Equal,
2071                         name: name[..i].to_string(),
2072                         value: name[i+1..].to_string(),
2073                     })
2074                 } else {
2075                     (start, ast::ClassUnicodeKind::Named(name.to_string()))
2076                 }
2077             } else {
2078                 let start = self.pos();
2079                 let c = self.char();
2080                 self.bump_and_bump_space();
2081                 let kind = ast::ClassUnicodeKind::OneLetter(c);
2082                 (start, kind)
2083             };
2084         Ok(ast::ClassUnicode {
2085             span: Span::new(start, self.pos()),
2086             negated: negated,
2087             kind: kind,
2088         })
2089     }
2090 
2091     /// Parse a Perl character class, e.g., `\d` or `\W`. This assumes the
2092     /// parser is currently at a valid character class name and will be
2093     /// advanced to the character immediately following the class.
parse_perl_class(&self) -> ast::ClassPerl2094     fn parse_perl_class(&self) -> ast::ClassPerl {
2095         let c = self.char();
2096         let span = self.span_char();
2097         self.bump();
2098         let (negated, kind) = match c {
2099             'd' => (false, ast::ClassPerlKind::Digit),
2100             'D' => (true, ast::ClassPerlKind::Digit),
2101             's' => (false, ast::ClassPerlKind::Space),
2102             'S' => (true, ast::ClassPerlKind::Space),
2103             'w' => (false, ast::ClassPerlKind::Word),
2104             'W' => (true, ast::ClassPerlKind::Word),
2105             c => panic!("expected valid Perl class but got '{}'", c),
2106         };
2107         ast::ClassPerl { span: span, kind: kind, negated: negated }
2108     }
2109 }
2110 
2111 /// A type that traverses a fully parsed Ast and checks whether its depth
2112 /// exceeds the specified nesting limit. If it does, then an error is returned.
2113 #[derive(Debug)]
2114 struct NestLimiter<'p, 's: 'p, P: 'p + 's> {
2115     /// The parser that is checking the nest limit.
2116     p: &'p ParserI<'s, P>,
2117     /// The current depth while walking an Ast.
2118     depth: u32,
2119 }
2120 
2121 impl<'p, 's, P: Borrow<Parser>> NestLimiter<'p, 's, P> {
new(p: &'p ParserI<'s, P>) -> NestLimiter<'p, 's, P>2122     fn new(p: &'p ParserI<'s, P>) -> NestLimiter<'p, 's, P> {
2123         NestLimiter { p: p, depth: 0 }
2124     }
2125 
check(self, ast: &Ast) -> Result<()>2126     fn check(self, ast: &Ast) -> Result<()> {
2127         ast::visit(ast, self)
2128     }
2129 
increment_depth(&mut self, span: &Span) -> Result<()>2130     fn increment_depth(&mut self, span: &Span) -> Result<()> {
2131         let new = self.depth.checked_add(1).ok_or_else(|| self.p.error(
2132             span.clone(),
2133             ast::ErrorKind::NestLimitExceeded(::std::u32::MAX),
2134         ))?;
2135         let limit = self.p.parser().nest_limit;
2136         if new > limit {
2137             return Err(self.p.error(
2138                 span.clone(),
2139                 ast::ErrorKind::NestLimitExceeded(limit),
2140             ));
2141         }
2142         self.depth = new;
2143         Ok(())
2144     }
2145 
decrement_depth(&mut self)2146     fn decrement_depth(&mut self) {
2147         // Assuming the correctness of the visitor, this should never drop
2148         // below 0.
2149         self.depth = self.depth.checked_sub(1).unwrap();
2150     }
2151 }
2152 
2153 impl<'p, 's, P: Borrow<Parser>> ast::Visitor for NestLimiter<'p, 's, P> {
2154     type Output = ();
2155     type Err = ast::Error;
2156 
finish(self) -> Result<()>2157     fn finish(self) -> Result<()> {
2158         Ok(())
2159     }
2160 
visit_pre(&mut self, ast: &Ast) -> Result<()>2161     fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
2162         let span = match *ast {
2163             Ast::Empty(_)
2164             | Ast::Flags(_)
2165             | Ast::Literal(_)
2166             | Ast::Dot(_)
2167             | Ast::Assertion(_)
2168             | Ast::Class(ast::Class::Unicode(_))
2169             | Ast::Class(ast::Class::Perl(_)) => {
2170                 // These are all base cases, so we don't increment depth.
2171                 return Ok(());
2172             }
2173             Ast::Class(ast::Class::Bracketed(ref x)) => &x.span,
2174             Ast::Repetition(ref x) => &x.span,
2175             Ast::Group(ref x) => &x.span,
2176             Ast::Alternation(ref x) => &x.span,
2177             Ast::Concat(ref x) => &x.span,
2178         };
2179         self.increment_depth(span)
2180     }
2181 
visit_post(&mut self, ast: &Ast) -> Result<()>2182     fn visit_post(&mut self, ast: &Ast) -> Result<()> {
2183         match *ast {
2184             Ast::Empty(_)
2185             | Ast::Flags(_)
2186             | Ast::Literal(_)
2187             | Ast::Dot(_)
2188             | Ast::Assertion(_)
2189             | Ast::Class(ast::Class::Unicode(_))
2190             | Ast::Class(ast::Class::Perl(_)) => {
2191                 // These are all base cases, so we don't decrement depth.
2192                 Ok(())
2193             }
2194             Ast::Class(ast::Class::Bracketed(_))
2195             | Ast::Repetition(_)
2196             | Ast::Group(_)
2197             | Ast::Alternation(_)
2198             | Ast::Concat(_) => {
2199                 self.decrement_depth();
2200                 Ok(())
2201             }
2202         }
2203     }
2204 
visit_class_set_item_pre( &mut self, ast: &ast::ClassSetItem, ) -> Result<()>2205     fn visit_class_set_item_pre(
2206         &mut self,
2207         ast: &ast::ClassSetItem,
2208     ) -> Result<()> {
2209         let span = match *ast {
2210             ast::ClassSetItem::Empty(_)
2211             | ast::ClassSetItem::Literal(_)
2212             | ast::ClassSetItem::Range(_)
2213             | ast::ClassSetItem::Ascii(_)
2214             | ast::ClassSetItem::Unicode(_)
2215             | ast::ClassSetItem::Perl(_) => {
2216                 // These are all base cases, so we don't increment depth.
2217                 return Ok(());
2218             }
2219             ast::ClassSetItem::Bracketed(ref x) => &x.span,
2220             ast::ClassSetItem::Union(ref x) => &x.span,
2221         };
2222         self.increment_depth(span)
2223     }
2224 
visit_class_set_item_post( &mut self, ast: &ast::ClassSetItem, ) -> Result<()>2225     fn visit_class_set_item_post(
2226         &mut self,
2227         ast: &ast::ClassSetItem,
2228     ) -> Result<()> {
2229         match *ast {
2230             ast::ClassSetItem::Empty(_)
2231             | ast::ClassSetItem::Literal(_)
2232             | ast::ClassSetItem::Range(_)
2233             | ast::ClassSetItem::Ascii(_)
2234             | ast::ClassSetItem::Unicode(_)
2235             | ast::ClassSetItem::Perl(_) => {
2236                 // These are all base cases, so we don't decrement depth.
2237                 Ok(())
2238             }
2239             ast::ClassSetItem::Bracketed(_)
2240             | ast::ClassSetItem::Union(_) => {
2241                 self.decrement_depth();
2242                 Ok(())
2243             }
2244         }
2245     }
2246 
visit_class_set_binary_op_pre( &mut self, ast: &ast::ClassSetBinaryOp, ) -> Result<()>2247     fn visit_class_set_binary_op_pre(
2248         &mut self,
2249         ast: &ast::ClassSetBinaryOp,
2250     ) -> Result<()> {
2251         self.increment_depth(&ast.span)
2252     }
2253 
visit_class_set_binary_op_post( &mut self, _ast: &ast::ClassSetBinaryOp, ) -> Result<()>2254     fn visit_class_set_binary_op_post(
2255         &mut self,
2256         _ast: &ast::ClassSetBinaryOp,
2257     ) -> Result<()> {
2258         self.decrement_depth();
2259         Ok(())
2260     }
2261 }
2262 
2263 #[cfg(test)]
2264 mod tests {
2265     use std::ops::Range;
2266 
2267     use ast::{self, Ast, Position, Span};
2268     use super::{Parser, ParserI, ParserBuilder, Primitive};
2269 
2270     // Our own assert_eq, which has slightly better formatting (but honestly
2271     // still kind of crappy).
2272     macro_rules! assert_eq {
2273         ($left:expr, $right:expr) => ({
2274             match (&$left, &$right) {
2275                 (left_val, right_val) => {
2276                     if !(*left_val == *right_val) {
2277                         panic!("assertion failed: `(left == right)`\n\n\
2278                                left:  `{:?}`\nright: `{:?}`\n\n",
2279                                left_val, right_val)
2280                     }
2281                 }
2282             }
2283         });
2284     }
2285 
2286     // We create these errors to compare with real ast::Errors in the tests.
2287     // We define equality between TestError and ast::Error to disregard the
2288     // pattern string in ast::Error, which is annoying to provide in tests.
2289     #[derive(Clone, Debug)]
2290     struct TestError {
2291         span: Span,
2292         kind: ast::ErrorKind,
2293     }
2294 
2295     impl PartialEq<ast::Error> for TestError {
eq(&self, other: &ast::Error) -> bool2296         fn eq(&self, other: &ast::Error) -> bool {
2297             self.span == other.span && self.kind == other.kind
2298         }
2299     }
2300 
2301     impl PartialEq<TestError> for ast::Error {
eq(&self, other: &TestError) -> bool2302         fn eq(&self, other: &TestError) -> bool {
2303             self.span == other.span && self.kind == other.kind
2304         }
2305     }
2306 
s(str: &str) -> String2307     fn s(str: &str) -> String {
2308         str.to_string()
2309     }
2310 
parser(pattern: &str) -> ParserI<Parser>2311     fn parser(pattern: &str) -> ParserI<Parser> {
2312         ParserI::new(Parser::new(), pattern)
2313     }
2314 
parser_octal(pattern: &str) -> ParserI<Parser>2315     fn parser_octal(pattern: &str) -> ParserI<Parser> {
2316         let parser = ParserBuilder::new().octal(true).build();
2317         ParserI::new(parser, pattern)
2318     }
2319 
parser_nest_limit(pattern: &str, nest_limit: u32) -> ParserI<Parser>2320     fn parser_nest_limit(pattern: &str, nest_limit: u32) -> ParserI<Parser> {
2321         let p = ParserBuilder::new().nest_limit(nest_limit).build();
2322         ParserI::new(p, pattern)
2323     }
2324 
parser_ignore_whitespace(pattern: &str) -> ParserI<Parser>2325     fn parser_ignore_whitespace(pattern: &str) -> ParserI<Parser> {
2326         let p = ParserBuilder::new().ignore_whitespace(true).build();
2327         ParserI::new(p, pattern)
2328     }
2329 
2330     /// Short alias for creating a new span.
nspan(start: Position, end: Position) -> Span2331     fn nspan(start: Position, end: Position) -> Span {
2332         Span::new(start, end)
2333     }
2334 
2335     /// Short alias for creating a new position.
npos(offset: usize, line: usize, column: usize) -> Position2336     fn npos(offset: usize, line: usize, column: usize) -> Position {
2337         Position::new(offset, line, column)
2338     }
2339 
2340     /// Create a new span from the given offset range. This assumes a single
2341     /// line and sets the columns based on the offsets. i.e., This only works
2342     /// out of the box for ASCII, which is fine for most tests.
span(range: Range<usize>) -> Span2343     fn span(range: Range<usize>) -> Span {
2344         let start = Position::new(range.start, 1, range.start + 1);
2345         let end = Position::new(range.end, 1, range.end + 1);
2346         Span::new(start, end)
2347     }
2348 
2349     /// Create a new span for the corresponding byte range in the given string.
span_range(subject: &str, range: Range<usize>) -> Span2350     fn span_range(subject: &str, range: Range<usize>) -> Span {
2351         let start = Position {
2352             offset: range.start,
2353             line: 1 + subject[..range.start].matches('\n').count(),
2354             column: 1 + subject[..range.start]
2355                 .chars()
2356                 .rev()
2357                 .position(|c| c == '\n')
2358                 .unwrap_or(subject[..range.start].chars().count()),
2359         };
2360         let end = Position {
2361             offset: range.end,
2362             line: 1 + subject[..range.end].matches('\n').count(),
2363             column: 1 + subject[..range.end]
2364                 .chars()
2365                 .rev()
2366                 .position(|c| c == '\n')
2367                 .unwrap_or(subject[..range.end].chars().count()),
2368         };
2369         Span::new(start, end)
2370     }
2371 
2372     /// Create a verbatim literal starting at the given position.
lit(c: char, start: usize) -> Ast2373     fn lit(c: char, start: usize) -> Ast {
2374         lit_with(c, span(start..start + c.len_utf8()))
2375     }
2376 
2377     /// Create a punctuation literal starting at the given position.
punct_lit(c: char, span: Span) -> Ast2378     fn punct_lit(c: char, span: Span) -> Ast {
2379         Ast::Literal(ast::Literal {
2380             span: span,
2381             kind: ast::LiteralKind::Punctuation,
2382             c: c,
2383         })
2384     }
2385 
2386     /// Create a verbatim literal with the given span.
lit_with(c: char, span: Span) -> Ast2387     fn lit_with(c: char, span: Span) -> Ast {
2388         Ast::Literal(ast::Literal {
2389             span: span,
2390             kind: ast::LiteralKind::Verbatim,
2391             c: c,
2392         })
2393     }
2394 
2395     /// Create a concatenation with the given range.
concat(range: Range<usize>, asts: Vec<Ast>) -> Ast2396     fn concat(range: Range<usize>, asts: Vec<Ast>) -> Ast {
2397         concat_with(span(range), asts)
2398     }
2399 
2400     /// Create a concatenation with the given span.
concat_with(span: Span, asts: Vec<Ast>) -> Ast2401     fn concat_with(span: Span, asts: Vec<Ast>) -> Ast {
2402         Ast::Concat(ast::Concat { span: span, asts: asts })
2403     }
2404 
2405     /// Create an alternation with the given span.
alt(range: Range<usize>, asts: Vec<Ast>) -> Ast2406     fn alt(range: Range<usize>, asts: Vec<Ast>) -> Ast {
2407         Ast::Alternation(ast::Alternation { span: span(range), asts: asts })
2408     }
2409 
2410     /// Create a capturing group with the given span.
group(range: Range<usize>, index: u32, ast: Ast) -> Ast2411     fn group(range: Range<usize>, index: u32, ast: Ast) -> Ast {
2412         Ast::Group(ast::Group {
2413             span: span(range),
2414             kind: ast::GroupKind::CaptureIndex(index),
2415             ast: Box::new(ast),
2416         })
2417     }
2418 
2419     /// Create an ast::SetFlags.
2420     ///
2421     /// The given pattern should be the full pattern string. The range given
2422     /// should correspond to the byte offsets where the flag set occurs.
2423     ///
2424     /// If negated is true, then the set is interpreted as beginning with a
2425     /// negation.
flag_set( pat: &str, range: Range<usize>, flag: ast::Flag, negated: bool, ) -> Ast2426     fn flag_set(
2427         pat: &str,
2428         range: Range<usize>,
2429         flag: ast::Flag,
2430         negated: bool,
2431     ) -> Ast {
2432         let mut items = vec![
2433             ast::FlagsItem {
2434                 span: span_range(pat, (range.end - 2)..(range.end - 1)),
2435                 kind: ast::FlagsItemKind::Flag(flag),
2436             },
2437         ];
2438         if negated {
2439             items.insert(0, ast::FlagsItem {
2440                 span: span_range(pat, (range.start + 2)..(range.end - 2)),
2441                 kind: ast::FlagsItemKind::Negation,
2442             });
2443         }
2444         Ast::Flags(ast::SetFlags {
2445             span: span_range(pat, range.clone()),
2446             flags: ast::Flags {
2447                 span: span_range(pat, (range.start + 2)..(range.end - 1)),
2448                 items: items,
2449             },
2450         })
2451     }
2452 
2453     #[test]
parse_nest_limit()2454     fn parse_nest_limit() {
2455         // A nest limit of 0 still allows some types of regexes.
2456         assert_eq!(
2457             parser_nest_limit("", 0).parse(),
2458             Ok(Ast::Empty(span(0..0))));
2459         assert_eq!(
2460             parser_nest_limit("a", 0).parse(),
2461             Ok(lit('a', 0)));
2462 
2463         // Test repetition operations, which require one level of nesting.
2464         assert_eq!(
2465             parser_nest_limit("a+", 0).parse().unwrap_err(),
2466             TestError {
2467                 span: span(0..2),
2468                 kind: ast::ErrorKind::NestLimitExceeded(0),
2469             });
2470         assert_eq!(
2471             parser_nest_limit("a+", 1).parse(),
2472             Ok(Ast::Repetition(ast::Repetition {
2473                 span: span(0..2),
2474                 op: ast::RepetitionOp {
2475                     span: span(1..2),
2476                     kind: ast::RepetitionKind::OneOrMore,
2477                 },
2478                 greedy: true,
2479                 ast: Box::new(lit('a', 0)),
2480             })));
2481         assert_eq!(
2482             parser_nest_limit("(a)+", 1).parse().unwrap_err(),
2483             TestError {
2484                 span: span(0..3),
2485                 kind: ast::ErrorKind::NestLimitExceeded(1),
2486             });
2487         assert_eq!(
2488             parser_nest_limit("a+*", 1).parse().unwrap_err(),
2489             TestError {
2490                 span: span(0..2),
2491                 kind: ast::ErrorKind::NestLimitExceeded(1),
2492             });
2493         assert_eq!(
2494             parser_nest_limit("a+*", 2).parse(),
2495             Ok(Ast::Repetition(ast::Repetition {
2496                 span: span(0..3),
2497                 op: ast::RepetitionOp {
2498                     span: span(2..3),
2499                     kind: ast::RepetitionKind::ZeroOrMore,
2500                 },
2501                 greedy: true,
2502                 ast: Box::new(Ast::Repetition(ast::Repetition {
2503                     span: span(0..2),
2504                     op: ast::RepetitionOp {
2505                         span: span(1..2),
2506                         kind: ast::RepetitionKind::OneOrMore,
2507                     },
2508                     greedy: true,
2509                     ast: Box::new(lit('a', 0)),
2510                 })),
2511             })));
2512 
2513         // Test concatenations. A concatenation requires one level of nesting.
2514         assert_eq!(
2515             parser_nest_limit("ab", 0).parse().unwrap_err(),
2516             TestError {
2517                 span: span(0..2),
2518                 kind: ast::ErrorKind::NestLimitExceeded(0),
2519             });
2520         assert_eq!(
2521             parser_nest_limit("ab", 1).parse(),
2522             Ok(concat(0..2, vec![lit('a', 0), lit('b', 1)])));
2523         assert_eq!(
2524             parser_nest_limit("abc", 1).parse(),
2525             Ok(concat(0..3, vec![lit('a', 0), lit('b', 1), lit('c', 2)])));
2526 
2527         // Test alternations. An alternation requires one level of nesting.
2528         assert_eq!(
2529             parser_nest_limit("a|b", 0).parse().unwrap_err(),
2530             TestError {
2531                 span: span(0..3),
2532                 kind: ast::ErrorKind::NestLimitExceeded(0),
2533             });
2534         assert_eq!(
2535             parser_nest_limit("a|b", 1).parse(),
2536             Ok(alt(0..3, vec![lit('a', 0), lit('b', 2)])));
2537         assert_eq!(
2538             parser_nest_limit("a|b|c", 1).parse(),
2539             Ok(alt(0..5, vec![lit('a', 0), lit('b', 2), lit('c', 4)])));
2540 
2541         // Test character classes. Classes form their own mini-recursive
2542         // syntax!
2543         assert_eq!(
2544             parser_nest_limit("[a]", 0).parse().unwrap_err(),
2545             TestError {
2546                 span: span(0..3),
2547                 kind: ast::ErrorKind::NestLimitExceeded(0),
2548             });
2549         assert_eq!(
2550             parser_nest_limit("[a]", 1).parse(),
2551             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
2552                 span: span(0..3),
2553                 negated: false,
2554                 kind: ast::ClassSet::Item(
2555                     ast::ClassSetItem::Literal(ast::Literal {
2556                         span: span(1..2),
2557                         kind: ast::LiteralKind::Verbatim,
2558                         c: 'a',
2559                     })
2560                 ),
2561             }))));
2562         assert_eq!(
2563             parser_nest_limit("[ab]", 1).parse().unwrap_err(),
2564             TestError {
2565                 span: span(1..3),
2566                 kind: ast::ErrorKind::NestLimitExceeded(1),
2567             });
2568         assert_eq!(
2569             parser_nest_limit("[ab[cd]]", 2).parse().unwrap_err(),
2570             TestError {
2571                 span: span(3..7),
2572                 kind: ast::ErrorKind::NestLimitExceeded(2),
2573             });
2574         assert_eq!(
2575             parser_nest_limit("[ab[cd]]", 3).parse().unwrap_err(),
2576             TestError {
2577                 span: span(4..6),
2578                 kind: ast::ErrorKind::NestLimitExceeded(3),
2579             });
2580         assert_eq!(
2581             parser_nest_limit("[a--b]", 1).parse().unwrap_err(),
2582             TestError {
2583                 span: span(1..5),
2584                 kind: ast::ErrorKind::NestLimitExceeded(1),
2585             });
2586         assert_eq!(
2587             parser_nest_limit("[a--bc]", 2).parse().unwrap_err(),
2588             TestError {
2589                 span: span(4..6),
2590                 kind: ast::ErrorKind::NestLimitExceeded(2),
2591             });
2592     }
2593 
2594     #[test]
parse_comments()2595     fn parse_comments() {
2596         let pat = "(?x)
2597 # This is comment 1.
2598 foo # This is comment 2.
2599   # This is comment 3.
2600 bar
2601 # This is comment 4.";
2602         let astc = parser(pat).parse_with_comments().unwrap();
2603         assert_eq!(
2604             astc.ast,
2605             concat_with(span_range(pat, 0..pat.len()), vec![
2606                 flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2607                 lit_with('f', span_range(pat, 26..27)),
2608                 lit_with('o', span_range(pat, 27..28)),
2609                 lit_with('o', span_range(pat, 28..29)),
2610                 lit_with('b', span_range(pat, 74..75)),
2611                 lit_with('a', span_range(pat, 75..76)),
2612                 lit_with('r', span_range(pat, 76..77)),
2613             ]));
2614         assert_eq!(astc.comments, vec![
2615             ast::Comment {
2616                 span: span_range(pat, 5..26),
2617                 comment: s(" This is comment 1."),
2618             },
2619             ast::Comment {
2620                 span: span_range(pat, 30..51),
2621                 comment: s(" This is comment 2."),
2622             },
2623             ast::Comment {
2624                 span: span_range(pat, 53..74),
2625                 comment: s(" This is comment 3."),
2626             },
2627             ast::Comment {
2628                 span: span_range(pat, 78..98),
2629                 comment: s(" This is comment 4."),
2630             },
2631         ]);
2632     }
2633 
2634     #[test]
parse_holistic()2635     fn parse_holistic() {
2636         assert_eq!(
2637             parser("]").parse(),
2638             Ok(lit(']', 0)));
2639         assert_eq!(
2640             parser(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#\&\-\~").parse(),
2641             Ok(concat(0..36, vec![
2642                 punct_lit('\\', span(0..2)),
2643                 punct_lit('.', span(2..4)),
2644                 punct_lit('+', span(4..6)),
2645                 punct_lit('*', span(6..8)),
2646                 punct_lit('?', span(8..10)),
2647                 punct_lit('(', span(10..12)),
2648                 punct_lit(')', span(12..14)),
2649                 punct_lit('|', span(14..16)),
2650                 punct_lit('[', span(16..18)),
2651                 punct_lit(']', span(18..20)),
2652                 punct_lit('{', span(20..22)),
2653                 punct_lit('}', span(22..24)),
2654                 punct_lit('^', span(24..26)),
2655                 punct_lit('$', span(26..28)),
2656                 punct_lit('#', span(28..30)),
2657                 punct_lit('&', span(30..32)),
2658                 punct_lit('-', span(32..34)),
2659                 punct_lit('~', span(34..36)),
2660             ])));
2661     }
2662 
2663     #[test]
parse_ignore_whitespace()2664     fn parse_ignore_whitespace() {
2665         // Test that basic whitespace insensitivity works.
2666         let pat = "(?x)a b";
2667         assert_eq!(
2668             parser(pat).parse(),
2669             Ok(concat_with(nspan(npos(0, 1, 1), npos(7, 1, 8)), vec![
2670                 flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2671                 lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
2672                 lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
2673             ])));
2674 
2675         // Test that we can toggle whitespace insensitivity.
2676         let pat = "(?x)a b(?-x)a b";
2677         assert_eq!(
2678             parser(pat).parse(),
2679             Ok(concat_with(nspan(npos(0, 1, 1), npos(15, 1, 16)), vec![
2680                 flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2681                 lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
2682                 lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
2683                 flag_set(pat, 7..12, ast::Flag::IgnoreWhitespace, true),
2684                 lit_with('a', nspan(npos(12, 1, 13), npos(13, 1, 14))),
2685                 lit_with(' ', nspan(npos(13, 1, 14), npos(14, 1, 15))),
2686                 lit_with('b', nspan(npos(14, 1, 15), npos(15, 1, 16))),
2687             ])));
2688 
2689         // Test that nesting whitespace insensitive flags works.
2690         let pat = "a (?x:a )a ";
2691         assert_eq!(
2692             parser(pat).parse(),
2693             Ok(concat_with(span_range(pat, 0..11), vec![
2694                 lit_with('a', span_range(pat, 0..1)),
2695                 lit_with(' ', span_range(pat, 1..2)),
2696                 Ast::Group(ast::Group {
2697                     span: span_range(pat, 2..9),
2698                     kind: ast::GroupKind::NonCapturing(ast::Flags {
2699                         span: span_range(pat, 4..5),
2700                         items: vec![
2701                             ast::FlagsItem {
2702                                 span: span_range(pat, 4..5),
2703                                 kind: ast::FlagsItemKind::Flag(
2704                                     ast::Flag::IgnoreWhitespace),
2705                             },
2706                         ],
2707                     }),
2708                     ast: Box::new(lit_with('a', span_range(pat, 6..7))),
2709                 }),
2710                 lit_with('a', span_range(pat, 9..10)),
2711                 lit_with(' ', span_range(pat, 10..11)),
2712             ])));
2713 
2714         // Test that whitespace after an opening paren is insignificant.
2715         let pat = "(?x)( ?P<foo> a )";
2716         assert_eq!(
2717             parser(pat).parse(),
2718             Ok(concat_with(span_range(pat, 0..pat.len()), vec![
2719                 flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2720                 Ast::Group(ast::Group {
2721                     span: span_range(pat, 4..pat.len()),
2722                     kind: ast::GroupKind::CaptureName(ast::CaptureName {
2723                         span: span_range(pat, 9..12),
2724                         name: s("foo"),
2725                         index: 1,
2726                     }),
2727                     ast: Box::new(lit_with('a', span_range(pat, 14..15))),
2728                 }),
2729             ])));
2730         let pat = "(?x)(  a )";
2731         assert_eq!(
2732             parser(pat).parse(),
2733             Ok(concat_with(span_range(pat, 0..pat.len()), vec![
2734                 flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2735                 Ast::Group(ast::Group {
2736                     span: span_range(pat, 4..pat.len()),
2737                     kind: ast::GroupKind::CaptureIndex(1),
2738                     ast: Box::new(lit_with('a', span_range(pat, 7..8))),
2739                 }),
2740             ])));
2741         let pat = "(?x)(  ?:  a )";
2742         assert_eq!(
2743             parser(pat).parse(),
2744             Ok(concat_with(span_range(pat, 0..pat.len()), vec![
2745                 flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2746                 Ast::Group(ast::Group {
2747                     span: span_range(pat, 4..pat.len()),
2748                     kind: ast::GroupKind::NonCapturing(ast::Flags {
2749                         span: span_range(pat, 8..8),
2750                         items: vec![],
2751                     }),
2752                     ast: Box::new(lit_with('a', span_range(pat, 11..12))),
2753                 }),
2754             ])));
2755         let pat = r"(?x)\x { 53 }";
2756         assert_eq!(
2757             parser(pat).parse(),
2758             Ok(concat_with(span_range(pat, 0..pat.len()), vec![
2759                 flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2760                 Ast::Literal(ast::Literal {
2761                     span: span(4..13),
2762                     kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
2763                     c: 'S',
2764                 }),
2765             ])));
2766 
2767         // Test that whitespace after an escape is OK.
2768         let pat = r"(?x)\ ";
2769         assert_eq!(
2770             parser(pat).parse(),
2771             Ok(concat_with(span_range(pat, 0..pat.len()), vec![
2772                 flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2773                 Ast::Literal(ast::Literal {
2774                     span: span_range(pat, 4..6),
2775                     kind: ast::LiteralKind::Special(
2776                         ast::SpecialLiteralKind::Space),
2777                     c: ' ',
2778                 }),
2779             ])));
2780         // ... but only when `x` mode is enabled.
2781         let pat = r"\ ";
2782         assert_eq!(
2783             parser(pat).parse().unwrap_err(),
2784             TestError {
2785                 span: span_range(pat, 0..2),
2786                 kind: ast::ErrorKind::EscapeUnrecognized,
2787             });
2788     }
2789 
2790     #[test]
parse_newlines()2791     fn parse_newlines() {
2792         let pat = ".\n.";
2793         assert_eq!(
2794             parser(pat).parse(),
2795             Ok(concat_with(span_range(pat, 0..3), vec![
2796                 Ast::Dot(span_range(pat, 0..1)),
2797                 lit_with('\n', span_range(pat, 1..2)),
2798                 Ast::Dot(span_range(pat, 2..3)),
2799             ])));
2800 
2801         let pat = "foobar\nbaz\nquux\n";
2802         assert_eq!(
2803             parser(pat).parse(),
2804             Ok(concat_with(span_range(pat, 0..pat.len()), vec![
2805                 lit_with('f', nspan(npos(0, 1, 1), npos(1, 1, 2))),
2806                 lit_with('o', nspan(npos(1, 1, 2), npos(2, 1, 3))),
2807                 lit_with('o', nspan(npos(2, 1, 3), npos(3, 1, 4))),
2808                 lit_with('b', nspan(npos(3, 1, 4), npos(4, 1, 5))),
2809                 lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
2810                 lit_with('r', nspan(npos(5, 1, 6), npos(6, 1, 7))),
2811                 lit_with('\n', nspan(npos(6, 1, 7), npos(7, 2, 1))),
2812                 lit_with('b', nspan(npos(7, 2, 1), npos(8, 2, 2))),
2813                 lit_with('a', nspan(npos(8, 2, 2), npos(9, 2, 3))),
2814                 lit_with('z', nspan(npos(9, 2, 3), npos(10, 2, 4))),
2815                 lit_with('\n', nspan(npos(10, 2, 4), npos(11, 3, 1))),
2816                 lit_with('q', nspan(npos(11, 3, 1), npos(12, 3, 2))),
2817                 lit_with('u', nspan(npos(12, 3, 2), npos(13, 3, 3))),
2818                 lit_with('u', nspan(npos(13, 3, 3), npos(14, 3, 4))),
2819                 lit_with('x', nspan(npos(14, 3, 4), npos(15, 3, 5))),
2820                 lit_with('\n', nspan(npos(15, 3, 5), npos(16, 4, 1))),
2821             ])));
2822     }
2823 
2824     #[test]
parse_uncounted_repetition()2825     fn parse_uncounted_repetition() {
2826         assert_eq!(
2827             parser(r"a*").parse(),
2828             Ok(Ast::Repetition(ast::Repetition {
2829                 span: span(0..2),
2830                 op: ast::RepetitionOp {
2831                     span: span(1..2),
2832                     kind: ast::RepetitionKind::ZeroOrMore,
2833                 },
2834                 greedy: true,
2835                 ast: Box::new(lit('a', 0)),
2836             })));
2837         assert_eq!(
2838             parser(r"a+").parse(),
2839             Ok(Ast::Repetition(ast::Repetition {
2840                 span: span(0..2),
2841                 op: ast::RepetitionOp {
2842                     span: span(1..2),
2843                     kind: ast::RepetitionKind::OneOrMore,
2844                 },
2845                 greedy: true,
2846                 ast: Box::new(lit('a', 0)),
2847             })));
2848 
2849         assert_eq!(
2850             parser(r"a?").parse(),
2851             Ok(Ast::Repetition(ast::Repetition {
2852                 span: span(0..2),
2853                 op: ast::RepetitionOp {
2854                     span: span(1..2),
2855                     kind: ast::RepetitionKind::ZeroOrOne,
2856                 },
2857                 greedy: true,
2858                 ast: Box::new(lit('a', 0)),
2859             })));
2860         assert_eq!(
2861             parser(r"a??").parse(),
2862             Ok(Ast::Repetition(ast::Repetition {
2863                 span: span(0..3),
2864                 op: ast::RepetitionOp {
2865                     span: span(1..3),
2866                     kind: ast::RepetitionKind::ZeroOrOne,
2867                 },
2868                 greedy: false,
2869                 ast: Box::new(lit('a', 0)),
2870             })));
2871         assert_eq!(
2872             parser(r"a?").parse(),
2873             Ok(Ast::Repetition(ast::Repetition {
2874                 span: span(0..2),
2875                 op: ast::RepetitionOp {
2876                     span: span(1..2),
2877                     kind: ast::RepetitionKind::ZeroOrOne,
2878                 },
2879                 greedy: true,
2880                 ast: Box::new(lit('a', 0)),
2881             })));
2882         assert_eq!(
2883             parser(r"a?b").parse(),
2884             Ok(concat(0..3, vec![
2885                 Ast::Repetition(ast::Repetition {
2886                     span: span(0..2),
2887                     op: ast::RepetitionOp {
2888                         span: span(1..2),
2889                         kind: ast::RepetitionKind::ZeroOrOne,
2890                     },
2891                     greedy: true,
2892                     ast: Box::new(lit('a', 0)),
2893                 }),
2894                 lit('b', 2),
2895             ])));
2896         assert_eq!(
2897             parser(r"a??b").parse(),
2898             Ok(concat(0..4, vec![
2899                 Ast::Repetition(ast::Repetition {
2900                     span: span(0..3),
2901                     op: ast::RepetitionOp {
2902                         span: span(1..3),
2903                         kind: ast::RepetitionKind::ZeroOrOne,
2904                     },
2905                     greedy: false,
2906                     ast: Box::new(lit('a', 0)),
2907                 }),
2908                 lit('b', 3),
2909             ])));
2910         assert_eq!(
2911             parser(r"ab?").parse(),
2912             Ok(concat(0..3, vec![
2913                 lit('a', 0),
2914                 Ast::Repetition(ast::Repetition {
2915                     span: span(1..3),
2916                     op: ast::RepetitionOp {
2917                         span: span(2..3),
2918                         kind: ast::RepetitionKind::ZeroOrOne,
2919                     },
2920                     greedy: true,
2921                     ast: Box::new(lit('b', 1)),
2922                 }),
2923             ])));
2924         assert_eq!(
2925             parser(r"(ab)?").parse(),
2926             Ok(Ast::Repetition(ast::Repetition {
2927                 span: span(0..5),
2928                 op: ast::RepetitionOp {
2929                     span: span(4..5),
2930                     kind: ast::RepetitionKind::ZeroOrOne,
2931                 },
2932                 greedy: true,
2933                 ast: Box::new(group(0..4, 1, concat(1..3, vec![
2934                     lit('a', 1),
2935                     lit('b', 2),
2936                 ]))),
2937             })));
2938         assert_eq!(
2939             parser(r"|a?").parse(),
2940             Ok(alt(0..3, vec![
2941                 Ast::Empty(span(0..0)),
2942                 Ast::Repetition(ast::Repetition {
2943                     span: span(1..3),
2944                     op: ast::RepetitionOp {
2945                         span: span(2..3),
2946                         kind: ast::RepetitionKind::ZeroOrOne,
2947                     },
2948                     greedy: true,
2949                     ast: Box::new(lit('a', 1)),
2950                 }),
2951             ])));
2952 
2953         assert_eq!(
2954             parser(r"*").parse().unwrap_err(),
2955             TestError {
2956                 span: span(0..0),
2957                 kind: ast::ErrorKind::RepetitionMissing,
2958             });
2959         assert_eq!(
2960             parser(r"(?i)*").parse().unwrap_err(),
2961             TestError {
2962                 span: span(4..4),
2963                 kind: ast::ErrorKind::RepetitionMissing,
2964             });
2965         assert_eq!(
2966             parser(r"(*)").parse().unwrap_err(),
2967             TestError {
2968                 span: span(1..1),
2969                 kind: ast::ErrorKind::RepetitionMissing,
2970             });
2971         assert_eq!(
2972             parser(r"(?:?)").parse().unwrap_err(),
2973             TestError {
2974                 span: span(3..3),
2975                 kind: ast::ErrorKind::RepetitionMissing,
2976             });
2977         assert_eq!(
2978             parser(r"+").parse().unwrap_err(),
2979             TestError {
2980                 span: span(0..0),
2981                 kind: ast::ErrorKind::RepetitionMissing,
2982             });
2983         assert_eq!(
2984             parser(r"?").parse().unwrap_err(),
2985             TestError {
2986                 span: span(0..0),
2987                 kind: ast::ErrorKind::RepetitionMissing,
2988             });
2989         assert_eq!(
2990             parser(r"(?)").parse().unwrap_err(),
2991             TestError {
2992                 span: span(1..1),
2993                 kind: ast::ErrorKind::RepetitionMissing,
2994             });
2995         assert_eq!(
2996             parser(r"|*").parse().unwrap_err(),
2997             TestError {
2998                 span: span(1..1),
2999                 kind: ast::ErrorKind::RepetitionMissing,
3000             });
3001         assert_eq!(
3002             parser(r"|+").parse().unwrap_err(),
3003             TestError {
3004                 span: span(1..1),
3005                 kind: ast::ErrorKind::RepetitionMissing,
3006             });
3007         assert_eq!(
3008             parser(r"|?").parse().unwrap_err(),
3009             TestError {
3010                 span: span(1..1),
3011                 kind: ast::ErrorKind::RepetitionMissing,
3012             });
3013     }
3014 
3015     #[test]
parse_counted_repetition()3016     fn parse_counted_repetition() {
3017         assert_eq!(
3018             parser(r"a{5}").parse(),
3019             Ok(Ast::Repetition(ast::Repetition {
3020                 span: span(0..4),
3021                 op: ast::RepetitionOp {
3022                     span: span(1..4),
3023                     kind: ast::RepetitionKind::Range(
3024                         ast::RepetitionRange::Exactly(5)),
3025                 },
3026                 greedy: true,
3027                 ast: Box::new(lit('a', 0)),
3028             })));
3029         assert_eq!(
3030             parser(r"a{5,}").parse(),
3031             Ok(Ast::Repetition(ast::Repetition {
3032                 span: span(0..5),
3033                 op: ast::RepetitionOp {
3034                     span: span(1..5),
3035                     kind: ast::RepetitionKind::Range(
3036                         ast::RepetitionRange::AtLeast(5)),
3037                 },
3038                 greedy: true,
3039                 ast: Box::new(lit('a', 0)),
3040             })));
3041         assert_eq!(
3042             parser(r"a{5,9}").parse(),
3043             Ok(Ast::Repetition(ast::Repetition {
3044                 span: span(0..6),
3045                 op: ast::RepetitionOp {
3046                     span: span(1..6),
3047                     kind: ast::RepetitionKind::Range(
3048                         ast::RepetitionRange::Bounded(5, 9)),
3049                 },
3050                 greedy: true,
3051                 ast: Box::new(lit('a', 0)),
3052             })));
3053         assert_eq!(
3054             parser(r"a{5}?").parse(),
3055             Ok(Ast::Repetition(ast::Repetition {
3056                 span: span(0..5),
3057                 op: ast::RepetitionOp {
3058                     span: span(1..5),
3059                     kind: ast::RepetitionKind::Range(
3060                         ast::RepetitionRange::Exactly(5)),
3061                 },
3062                 greedy: false,
3063                 ast: Box::new(lit('a', 0)),
3064             })));
3065         assert_eq!(
3066             parser(r"ab{5}").parse(),
3067             Ok(concat(0..5, vec![
3068                 lit('a', 0),
3069                 Ast::Repetition(ast::Repetition {
3070                     span: span(1..5),
3071                     op: ast::RepetitionOp {
3072                         span: span(2..5),
3073                         kind: ast::RepetitionKind::Range(
3074                             ast::RepetitionRange::Exactly(5)),
3075                     },
3076                     greedy: true,
3077                     ast: Box::new(lit('b', 1)),
3078                 }),
3079             ])));
3080         assert_eq!(
3081             parser(r"ab{5}c").parse(),
3082             Ok(concat(0..6, vec![
3083                 lit('a', 0),
3084                 Ast::Repetition(ast::Repetition {
3085                     span: span(1..5),
3086                     op: ast::RepetitionOp {
3087                         span: span(2..5),
3088                         kind: ast::RepetitionKind::Range(
3089                             ast::RepetitionRange::Exactly(5)),
3090                     },
3091                     greedy: true,
3092                     ast: Box::new(lit('b', 1)),
3093                 }),
3094                 lit('c', 5),
3095             ])));
3096 
3097         assert_eq!(
3098             parser(r"a{ 5 }").parse(),
3099             Ok(Ast::Repetition(ast::Repetition {
3100                 span: span(0..6),
3101                 op: ast::RepetitionOp {
3102                     span: span(1..6),
3103                     kind: ast::RepetitionKind::Range(
3104                         ast::RepetitionRange::Exactly(5)),
3105                 },
3106                 greedy: true,
3107                 ast: Box::new(lit('a', 0)),
3108             })));
3109         assert_eq!(
3110             parser(r"a{ 5 , 9 }").parse(),
3111             Ok(Ast::Repetition(ast::Repetition {
3112                 span: span(0..10),
3113                 op: ast::RepetitionOp {
3114                     span: span(1..10),
3115                     kind: ast::RepetitionKind::Range(
3116                         ast::RepetitionRange::Bounded(5, 9)),
3117                 },
3118                 greedy: true,
3119                 ast: Box::new(lit('a', 0)),
3120             })));
3121         assert_eq!(
3122             parser_ignore_whitespace(r"a{5,9} ?").parse(),
3123             Ok(Ast::Repetition(ast::Repetition {
3124                 span: span(0..8),
3125                 op: ast::RepetitionOp {
3126                     span: span(1..8),
3127                     kind: ast::RepetitionKind::Range(
3128                         ast::RepetitionRange::Bounded(5, 9)),
3129                 },
3130                 greedy: false,
3131                 ast: Box::new(lit('a', 0)),
3132             })));
3133 
3134         assert_eq!(
3135             parser(r"(?i){0}").parse().unwrap_err(),
3136             TestError {
3137                 span: span(4..4),
3138                 kind: ast::ErrorKind::RepetitionMissing,
3139             });
3140         assert_eq!(
3141             parser(r"(?m){1,1}").parse().unwrap_err(),
3142             TestError {
3143                 span: span(4..4),
3144                 kind: ast::ErrorKind::RepetitionMissing,
3145             });
3146         assert_eq!(
3147             parser(r"a{").parse().unwrap_err(),
3148             TestError {
3149                 span: span(1..2),
3150                 kind: ast::ErrorKind::RepetitionCountUnclosed,
3151             });
3152         assert_eq!(
3153             parser(r"a{}").parse().unwrap_err(),
3154             TestError {
3155                 span: span(2..2),
3156                 kind: ast::ErrorKind::DecimalEmpty,
3157             });
3158         assert_eq!(
3159             parser(r"a{a").parse().unwrap_err(),
3160             TestError {
3161                 span: span(2..2),
3162                 kind: ast::ErrorKind::DecimalEmpty,
3163             });
3164         assert_eq!(
3165             parser(r"a{9999999999}").parse().unwrap_err(),
3166             TestError {
3167                 span: span(2..12),
3168                 kind: ast::ErrorKind::DecimalInvalid,
3169             });
3170         assert_eq!(
3171             parser(r"a{9").parse().unwrap_err(),
3172             TestError {
3173                 span: span(1..3),
3174                 kind: ast::ErrorKind::RepetitionCountUnclosed,
3175             });
3176         assert_eq!(
3177             parser(r"a{9,a").parse().unwrap_err(),
3178             TestError {
3179                 span: span(4..4),
3180                 kind: ast::ErrorKind::DecimalEmpty,
3181             });
3182         assert_eq!(
3183             parser(r"a{9,9999999999}").parse().unwrap_err(),
3184             TestError {
3185                 span: span(4..14),
3186                 kind: ast::ErrorKind::DecimalInvalid,
3187             });
3188         assert_eq!(
3189             parser(r"a{9,").parse().unwrap_err(),
3190             TestError {
3191                 span: span(1..4),
3192                 kind: ast::ErrorKind::RepetitionCountUnclosed,
3193             });
3194         assert_eq!(
3195             parser(r"a{9,11").parse().unwrap_err(),
3196             TestError {
3197                 span: span(1..6),
3198                 kind: ast::ErrorKind::RepetitionCountUnclosed,
3199             });
3200         assert_eq!(
3201             parser(r"a{2,1}").parse().unwrap_err(),
3202             TestError {
3203                 span: span(1..6),
3204                 kind: ast::ErrorKind::RepetitionCountInvalid,
3205             });
3206         assert_eq!(
3207             parser(r"{5}").parse().unwrap_err(),
3208             TestError {
3209                 span: span(0..0),
3210                 kind: ast::ErrorKind::RepetitionMissing,
3211             });
3212         assert_eq!(
3213             parser(r"|{5}").parse().unwrap_err(),
3214             TestError {
3215                 span: span(1..1),
3216                 kind: ast::ErrorKind::RepetitionMissing,
3217             });
3218     }
3219 
3220     #[test]
parse_alternate()3221     fn parse_alternate() {
3222         assert_eq!(
3223             parser(r"a|b").parse(),
3224             Ok(Ast::Alternation(ast::Alternation {
3225                 span: span(0..3),
3226                 asts: vec![lit('a', 0), lit('b', 2)],
3227             })));
3228         assert_eq!(
3229             parser(r"(a|b)").parse(),
3230             Ok(group(0..5, 1, Ast::Alternation(ast::Alternation {
3231                 span: span(1..4),
3232                 asts: vec![lit('a', 1), lit('b', 3)],
3233             }))));
3234 
3235         assert_eq!(
3236             parser(r"a|b|c").parse(),
3237             Ok(Ast::Alternation(ast::Alternation {
3238                 span: span(0..5),
3239                 asts: vec![lit('a', 0), lit('b', 2), lit('c', 4)],
3240             })));
3241         assert_eq!(
3242             parser(r"ax|by|cz").parse(),
3243             Ok(Ast::Alternation(ast::Alternation {
3244                 span: span(0..8),
3245                 asts: vec![
3246                     concat(0..2, vec![lit('a', 0), lit('x', 1)]),
3247                     concat(3..5, vec![lit('b', 3), lit('y', 4)]),
3248                     concat(6..8, vec![lit('c', 6), lit('z', 7)]),
3249                 ],
3250             })));
3251         assert_eq!(
3252             parser(r"(ax|by|cz)").parse(),
3253             Ok(group(0..10, 1, Ast::Alternation(ast::Alternation {
3254                 span: span(1..9),
3255                 asts: vec![
3256                     concat(1..3, vec![lit('a', 1), lit('x', 2)]),
3257                     concat(4..6, vec![lit('b', 4), lit('y', 5)]),
3258                     concat(7..9, vec![lit('c', 7), lit('z', 8)]),
3259                 ],
3260             }))));
3261         assert_eq!(
3262             parser(r"(ax|(by|(cz)))").parse(),
3263             Ok(group(0..14, 1, alt(1..13, vec![
3264                 concat(1..3, vec![lit('a', 1), lit('x', 2)]),
3265                 group(4..13, 2, alt(5..12, vec![
3266                     concat(5..7, vec![lit('b', 5), lit('y', 6)]),
3267                     group(8..12, 3, concat(9..11, vec![
3268                         lit('c', 9),
3269                         lit('z', 10),
3270                     ])),
3271                 ])),
3272             ]))));
3273 
3274         assert_eq!(
3275             parser(r"|").parse(), Ok(alt(0..1, vec![
3276                 Ast::Empty(span(0..0)), Ast::Empty(span(1..1)),
3277             ])));
3278         assert_eq!(
3279             parser(r"||").parse(), Ok(alt(0..2, vec![
3280                 Ast::Empty(span(0..0)),
3281                 Ast::Empty(span(1..1)),
3282                 Ast::Empty(span(2..2)),
3283             ])));
3284         assert_eq!(
3285             parser(r"a|").parse(), Ok(alt(0..2, vec![
3286                 lit('a', 0), Ast::Empty(span(2..2)),
3287             ])));
3288         assert_eq!(
3289             parser(r"|a").parse(), Ok(alt(0..2, vec![
3290                 Ast::Empty(span(0..0)), lit('a', 1),
3291             ])));
3292 
3293         assert_eq!(
3294             parser(r"(|)").parse(), Ok(group(0..3, 1, alt(1..2, vec![
3295                 Ast::Empty(span(1..1)), Ast::Empty(span(2..2)),
3296             ]))));
3297         assert_eq!(
3298             parser(r"(a|)").parse(), Ok(group(0..4, 1, alt(1..3, vec![
3299                 lit('a', 1), Ast::Empty(span(3..3)),
3300             ]))));
3301         assert_eq!(
3302             parser(r"(|a)").parse(), Ok(group(0..4, 1, alt(1..3, vec![
3303                 Ast::Empty(span(1..1)), lit('a', 2),
3304             ]))));
3305 
3306         assert_eq!(
3307             parser(r"a|b)").parse().unwrap_err(),
3308             TestError {
3309                 span: span(3..4),
3310                 kind: ast::ErrorKind::GroupUnopened,
3311             });
3312         assert_eq!(
3313             parser(r"(a|b").parse().unwrap_err(),
3314             TestError {
3315                 span: span(0..1),
3316                 kind: ast::ErrorKind::GroupUnclosed,
3317             });
3318     }
3319 
3320     #[test]
parse_unsupported_lookaround()3321     fn parse_unsupported_lookaround() {
3322         assert_eq!(
3323             parser(r"(?=a)").parse().unwrap_err(),
3324             TestError {
3325                 span: span(0..3),
3326                 kind: ast::ErrorKind::UnsupportedLookAround,
3327             });
3328         assert_eq!(
3329             parser(r"(?!a)").parse().unwrap_err(),
3330             TestError {
3331                 span: span(0..3),
3332                 kind: ast::ErrorKind::UnsupportedLookAround,
3333             });
3334         assert_eq!(
3335             parser(r"(?<=a)").parse().unwrap_err(),
3336             TestError {
3337                 span: span(0..4),
3338                 kind: ast::ErrorKind::UnsupportedLookAround,
3339             });
3340         assert_eq!(
3341             parser(r"(?<!a)").parse().unwrap_err(),
3342             TestError {
3343                 span: span(0..4),
3344                 kind: ast::ErrorKind::UnsupportedLookAround,
3345             });
3346     }
3347 
3348     #[test]
parse_group()3349     fn parse_group() {
3350         assert_eq!(parser("(?i)").parse(), Ok(Ast::Flags(ast::SetFlags {
3351             span: span(0..4),
3352             flags: ast::Flags {
3353                 span: span(2..3),
3354                 items: vec![ast::FlagsItem {
3355                     span: span(2..3),
3356                     kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
3357                 }],
3358             },
3359         })));
3360         assert_eq!(parser("(?iU)").parse(), Ok(Ast::Flags(ast::SetFlags {
3361             span: span(0..5),
3362             flags: ast::Flags {
3363                 span: span(2..4),
3364                 items: vec![
3365                     ast::FlagsItem {
3366                         span: span(2..3),
3367                         kind: ast::FlagsItemKind::Flag(
3368                             ast::Flag::CaseInsensitive),
3369                     },
3370                     ast::FlagsItem {
3371                         span: span(3..4),
3372                         kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
3373                     },
3374                 ],
3375             },
3376         })));
3377         assert_eq!(parser("(?i-U)").parse(), Ok(Ast::Flags(ast::SetFlags {
3378             span: span(0..6),
3379             flags: ast::Flags {
3380                 span: span(2..5),
3381                 items: vec![
3382                     ast::FlagsItem {
3383                         span: span(2..3),
3384                         kind: ast::FlagsItemKind::Flag(
3385                             ast::Flag::CaseInsensitive),
3386                     },
3387                     ast::FlagsItem {
3388                         span: span(3..4),
3389                         kind: ast::FlagsItemKind::Negation,
3390                     },
3391                     ast::FlagsItem {
3392                         span: span(4..5),
3393                         kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
3394                     },
3395                 ],
3396             },
3397         })));
3398 
3399         assert_eq!(parser("()").parse(), Ok(Ast::Group(ast::Group {
3400             span: span(0..2),
3401             kind: ast::GroupKind::CaptureIndex(1),
3402             ast: Box::new(Ast::Empty(span(1..1))),
3403         })));
3404         assert_eq!(parser("(a)").parse(), Ok(Ast::Group(ast::Group {
3405             span: span(0..3),
3406             kind: ast::GroupKind::CaptureIndex(1),
3407             ast: Box::new(lit('a', 1)),
3408         })));
3409         assert_eq!(parser("(())").parse(), Ok(Ast::Group(ast::Group {
3410             span: span(0..4),
3411             kind: ast::GroupKind::CaptureIndex(1),
3412             ast: Box::new(Ast::Group(ast::Group {
3413                 span: span(1..3),
3414                 kind: ast::GroupKind::CaptureIndex(2),
3415                 ast: Box::new(Ast::Empty(span(2..2))),
3416             })),
3417         })));
3418 
3419         assert_eq!(parser("(?:a)").parse(), Ok(Ast::Group(ast::Group {
3420             span: span(0..5),
3421             kind: ast::GroupKind::NonCapturing(ast::Flags {
3422                 span: span(2..2),
3423                 items: vec![],
3424             }),
3425             ast: Box::new(lit('a', 3)),
3426         })));
3427 
3428         assert_eq!(parser("(?i:a)").parse(), Ok(Ast::Group(ast::Group {
3429             span: span(0..6),
3430             kind: ast::GroupKind::NonCapturing(ast::Flags {
3431                 span: span(2..3),
3432                 items: vec![
3433                     ast::FlagsItem {
3434                         span: span(2..3),
3435                         kind: ast::FlagsItemKind::Flag(
3436                             ast::Flag::CaseInsensitive),
3437                     },
3438                 ],
3439             }),
3440             ast: Box::new(lit('a', 4)),
3441         })));
3442         assert_eq!(parser("(?i-U:a)").parse(), Ok(Ast::Group(ast::Group {
3443             span: span(0..8),
3444             kind: ast::GroupKind::NonCapturing(ast::Flags {
3445                 span: span(2..5),
3446                 items: vec![
3447                     ast::FlagsItem {
3448                         span: span(2..3),
3449                         kind: ast::FlagsItemKind::Flag(
3450                             ast::Flag::CaseInsensitive),
3451                     },
3452                     ast::FlagsItem {
3453                         span: span(3..4),
3454                         kind: ast::FlagsItemKind::Negation,
3455                     },
3456                     ast::FlagsItem {
3457                         span: span(4..5),
3458                         kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
3459                     },
3460                 ],
3461             }),
3462             ast: Box::new(lit('a', 6)),
3463         })));
3464 
3465         assert_eq!(
3466             parser("(").parse().unwrap_err(),
3467             TestError {
3468                 span: span(0..1),
3469                 kind: ast::ErrorKind::GroupUnclosed,
3470             });
3471         assert_eq!(
3472             parser("(?").parse().unwrap_err(),
3473             TestError {
3474                 span: span(0..1),
3475                 kind: ast::ErrorKind::GroupUnclosed,
3476             });
3477         assert_eq!(
3478             parser("(?P").parse().unwrap_err(),
3479             TestError {
3480                 span: span(2..3),
3481                 kind: ast::ErrorKind::FlagUnrecognized,
3482             });
3483         assert_eq!(
3484             parser("(?P<").parse().unwrap_err(),
3485             TestError {
3486                 span: span(4..4),
3487                 kind: ast::ErrorKind::GroupNameUnexpectedEof,
3488             });
3489         assert_eq!(
3490             parser("(a").parse().unwrap_err(),
3491             TestError {
3492                 span: span(0..1),
3493                 kind: ast::ErrorKind::GroupUnclosed,
3494             });
3495         assert_eq!(
3496             parser("(()").parse().unwrap_err(),
3497             TestError {
3498                 span: span(0..1),
3499                 kind: ast::ErrorKind::GroupUnclosed,
3500             });
3501         assert_eq!(
3502             parser(")").parse().unwrap_err(),
3503             TestError {
3504                 span: span(0..1),
3505                 kind: ast::ErrorKind::GroupUnopened,
3506             });
3507         assert_eq!(
3508             parser("a)").parse().unwrap_err(),
3509             TestError {
3510                 span: span(1..2),
3511                 kind: ast::ErrorKind::GroupUnopened,
3512             });
3513     }
3514 
3515     #[test]
parse_capture_name()3516     fn parse_capture_name() {
3517         assert_eq!(parser("(?P<a>z)").parse(), Ok(Ast::Group(ast::Group {
3518             span: span(0..8),
3519             kind: ast::GroupKind::CaptureName(ast::CaptureName {
3520                 span: span(4..5),
3521                 name: s("a"),
3522                 index: 1,
3523             }),
3524             ast: Box::new(lit('z', 6)),
3525         })));
3526         assert_eq!(parser("(?P<abc>z)").parse(), Ok(Ast::Group(ast::Group {
3527             span: span(0..10),
3528             kind: ast::GroupKind::CaptureName(ast::CaptureName {
3529                 span: span(4..7),
3530                 name: s("abc"),
3531                 index: 1,
3532             }),
3533             ast: Box::new(lit('z', 8)),
3534         })));
3535 
3536         assert_eq!(
3537             parser("(?P<").parse().unwrap_err(),
3538             TestError {
3539                 span: span(4..4),
3540                 kind: ast::ErrorKind::GroupNameUnexpectedEof,
3541             });
3542         assert_eq!(
3543             parser("(?P<>z)").parse().unwrap_err(),
3544             TestError {
3545                 span: span(4..4),
3546                 kind: ast::ErrorKind::GroupNameEmpty,
3547             });
3548         assert_eq!(
3549             parser("(?P<a").parse().unwrap_err(),
3550             TestError {
3551                 span: span(5..5),
3552                 kind: ast::ErrorKind::GroupNameUnexpectedEof,
3553             });
3554         assert_eq!(
3555             parser("(?P<ab").parse().unwrap_err(),
3556             TestError {
3557                 span: span(6..6),
3558                 kind: ast::ErrorKind::GroupNameUnexpectedEof,
3559             });
3560         assert_eq!(
3561             parser("(?P<0a").parse().unwrap_err(),
3562             TestError {
3563                 span: span(4..5),
3564                 kind: ast::ErrorKind::GroupNameInvalid,
3565             });
3566         assert_eq!(
3567             parser("(?P<~").parse().unwrap_err(),
3568             TestError {
3569                 span: span(4..5),
3570                 kind: ast::ErrorKind::GroupNameInvalid,
3571             });
3572         assert_eq!(
3573             parser("(?P<abc~").parse().unwrap_err(),
3574             TestError {
3575                 span: span(7..8),
3576                 kind: ast::ErrorKind::GroupNameInvalid,
3577             });
3578         assert_eq!(
3579             parser("(?P<a>y)(?P<a>z)").parse().unwrap_err(),
3580             TestError {
3581                 span: span(12..13),
3582                 kind: ast::ErrorKind::GroupNameDuplicate {
3583                     original: span(4..5),
3584                 },
3585             });
3586     }
3587 
3588     #[test]
parse_flags()3589     fn parse_flags() {
3590         assert_eq!(parser("i:").parse_flags(), Ok(ast::Flags {
3591             span: span(0..1),
3592             items: vec![ast::FlagsItem {
3593                 span: span(0..1),
3594                 kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
3595             }],
3596         }));
3597         assert_eq!(parser("i)").parse_flags(), Ok(ast::Flags {
3598             span: span(0..1),
3599             items: vec![ast::FlagsItem {
3600                 span: span(0..1),
3601                 kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
3602             }],
3603         }));
3604 
3605         assert_eq!(parser("isU:").parse_flags(), Ok(ast::Flags {
3606             span: span(0..3),
3607             items: vec![
3608                 ast::FlagsItem {
3609                     span: span(0..1),
3610                     kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
3611                 },
3612                 ast::FlagsItem {
3613                     span: span(1..2),
3614                     kind: ast::FlagsItemKind::Flag(
3615                         ast::Flag::DotMatchesNewLine),
3616                 },
3617                 ast::FlagsItem {
3618                     span: span(2..3),
3619                     kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
3620                 },
3621             ],
3622         }));
3623 
3624         assert_eq!(parser("-isU:").parse_flags(), Ok(ast::Flags {
3625             span: span(0..4),
3626             items: vec![
3627                 ast::FlagsItem {
3628                     span: span(0..1),
3629                     kind: ast::FlagsItemKind::Negation,
3630                 },
3631                 ast::FlagsItem {
3632                     span: span(1..2),
3633                     kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
3634                 },
3635                 ast::FlagsItem {
3636                     span: span(2..3),
3637                     kind: ast::FlagsItemKind::Flag(
3638                         ast::Flag::DotMatchesNewLine),
3639                 },
3640                 ast::FlagsItem {
3641                     span: span(3..4),
3642                     kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
3643                 },
3644             ],
3645         }));
3646         assert_eq!(parser("i-sU:").parse_flags(), Ok(ast::Flags {
3647             span: span(0..4),
3648             items: vec![
3649                 ast::FlagsItem {
3650                     span: span(0..1),
3651                     kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
3652                 },
3653                 ast::FlagsItem {
3654                     span: span(1..2),
3655                     kind: ast::FlagsItemKind::Negation,
3656                 },
3657                 ast::FlagsItem {
3658                     span: span(2..3),
3659                     kind: ast::FlagsItemKind::Flag(
3660                         ast::Flag::DotMatchesNewLine),
3661                 },
3662                 ast::FlagsItem {
3663                     span: span(3..4),
3664                     kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
3665                 },
3666             ],
3667         }));
3668 
3669         assert_eq!(
3670             parser("isU").parse_flags().unwrap_err(),
3671             TestError {
3672                 span: span(3..3),
3673                 kind: ast::ErrorKind::FlagUnexpectedEof,
3674             });
3675         assert_eq!(
3676             parser("isUa:").parse_flags().unwrap_err(),
3677             TestError {
3678                 span: span(3..4),
3679                 kind: ast::ErrorKind::FlagUnrecognized,
3680             });
3681         assert_eq!(
3682             parser("isUi:").parse_flags().unwrap_err(),
3683             TestError {
3684                 span: span(3..4),
3685                 kind: ast::ErrorKind::FlagDuplicate {
3686                     original: span(0..1),
3687                 },
3688             });
3689         assert_eq!(
3690             parser("i-sU-i:").parse_flags().unwrap_err(),
3691             TestError {
3692                 span: span(4..5),
3693                 kind: ast::ErrorKind::FlagRepeatedNegation {
3694                     original: span(1..2),
3695                 },
3696             });
3697         assert_eq!(
3698             parser("-)").parse_flags().unwrap_err(),
3699             TestError {
3700                 span: span(0..1),
3701                 kind: ast::ErrorKind::FlagDanglingNegation,
3702             });
3703         assert_eq!(
3704             parser("i-)").parse_flags().unwrap_err(),
3705             TestError {
3706                 span: span(1..2),
3707                 kind: ast::ErrorKind::FlagDanglingNegation,
3708             });
3709         assert_eq!(
3710             parser("iU-)").parse_flags().unwrap_err(),
3711             TestError {
3712                 span: span(2..3),
3713                 kind: ast::ErrorKind::FlagDanglingNegation,
3714             });
3715     }
3716 
3717     #[test]
parse_flag()3718     fn parse_flag() {
3719         assert_eq!(parser("i").parse_flag(), Ok(ast::Flag::CaseInsensitive));
3720         assert_eq!(parser("m").parse_flag(), Ok(ast::Flag::MultiLine));
3721         assert_eq!(parser("s").parse_flag(), Ok(ast::Flag::DotMatchesNewLine));
3722         assert_eq!(parser("U").parse_flag(), Ok(ast::Flag::SwapGreed));
3723         assert_eq!(parser("u").parse_flag(), Ok(ast::Flag::Unicode));
3724         assert_eq!(parser("x").parse_flag(), Ok(ast::Flag::IgnoreWhitespace));
3725 
3726         assert_eq!(
3727             parser("a").parse_flag().unwrap_err(),
3728             TestError {
3729                 span: span(0..1),
3730                 kind: ast::ErrorKind::FlagUnrecognized,
3731             });
3732         assert_eq!(
3733             parser("☃").parse_flag().unwrap_err(),
3734             TestError {
3735                 span: span_range("☃", 0..3),
3736                 kind: ast::ErrorKind::FlagUnrecognized,
3737             });
3738     }
3739 
3740     #[test]
parse_primitive_non_escape()3741     fn parse_primitive_non_escape() {
3742         assert_eq!(
3743             parser(r".").parse_primitive(),
3744             Ok(Primitive::Dot(span(0..1))));
3745         assert_eq!(
3746             parser(r"^").parse_primitive(),
3747             Ok(Primitive::Assertion(ast::Assertion {
3748                 span: span(0..1),
3749                 kind: ast::AssertionKind::StartLine,
3750             })));
3751         assert_eq!(
3752             parser(r"$").parse_primitive(),
3753             Ok(Primitive::Assertion(ast::Assertion {
3754                 span: span(0..1),
3755                 kind: ast::AssertionKind::EndLine,
3756             })));
3757 
3758         assert_eq!(
3759             parser(r"a").parse_primitive(),
3760             Ok(Primitive::Literal(ast::Literal {
3761                 span: span(0..1),
3762                 kind: ast::LiteralKind::Verbatim,
3763                 c: 'a',
3764             })));
3765         assert_eq!(
3766             parser(r"|").parse_primitive(),
3767             Ok(Primitive::Literal(ast::Literal {
3768                 span: span(0..1),
3769                 kind: ast::LiteralKind::Verbatim,
3770                 c: '|',
3771             })));
3772         assert_eq!(
3773             parser(r"☃").parse_primitive(),
3774             Ok(Primitive::Literal(ast::Literal {
3775                 span: span_range("☃", 0..3),
3776                 kind: ast::LiteralKind::Verbatim,
3777                 c: '☃',
3778             })));
3779     }
3780 
3781     #[test]
parse_escape()3782     fn parse_escape() {
3783         assert_eq!(
3784             parser(r"\|").parse_primitive(),
3785             Ok(Primitive::Literal(ast::Literal {
3786                 span: span(0..2),
3787                 kind: ast::LiteralKind::Punctuation,
3788                 c: '|',
3789             })));
3790         let specials = &[
3791             (r"\a", '\x07', ast::SpecialLiteralKind::Bell),
3792             (r"\f", '\x0C', ast::SpecialLiteralKind::FormFeed),
3793             (r"\t", '\t', ast::SpecialLiteralKind::Tab),
3794             (r"\n", '\n', ast::SpecialLiteralKind::LineFeed),
3795             (r"\r", '\r', ast::SpecialLiteralKind::CarriageReturn),
3796             (r"\v", '\x0B', ast::SpecialLiteralKind::VerticalTab),
3797         ];
3798         for &(pat, c, ref kind) in specials {
3799             assert_eq!(
3800                 parser(pat).parse_primitive(),
3801                 Ok(Primitive::Literal(ast::Literal {
3802                     span: span(0..2),
3803                     kind: ast::LiteralKind::Special(kind.clone()),
3804                     c: c,
3805                 })));
3806         }
3807         assert_eq!(
3808             parser(r"\A").parse_primitive(),
3809             Ok(Primitive::Assertion(ast::Assertion {
3810                 span: span(0..2),
3811                 kind: ast::AssertionKind::StartText,
3812             })));
3813         assert_eq!(
3814             parser(r"\z").parse_primitive(),
3815             Ok(Primitive::Assertion(ast::Assertion {
3816                 span: span(0..2),
3817                 kind: ast::AssertionKind::EndText,
3818             })));
3819         assert_eq!(
3820             parser(r"\b").parse_primitive(),
3821             Ok(Primitive::Assertion(ast::Assertion {
3822                 span: span(0..2),
3823                 kind: ast::AssertionKind::WordBoundary,
3824             })));
3825         assert_eq!(
3826             parser(r"\B").parse_primitive(),
3827             Ok(Primitive::Assertion(ast::Assertion {
3828                 span: span(0..2),
3829                 kind: ast::AssertionKind::NotWordBoundary,
3830             })));
3831 
3832         assert_eq!(
3833             parser(r"\").parse_escape().unwrap_err(),
3834             TestError {
3835                 span: span(0..1),
3836                 kind: ast::ErrorKind::EscapeUnexpectedEof,
3837             });
3838         assert_eq!(
3839             parser(r"\y").parse_escape().unwrap_err(),
3840             TestError {
3841                 span: span(0..2),
3842                 kind: ast::ErrorKind::EscapeUnrecognized,
3843             });
3844     }
3845 
3846     #[test]
parse_unsupported_backreference()3847     fn parse_unsupported_backreference() {
3848         assert_eq!(
3849             parser(r"\0").parse_escape().unwrap_err(),
3850             TestError {
3851                 span: span(0..2),
3852                 kind: ast::ErrorKind::UnsupportedBackreference,
3853             });
3854         assert_eq!(
3855             parser(r"\9").parse_escape().unwrap_err(),
3856             TestError {
3857                 span: span(0..2),
3858                 kind: ast::ErrorKind::UnsupportedBackreference,
3859             });
3860     }
3861 
3862     #[test]
parse_octal()3863     fn parse_octal() {
3864         for i in 0..511 {
3865             let pat = format!(r"\{:o}", i);
3866             assert_eq!(
3867                 parser_octal(&pat).parse_escape(),
3868                 Ok(Primitive::Literal(ast::Literal {
3869                     span: span(0..pat.len()),
3870                     kind: ast::LiteralKind::Octal,
3871                     c: ::std::char::from_u32(i).unwrap(),
3872                 })));
3873         }
3874         assert_eq!(
3875             parser_octal(r"\778").parse_escape(),
3876             Ok(Primitive::Literal(ast::Literal {
3877                 span: span(0..3),
3878                 kind: ast::LiteralKind::Octal,
3879                 c: '?',
3880             })));
3881         assert_eq!(
3882             parser_octal(r"\7777").parse_escape(),
3883             Ok(Primitive::Literal(ast::Literal {
3884                 span: span(0..4),
3885                 kind: ast::LiteralKind::Octal,
3886                 c: '\u{01FF}',
3887             })));
3888         assert_eq!(
3889             parser_octal(r"\778").parse(),
3890             Ok(Ast::Concat(ast::Concat {
3891                 span: span(0..4),
3892                 asts: vec![
3893                     Ast::Literal(ast::Literal {
3894                         span: span(0..3),
3895                         kind: ast::LiteralKind::Octal,
3896                         c: '?',
3897                     }),
3898                     Ast::Literal(ast::Literal {
3899                         span: span(3..4),
3900                         kind: ast::LiteralKind::Verbatim,
3901                         c: '8',
3902                     }),
3903                 ],
3904             })));
3905         assert_eq!(
3906             parser_octal(r"\7777").parse(),
3907             Ok(Ast::Concat(ast::Concat {
3908                 span: span(0..5),
3909                 asts: vec![
3910                     Ast::Literal(ast::Literal {
3911                         span: span(0..4),
3912                         kind: ast::LiteralKind::Octal,
3913                         c: '\u{01FF}',
3914                     }),
3915                     Ast::Literal(ast::Literal {
3916                         span: span(4..5),
3917                         kind: ast::LiteralKind::Verbatim,
3918                         c: '7',
3919                     }),
3920                 ],
3921             })));
3922 
3923         assert_eq!(
3924             parser_octal(r"\8").parse_escape().unwrap_err(),
3925             TestError {
3926                 span: span(0..2),
3927                 kind: ast::ErrorKind::EscapeUnrecognized,
3928             });
3929     }
3930 
3931     #[test]
parse_hex_two()3932     fn parse_hex_two() {
3933         for i in 0..256 {
3934             let pat = format!(r"\x{:02x}", i);
3935             assert_eq!(
3936                 parser(&pat).parse_escape(),
3937                 Ok(Primitive::Literal(ast::Literal {
3938                     span: span(0..pat.len()),
3939                     kind: ast::LiteralKind::HexFixed(ast::HexLiteralKind::X),
3940                     c: ::std::char::from_u32(i).unwrap(),
3941                 })));
3942         }
3943 
3944         assert_eq!(
3945             parser(r"\xF").parse_escape().unwrap_err(),
3946             TestError {
3947                 span: span(3..3),
3948                 kind: ast::ErrorKind::EscapeUnexpectedEof,
3949             });
3950         assert_eq!(
3951             parser(r"\xG").parse_escape().unwrap_err(),
3952             TestError {
3953                 span: span(2..3),
3954                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
3955             });
3956         assert_eq!(
3957             parser(r"\xFG").parse_escape().unwrap_err(),
3958             TestError {
3959                 span: span(3..4),
3960                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
3961             });
3962     }
3963 
3964     #[test]
parse_hex_four()3965     fn parse_hex_four() {
3966         for i in 0..65536 {
3967             let c = match ::std::char::from_u32(i) {
3968                 None => continue,
3969                 Some(c) => c,
3970             };
3971             let pat = format!(r"\u{:04x}", i);
3972             assert_eq!(
3973                 parser(&pat).parse_escape(),
3974                 Ok(Primitive::Literal(ast::Literal {
3975                     span: span(0..pat.len()),
3976                     kind: ast::LiteralKind::HexFixed(
3977                         ast::HexLiteralKind::UnicodeShort),
3978                     c: c,
3979                 })));
3980         }
3981 
3982         assert_eq!(
3983             parser(r"\uF").parse_escape().unwrap_err(),
3984             TestError {
3985                 span: span(3..3),
3986                 kind: ast::ErrorKind::EscapeUnexpectedEof,
3987             });
3988         assert_eq!(
3989             parser(r"\uG").parse_escape().unwrap_err(),
3990             TestError {
3991                 span: span(2..3),
3992                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
3993             });
3994         assert_eq!(
3995             parser(r"\uFG").parse_escape().unwrap_err(),
3996             TestError {
3997                 span: span(3..4),
3998                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
3999             });
4000         assert_eq!(
4001             parser(r"\uFFG").parse_escape().unwrap_err(),
4002             TestError {
4003                 span: span(4..5),
4004                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4005             });
4006         assert_eq!(
4007             parser(r"\uFFFG").parse_escape().unwrap_err(),
4008             TestError {
4009                 span: span(5..6),
4010                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4011             });
4012         assert_eq!(
4013             parser(r"\uD800").parse_escape().unwrap_err(),
4014             TestError {
4015                 span: span(2..6),
4016                 kind: ast::ErrorKind::EscapeHexInvalid,
4017             });
4018     }
4019 
4020     #[test]
parse_hex_eight()4021     fn parse_hex_eight() {
4022         for i in 0..65536 {
4023             let c = match ::std::char::from_u32(i) {
4024                 None => continue,
4025                 Some(c) => c,
4026             };
4027             let pat = format!(r"\U{:08x}", i);
4028             assert_eq!(
4029                 parser(&pat).parse_escape(),
4030                 Ok(Primitive::Literal(ast::Literal {
4031                     span: span(0..pat.len()),
4032                     kind: ast::LiteralKind::HexFixed(
4033                         ast::HexLiteralKind::UnicodeLong),
4034                     c: c,
4035                 })));
4036         }
4037 
4038         assert_eq!(
4039             parser(r"\UF").parse_escape().unwrap_err(),
4040             TestError {
4041                 span: span(3..3),
4042                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4043             });
4044         assert_eq!(
4045             parser(r"\UG").parse_escape().unwrap_err(),
4046             TestError {
4047                 span: span(2..3),
4048                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4049             });
4050         assert_eq!(
4051             parser(r"\UFG").parse_escape().unwrap_err(),
4052             TestError {
4053                 span: span(3..4),
4054                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4055             });
4056         assert_eq!(
4057             parser(r"\UFFG").parse_escape().unwrap_err(),
4058             TestError {
4059                 span: span(4..5),
4060                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4061             });
4062         assert_eq!(
4063             parser(r"\UFFFG").parse_escape().unwrap_err(),
4064             TestError {
4065                 span: span(5..6),
4066                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4067             });
4068         assert_eq!(
4069             parser(r"\UFFFFG").parse_escape().unwrap_err(),
4070             TestError {
4071                 span: span(6..7),
4072                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4073             });
4074         assert_eq!(
4075             parser(r"\UFFFFFG").parse_escape().unwrap_err(),
4076             TestError {
4077                 span: span(7..8),
4078                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4079             });
4080         assert_eq!(
4081             parser(r"\UFFFFFFG").parse_escape().unwrap_err(),
4082             TestError {
4083                 span: span(8..9),
4084                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4085             });
4086         assert_eq!(
4087             parser(r"\UFFFFFFFG").parse_escape().unwrap_err(),
4088             TestError {
4089                 span: span(9..10),
4090                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4091             });
4092     }
4093 
4094     #[test]
parse_hex_brace()4095     fn parse_hex_brace() {
4096         assert_eq!(
4097             parser(r"\u{26c4}").parse_escape(),
4098             Ok(Primitive::Literal(ast::Literal {
4099                 span: span(0..8),
4100                 kind: ast::LiteralKind::HexBrace(
4101                     ast::HexLiteralKind::UnicodeShort),
4102                 c: '⛄',
4103             })));
4104         assert_eq!(
4105             parser(r"\U{26c4}").parse_escape(),
4106             Ok(Primitive::Literal(ast::Literal {
4107                 span: span(0..8),
4108                 kind: ast::LiteralKind::HexBrace(
4109                     ast::HexLiteralKind::UnicodeLong),
4110                 c: '⛄',
4111             })));
4112         assert_eq!(
4113             parser(r"\x{26c4}").parse_escape(),
4114             Ok(Primitive::Literal(ast::Literal {
4115                 span: span(0..8),
4116                 kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
4117                 c: '⛄',
4118             })));
4119         assert_eq!(
4120             parser(r"\x{26C4}").parse_escape(),
4121             Ok(Primitive::Literal(ast::Literal {
4122                 span: span(0..8),
4123                 kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
4124                 c: '⛄',
4125             })));
4126         assert_eq!(
4127             parser(r"\x{10fFfF}").parse_escape(),
4128             Ok(Primitive::Literal(ast::Literal {
4129                 span: span(0..10),
4130                 kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
4131                 c: '\u{10FFFF}',
4132             })));
4133 
4134         assert_eq!(
4135             parser(r"\x").parse_escape().unwrap_err(),
4136             TestError {
4137                 span: span(2..2),
4138                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4139             });
4140         assert_eq!(
4141             parser(r"\x{").parse_escape().unwrap_err(),
4142             TestError {
4143                 span: span(2..3),
4144                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4145             });
4146         assert_eq!(
4147             parser(r"\x{FF").parse_escape().unwrap_err(),
4148             TestError {
4149                 span: span(2..5),
4150                 kind: ast::ErrorKind::EscapeUnexpectedEof,
4151             });
4152         assert_eq!(
4153             parser(r"\x{}").parse_escape().unwrap_err(),
4154             TestError {
4155                 span: span(2..4),
4156                 kind: ast::ErrorKind::EscapeHexEmpty,
4157             });
4158         assert_eq!(
4159             parser(r"\x{FGF}").parse_escape().unwrap_err(),
4160             TestError {
4161                 span: span(4..5),
4162                 kind: ast::ErrorKind::EscapeHexInvalidDigit,
4163             });
4164         assert_eq!(
4165             parser(r"\x{FFFFFF}").parse_escape().unwrap_err(),
4166             TestError {
4167                 span: span(3..9),
4168                 kind: ast::ErrorKind::EscapeHexInvalid,
4169             });
4170         assert_eq!(
4171             parser(r"\x{D800}").parse_escape().unwrap_err(),
4172             TestError {
4173                 span: span(3..7),
4174                 kind: ast::ErrorKind::EscapeHexInvalid,
4175             });
4176         assert_eq!(
4177             parser(r"\x{FFFFFFFFF}").parse_escape().unwrap_err(),
4178             TestError {
4179                 span: span(3..12),
4180                 kind: ast::ErrorKind::EscapeHexInvalid,
4181             });
4182     }
4183 
4184     #[test]
parse_decimal()4185     fn parse_decimal() {
4186         assert_eq!(parser("123").parse_decimal(), Ok(123));
4187         assert_eq!(parser("0").parse_decimal(), Ok(0));
4188         assert_eq!(parser("01").parse_decimal(), Ok(1));
4189 
4190         assert_eq!(
4191             parser("-1").parse_decimal().unwrap_err(),
4192             TestError {
4193                 span: span(0..0),
4194                 kind: ast::ErrorKind::DecimalEmpty,
4195             });
4196         assert_eq!(
4197             parser("").parse_decimal().unwrap_err(),
4198             TestError {
4199                 span: span(0..0),
4200                 kind: ast::ErrorKind::DecimalEmpty,
4201             });
4202         assert_eq!(
4203             parser("9999999999").parse_decimal().unwrap_err(),
4204             TestError {
4205                 span: span(0..10),
4206                 kind: ast::ErrorKind::DecimalInvalid,
4207             });
4208     }
4209 
4210     #[test]
parse_set_class()4211     fn parse_set_class() {
4212         fn union(span: Span, items: Vec<ast::ClassSetItem>) -> ast::ClassSet {
4213             ast::ClassSet::union(ast::ClassSetUnion {
4214                 span: span,
4215                 items: items,
4216             })
4217         }
4218 
4219         fn intersection(
4220             span: Span,
4221             lhs: ast::ClassSet,
4222             rhs: ast::ClassSet,
4223         ) -> ast::ClassSet {
4224             ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
4225                 span: span,
4226                 kind: ast::ClassSetBinaryOpKind::Intersection,
4227                 lhs: Box::new(lhs),
4228                 rhs: Box::new(rhs),
4229             })
4230         }
4231 
4232         fn difference(
4233             span: Span,
4234             lhs: ast::ClassSet,
4235             rhs: ast::ClassSet,
4236         ) -> ast::ClassSet {
4237             ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
4238                 span: span,
4239                 kind: ast::ClassSetBinaryOpKind::Difference,
4240                 lhs: Box::new(lhs),
4241                 rhs: Box::new(rhs),
4242             })
4243         }
4244 
4245         fn symdifference(
4246             span: Span,
4247             lhs: ast::ClassSet,
4248             rhs: ast::ClassSet,
4249         ) -> ast::ClassSet {
4250             ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
4251                 span: span,
4252                 kind: ast::ClassSetBinaryOpKind::SymmetricDifference,
4253                 lhs: Box::new(lhs),
4254                 rhs: Box::new(rhs),
4255             })
4256         }
4257 
4258         fn itemset(item: ast::ClassSetItem) -> ast::ClassSet {
4259             ast::ClassSet::Item(item)
4260         }
4261 
4262         fn item_ascii(cls: ast::ClassAscii) -> ast::ClassSetItem {
4263             ast::ClassSetItem::Ascii(cls)
4264         }
4265 
4266         fn item_unicode(cls: ast::ClassUnicode) -> ast::ClassSetItem {
4267             ast::ClassSetItem::Unicode(cls)
4268         }
4269 
4270         fn item_perl(cls: ast::ClassPerl) -> ast::ClassSetItem {
4271             ast::ClassSetItem::Perl(cls)
4272         }
4273 
4274         fn item_bracket(cls: ast::ClassBracketed) -> ast::ClassSetItem {
4275             ast::ClassSetItem::Bracketed(Box::new(cls))
4276         }
4277 
4278         fn lit(span: Span, c: char) -> ast::ClassSetItem {
4279             ast::ClassSetItem::Literal(ast::Literal {
4280                 span: span,
4281                 kind: ast::LiteralKind::Verbatim,
4282                 c: c,
4283             })
4284         }
4285 
4286         fn empty(span: Span) -> ast::ClassSetItem {
4287             ast::ClassSetItem::Empty(span)
4288         }
4289 
4290         fn range(span: Span, start: char, end: char) -> ast::ClassSetItem {
4291             let pos1 = Position {
4292                 offset: span.start.offset + start.len_utf8(),
4293                 column: span.start.column + 1,
4294                 ..span.start
4295             };
4296             let pos2 = Position {
4297                 offset: span.end.offset - end.len_utf8(),
4298                 column: span.end.column - 1,
4299                 ..span.end
4300             };
4301             ast::ClassSetItem::Range(ast::ClassSetRange {
4302                 span: span,
4303                 start: ast::Literal {
4304                     span: Span { end: pos1, ..span },
4305                     kind: ast::LiteralKind::Verbatim,
4306                     c: start,
4307                 },
4308                 end: ast::Literal {
4309                     span: Span { start: pos2, ..span },
4310                     kind: ast::LiteralKind::Verbatim,
4311                     c: end,
4312                 },
4313             })
4314         }
4315 
4316         fn alnum(span: Span, negated: bool) -> ast::ClassAscii {
4317             ast::ClassAscii {
4318                 span: span,
4319                 kind: ast::ClassAsciiKind::Alnum,
4320                 negated: negated,
4321             }
4322         }
4323 
4324         fn lower(span: Span, negated: bool) -> ast::ClassAscii {
4325             ast::ClassAscii {
4326                 span: span,
4327                 kind: ast::ClassAsciiKind::Lower,
4328                 negated: negated,
4329             }
4330         }
4331 
4332         assert_eq!(
4333             parser("[[:alnum:]]").parse(),
4334             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4335                 span: span(0..11),
4336                 negated: false,
4337                 kind: itemset(item_ascii(alnum(span(1..10), false))),
4338             }))));
4339         assert_eq!(
4340             parser("[[[:alnum:]]]").parse(),
4341             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4342                 span: span(0..13),
4343                 negated: false,
4344                 kind: itemset(item_bracket(ast::ClassBracketed {
4345                     span: span(1..12),
4346                     negated: false,
4347                     kind: itemset(item_ascii(alnum(span(2..11), false))),
4348                 })),
4349             }))));
4350         assert_eq!(
4351             parser("[[:alnum:]&&[:lower:]]").parse(),
4352             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4353                 span: span(0..22),
4354                 negated: false,
4355                 kind: intersection(
4356                     span(1..21),
4357                     itemset(item_ascii(alnum(span(1..10), false))),
4358                     itemset(item_ascii(lower(span(12..21), false))),
4359                 ),
4360             }))));
4361         assert_eq!(
4362             parser("[[:alnum:]--[:lower:]]").parse(),
4363             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4364                 span: span(0..22),
4365                 negated: false,
4366                 kind: difference(
4367                     span(1..21),
4368                     itemset(item_ascii(alnum(span(1..10), false))),
4369                     itemset(item_ascii(lower(span(12..21), false))),
4370                 ),
4371             }))));
4372         assert_eq!(
4373             parser("[[:alnum:]~~[:lower:]]").parse(),
4374             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4375                 span: span(0..22),
4376                 negated: false,
4377                 kind: symdifference(
4378                     span(1..21),
4379                     itemset(item_ascii(alnum(span(1..10), false))),
4380                     itemset(item_ascii(lower(span(12..21), false))),
4381                 ),
4382             }))));
4383 
4384         assert_eq!(
4385             parser("[a]").parse(),
4386             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4387                 span: span(0..3),
4388                 negated: false,
4389                 kind: itemset(lit(span(1..2), 'a')),
4390             }))));
4391         assert_eq!(
4392             parser(r"[a\]]").parse(),
4393             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4394                 span: span(0..5),
4395                 negated: false,
4396                 kind: union(span(1..4), vec![
4397                     lit(span(1..2), 'a'),
4398                     ast::ClassSetItem::Literal(ast::Literal {
4399                         span: span(2..4),
4400                         kind: ast::LiteralKind::Punctuation,
4401                         c: ']',
4402                     }),
4403                 ]),
4404             }))));
4405         assert_eq!(
4406             parser(r"[a\-z]").parse(),
4407             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4408                 span: span(0..6),
4409                 negated: false,
4410                 kind: union(span(1..5), vec![
4411                     lit(span(1..2), 'a'),
4412                     ast::ClassSetItem::Literal(ast::Literal {
4413                         span: span(2..4),
4414                         kind: ast::LiteralKind::Punctuation,
4415                         c: '-',
4416                     }),
4417                     lit(span(4..5), 'z'),
4418                 ]),
4419             }))));
4420         assert_eq!(
4421             parser("[ab]").parse(),
4422             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4423                 span: span(0..4),
4424                 negated: false,
4425                 kind: union(span(1..3), vec![
4426                     lit(span(1..2), 'a'),
4427                     lit(span(2..3), 'b'),
4428                 ]),
4429             }))));
4430         assert_eq!(
4431             parser("[a-]").parse(),
4432             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4433                 span: span(0..4),
4434                 negated: false,
4435                 kind: union(span(1..3), vec![
4436                     lit(span(1..2), 'a'),
4437                     lit(span(2..3), '-'),
4438                 ]),
4439             }))));
4440         assert_eq!(
4441             parser("[-a]").parse(),
4442             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4443                 span: span(0..4),
4444                 negated: false,
4445                 kind: union(span(1..3), vec![
4446                     lit(span(1..2), '-'),
4447                     lit(span(2..3), 'a'),
4448                 ]),
4449             }))));
4450         assert_eq!(
4451             parser(r"[\pL]").parse(),
4452             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4453                 span: span(0..5),
4454                 negated: false,
4455                 kind: itemset(item_unicode(ast::ClassUnicode {
4456                     span: span(1..4),
4457                     negated: false,
4458                     kind: ast::ClassUnicodeKind::OneLetter('L'),
4459                 })),
4460             }))));
4461         assert_eq!(
4462             parser(r"[\w]").parse(),
4463             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4464                 span: span(0..4),
4465                 negated: false,
4466                 kind: itemset(item_perl(ast::ClassPerl {
4467                     span: span(1..3),
4468                     kind: ast::ClassPerlKind::Word,
4469                     negated: false,
4470                 })),
4471             }))));
4472         assert_eq!(
4473             parser(r"[a\wz]").parse(),
4474             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4475                 span: span(0..6),
4476                 negated: false,
4477                 kind: union(span(1..5), vec![
4478                     lit(span(1..2), 'a'),
4479                     item_perl(ast::ClassPerl {
4480                         span: span(2..4),
4481                         kind: ast::ClassPerlKind::Word,
4482                         negated: false,
4483                     }),
4484                     lit(span(4..5), 'z'),
4485                 ]),
4486             }))));
4487 
4488         assert_eq!(
4489             parser("[a-z]").parse(),
4490             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4491                 span: span(0..5),
4492                 negated: false,
4493                 kind: itemset(range(span(1..4), 'a', 'z')),
4494             }))));
4495         assert_eq!(
4496             parser("[a-cx-z]").parse(),
4497             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4498                 span: span(0..8),
4499                 negated: false,
4500                 kind: union(span(1..7), vec![
4501                     range(span(1..4), 'a', 'c'),
4502                     range(span(4..7), 'x', 'z'),
4503                 ]),
4504             }))));
4505         assert_eq!(
4506             parser(r"[\w&&a-cx-z]").parse(),
4507             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4508                 span: span(0..12),
4509                 negated: false,
4510                 kind: intersection(
4511                     span(1..11),
4512                     itemset(item_perl(ast::ClassPerl {
4513                         span: span(1..3),
4514                         kind: ast::ClassPerlKind::Word,
4515                         negated: false,
4516                     })),
4517                     union(span(5..11), vec![
4518                         range(span(5..8), 'a', 'c'),
4519                         range(span(8..11), 'x', 'z'),
4520                     ]),
4521                 ),
4522             }))));
4523         assert_eq!(
4524             parser(r"[a-cx-z&&\w]").parse(),
4525             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4526                 span: span(0..12),
4527                 negated: false,
4528                 kind: intersection(
4529                     span(1..11),
4530                     union(span(1..7), vec![
4531                         range(span(1..4), 'a', 'c'),
4532                         range(span(4..7), 'x', 'z'),
4533                     ]),
4534                     itemset(item_perl(ast::ClassPerl {
4535                         span: span(9..11),
4536                         kind: ast::ClassPerlKind::Word,
4537                         negated: false,
4538                     })),
4539                 ),
4540             }))));
4541         assert_eq!(
4542             parser(r"[a--b--c]").parse(),
4543             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4544                 span: span(0..9),
4545                 negated: false,
4546                 kind: difference(
4547                     span(1..8),
4548                     difference(
4549                         span(1..5),
4550                         itemset(lit(span(1..2), 'a')),
4551                         itemset(lit(span(4..5), 'b')),
4552                     ),
4553                     itemset(lit(span(7..8), 'c')),
4554                 ),
4555             }))));
4556         assert_eq!(
4557             parser(r"[a~~b~~c]").parse(),
4558             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4559                 span: span(0..9),
4560                 negated: false,
4561                 kind: symdifference(
4562                     span(1..8),
4563                     symdifference(
4564                         span(1..5),
4565                         itemset(lit(span(1..2), 'a')),
4566                         itemset(lit(span(4..5), 'b')),
4567                     ),
4568                     itemset(lit(span(7..8), 'c')),
4569                 ),
4570             }))));
4571         assert_eq!(
4572             parser(r"[\^&&^]").parse(),
4573             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4574                 span: span(0..7),
4575                 negated: false,
4576                 kind: intersection(
4577                     span(1..6),
4578                     itemset(ast::ClassSetItem::Literal(ast::Literal {
4579                         span: span(1..3),
4580                         kind: ast::LiteralKind::Punctuation,
4581                         c: '^',
4582                     })),
4583                     itemset(lit(span(5..6), '^')),
4584                 ),
4585             }))));
4586         assert_eq!(
4587             parser(r"[\&&&&]").parse(),
4588             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4589                 span: span(0..7),
4590                 negated: false,
4591                 kind: intersection(
4592                     span(1..6),
4593                     itemset(ast::ClassSetItem::Literal(ast::Literal {
4594                         span: span(1..3),
4595                         kind: ast::LiteralKind::Punctuation,
4596                         c: '&',
4597                     })),
4598                     itemset(lit(span(5..6), '&')),
4599                 ),
4600             }))));
4601         assert_eq!(
4602             parser(r"[&&&&]").parse(),
4603             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4604                 span: span(0..6),
4605                 negated: false,
4606                 kind: intersection(
4607                     span(1..5),
4608                     intersection(
4609                         span(1..3),
4610                         itemset(empty(span(1..1))),
4611                         itemset(empty(span(3..3))),
4612                     ),
4613                     itemset(empty(span(5..5))),
4614                 ),
4615             }))));
4616 
4617         let pat = "[☃-⛄]";
4618         assert_eq!(
4619             parser(pat).parse(),
4620             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4621                 span: span_range(pat, 0..9),
4622                 negated: false,
4623                 kind: itemset(ast::ClassSetItem::Range(ast::ClassSetRange {
4624                     span: span_range(pat, 1..8),
4625                     start: ast::Literal {
4626                         span: span_range(pat, 1..4),
4627                         kind: ast::LiteralKind::Verbatim,
4628                         c: '☃',
4629                     },
4630                     end: ast::Literal {
4631                         span: span_range(pat, 5..8),
4632                         kind: ast::LiteralKind::Verbatim,
4633                         c: '⛄',
4634                     },
4635                 })),
4636             }))));
4637 
4638         assert_eq!(
4639             parser(r"[]]").parse(),
4640             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4641                 span: span(0..3),
4642                 negated: false,
4643                 kind: itemset(lit(span(1..2), ']')),
4644             }))));
4645         assert_eq!(
4646             parser(r"[]\[]").parse(),
4647             Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4648                 span: span(0..5),
4649                 negated: false,
4650                 kind: union(span(1..4), vec![
4651                     lit(span(1..2), ']'),
4652                     ast::ClassSetItem::Literal(ast::Literal  {
4653                         span: span(2..4),
4654                         kind: ast::LiteralKind::Punctuation,
4655                         c: '[',
4656                     }),
4657                 ]),
4658             }))));
4659         assert_eq!(
4660             parser(r"[\[]]").parse(),
4661             Ok(concat(0..5, vec![
4662                 Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4663                     span: span(0..4),
4664                     negated: false,
4665                     kind: itemset(ast::ClassSetItem::Literal(ast::Literal  {
4666                         span: span(1..3),
4667                         kind: ast::LiteralKind::Punctuation,
4668                         c: '[',
4669                     })),
4670                 })),
4671                 Ast::Literal(ast::Literal {
4672                     span: span(4..5),
4673                     kind: ast::LiteralKind::Verbatim,
4674                     c: ']',
4675                 }),
4676             ])));
4677 
4678         assert_eq!(
4679             parser("[").parse().unwrap_err(),
4680             TestError {
4681                 span: span(0..1),
4682                 kind: ast::ErrorKind::ClassUnclosed,
4683             });
4684         assert_eq!(
4685             parser("[[").parse().unwrap_err(),
4686             TestError {
4687                 span: span(1..2),
4688                 kind: ast::ErrorKind::ClassUnclosed,
4689             });
4690         assert_eq!(
4691             parser("[[-]").parse().unwrap_err(),
4692             TestError {
4693                 span: span(0..1),
4694                 kind: ast::ErrorKind::ClassUnclosed,
4695             });
4696         assert_eq!(
4697             parser("[[[:alnum:]").parse().unwrap_err(),
4698             TestError {
4699                 span: span(1..2),
4700                 kind: ast::ErrorKind::ClassUnclosed,
4701             });
4702         assert_eq!(
4703             parser(r"[\b]").parse().unwrap_err(),
4704             TestError {
4705                 span: span(1..3),
4706                 kind: ast::ErrorKind::ClassEscapeInvalid,
4707             });
4708         assert_eq!(
4709             parser(r"[\w-a]").parse().unwrap_err(),
4710             TestError {
4711                 span: span(1..3),
4712                 kind: ast::ErrorKind::ClassRangeLiteral,
4713             });
4714         assert_eq!(
4715             parser(r"[a-\w]").parse().unwrap_err(),
4716             TestError {
4717                 span: span(3..5),
4718                 kind: ast::ErrorKind::ClassRangeLiteral,
4719             });
4720         assert_eq!(
4721             parser(r"[z-a]").parse().unwrap_err(),
4722             TestError {
4723                 span: span(1..4),
4724                 kind: ast::ErrorKind::ClassRangeInvalid,
4725             });
4726 
4727         assert_eq!(
4728             parser_ignore_whitespace("[a ").parse().unwrap_err(),
4729             TestError {
4730                 span: span(0..1),
4731                 kind: ast::ErrorKind::ClassUnclosed,
4732             });
4733         assert_eq!(
4734             parser_ignore_whitespace("[a- ").parse().unwrap_err(),
4735             TestError {
4736                 span: span(0..1),
4737                 kind: ast::ErrorKind::ClassUnclosed,
4738             });
4739     }
4740 
4741     #[test]
parse_set_class_open()4742     fn parse_set_class_open() {
4743         assert_eq!(
4744             parser("[a]").parse_set_class_open(), {
4745                 let set = ast::ClassBracketed {
4746                     span: span(0..1),
4747                     negated: false,
4748                     kind: ast::ClassSet::union(ast::ClassSetUnion {
4749                         span: span(1..1),
4750                         items: vec![],
4751                     }),
4752                 };
4753                 let union = ast::ClassSetUnion {
4754                     span: span(1..1),
4755                     items: vec![],
4756                 };
4757                 Ok((set, union))
4758             });
4759         assert_eq!(
4760             parser_ignore_whitespace("[   a]").parse_set_class_open(), {
4761                 let set = ast::ClassBracketed {
4762                     span: span(0..4),
4763                     negated: false,
4764                     kind: ast::ClassSet::union(ast::ClassSetUnion {
4765                         span: span(4..4),
4766                         items: vec![],
4767                     }),
4768                 };
4769                 let union = ast::ClassSetUnion {
4770                     span: span(4..4),
4771                     items: vec![],
4772                 };
4773                 Ok((set, union))
4774             });
4775         assert_eq!(
4776             parser("[^a]").parse_set_class_open(), {
4777                 let set = ast::ClassBracketed {
4778                     span: span(0..2),
4779                     negated: true,
4780                     kind: ast::ClassSet::union(ast::ClassSetUnion {
4781                         span: span(2..2),
4782                         items: vec![],
4783                     }),
4784                 };
4785                 let union = ast::ClassSetUnion {
4786                     span: span(2..2),
4787                     items: vec![],
4788                 };
4789                 Ok((set, union))
4790             });
4791         assert_eq!(
4792             parser_ignore_whitespace("[ ^ a]").parse_set_class_open(), {
4793                 let set = ast::ClassBracketed {
4794                     span: span(0..4),
4795                     negated: true,
4796                     kind: ast::ClassSet::union(ast::ClassSetUnion {
4797                         span: span(4..4),
4798                         items: vec![],
4799                     }),
4800                 };
4801                 let union = ast::ClassSetUnion {
4802                     span: span(4..4),
4803                     items: vec![],
4804                 };
4805                 Ok((set, union))
4806             });
4807         assert_eq!(
4808             parser("[-a]").parse_set_class_open(), {
4809                 let set = ast::ClassBracketed {
4810                     span: span(0..2),
4811                     negated: false,
4812                     kind: ast::ClassSet::union(ast::ClassSetUnion {
4813                         span: span(1..1),
4814                         items: vec![],
4815                     }),
4816                 };
4817                 let union = ast::ClassSetUnion {
4818                     span: span(1..2),
4819                     items: vec![
4820                         ast::ClassSetItem::Literal(ast::Literal {
4821                             span: span(1..2),
4822                             kind: ast::LiteralKind::Verbatim,
4823                             c: '-',
4824                         }),
4825                     ],
4826                 };
4827                 Ok((set, union))
4828             });
4829         assert_eq!(
4830             parser_ignore_whitespace("[ - a]").parse_set_class_open(), {
4831                 let set = ast::ClassBracketed {
4832                     span: span(0..4),
4833                     negated: false,
4834                     kind: ast::ClassSet::union(ast::ClassSetUnion {
4835                         span: span(2..2),
4836                         items: vec![],
4837                     }),
4838                 };
4839                 let union = ast::ClassSetUnion {
4840                     span: span(2..3),
4841                     items: vec![
4842                         ast::ClassSetItem::Literal(ast::Literal {
4843                             span: span(2..3),
4844                             kind: ast::LiteralKind::Verbatim,
4845                             c: '-',
4846                         }),
4847                     ],
4848                 };
4849                 Ok((set, union))
4850             });
4851         assert_eq!(
4852             parser("[^-a]").parse_set_class_open(), {
4853                 let set = ast::ClassBracketed {
4854                     span: span(0..3),
4855                     negated: true,
4856                     kind: ast::ClassSet::union(ast::ClassSetUnion {
4857                         span: span(2..2),
4858                         items: vec![],
4859                     }),
4860                 };
4861                 let union = ast::ClassSetUnion {
4862                     span: span(2..3),
4863                     items: vec![
4864                         ast::ClassSetItem::Literal(ast::Literal {
4865                             span: span(2..3),
4866                             kind: ast::LiteralKind::Verbatim,
4867                             c: '-',
4868                         }),
4869                     ],
4870                 };
4871                 Ok((set, union))
4872             });
4873         assert_eq!(
4874             parser("[--a]").parse_set_class_open(), {
4875                 let set = ast::ClassBracketed {
4876                     span: span(0..3),
4877                     negated: false,
4878                     kind: ast::ClassSet::union(ast::ClassSetUnion {
4879                         span: span(1..1),
4880                         items: vec![],
4881                     }),
4882                 };
4883                 let union = ast::ClassSetUnion {
4884                     span: span(1..3),
4885                     items: vec![
4886                         ast::ClassSetItem::Literal(ast::Literal {
4887                             span: span(1..2),
4888                             kind: ast::LiteralKind::Verbatim,
4889                             c: '-',
4890                         }),
4891                         ast::ClassSetItem::Literal(ast::Literal {
4892                             span: span(2..3),
4893                             kind: ast::LiteralKind::Verbatim,
4894                             c: '-',
4895                         }),
4896                     ],
4897                 };
4898                 Ok((set, union))
4899             });
4900         assert_eq!(
4901             parser("[]a]").parse_set_class_open(), {
4902                 let set = ast::ClassBracketed {
4903                     span: span(0..2),
4904                     negated: false,
4905                     kind: ast::ClassSet::union(ast::ClassSetUnion {
4906                         span: span(1..1),
4907                         items: vec![],
4908                     }),
4909                 };
4910                 let union = ast::ClassSetUnion {
4911                     span: span(1..2),
4912                     items: vec![
4913                         ast::ClassSetItem::Literal(ast::Literal {
4914                             span: span(1..2),
4915                             kind: ast::LiteralKind::Verbatim,
4916                             c: ']',
4917                         }),
4918                     ],
4919                 };
4920                 Ok((set, union))
4921             });
4922         assert_eq!(
4923             parser_ignore_whitespace("[ ] a]").parse_set_class_open(), {
4924                 let set = ast::ClassBracketed {
4925                     span: span(0..4),
4926                     negated: false,
4927                     kind: ast::ClassSet::union(ast::ClassSetUnion {
4928                         span: span(2..2),
4929                         items: vec![],
4930                     }),
4931                 };
4932                 let union = ast::ClassSetUnion {
4933                     span: span(2..3),
4934                     items: vec![
4935                         ast::ClassSetItem::Literal(ast::Literal {
4936                             span: span(2..3),
4937                             kind: ast::LiteralKind::Verbatim,
4938                             c: ']',
4939                         }),
4940                     ],
4941                 };
4942                 Ok((set, union))
4943             });
4944         assert_eq!(
4945             parser("[^]a]").parse_set_class_open(), {
4946                 let set = ast::ClassBracketed {
4947                     span: span(0..3),
4948                     negated: true,
4949                     kind: ast::ClassSet::union(ast::ClassSetUnion {
4950                         span: span(2..2),
4951                         items: vec![],
4952                     }),
4953                 };
4954                 let union = ast::ClassSetUnion {
4955                     span: span(2..3),
4956                     items: vec![
4957                         ast::ClassSetItem::Literal(ast::Literal {
4958                             span: span(2..3),
4959                             kind: ast::LiteralKind::Verbatim,
4960                             c: ']',
4961                         }),
4962                     ],
4963                 };
4964                 Ok((set, union))
4965             });
4966         assert_eq!(
4967             parser("[-]a]").parse_set_class_open(), {
4968                 let set = ast::ClassBracketed {
4969                     span: span(0..2),
4970                     negated: false,
4971                     kind: ast::ClassSet::union(ast::ClassSetUnion {
4972                         span: span(1..1),
4973                         items: vec![],
4974                     }),
4975                 };
4976                 let union = ast::ClassSetUnion {
4977                     span: span(1..2),
4978                     items: vec![
4979                         ast::ClassSetItem::Literal(ast::Literal {
4980                             span: span(1..2),
4981                             kind: ast::LiteralKind::Verbatim,
4982                             c: '-',
4983                         }),
4984                     ],
4985                 };
4986                 Ok((set, union))
4987             });
4988 
4989         assert_eq!(
4990             parser("[").parse_set_class_open().unwrap_err(),
4991             TestError {
4992                 span: span(0..1),
4993                 kind: ast::ErrorKind::ClassUnclosed,
4994             });
4995         assert_eq!(
4996             parser_ignore_whitespace("[    ")
4997             .parse_set_class_open()
4998             .unwrap_err(),
4999             TestError {
5000                 span: span(0..5),
5001                 kind: ast::ErrorKind::ClassUnclosed,
5002             });
5003         assert_eq!(
5004             parser("[^").parse_set_class_open().unwrap_err(),
5005             TestError {
5006                 span: span(0..2),
5007                 kind: ast::ErrorKind::ClassUnclosed,
5008             });
5009         assert_eq!(
5010             parser("[]").parse_set_class_open().unwrap_err(),
5011             TestError {
5012                 span: span(0..2),
5013                 kind: ast::ErrorKind::ClassUnclosed,
5014             });
5015         assert_eq!(
5016             parser("[-").parse_set_class_open().unwrap_err(),
5017             TestError {
5018                 span: span(0..2),
5019                 kind: ast::ErrorKind::ClassUnclosed,
5020             });
5021         assert_eq!(
5022             parser("[--").parse_set_class_open().unwrap_err(),
5023             TestError {
5024                 span: span(0..3),
5025                 kind: ast::ErrorKind::ClassUnclosed,
5026             });
5027     }
5028 
5029     #[test]
maybe_parse_ascii_class()5030     fn maybe_parse_ascii_class() {
5031         assert_eq!(
5032             parser(r"[:alnum:]").maybe_parse_ascii_class(),
5033             Some(ast::ClassAscii {
5034                 span: span(0..9),
5035                 kind: ast::ClassAsciiKind::Alnum,
5036                 negated: false,
5037             }));
5038         assert_eq!(
5039             parser(r"[:alnum:]A").maybe_parse_ascii_class(),
5040             Some(ast::ClassAscii {
5041                 span: span(0..9),
5042                 kind: ast::ClassAsciiKind::Alnum,
5043                 negated: false,
5044             }));
5045         assert_eq!(
5046             parser(r"[:^alnum:]").maybe_parse_ascii_class(),
5047             Some(ast::ClassAscii {
5048                 span: span(0..10),
5049                 kind: ast::ClassAsciiKind::Alnum,
5050                 negated: true,
5051             }));
5052 
5053         let p = parser(r"[:");
5054         assert_eq!(p.maybe_parse_ascii_class(), None);
5055         assert_eq!(p.offset(), 0);
5056 
5057         let p = parser(r"[:^");
5058         assert_eq!(p.maybe_parse_ascii_class(), None);
5059         assert_eq!(p.offset(), 0);
5060 
5061         let p = parser(r"[^:alnum:]");
5062         assert_eq!(p.maybe_parse_ascii_class(), None);
5063         assert_eq!(p.offset(), 0);
5064 
5065         let p = parser(r"[:alnnum:]");
5066         assert_eq!(p.maybe_parse_ascii_class(), None);
5067         assert_eq!(p.offset(), 0);
5068 
5069         let p = parser(r"[:alnum]");
5070         assert_eq!(p.maybe_parse_ascii_class(), None);
5071         assert_eq!(p.offset(), 0);
5072 
5073         let p = parser(r"[:alnum:");
5074         assert_eq!(p.maybe_parse_ascii_class(), None);
5075         assert_eq!(p.offset(), 0);
5076     }
5077 
5078     #[test]
parse_unicode_class()5079     fn parse_unicode_class() {
5080         assert_eq!(
5081             parser(r"\pN").parse_escape(),
5082             Ok(Primitive::Unicode(ast::ClassUnicode {
5083                 span: span(0..3),
5084                 negated: false,
5085                 kind: ast::ClassUnicodeKind::OneLetter('N'),
5086             })));
5087         assert_eq!(
5088             parser(r"\PN").parse_escape(),
5089             Ok(Primitive::Unicode(ast::ClassUnicode {
5090                 span: span(0..3),
5091                 negated: true,
5092                 kind: ast::ClassUnicodeKind::OneLetter('N'),
5093             })));
5094         assert_eq!(
5095             parser(r"\p{N}").parse_escape(),
5096             Ok(Primitive::Unicode(ast::ClassUnicode {
5097                 span: span(0..5),
5098                 negated: false,
5099                 kind: ast::ClassUnicodeKind::Named(s("N")),
5100             })));
5101         assert_eq!(
5102             parser(r"\P{N}").parse_escape(),
5103             Ok(Primitive::Unicode(ast::ClassUnicode {
5104                 span: span(0..5),
5105                 negated: true,
5106                 kind: ast::ClassUnicodeKind::Named(s("N")),
5107             })));
5108         assert_eq!(
5109             parser(r"\p{Greek}").parse_escape(),
5110             Ok(Primitive::Unicode(ast::ClassUnicode {
5111                 span: span(0..9),
5112                 negated: false,
5113                 kind: ast::ClassUnicodeKind::Named(s("Greek")),
5114             })));
5115 
5116         assert_eq!(
5117             parser(r"\p{scx:Katakana}").parse_escape(),
5118             Ok(Primitive::Unicode(ast::ClassUnicode {
5119                 span: span(0..16),
5120                 negated: false,
5121                 kind: ast::ClassUnicodeKind::NamedValue {
5122                     op: ast::ClassUnicodeOpKind::Colon,
5123                     name: s("scx"),
5124                     value: s("Katakana"),
5125                 },
5126             })));
5127         assert_eq!(
5128             parser(r"\p{scx=Katakana}").parse_escape(),
5129             Ok(Primitive::Unicode(ast::ClassUnicode {
5130                 span: span(0..16),
5131                 negated: false,
5132                 kind: ast::ClassUnicodeKind::NamedValue {
5133                     op: ast::ClassUnicodeOpKind::Equal,
5134                     name: s("scx"),
5135                     value: s("Katakana"),
5136                 },
5137             })));
5138         assert_eq!(
5139             parser(r"\p{scx!=Katakana}").parse_escape(),
5140             Ok(Primitive::Unicode(ast::ClassUnicode {
5141                 span: span(0..17),
5142                 negated: false,
5143                 kind: ast::ClassUnicodeKind::NamedValue {
5144                     op: ast::ClassUnicodeOpKind::NotEqual,
5145                     name: s("scx"),
5146                     value: s("Katakana"),
5147                 },
5148             })));
5149 
5150         assert_eq!(
5151             parser(r"\p{:}").parse_escape(),
5152             Ok(Primitive::Unicode(ast::ClassUnicode {
5153                 span: span(0..5),
5154                 negated: false,
5155                 kind: ast::ClassUnicodeKind::NamedValue {
5156                     op: ast::ClassUnicodeOpKind::Colon,
5157                     name: s(""),
5158                     value: s(""),
5159                 },
5160             })));
5161         assert_eq!(
5162             parser(r"\p{=}").parse_escape(),
5163             Ok(Primitive::Unicode(ast::ClassUnicode {
5164                 span: span(0..5),
5165                 negated: false,
5166                 kind: ast::ClassUnicodeKind::NamedValue {
5167                     op: ast::ClassUnicodeOpKind::Equal,
5168                     name: s(""),
5169                     value: s(""),
5170                 },
5171             })));
5172         assert_eq!(
5173             parser(r"\p{!=}").parse_escape(),
5174             Ok(Primitive::Unicode(ast::ClassUnicode {
5175                 span: span(0..6),
5176                 negated: false,
5177                 kind: ast::ClassUnicodeKind::NamedValue {
5178                     op: ast::ClassUnicodeOpKind::NotEqual,
5179                     name: s(""),
5180                     value: s(""),
5181                 },
5182             })));
5183 
5184         assert_eq!(
5185             parser(r"\p").parse_escape().unwrap_err(),
5186             TestError {
5187                 span: span(2..2),
5188                 kind: ast::ErrorKind::EscapeUnexpectedEof,
5189             });
5190         assert_eq!(
5191             parser(r"\p{").parse_escape().unwrap_err(),
5192             TestError {
5193                 span: span(3..3),
5194                 kind: ast::ErrorKind::EscapeUnexpectedEof,
5195             });
5196         assert_eq!(
5197             parser(r"\p{N").parse_escape().unwrap_err(),
5198             TestError {
5199                 span: span(4..4),
5200                 kind: ast::ErrorKind::EscapeUnexpectedEof,
5201             });
5202         assert_eq!(
5203             parser(r"\p{Greek").parse_escape().unwrap_err(),
5204             TestError {
5205                 span: span(8..8),
5206                 kind: ast::ErrorKind::EscapeUnexpectedEof,
5207             });
5208 
5209         assert_eq!(
5210             parser(r"\pNz").parse(),
5211             Ok(Ast::Concat(ast::Concat {
5212                 span: span(0..4),
5213                 asts: vec![
5214                     Ast::Class(ast::Class::Unicode(ast::ClassUnicode {
5215                         span: span(0..3),
5216                         negated: false,
5217                         kind: ast::ClassUnicodeKind::OneLetter('N'),
5218                     })),
5219                     Ast::Literal(ast::Literal {
5220                         span: span(3..4),
5221                         kind: ast::LiteralKind::Verbatim,
5222                         c: 'z',
5223                     }),
5224                 ],
5225             })));
5226         assert_eq!(
5227             parser(r"\p{Greek}z").parse(),
5228             Ok(Ast::Concat(ast::Concat {
5229                 span: span(0..10),
5230                 asts: vec![
5231                     Ast::Class(ast::Class::Unicode(ast::ClassUnicode {
5232                         span: span(0..9),
5233                         negated: false,
5234                         kind: ast::ClassUnicodeKind::Named(s("Greek")),
5235                     })),
5236                     Ast::Literal(ast::Literal {
5237                         span: span(9..10),
5238                         kind: ast::LiteralKind::Verbatim,
5239                         c: 'z',
5240                     }),
5241                 ],
5242             })));
5243     }
5244 
5245     #[test]
parse_perl_class()5246     fn parse_perl_class() {
5247         assert_eq!(
5248             parser(r"\d").parse_escape(),
5249             Ok(Primitive::Perl(ast::ClassPerl {
5250                 span: span(0..2),
5251                 kind: ast::ClassPerlKind::Digit,
5252                 negated: false,
5253             })));
5254         assert_eq!(
5255             parser(r"\D").parse_escape(),
5256             Ok(Primitive::Perl(ast::ClassPerl {
5257                 span: span(0..2),
5258                 kind: ast::ClassPerlKind::Digit,
5259                 negated: true,
5260             })));
5261         assert_eq!(
5262             parser(r"\s").parse_escape(),
5263             Ok(Primitive::Perl(ast::ClassPerl {
5264                 span: span(0..2),
5265                 kind: ast::ClassPerlKind::Space,
5266                 negated: false,
5267             })));
5268         assert_eq!(
5269             parser(r"\S").parse_escape(),
5270             Ok(Primitive::Perl(ast::ClassPerl {
5271                 span: span(0..2),
5272                 kind: ast::ClassPerlKind::Space,
5273                 negated: true,
5274             })));
5275         assert_eq!(
5276             parser(r"\w").parse_escape(),
5277             Ok(Primitive::Perl(ast::ClassPerl {
5278                 span: span(0..2),
5279                 kind: ast::ClassPerlKind::Word,
5280                 negated: false,
5281             })));
5282         assert_eq!(
5283             parser(r"\W").parse_escape(),
5284             Ok(Primitive::Perl(ast::ClassPerl {
5285                 span: span(0..2),
5286                 kind: ast::ClassPerlKind::Word,
5287                 negated: true,
5288             })));
5289 
5290         assert_eq!(
5291             parser(r"\d").parse(),
5292             Ok(Ast::Class(ast::Class::Perl(ast::ClassPerl {
5293                 span: span(0..2),
5294                 kind: ast::ClassPerlKind::Digit,
5295                 negated: false,
5296             }))));
5297         assert_eq!(
5298             parser(r"\dz").parse(),
5299             Ok(Ast::Concat(ast::Concat {
5300                 span: span(0..3),
5301                 asts: vec![
5302                     Ast::Class(ast::Class::Perl(ast::ClassPerl {
5303                         span: span(0..2),
5304                         kind: ast::ClassPerlKind::Digit,
5305                         negated: false,
5306                     })),
5307                     Ast::Literal(ast::Literal {
5308                         span: span(2..3),
5309                         kind: ast::LiteralKind::Verbatim,
5310                         c: 'z',
5311                     }),
5312                 ],
5313             })));
5314     }
5315 
5316     // This tests a bug fix where the nest limit checker wasn't decrementing
5317     // its depth during post-traversal, which causes long regexes to trip
5318     // the default limit too aggressively.
5319     #[test]
regression_454_nest_too_big()5320     fn regression_454_nest_too_big() {
5321         let pattern = r#"
5322         2(?:
5323           [45]\d{3}|
5324           7(?:
5325             1[0-267]|
5326             2[0-289]|
5327             3[0-29]|
5328             4[01]|
5329             5[1-3]|
5330             6[013]|
5331             7[0178]|
5332             91
5333           )|
5334           8(?:
5335             0[125]|
5336             [139][1-6]|
5337             2[0157-9]|
5338             41|
5339             6[1-35]|
5340             7[1-5]|
5341             8[1-8]|
5342             90
5343           )|
5344           9(?:
5345             0[0-2]|
5346             1[0-4]|
5347             2[568]|
5348             3[3-6]|
5349             5[5-7]|
5350             6[0167]|
5351             7[15]|
5352             8[0146-9]
5353           )
5354         )\d{4}
5355         "#;
5356         assert!(parser_nest_limit(pattern, 50).parse().is_ok());
5357     }
5358 
5359     // This tests that we treat a trailing `-` in a character class as a
5360     // literal `-` even when whitespace mode is enabled and there is whitespace
5361     // after the trailing `-`.
5362     #[test]
regression_455_trailing_dash_ignore_whitespace()5363     fn regression_455_trailing_dash_ignore_whitespace() {
5364         assert!(parser("(?x)[ / - ]").parse().is_ok());
5365         assert!(parser("(?x)[ a - ]").parse().is_ok());
5366         assert!(parser("(?x)[
5367             a
5368             - ]
5369         ").parse().is_ok());
5370         assert!(parser("(?x)[
5371             a # wat
5372             - ]
5373         ").parse().is_ok());
5374 
5375         assert!(parser("(?x)[ / -").parse().is_err());
5376         assert!(parser("(?x)[ / - ").parse().is_err());
5377         assert!(parser("(?x)[
5378             / -
5379         ").parse().is_err());
5380         assert!(parser("(?x)[
5381             / - # wat
5382         ").parse().is_err());
5383     }
5384 }
5385