1 /*!
2 Defines a translator that converts an `Ast` to an `Hir`.
3 */
4 
5 use std::cell::{Cell, RefCell};
6 use std::result;
7 
8 use ast::{self, Ast, Span, Visitor};
9 use hir::{self, Error, ErrorKind, Hir};
10 use unicode::{self, ClassQuery};
11 
12 type Result<T> = result::Result<T, Error>;
13 
/// A builder for constructing an AST->HIR translator.
///
/// Each setter records one option on the builder; `build` then copies the
/// recorded options into a fresh `Translator`.
#[derive(Clone, Debug)]
pub struct TranslatorBuilder {
    /// Whether built translators may produce HIR that matches invalid UTF-8.
    allow_invalid_utf8: bool,
    /// The flag settings that every built translator starts out with.
    flags: Flags,
}
20 
21 impl Default for TranslatorBuilder {
default() -> TranslatorBuilder22     fn default() -> TranslatorBuilder {
23         TranslatorBuilder::new()
24     }
25 }
26 
27 impl TranslatorBuilder {
28     /// Create a new translator builder with a default c onfiguration.
new() -> TranslatorBuilder29     pub fn new() -> TranslatorBuilder {
30         TranslatorBuilder {
31             allow_invalid_utf8: false,
32             flags: Flags::default(),
33         }
34     }
35 
36     /// Build a translator using the current configuration.
build(&self) -> Translator37     pub fn build(&self) -> Translator {
38         Translator {
39             stack: RefCell::new(vec![]),
40             flags: Cell::new(self.flags),
41             allow_invalid_utf8: self.allow_invalid_utf8,
42         }
43     }
44 
45     /// When enabled, translation will permit the construction of a regular
46     /// expression that may match invalid UTF-8.
47     ///
48     /// When disabled (the default), the translator is guaranteed to produce
49     /// an expression that will only ever match valid UTF-8 (otherwise, the
50     /// translator will return an error).
51     ///
52     /// Perhaps surprisingly, when invalid UTF-8 isn't allowed, a negated ASCII
53     /// word boundary (uttered as `(?-u:\B)` in the concrete syntax) will cause
54     /// the parser to return an error. Namely, a negated ASCII word boundary
55     /// can result in matching positions that aren't valid UTF-8 boundaries.
allow_invalid_utf8(&mut self, yes: bool) -> &mut TranslatorBuilder56     pub fn allow_invalid_utf8(&mut self, yes: bool) -> &mut TranslatorBuilder {
57         self.allow_invalid_utf8 = yes;
58         self
59     }
60 
61     /// Enable or disable the case insensitive flag (`i`) by default.
case_insensitive(&mut self, yes: bool) -> &mut TranslatorBuilder62     pub fn case_insensitive(&mut self, yes: bool) -> &mut TranslatorBuilder {
63         self.flags.case_insensitive = if yes { Some(true) } else { None };
64         self
65     }
66 
67     /// Enable or disable the multi-line matching flag (`m`) by default.
multi_line(&mut self, yes: bool) -> &mut TranslatorBuilder68     pub fn multi_line(&mut self, yes: bool) -> &mut TranslatorBuilder {
69         self.flags.multi_line = if yes { Some(true) } else { None };
70         self
71     }
72 
73     /// Enable or disable the "dot matches any character" flag (`s`) by
74     /// default.
dot_matches_new_line( &mut self, yes: bool, ) -> &mut TranslatorBuilder75     pub fn dot_matches_new_line(
76         &mut self,
77         yes: bool,
78     ) -> &mut TranslatorBuilder {
79         self.flags.dot_matches_new_line = if yes { Some(true) } else { None };
80         self
81     }
82 
83     /// Enable or disable the "swap greed" flag (`U`) by default.
swap_greed(&mut self, yes: bool) -> &mut TranslatorBuilder84     pub fn swap_greed(&mut self, yes: bool) -> &mut TranslatorBuilder {
85         self.flags.swap_greed = if yes { Some(true) } else { None };
86         self
87     }
88 
89     /// Enable or disable the Unicode flag (`u`) by default.
unicode(&mut self, yes: bool) -> &mut TranslatorBuilder90     pub fn unicode(&mut self, yes: bool) -> &mut TranslatorBuilder {
91         self.flags.unicode = if yes { None } else { Some(false) };
92         self
93     }
94 }
95 
/// A translator maps abstract syntax to a high level intermediate
/// representation.
///
/// A translator may benefit from reuse. That is, a single translator can
/// translate many abstract syntax trees.
///
/// A `Translator` can be configured in more detail via a
/// [`TranslatorBuilder`](struct.TranslatorBuilder.html).
#[derive(Clone, Debug)]
pub struct Translator {
    /// Our call stack, but on the heap.
    ///
    /// Wrapped in a `RefCell` because the internal visitor only holds a
    /// shared reference to the translator while pushing and popping frames.
    stack: RefCell<Vec<HirFrame>>,
    /// The current flag settings.
    ///
    /// Wrapped in a `Cell` for the same reason: flags are replaced through a
    /// shared reference while an AST is being visited.
    flags: Cell<Flags>,
    /// Whether we're allowed to produce HIR that can match arbitrary bytes.
    allow_invalid_utf8: bool,
}
113 
114 impl Translator {
115     /// Create a new translator using the default configuration.
new() -> Translator116     pub fn new() -> Translator {
117         TranslatorBuilder::new().build()
118     }
119 
120     /// Translate the given abstract syntax tree (AST) into a high level
121     /// intermediate representation (HIR).
122     ///
123     /// If there was a problem doing the translation, then an HIR-specific
124     /// error is returned.
125     ///
126     /// The original pattern string used to produce the `Ast` *must* also be
127     /// provided. The translator does not use the pattern string during any
128     /// correct translation, but is used for error reporting.
translate(&mut self, pattern: &str, ast: &Ast) -> Result<Hir>129     pub fn translate(&mut self, pattern: &str, ast: &Ast) -> Result<Hir> {
130         ast::visit(ast, TranslatorI::new(self, pattern))
131     }
132 }
133 
/// An HirFrame is a single stack frame, represented explicitly, which is
/// created for each item in the Ast that we traverse.
///
/// Note that technically, this type doesn't represent our entire stack
/// frame. In particular, the Ast visitor represents any state associated with
/// traversing the Ast itself.
#[derive(Clone, Debug)]
enum HirFrame {
    /// An arbitrary HIR expression. These get pushed whenever we hit a base
    /// case in the Ast. They get popped after an inductive (i.e., recursive)
    /// step is complete.
    Expr(Hir),
    /// A Unicode character class. This frame is mutated as we descend into
    /// the Ast of a character class (which is itself its own mini recursive
    /// structure).
    ///
    /// Unicode class frames are created when Unicode mode (`u`) is enabled.
    ClassUnicode(hir::ClassUnicode),
    /// A byte-oriented character class. This frame is mutated as we descend
    /// into the Ast of a character class (which is itself its own mini
    /// recursive structure).
    ///
    /// Byte character classes are created when Unicode mode (`u`) is disabled.
    /// If `allow_invalid_utf8` is disabled (the default), then a byte
    /// character is only permitted to match ASCII text.
    ClassBytes(hir::ClassBytes),
    /// This is pushed on to the stack upon first seeing any kind of group,
    /// indicated by parentheses (including non-capturing groups). It is popped
    /// upon leaving a group.
    Group {
        /// The old active flags when this group was opened.
        ///
        /// If this group sets flags, then the new active flags are set to the
        /// result of merging the old flags with the flags introduced by this
        /// group. If the group doesn't set any flags, then this is simply
        /// equivalent to whatever flags were set when the group was opened.
        ///
        /// When this group is popped, the active flags should be restored to
        /// the flags set here.
        ///
        /// The "active" flags correspond to whatever flags are set in the
        /// Translator.
        old_flags: Flags,
    },
    /// This is pushed whenever a non-empty concatenation is observed. After
    /// visiting every sub-expression in the concatenation, the translator's
    /// stack is popped until it sees a Concat frame (which is popped too).
    Concat,
    /// This is pushed whenever a non-empty alternation is observed. After
    /// visiting every sub-expression in the alternation, the translator's
    /// stack is popped until it sees an Alternation frame (which is popped
    /// too).
    Alternation,
}
185 
186 impl HirFrame {
187     /// Assert that the current stack frame is an Hir expression and return it.
unwrap_expr(self) -> Hir188     fn unwrap_expr(self) -> Hir {
189         match self {
190             HirFrame::Expr(expr) => expr,
191             _ => panic!("tried to unwrap expr from HirFrame, got: {:?}", self),
192         }
193     }
194 
195     /// Assert that the current stack frame is a Unicode class expression and
196     /// return it.
unwrap_class_unicode(self) -> hir::ClassUnicode197     fn unwrap_class_unicode(self) -> hir::ClassUnicode {
198         match self {
199             HirFrame::ClassUnicode(cls) => cls,
200             _ => panic!(
201                 "tried to unwrap Unicode class \
202                  from HirFrame, got: {:?}",
203                 self
204             ),
205         }
206     }
207 
208     /// Assert that the current stack frame is a byte class expression and
209     /// return it.
unwrap_class_bytes(self) -> hir::ClassBytes210     fn unwrap_class_bytes(self) -> hir::ClassBytes {
211         match self {
212             HirFrame::ClassBytes(cls) => cls,
213             _ => panic!(
214                 "tried to unwrap byte class \
215                  from HirFrame, got: {:?}",
216                 self
217             ),
218         }
219     }
220 
221     /// Assert that the current stack frame is a group indicator and return
222     /// its corresponding flags (the flags that were active at the time the
223     /// group was entered).
unwrap_group(self) -> Flags224     fn unwrap_group(self) -> Flags {
225         match self {
226             HirFrame::Group { old_flags } => old_flags,
227             _ => {
228                 panic!("tried to unwrap group from HirFrame, got: {:?}", self)
229             }
230         }
231     }
232 }
233 
234 impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
235     type Output = Hir;
236     type Err = Error;
237 
finish(self) -> Result<Hir>238     fn finish(self) -> Result<Hir> {
239         // ... otherwise, we should have exactly one HIR on the stack.
240         assert_eq!(self.trans().stack.borrow().len(), 1);
241         Ok(self.pop().unwrap().unwrap_expr())
242     }
243 
visit_pre(&mut self, ast: &Ast) -> Result<()>244     fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
245         match *ast {
246             Ast::Class(ast::Class::Bracketed(_)) => {
247                 if self.flags().unicode() {
248                     let cls = hir::ClassUnicode::empty();
249                     self.push(HirFrame::ClassUnicode(cls));
250                 } else {
251                     let cls = hir::ClassBytes::empty();
252                     self.push(HirFrame::ClassBytes(cls));
253                 }
254             }
255             Ast::Group(ref x) => {
256                 let old_flags = x
257                     .flags()
258                     .map(|ast| self.set_flags(ast))
259                     .unwrap_or_else(|| self.flags());
260                 self.push(HirFrame::Group { old_flags });
261             }
262             Ast::Concat(ref x) if x.asts.is_empty() => {}
263             Ast::Concat(_) => {
264                 self.push(HirFrame::Concat);
265             }
266             Ast::Alternation(ref x) if x.asts.is_empty() => {}
267             Ast::Alternation(_) => {
268                 self.push(HirFrame::Alternation);
269             }
270             _ => {}
271         }
272         Ok(())
273     }
274 
visit_post(&mut self, ast: &Ast) -> Result<()>275     fn visit_post(&mut self, ast: &Ast) -> Result<()> {
276         match *ast {
277             Ast::Empty(_) => {
278                 self.push(HirFrame::Expr(Hir::empty()));
279             }
280             Ast::Flags(ref x) => {
281                 self.set_flags(&x.flags);
282                 // Flags in the AST are generally considered directives and
283                 // not actual sub-expressions. However, they can be used in
284                 // the concrete syntax like `((?i))`, and we need some kind of
285                 // indication of an expression there, and Empty is the correct
286                 // choice.
287                 //
288                 // There can also be things like `(?i)+`, but we rule those out
289                 // in the parser. In the future, we might allow them for
290                 // consistency sake.
291                 self.push(HirFrame::Expr(Hir::empty()));
292             }
293             Ast::Literal(ref x) => {
294                 self.push(HirFrame::Expr(self.hir_literal(x)?));
295             }
296             Ast::Dot(span) => {
297                 self.push(HirFrame::Expr(self.hir_dot(span)?));
298             }
299             Ast::Assertion(ref x) => {
300                 self.push(HirFrame::Expr(self.hir_assertion(x)?));
301             }
302             Ast::Class(ast::Class::Perl(ref x)) => {
303                 if self.flags().unicode() {
304                     let cls = self.hir_perl_unicode_class(x)?;
305                     let hcls = hir::Class::Unicode(cls);
306                     self.push(HirFrame::Expr(Hir::class(hcls)));
307                 } else {
308                     let cls = self.hir_perl_byte_class(x);
309                     let hcls = hir::Class::Bytes(cls);
310                     self.push(HirFrame::Expr(Hir::class(hcls)));
311                 }
312             }
313             Ast::Class(ast::Class::Unicode(ref x)) => {
314                 let cls = hir::Class::Unicode(self.hir_unicode_class(x)?);
315                 self.push(HirFrame::Expr(Hir::class(cls)));
316             }
317             Ast::Class(ast::Class::Bracketed(ref ast)) => {
318                 if self.flags().unicode() {
319                     let mut cls = self.pop().unwrap().unwrap_class_unicode();
320                     self.unicode_fold_and_negate(
321                         &ast.span,
322                         ast.negated,
323                         &mut cls,
324                     )?;
325                     if cls.iter().next().is_none() {
326                         return Err(self.error(
327                             ast.span,
328                             ErrorKind::EmptyClassNotAllowed,
329                         ));
330                     }
331                     let expr = Hir::class(hir::Class::Unicode(cls));
332                     self.push(HirFrame::Expr(expr));
333                 } else {
334                     let mut cls = self.pop().unwrap().unwrap_class_bytes();
335                     self.bytes_fold_and_negate(
336                         &ast.span,
337                         ast.negated,
338                         &mut cls,
339                     )?;
340                     if cls.iter().next().is_none() {
341                         return Err(self.error(
342                             ast.span,
343                             ErrorKind::EmptyClassNotAllowed,
344                         ));
345                     }
346 
347                     let expr = Hir::class(hir::Class::Bytes(cls));
348                     self.push(HirFrame::Expr(expr));
349                 }
350             }
351             Ast::Repetition(ref x) => {
352                 let expr = self.pop().unwrap().unwrap_expr();
353                 self.push(HirFrame::Expr(self.hir_repetition(x, expr)));
354             }
355             Ast::Group(ref x) => {
356                 let expr = self.pop().unwrap().unwrap_expr();
357                 let old_flags = self.pop().unwrap().unwrap_group();
358                 self.trans().flags.set(old_flags);
359                 self.push(HirFrame::Expr(self.hir_group(x, expr)));
360             }
361             Ast::Concat(_) => {
362                 let mut exprs = vec![];
363                 while let Some(HirFrame::Expr(expr)) = self.pop() {
364                     if !expr.kind().is_empty() {
365                         exprs.push(expr);
366                     }
367                 }
368                 exprs.reverse();
369                 self.push(HirFrame::Expr(Hir::concat(exprs)));
370             }
371             Ast::Alternation(_) => {
372                 let mut exprs = vec![];
373                 while let Some(HirFrame::Expr(expr)) = self.pop() {
374                     exprs.push(expr);
375                 }
376                 exprs.reverse();
377                 self.push(HirFrame::Expr(Hir::alternation(exprs)));
378             }
379         }
380         Ok(())
381     }
382 
visit_class_set_item_pre( &mut self, ast: &ast::ClassSetItem, ) -> Result<()>383     fn visit_class_set_item_pre(
384         &mut self,
385         ast: &ast::ClassSetItem,
386     ) -> Result<()> {
387         match *ast {
388             ast::ClassSetItem::Bracketed(_) => {
389                 if self.flags().unicode() {
390                     let cls = hir::ClassUnicode::empty();
391                     self.push(HirFrame::ClassUnicode(cls));
392                 } else {
393                     let cls = hir::ClassBytes::empty();
394                     self.push(HirFrame::ClassBytes(cls));
395                 }
396             }
397             // We needn't handle the Union case here since the visitor will
398             // do it for us.
399             _ => {}
400         }
401         Ok(())
402     }
403 
visit_class_set_item_post( &mut self, ast: &ast::ClassSetItem, ) -> Result<()>404     fn visit_class_set_item_post(
405         &mut self,
406         ast: &ast::ClassSetItem,
407     ) -> Result<()> {
408         match *ast {
409             ast::ClassSetItem::Empty(_) => {}
410             ast::ClassSetItem::Literal(ref x) => {
411                 if self.flags().unicode() {
412                     let mut cls = self.pop().unwrap().unwrap_class_unicode();
413                     cls.push(hir::ClassUnicodeRange::new(x.c, x.c));
414                     self.push(HirFrame::ClassUnicode(cls));
415                 } else {
416                     let mut cls = self.pop().unwrap().unwrap_class_bytes();
417                     let byte = self.class_literal_byte(x)?;
418                     cls.push(hir::ClassBytesRange::new(byte, byte));
419                     self.push(HirFrame::ClassBytes(cls));
420                 }
421             }
422             ast::ClassSetItem::Range(ref x) => {
423                 if self.flags().unicode() {
424                     let mut cls = self.pop().unwrap().unwrap_class_unicode();
425                     cls.push(hir::ClassUnicodeRange::new(x.start.c, x.end.c));
426                     self.push(HirFrame::ClassUnicode(cls));
427                 } else {
428                     let mut cls = self.pop().unwrap().unwrap_class_bytes();
429                     let start = self.class_literal_byte(&x.start)?;
430                     let end = self.class_literal_byte(&x.end)?;
431                     cls.push(hir::ClassBytesRange::new(start, end));
432                     self.push(HirFrame::ClassBytes(cls));
433                 }
434             }
435             ast::ClassSetItem::Ascii(ref x) => {
436                 if self.flags().unicode() {
437                     let mut cls = self.pop().unwrap().unwrap_class_unicode();
438                     for &(s, e) in ascii_class(&x.kind) {
439                         cls.push(hir::ClassUnicodeRange::new(s, e));
440                     }
441                     self.unicode_fold_and_negate(
442                         &x.span, x.negated, &mut cls,
443                     )?;
444                     self.push(HirFrame::ClassUnicode(cls));
445                 } else {
446                     let mut cls = self.pop().unwrap().unwrap_class_bytes();
447                     for &(s, e) in ascii_class(&x.kind) {
448                         cls.push(hir::ClassBytesRange::new(s as u8, e as u8));
449                     }
450                     self.bytes_fold_and_negate(&x.span, x.negated, &mut cls)?;
451                     self.push(HirFrame::ClassBytes(cls));
452                 }
453             }
454             ast::ClassSetItem::Unicode(ref x) => {
455                 let xcls = self.hir_unicode_class(x)?;
456                 let mut cls = self.pop().unwrap().unwrap_class_unicode();
457                 cls.union(&xcls);
458                 self.push(HirFrame::ClassUnicode(cls));
459             }
460             ast::ClassSetItem::Perl(ref x) => {
461                 if self.flags().unicode() {
462                     let xcls = self.hir_perl_unicode_class(x)?;
463                     let mut cls = self.pop().unwrap().unwrap_class_unicode();
464                     cls.union(&xcls);
465                     self.push(HirFrame::ClassUnicode(cls));
466                 } else {
467                     let xcls = self.hir_perl_byte_class(x);
468                     let mut cls = self.pop().unwrap().unwrap_class_bytes();
469                     cls.union(&xcls);
470                     self.push(HirFrame::ClassBytes(cls));
471                 }
472             }
473             ast::ClassSetItem::Bracketed(ref ast) => {
474                 if self.flags().unicode() {
475                     let mut cls1 = self.pop().unwrap().unwrap_class_unicode();
476                     self.unicode_fold_and_negate(
477                         &ast.span,
478                         ast.negated,
479                         &mut cls1,
480                     )?;
481 
482                     let mut cls2 = self.pop().unwrap().unwrap_class_unicode();
483                     cls2.union(&cls1);
484                     self.push(HirFrame::ClassUnicode(cls2));
485                 } else {
486                     let mut cls1 = self.pop().unwrap().unwrap_class_bytes();
487                     self.bytes_fold_and_negate(
488                         &ast.span,
489                         ast.negated,
490                         &mut cls1,
491                     )?;
492 
493                     let mut cls2 = self.pop().unwrap().unwrap_class_bytes();
494                     cls2.union(&cls1);
495                     self.push(HirFrame::ClassBytes(cls2));
496                 }
497             }
498             // This is handled automatically by the visitor.
499             ast::ClassSetItem::Union(_) => {}
500         }
501         Ok(())
502     }
503 
visit_class_set_binary_op_pre( &mut self, _op: &ast::ClassSetBinaryOp, ) -> Result<()>504     fn visit_class_set_binary_op_pre(
505         &mut self,
506         _op: &ast::ClassSetBinaryOp,
507     ) -> Result<()> {
508         if self.flags().unicode() {
509             let cls = hir::ClassUnicode::empty();
510             self.push(HirFrame::ClassUnicode(cls));
511         } else {
512             let cls = hir::ClassBytes::empty();
513             self.push(HirFrame::ClassBytes(cls));
514         }
515         Ok(())
516     }
517 
visit_class_set_binary_op_in( &mut self, _op: &ast::ClassSetBinaryOp, ) -> Result<()>518     fn visit_class_set_binary_op_in(
519         &mut self,
520         _op: &ast::ClassSetBinaryOp,
521     ) -> Result<()> {
522         if self.flags().unicode() {
523             let cls = hir::ClassUnicode::empty();
524             self.push(HirFrame::ClassUnicode(cls));
525         } else {
526             let cls = hir::ClassBytes::empty();
527             self.push(HirFrame::ClassBytes(cls));
528         }
529         Ok(())
530     }
531 
visit_class_set_binary_op_post( &mut self, op: &ast::ClassSetBinaryOp, ) -> Result<()>532     fn visit_class_set_binary_op_post(
533         &mut self,
534         op: &ast::ClassSetBinaryOp,
535     ) -> Result<()> {
536         use ast::ClassSetBinaryOpKind::*;
537 
538         if self.flags().unicode() {
539             let mut rhs = self.pop().unwrap().unwrap_class_unicode();
540             let mut lhs = self.pop().unwrap().unwrap_class_unicode();
541             let mut cls = self.pop().unwrap().unwrap_class_unicode();
542             if self.flags().case_insensitive() {
543                 rhs.try_case_fold_simple().map_err(|_| {
544                     self.error(
545                         op.rhs.span().clone(),
546                         ErrorKind::UnicodeCaseUnavailable,
547                     )
548                 })?;
549                 lhs.try_case_fold_simple().map_err(|_| {
550                     self.error(
551                         op.lhs.span().clone(),
552                         ErrorKind::UnicodeCaseUnavailable,
553                     )
554                 })?;
555             }
556             match op.kind {
557                 Intersection => lhs.intersect(&rhs),
558                 Difference => lhs.difference(&rhs),
559                 SymmetricDifference => lhs.symmetric_difference(&rhs),
560             }
561             cls.union(&lhs);
562             self.push(HirFrame::ClassUnicode(cls));
563         } else {
564             let mut rhs = self.pop().unwrap().unwrap_class_bytes();
565             let mut lhs = self.pop().unwrap().unwrap_class_bytes();
566             let mut cls = self.pop().unwrap().unwrap_class_bytes();
567             if self.flags().case_insensitive() {
568                 rhs.case_fold_simple();
569                 lhs.case_fold_simple();
570             }
571             match op.kind {
572                 Intersection => lhs.intersect(&rhs),
573                 Difference => lhs.difference(&rhs),
574                 SymmetricDifference => lhs.symmetric_difference(&rhs),
575             }
576             cls.union(&lhs);
577             self.push(HirFrame::ClassBytes(cls));
578         }
579         Ok(())
580     }
581 }
582 
/// The internal implementation of a translator.
///
/// This type is responsible for carrying around the original pattern string,
/// which is not tied to the internal state of a translator.
///
/// A TranslatorI exists for the time it takes to translate a single Ast.
#[derive(Clone, Debug)]
struct TranslatorI<'t, 'p> {
    /// The translator whose frame stack and flags are read and updated
    /// during the traversal.
    trans: &'t Translator,
    /// The pattern text; it is copied into every error that gets reported.
    pattern: &'p str,
}
594 
595 impl<'t, 'p> TranslatorI<'t, 'p> {
596     /// Build a new internal translator.
new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p>597     fn new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p> {
598         TranslatorI { trans: trans, pattern: pattern }
599     }
600 
601     /// Return a reference to the underlying translator.
trans(&self) -> &Translator602     fn trans(&self) -> &Translator {
603         &self.trans
604     }
605 
606     /// Push the given frame on to the call stack.
push(&self, frame: HirFrame)607     fn push(&self, frame: HirFrame) {
608         self.trans().stack.borrow_mut().push(frame);
609     }
610 
611     /// Pop the top of the call stack. If the call stack is empty, return None.
pop(&self) -> Option<HirFrame>612     fn pop(&self) -> Option<HirFrame> {
613         self.trans().stack.borrow_mut().pop()
614     }
615 
616     /// Create a new error with the given span and error type.
error(&self, span: Span, kind: ErrorKind) -> Error617     fn error(&self, span: Span, kind: ErrorKind) -> Error {
618         Error { kind: kind, pattern: self.pattern.to_string(), span: span }
619     }
620 
621     /// Return a copy of the active flags.
flags(&self) -> Flags622     fn flags(&self) -> Flags {
623         self.trans().flags.get()
624     }
625 
626     /// Set the flags of this translator from the flags set in the given AST.
627     /// Then, return the old flags.
set_flags(&self, ast_flags: &ast::Flags) -> Flags628     fn set_flags(&self, ast_flags: &ast::Flags) -> Flags {
629         let old_flags = self.flags();
630         let mut new_flags = Flags::from_ast(ast_flags);
631         new_flags.merge(&old_flags);
632         self.trans().flags.set(new_flags);
633         old_flags
634     }
635 
hir_literal(&self, lit: &ast::Literal) -> Result<Hir>636     fn hir_literal(&self, lit: &ast::Literal) -> Result<Hir> {
637         let ch = match self.literal_to_char(lit)? {
638             byte @ hir::Literal::Byte(_) => return Ok(Hir::literal(byte)),
639             hir::Literal::Unicode(ch) => ch,
640         };
641         if self.flags().case_insensitive() {
642             self.hir_from_char_case_insensitive(lit.span, ch)
643         } else {
644             self.hir_from_char(lit.span, ch)
645         }
646     }
647 
648     /// Convert an Ast literal to its scalar representation.
649     ///
650     /// When Unicode mode is enabled, then this always succeeds and returns a
651     /// `char` (Unicode scalar value).
652     ///
653     /// When Unicode mode is disabled, then a raw byte is returned. If that
654     /// byte is not ASCII and invalid UTF-8 is not allowed, then this returns
655     /// an error.
literal_to_char(&self, lit: &ast::Literal) -> Result<hir::Literal>656     fn literal_to_char(&self, lit: &ast::Literal) -> Result<hir::Literal> {
657         if self.flags().unicode() {
658             return Ok(hir::Literal::Unicode(lit.c));
659         }
660         let byte = match lit.byte() {
661             None => return Ok(hir::Literal::Unicode(lit.c)),
662             Some(byte) => byte,
663         };
664         if byte <= 0x7F {
665             return Ok(hir::Literal::Unicode(byte as char));
666         }
667         if !self.trans().allow_invalid_utf8 {
668             return Err(self.error(lit.span, ErrorKind::InvalidUtf8));
669         }
670         Ok(hir::Literal::Byte(byte))
671     }
672 
hir_from_char(&self, span: Span, c: char) -> Result<Hir>673     fn hir_from_char(&self, span: Span, c: char) -> Result<Hir> {
674         if !self.flags().unicode() && c.len_utf8() > 1 {
675             return Err(self.error(span, ErrorKind::UnicodeNotAllowed));
676         }
677         Ok(Hir::literal(hir::Literal::Unicode(c)))
678     }
679 
hir_from_char_case_insensitive( &self, span: Span, c: char, ) -> Result<Hir>680     fn hir_from_char_case_insensitive(
681         &self,
682         span: Span,
683         c: char,
684     ) -> Result<Hir> {
685         if self.flags().unicode() {
686             // If case folding won't do anything, then don't bother trying.
687             let map =
688                 unicode::contains_simple_case_mapping(c, c).map_err(|_| {
689                     self.error(span, ErrorKind::UnicodeCaseUnavailable)
690                 })?;
691             if !map {
692                 return self.hir_from_char(span, c);
693             }
694             let mut cls =
695                 hir::ClassUnicode::new(vec![hir::ClassUnicodeRange::new(
696                     c, c,
697                 )]);
698             cls.try_case_fold_simple().map_err(|_| {
699                 self.error(span, ErrorKind::UnicodeCaseUnavailable)
700             })?;
701             Ok(Hir::class(hir::Class::Unicode(cls)))
702         } else {
703             if c.len_utf8() > 1 {
704                 return Err(self.error(span, ErrorKind::UnicodeNotAllowed));
705             }
706             // If case folding won't do anything, then don't bother trying.
707             match c {
708                 'A'..='Z' | 'a'..='z' => {}
709                 _ => return self.hir_from_char(span, c),
710             }
711             let mut cls =
712                 hir::ClassBytes::new(vec![hir::ClassBytesRange::new(
713                     c as u8, c as u8,
714                 )]);
715             cls.case_fold_simple();
716             Ok(Hir::class(hir::Class::Bytes(cls)))
717         }
718     }
719 
hir_dot(&self, span: Span) -> Result<Hir>720     fn hir_dot(&self, span: Span) -> Result<Hir> {
721         let unicode = self.flags().unicode();
722         if !unicode && !self.trans().allow_invalid_utf8 {
723             return Err(self.error(span, ErrorKind::InvalidUtf8));
724         }
725         Ok(if self.flags().dot_matches_new_line() {
726             Hir::any(!unicode)
727         } else {
728             Hir::dot(!unicode)
729         })
730     }
731 
hir_assertion(&self, asst: &ast::Assertion) -> Result<Hir>732     fn hir_assertion(&self, asst: &ast::Assertion) -> Result<Hir> {
733         let unicode = self.flags().unicode();
734         let multi_line = self.flags().multi_line();
735         Ok(match asst.kind {
736             ast::AssertionKind::StartLine => Hir::anchor(if multi_line {
737                 hir::Anchor::StartLine
738             } else {
739                 hir::Anchor::StartText
740             }),
741             ast::AssertionKind::EndLine => Hir::anchor(if multi_line {
742                 hir::Anchor::EndLine
743             } else {
744                 hir::Anchor::EndText
745             }),
746             ast::AssertionKind::StartText => {
747                 Hir::anchor(hir::Anchor::StartText)
748             }
749             ast::AssertionKind::EndText => Hir::anchor(hir::Anchor::EndText),
750             ast::AssertionKind::WordBoundary => {
751                 Hir::word_boundary(if unicode {
752                     hir::WordBoundary::Unicode
753                 } else {
754                     hir::WordBoundary::Ascii
755                 })
756             }
757             ast::AssertionKind::NotWordBoundary => {
758                 Hir::word_boundary(if unicode {
759                     hir::WordBoundary::UnicodeNegate
760                 } else {
761                     // It is possible for negated ASCII word boundaries to
762                     // match at invalid UTF-8 boundaries, even when searching
763                     // valid UTF-8.
764                     if !self.trans().allow_invalid_utf8 {
765                         return Err(
766                             self.error(asst.span, ErrorKind::InvalidUtf8)
767                         );
768                     }
769                     hir::WordBoundary::AsciiNegate
770                 })
771             }
772         })
773     }
774 
hir_group(&self, group: &ast::Group, expr: Hir) -> Hir775     fn hir_group(&self, group: &ast::Group, expr: Hir) -> Hir {
776         let kind = match group.kind {
777             ast::GroupKind::CaptureIndex(idx) => {
778                 hir::GroupKind::CaptureIndex(idx)
779             }
780             ast::GroupKind::CaptureName(ref capname) => {
781                 hir::GroupKind::CaptureName {
782                     name: capname.name.clone(),
783                     index: capname.index,
784                 }
785             }
786             ast::GroupKind::NonCapturing(_) => hir::GroupKind::NonCapturing,
787         };
788         Hir::group(hir::Group { kind: kind, hir: Box::new(expr) })
789     }
790 
hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir791     fn hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir {
792         let kind = match rep.op.kind {
793             ast::RepetitionKind::ZeroOrOne => hir::RepetitionKind::ZeroOrOne,
794             ast::RepetitionKind::ZeroOrMore => hir::RepetitionKind::ZeroOrMore,
795             ast::RepetitionKind::OneOrMore => hir::RepetitionKind::OneOrMore,
796             ast::RepetitionKind::Range(ast::RepetitionRange::Exactly(m)) => {
797                 hir::RepetitionKind::Range(hir::RepetitionRange::Exactly(m))
798             }
799             ast::RepetitionKind::Range(ast::RepetitionRange::AtLeast(m)) => {
800                 hir::RepetitionKind::Range(hir::RepetitionRange::AtLeast(m))
801             }
802             ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(
803                 m,
804                 n,
805             )) => {
806                 hir::RepetitionKind::Range(hir::RepetitionRange::Bounded(m, n))
807             }
808         };
809         let greedy =
810             if self.flags().swap_greed() { !rep.greedy } else { rep.greedy };
811         Hir::repetition(hir::Repetition {
812             kind: kind,
813             greedy: greedy,
814             hir: Box::new(expr),
815         })
816     }
817 
hir_unicode_class( &self, ast_class: &ast::ClassUnicode, ) -> Result<hir::ClassUnicode>818     fn hir_unicode_class(
819         &self,
820         ast_class: &ast::ClassUnicode,
821     ) -> Result<hir::ClassUnicode> {
822         use ast::ClassUnicodeKind::*;
823 
824         if !self.flags().unicode() {
825             return Err(
826                 self.error(ast_class.span, ErrorKind::UnicodeNotAllowed)
827             );
828         }
829         let query = match ast_class.kind {
830             OneLetter(name) => ClassQuery::OneLetter(name),
831             Named(ref name) => ClassQuery::Binary(name),
832             NamedValue { ref name, ref value, .. } => ClassQuery::ByValue {
833                 property_name: name,
834                 property_value: value,
835             },
836         };
837         let mut result = self.convert_unicode_class_error(
838             &ast_class.span,
839             unicode::class(query),
840         );
841         if let Ok(ref mut class) = result {
842             self.unicode_fold_and_negate(
843                 &ast_class.span,
844                 ast_class.negated,
845                 class,
846             )?;
847         }
848         result
849     }
850 
hir_perl_unicode_class( &self, ast_class: &ast::ClassPerl, ) -> Result<hir::ClassUnicode>851     fn hir_perl_unicode_class(
852         &self,
853         ast_class: &ast::ClassPerl,
854     ) -> Result<hir::ClassUnicode> {
855         use ast::ClassPerlKind::*;
856 
857         assert!(self.flags().unicode());
858         let result = match ast_class.kind {
859             Digit => unicode::perl_digit(),
860             Space => unicode::perl_space(),
861             Word => unicode::perl_word(),
862         };
863         let mut class =
864             self.convert_unicode_class_error(&ast_class.span, result)?;
865         // We needn't apply case folding here because the Perl Unicode classes
866         // are already closed under Unicode simple case folding.
867         if ast_class.negated {
868             class.negate();
869         }
870         Ok(class)
871     }
872 
hir_perl_byte_class( &self, ast_class: &ast::ClassPerl, ) -> hir::ClassBytes873     fn hir_perl_byte_class(
874         &self,
875         ast_class: &ast::ClassPerl,
876     ) -> hir::ClassBytes {
877         use ast::ClassPerlKind::*;
878 
879         assert!(!self.flags().unicode());
880         let mut class = match ast_class.kind {
881             Digit => hir_ascii_class_bytes(&ast::ClassAsciiKind::Digit),
882             Space => hir_ascii_class_bytes(&ast::ClassAsciiKind::Space),
883             Word => hir_ascii_class_bytes(&ast::ClassAsciiKind::Word),
884         };
885         // We needn't apply case folding here because the Perl ASCII classes
886         // are already closed (under ASCII case folding).
887         if ast_class.negated {
888             class.negate();
889         }
890         class
891     }
892 
893     /// Converts the given Unicode specific error to an HIR translation error.
894     ///
895     /// The span given should approximate the position at which an error would
896     /// occur.
convert_unicode_class_error( &self, span: &Span, result: unicode::Result<hir::ClassUnicode>, ) -> Result<hir::ClassUnicode>897     fn convert_unicode_class_error(
898         &self,
899         span: &Span,
900         result: unicode::Result<hir::ClassUnicode>,
901     ) -> Result<hir::ClassUnicode> {
902         result.map_err(|err| {
903             let sp = span.clone();
904             match err {
905                 unicode::Error::PropertyNotFound => {
906                     self.error(sp, ErrorKind::UnicodePropertyNotFound)
907                 }
908                 unicode::Error::PropertyValueNotFound => {
909                     self.error(sp, ErrorKind::UnicodePropertyValueNotFound)
910                 }
911                 unicode::Error::PerlClassNotFound => {
912                     self.error(sp, ErrorKind::UnicodePerlClassNotFound)
913                 }
914             }
915         })
916     }
917 
unicode_fold_and_negate( &self, span: &Span, negated: bool, class: &mut hir::ClassUnicode, ) -> Result<()>918     fn unicode_fold_and_negate(
919         &self,
920         span: &Span,
921         negated: bool,
922         class: &mut hir::ClassUnicode,
923     ) -> Result<()> {
924         // Note that we must apply case folding before negation!
925         // Consider `(?i)[^x]`. If we applied negation field, then
926         // the result would be the character class that matched any
927         // Unicode scalar value.
928         if self.flags().case_insensitive() {
929             class.try_case_fold_simple().map_err(|_| {
930                 self.error(span.clone(), ErrorKind::UnicodeCaseUnavailable)
931             })?;
932         }
933         if negated {
934             class.negate();
935         }
936         Ok(())
937     }
938 
bytes_fold_and_negate( &self, span: &Span, negated: bool, class: &mut hir::ClassBytes, ) -> Result<()>939     fn bytes_fold_and_negate(
940         &self,
941         span: &Span,
942         negated: bool,
943         class: &mut hir::ClassBytes,
944     ) -> Result<()> {
945         // Note that we must apply case folding before negation!
946         // Consider `(?i)[^x]`. If we applied negation field, then
947         // the result would be the character class that matched any
948         // Unicode scalar value.
949         if self.flags().case_insensitive() {
950             class.case_fold_simple();
951         }
952         if negated {
953             class.negate();
954         }
955         if !self.trans().allow_invalid_utf8 && !class.is_all_ascii() {
956             return Err(self.error(span.clone(), ErrorKind::InvalidUtf8));
957         }
958         Ok(())
959     }
960 
961     /// Return a scalar byte value suitable for use as a literal in a byte
962     /// character class.
class_literal_byte(&self, ast: &ast::Literal) -> Result<u8>963     fn class_literal_byte(&self, ast: &ast::Literal) -> Result<u8> {
964         match self.literal_to_char(ast)? {
965             hir::Literal::Byte(byte) => Ok(byte),
966             hir::Literal::Unicode(ch) => {
967                 if ch <= 0x7F as char {
968                     Ok(ch as u8)
969                 } else {
970                     // We can't feasibly support Unicode in
971                     // byte oriented classes. Byte classes don't
972                     // do Unicode case folding.
973                     Err(self.error(ast.span, ErrorKind::UnicodeNotAllowed))
974                 }
975             }
976         }
977     }
978 }
979 
/// A translator's representation of a regular expression's flags at any given
/// moment in time.
///
/// Each flag can be in one of three states: absent, present but disabled or
/// present but enabled. These states are encoded as `None`, `Some(false)` and
/// `Some(true)` respectively.
#[derive(Clone, Copy, Debug, Default)]
struct Flags {
    /// Case insensitive matching. Treated as disabled when absent.
    case_insensitive: Option<bool>,
    /// Multi-line mode, which selects line anchors over text anchors for the
    /// line assertions. Treated as disabled when absent.
    multi_line: Option<bool>,
    /// Whether `.` is translated with `Hir::any` instead of `Hir::dot`.
    /// Treated as disabled when absent.
    dot_matches_new_line: Option<bool>,
    /// Whether the greediness of repetition operators is inverted. Treated as
    /// disabled when absent.
    swap_greed: Option<bool>,
    /// Unicode mode. Unlike the other flags, this one is treated as enabled
    /// when absent.
    unicode: Option<bool>,
    // Note that `ignore_whitespace` is omitted here because it is handled
    // entirely in the parser.
}
995 
996 impl Flags {
from_ast(ast: &ast::Flags) -> Flags997     fn from_ast(ast: &ast::Flags) -> Flags {
998         let mut flags = Flags::default();
999         let mut enable = true;
1000         for item in &ast.items {
1001             match item.kind {
1002                 ast::FlagsItemKind::Negation => {
1003                     enable = false;
1004                 }
1005                 ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive) => {
1006                     flags.case_insensitive = Some(enable);
1007                 }
1008                 ast::FlagsItemKind::Flag(ast::Flag::MultiLine) => {
1009                     flags.multi_line = Some(enable);
1010                 }
1011                 ast::FlagsItemKind::Flag(ast::Flag::DotMatchesNewLine) => {
1012                     flags.dot_matches_new_line = Some(enable);
1013                 }
1014                 ast::FlagsItemKind::Flag(ast::Flag::SwapGreed) => {
1015                     flags.swap_greed = Some(enable);
1016                 }
1017                 ast::FlagsItemKind::Flag(ast::Flag::Unicode) => {
1018                     flags.unicode = Some(enable);
1019                 }
1020                 ast::FlagsItemKind::Flag(ast::Flag::IgnoreWhitespace) => {}
1021             }
1022         }
1023         flags
1024     }
1025 
merge(&mut self, previous: &Flags)1026     fn merge(&mut self, previous: &Flags) {
1027         if self.case_insensitive.is_none() {
1028             self.case_insensitive = previous.case_insensitive;
1029         }
1030         if self.multi_line.is_none() {
1031             self.multi_line = previous.multi_line;
1032         }
1033         if self.dot_matches_new_line.is_none() {
1034             self.dot_matches_new_line = previous.dot_matches_new_line;
1035         }
1036         if self.swap_greed.is_none() {
1037             self.swap_greed = previous.swap_greed;
1038         }
1039         if self.unicode.is_none() {
1040             self.unicode = previous.unicode;
1041         }
1042     }
1043 
case_insensitive(&self) -> bool1044     fn case_insensitive(&self) -> bool {
1045         self.case_insensitive.unwrap_or(false)
1046     }
1047 
multi_line(&self) -> bool1048     fn multi_line(&self) -> bool {
1049         self.multi_line.unwrap_or(false)
1050     }
1051 
dot_matches_new_line(&self) -> bool1052     fn dot_matches_new_line(&self) -> bool {
1053         self.dot_matches_new_line.unwrap_or(false)
1054     }
1055 
swap_greed(&self) -> bool1056     fn swap_greed(&self) -> bool {
1057         self.swap_greed.unwrap_or(false)
1058     }
1059 
unicode(&self) -> bool1060     fn unicode(&self) -> bool {
1061         self.unicode.unwrap_or(true)
1062     }
1063 }
1064 
hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes1065 fn hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes {
1066     let ranges: Vec<_> = ascii_class(kind)
1067         .iter()
1068         .cloned()
1069         .map(|(s, e)| hir::ClassBytesRange::new(s as u8, e as u8))
1070         .collect();
1071     hir::ClassBytes::new(ranges)
1072 }
1073 
ascii_class(kind: &ast::ClassAsciiKind) -> &'static [(char, char)]1074 fn ascii_class(kind: &ast::ClassAsciiKind) -> &'static [(char, char)] {
1075     use ast::ClassAsciiKind::*;
1076     match *kind {
1077         Alnum => &[('0', '9'), ('A', 'Z'), ('a', 'z')],
1078         Alpha => &[('A', 'Z'), ('a', 'z')],
1079         Ascii => &[('\x00', '\x7F')],
1080         Blank => &[('\t', '\t'), (' ', ' ')],
1081         Cntrl => &[('\x00', '\x1F'), ('\x7F', '\x7F')],
1082         Digit => &[('0', '9')],
1083         Graph => &[('!', '~')],
1084         Lower => &[('a', 'z')],
1085         Print => &[(' ', '~')],
1086         Punct => &[('!', '/'), (':', '@'), ('[', '`'), ('{', '~')],
1087         Space => &[
1088             ('\t', '\t'),
1089             ('\n', '\n'),
1090             ('\x0B', '\x0B'),
1091             ('\x0C', '\x0C'),
1092             ('\r', '\r'),
1093             (' ', ' '),
1094         ],
1095         Upper => &[('A', 'Z')],
1096         Word => &[('0', '9'), ('A', 'Z'), ('_', '_'), ('a', 'z')],
1097         Xdigit => &[('0', '9'), ('A', 'F'), ('a', 'f')],
1098     }
1099 }
1100 
1101 #[cfg(test)]
1102 mod tests {
1103     use ast::parse::ParserBuilder;
1104     use ast::{self, Ast, Position, Span};
1105     use hir::{self, Hir, HirKind};
1106     use unicode::{self, ClassQuery};
1107 
1108     use super::{ascii_class, TranslatorBuilder};
1109 
    // A stand-in for `hir::Error` that tests compare against the real errors
    // returned by the translator.
    //
    // Equality between `TestError` and `hir::Error` is defined to disregard
    // the pattern string in `hir::Error`, which is annoying to provide in
    // tests.
    #[derive(Clone, Debug)]
    struct TestError {
        // The span the error is expected to be reported at.
        span: Span,
        // The expected kind of error.
        kind: hir::ErrorKind,
    }
1118 
1119     impl PartialEq<hir::Error> for TestError {
eq(&self, other: &hir::Error) -> bool1120         fn eq(&self, other: &hir::Error) -> bool {
1121             self.span == other.span && self.kind == other.kind
1122         }
1123     }
1124 
1125     impl PartialEq<TestError> for hir::Error {
eq(&self, other: &TestError) -> bool1126         fn eq(&self, other: &TestError) -> bool {
1127             self.span == other.span && self.kind == other.kind
1128         }
1129     }
1130 
parse(pattern: &str) -> Ast1131     fn parse(pattern: &str) -> Ast {
1132         ParserBuilder::new().octal(true).build().parse(pattern).unwrap()
1133     }
1134 
t(pattern: &str) -> Hir1135     fn t(pattern: &str) -> Hir {
1136         TranslatorBuilder::new()
1137             .allow_invalid_utf8(false)
1138             .build()
1139             .translate(pattern, &parse(pattern))
1140             .unwrap()
1141     }
1142 
t_err(pattern: &str) -> hir::Error1143     fn t_err(pattern: &str) -> hir::Error {
1144         TranslatorBuilder::new()
1145             .allow_invalid_utf8(false)
1146             .build()
1147             .translate(pattern, &parse(pattern))
1148             .unwrap_err()
1149     }
1150 
t_bytes(pattern: &str) -> Hir1151     fn t_bytes(pattern: &str) -> Hir {
1152         TranslatorBuilder::new()
1153             .allow_invalid_utf8(true)
1154             .build()
1155             .translate(pattern, &parse(pattern))
1156             .unwrap()
1157     }
1158 
hir_lit(s: &str) -> Hir1159     fn hir_lit(s: &str) -> Hir {
1160         match s.len() {
1161             0 => Hir::empty(),
1162             _ => {
1163                 let lits = s
1164                     .chars()
1165                     .map(hir::Literal::Unicode)
1166                     .map(Hir::literal)
1167                     .collect();
1168                 Hir::concat(lits)
1169             }
1170         }
1171     }
1172 
hir_blit(s: &[u8]) -> Hir1173     fn hir_blit(s: &[u8]) -> Hir {
1174         match s.len() {
1175             0 => Hir::empty(),
1176             1 => Hir::literal(hir::Literal::Byte(s[0])),
1177             _ => {
1178                 let lits = s
1179                     .iter()
1180                     .cloned()
1181                     .map(hir::Literal::Byte)
1182                     .map(Hir::literal)
1183                     .collect();
1184                 Hir::concat(lits)
1185             }
1186         }
1187     }
1188 
hir_group(i: u32, expr: Hir) -> Hir1189     fn hir_group(i: u32, expr: Hir) -> Hir {
1190         Hir::group(hir::Group {
1191             kind: hir::GroupKind::CaptureIndex(i),
1192             hir: Box::new(expr),
1193         })
1194     }
1195 
hir_group_name(i: u32, name: &str, expr: Hir) -> Hir1196     fn hir_group_name(i: u32, name: &str, expr: Hir) -> Hir {
1197         Hir::group(hir::Group {
1198             kind: hir::GroupKind::CaptureName {
1199                 name: name.to_string(),
1200                 index: i,
1201             },
1202             hir: Box::new(expr),
1203         })
1204     }
1205 
hir_group_nocap(expr: Hir) -> Hir1206     fn hir_group_nocap(expr: Hir) -> Hir {
1207         Hir::group(hir::Group {
1208             kind: hir::GroupKind::NonCapturing,
1209             hir: Box::new(expr),
1210         })
1211     }
1212 
hir_quest(greedy: bool, expr: Hir) -> Hir1213     fn hir_quest(greedy: bool, expr: Hir) -> Hir {
1214         Hir::repetition(hir::Repetition {
1215             kind: hir::RepetitionKind::ZeroOrOne,
1216             greedy: greedy,
1217             hir: Box::new(expr),
1218         })
1219     }
1220 
hir_star(greedy: bool, expr: Hir) -> Hir1221     fn hir_star(greedy: bool, expr: Hir) -> Hir {
1222         Hir::repetition(hir::Repetition {
1223             kind: hir::RepetitionKind::ZeroOrMore,
1224             greedy: greedy,
1225             hir: Box::new(expr),
1226         })
1227     }
1228 
hir_plus(greedy: bool, expr: Hir) -> Hir1229     fn hir_plus(greedy: bool, expr: Hir) -> Hir {
1230         Hir::repetition(hir::Repetition {
1231             kind: hir::RepetitionKind::OneOrMore,
1232             greedy: greedy,
1233             hir: Box::new(expr),
1234         })
1235     }
1236 
hir_range(greedy: bool, range: hir::RepetitionRange, expr: Hir) -> Hir1237     fn hir_range(greedy: bool, range: hir::RepetitionRange, expr: Hir) -> Hir {
1238         Hir::repetition(hir::Repetition {
1239             kind: hir::RepetitionKind::Range(range),
1240             greedy: greedy,
1241             hir: Box::new(expr),
1242         })
1243     }
1244 
    /// Builds an alternation of the given expressions. This is a shorthand
    /// for `Hir::alternation`.
    fn hir_alt(alts: Vec<Hir>) -> Hir {
        Hir::alternation(alts)
    }
1248 
    /// Builds a concatenation of the given expressions. This is a shorthand
    /// for `Hir::concat`.
    fn hir_cat(exprs: Vec<Hir>) -> Hir {
        Hir::concat(exprs)
    }
1252 
    /// Builds a Unicode class expression from the result of a Unicode class
    /// query, panicking if the query fails.
    // NOTE(review): `dead_code` is presumably allowed because the callers
    // are not compiled in every feature configuration -- confirm.
    #[allow(dead_code)]
    fn hir_uclass_query(query: ClassQuery) -> Hir {
        Hir::class(hir::Class::Unicode(unicode::class(query).unwrap()))
    }
1257 
    /// Builds a Unicode class expression from the Perl word class, panicking
    /// if that class cannot be looked up.
    // NOTE(review): `dead_code` is presumably allowed because the callers
    // are not compiled in every feature configuration -- confirm.
    #[allow(dead_code)]
    fn hir_uclass_perl_word() -> Hir {
        Hir::class(hir::Class::Unicode(unicode::perl_word().unwrap()))
    }
1262 
hir_uclass(ranges: &[(char, char)]) -> Hir1263     fn hir_uclass(ranges: &[(char, char)]) -> Hir {
1264         let ranges: Vec<hir::ClassUnicodeRange> = ranges
1265             .iter()
1266             .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e))
1267             .collect();
1268         Hir::class(hir::Class::Unicode(hir::ClassUnicode::new(ranges)))
1269     }
1270 
hir_bclass(ranges: &[(u8, u8)]) -> Hir1271     fn hir_bclass(ranges: &[(u8, u8)]) -> Hir {
1272         let ranges: Vec<hir::ClassBytesRange> = ranges
1273             .iter()
1274             .map(|&(s, e)| hir::ClassBytesRange::new(s, e))
1275             .collect();
1276         Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges)))
1277     }
1278 
hir_bclass_from_char(ranges: &[(char, char)]) -> Hir1279     fn hir_bclass_from_char(ranges: &[(char, char)]) -> Hir {
1280         let ranges: Vec<hir::ClassBytesRange> = ranges
1281             .iter()
1282             .map(|&(s, e)| {
1283                 assert!(s as u32 <= 0x7F);
1284                 assert!(e as u32 <= 0x7F);
1285                 hir::ClassBytesRange::new(s as u8, e as u8)
1286             })
1287             .collect();
1288         Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges)))
1289     }
1290 
hir_case_fold(expr: Hir) -> Hir1291     fn hir_case_fold(expr: Hir) -> Hir {
1292         match expr.into_kind() {
1293             HirKind::Class(mut cls) => {
1294                 cls.case_fold_simple();
1295                 Hir::class(cls)
1296             }
1297             _ => panic!("cannot case fold non-class Hir expr"),
1298         }
1299     }
1300 
hir_negate(expr: Hir) -> Hir1301     fn hir_negate(expr: Hir) -> Hir {
1302         match expr.into_kind() {
1303             HirKind::Class(mut cls) => {
1304                 cls.negate();
1305                 Hir::class(cls)
1306             }
1307             _ => panic!("cannot negate non-class Hir expr"),
1308         }
1309     }
1310 
1311     #[allow(dead_code)]
hir_union(expr1: Hir, expr2: Hir) -> Hir1312     fn hir_union(expr1: Hir, expr2: Hir) -> Hir {
1313         use hir::Class::{Bytes, Unicode};
1314 
1315         match (expr1.into_kind(), expr2.into_kind()) {
1316             (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
1317                 c1.union(&c2);
1318                 Hir::class(hir::Class::Unicode(c1))
1319             }
1320             (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
1321                 c1.union(&c2);
1322                 Hir::class(hir::Class::Bytes(c1))
1323             }
1324             _ => panic!("cannot union non-class Hir exprs"),
1325         }
1326     }
1327 
1328     #[allow(dead_code)]
hir_difference(expr1: Hir, expr2: Hir) -> Hir1329     fn hir_difference(expr1: Hir, expr2: Hir) -> Hir {
1330         use hir::Class::{Bytes, Unicode};
1331 
1332         match (expr1.into_kind(), expr2.into_kind()) {
1333             (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
1334                 c1.difference(&c2);
1335                 Hir::class(hir::Class::Unicode(c1))
1336             }
1337             (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
1338                 c1.difference(&c2);
1339                 Hir::class(hir::Class::Bytes(c1))
1340             }
1341             _ => panic!("cannot difference non-class Hir exprs"),
1342         }
1343     }
1344 
    /// Builds an anchor expression. This is a shorthand for `Hir::anchor`.
    fn hir_anchor(anchor: hir::Anchor) -> Hir {
        Hir::anchor(anchor)
    }
1348 
    /// Builds a word boundary expression. This is a shorthand for
    /// `Hir::word_boundary`.
    fn hir_word(wb: hir::WordBoundary) -> Hir {
        Hir::word_boundary(wb)
    }
1352 
1353     #[test]
empty()1354     fn empty() {
1355         assert_eq!(t(""), Hir::empty());
1356         assert_eq!(t("(?i)"), Hir::empty());
1357         assert_eq!(t("()"), hir_group(1, Hir::empty()));
1358         assert_eq!(t("(?:)"), hir_group_nocap(Hir::empty()));
1359         assert_eq!(t("(?P<wat>)"), hir_group_name(1, "wat", Hir::empty()));
1360         assert_eq!(t("|"), hir_alt(vec![Hir::empty(), Hir::empty()]));
1361         assert_eq!(
1362             t("()|()"),
1363             hir_alt(vec![
1364                 hir_group(1, Hir::empty()),
1365                 hir_group(2, Hir::empty()),
1366             ])
1367         );
1368         assert_eq!(
1369             t("(|b)"),
1370             hir_group(1, hir_alt(vec![Hir::empty(), hir_lit("b"),]))
1371         );
1372         assert_eq!(
1373             t("(a|)"),
1374             hir_group(1, hir_alt(vec![hir_lit("a"), Hir::empty(),]))
1375         );
1376         assert_eq!(
1377             t("(a||c)"),
1378             hir_group(
1379                 1,
1380                 hir_alt(vec![hir_lit("a"), Hir::empty(), hir_lit("c"),])
1381             )
1382         );
1383         assert_eq!(
1384             t("(||)"),
1385             hir_group(
1386                 1,
1387                 hir_alt(vec![Hir::empty(), Hir::empty(), Hir::empty(),])
1388             )
1389         );
1390     }
1391 
1392     #[test]
literal()1393     fn literal() {
1394         assert_eq!(t("a"), hir_lit("a"));
1395         assert_eq!(t("(?-u)a"), hir_lit("a"));
1396         assert_eq!(t("☃"), hir_lit("☃"));
1397         assert_eq!(t("abcd"), hir_lit("abcd"));
1398 
1399         assert_eq!(t_bytes("(?-u)a"), hir_lit("a"));
1400         assert_eq!(t_bytes("(?-u)\x61"), hir_lit("a"));
1401         assert_eq!(t_bytes(r"(?-u)\x61"), hir_lit("a"));
1402         assert_eq!(t_bytes(r"(?-u)\xFF"), hir_blit(b"\xFF"));
1403 
1404         assert_eq!(
1405             t_err("(?-u)☃"),
1406             TestError {
1407                 kind: hir::ErrorKind::UnicodeNotAllowed,
1408                 span: Span::new(
1409                     Position::new(5, 1, 6),
1410                     Position::new(8, 1, 7)
1411                 ),
1412             }
1413         );
1414         assert_eq!(
1415             t_err(r"(?-u)\xFF"),
1416             TestError {
1417                 kind: hir::ErrorKind::InvalidUtf8,
1418                 span: Span::new(
1419                     Position::new(5, 1, 6),
1420                     Position::new(9, 1, 10)
1421                 ),
1422             }
1423         );
1424     }
1425 
    // Case insensitive literals translate to classes holding the literal and
    // its case variants. Literals without case variants stay literals.
    #[test]
    fn literal_case_insensitive() {
        // Unicode mode. These assertions are only compiled when the
        // `unicode-case` feature is enabled.
        #[cfg(feature = "unicode-case")]
        assert_eq!(t("(?i)a"), hir_uclass(&[('A', 'A'), ('a', 'a'),]));
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?i:a)"),
            hir_group_nocap(hir_uclass(&[('A', 'A'), ('a', 'a')],))
        );
        // The flag only applies between `(?i)` and `(?-i)`.
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("a(?i)a(?-i)a"),
            hir_cat(vec![
                hir_lit("a"),
                hir_uclass(&[('A', 'A'), ('a', 'a')]),
                hir_lit("a"),
            ])
        );
        // `@` has no case variants, so it remains a literal.
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?i)ab@c"),
            hir_cat(vec![
                hir_uclass(&[('A', 'A'), ('a', 'a')]),
                hir_uclass(&[('B', 'B'), ('b', 'b')]),
                hir_lit("@"),
                hir_uclass(&[('C', 'C'), ('c', 'c')]),
            ])
        );
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?i)β"),
            hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),])
        );

        // Unicode mode disabled: the result is a byte class.
        assert_eq!(t("(?i-u)a"), hir_bclass(&[(b'A', b'A'), (b'a', b'a'),]));
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?-u)a(?i)a(?-i)a"),
            hir_cat(vec![
                hir_lit("a"),
                hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
                hir_lit("a"),
            ])
        );
        assert_eq!(
            t("(?i-u)ab@c"),
            hir_cat(vec![
                hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
                hir_bclass(&[(b'B', b'B'), (b'b', b'b')]),
                hir_lit("@"),
                hir_bclass(&[(b'C', b'C'), (b'c', b'c')]),
            ])
        );

        // Same as above, but with invalid UTF-8 allowed.
        assert_eq!(
            t_bytes("(?i-u)a"),
            hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
        );
        assert_eq!(
            t_bytes("(?i-u)\x61"),
            hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
        );
        assert_eq!(
            t_bytes(r"(?i-u)\x61"),
            hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
        );
        // `\xFF` has no ASCII case variants, so it remains a byte literal.
        assert_eq!(t_bytes(r"(?i-u)\xFF"), hir_blit(b"\xFF"));

        // A multi-byte character is rejected when Unicode mode is disabled.
        assert_eq!(
            t_err("(?i-u)β"),
            TestError {
                kind: hir::ErrorKind::UnicodeNotAllowed,
                span: Span::new(
                    Position::new(6, 1, 7),
                    Position::new(8, 1, 8),
                ),
            }
        );
    }
1505 
1506     #[test]
dot()1507     fn dot() {
1508         assert_eq!(
1509             t("."),
1510             hir_uclass(&[('\0', '\t'), ('\x0B', '\u{10FFFF}'),])
1511         );
1512         assert_eq!(t("(?s)."), hir_uclass(&[('\0', '\u{10FFFF}'),]));
1513         assert_eq!(
1514             t_bytes("(?-u)."),
1515             hir_bclass(&[(b'\0', b'\t'), (b'\x0B', b'\xFF'),])
1516         );
1517         assert_eq!(t_bytes("(?s-u)."), hir_bclass(&[(b'\0', b'\xFF'),]));
1518 
1519         // If invalid UTF-8 isn't allowed, then non-Unicode `.` isn't allowed.
1520         assert_eq!(
1521             t_err("(?-u)."),
1522             TestError {
1523                 kind: hir::ErrorKind::InvalidUtf8,
1524                 span: Span::new(
1525                     Position::new(5, 1, 6),
1526                     Position::new(6, 1, 7)
1527                 ),
1528             }
1529         );
1530         assert_eq!(
1531             t_err("(?s-u)."),
1532             TestError {
1533                 kind: hir::ErrorKind::InvalidUtf8,
1534                 span: Span::new(
1535                     Position::new(6, 1, 7),
1536                     Position::new(7, 1, 8)
1537                 ),
1538             }
1539         );
1540     }
1541 
1542     #[test]
assertions()1543     fn assertions() {
1544         assert_eq!(t("^"), hir_anchor(hir::Anchor::StartText));
1545         assert_eq!(t("$"), hir_anchor(hir::Anchor::EndText));
1546         assert_eq!(t(r"\A"), hir_anchor(hir::Anchor::StartText));
1547         assert_eq!(t(r"\z"), hir_anchor(hir::Anchor::EndText));
1548         assert_eq!(t("(?m)^"), hir_anchor(hir::Anchor::StartLine));
1549         assert_eq!(t("(?m)$"), hir_anchor(hir::Anchor::EndLine));
1550         assert_eq!(t(r"(?m)\A"), hir_anchor(hir::Anchor::StartText));
1551         assert_eq!(t(r"(?m)\z"), hir_anchor(hir::Anchor::EndText));
1552 
1553         assert_eq!(t(r"\b"), hir_word(hir::WordBoundary::Unicode));
1554         assert_eq!(t(r"\B"), hir_word(hir::WordBoundary::UnicodeNegate));
1555         assert_eq!(t(r"(?-u)\b"), hir_word(hir::WordBoundary::Ascii));
1556         assert_eq!(
1557             t_bytes(r"(?-u)\B"),
1558             hir_word(hir::WordBoundary::AsciiNegate)
1559         );
1560 
1561         assert_eq!(
1562             t_err(r"(?-u)\B"),
1563             TestError {
1564                 kind: hir::ErrorKind::InvalidUtf8,
1565                 span: Span::new(
1566                     Position::new(5, 1, 6),
1567                     Position::new(7, 1, 8)
1568                 ),
1569             }
1570         );
1571     }
1572 
1573     #[test]
group()1574     fn group() {
1575         assert_eq!(t("(a)"), hir_group(1, hir_lit("a")));
1576         assert_eq!(
1577             t("(a)(b)"),
1578             hir_cat(vec![
1579                 hir_group(1, hir_lit("a")),
1580                 hir_group(2, hir_lit("b")),
1581             ])
1582         );
1583         assert_eq!(
1584             t("(a)|(b)"),
1585             hir_alt(vec![
1586                 hir_group(1, hir_lit("a")),
1587                 hir_group(2, hir_lit("b")),
1588             ])
1589         );
1590         assert_eq!(t("(?P<foo>)"), hir_group_name(1, "foo", Hir::empty()));
1591         assert_eq!(t("(?P<foo>a)"), hir_group_name(1, "foo", hir_lit("a")));
1592         assert_eq!(
1593             t("(?P<foo>a)(?P<bar>b)"),
1594             hir_cat(vec![
1595                 hir_group_name(1, "foo", hir_lit("a")),
1596                 hir_group_name(2, "bar", hir_lit("b")),
1597             ])
1598         );
1599         assert_eq!(t("(?:)"), hir_group_nocap(Hir::empty()));
1600         assert_eq!(t("(?:a)"), hir_group_nocap(hir_lit("a")));
1601         assert_eq!(
1602             t("(?:a)(b)"),
1603             hir_cat(vec![
1604                 hir_group_nocap(hir_lit("a")),
1605                 hir_group(1, hir_lit("b")),
1606             ])
1607         );
1608         assert_eq!(
1609             t("(a)(?:b)(c)"),
1610             hir_cat(vec![
1611                 hir_group(1, hir_lit("a")),
1612                 hir_group_nocap(hir_lit("b")),
1613                 hir_group(2, hir_lit("c")),
1614             ])
1615         );
1616         assert_eq!(
1617             t("(a)(?P<foo>b)(c)"),
1618             hir_cat(vec![
1619                 hir_group(1, hir_lit("a")),
1620                 hir_group_name(2, "foo", hir_lit("b")),
1621                 hir_group(3, hir_lit("c")),
1622             ])
1623         );
1624         assert_eq!(t("()"), hir_group(1, Hir::empty()));
1625         assert_eq!(t("((?i))"), hir_group(1, Hir::empty()));
1626         assert_eq!(t("((?x))"), hir_group(1, Hir::empty()));
1627         assert_eq!(t("(((?x)))"), hir_group(1, hir_group(2, Hir::empty())));
1628     }
1629 
1630     #[test]
flags()1631     fn flags() {
1632         #[cfg(feature = "unicode-case")]
1633         assert_eq!(
1634             t("(?i:a)a"),
1635             hir_cat(vec![
1636                 hir_group_nocap(hir_uclass(&[('A', 'A'), ('a', 'a')])),
1637                 hir_lit("a"),
1638             ])
1639         );
1640         assert_eq!(
1641             t("(?i-u:a)β"),
1642             hir_cat(vec![
1643                 hir_group_nocap(hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
1644                 hir_lit("β"),
1645             ])
1646         );
1647         assert_eq!(
1648             t("(?:(?i-u)a)b"),
1649             hir_cat(vec![
1650                 hir_group_nocap(hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
1651                 hir_lit("b"),
1652             ])
1653         );
1654         assert_eq!(
1655             t("((?i-u)a)b"),
1656             hir_cat(vec![
1657                 hir_group(1, hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
1658                 hir_lit("b"),
1659             ])
1660         );
1661         #[cfg(feature = "unicode-case")]
1662         assert_eq!(
1663             t("(?i)(?-i:a)a"),
1664             hir_cat(vec![
1665                 hir_group_nocap(hir_lit("a")),
1666                 hir_uclass(&[('A', 'A'), ('a', 'a')]),
1667             ])
1668         );
1669         #[cfg(feature = "unicode-case")]
1670         assert_eq!(
1671             t("(?im)a^"),
1672             hir_cat(vec![
1673                 hir_uclass(&[('A', 'A'), ('a', 'a')]),
1674                 hir_anchor(hir::Anchor::StartLine),
1675             ])
1676         );
1677         #[cfg(feature = "unicode-case")]
1678         assert_eq!(
1679             t("(?im)a^(?i-m)a^"),
1680             hir_cat(vec![
1681                 hir_uclass(&[('A', 'A'), ('a', 'a')]),
1682                 hir_anchor(hir::Anchor::StartLine),
1683                 hir_uclass(&[('A', 'A'), ('a', 'a')]),
1684                 hir_anchor(hir::Anchor::StartText),
1685             ])
1686         );
1687         assert_eq!(
1688             t("(?U)a*a*?(?-U)a*a*?"),
1689             hir_cat(vec![
1690                 hir_star(false, hir_lit("a")),
1691                 hir_star(true, hir_lit("a")),
1692                 hir_star(true, hir_lit("a")),
1693                 hir_star(false, hir_lit("a")),
1694             ])
1695         );
1696         #[cfg(feature = "unicode-case")]
1697         assert_eq!(
1698             t("(?:a(?i)a)a"),
1699             hir_cat(vec![
1700                 hir_group_nocap(hir_cat(vec![
1701                     hir_lit("a"),
1702                     hir_uclass(&[('A', 'A'), ('a', 'a')]),
1703                 ])),
1704                 hir_lit("a"),
1705             ])
1706         );
1707         #[cfg(feature = "unicode-case")]
1708         assert_eq!(
1709             t("(?i)(?:a(?-i)a)a"),
1710             hir_cat(vec![
1711                 hir_group_nocap(hir_cat(vec![
1712                     hir_uclass(&[('A', 'A'), ('a', 'a')]),
1713                     hir_lit("a"),
1714                 ])),
1715                 hir_uclass(&[('A', 'A'), ('a', 'a')]),
1716             ])
1717         );
1718     }
1719 
1720     #[test]
escape()1721     fn escape() {
1722         assert_eq!(
1723             t(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#"),
1724             hir_lit(r"\.+*?()|[]{}^$#")
1725         );
1726     }
1727 
1728     #[test]
repetition()1729     fn repetition() {
1730         assert_eq!(t("a?"), hir_quest(true, hir_lit("a")));
1731         assert_eq!(t("a*"), hir_star(true, hir_lit("a")));
1732         assert_eq!(t("a+"), hir_plus(true, hir_lit("a")));
1733         assert_eq!(t("a??"), hir_quest(false, hir_lit("a")));
1734         assert_eq!(t("a*?"), hir_star(false, hir_lit("a")));
1735         assert_eq!(t("a+?"), hir_plus(false, hir_lit("a")));
1736 
1737         assert_eq!(
1738             t("a{1}"),
1739             hir_range(true, hir::RepetitionRange::Exactly(1), hir_lit("a"),)
1740         );
1741         assert_eq!(
1742             t("a{1,}"),
1743             hir_range(true, hir::RepetitionRange::AtLeast(1), hir_lit("a"),)
1744         );
1745         assert_eq!(
1746             t("a{1,2}"),
1747             hir_range(true, hir::RepetitionRange::Bounded(1, 2), hir_lit("a"),)
1748         );
1749         assert_eq!(
1750             t("a{1}?"),
1751             hir_range(false, hir::RepetitionRange::Exactly(1), hir_lit("a"),)
1752         );
1753         assert_eq!(
1754             t("a{1,}?"),
1755             hir_range(false, hir::RepetitionRange::AtLeast(1), hir_lit("a"),)
1756         );
1757         assert_eq!(
1758             t("a{1,2}?"),
1759             hir_range(
1760                 false,
1761                 hir::RepetitionRange::Bounded(1, 2),
1762                 hir_lit("a"),
1763             )
1764         );
1765 
1766         assert_eq!(
1767             t("ab?"),
1768             hir_cat(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),])
1769         );
1770         assert_eq!(
1771             t("(ab)?"),
1772             hir_quest(
1773                 true,
1774                 hir_group(1, hir_cat(vec![hir_lit("a"), hir_lit("b"),]))
1775             )
1776         );
1777         assert_eq!(
1778             t("a|b?"),
1779             hir_alt(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),])
1780         );
1781     }
1782 
1783     #[test]
cat_alt()1784     fn cat_alt() {
1785         assert_eq!(
1786             t("(ab)"),
1787             hir_group(1, hir_cat(vec![hir_lit("a"), hir_lit("b"),]))
1788         );
1789         assert_eq!(t("a|b"), hir_alt(vec![hir_lit("a"), hir_lit("b"),]));
1790         assert_eq!(
1791             t("a|b|c"),
1792             hir_alt(vec![hir_lit("a"), hir_lit("b"), hir_lit("c"),])
1793         );
1794         assert_eq!(
1795             t("ab|bc|cd"),
1796             hir_alt(vec![hir_lit("ab"), hir_lit("bc"), hir_lit("cd"),])
1797         );
1798         assert_eq!(
1799             t("(a|b)"),
1800             hir_group(1, hir_alt(vec![hir_lit("a"), hir_lit("b"),]))
1801         );
1802         assert_eq!(
1803             t("(a|b|c)"),
1804             hir_group(
1805                 1,
1806                 hir_alt(vec![hir_lit("a"), hir_lit("b"), hir_lit("c"),])
1807             )
1808         );
1809         assert_eq!(
1810             t("(ab|bc|cd)"),
1811             hir_group(
1812                 1,
1813                 hir_alt(vec![hir_lit("ab"), hir_lit("bc"), hir_lit("cd"),])
1814             )
1815         );
1816         assert_eq!(
1817             t("(ab|(bc|(cd)))"),
1818             hir_group(
1819                 1,
1820                 hir_alt(vec![
1821                     hir_lit("ab"),
1822                     hir_group(
1823                         2,
1824                         hir_alt(vec![
1825                             hir_lit("bc"),
1826                             hir_group(3, hir_lit("cd")),
1827                         ])
1828                     ),
1829                 ])
1830             )
1831         );
1832     }
1833 
1834     #[test]
class_ascii()1835     fn class_ascii() {
1836         assert_eq!(
1837             t("[[:alnum:]]"),
1838             hir_uclass(ascii_class(&ast::ClassAsciiKind::Alnum))
1839         );
1840         assert_eq!(
1841             t("[[:alpha:]]"),
1842             hir_uclass(ascii_class(&ast::ClassAsciiKind::Alpha))
1843         );
1844         assert_eq!(
1845             t("[[:ascii:]]"),
1846             hir_uclass(ascii_class(&ast::ClassAsciiKind::Ascii))
1847         );
1848         assert_eq!(
1849             t("[[:blank:]]"),
1850             hir_uclass(ascii_class(&ast::ClassAsciiKind::Blank))
1851         );
1852         assert_eq!(
1853             t("[[:cntrl:]]"),
1854             hir_uclass(ascii_class(&ast::ClassAsciiKind::Cntrl))
1855         );
1856         assert_eq!(
1857             t("[[:digit:]]"),
1858             hir_uclass(ascii_class(&ast::ClassAsciiKind::Digit))
1859         );
1860         assert_eq!(
1861             t("[[:graph:]]"),
1862             hir_uclass(ascii_class(&ast::ClassAsciiKind::Graph))
1863         );
1864         assert_eq!(
1865             t("[[:lower:]]"),
1866             hir_uclass(ascii_class(&ast::ClassAsciiKind::Lower))
1867         );
1868         assert_eq!(
1869             t("[[:print:]]"),
1870             hir_uclass(ascii_class(&ast::ClassAsciiKind::Print))
1871         );
1872         assert_eq!(
1873             t("[[:punct:]]"),
1874             hir_uclass(ascii_class(&ast::ClassAsciiKind::Punct))
1875         );
1876         assert_eq!(
1877             t("[[:space:]]"),
1878             hir_uclass(ascii_class(&ast::ClassAsciiKind::Space))
1879         );
1880         assert_eq!(
1881             t("[[:upper:]]"),
1882             hir_uclass(ascii_class(&ast::ClassAsciiKind::Upper))
1883         );
1884         assert_eq!(
1885             t("[[:word:]]"),
1886             hir_uclass(ascii_class(&ast::ClassAsciiKind::Word))
1887         );
1888         assert_eq!(
1889             t("[[:xdigit:]]"),
1890             hir_uclass(ascii_class(&ast::ClassAsciiKind::Xdigit))
1891         );
1892 
1893         assert_eq!(
1894             t("[[:^lower:]]"),
1895             hir_negate(hir_uclass(ascii_class(&ast::ClassAsciiKind::Lower)))
1896         );
1897         #[cfg(feature = "unicode-case")]
1898         assert_eq!(
1899             t("(?i)[[:lower:]]"),
1900             hir_uclass(&[
1901                 ('A', 'Z'),
1902                 ('a', 'z'),
1903                 ('\u{17F}', '\u{17F}'),
1904                 ('\u{212A}', '\u{212A}'),
1905             ])
1906         );
1907 
1908         assert_eq!(
1909             t("(?-u)[[:lower:]]"),
1910             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Lower))
1911         );
1912         assert_eq!(
1913             t("(?i-u)[[:lower:]]"),
1914             hir_case_fold(hir_bclass_from_char(ascii_class(
1915                 &ast::ClassAsciiKind::Lower
1916             )))
1917         );
1918 
1919         assert_eq!(
1920             t_err("(?-u)[[:^lower:]]"),
1921             TestError {
1922                 kind: hir::ErrorKind::InvalidUtf8,
1923                 span: Span::new(
1924                     Position::new(6, 1, 7),
1925                     Position::new(16, 1, 17)
1926                 ),
1927             }
1928         );
1929         assert_eq!(
1930             t_err("(?i-u)[[:^lower:]]"),
1931             TestError {
1932                 kind: hir::ErrorKind::InvalidUtf8,
1933                 span: Span::new(
1934                     Position::new(7, 1, 8),
1935                     Position::new(17, 1, 18)
1936                 ),
1937             }
1938         );
1939     }
1940 
1941     #[test]
1942     #[cfg(feature = "unicode-perl")]
class_perl()1943     fn class_perl() {
1944         // Unicode
1945         assert_eq!(t(r"\d"), hir_uclass_query(ClassQuery::Binary("digit")));
1946         assert_eq!(t(r"\s"), hir_uclass_query(ClassQuery::Binary("space")));
1947         assert_eq!(t(r"\w"), hir_uclass_perl_word());
1948         #[cfg(feature = "unicode-case")]
1949         assert_eq!(
1950             t(r"(?i)\d"),
1951             hir_uclass_query(ClassQuery::Binary("digit"))
1952         );
1953         #[cfg(feature = "unicode-case")]
1954         assert_eq!(
1955             t(r"(?i)\s"),
1956             hir_uclass_query(ClassQuery::Binary("space"))
1957         );
1958         #[cfg(feature = "unicode-case")]
1959         assert_eq!(t(r"(?i)\w"), hir_uclass_perl_word());
1960 
1961         // Unicode, negated
1962         assert_eq!(
1963             t(r"\D"),
1964             hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
1965         );
1966         assert_eq!(
1967             t(r"\S"),
1968             hir_negate(hir_uclass_query(ClassQuery::Binary("space")))
1969         );
1970         assert_eq!(t(r"\W"), hir_negate(hir_uclass_perl_word()));
1971         #[cfg(feature = "unicode-case")]
1972         assert_eq!(
1973             t(r"(?i)\D"),
1974             hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
1975         );
1976         #[cfg(feature = "unicode-case")]
1977         assert_eq!(
1978             t(r"(?i)\S"),
1979             hir_negate(hir_uclass_query(ClassQuery::Binary("space")))
1980         );
1981         #[cfg(feature = "unicode-case")]
1982         assert_eq!(t(r"(?i)\W"), hir_negate(hir_uclass_perl_word()));
1983 
1984         // ASCII only
1985         assert_eq!(
1986             t(r"(?-u)\d"),
1987             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
1988         );
1989         assert_eq!(
1990             t(r"(?-u)\s"),
1991             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Space))
1992         );
1993         assert_eq!(
1994             t(r"(?-u)\w"),
1995             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word))
1996         );
1997         assert_eq!(
1998             t(r"(?i-u)\d"),
1999             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
2000         );
2001         assert_eq!(
2002             t(r"(?i-u)\s"),
2003             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Space))
2004         );
2005         assert_eq!(
2006             t(r"(?i-u)\w"),
2007             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word))
2008         );
2009 
2010         // ASCII only, negated
2011         assert_eq!(
2012             t(r"(?-u)\D"),
2013             hir_negate(hir_bclass_from_char(ascii_class(
2014                 &ast::ClassAsciiKind::Digit
2015             )))
2016         );
2017         assert_eq!(
2018             t(r"(?-u)\S"),
2019             hir_negate(hir_bclass_from_char(ascii_class(
2020                 &ast::ClassAsciiKind::Space
2021             )))
2022         );
2023         assert_eq!(
2024             t(r"(?-u)\W"),
2025             hir_negate(hir_bclass_from_char(ascii_class(
2026                 &ast::ClassAsciiKind::Word
2027             )))
2028         );
2029         assert_eq!(
2030             t(r"(?i-u)\D"),
2031             hir_negate(hir_bclass_from_char(ascii_class(
2032                 &ast::ClassAsciiKind::Digit
2033             )))
2034         );
2035         assert_eq!(
2036             t(r"(?i-u)\S"),
2037             hir_negate(hir_bclass_from_char(ascii_class(
2038                 &ast::ClassAsciiKind::Space
2039             )))
2040         );
2041         assert_eq!(
2042             t(r"(?i-u)\W"),
2043             hir_negate(hir_bclass_from_char(ascii_class(
2044                 &ast::ClassAsciiKind::Word
2045             )))
2046         );
2047     }
2048 
2049     #[test]
2050     #[cfg(not(feature = "unicode-perl"))]
class_perl_word_disabled()2051     fn class_perl_word_disabled() {
2052         assert_eq!(
2053             t_err(r"\w"),
2054             TestError {
2055                 kind: hir::ErrorKind::UnicodePerlClassNotFound,
2056                 span: Span::new(
2057                     Position::new(0, 1, 1),
2058                     Position::new(2, 1, 3)
2059                 ),
2060             }
2061         );
2062     }
2063 
2064     #[test]
2065     #[cfg(all(not(feature = "unicode-perl"), not(feature = "unicode-bool")))]
class_perl_space_disabled()2066     fn class_perl_space_disabled() {
2067         assert_eq!(
2068             t_err(r"\s"),
2069             TestError {
2070                 kind: hir::ErrorKind::UnicodePerlClassNotFound,
2071                 span: Span::new(
2072                     Position::new(0, 1, 1),
2073                     Position::new(2, 1, 3)
2074                 ),
2075             }
2076         );
2077     }
2078 
2079     #[test]
2080     #[cfg(all(
2081         not(feature = "unicode-perl"),
2082         not(feature = "unicode-gencat")
2083     ))]
class_perl_digit_disabled()2084     fn class_perl_digit_disabled() {
2085         assert_eq!(
2086             t_err(r"\d"),
2087             TestError {
2088                 kind: hir::ErrorKind::UnicodePerlClassNotFound,
2089                 span: Span::new(
2090                     Position::new(0, 1, 1),
2091                     Position::new(2, 1, 3)
2092                 ),
2093             }
2094         );
2095     }
2096 
2097     #[test]
2098     #[cfg(feature = "unicode-gencat")]
class_unicode_gencat()2099     fn class_unicode_gencat() {
2100         assert_eq!(t(r"\pZ"), hir_uclass_query(ClassQuery::Binary("Z")));
2101         assert_eq!(t(r"\pz"), hir_uclass_query(ClassQuery::Binary("Z")));
2102         assert_eq!(
2103             t(r"\p{Separator}"),
2104             hir_uclass_query(ClassQuery::Binary("Z"))
2105         );
2106         assert_eq!(
2107             t(r"\p{se      PaRa ToR}"),
2108             hir_uclass_query(ClassQuery::Binary("Z"))
2109         );
2110         assert_eq!(
2111             t(r"\p{gc:Separator}"),
2112             hir_uclass_query(ClassQuery::Binary("Z"))
2113         );
2114         assert_eq!(
2115             t(r"\p{gc=Separator}"),
2116             hir_uclass_query(ClassQuery::Binary("Z"))
2117         );
2118         assert_eq!(
2119             t(r"\p{Other}"),
2120             hir_uclass_query(ClassQuery::Binary("Other"))
2121         );
2122         assert_eq!(t(r"\pC"), hir_uclass_query(ClassQuery::Binary("Other")));
2123 
2124         assert_eq!(
2125             t(r"\PZ"),
2126             hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
2127         );
2128         assert_eq!(
2129             t(r"\P{separator}"),
2130             hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
2131         );
2132         assert_eq!(
2133             t(r"\P{gc!=separator}"),
2134             hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
2135         );
2136 
2137         assert_eq!(t(r"\p{any}"), hir_uclass_query(ClassQuery::Binary("Any")));
2138         assert_eq!(
2139             t(r"\p{assigned}"),
2140             hir_uclass_query(ClassQuery::Binary("Assigned"))
2141         );
2142         assert_eq!(
2143             t(r"\p{ascii}"),
2144             hir_uclass_query(ClassQuery::Binary("ASCII"))
2145         );
2146         assert_eq!(
2147             t(r"\p{gc:any}"),
2148             hir_uclass_query(ClassQuery::Binary("Any"))
2149         );
2150         assert_eq!(
2151             t(r"\p{gc:assigned}"),
2152             hir_uclass_query(ClassQuery::Binary("Assigned"))
2153         );
2154         assert_eq!(
2155             t(r"\p{gc:ascii}"),
2156             hir_uclass_query(ClassQuery::Binary("ASCII"))
2157         );
2158 
2159         assert_eq!(
2160             t_err(r"(?-u)\pZ"),
2161             TestError {
2162                 kind: hir::ErrorKind::UnicodeNotAllowed,
2163                 span: Span::new(
2164                     Position::new(5, 1, 6),
2165                     Position::new(8, 1, 9)
2166                 ),
2167             }
2168         );
2169         assert_eq!(
2170             t_err(r"(?-u)\p{Separator}"),
2171             TestError {
2172                 kind: hir::ErrorKind::UnicodeNotAllowed,
2173                 span: Span::new(
2174                     Position::new(5, 1, 6),
2175                     Position::new(18, 1, 19)
2176                 ),
2177             }
2178         );
2179         assert_eq!(
2180             t_err(r"\pE"),
2181             TestError {
2182                 kind: hir::ErrorKind::UnicodePropertyNotFound,
2183                 span: Span::new(
2184                     Position::new(0, 1, 1),
2185                     Position::new(3, 1, 4)
2186                 ),
2187             }
2188         );
2189         assert_eq!(
2190             t_err(r"\p{Foo}"),
2191             TestError {
2192                 kind: hir::ErrorKind::UnicodePropertyNotFound,
2193                 span: Span::new(
2194                     Position::new(0, 1, 1),
2195                     Position::new(7, 1, 8)
2196                 ),
2197             }
2198         );
2199         assert_eq!(
2200             t_err(r"\p{gc:Foo}"),
2201             TestError {
2202                 kind: hir::ErrorKind::UnicodePropertyValueNotFound,
2203                 span: Span::new(
2204                     Position::new(0, 1, 1),
2205                     Position::new(10, 1, 11)
2206                 ),
2207             }
2208         );
2209     }
2210 
2211     #[test]
2212     #[cfg(not(feature = "unicode-gencat"))]
class_unicode_gencat_disabled()2213     fn class_unicode_gencat_disabled() {
2214         assert_eq!(
2215             t_err(r"\p{Separator}"),
2216             TestError {
2217                 kind: hir::ErrorKind::UnicodePropertyNotFound,
2218                 span: Span::new(
2219                     Position::new(0, 1, 1),
2220                     Position::new(13, 1, 14)
2221                 ),
2222             }
2223         );
2224 
2225         assert_eq!(
2226             t_err(r"\p{Any}"),
2227             TestError {
2228                 kind: hir::ErrorKind::UnicodePropertyNotFound,
2229                 span: Span::new(
2230                     Position::new(0, 1, 1),
2231                     Position::new(7, 1, 8)
2232                 ),
2233             }
2234         );
2235     }
2236 
2237     #[test]
2238     #[cfg(feature = "unicode-script")]
class_unicode_script()2239     fn class_unicode_script() {
2240         assert_eq!(
2241             t(r"\p{Greek}"),
2242             hir_uclass_query(ClassQuery::Binary("Greek"))
2243         );
2244         #[cfg(feature = "unicode-case")]
2245         assert_eq!(
2246             t(r"(?i)\p{Greek}"),
2247             hir_case_fold(hir_uclass_query(ClassQuery::Binary("Greek")))
2248         );
2249         #[cfg(feature = "unicode-case")]
2250         assert_eq!(
2251             t(r"(?i)\P{Greek}"),
2252             hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
2253                 "Greek"
2254             ))))
2255         );
2256 
2257         assert_eq!(
2258             t_err(r"\p{sc:Foo}"),
2259             TestError {
2260                 kind: hir::ErrorKind::UnicodePropertyValueNotFound,
2261                 span: Span::new(
2262                     Position::new(0, 1, 1),
2263                     Position::new(10, 1, 11)
2264                 ),
2265             }
2266         );
2267         assert_eq!(
2268             t_err(r"\p{scx:Foo}"),
2269             TestError {
2270                 kind: hir::ErrorKind::UnicodePropertyValueNotFound,
2271                 span: Span::new(
2272                     Position::new(0, 1, 1),
2273                     Position::new(11, 1, 12)
2274                 ),
2275             }
2276         );
2277     }
2278 
2279     #[test]
2280     #[cfg(not(feature = "unicode-script"))]
class_unicode_script_disabled()2281     fn class_unicode_script_disabled() {
2282         assert_eq!(
2283             t_err(r"\p{Greek}"),
2284             TestError {
2285                 kind: hir::ErrorKind::UnicodePropertyNotFound,
2286                 span: Span::new(
2287                     Position::new(0, 1, 1),
2288                     Position::new(9, 1, 10)
2289                 ),
2290             }
2291         );
2292 
2293         assert_eq!(
2294             t_err(r"\p{scx:Greek}"),
2295             TestError {
2296                 kind: hir::ErrorKind::UnicodePropertyNotFound,
2297                 span: Span::new(
2298                     Position::new(0, 1, 1),
2299                     Position::new(13, 1, 14)
2300                 ),
2301             }
2302         );
2303     }
2304 
2305     #[test]
2306     #[cfg(feature = "unicode-age")]
class_unicode_age()2307     fn class_unicode_age() {
2308         assert_eq!(
2309             t_err(r"\p{age:Foo}"),
2310             TestError {
2311                 kind: hir::ErrorKind::UnicodePropertyValueNotFound,
2312                 span: Span::new(
2313                     Position::new(0, 1, 1),
2314                     Position::new(11, 1, 12)
2315                 ),
2316             }
2317         );
2318     }
2319 
2320     #[test]
2321     #[cfg(not(feature = "unicode-age"))]
class_unicode_age_disabled()2322     fn class_unicode_age_disabled() {
2323         assert_eq!(
2324             t_err(r"\p{age:3.0}"),
2325             TestError {
2326                 kind: hir::ErrorKind::UnicodePropertyNotFound,
2327                 span: Span::new(
2328                     Position::new(0, 1, 1),
2329                     Position::new(11, 1, 12)
2330                 ),
2331             }
2332         );
2333     }
2334 
2335     #[test]
class_bracketed()2336     fn class_bracketed() {
2337         assert_eq!(t("[a]"), hir_uclass(&[('a', 'a')]));
2338         assert_eq!(t("[^[a]]"), hir_negate(hir_uclass(&[('a', 'a')])));
2339         assert_eq!(t("[a-z]"), hir_uclass(&[('a', 'z')]));
2340         assert_eq!(t("[a-fd-h]"), hir_uclass(&[('a', 'h')]));
2341         assert_eq!(t("[a-fg-m]"), hir_uclass(&[('a', 'm')]));
2342         assert_eq!(t(r"[\x00]"), hir_uclass(&[('\0', '\0')]));
2343         assert_eq!(t(r"[\n]"), hir_uclass(&[('\n', '\n')]));
2344         assert_eq!(t("[\n]"), hir_uclass(&[('\n', '\n')]));
2345         #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
2346         assert_eq!(t(r"[\d]"), hir_uclass_query(ClassQuery::Binary("digit")));
2347         #[cfg(feature = "unicode-gencat")]
2348         assert_eq!(
2349             t(r"[\pZ]"),
2350             hir_uclass_query(ClassQuery::Binary("separator"))
2351         );
2352         #[cfg(feature = "unicode-gencat")]
2353         assert_eq!(
2354             t(r"[\p{separator}]"),
2355             hir_uclass_query(ClassQuery::Binary("separator"))
2356         );
2357         #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
2358         assert_eq!(t(r"[^\D]"), hir_uclass_query(ClassQuery::Binary("digit")));
2359         #[cfg(feature = "unicode-gencat")]
2360         assert_eq!(
2361             t(r"[^\PZ]"),
2362             hir_uclass_query(ClassQuery::Binary("separator"))
2363         );
2364         #[cfg(feature = "unicode-gencat")]
2365         assert_eq!(
2366             t(r"[^\P{separator}]"),
2367             hir_uclass_query(ClassQuery::Binary("separator"))
2368         );
2369         #[cfg(all(
2370             feature = "unicode-case",
2371             any(feature = "unicode-perl", feature = "unicode-gencat")
2372         ))]
2373         assert_eq!(
2374             t(r"(?i)[^\D]"),
2375             hir_uclass_query(ClassQuery::Binary("digit"))
2376         );
2377         #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
2378         assert_eq!(
2379             t(r"(?i)[^\P{greek}]"),
2380             hir_case_fold(hir_uclass_query(ClassQuery::Binary("greek")))
2381         );
2382 
2383         assert_eq!(t("(?-u)[a]"), hir_bclass(&[(b'a', b'a')]));
2384         assert_eq!(t(r"(?-u)[\x00]"), hir_bclass(&[(b'\0', b'\0')]));
2385         assert_eq!(t_bytes(r"(?-u)[\xFF]"), hir_bclass(&[(b'\xFF', b'\xFF')]));
2386 
2387         #[cfg(feature = "unicode-case")]
2388         assert_eq!(t("(?i)[a]"), hir_uclass(&[('A', 'A'), ('a', 'a')]));
2389         #[cfg(feature = "unicode-case")]
2390         assert_eq!(
2391             t("(?i)[k]"),
2392             hir_uclass(&[('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}'),])
2393         );
2394         #[cfg(feature = "unicode-case")]
2395         assert_eq!(
2396             t("(?i)[β]"),
2397             hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),])
2398         );
2399         assert_eq!(t("(?i-u)[k]"), hir_bclass(&[(b'K', b'K'), (b'k', b'k'),]));
2400 
2401         assert_eq!(t("[^a]"), hir_negate(hir_uclass(&[('a', 'a')])));
2402         assert_eq!(t(r"[^\x00]"), hir_negate(hir_uclass(&[('\0', '\0')])));
2403         assert_eq!(
2404             t_bytes("(?-u)[^a]"),
2405             hir_negate(hir_bclass(&[(b'a', b'a')]))
2406         );
2407         #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
2408         assert_eq!(
2409             t(r"[^\d]"),
2410             hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
2411         );
2412         #[cfg(feature = "unicode-gencat")]
2413         assert_eq!(
2414             t(r"[^\pZ]"),
2415             hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
2416         );
2417         #[cfg(feature = "unicode-gencat")]
2418         assert_eq!(
2419             t(r"[^\p{separator}]"),
2420             hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
2421         );
2422         #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
2423         assert_eq!(
2424             t(r"(?i)[^\p{greek}]"),
2425             hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
2426                 "greek"
2427             ))))
2428         );
2429         #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
2430         assert_eq!(
2431             t(r"(?i)[\P{greek}]"),
2432             hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
2433                 "greek"
2434             ))))
2435         );
2436 
2437         // Test some weird cases.
2438         assert_eq!(t(r"[\[]"), hir_uclass(&[('[', '[')]));
2439 
2440         assert_eq!(t(r"[&]"), hir_uclass(&[('&', '&')]));
2441         assert_eq!(t(r"[\&]"), hir_uclass(&[('&', '&')]));
2442         assert_eq!(t(r"[\&\&]"), hir_uclass(&[('&', '&')]));
2443         assert_eq!(t(r"[\x00-&]"), hir_uclass(&[('\0', '&')]));
2444         assert_eq!(t(r"[&-\xFF]"), hir_uclass(&[('&', '\u{FF}')]));
2445 
2446         assert_eq!(t(r"[~]"), hir_uclass(&[('~', '~')]));
2447         assert_eq!(t(r"[\~]"), hir_uclass(&[('~', '~')]));
2448         assert_eq!(t(r"[\~\~]"), hir_uclass(&[('~', '~')]));
2449         assert_eq!(t(r"[\x00-~]"), hir_uclass(&[('\0', '~')]));
2450         assert_eq!(t(r"[~-\xFF]"), hir_uclass(&[('~', '\u{FF}')]));
2451 
2452         assert_eq!(t(r"[-]"), hir_uclass(&[('-', '-')]));
2453         assert_eq!(t(r"[\-]"), hir_uclass(&[('-', '-')]));
2454         assert_eq!(t(r"[\-\-]"), hir_uclass(&[('-', '-')]));
2455         assert_eq!(t(r"[\x00-\-]"), hir_uclass(&[('\0', '-')]));
2456         assert_eq!(t(r"[\--\xFF]"), hir_uclass(&[('-', '\u{FF}')]));
2457 
2458         assert_eq!(
2459             t_err("(?-u)[^a]"),
2460             TestError {
2461                 kind: hir::ErrorKind::InvalidUtf8,
2462                 span: Span::new(
2463                     Position::new(5, 1, 6),
2464                     Position::new(9, 1, 10)
2465                 ),
2466             }
2467         );
2468         #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
2469         assert_eq!(
2470             t_err(r"[^\s\S]"),
2471             TestError {
2472                 kind: hir::ErrorKind::EmptyClassNotAllowed,
2473                 span: Span::new(
2474                     Position::new(0, 1, 1),
2475                     Position::new(7, 1, 8)
2476                 ),
2477             }
2478         );
2479         #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
2480         assert_eq!(
2481             t_err(r"(?-u)[^\s\S]"),
2482             TestError {
2483                 kind: hir::ErrorKind::EmptyClassNotAllowed,
2484                 span: Span::new(
2485                     Position::new(5, 1, 6),
2486                     Position::new(12, 1, 13)
2487                 ),
2488             }
2489         );
2490     }
2491 
2492     #[test]
class_bracketed_union()2493     fn class_bracketed_union() {
2494         assert_eq!(t("[a-zA-Z]"), hir_uclass(&[('A', 'Z'), ('a', 'z')]));
2495         #[cfg(feature = "unicode-gencat")]
2496         assert_eq!(
2497             t(r"[a\pZb]"),
2498             hir_union(
2499                 hir_uclass(&[('a', 'b')]),
2500                 hir_uclass_query(ClassQuery::Binary("separator"))
2501             )
2502         );
2503         #[cfg(all(feature = "unicode-gencat", feature = "unicode-script"))]
2504         assert_eq!(
2505             t(r"[\pZ\p{Greek}]"),
2506             hir_union(
2507                 hir_uclass_query(ClassQuery::Binary("greek")),
2508                 hir_uclass_query(ClassQuery::Binary("separator"))
2509             )
2510         );
2511         #[cfg(all(
2512             feature = "unicode-age",
2513             feature = "unicode-gencat",
2514             feature = "unicode-script"
2515         ))]
2516         assert_eq!(
2517             t(r"[\p{age:3.0}\pZ\p{Greek}]"),
2518             hir_union(
2519                 hir_uclass_query(ClassQuery::ByValue {
2520                     property_name: "age",
2521                     property_value: "3.0",
2522                 }),
2523                 hir_union(
2524                     hir_uclass_query(ClassQuery::Binary("greek")),
2525                     hir_uclass_query(ClassQuery::Binary("separator"))
2526                 )
2527             )
2528         );
2529         #[cfg(all(
2530             feature = "unicode-age",
2531             feature = "unicode-gencat",
2532             feature = "unicode-script"
2533         ))]
2534         assert_eq!(
2535             t(r"[[[\p{age:3.0}\pZ]\p{Greek}][\p{Cyrillic}]]"),
2536             hir_union(
2537                 hir_uclass_query(ClassQuery::ByValue {
2538                     property_name: "age",
2539                     property_value: "3.0",
2540                 }),
2541                 hir_union(
2542                     hir_uclass_query(ClassQuery::Binary("cyrillic")),
2543                     hir_union(
2544                         hir_uclass_query(ClassQuery::Binary("greek")),
2545                         hir_uclass_query(ClassQuery::Binary("separator"))
2546                     )
2547                 )
2548             )
2549         );
2550 
2551         #[cfg(all(
2552             feature = "unicode-age",
2553             feature = "unicode-case",
2554             feature = "unicode-gencat",
2555             feature = "unicode-script"
2556         ))]
2557         assert_eq!(
2558             t(r"(?i)[\p{age:3.0}\pZ\p{Greek}]"),
2559             hir_case_fold(hir_union(
2560                 hir_uclass_query(ClassQuery::ByValue {
2561                     property_name: "age",
2562                     property_value: "3.0",
2563                 }),
2564                 hir_union(
2565                     hir_uclass_query(ClassQuery::Binary("greek")),
2566                     hir_uclass_query(ClassQuery::Binary("separator"))
2567                 )
2568             ))
2569         );
2570         #[cfg(all(
2571             feature = "unicode-age",
2572             feature = "unicode-gencat",
2573             feature = "unicode-script"
2574         ))]
2575         assert_eq!(
2576             t(r"[^\p{age:3.0}\pZ\p{Greek}]"),
2577             hir_negate(hir_union(
2578                 hir_uclass_query(ClassQuery::ByValue {
2579                     property_name: "age",
2580                     property_value: "3.0",
2581                 }),
2582                 hir_union(
2583                     hir_uclass_query(ClassQuery::Binary("greek")),
2584                     hir_uclass_query(ClassQuery::Binary("separator"))
2585                 )
2586             ))
2587         );
2588         #[cfg(all(
2589             feature = "unicode-age",
2590             feature = "unicode-case",
2591             feature = "unicode-gencat",
2592             feature = "unicode-script"
2593         ))]
2594         assert_eq!(
2595             t(r"(?i)[^\p{age:3.0}\pZ\p{Greek}]"),
2596             hir_negate(hir_case_fold(hir_union(
2597                 hir_uclass_query(ClassQuery::ByValue {
2598                     property_name: "age",
2599                     property_value: "3.0",
2600                 }),
2601                 hir_union(
2602                     hir_uclass_query(ClassQuery::Binary("greek")),
2603                     hir_uclass_query(ClassQuery::Binary("separator"))
2604                 )
2605             )))
2606         );
2607     }
2608 
2609     #[test]
class_bracketed_nested()2610     fn class_bracketed_nested() {
2611         assert_eq!(t(r"[a[^c]]"), hir_negate(hir_uclass(&[('c', 'c')])));
2612         assert_eq!(t(r"[a-b[^c]]"), hir_negate(hir_uclass(&[('c', 'c')])));
2613         assert_eq!(t(r"[a-c[^c]]"), hir_negate(hir_uclass(&[])));
2614 
2615         assert_eq!(t(r"[^a[^c]]"), hir_uclass(&[('c', 'c')]));
2616         assert_eq!(t(r"[^a-b[^c]]"), hir_uclass(&[('c', 'c')]));
2617 
2618         #[cfg(feature = "unicode-case")]
2619         assert_eq!(
2620             t(r"(?i)[a[^c]]"),
2621             hir_negate(hir_case_fold(hir_uclass(&[('c', 'c')])))
2622         );
2623         #[cfg(feature = "unicode-case")]
2624         assert_eq!(
2625             t(r"(?i)[a-b[^c]]"),
2626             hir_negate(hir_case_fold(hir_uclass(&[('c', 'c')])))
2627         );
2628 
2629         #[cfg(feature = "unicode-case")]
2630         assert_eq!(t(r"(?i)[^a[^c]]"), hir_uclass(&[('C', 'C'), ('c', 'c')]));
2631         #[cfg(feature = "unicode-case")]
2632         assert_eq!(
2633             t(r"(?i)[^a-b[^c]]"),
2634             hir_uclass(&[('C', 'C'), ('c', 'c')])
2635         );
2636 
2637         assert_eq!(
2638             t_err(r"[^a-c[^c]]"),
2639             TestError {
2640                 kind: hir::ErrorKind::EmptyClassNotAllowed,
2641                 span: Span::new(
2642                     Position::new(0, 1, 1),
2643                     Position::new(10, 1, 11)
2644                 ),
2645             }
2646         );
2647         #[cfg(feature = "unicode-case")]
2648         assert_eq!(
2649             t_err(r"(?i)[^a-c[^c]]"),
2650             TestError {
2651                 kind: hir::ErrorKind::EmptyClassNotAllowed,
2652                 span: Span::new(
2653                     Position::new(4, 1, 5),
2654                     Position::new(14, 1, 15)
2655                 ),
2656             }
2657         );
2658     }
2659 
2660     #[test]
class_bracketed_intersect()2661     fn class_bracketed_intersect() {
2662         assert_eq!(t("[abc&&b-c]"), hir_uclass(&[('b', 'c')]));
2663         assert_eq!(t("[abc&&[b-c]]"), hir_uclass(&[('b', 'c')]));
2664         assert_eq!(t("[[abc]&&[b-c]]"), hir_uclass(&[('b', 'c')]));
2665         assert_eq!(t("[a-z&&b-y&&c-x]"), hir_uclass(&[('c', 'x')]));
2666         assert_eq!(t("[c-da-b&&a-d]"), hir_uclass(&[('a', 'd')]));
2667         assert_eq!(t("[a-d&&c-da-b]"), hir_uclass(&[('a', 'd')]));
2668         assert_eq!(t(r"[a-z&&a-c]"), hir_uclass(&[('a', 'c')]));
2669         assert_eq!(t(r"[[a-z&&a-c]]"), hir_uclass(&[('a', 'c')]));
2670         assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
2671 
2672         assert_eq!(t("(?-u)[abc&&b-c]"), hir_bclass(&[(b'b', b'c')]));
2673         assert_eq!(t("(?-u)[abc&&[b-c]]"), hir_bclass(&[(b'b', b'c')]));
2674         assert_eq!(t("(?-u)[[abc]&&[b-c]]"), hir_bclass(&[(b'b', b'c')]));
2675         assert_eq!(t("(?-u)[a-z&&b-y&&c-x]"), hir_bclass(&[(b'c', b'x')]));
2676         assert_eq!(t("(?-u)[c-da-b&&a-d]"), hir_bclass(&[(b'a', b'd')]));
2677         assert_eq!(t("(?-u)[a-d&&c-da-b]"), hir_bclass(&[(b'a', b'd')]));
2678 
2679         #[cfg(feature = "unicode-case")]
2680         assert_eq!(
2681             t("(?i)[abc&&b-c]"),
2682             hir_case_fold(hir_uclass(&[('b', 'c')]))
2683         );
2684         #[cfg(feature = "unicode-case")]
2685         assert_eq!(
2686             t("(?i)[abc&&[b-c]]"),
2687             hir_case_fold(hir_uclass(&[('b', 'c')]))
2688         );
2689         #[cfg(feature = "unicode-case")]
2690         assert_eq!(
2691             t("(?i)[[abc]&&[b-c]]"),
2692             hir_case_fold(hir_uclass(&[('b', 'c')]))
2693         );
2694         #[cfg(feature = "unicode-case")]
2695         assert_eq!(
2696             t("(?i)[a-z&&b-y&&c-x]"),
2697             hir_case_fold(hir_uclass(&[('c', 'x')]))
2698         );
2699         #[cfg(feature = "unicode-case")]
2700         assert_eq!(
2701             t("(?i)[c-da-b&&a-d]"),
2702             hir_case_fold(hir_uclass(&[('a', 'd')]))
2703         );
2704         #[cfg(feature = "unicode-case")]
2705         assert_eq!(
2706             t("(?i)[a-d&&c-da-b]"),
2707             hir_case_fold(hir_uclass(&[('a', 'd')]))
2708         );
2709 
2710         assert_eq!(
2711             t("(?i-u)[abc&&b-c]"),
2712             hir_case_fold(hir_bclass(&[(b'b', b'c')]))
2713         );
2714         assert_eq!(
2715             t("(?i-u)[abc&&[b-c]]"),
2716             hir_case_fold(hir_bclass(&[(b'b', b'c')]))
2717         );
2718         assert_eq!(
2719             t("(?i-u)[[abc]&&[b-c]]"),
2720             hir_case_fold(hir_bclass(&[(b'b', b'c')]))
2721         );
2722         assert_eq!(
2723             t("(?i-u)[a-z&&b-y&&c-x]"),
2724             hir_case_fold(hir_bclass(&[(b'c', b'x')]))
2725         );
2726         assert_eq!(
2727             t("(?i-u)[c-da-b&&a-d]"),
2728             hir_case_fold(hir_bclass(&[(b'a', b'd')]))
2729         );
2730         assert_eq!(
2731             t("(?i-u)[a-d&&c-da-b]"),
2732             hir_case_fold(hir_bclass(&[(b'a', b'd')]))
2733         );
2734 
2735         // In `[a^]`, `^` does not need to be escaped, so it makes sense that
2736         // `^` is also allowed to be unescaped after `&&`.
2737         assert_eq!(t(r"[\^&&^]"), hir_uclass(&[('^', '^')]));
2738         // `]` needs to be escaped after `&&` since it's not at start of class.
2739         assert_eq!(t(r"[]&&\]]"), hir_uclass(&[(']', ']')]));
2740         assert_eq!(t(r"[-&&-]"), hir_uclass(&[('-', '-')]));
2741         assert_eq!(t(r"[\&&&&]"), hir_uclass(&[('&', '&')]));
2742         assert_eq!(t(r"[\&&&\&]"), hir_uclass(&[('&', '&')]));
2743         // Test precedence.
2744         assert_eq!(
2745             t(r"[a-w&&[^c-g]z]"),
2746             hir_uclass(&[('a', 'b'), ('h', 'w')])
2747         );
2748     }
2749 
2750     #[test]
class_bracketed_intersect_negate()2751     fn class_bracketed_intersect_negate() {
2752         #[cfg(feature = "unicode-perl")]
2753         assert_eq!(
2754             t(r"[^\w&&\d]"),
2755             hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
2756         );
2757         assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
2758         #[cfg(feature = "unicode-perl")]
2759         assert_eq!(
2760             t(r"[^[\w&&\d]]"),
2761             hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
2762         );
2763         #[cfg(feature = "unicode-perl")]
2764         assert_eq!(
2765             t(r"[^[^\w&&\d]]"),
2766             hir_uclass_query(ClassQuery::Binary("digit"))
2767         );
2768         #[cfg(feature = "unicode-perl")]
2769         assert_eq!(t(r"[[[^\w]&&[^\d]]]"), hir_negate(hir_uclass_perl_word()));
2770 
2771         #[cfg(feature = "unicode-perl")]
2772         assert_eq!(
2773             t_bytes(r"(?-u)[^\w&&\d]"),
2774             hir_negate(hir_bclass_from_char(ascii_class(
2775                 &ast::ClassAsciiKind::Digit
2776             )))
2777         );
2778         assert_eq!(
2779             t_bytes(r"(?-u)[^[a-z&&a-c]]"),
2780             hir_negate(hir_bclass(&[(b'a', b'c')]))
2781         );
2782         assert_eq!(
2783             t_bytes(r"(?-u)[^[\w&&\d]]"),
2784             hir_negate(hir_bclass_from_char(ascii_class(
2785                 &ast::ClassAsciiKind::Digit
2786             )))
2787         );
2788         assert_eq!(
2789             t_bytes(r"(?-u)[^[^\w&&\d]]"),
2790             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
2791         );
2792         assert_eq!(
2793             t_bytes(r"(?-u)[[[^\w]&&[^\d]]]"),
2794             hir_negate(hir_bclass_from_char(ascii_class(
2795                 &ast::ClassAsciiKind::Word
2796             )))
2797         );
2798     }
2799 
2800     #[test]
class_bracketed_difference()2801     fn class_bracketed_difference() {
2802         #[cfg(feature = "unicode-gencat")]
2803         assert_eq!(
2804             t(r"[\pL--[:ascii:]]"),
2805             hir_difference(
2806                 hir_uclass_query(ClassQuery::Binary("letter")),
2807                 hir_uclass(&[('\0', '\x7F')])
2808             )
2809         );
2810 
2811         assert_eq!(
2812             t(r"(?-u)[[:alpha:]--[:lower:]]"),
2813             hir_bclass(&[(b'A', b'Z')])
2814         );
2815     }
2816 
2817     #[test]
class_bracketed_symmetric_difference()2818     fn class_bracketed_symmetric_difference() {
2819         #[cfg(feature = "unicode-script")]
2820         assert_eq!(
2821             t(r"[\p{sc:Greek}~~\p{scx:Greek}]"),
2822             hir_uclass(&[
2823                 ('\u{0342}', '\u{0342}'),
2824                 ('\u{0345}', '\u{0345}'),
2825                 ('\u{1DC0}', '\u{1DC1}'),
2826             ])
2827         );
2828         assert_eq!(t(r"[a-g~~c-j]"), hir_uclass(&[('a', 'b'), ('h', 'j')]));
2829 
2830         assert_eq!(
2831             t(r"(?-u)[a-g~~c-j]"),
2832             hir_bclass(&[(b'a', b'b'), (b'h', b'j')])
2833         );
2834     }
2835 
2836     #[test]
ignore_whitespace()2837     fn ignore_whitespace() {
2838         assert_eq!(t(r"(?x)\12 3"), hir_lit("\n3"));
2839         assert_eq!(t(r"(?x)\x { 53 }"), hir_lit("S"));
2840         assert_eq!(
2841             t(r"(?x)\x # comment
2842 { # comment
2843     53 # comment
2844 } #comment"),
2845             hir_lit("S")
2846         );
2847 
2848         assert_eq!(t(r"(?x)\x 53"), hir_lit("S"));
2849         assert_eq!(
2850             t(r"(?x)\x # comment
2851         53 # comment"),
2852             hir_lit("S")
2853         );
2854         assert_eq!(t(r"(?x)\x5 3"), hir_lit("S"));
2855 
2856         #[cfg(feature = "unicode-gencat")]
2857         assert_eq!(
2858             t(r"(?x)\p # comment
2859 { # comment
2860     Separator # comment
2861 } # comment"),
2862             hir_uclass_query(ClassQuery::Binary("separator"))
2863         );
2864 
2865         assert_eq!(
2866             t(r"(?x)a # comment
2867 { # comment
2868     5 # comment
2869     , # comment
2870     10 # comment
2871 } # comment"),
2872             hir_range(
2873                 true,
2874                 hir::RepetitionRange::Bounded(5, 10),
2875                 hir_lit("a")
2876             )
2877         );
2878 
2879         assert_eq!(t(r"(?x)a\  # hi there"), hir_lit("a "));
2880     }
2881 
2882     #[test]
analysis_is_always_utf8()2883     fn analysis_is_always_utf8() {
2884         // Positive examples.
2885         assert!(t_bytes(r"a").is_always_utf8());
2886         assert!(t_bytes(r"ab").is_always_utf8());
2887         assert!(t_bytes(r"(?-u)a").is_always_utf8());
2888         assert!(t_bytes(r"(?-u)ab").is_always_utf8());
2889         assert!(t_bytes(r"\xFF").is_always_utf8());
2890         assert!(t_bytes(r"\xFF\xFF").is_always_utf8());
2891         assert!(t_bytes(r"[^a]").is_always_utf8());
2892         assert!(t_bytes(r"[^a][^a]").is_always_utf8());
2893         assert!(t_bytes(r"\b").is_always_utf8());
2894         assert!(t_bytes(r"\B").is_always_utf8());
2895         assert!(t_bytes(r"(?-u)\b").is_always_utf8());
2896 
2897         // Negative examples.
2898         assert!(!t_bytes(r"(?-u)\xFF").is_always_utf8());
2899         assert!(!t_bytes(r"(?-u)\xFF\xFF").is_always_utf8());
2900         assert!(!t_bytes(r"(?-u)[^a]").is_always_utf8());
2901         assert!(!t_bytes(r"(?-u)[^a][^a]").is_always_utf8());
2902         assert!(!t_bytes(r"(?-u)\B").is_always_utf8());
2903     }
2904 
2905     #[test]
analysis_is_all_assertions()2906     fn analysis_is_all_assertions() {
2907         // Positive examples.
2908         assert!(t(r"\b").is_all_assertions());
2909         assert!(t(r"\B").is_all_assertions());
2910         assert!(t(r"^").is_all_assertions());
2911         assert!(t(r"$").is_all_assertions());
2912         assert!(t(r"\A").is_all_assertions());
2913         assert!(t(r"\z").is_all_assertions());
2914         assert!(t(r"$^\z\A\b\B").is_all_assertions());
2915         assert!(t(r"$|^|\z|\A|\b|\B").is_all_assertions());
2916         assert!(t(r"^$|$^").is_all_assertions());
2917         assert!(t(r"((\b)+())*^").is_all_assertions());
2918 
2919         // Negative examples.
2920         assert!(!t(r"^a").is_all_assertions());
2921     }
2922 
2923     #[test]
analysis_is_anchored()2924     fn analysis_is_anchored() {
2925         // Positive examples.
2926         assert!(t(r"^").is_anchored_start());
2927         assert!(t(r"$").is_anchored_end());
2928         assert!(t(r"^").is_line_anchored_start());
2929         assert!(t(r"$").is_line_anchored_end());
2930 
2931         assert!(t(r"^^").is_anchored_start());
2932         assert!(t(r"$$").is_anchored_end());
2933         assert!(t(r"^^").is_line_anchored_start());
2934         assert!(t(r"$$").is_line_anchored_end());
2935 
2936         assert!(t(r"^$").is_anchored_start());
2937         assert!(t(r"^$").is_anchored_end());
2938         assert!(t(r"^$").is_line_anchored_start());
2939         assert!(t(r"^$").is_line_anchored_end());
2940 
2941         assert!(t(r"^foo").is_anchored_start());
2942         assert!(t(r"foo$").is_anchored_end());
2943         assert!(t(r"^foo").is_line_anchored_start());
2944         assert!(t(r"foo$").is_line_anchored_end());
2945 
2946         assert!(t(r"^foo|^bar").is_anchored_start());
2947         assert!(t(r"foo$|bar$").is_anchored_end());
2948         assert!(t(r"^foo|^bar").is_line_anchored_start());
2949         assert!(t(r"foo$|bar$").is_line_anchored_end());
2950 
2951         assert!(t(r"^(foo|bar)").is_anchored_start());
2952         assert!(t(r"(foo|bar)$").is_anchored_end());
2953         assert!(t(r"^(foo|bar)").is_line_anchored_start());
2954         assert!(t(r"(foo|bar)$").is_line_anchored_end());
2955 
2956         assert!(t(r"^+").is_anchored_start());
2957         assert!(t(r"$+").is_anchored_end());
2958         assert!(t(r"^+").is_line_anchored_start());
2959         assert!(t(r"$+").is_line_anchored_end());
2960         assert!(t(r"^++").is_anchored_start());
2961         assert!(t(r"$++").is_anchored_end());
2962         assert!(t(r"^++").is_line_anchored_start());
2963         assert!(t(r"$++").is_line_anchored_end());
2964         assert!(t(r"(^)+").is_anchored_start());
2965         assert!(t(r"($)+").is_anchored_end());
2966         assert!(t(r"(^)+").is_line_anchored_start());
2967         assert!(t(r"($)+").is_line_anchored_end());
2968 
2969         assert!(t(r"$^").is_anchored_start());
2970         assert!(t(r"$^").is_anchored_start());
2971         assert!(t(r"$^").is_line_anchored_end());
2972         assert!(t(r"$^").is_line_anchored_end());
2973         assert!(t(r"$^|^$").is_anchored_start());
2974         assert!(t(r"$^|^$").is_anchored_end());
2975         assert!(t(r"$^|^$").is_line_anchored_start());
2976         assert!(t(r"$^|^$").is_line_anchored_end());
2977 
2978         assert!(t(r"\b^").is_anchored_start());
2979         assert!(t(r"$\b").is_anchored_end());
2980         assert!(t(r"\b^").is_line_anchored_start());
2981         assert!(t(r"$\b").is_line_anchored_end());
2982         assert!(t(r"^(?m:^)").is_anchored_start());
2983         assert!(t(r"(?m:$)$").is_anchored_end());
2984         assert!(t(r"^(?m:^)").is_line_anchored_start());
2985         assert!(t(r"(?m:$)$").is_line_anchored_end());
2986         assert!(t(r"(?m:^)^").is_anchored_start());
2987         assert!(t(r"$(?m:$)").is_anchored_end());
2988         assert!(t(r"(?m:^)^").is_line_anchored_start());
2989         assert!(t(r"$(?m:$)").is_line_anchored_end());
2990 
2991         // Negative examples.
2992         assert!(!t(r"(?m)^").is_anchored_start());
2993         assert!(!t(r"(?m)$").is_anchored_end());
2994         assert!(!t(r"(?m:^$)|$^").is_anchored_start());
2995         assert!(!t(r"(?m:^$)|$^").is_anchored_end());
2996         assert!(!t(r"$^|(?m:^$)").is_anchored_start());
2997         assert!(!t(r"$^|(?m:^$)").is_anchored_end());
2998 
2999         assert!(!t(r"a^").is_anchored_start());
3000         assert!(!t(r"$a").is_anchored_start());
3001         assert!(!t(r"a^").is_line_anchored_start());
3002         assert!(!t(r"$a").is_line_anchored_start());
3003 
3004         assert!(!t(r"a^").is_anchored_end());
3005         assert!(!t(r"$a").is_anchored_end());
3006         assert!(!t(r"a^").is_line_anchored_end());
3007         assert!(!t(r"$a").is_line_anchored_end());
3008 
3009         assert!(!t(r"^foo|bar").is_anchored_start());
3010         assert!(!t(r"foo|bar$").is_anchored_end());
3011         assert!(!t(r"^foo|bar").is_line_anchored_start());
3012         assert!(!t(r"foo|bar$").is_line_anchored_end());
3013 
3014         assert!(!t(r"^*").is_anchored_start());
3015         assert!(!t(r"$*").is_anchored_end());
3016         assert!(!t(r"^*").is_line_anchored_start());
3017         assert!(!t(r"$*").is_line_anchored_end());
3018         assert!(!t(r"^*+").is_anchored_start());
3019         assert!(!t(r"$*+").is_anchored_end());
3020         assert!(!t(r"^*+").is_line_anchored_start());
3021         assert!(!t(r"$*+").is_line_anchored_end());
3022         assert!(!t(r"^+*").is_anchored_start());
3023         assert!(!t(r"$+*").is_anchored_end());
3024         assert!(!t(r"^+*").is_line_anchored_start());
3025         assert!(!t(r"$+*").is_line_anchored_end());
3026         assert!(!t(r"(^)*").is_anchored_start());
3027         assert!(!t(r"($)*").is_anchored_end());
3028         assert!(!t(r"(^)*").is_line_anchored_start());
3029         assert!(!t(r"($)*").is_line_anchored_end());
3030     }
3031 
3032     #[test]
analysis_is_line_anchored()3033     fn analysis_is_line_anchored() {
3034         assert!(t(r"(?m)^(foo|bar)").is_line_anchored_start());
3035         assert!(t(r"(?m)(foo|bar)$").is_line_anchored_end());
3036 
3037         assert!(t(r"(?m)^foo|^bar").is_line_anchored_start());
3038         assert!(t(r"(?m)foo$|bar$").is_line_anchored_end());
3039 
3040         assert!(t(r"(?m)^").is_line_anchored_start());
3041         assert!(t(r"(?m)$").is_line_anchored_end());
3042 
3043         assert!(t(r"(?m:^$)|$^").is_line_anchored_start());
3044         assert!(t(r"(?m:^$)|$^").is_line_anchored_end());
3045 
3046         assert!(t(r"$^|(?m:^$)").is_line_anchored_start());
3047         assert!(t(r"$^|(?m:^$)").is_line_anchored_end());
3048     }
3049 
3050     #[test]
analysis_is_any_anchored()3051     fn analysis_is_any_anchored() {
3052         // Positive examples.
3053         assert!(t(r"^").is_any_anchored_start());
3054         assert!(t(r"$").is_any_anchored_end());
3055         assert!(t(r"\A").is_any_anchored_start());
3056         assert!(t(r"\z").is_any_anchored_end());
3057 
3058         // Negative examples.
3059         assert!(!t(r"(?m)^").is_any_anchored_start());
3060         assert!(!t(r"(?m)$").is_any_anchored_end());
3061         assert!(!t(r"$").is_any_anchored_start());
3062         assert!(!t(r"^").is_any_anchored_end());
3063     }
3064 
3065     #[test]
analysis_is_match_empty()3066     fn analysis_is_match_empty() {
3067         // Positive examples.
3068         assert!(t(r"").is_match_empty());
3069         assert!(t(r"()").is_match_empty());
3070         assert!(t(r"()*").is_match_empty());
3071         assert!(t(r"()+").is_match_empty());
3072         assert!(t(r"()?").is_match_empty());
3073         assert!(t(r"a*").is_match_empty());
3074         assert!(t(r"a?").is_match_empty());
3075         assert!(t(r"a{0}").is_match_empty());
3076         assert!(t(r"a{0,}").is_match_empty());
3077         assert!(t(r"a{0,1}").is_match_empty());
3078         assert!(t(r"a{0,10}").is_match_empty());
3079         #[cfg(feature = "unicode-gencat")]
3080         assert!(t(r"\pL*").is_match_empty());
3081         assert!(t(r"a*|b").is_match_empty());
3082         assert!(t(r"b|a*").is_match_empty());
3083         assert!(t(r"a*a?(abcd)*").is_match_empty());
3084         assert!(t(r"^").is_match_empty());
3085         assert!(t(r"$").is_match_empty());
3086         assert!(t(r"(?m)^").is_match_empty());
3087         assert!(t(r"(?m)$").is_match_empty());
3088         assert!(t(r"\A").is_match_empty());
3089         assert!(t(r"\z").is_match_empty());
3090         assert!(t(r"\B").is_match_empty());
3091         assert!(t_bytes(r"(?-u)\B").is_match_empty());
3092 
3093         // Negative examples.
3094         assert!(!t(r"a+").is_match_empty());
3095         assert!(!t(r"a{1}").is_match_empty());
3096         assert!(!t(r"a{1,}").is_match_empty());
3097         assert!(!t(r"a{1,2}").is_match_empty());
3098         assert!(!t(r"a{1,10}").is_match_empty());
3099         assert!(!t(r"b|a").is_match_empty());
3100         assert!(!t(r"a*a+(abcd)*").is_match_empty());
3101         assert!(!t(r"\b").is_match_empty());
3102         assert!(!t(r"(?-u)\b").is_match_empty());
3103     }
3104 
3105     #[test]
analysis_is_literal()3106     fn analysis_is_literal() {
3107         // Positive examples.
3108         assert!(t(r"a").is_literal());
3109         assert!(t(r"ab").is_literal());
3110         assert!(t(r"abc").is_literal());
3111         assert!(t(r"(?m)abc").is_literal());
3112 
3113         // Negative examples.
3114         assert!(!t(r"").is_literal());
3115         assert!(!t(r"^").is_literal());
3116         assert!(!t(r"a|b").is_literal());
3117         assert!(!t(r"(a)").is_literal());
3118         assert!(!t(r"a+").is_literal());
3119         assert!(!t(r"foo(a)").is_literal());
3120         assert!(!t(r"(a)foo").is_literal());
3121         assert!(!t(r"[a]").is_literal());
3122     }
3123 
3124     #[test]
analysis_is_alternation_literal()3125     fn analysis_is_alternation_literal() {
3126         // Positive examples.
3127         assert!(t(r"a").is_alternation_literal());
3128         assert!(t(r"ab").is_alternation_literal());
3129         assert!(t(r"abc").is_alternation_literal());
3130         assert!(t(r"(?m)abc").is_alternation_literal());
3131         assert!(t(r"a|b").is_alternation_literal());
3132         assert!(t(r"a|b|c").is_alternation_literal());
3133         assert!(t(r"foo|bar").is_alternation_literal());
3134         assert!(t(r"foo|bar|baz").is_alternation_literal());
3135 
3136         // Negative examples.
3137         assert!(!t(r"").is_alternation_literal());
3138         assert!(!t(r"^").is_alternation_literal());
3139         assert!(!t(r"(a)").is_alternation_literal());
3140         assert!(!t(r"a+").is_alternation_literal());
3141         assert!(!t(r"foo(a)").is_alternation_literal());
3142         assert!(!t(r"(a)foo").is_alternation_literal());
3143         assert!(!t(r"[a]").is_alternation_literal());
3144         assert!(!t(r"[a]|b").is_alternation_literal());
3145         assert!(!t(r"a|[b]").is_alternation_literal());
3146         assert!(!t(r"(a)|b").is_alternation_literal());
3147         assert!(!t(r"a|(b)").is_alternation_literal());
3148     }
3149 }
3150