1 /*!
2 Defines a translator that converts an `Ast` to an `Hir`.
3 */
4 
5 use std::cell::{Cell, RefCell};
6 use std::result;
7 
8 use ast::{self, Ast, Span, Visitor};
9 use hir::{self, Error, ErrorKind, Hir};
10 use unicode::{self, ClassQuery};
11 
/// The result type used throughout this module: any failure is reported as
/// an HIR `Error`.
type Result<T> = result::Result<T, Error>;
13 
/// A builder for constructing an AST->HIR translator.
///
/// The builder only records configuration; `build` copies that
/// configuration into a fresh `Translator`.
#[derive(Clone, Debug)]
pub struct TranslatorBuilder {
    /// Whether built translators may produce HIR that can match invalid
    /// UTF-8.
    allow_invalid_utf8: bool,
    /// The initial flag settings handed to every translator that is built.
    flags: Flags,
}
20 
21 impl Default for TranslatorBuilder {
default() -> TranslatorBuilder22     fn default() -> TranslatorBuilder {
23         TranslatorBuilder::new()
24     }
25 }
26 
27 impl TranslatorBuilder {
28     /// Create a new translator builder with a default c onfiguration.
new() -> TranslatorBuilder29     pub fn new() -> TranslatorBuilder {
30         TranslatorBuilder {
31             allow_invalid_utf8: false,
32             flags: Flags::default(),
33         }
34     }
35 
36     /// Build a translator using the current configuration.
build(&self) -> Translator37     pub fn build(&self) -> Translator {
38         Translator {
39             stack: RefCell::new(vec![]),
40             flags: Cell::new(self.flags),
41             allow_invalid_utf8: self.allow_invalid_utf8,
42         }
43     }
44 
45     /// When enabled, translation will permit the construction of a regular
46     /// expression that may match invalid UTF-8.
47     ///
48     /// When disabled (the default), the translator is guaranteed to produce
49     /// an expression that will only ever match valid UTF-8 (otherwise, the
50     /// translator will return an error).
51     ///
52     /// Perhaps surprisingly, when invalid UTF-8 isn't allowed, a negated ASCII
53     /// word boundary (uttered as `(?-u:\B)` in the concrete syntax) will cause
54     /// the parser to return an error. Namely, a negated ASCII word boundary
55     /// can result in matching positions that aren't valid UTF-8 boundaries.
allow_invalid_utf8(&mut self, yes: bool) -> &mut TranslatorBuilder56     pub fn allow_invalid_utf8(&mut self, yes: bool) -> &mut TranslatorBuilder {
57         self.allow_invalid_utf8 = yes;
58         self
59     }
60 
61     /// Enable or disable the case insensitive flag (`i`) by default.
case_insensitive(&mut self, yes: bool) -> &mut TranslatorBuilder62     pub fn case_insensitive(&mut self, yes: bool) -> &mut TranslatorBuilder {
63         self.flags.case_insensitive = if yes { Some(true) } else { None };
64         self
65     }
66 
67     /// Enable or disable the multi-line matching flag (`m`) by default.
multi_line(&mut self, yes: bool) -> &mut TranslatorBuilder68     pub fn multi_line(&mut self, yes: bool) -> &mut TranslatorBuilder {
69         self.flags.multi_line = if yes { Some(true) } else { None };
70         self
71     }
72 
73     /// Enable or disable the "dot matches any character" flag (`s`) by
74     /// default.
dot_matches_new_line( &mut self, yes: bool, ) -> &mut TranslatorBuilder75     pub fn dot_matches_new_line(
76         &mut self,
77         yes: bool,
78     ) -> &mut TranslatorBuilder {
79         self.flags.dot_matches_new_line = if yes { Some(true) } else { None };
80         self
81     }
82 
83     /// Enable or disable the "swap greed" flag (`U`) by default.
swap_greed(&mut self, yes: bool) -> &mut TranslatorBuilder84     pub fn swap_greed(&mut self, yes: bool) -> &mut TranslatorBuilder {
85         self.flags.swap_greed = if yes { Some(true) } else { None };
86         self
87     }
88 
89     /// Enable or disable the Unicode flag (`u`) by default.
unicode(&mut self, yes: bool) -> &mut TranslatorBuilder90     pub fn unicode(&mut self, yes: bool) -> &mut TranslatorBuilder {
91         self.flags.unicode = if yes { None } else { Some(false) };
92         self
93     }
94 }
95 
/// A translator maps abstract syntax to a high level intermediate
/// representation.
///
/// A translator may benefit from reuse. That is, a single translator can
/// translate many abstract syntax trees.
///
/// A `Translator` can be configured in more detail via a
/// [`TranslatorBuilder`](struct.TranslatorBuilder.html).
#[derive(Clone, Debug)]
pub struct Translator {
    /// Our call stack, but on the heap.
    ///
    /// NOTE(review): this lives on the translator itself rather than on the
    /// per-translation visitor, so its contents persist between calls to
    /// `translate`.
    stack: RefCell<Vec<HirFrame>>,
    /// The current flag settings.
    ///
    /// Initialized from the builder's flags and overwritten as flag
    /// directives and groups are visited.
    flags: Cell<Flags>,
    /// Whether we're allowed to produce HIR that can match arbitrary bytes.
    allow_invalid_utf8: bool,
}
113 
114 impl Translator {
115     /// Create a new translator using the default configuration.
new() -> Translator116     pub fn new() -> Translator {
117         TranslatorBuilder::new().build()
118     }
119 
120     /// Translate the given abstract syntax tree (AST) into a high level
121     /// intermediate representation (HIR).
122     ///
123     /// If there was a problem doing the translation, then an HIR-specific
124     /// error is returned.
125     ///
126     /// The original pattern string used to produce the `Ast` *must* also be
127     /// provided. The translator does not use the pattern string during any
128     /// correct translation, but is used for error reporting.
translate(&mut self, pattern: &str, ast: &Ast) -> Result<Hir>129     pub fn translate(&mut self, pattern: &str, ast: &Ast) -> Result<Hir> {
130         ast::visit(ast, TranslatorI::new(self, pattern))
131     }
132 }
133 
/// An HirFrame is a single stack frame, represented explicitly, which is
/// created for each item in the Ast that we traverse.
///
/// Note that technically, this type doesn't represent our entire stack
/// frame. In particular, the Ast visitor represents any state associated with
/// traversing the Ast itself.
#[derive(Clone, Debug)]
enum HirFrame {
    /// An arbitrary HIR expression. These get pushed whenever we hit a base
    /// case in the Ast. They get popped after an inductive (i.e., recursive)
    /// step is complete.
    Expr(Hir),
    /// A Unicode character class. This frame is mutated as we descend into
    /// the Ast of a character class (which is itself its own mini recursive
    /// structure).
    ClassUnicode(hir::ClassUnicode),
    /// A byte-oriented character class. This frame is mutated as we descend
    /// into the Ast of a character class (which is itself its own mini
    /// recursive structure).
    ///
    /// Byte character classes are created when Unicode mode (`u`) is disabled.
    /// If `allow_invalid_utf8` is disabled (the default), then a byte
    /// character is only permitted to match ASCII text.
    ClassBytes(hir::ClassBytes),
    /// This is pushed on to the stack upon first seeing any kind of group,
    /// indicated by parentheses (including non-capturing groups). It is popped
    /// upon leaving a group.
    Group {
        /// The old active flags, if any, when this group was opened.
        ///
        /// This is `Some` only when the group itself carries flags; a group
        /// without flags records `None` and nothing is restored for it.
        ///
        /// If this group sets flags, then the new active flags are set to the
        /// result of merging the old flags with the flags introduced by this
        /// group.
        ///
        /// When this group is popped, the active flags should be restored to
        /// the flags set here.
        ///
        /// The "active" flags correspond to whatever flags are set in the
        /// Translator.
        old_flags: Option<Flags>,
    },
    /// This is pushed whenever a non-empty concatenation is observed. After
    /// visiting every sub-expression in the concatenation, the translator's
    /// stack is popped until it sees a frame that is not an expression,
    /// which is expected to be this Concat frame.
    Concat,
    /// This is pushed whenever a non-empty alternation is observed. After
    /// visiting every sub-expression in the alternation, the translator's
    /// stack is popped until it sees a frame that is not an expression,
    /// which is expected to be this Alternation frame.
    Alternation,
}
184 
185 impl HirFrame {
186     /// Assert that the current stack frame is an Hir expression and return it.
unwrap_expr(self) -> Hir187     fn unwrap_expr(self) -> Hir {
188         match self {
189             HirFrame::Expr(expr) => expr,
190             _ => panic!("tried to unwrap expr from HirFrame, got: {:?}", self),
191         }
192     }
193 
194     /// Assert that the current stack frame is a Unicode class expression and
195     /// return it.
unwrap_class_unicode(self) -> hir::ClassUnicode196     fn unwrap_class_unicode(self) -> hir::ClassUnicode {
197         match self {
198             HirFrame::ClassUnicode(cls) => cls,
199             _ => panic!(
200                 "tried to unwrap Unicode class \
201                  from HirFrame, got: {:?}",
202                 self
203             ),
204         }
205     }
206 
207     /// Assert that the current stack frame is a byte class expression and
208     /// return it.
unwrap_class_bytes(self) -> hir::ClassBytes209     fn unwrap_class_bytes(self) -> hir::ClassBytes {
210         match self {
211             HirFrame::ClassBytes(cls) => cls,
212             _ => panic!(
213                 "tried to unwrap byte class \
214                  from HirFrame, got: {:?}",
215                 self
216             ),
217         }
218     }
219 
220     /// Assert that the current stack frame is a group indicator and return
221     /// its corresponding flags (the flags that were active at the time the
222     /// group was entered) if they exist.
unwrap_group(self) -> Option<Flags>223     fn unwrap_group(self) -> Option<Flags> {
224         match self {
225             HirFrame::Group { old_flags } => old_flags,
226             _ => {
227                 panic!("tried to unwrap group from HirFrame, got: {:?}", self)
228             }
229         }
230     }
231 }
232 
233 impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
234     type Output = Hir;
235     type Err = Error;
236 
finish(self) -> Result<Hir>237     fn finish(self) -> Result<Hir> {
238         // ... otherwise, we should have exactly one HIR on the stack.
239         assert_eq!(self.trans().stack.borrow().len(), 1);
240         Ok(self.pop().unwrap().unwrap_expr())
241     }
242 
visit_pre(&mut self, ast: &Ast) -> Result<()>243     fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
244         match *ast {
245             Ast::Class(ast::Class::Bracketed(_)) => {
246                 if self.flags().unicode() {
247                     let cls = hir::ClassUnicode::empty();
248                     self.push(HirFrame::ClassUnicode(cls));
249                 } else {
250                     let cls = hir::ClassBytes::empty();
251                     self.push(HirFrame::ClassBytes(cls));
252                 }
253             }
254             Ast::Group(ref x) => {
255                 let old_flags = x.flags().map(|ast| self.set_flags(ast));
256                 self.push(HirFrame::Group { old_flags: old_flags });
257             }
258             Ast::Concat(ref x) if x.asts.is_empty() => {}
259             Ast::Concat(_) => {
260                 self.push(HirFrame::Concat);
261             }
262             Ast::Alternation(ref x) if x.asts.is_empty() => {}
263             Ast::Alternation(_) => {
264                 self.push(HirFrame::Alternation);
265             }
266             _ => {}
267         }
268         Ok(())
269     }
270 
visit_post(&mut self, ast: &Ast) -> Result<()>271     fn visit_post(&mut self, ast: &Ast) -> Result<()> {
272         match *ast {
273             Ast::Empty(_) => {
274                 self.push(HirFrame::Expr(Hir::empty()));
275             }
276             Ast::Flags(ref x) => {
277                 self.set_flags(&x.flags);
278                 // Flags in the AST are generally considered directives and
279                 // not actual sub-expressions. However, they can be used in
280                 // the concrete syntax like `((?i))`, and we need some kind of
281                 // indication of an expression there, and Empty is the correct
282                 // choice.
283                 //
284                 // There can also be things like `(?i)+`, but we rule those out
285                 // in the parser. In the future, we might allow them for
286                 // consistency sake.
287                 self.push(HirFrame::Expr(Hir::empty()));
288             }
289             Ast::Literal(ref x) => {
290                 self.push(HirFrame::Expr(self.hir_literal(x)?));
291             }
292             Ast::Dot(span) => {
293                 self.push(HirFrame::Expr(self.hir_dot(span)?));
294             }
295             Ast::Assertion(ref x) => {
296                 self.push(HirFrame::Expr(self.hir_assertion(x)?));
297             }
298             Ast::Class(ast::Class::Perl(ref x)) => {
299                 if self.flags().unicode() {
300                     let cls = self.hir_perl_unicode_class(x)?;
301                     let hcls = hir::Class::Unicode(cls);
302                     self.push(HirFrame::Expr(Hir::class(hcls)));
303                 } else {
304                     let cls = self.hir_perl_byte_class(x);
305                     let hcls = hir::Class::Bytes(cls);
306                     self.push(HirFrame::Expr(Hir::class(hcls)));
307                 }
308             }
309             Ast::Class(ast::Class::Unicode(ref x)) => {
310                 let cls = hir::Class::Unicode(self.hir_unicode_class(x)?);
311                 self.push(HirFrame::Expr(Hir::class(cls)));
312             }
313             Ast::Class(ast::Class::Bracketed(ref ast)) => {
314                 if self.flags().unicode() {
315                     let mut cls = self.pop().unwrap().unwrap_class_unicode();
316                     self.unicode_fold_and_negate(
317                         &ast.span,
318                         ast.negated,
319                         &mut cls,
320                     )?;
321                     if cls.iter().next().is_none() {
322                         return Err(self.error(
323                             ast.span,
324                             ErrorKind::EmptyClassNotAllowed,
325                         ));
326                     }
327                     let expr = Hir::class(hir::Class::Unicode(cls));
328                     self.push(HirFrame::Expr(expr));
329                 } else {
330                     let mut cls = self.pop().unwrap().unwrap_class_bytes();
331                     self.bytes_fold_and_negate(
332                         &ast.span,
333                         ast.negated,
334                         &mut cls,
335                     )?;
336                     if cls.iter().next().is_none() {
337                         return Err(self.error(
338                             ast.span,
339                             ErrorKind::EmptyClassNotAllowed,
340                         ));
341                     }
342 
343                     let expr = Hir::class(hir::Class::Bytes(cls));
344                     self.push(HirFrame::Expr(expr));
345                 }
346             }
347             Ast::Repetition(ref x) => {
348                 let expr = self.pop().unwrap().unwrap_expr();
349                 self.push(HirFrame::Expr(self.hir_repetition(x, expr)));
350             }
351             Ast::Group(ref x) => {
352                 let expr = self.pop().unwrap().unwrap_expr();
353                 if let Some(flags) = self.pop().unwrap().unwrap_group() {
354                     self.trans().flags.set(flags);
355                 }
356                 self.push(HirFrame::Expr(self.hir_group(x, expr)));
357             }
358             Ast::Concat(_) => {
359                 let mut exprs = vec![];
360                 while let Some(HirFrame::Expr(expr)) = self.pop() {
361                     if !expr.kind().is_empty() {
362                         exprs.push(expr);
363                     }
364                 }
365                 exprs.reverse();
366                 self.push(HirFrame::Expr(Hir::concat(exprs)));
367             }
368             Ast::Alternation(_) => {
369                 let mut exprs = vec![];
370                 while let Some(HirFrame::Expr(expr)) = self.pop() {
371                     exprs.push(expr);
372                 }
373                 exprs.reverse();
374                 self.push(HirFrame::Expr(Hir::alternation(exprs)));
375             }
376         }
377         Ok(())
378     }
379 
visit_class_set_item_pre( &mut self, ast: &ast::ClassSetItem, ) -> Result<()>380     fn visit_class_set_item_pre(
381         &mut self,
382         ast: &ast::ClassSetItem,
383     ) -> Result<()> {
384         match *ast {
385             ast::ClassSetItem::Bracketed(_) => {
386                 if self.flags().unicode() {
387                     let cls = hir::ClassUnicode::empty();
388                     self.push(HirFrame::ClassUnicode(cls));
389                 } else {
390                     let cls = hir::ClassBytes::empty();
391                     self.push(HirFrame::ClassBytes(cls));
392                 }
393             }
394             // We needn't handle the Union case here since the visitor will
395             // do it for us.
396             _ => {}
397         }
398         Ok(())
399     }
400 
visit_class_set_item_post( &mut self, ast: &ast::ClassSetItem, ) -> Result<()>401     fn visit_class_set_item_post(
402         &mut self,
403         ast: &ast::ClassSetItem,
404     ) -> Result<()> {
405         match *ast {
406             ast::ClassSetItem::Empty(_) => {}
407             ast::ClassSetItem::Literal(ref x) => {
408                 if self.flags().unicode() {
409                     let mut cls = self.pop().unwrap().unwrap_class_unicode();
410                     cls.push(hir::ClassUnicodeRange::new(x.c, x.c));
411                     self.push(HirFrame::ClassUnicode(cls));
412                 } else {
413                     let mut cls = self.pop().unwrap().unwrap_class_bytes();
414                     let byte = self.class_literal_byte(x)?;
415                     cls.push(hir::ClassBytesRange::new(byte, byte));
416                     self.push(HirFrame::ClassBytes(cls));
417                 }
418             }
419             ast::ClassSetItem::Range(ref x) => {
420                 if self.flags().unicode() {
421                     let mut cls = self.pop().unwrap().unwrap_class_unicode();
422                     cls.push(hir::ClassUnicodeRange::new(x.start.c, x.end.c));
423                     self.push(HirFrame::ClassUnicode(cls));
424                 } else {
425                     let mut cls = self.pop().unwrap().unwrap_class_bytes();
426                     let start = self.class_literal_byte(&x.start)?;
427                     let end = self.class_literal_byte(&x.end)?;
428                     cls.push(hir::ClassBytesRange::new(start, end));
429                     self.push(HirFrame::ClassBytes(cls));
430                 }
431             }
432             ast::ClassSetItem::Ascii(ref x) => {
433                 if self.flags().unicode() {
434                     let mut cls = self.pop().unwrap().unwrap_class_unicode();
435                     for &(s, e) in ascii_class(&x.kind) {
436                         cls.push(hir::ClassUnicodeRange::new(s, e));
437                     }
438                     self.unicode_fold_and_negate(
439                         &x.span, x.negated, &mut cls,
440                     )?;
441                     self.push(HirFrame::ClassUnicode(cls));
442                 } else {
443                     let mut cls = self.pop().unwrap().unwrap_class_bytes();
444                     for &(s, e) in ascii_class(&x.kind) {
445                         cls.push(hir::ClassBytesRange::new(s as u8, e as u8));
446                     }
447                     self.bytes_fold_and_negate(&x.span, x.negated, &mut cls)?;
448                     self.push(HirFrame::ClassBytes(cls));
449                 }
450             }
451             ast::ClassSetItem::Unicode(ref x) => {
452                 let xcls = self.hir_unicode_class(x)?;
453                 let mut cls = self.pop().unwrap().unwrap_class_unicode();
454                 cls.union(&xcls);
455                 self.push(HirFrame::ClassUnicode(cls));
456             }
457             ast::ClassSetItem::Perl(ref x) => {
458                 if self.flags().unicode() {
459                     let xcls = self.hir_perl_unicode_class(x)?;
460                     let mut cls = self.pop().unwrap().unwrap_class_unicode();
461                     cls.union(&xcls);
462                     self.push(HirFrame::ClassUnicode(cls));
463                 } else {
464                     let xcls = self.hir_perl_byte_class(x);
465                     let mut cls = self.pop().unwrap().unwrap_class_bytes();
466                     cls.union(&xcls);
467                     self.push(HirFrame::ClassBytes(cls));
468                 }
469             }
470             ast::ClassSetItem::Bracketed(ref ast) => {
471                 if self.flags().unicode() {
472                     let mut cls1 = self.pop().unwrap().unwrap_class_unicode();
473                     self.unicode_fold_and_negate(
474                         &ast.span,
475                         ast.negated,
476                         &mut cls1,
477                     )?;
478 
479                     let mut cls2 = self.pop().unwrap().unwrap_class_unicode();
480                     cls2.union(&cls1);
481                     self.push(HirFrame::ClassUnicode(cls2));
482                 } else {
483                     let mut cls1 = self.pop().unwrap().unwrap_class_bytes();
484                     self.bytes_fold_and_negate(
485                         &ast.span,
486                         ast.negated,
487                         &mut cls1,
488                     )?;
489 
490                     let mut cls2 = self.pop().unwrap().unwrap_class_bytes();
491                     cls2.union(&cls1);
492                     self.push(HirFrame::ClassBytes(cls2));
493                 }
494             }
495             // This is handled automatically by the visitor.
496             ast::ClassSetItem::Union(_) => {}
497         }
498         Ok(())
499     }
500 
visit_class_set_binary_op_pre( &mut self, _op: &ast::ClassSetBinaryOp, ) -> Result<()>501     fn visit_class_set_binary_op_pre(
502         &mut self,
503         _op: &ast::ClassSetBinaryOp,
504     ) -> Result<()> {
505         if self.flags().unicode() {
506             let cls = hir::ClassUnicode::empty();
507             self.push(HirFrame::ClassUnicode(cls));
508         } else {
509             let cls = hir::ClassBytes::empty();
510             self.push(HirFrame::ClassBytes(cls));
511         }
512         Ok(())
513     }
514 
visit_class_set_binary_op_in( &mut self, _op: &ast::ClassSetBinaryOp, ) -> Result<()>515     fn visit_class_set_binary_op_in(
516         &mut self,
517         _op: &ast::ClassSetBinaryOp,
518     ) -> Result<()> {
519         if self.flags().unicode() {
520             let cls = hir::ClassUnicode::empty();
521             self.push(HirFrame::ClassUnicode(cls));
522         } else {
523             let cls = hir::ClassBytes::empty();
524             self.push(HirFrame::ClassBytes(cls));
525         }
526         Ok(())
527     }
528 
visit_class_set_binary_op_post( &mut self, op: &ast::ClassSetBinaryOp, ) -> Result<()>529     fn visit_class_set_binary_op_post(
530         &mut self,
531         op: &ast::ClassSetBinaryOp,
532     ) -> Result<()> {
533         use ast::ClassSetBinaryOpKind::*;
534 
535         if self.flags().unicode() {
536             let mut rhs = self.pop().unwrap().unwrap_class_unicode();
537             let mut lhs = self.pop().unwrap().unwrap_class_unicode();
538             let mut cls = self.pop().unwrap().unwrap_class_unicode();
539             if self.flags().case_insensitive() {
540                 rhs.try_case_fold_simple().map_err(|_| {
541                     self.error(
542                         op.rhs.span().clone(),
543                         ErrorKind::UnicodeCaseUnavailable,
544                     )
545                 })?;
546                 lhs.try_case_fold_simple().map_err(|_| {
547                     self.error(
548                         op.lhs.span().clone(),
549                         ErrorKind::UnicodeCaseUnavailable,
550                     )
551                 })?;
552             }
553             match op.kind {
554                 Intersection => lhs.intersect(&rhs),
555                 Difference => lhs.difference(&rhs),
556                 SymmetricDifference => lhs.symmetric_difference(&rhs),
557             }
558             cls.union(&lhs);
559             self.push(HirFrame::ClassUnicode(cls));
560         } else {
561             let mut rhs = self.pop().unwrap().unwrap_class_bytes();
562             let mut lhs = self.pop().unwrap().unwrap_class_bytes();
563             let mut cls = self.pop().unwrap().unwrap_class_bytes();
564             if self.flags().case_insensitive() {
565                 rhs.case_fold_simple();
566                 lhs.case_fold_simple();
567             }
568             match op.kind {
569                 Intersection => lhs.intersect(&rhs),
570                 Difference => lhs.difference(&rhs),
571                 SymmetricDifference => lhs.symmetric_difference(&rhs),
572             }
573             cls.union(&lhs);
574             self.push(HirFrame::ClassBytes(cls));
575         }
576         Ok(())
577     }
578 }
579 
/// The internal implementation of a translator.
///
/// This type is responsible for carrying around the original pattern string,
/// which is not tied to the internal state of a translator.
///
/// A TranslatorI exists for the time it takes to translate a single Ast.
#[derive(Clone, Debug)]
struct TranslatorI<'t, 'p> {
    /// The translator whose stack and flags are read and updated while the
    /// Ast is traversed.
    trans: &'t Translator,
    /// The original pattern string. It is copied into every `Error` this
    /// type creates.
    pattern: &'p str,
}
591 
592 impl<'t, 'p> TranslatorI<'t, 'p> {
593     /// Build a new internal translator.
new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p>594     fn new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p> {
595         TranslatorI { trans: trans, pattern: pattern }
596     }
597 
598     /// Return a reference to the underlying translator.
trans(&self) -> &Translator599     fn trans(&self) -> &Translator {
600         &self.trans
601     }
602 
603     /// Push the given frame on to the call stack.
push(&self, frame: HirFrame)604     fn push(&self, frame: HirFrame) {
605         self.trans().stack.borrow_mut().push(frame);
606     }
607 
608     /// Pop the top of the call stack. If the call stack is empty, return None.
pop(&self) -> Option<HirFrame>609     fn pop(&self) -> Option<HirFrame> {
610         self.trans().stack.borrow_mut().pop()
611     }
612 
613     /// Create a new error with the given span and error type.
error(&self, span: Span, kind: ErrorKind) -> Error614     fn error(&self, span: Span, kind: ErrorKind) -> Error {
615         Error { kind: kind, pattern: self.pattern.to_string(), span: span }
616     }
617 
618     /// Return a copy of the active flags.
flags(&self) -> Flags619     fn flags(&self) -> Flags {
620         self.trans().flags.get()
621     }
622 
623     /// Set the flags of this translator from the flags set in the given AST.
624     /// Then, return the old flags.
set_flags(&self, ast_flags: &ast::Flags) -> Flags625     fn set_flags(&self, ast_flags: &ast::Flags) -> Flags {
626         let old_flags = self.flags();
627         let mut new_flags = Flags::from_ast(ast_flags);
628         new_flags.merge(&old_flags);
629         self.trans().flags.set(new_flags);
630         old_flags
631     }
632 
hir_literal(&self, lit: &ast::Literal) -> Result<Hir>633     fn hir_literal(&self, lit: &ast::Literal) -> Result<Hir> {
634         let ch = match self.literal_to_char(lit)? {
635             byte @ hir::Literal::Byte(_) => return Ok(Hir::literal(byte)),
636             hir::Literal::Unicode(ch) => ch,
637         };
638         if self.flags().case_insensitive() {
639             self.hir_from_char_case_insensitive(lit.span, ch)
640         } else {
641             self.hir_from_char(lit.span, ch)
642         }
643     }
644 
645     /// Convert an Ast literal to its scalar representation.
646     ///
647     /// When Unicode mode is enabled, then this always succeeds and returns a
648     /// `char` (Unicode scalar value).
649     ///
650     /// When Unicode mode is disabled, then a raw byte is returned. If that
651     /// byte is not ASCII and invalid UTF-8 is not allowed, then this returns
652     /// an error.
literal_to_char(&self, lit: &ast::Literal) -> Result<hir::Literal>653     fn literal_to_char(&self, lit: &ast::Literal) -> Result<hir::Literal> {
654         if self.flags().unicode() {
655             return Ok(hir::Literal::Unicode(lit.c));
656         }
657         let byte = match lit.byte() {
658             None => return Ok(hir::Literal::Unicode(lit.c)),
659             Some(byte) => byte,
660         };
661         if byte <= 0x7F {
662             return Ok(hir::Literal::Unicode(byte as char));
663         }
664         if !self.trans().allow_invalid_utf8 {
665             return Err(self.error(lit.span, ErrorKind::InvalidUtf8));
666         }
667         Ok(hir::Literal::Byte(byte))
668     }
669 
hir_from_char(&self, span: Span, c: char) -> Result<Hir>670     fn hir_from_char(&self, span: Span, c: char) -> Result<Hir> {
671         if !self.flags().unicode() && c.len_utf8() > 1 {
672             return Err(self.error(span, ErrorKind::UnicodeNotAllowed));
673         }
674         Ok(Hir::literal(hir::Literal::Unicode(c)))
675     }
676 
hir_from_char_case_insensitive( &self, span: Span, c: char, ) -> Result<Hir>677     fn hir_from_char_case_insensitive(
678         &self,
679         span: Span,
680         c: char,
681     ) -> Result<Hir> {
682         if self.flags().unicode() {
683             // If case folding won't do anything, then don't bother trying.
684             let map =
685                 unicode::contains_simple_case_mapping(c, c).map_err(|_| {
686                     self.error(span, ErrorKind::UnicodeCaseUnavailable)
687                 })?;
688             if !map {
689                 return self.hir_from_char(span, c);
690             }
691             let mut cls =
692                 hir::ClassUnicode::new(vec![hir::ClassUnicodeRange::new(
693                     c, c,
694                 )]);
695             cls.try_case_fold_simple().map_err(|_| {
696                 self.error(span, ErrorKind::UnicodeCaseUnavailable)
697             })?;
698             Ok(Hir::class(hir::Class::Unicode(cls)))
699         } else {
700             if c.len_utf8() > 1 {
701                 return Err(self.error(span, ErrorKind::UnicodeNotAllowed));
702             }
703             // If case folding won't do anything, then don't bother trying.
704             match c {
705                 'A'..='Z' | 'a'..='z' => {}
706                 _ => return self.hir_from_char(span, c),
707             }
708             let mut cls =
709                 hir::ClassBytes::new(vec![hir::ClassBytesRange::new(
710                     c as u8, c as u8,
711                 )]);
712             cls.case_fold_simple();
713             Ok(Hir::class(hir::Class::Bytes(cls)))
714         }
715     }
716 
hir_dot(&self, span: Span) -> Result<Hir>717     fn hir_dot(&self, span: Span) -> Result<Hir> {
718         let unicode = self.flags().unicode();
719         if !unicode && !self.trans().allow_invalid_utf8 {
720             return Err(self.error(span, ErrorKind::InvalidUtf8));
721         }
722         Ok(if self.flags().dot_matches_new_line() {
723             Hir::any(!unicode)
724         } else {
725             Hir::dot(!unicode)
726         })
727     }
728 
hir_assertion(&self, asst: &ast::Assertion) -> Result<Hir>729     fn hir_assertion(&self, asst: &ast::Assertion) -> Result<Hir> {
730         let unicode = self.flags().unicode();
731         let multi_line = self.flags().multi_line();
732         Ok(match asst.kind {
733             ast::AssertionKind::StartLine => Hir::anchor(if multi_line {
734                 hir::Anchor::StartLine
735             } else {
736                 hir::Anchor::StartText
737             }),
738             ast::AssertionKind::EndLine => Hir::anchor(if multi_line {
739                 hir::Anchor::EndLine
740             } else {
741                 hir::Anchor::EndText
742             }),
743             ast::AssertionKind::StartText => {
744                 Hir::anchor(hir::Anchor::StartText)
745             }
746             ast::AssertionKind::EndText => Hir::anchor(hir::Anchor::EndText),
747             ast::AssertionKind::WordBoundary => {
748                 Hir::word_boundary(if unicode {
749                     hir::WordBoundary::Unicode
750                 } else {
751                     hir::WordBoundary::Ascii
752                 })
753             }
754             ast::AssertionKind::NotWordBoundary => {
755                 Hir::word_boundary(if unicode {
756                     hir::WordBoundary::UnicodeNegate
757                 } else {
758                     // It is possible for negated ASCII word boundaries to
759                     // match at invalid UTF-8 boundaries, even when searching
760                     // valid UTF-8.
761                     if !self.trans().allow_invalid_utf8 {
762                         return Err(
763                             self.error(asst.span, ErrorKind::InvalidUtf8)
764                         );
765                     }
766                     hir::WordBoundary::AsciiNegate
767                 })
768             }
769         })
770     }
771 
hir_group(&self, group: &ast::Group, expr: Hir) -> Hir772     fn hir_group(&self, group: &ast::Group, expr: Hir) -> Hir {
773         let kind = match group.kind {
774             ast::GroupKind::CaptureIndex(idx) => {
775                 hir::GroupKind::CaptureIndex(idx)
776             }
777             ast::GroupKind::CaptureName(ref capname) => {
778                 hir::GroupKind::CaptureName {
779                     name: capname.name.clone(),
780                     index: capname.index,
781                 }
782             }
783             ast::GroupKind::NonCapturing(_) => hir::GroupKind::NonCapturing,
784         };
785         Hir::group(hir::Group { kind: kind, hir: Box::new(expr) })
786     }
787 
hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir788     fn hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir {
789         let kind = match rep.op.kind {
790             ast::RepetitionKind::ZeroOrOne => hir::RepetitionKind::ZeroOrOne,
791             ast::RepetitionKind::ZeroOrMore => hir::RepetitionKind::ZeroOrMore,
792             ast::RepetitionKind::OneOrMore => hir::RepetitionKind::OneOrMore,
793             ast::RepetitionKind::Range(ast::RepetitionRange::Exactly(m)) => {
794                 hir::RepetitionKind::Range(hir::RepetitionRange::Exactly(m))
795             }
796             ast::RepetitionKind::Range(ast::RepetitionRange::AtLeast(m)) => {
797                 hir::RepetitionKind::Range(hir::RepetitionRange::AtLeast(m))
798             }
799             ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(
800                 m,
801                 n,
802             )) => {
803                 hir::RepetitionKind::Range(hir::RepetitionRange::Bounded(m, n))
804             }
805         };
806         let greedy =
807             if self.flags().swap_greed() { !rep.greedy } else { rep.greedy };
808         Hir::repetition(hir::Repetition {
809             kind: kind,
810             greedy: greedy,
811             hir: Box::new(expr),
812         })
813     }
814 
hir_unicode_class( &self, ast_class: &ast::ClassUnicode, ) -> Result<hir::ClassUnicode>815     fn hir_unicode_class(
816         &self,
817         ast_class: &ast::ClassUnicode,
818     ) -> Result<hir::ClassUnicode> {
819         use ast::ClassUnicodeKind::*;
820 
821         if !self.flags().unicode() {
822             return Err(
823                 self.error(ast_class.span, ErrorKind::UnicodeNotAllowed)
824             );
825         }
826         let query = match ast_class.kind {
827             OneLetter(name) => ClassQuery::OneLetter(name),
828             Named(ref name) => ClassQuery::Binary(name),
829             NamedValue { ref name, ref value, .. } => ClassQuery::ByValue {
830                 property_name: name,
831                 property_value: value,
832             },
833         };
834         let mut result = self.convert_unicode_class_error(
835             &ast_class.span,
836             unicode::class(query),
837         );
838         if let Ok(ref mut class) = result {
839             self.unicode_fold_and_negate(
840                 &ast_class.span,
841                 ast_class.negated,
842                 class,
843             )?;
844         }
845         result
846     }
847 
hir_perl_unicode_class( &self, ast_class: &ast::ClassPerl, ) -> Result<hir::ClassUnicode>848     fn hir_perl_unicode_class(
849         &self,
850         ast_class: &ast::ClassPerl,
851     ) -> Result<hir::ClassUnicode> {
852         use ast::ClassPerlKind::*;
853 
854         assert!(self.flags().unicode());
855         let result = match ast_class.kind {
856             Digit => unicode::perl_digit(),
857             Space => unicode::perl_space(),
858             Word => unicode::perl_word(),
859         };
860         let mut class =
861             self.convert_unicode_class_error(&ast_class.span, result)?;
862         // We needn't apply case folding here because the Perl Unicode classes
863         // are already closed under Unicode simple case folding.
864         if ast_class.negated {
865             class.negate();
866         }
867         Ok(class)
868     }
869 
hir_perl_byte_class( &self, ast_class: &ast::ClassPerl, ) -> hir::ClassBytes870     fn hir_perl_byte_class(
871         &self,
872         ast_class: &ast::ClassPerl,
873     ) -> hir::ClassBytes {
874         use ast::ClassPerlKind::*;
875 
876         assert!(!self.flags().unicode());
877         let mut class = match ast_class.kind {
878             Digit => hir_ascii_class_bytes(&ast::ClassAsciiKind::Digit),
879             Space => hir_ascii_class_bytes(&ast::ClassAsciiKind::Space),
880             Word => hir_ascii_class_bytes(&ast::ClassAsciiKind::Word),
881         };
882         // We needn't apply case folding here because the Perl ASCII classes
883         // are already closed (under ASCII case folding).
884         if ast_class.negated {
885             class.negate();
886         }
887         class
888     }
889 
890     /// Converts the given Unicode specific error to an HIR translation error.
891     ///
892     /// The span given should approximate the position at which an error would
893     /// occur.
convert_unicode_class_error( &self, span: &Span, result: unicode::Result<hir::ClassUnicode>, ) -> Result<hir::ClassUnicode>894     fn convert_unicode_class_error(
895         &self,
896         span: &Span,
897         result: unicode::Result<hir::ClassUnicode>,
898     ) -> Result<hir::ClassUnicode> {
899         result.map_err(|err| {
900             let sp = span.clone();
901             match err {
902                 unicode::Error::PropertyNotFound => {
903                     self.error(sp, ErrorKind::UnicodePropertyNotFound)
904                 }
905                 unicode::Error::PropertyValueNotFound => {
906                     self.error(sp, ErrorKind::UnicodePropertyValueNotFound)
907                 }
908                 unicode::Error::PerlClassNotFound => {
909                     self.error(sp, ErrorKind::UnicodePerlClassNotFound)
910                 }
911             }
912         })
913     }
914 
unicode_fold_and_negate( &self, span: &Span, negated: bool, class: &mut hir::ClassUnicode, ) -> Result<()>915     fn unicode_fold_and_negate(
916         &self,
917         span: &Span,
918         negated: bool,
919         class: &mut hir::ClassUnicode,
920     ) -> Result<()> {
921         // Note that we must apply case folding before negation!
922         // Consider `(?i)[^x]`. If we applied negation field, then
923         // the result would be the character class that matched any
924         // Unicode scalar value.
925         if self.flags().case_insensitive() {
926             class.try_case_fold_simple().map_err(|_| {
927                 self.error(span.clone(), ErrorKind::UnicodeCaseUnavailable)
928             })?;
929         }
930         if negated {
931             class.negate();
932         }
933         Ok(())
934     }
935 
bytes_fold_and_negate( &self, span: &Span, negated: bool, class: &mut hir::ClassBytes, ) -> Result<()>936     fn bytes_fold_and_negate(
937         &self,
938         span: &Span,
939         negated: bool,
940         class: &mut hir::ClassBytes,
941     ) -> Result<()> {
942         // Note that we must apply case folding before negation!
943         // Consider `(?i)[^x]`. If we applied negation field, then
944         // the result would be the character class that matched any
945         // Unicode scalar value.
946         if self.flags().case_insensitive() {
947             class.case_fold_simple();
948         }
949         if negated {
950             class.negate();
951         }
952         if !self.trans().allow_invalid_utf8 && !class.is_all_ascii() {
953             return Err(self.error(span.clone(), ErrorKind::InvalidUtf8));
954         }
955         Ok(())
956     }
957 
958     /// Return a scalar byte value suitable for use as a literal in a byte
959     /// character class.
class_literal_byte(&self, ast: &ast::Literal) -> Result<u8>960     fn class_literal_byte(&self, ast: &ast::Literal) -> Result<u8> {
961         match self.literal_to_char(ast)? {
962             hir::Literal::Byte(byte) => Ok(byte),
963             hir::Literal::Unicode(ch) => {
964                 if ch <= 0x7F as char {
965                     Ok(ch as u8)
966                 } else {
967                     // We can't feasibly support Unicode in
968                     // byte oriented classes. Byte classes don't
969                     // do Unicode case folding.
970                     Err(self.error(ast.span, ErrorKind::UnicodeNotAllowed))
971                 }
972             }
973         }
974     }
975 }
976 
/// A translator's representation of a regular expression's flags at any given
/// moment in time.
///
/// Each flag can be in one of three states: absent, present but disabled or
/// present but enabled.
///
/// An absent flag is stored as `None`. The accessor methods on this type
/// resolve an absent flag to its default, which is "enabled" for `unicode`
/// and "disabled" for every other flag.
#[derive(Clone, Copy, Debug, Default)]
struct Flags {
    /// State of the `CaseInsensitive` flag.
    case_insensitive: Option<bool>,
    /// State of the `MultiLine` flag.
    multi_line: Option<bool>,
    /// State of the `DotMatchesNewLine` flag.
    dot_matches_new_line: Option<bool>,
    /// State of the `SwapGreed` flag.
    swap_greed: Option<bool>,
    /// State of the `Unicode` flag.
    unicode: Option<bool>,
    // Note that `ignore_whitespace` is omitted here because it is handled
    // entirely in the parser.
}
992 
993 impl Flags {
from_ast(ast: &ast::Flags) -> Flags994     fn from_ast(ast: &ast::Flags) -> Flags {
995         let mut flags = Flags::default();
996         let mut enable = true;
997         for item in &ast.items {
998             match item.kind {
999                 ast::FlagsItemKind::Negation => {
1000                     enable = false;
1001                 }
1002                 ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive) => {
1003                     flags.case_insensitive = Some(enable);
1004                 }
1005                 ast::FlagsItemKind::Flag(ast::Flag::MultiLine) => {
1006                     flags.multi_line = Some(enable);
1007                 }
1008                 ast::FlagsItemKind::Flag(ast::Flag::DotMatchesNewLine) => {
1009                     flags.dot_matches_new_line = Some(enable);
1010                 }
1011                 ast::FlagsItemKind::Flag(ast::Flag::SwapGreed) => {
1012                     flags.swap_greed = Some(enable);
1013                 }
1014                 ast::FlagsItemKind::Flag(ast::Flag::Unicode) => {
1015                     flags.unicode = Some(enable);
1016                 }
1017                 ast::FlagsItemKind::Flag(ast::Flag::IgnoreWhitespace) => {}
1018             }
1019         }
1020         flags
1021     }
1022 
merge(&mut self, previous: &Flags)1023     fn merge(&mut self, previous: &Flags) {
1024         if self.case_insensitive.is_none() {
1025             self.case_insensitive = previous.case_insensitive;
1026         }
1027         if self.multi_line.is_none() {
1028             self.multi_line = previous.multi_line;
1029         }
1030         if self.dot_matches_new_line.is_none() {
1031             self.dot_matches_new_line = previous.dot_matches_new_line;
1032         }
1033         if self.swap_greed.is_none() {
1034             self.swap_greed = previous.swap_greed;
1035         }
1036         if self.unicode.is_none() {
1037             self.unicode = previous.unicode;
1038         }
1039     }
1040 
case_insensitive(&self) -> bool1041     fn case_insensitive(&self) -> bool {
1042         self.case_insensitive.unwrap_or(false)
1043     }
1044 
multi_line(&self) -> bool1045     fn multi_line(&self) -> bool {
1046         self.multi_line.unwrap_or(false)
1047     }
1048 
dot_matches_new_line(&self) -> bool1049     fn dot_matches_new_line(&self) -> bool {
1050         self.dot_matches_new_line.unwrap_or(false)
1051     }
1052 
swap_greed(&self) -> bool1053     fn swap_greed(&self) -> bool {
1054         self.swap_greed.unwrap_or(false)
1055     }
1056 
unicode(&self) -> bool1057     fn unicode(&self) -> bool {
1058         self.unicode.unwrap_or(true)
1059     }
1060 }
1061 
hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes1062 fn hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes {
1063     let ranges: Vec<_> = ascii_class(kind)
1064         .iter()
1065         .cloned()
1066         .map(|(s, e)| hir::ClassBytesRange::new(s as u8, e as u8))
1067         .collect();
1068     hir::ClassBytes::new(ranges)
1069 }
1070 
ascii_class(kind: &ast::ClassAsciiKind) -> &'static [(char, char)]1071 fn ascii_class(kind: &ast::ClassAsciiKind) -> &'static [(char, char)] {
1072     use ast::ClassAsciiKind::*;
1073     match *kind {
1074         Alnum => &[('0', '9'), ('A', 'Z'), ('a', 'z')],
1075         Alpha => &[('A', 'Z'), ('a', 'z')],
1076         Ascii => &[('\x00', '\x7F')],
1077         Blank => &[('\t', '\t'), (' ', ' ')],
1078         Cntrl => &[('\x00', '\x1F'), ('\x7F', '\x7F')],
1079         Digit => &[('0', '9')],
1080         Graph => &[('!', '~')],
1081         Lower => &[('a', 'z')],
1082         Print => &[(' ', '~')],
1083         Punct => &[('!', '/'), (':', '@'), ('[', '`'), ('{', '~')],
1084         Space => &[
1085             ('\t', '\t'),
1086             ('\n', '\n'),
1087             ('\x0B', '\x0B'),
1088             ('\x0C', '\x0C'),
1089             ('\r', '\r'),
1090             (' ', ' '),
1091         ],
1092         Upper => &[('A', 'Z')],
1093         Word => &[('0', '9'), ('A', 'Z'), ('_', '_'), ('a', 'z')],
1094         Xdigit => &[('0', '9'), ('A', 'F'), ('a', 'f')],
1095     }
1096 }
1097 
1098 #[cfg(test)]
1099 mod tests {
1100     use ast::parse::ParserBuilder;
1101     use ast::{self, Ast, Position, Span};
1102     use hir::{self, Hir, HirKind};
1103     use unicode::{self, ClassQuery};
1104 
1105     use super::{ascii_class, TranslatorBuilder};
1106 
    // We create these errors to compare with real hir::Errors in the tests.
    // We define equality between TestError and hir::Error to disregard the
    // pattern string in hir::Error, which is annoying to provide in tests.
    //
    // Only the two fields below take part in those comparisons.
    #[derive(Clone, Debug)]
    struct TestError {
        // The span the translation error is expected to carry.
        span: Span,
        // The kind the translation error is expected to have.
        kind: hir::ErrorKind,
    }
1115 
1116     impl PartialEq<hir::Error> for TestError {
eq(&self, other: &hir::Error) -> bool1117         fn eq(&self, other: &hir::Error) -> bool {
1118             self.span == other.span && self.kind == other.kind
1119         }
1120     }
1121 
1122     impl PartialEq<TestError> for hir::Error {
eq(&self, other: &TestError) -> bool1123         fn eq(&self, other: &TestError) -> bool {
1124             self.span == other.span && self.kind == other.kind
1125         }
1126     }
1127 
parse(pattern: &str) -> Ast1128     fn parse(pattern: &str) -> Ast {
1129         ParserBuilder::new().octal(true).build().parse(pattern).unwrap()
1130     }
1131 
t(pattern: &str) -> Hir1132     fn t(pattern: &str) -> Hir {
1133         TranslatorBuilder::new()
1134             .allow_invalid_utf8(false)
1135             .build()
1136             .translate(pattern, &parse(pattern))
1137             .unwrap()
1138     }
1139 
t_err(pattern: &str) -> hir::Error1140     fn t_err(pattern: &str) -> hir::Error {
1141         TranslatorBuilder::new()
1142             .allow_invalid_utf8(false)
1143             .build()
1144             .translate(pattern, &parse(pattern))
1145             .unwrap_err()
1146     }
1147 
t_bytes(pattern: &str) -> Hir1148     fn t_bytes(pattern: &str) -> Hir {
1149         TranslatorBuilder::new()
1150             .allow_invalid_utf8(true)
1151             .build()
1152             .translate(pattern, &parse(pattern))
1153             .unwrap()
1154     }
1155 
hir_lit(s: &str) -> Hir1156     fn hir_lit(s: &str) -> Hir {
1157         match s.len() {
1158             0 => Hir::empty(),
1159             _ => {
1160                 let lits = s
1161                     .chars()
1162                     .map(hir::Literal::Unicode)
1163                     .map(Hir::literal)
1164                     .collect();
1165                 Hir::concat(lits)
1166             }
1167         }
1168     }
1169 
hir_blit(s: &[u8]) -> Hir1170     fn hir_blit(s: &[u8]) -> Hir {
1171         match s.len() {
1172             0 => Hir::empty(),
1173             1 => Hir::literal(hir::Literal::Byte(s[0])),
1174             _ => {
1175                 let lits = s
1176                     .iter()
1177                     .cloned()
1178                     .map(hir::Literal::Byte)
1179                     .map(Hir::literal)
1180                     .collect();
1181                 Hir::concat(lits)
1182             }
1183         }
1184     }
1185 
hir_group(i: u32, expr: Hir) -> Hir1186     fn hir_group(i: u32, expr: Hir) -> Hir {
1187         Hir::group(hir::Group {
1188             kind: hir::GroupKind::CaptureIndex(i),
1189             hir: Box::new(expr),
1190         })
1191     }
1192 
hir_group_name(i: u32, name: &str, expr: Hir) -> Hir1193     fn hir_group_name(i: u32, name: &str, expr: Hir) -> Hir {
1194         Hir::group(hir::Group {
1195             kind: hir::GroupKind::CaptureName {
1196                 name: name.to_string(),
1197                 index: i,
1198             },
1199             hir: Box::new(expr),
1200         })
1201     }
1202 
hir_group_nocap(expr: Hir) -> Hir1203     fn hir_group_nocap(expr: Hir) -> Hir {
1204         Hir::group(hir::Group {
1205             kind: hir::GroupKind::NonCapturing,
1206             hir: Box::new(expr),
1207         })
1208     }
1209 
hir_quest(greedy: bool, expr: Hir) -> Hir1210     fn hir_quest(greedy: bool, expr: Hir) -> Hir {
1211         Hir::repetition(hir::Repetition {
1212             kind: hir::RepetitionKind::ZeroOrOne,
1213             greedy: greedy,
1214             hir: Box::new(expr),
1215         })
1216     }
1217 
hir_star(greedy: bool, expr: Hir) -> Hir1218     fn hir_star(greedy: bool, expr: Hir) -> Hir {
1219         Hir::repetition(hir::Repetition {
1220             kind: hir::RepetitionKind::ZeroOrMore,
1221             greedy: greedy,
1222             hir: Box::new(expr),
1223         })
1224     }
1225 
hir_plus(greedy: bool, expr: Hir) -> Hir1226     fn hir_plus(greedy: bool, expr: Hir) -> Hir {
1227         Hir::repetition(hir::Repetition {
1228             kind: hir::RepetitionKind::OneOrMore,
1229             greedy: greedy,
1230             hir: Box::new(expr),
1231         })
1232     }
1233 
hir_range(greedy: bool, range: hir::RepetitionRange, expr: Hir) -> Hir1234     fn hir_range(greedy: bool, range: hir::RepetitionRange, expr: Hir) -> Hir {
1235         Hir::repetition(hir::Repetition {
1236             kind: hir::RepetitionKind::Range(range),
1237             greedy: greedy,
1238             hir: Box::new(expr),
1239         })
1240     }
1241 
    // Shorthand for building an alternation of the given expressions via
    // `Hir::alternation`.
    fn hir_alt(alts: Vec<Hir>) -> Hir {
        Hir::alternation(alts)
    }
1245 
    // Shorthand for building a concatenation of the given expressions via
    // `Hir::concat`.
    fn hir_cat(exprs: Vec<Hir>) -> Hir {
        Hir::concat(exprs)
    }
1249 
1250     #[allow(dead_code)]
hir_uclass_query(query: ClassQuery) -> Hir1251     fn hir_uclass_query(query: ClassQuery) -> Hir {
1252         Hir::class(hir::Class::Unicode(unicode::class(query).unwrap()))
1253     }
1254 
1255     #[allow(dead_code)]
hir_uclass_perl_word() -> Hir1256     fn hir_uclass_perl_word() -> Hir {
1257         Hir::class(hir::Class::Unicode(unicode::perl_word().unwrap()))
1258     }
1259 
hir_uclass(ranges: &[(char, char)]) -> Hir1260     fn hir_uclass(ranges: &[(char, char)]) -> Hir {
1261         let ranges: Vec<hir::ClassUnicodeRange> = ranges
1262             .iter()
1263             .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e))
1264             .collect();
1265         Hir::class(hir::Class::Unicode(hir::ClassUnicode::new(ranges)))
1266     }
1267 
hir_bclass(ranges: &[(u8, u8)]) -> Hir1268     fn hir_bclass(ranges: &[(u8, u8)]) -> Hir {
1269         let ranges: Vec<hir::ClassBytesRange> = ranges
1270             .iter()
1271             .map(|&(s, e)| hir::ClassBytesRange::new(s, e))
1272             .collect();
1273         Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges)))
1274     }
1275 
hir_bclass_from_char(ranges: &[(char, char)]) -> Hir1276     fn hir_bclass_from_char(ranges: &[(char, char)]) -> Hir {
1277         let ranges: Vec<hir::ClassBytesRange> = ranges
1278             .iter()
1279             .map(|&(s, e)| {
1280                 assert!(s as u32 <= 0x7F);
1281                 assert!(e as u32 <= 0x7F);
1282                 hir::ClassBytesRange::new(s as u8, e as u8)
1283             })
1284             .collect();
1285         Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges)))
1286     }
1287 
hir_case_fold(expr: Hir) -> Hir1288     fn hir_case_fold(expr: Hir) -> Hir {
1289         match expr.into_kind() {
1290             HirKind::Class(mut cls) => {
1291                 cls.case_fold_simple();
1292                 Hir::class(cls)
1293             }
1294             _ => panic!("cannot case fold non-class Hir expr"),
1295         }
1296     }
1297 
hir_negate(expr: Hir) -> Hir1298     fn hir_negate(expr: Hir) -> Hir {
1299         match expr.into_kind() {
1300             HirKind::Class(mut cls) => {
1301                 cls.negate();
1302                 Hir::class(cls)
1303             }
1304             _ => panic!("cannot negate non-class Hir expr"),
1305         }
1306     }
1307 
1308     #[allow(dead_code)]
hir_union(expr1: Hir, expr2: Hir) -> Hir1309     fn hir_union(expr1: Hir, expr2: Hir) -> Hir {
1310         use hir::Class::{Bytes, Unicode};
1311 
1312         match (expr1.into_kind(), expr2.into_kind()) {
1313             (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
1314                 c1.union(&c2);
1315                 Hir::class(hir::Class::Unicode(c1))
1316             }
1317             (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
1318                 c1.union(&c2);
1319                 Hir::class(hir::Class::Bytes(c1))
1320             }
1321             _ => panic!("cannot union non-class Hir exprs"),
1322         }
1323     }
1324 
1325     #[allow(dead_code)]
hir_difference(expr1: Hir, expr2: Hir) -> Hir1326     fn hir_difference(expr1: Hir, expr2: Hir) -> Hir {
1327         use hir::Class::{Bytes, Unicode};
1328 
1329         match (expr1.into_kind(), expr2.into_kind()) {
1330             (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
1331                 c1.difference(&c2);
1332                 Hir::class(hir::Class::Unicode(c1))
1333             }
1334             (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
1335                 c1.difference(&c2);
1336                 Hir::class(hir::Class::Bytes(c1))
1337             }
1338             _ => panic!("cannot difference non-class Hir exprs"),
1339         }
1340     }
1341 
    // Shorthand for building an anchor expression via `Hir::anchor`.
    fn hir_anchor(anchor: hir::Anchor) -> Hir {
        Hir::anchor(anchor)
    }
1345 
    // Shorthand for building a word boundary expression via
    // `Hir::word_boundary`.
    fn hir_word(wb: hir::WordBoundary) -> Hir {
        Hir::word_boundary(wb)
    }
1349 
    // Checks that patterns (and sub-patterns) without any content translate
    // to `Hir::empty()`, including inside groups and alternations.
    #[test]
    fn empty() {
        // The empty pattern and a lone flag directive are both empty.
        assert_eq!(t(""), Hir::empty());
        assert_eq!(t("(?i)"), Hir::empty());
        // Groups of every kind may wrap an empty expression.
        assert_eq!(t("()"), hir_group(1, Hir::empty()));
        assert_eq!(t("(?:)"), hir_group_nocap(Hir::empty()));
        assert_eq!(t("(?P<wat>)"), hir_group_name(1, "wat", Hir::empty()));
        // Empty alternation branches are preserved as empty expressions.
        assert_eq!(t("|"), hir_alt(vec![Hir::empty(), Hir::empty()]));
        assert_eq!(
            t("()|()"),
            hir_alt(vec![
                hir_group(1, Hir::empty()),
                hir_group(2, Hir::empty()),
            ])
        );
        assert_eq!(
            t("(|b)"),
            hir_group(1, hir_alt(vec![Hir::empty(), hir_lit("b"),]))
        );
        assert_eq!(
            t("(a|)"),
            hir_group(1, hir_alt(vec![hir_lit("a"), Hir::empty(),]))
        );
        assert_eq!(
            t("(a||c)"),
            hir_group(
                1,
                hir_alt(vec![hir_lit("a"), Hir::empty(), hir_lit("c"),])
            )
        );
        assert_eq!(
            t("(||)"),
            hir_group(
                1,
                hir_alt(vec![Hir::empty(), Hir::empty(), Hir::empty(),])
            )
        );
    }
1388 
    // Checks the translation of literals, with and without Unicode mode and
    // with and without invalid UTF-8 being permitted.
    #[test]
    fn literal() {
        // Translation with invalid UTF-8 disallowed.
        assert_eq!(t("a"), hir_lit("a"));
        assert_eq!(t("(?-u)a"), hir_lit("a"));
        assert_eq!(t("☃"), hir_lit("☃"));
        assert_eq!(t("abcd"), hir_lit("abcd"));

        // Translation with invalid UTF-8 allowed. ASCII is still expected as
        // a Unicode literal, while `\xFF` is expected as a raw byte literal.
        assert_eq!(t_bytes("(?-u)a"), hir_lit("a"));
        assert_eq!(t_bytes("(?-u)\x61"), hir_lit("a"));
        assert_eq!(t_bytes(r"(?-u)\x61"), hir_lit("a"));
        assert_eq!(t_bytes(r"(?-u)\xFF"), hir_blit(b"\xFF"));

        // A non-ASCII literal is rejected when Unicode mode is disabled.
        assert_eq!(
            t_err("(?-u)☃"),
            TestError {
                kind: hir::ErrorKind::UnicodeNotAllowed,
                span: Span::new(
                    Position::new(5, 1, 6),
                    Position::new(8, 1, 7)
                ),
            }
        );
        // The `\xFF` byte is rejected when invalid UTF-8 is disallowed.
        assert_eq!(
            t_err(r"(?-u)\xFF"),
            TestError {
                kind: hir::ErrorKind::InvalidUtf8,
                span: Span::new(
                    Position::new(5, 1, 6),
                    Position::new(9, 1, 10)
                ),
            }
        );
    }
1422 
    // Checks the translation of literals when case insensitivity is
    // enabled: cased literals are expected to become classes while uncased
    // ones remain literals.
    #[test]
    fn literal_case_insensitive() {
        // Unicode mode. These cases are only compiled when the
        // `unicode-case` feature is enabled.
        #[cfg(feature = "unicode-case")]
        assert_eq!(t("(?i)a"), hir_uclass(&[('A', 'A'), ('a', 'a'),]));
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?i:a)"),
            hir_group_nocap(hir_uclass(&[('A', 'A'), ('a', 'a')],))
        );
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("a(?i)a(?-i)a"),
            hir_cat(vec![
                hir_lit("a"),
                hir_uclass(&[('A', 'A'), ('a', 'a')]),
                hir_lit("a"),
            ])
        );
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?i)ab@c"),
            hir_cat(vec![
                hir_uclass(&[('A', 'A'), ('a', 'a')]),
                hir_uclass(&[('B', 'B'), ('b', 'b')]),
                hir_lit("@"),
                hir_uclass(&[('C', 'C'), ('c', 'c')]),
            ])
        );
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?i)β"),
            hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),])
        );

        // Non-Unicode mode with invalid UTF-8 disallowed: byte classes are
        // expected instead of Unicode classes.
        assert_eq!(t("(?i-u)a"), hir_bclass(&[(b'A', b'A'), (b'a', b'a'),]));
        // NOTE(review): the expected value here is a byte class, so the
        // `unicode-case` gate on this case looks unnecessary -- confirm.
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?-u)a(?i)a(?-i)a"),
            hir_cat(vec![
                hir_lit("a"),
                hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
                hir_lit("a"),
            ])
        );
        assert_eq!(
            t("(?i-u)ab@c"),
            hir_cat(vec![
                hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
                hir_bclass(&[(b'B', b'B'), (b'b', b'b')]),
                hir_lit("@"),
                hir_bclass(&[(b'C', b'C'), (b'c', b'c')]),
            ])
        );

        // Non-Unicode mode with invalid UTF-8 allowed.
        assert_eq!(
            t_bytes("(?i-u)a"),
            hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
        );
        assert_eq!(
            t_bytes("(?i-u)\x61"),
            hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
        );
        assert_eq!(
            t_bytes(r"(?i-u)\x61"),
            hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
        );
        // `\xFF` is expected to stay a plain byte literal.
        assert_eq!(t_bytes(r"(?i-u)\xFF"), hir_blit(b"\xFF"));

        // A non-ASCII literal is rejected when Unicode mode is disabled.
        assert_eq!(
            t_err("(?i-u)β"),
            TestError {
                kind: hir::ErrorKind::UnicodeNotAllowed,
                span: Span::new(
                    Position::new(6, 1, 7),
                    Position::new(8, 1, 8),
                ),
            }
        );
    }
1502 
    // Checks the translation of `.` under the `s` and `u` flags.
    #[test]
    fn dot() {
        // Unicode mode: everything except `\n`, or everything with `s`.
        assert_eq!(
            t("."),
            hir_uclass(&[('\0', '\t'), ('\x0B', '\u{10FFFF}'),])
        );
        assert_eq!(t("(?s)."), hir_uclass(&[('\0', '\u{10FFFF}'),]));
        // Non-Unicode mode with invalid UTF-8 allowed: the same, but as byte
        // classes.
        assert_eq!(
            t_bytes("(?-u)."),
            hir_bclass(&[(b'\0', b'\t'), (b'\x0B', b'\xFF'),])
        );
        assert_eq!(t_bytes("(?s-u)."), hir_bclass(&[(b'\0', b'\xFF'),]));

        // If invalid UTF-8 isn't allowed, then non-Unicode `.` isn't allowed.
        assert_eq!(
            t_err("(?-u)."),
            TestError {
                kind: hir::ErrorKind::InvalidUtf8,
                span: Span::new(
                    Position::new(5, 1, 6),
                    Position::new(6, 1, 7)
                ),
            }
        );
        assert_eq!(
            t_err("(?s-u)."),
            TestError {
                kind: hir::ErrorKind::InvalidUtf8,
                span: Span::new(
                    Position::new(6, 1, 7),
                    Position::new(7, 1, 8)
                ),
            }
        );
    }
1538 
    // Checks the translation of anchors and word boundaries under the `m`
    // and `u` flags.
    #[test]
    fn assertions() {
        // Without multi-line mode, `^` and `$` are text anchors.
        assert_eq!(t("^"), hir_anchor(hir::Anchor::StartText));
        assert_eq!(t("$"), hir_anchor(hir::Anchor::EndText));
        assert_eq!(t(r"\A"), hir_anchor(hir::Anchor::StartText));
        assert_eq!(t(r"\z"), hir_anchor(hir::Anchor::EndText));
        // With multi-line mode, `^` and `$` become line anchors while `\A`
        // and `\z` are unaffected.
        assert_eq!(t("(?m)^"), hir_anchor(hir::Anchor::StartLine));
        assert_eq!(t("(?m)$"), hir_anchor(hir::Anchor::EndLine));
        assert_eq!(t(r"(?m)\A"), hir_anchor(hir::Anchor::StartText));
        assert_eq!(t(r"(?m)\z"), hir_anchor(hir::Anchor::EndText));

        // Word boundaries follow the Unicode flag.
        assert_eq!(t(r"\b"), hir_word(hir::WordBoundary::Unicode));
        assert_eq!(t(r"\B"), hir_word(hir::WordBoundary::UnicodeNegate));
        assert_eq!(t(r"(?-u)\b"), hir_word(hir::WordBoundary::Ascii));
        assert_eq!(
            t_bytes(r"(?-u)\B"),
            hir_word(hir::WordBoundary::AsciiNegate)
        );

        // A negated ASCII word boundary is rejected when invalid UTF-8 is
        // disallowed.
        assert_eq!(
            t_err(r"(?-u)\B"),
            TestError {
                kind: hir::ErrorKind::InvalidUtf8,
                span: Span::new(
                    Position::new(5, 1, 6),
                    Position::new(7, 1, 8)
                ),
            }
        );
    }
1569 
1570     #[test]
group()1571     fn group() {
1572         assert_eq!(t("(a)"), hir_group(1, hir_lit("a")));
1573         assert_eq!(
1574             t("(a)(b)"),
1575             hir_cat(vec![
1576                 hir_group(1, hir_lit("a")),
1577                 hir_group(2, hir_lit("b")),
1578             ])
1579         );
1580         assert_eq!(
1581             t("(a)|(b)"),
1582             hir_alt(vec![
1583                 hir_group(1, hir_lit("a")),
1584                 hir_group(2, hir_lit("b")),
1585             ])
1586         );
1587         assert_eq!(t("(?P<foo>)"), hir_group_name(1, "foo", Hir::empty()));
1588         assert_eq!(t("(?P<foo>a)"), hir_group_name(1, "foo", hir_lit("a")));
1589         assert_eq!(
1590             t("(?P<foo>a)(?P<bar>b)"),
1591             hir_cat(vec![
1592                 hir_group_name(1, "foo", hir_lit("a")),
1593                 hir_group_name(2, "bar", hir_lit("b")),
1594             ])
1595         );
1596         assert_eq!(t("(?:)"), hir_group_nocap(Hir::empty()));
1597         assert_eq!(t("(?:a)"), hir_group_nocap(hir_lit("a")));
1598         assert_eq!(
1599             t("(?:a)(b)"),
1600             hir_cat(vec![
1601                 hir_group_nocap(hir_lit("a")),
1602                 hir_group(1, hir_lit("b")),
1603             ])
1604         );
1605         assert_eq!(
1606             t("(a)(?:b)(c)"),
1607             hir_cat(vec![
1608                 hir_group(1, hir_lit("a")),
1609                 hir_group_nocap(hir_lit("b")),
1610                 hir_group(2, hir_lit("c")),
1611             ])
1612         );
1613         assert_eq!(
1614             t("(a)(?P<foo>b)(c)"),
1615             hir_cat(vec![
1616                 hir_group(1, hir_lit("a")),
1617                 hir_group_name(2, "foo", hir_lit("b")),
1618                 hir_group(3, hir_lit("c")),
1619             ])
1620         );
1621         assert_eq!(t("()"), hir_group(1, Hir::empty()));
1622         assert_eq!(t("((?i))"), hir_group(1, Hir::empty()));
1623         assert_eq!(t("((?x))"), hir_group(1, Hir::empty()));
1624         assert_eq!(t("(((?x)))"), hir_group(1, hir_group(2, Hir::empty())));
1625     }
1626 
1627     #[test]
flags()1628     fn flags() {
1629         #[cfg(feature = "unicode-case")]
1630         assert_eq!(
1631             t("(?i:a)a"),
1632             hir_cat(vec![
1633                 hir_group_nocap(hir_uclass(&[('A', 'A'), ('a', 'a')])),
1634                 hir_lit("a"),
1635             ])
1636         );
1637         assert_eq!(
1638             t("(?i-u:a)β"),
1639             hir_cat(vec![
1640                 hir_group_nocap(hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
1641                 hir_lit("β"),
1642             ])
1643         );
1644         #[cfg(feature = "unicode-case")]
1645         assert_eq!(
1646             t("(?i)(?-i:a)a"),
1647             hir_cat(vec![
1648                 hir_group_nocap(hir_lit("a")),
1649                 hir_uclass(&[('A', 'A'), ('a', 'a')]),
1650             ])
1651         );
1652         #[cfg(feature = "unicode-case")]
1653         assert_eq!(
1654             t("(?im)a^"),
1655             hir_cat(vec![
1656                 hir_uclass(&[('A', 'A'), ('a', 'a')]),
1657                 hir_anchor(hir::Anchor::StartLine),
1658             ])
1659         );
1660         #[cfg(feature = "unicode-case")]
1661         assert_eq!(
1662             t("(?im)a^(?i-m)a^"),
1663             hir_cat(vec![
1664                 hir_uclass(&[('A', 'A'), ('a', 'a')]),
1665                 hir_anchor(hir::Anchor::StartLine),
1666                 hir_uclass(&[('A', 'A'), ('a', 'a')]),
1667                 hir_anchor(hir::Anchor::StartText),
1668             ])
1669         );
1670         assert_eq!(
1671             t("(?U)a*a*?(?-U)a*a*?"),
1672             hir_cat(vec![
1673                 hir_star(false, hir_lit("a")),
1674                 hir_star(true, hir_lit("a")),
1675                 hir_star(true, hir_lit("a")),
1676                 hir_star(false, hir_lit("a")),
1677             ])
1678         );
1679         #[cfg(feature = "unicode-case")]
1680         assert_eq!(
1681             t("(?:a(?i)a)a"),
1682             hir_cat(vec![
1683                 hir_group_nocap(hir_cat(vec![
1684                     hir_lit("a"),
1685                     hir_uclass(&[('A', 'A'), ('a', 'a')]),
1686                 ])),
1687                 hir_lit("a"),
1688             ])
1689         );
1690         #[cfg(feature = "unicode-case")]
1691         assert_eq!(
1692             t("(?i)(?:a(?-i)a)a"),
1693             hir_cat(vec![
1694                 hir_group_nocap(hir_cat(vec![
1695                     hir_uclass(&[('A', 'A'), ('a', 'a')]),
1696                     hir_lit("a"),
1697                 ])),
1698                 hir_uclass(&[('A', 'A'), ('a', 'a')]),
1699             ])
1700         );
1701     }
1702 
1703     #[test]
escape()1704     fn escape() {
1705         assert_eq!(
1706             t(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#"),
1707             hir_lit(r"\.+*?()|[]{}^$#")
1708         );
1709     }
1710 
1711     #[test]
repetition()1712     fn repetition() {
1713         assert_eq!(t("a?"), hir_quest(true, hir_lit("a")));
1714         assert_eq!(t("a*"), hir_star(true, hir_lit("a")));
1715         assert_eq!(t("a+"), hir_plus(true, hir_lit("a")));
1716         assert_eq!(t("a??"), hir_quest(false, hir_lit("a")));
1717         assert_eq!(t("a*?"), hir_star(false, hir_lit("a")));
1718         assert_eq!(t("a+?"), hir_plus(false, hir_lit("a")));
1719 
1720         assert_eq!(
1721             t("a{1}"),
1722             hir_range(true, hir::RepetitionRange::Exactly(1), hir_lit("a"),)
1723         );
1724         assert_eq!(
1725             t("a{1,}"),
1726             hir_range(true, hir::RepetitionRange::AtLeast(1), hir_lit("a"),)
1727         );
1728         assert_eq!(
1729             t("a{1,2}"),
1730             hir_range(true, hir::RepetitionRange::Bounded(1, 2), hir_lit("a"),)
1731         );
1732         assert_eq!(
1733             t("a{1}?"),
1734             hir_range(false, hir::RepetitionRange::Exactly(1), hir_lit("a"),)
1735         );
1736         assert_eq!(
1737             t("a{1,}?"),
1738             hir_range(false, hir::RepetitionRange::AtLeast(1), hir_lit("a"),)
1739         );
1740         assert_eq!(
1741             t("a{1,2}?"),
1742             hir_range(
1743                 false,
1744                 hir::RepetitionRange::Bounded(1, 2),
1745                 hir_lit("a"),
1746             )
1747         );
1748 
1749         assert_eq!(
1750             t("ab?"),
1751             hir_cat(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),])
1752         );
1753         assert_eq!(
1754             t("(ab)?"),
1755             hir_quest(
1756                 true,
1757                 hir_group(1, hir_cat(vec![hir_lit("a"), hir_lit("b"),]))
1758             )
1759         );
1760         assert_eq!(
1761             t("a|b?"),
1762             hir_alt(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),])
1763         );
1764     }
1765 
1766     #[test]
cat_alt()1767     fn cat_alt() {
1768         assert_eq!(
1769             t("(ab)"),
1770             hir_group(1, hir_cat(vec![hir_lit("a"), hir_lit("b"),]))
1771         );
1772         assert_eq!(t("a|b"), hir_alt(vec![hir_lit("a"), hir_lit("b"),]));
1773         assert_eq!(
1774             t("a|b|c"),
1775             hir_alt(vec![hir_lit("a"), hir_lit("b"), hir_lit("c"),])
1776         );
1777         assert_eq!(
1778             t("ab|bc|cd"),
1779             hir_alt(vec![hir_lit("ab"), hir_lit("bc"), hir_lit("cd"),])
1780         );
1781         assert_eq!(
1782             t("(a|b)"),
1783             hir_group(1, hir_alt(vec![hir_lit("a"), hir_lit("b"),]))
1784         );
1785         assert_eq!(
1786             t("(a|b|c)"),
1787             hir_group(
1788                 1,
1789                 hir_alt(vec![hir_lit("a"), hir_lit("b"), hir_lit("c"),])
1790             )
1791         );
1792         assert_eq!(
1793             t("(ab|bc|cd)"),
1794             hir_group(
1795                 1,
1796                 hir_alt(vec![hir_lit("ab"), hir_lit("bc"), hir_lit("cd"),])
1797             )
1798         );
1799         assert_eq!(
1800             t("(ab|(bc|(cd)))"),
1801             hir_group(
1802                 1,
1803                 hir_alt(vec![
1804                     hir_lit("ab"),
1805                     hir_group(
1806                         2,
1807                         hir_alt(vec![
1808                             hir_lit("bc"),
1809                             hir_group(3, hir_lit("cd")),
1810                         ])
1811                     ),
1812                 ])
1813             )
1814         );
1815     }
1816 
1817     #[test]
class_ascii()1818     fn class_ascii() {
1819         assert_eq!(
1820             t("[[:alnum:]]"),
1821             hir_uclass(ascii_class(&ast::ClassAsciiKind::Alnum))
1822         );
1823         assert_eq!(
1824             t("[[:alpha:]]"),
1825             hir_uclass(ascii_class(&ast::ClassAsciiKind::Alpha))
1826         );
1827         assert_eq!(
1828             t("[[:ascii:]]"),
1829             hir_uclass(ascii_class(&ast::ClassAsciiKind::Ascii))
1830         );
1831         assert_eq!(
1832             t("[[:blank:]]"),
1833             hir_uclass(ascii_class(&ast::ClassAsciiKind::Blank))
1834         );
1835         assert_eq!(
1836             t("[[:cntrl:]]"),
1837             hir_uclass(ascii_class(&ast::ClassAsciiKind::Cntrl))
1838         );
1839         assert_eq!(
1840             t("[[:digit:]]"),
1841             hir_uclass(ascii_class(&ast::ClassAsciiKind::Digit))
1842         );
1843         assert_eq!(
1844             t("[[:graph:]]"),
1845             hir_uclass(ascii_class(&ast::ClassAsciiKind::Graph))
1846         );
1847         assert_eq!(
1848             t("[[:lower:]]"),
1849             hir_uclass(ascii_class(&ast::ClassAsciiKind::Lower))
1850         );
1851         assert_eq!(
1852             t("[[:print:]]"),
1853             hir_uclass(ascii_class(&ast::ClassAsciiKind::Print))
1854         );
1855         assert_eq!(
1856             t("[[:punct:]]"),
1857             hir_uclass(ascii_class(&ast::ClassAsciiKind::Punct))
1858         );
1859         assert_eq!(
1860             t("[[:space:]]"),
1861             hir_uclass(ascii_class(&ast::ClassAsciiKind::Space))
1862         );
1863         assert_eq!(
1864             t("[[:upper:]]"),
1865             hir_uclass(ascii_class(&ast::ClassAsciiKind::Upper))
1866         );
1867         assert_eq!(
1868             t("[[:word:]]"),
1869             hir_uclass(ascii_class(&ast::ClassAsciiKind::Word))
1870         );
1871         assert_eq!(
1872             t("[[:xdigit:]]"),
1873             hir_uclass(ascii_class(&ast::ClassAsciiKind::Xdigit))
1874         );
1875 
1876         assert_eq!(
1877             t("[[:^lower:]]"),
1878             hir_negate(hir_uclass(ascii_class(&ast::ClassAsciiKind::Lower)))
1879         );
1880         #[cfg(feature = "unicode-case")]
1881         assert_eq!(
1882             t("(?i)[[:lower:]]"),
1883             hir_uclass(&[
1884                 ('A', 'Z'),
1885                 ('a', 'z'),
1886                 ('\u{17F}', '\u{17F}'),
1887                 ('\u{212A}', '\u{212A}'),
1888             ])
1889         );
1890 
1891         assert_eq!(
1892             t("(?-u)[[:lower:]]"),
1893             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Lower))
1894         );
1895         assert_eq!(
1896             t("(?i-u)[[:lower:]]"),
1897             hir_case_fold(hir_bclass_from_char(ascii_class(
1898                 &ast::ClassAsciiKind::Lower
1899             )))
1900         );
1901 
1902         assert_eq!(
1903             t_err("(?-u)[[:^lower:]]"),
1904             TestError {
1905                 kind: hir::ErrorKind::InvalidUtf8,
1906                 span: Span::new(
1907                     Position::new(6, 1, 7),
1908                     Position::new(16, 1, 17)
1909                 ),
1910             }
1911         );
1912         assert_eq!(
1913             t_err("(?i-u)[[:^lower:]]"),
1914             TestError {
1915                 kind: hir::ErrorKind::InvalidUtf8,
1916                 span: Span::new(
1917                     Position::new(7, 1, 8),
1918                     Position::new(17, 1, 18)
1919                 ),
1920             }
1921         );
1922     }
1923 
1924     #[test]
1925     #[cfg(feature = "unicode-perl")]
class_perl()1926     fn class_perl() {
1927         // Unicode
1928         assert_eq!(t(r"\d"), hir_uclass_query(ClassQuery::Binary("digit")));
1929         assert_eq!(t(r"\s"), hir_uclass_query(ClassQuery::Binary("space")));
1930         assert_eq!(t(r"\w"), hir_uclass_perl_word());
1931         #[cfg(feature = "unicode-case")]
1932         assert_eq!(
1933             t(r"(?i)\d"),
1934             hir_uclass_query(ClassQuery::Binary("digit"))
1935         );
1936         #[cfg(feature = "unicode-case")]
1937         assert_eq!(
1938             t(r"(?i)\s"),
1939             hir_uclass_query(ClassQuery::Binary("space"))
1940         );
1941         #[cfg(feature = "unicode-case")]
1942         assert_eq!(t(r"(?i)\w"), hir_uclass_perl_word());
1943 
1944         // Unicode, negated
1945         assert_eq!(
1946             t(r"\D"),
1947             hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
1948         );
1949         assert_eq!(
1950             t(r"\S"),
1951             hir_negate(hir_uclass_query(ClassQuery::Binary("space")))
1952         );
1953         assert_eq!(t(r"\W"), hir_negate(hir_uclass_perl_word()));
1954         #[cfg(feature = "unicode-case")]
1955         assert_eq!(
1956             t(r"(?i)\D"),
1957             hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
1958         );
1959         #[cfg(feature = "unicode-case")]
1960         assert_eq!(
1961             t(r"(?i)\S"),
1962             hir_negate(hir_uclass_query(ClassQuery::Binary("space")))
1963         );
1964         #[cfg(feature = "unicode-case")]
1965         assert_eq!(t(r"(?i)\W"), hir_negate(hir_uclass_perl_word()));
1966 
1967         // ASCII only
1968         assert_eq!(
1969             t(r"(?-u)\d"),
1970             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
1971         );
1972         assert_eq!(
1973             t(r"(?-u)\s"),
1974             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Space))
1975         );
1976         assert_eq!(
1977             t(r"(?-u)\w"),
1978             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word))
1979         );
1980         assert_eq!(
1981             t(r"(?i-u)\d"),
1982             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
1983         );
1984         assert_eq!(
1985             t(r"(?i-u)\s"),
1986             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Space))
1987         );
1988         assert_eq!(
1989             t(r"(?i-u)\w"),
1990             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word))
1991         );
1992 
1993         // ASCII only, negated
1994         assert_eq!(
1995             t(r"(?-u)\D"),
1996             hir_negate(hir_bclass_from_char(ascii_class(
1997                 &ast::ClassAsciiKind::Digit
1998             )))
1999         );
2000         assert_eq!(
2001             t(r"(?-u)\S"),
2002             hir_negate(hir_bclass_from_char(ascii_class(
2003                 &ast::ClassAsciiKind::Space
2004             )))
2005         );
2006         assert_eq!(
2007             t(r"(?-u)\W"),
2008             hir_negate(hir_bclass_from_char(ascii_class(
2009                 &ast::ClassAsciiKind::Word
2010             )))
2011         );
2012         assert_eq!(
2013             t(r"(?i-u)\D"),
2014             hir_negate(hir_bclass_from_char(ascii_class(
2015                 &ast::ClassAsciiKind::Digit
2016             )))
2017         );
2018         assert_eq!(
2019             t(r"(?i-u)\S"),
2020             hir_negate(hir_bclass_from_char(ascii_class(
2021                 &ast::ClassAsciiKind::Space
2022             )))
2023         );
2024         assert_eq!(
2025             t(r"(?i-u)\W"),
2026             hir_negate(hir_bclass_from_char(ascii_class(
2027                 &ast::ClassAsciiKind::Word
2028             )))
2029         );
2030     }
2031 
2032     #[test]
2033     #[cfg(not(feature = "unicode-perl"))]
class_perl_word_disabled()2034     fn class_perl_word_disabled() {
2035         assert_eq!(
2036             t_err(r"\w"),
2037             TestError {
2038                 kind: hir::ErrorKind::UnicodePerlClassNotFound,
2039                 span: Span::new(
2040                     Position::new(0, 1, 1),
2041                     Position::new(2, 1, 3)
2042                 ),
2043             }
2044         );
2045     }
2046 
2047     #[test]
2048     #[cfg(all(not(feature = "unicode-perl"), not(feature = "unicode-bool")))]
class_perl_space_disabled()2049     fn class_perl_space_disabled() {
2050         assert_eq!(
2051             t_err(r"\s"),
2052             TestError {
2053                 kind: hir::ErrorKind::UnicodePerlClassNotFound,
2054                 span: Span::new(
2055                     Position::new(0, 1, 1),
2056                     Position::new(2, 1, 3)
2057                 ),
2058             }
2059         );
2060     }
2061 
2062     #[test]
2063     #[cfg(all(
2064         not(feature = "unicode-perl"),
2065         not(feature = "unicode-gencat")
2066     ))]
class_perl_digit_disabled()2067     fn class_perl_digit_disabled() {
2068         assert_eq!(
2069             t_err(r"\d"),
2070             TestError {
2071                 kind: hir::ErrorKind::UnicodePerlClassNotFound,
2072                 span: Span::new(
2073                     Position::new(0, 1, 1),
2074                     Position::new(2, 1, 3)
2075                 ),
2076             }
2077         );
2078     }
2079 
2080     #[test]
2081     #[cfg(feature = "unicode-gencat")]
class_unicode_gencat()2082     fn class_unicode_gencat() {
2083         assert_eq!(t(r"\pZ"), hir_uclass_query(ClassQuery::Binary("Z")));
2084         assert_eq!(t(r"\pz"), hir_uclass_query(ClassQuery::Binary("Z")));
2085         assert_eq!(
2086             t(r"\p{Separator}"),
2087             hir_uclass_query(ClassQuery::Binary("Z"))
2088         );
2089         assert_eq!(
2090             t(r"\p{se      PaRa ToR}"),
2091             hir_uclass_query(ClassQuery::Binary("Z"))
2092         );
2093         assert_eq!(
2094             t(r"\p{gc:Separator}"),
2095             hir_uclass_query(ClassQuery::Binary("Z"))
2096         );
2097         assert_eq!(
2098             t(r"\p{gc=Separator}"),
2099             hir_uclass_query(ClassQuery::Binary("Z"))
2100         );
2101         assert_eq!(
2102             t(r"\p{Other}"),
2103             hir_uclass_query(ClassQuery::Binary("Other"))
2104         );
2105         assert_eq!(t(r"\pC"), hir_uclass_query(ClassQuery::Binary("Other")));
2106 
2107         assert_eq!(
2108             t(r"\PZ"),
2109             hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
2110         );
2111         assert_eq!(
2112             t(r"\P{separator}"),
2113             hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
2114         );
2115         assert_eq!(
2116             t(r"\P{gc!=separator}"),
2117             hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
2118         );
2119 
2120         assert_eq!(t(r"\p{any}"), hir_uclass_query(ClassQuery::Binary("Any")));
2121         assert_eq!(
2122             t(r"\p{assigned}"),
2123             hir_uclass_query(ClassQuery::Binary("Assigned"))
2124         );
2125         assert_eq!(
2126             t(r"\p{ascii}"),
2127             hir_uclass_query(ClassQuery::Binary("ASCII"))
2128         );
2129         assert_eq!(
2130             t(r"\p{gc:any}"),
2131             hir_uclass_query(ClassQuery::Binary("Any"))
2132         );
2133         assert_eq!(
2134             t(r"\p{gc:assigned}"),
2135             hir_uclass_query(ClassQuery::Binary("Assigned"))
2136         );
2137         assert_eq!(
2138             t(r"\p{gc:ascii}"),
2139             hir_uclass_query(ClassQuery::Binary("ASCII"))
2140         );
2141 
2142         assert_eq!(
2143             t_err(r"(?-u)\pZ"),
2144             TestError {
2145                 kind: hir::ErrorKind::UnicodeNotAllowed,
2146                 span: Span::new(
2147                     Position::new(5, 1, 6),
2148                     Position::new(8, 1, 9)
2149                 ),
2150             }
2151         );
2152         assert_eq!(
2153             t_err(r"(?-u)\p{Separator}"),
2154             TestError {
2155                 kind: hir::ErrorKind::UnicodeNotAllowed,
2156                 span: Span::new(
2157                     Position::new(5, 1, 6),
2158                     Position::new(18, 1, 19)
2159                 ),
2160             }
2161         );
2162         assert_eq!(
2163             t_err(r"\pE"),
2164             TestError {
2165                 kind: hir::ErrorKind::UnicodePropertyNotFound,
2166                 span: Span::new(
2167                     Position::new(0, 1, 1),
2168                     Position::new(3, 1, 4)
2169                 ),
2170             }
2171         );
2172         assert_eq!(
2173             t_err(r"\p{Foo}"),
2174             TestError {
2175                 kind: hir::ErrorKind::UnicodePropertyNotFound,
2176                 span: Span::new(
2177                     Position::new(0, 1, 1),
2178                     Position::new(7, 1, 8)
2179                 ),
2180             }
2181         );
2182         assert_eq!(
2183             t_err(r"\p{gc:Foo}"),
2184             TestError {
2185                 kind: hir::ErrorKind::UnicodePropertyValueNotFound,
2186                 span: Span::new(
2187                     Position::new(0, 1, 1),
2188                     Position::new(10, 1, 11)
2189                 ),
2190             }
2191         );
2192     }
2193 
2194     #[test]
2195     #[cfg(not(feature = "unicode-gencat"))]
class_unicode_gencat_disabled()2196     fn class_unicode_gencat_disabled() {
2197         assert_eq!(
2198             t_err(r"\p{Separator}"),
2199             TestError {
2200                 kind: hir::ErrorKind::UnicodePropertyNotFound,
2201                 span: Span::new(
2202                     Position::new(0, 1, 1),
2203                     Position::new(13, 1, 14)
2204                 ),
2205             }
2206         );
2207 
2208         assert_eq!(
2209             t_err(r"\p{Any}"),
2210             TestError {
2211                 kind: hir::ErrorKind::UnicodePropertyNotFound,
2212                 span: Span::new(
2213                     Position::new(0, 1, 1),
2214                     Position::new(7, 1, 8)
2215                 ),
2216             }
2217         );
2218     }
2219 
2220     #[test]
2221     #[cfg(feature = "unicode-script")]
class_unicode_script()2222     fn class_unicode_script() {
2223         assert_eq!(
2224             t(r"\p{Greek}"),
2225             hir_uclass_query(ClassQuery::Binary("Greek"))
2226         );
2227         #[cfg(feature = "unicode-case")]
2228         assert_eq!(
2229             t(r"(?i)\p{Greek}"),
2230             hir_case_fold(hir_uclass_query(ClassQuery::Binary("Greek")))
2231         );
2232         #[cfg(feature = "unicode-case")]
2233         assert_eq!(
2234             t(r"(?i)\P{Greek}"),
2235             hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
2236                 "Greek"
2237             ))))
2238         );
2239 
2240         assert_eq!(
2241             t_err(r"\p{sc:Foo}"),
2242             TestError {
2243                 kind: hir::ErrorKind::UnicodePropertyValueNotFound,
2244                 span: Span::new(
2245                     Position::new(0, 1, 1),
2246                     Position::new(10, 1, 11)
2247                 ),
2248             }
2249         );
2250         assert_eq!(
2251             t_err(r"\p{scx:Foo}"),
2252             TestError {
2253                 kind: hir::ErrorKind::UnicodePropertyValueNotFound,
2254                 span: Span::new(
2255                     Position::new(0, 1, 1),
2256                     Position::new(11, 1, 12)
2257                 ),
2258             }
2259         );
2260     }
2261 
2262     #[test]
2263     #[cfg(not(feature = "unicode-script"))]
class_unicode_script_disabled()2264     fn class_unicode_script_disabled() {
2265         assert_eq!(
2266             t_err(r"\p{Greek}"),
2267             TestError {
2268                 kind: hir::ErrorKind::UnicodePropertyNotFound,
2269                 span: Span::new(
2270                     Position::new(0, 1, 1),
2271                     Position::new(9, 1, 10)
2272                 ),
2273             }
2274         );
2275 
2276         assert_eq!(
2277             t_err(r"\p{scx:Greek}"),
2278             TestError {
2279                 kind: hir::ErrorKind::UnicodePropertyNotFound,
2280                 span: Span::new(
2281                     Position::new(0, 1, 1),
2282                     Position::new(13, 1, 14)
2283                 ),
2284             }
2285         );
2286     }
2287 
2288     #[test]
2289     #[cfg(feature = "unicode-age")]
class_unicode_age()2290     fn class_unicode_age() {
2291         assert_eq!(
2292             t_err(r"\p{age:Foo}"),
2293             TestError {
2294                 kind: hir::ErrorKind::UnicodePropertyValueNotFound,
2295                 span: Span::new(
2296                     Position::new(0, 1, 1),
2297                     Position::new(11, 1, 12)
2298                 ),
2299             }
2300         );
2301     }
2302 
2303     #[test]
2304     #[cfg(not(feature = "unicode-age"))]
class_unicode_age_disabled()2305     fn class_unicode_age_disabled() {
2306         assert_eq!(
2307             t_err(r"\p{age:3.0}"),
2308             TestError {
2309                 kind: hir::ErrorKind::UnicodePropertyNotFound,
2310                 span: Span::new(
2311                     Position::new(0, 1, 1),
2312                     Position::new(11, 1, 12)
2313                 ),
2314             }
2315         );
2316     }
2317 
2318     #[test]
class_bracketed()2319     fn class_bracketed() {
2320         assert_eq!(t("[a]"), hir_uclass(&[('a', 'a')]));
2321         assert_eq!(t("[^[a]]"), hir_negate(hir_uclass(&[('a', 'a')])));
2322         assert_eq!(t("[a-z]"), hir_uclass(&[('a', 'z')]));
2323         assert_eq!(t("[a-fd-h]"), hir_uclass(&[('a', 'h')]));
2324         assert_eq!(t("[a-fg-m]"), hir_uclass(&[('a', 'm')]));
2325         assert_eq!(t(r"[\x00]"), hir_uclass(&[('\0', '\0')]));
2326         assert_eq!(t(r"[\n]"), hir_uclass(&[('\n', '\n')]));
2327         assert_eq!(t("[\n]"), hir_uclass(&[('\n', '\n')]));
2328         #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
2329         assert_eq!(t(r"[\d]"), hir_uclass_query(ClassQuery::Binary("digit")));
2330         #[cfg(feature = "unicode-gencat")]
2331         assert_eq!(
2332             t(r"[\pZ]"),
2333             hir_uclass_query(ClassQuery::Binary("separator"))
2334         );
2335         #[cfg(feature = "unicode-gencat")]
2336         assert_eq!(
2337             t(r"[\p{separator}]"),
2338             hir_uclass_query(ClassQuery::Binary("separator"))
2339         );
2340         #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
2341         assert_eq!(t(r"[^\D]"), hir_uclass_query(ClassQuery::Binary("digit")));
2342         #[cfg(feature = "unicode-gencat")]
2343         assert_eq!(
2344             t(r"[^\PZ]"),
2345             hir_uclass_query(ClassQuery::Binary("separator"))
2346         );
2347         #[cfg(feature = "unicode-gencat")]
2348         assert_eq!(
2349             t(r"[^\P{separator}]"),
2350             hir_uclass_query(ClassQuery::Binary("separator"))
2351         );
2352         #[cfg(all(
2353             feature = "unicode-case",
2354             any(feature = "unicode-perl", feature = "unicode-gencat")
2355         ))]
2356         assert_eq!(
2357             t(r"(?i)[^\D]"),
2358             hir_uclass_query(ClassQuery::Binary("digit"))
2359         );
2360         #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
2361         assert_eq!(
2362             t(r"(?i)[^\P{greek}]"),
2363             hir_case_fold(hir_uclass_query(ClassQuery::Binary("greek")))
2364         );
2365 
2366         assert_eq!(t("(?-u)[a]"), hir_bclass(&[(b'a', b'a')]));
2367         assert_eq!(t(r"(?-u)[\x00]"), hir_bclass(&[(b'\0', b'\0')]));
2368         assert_eq!(t_bytes(r"(?-u)[\xFF]"), hir_bclass(&[(b'\xFF', b'\xFF')]));
2369 
2370         #[cfg(feature = "unicode-case")]
2371         assert_eq!(t("(?i)[a]"), hir_uclass(&[('A', 'A'), ('a', 'a')]));
2372         #[cfg(feature = "unicode-case")]
2373         assert_eq!(
2374             t("(?i)[k]"),
2375             hir_uclass(&[('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}'),])
2376         );
2377         #[cfg(feature = "unicode-case")]
2378         assert_eq!(
2379             t("(?i)[β]"),
2380             hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),])
2381         );
2382         assert_eq!(t("(?i-u)[k]"), hir_bclass(&[(b'K', b'K'), (b'k', b'k'),]));
2383 
2384         assert_eq!(t("[^a]"), hir_negate(hir_uclass(&[('a', 'a')])));
2385         assert_eq!(t(r"[^\x00]"), hir_negate(hir_uclass(&[('\0', '\0')])));
2386         assert_eq!(
2387             t_bytes("(?-u)[^a]"),
2388             hir_negate(hir_bclass(&[(b'a', b'a')]))
2389         );
2390         #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
2391         assert_eq!(
2392             t(r"[^\d]"),
2393             hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
2394         );
2395         #[cfg(feature = "unicode-gencat")]
2396         assert_eq!(
2397             t(r"[^\pZ]"),
2398             hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
2399         );
2400         #[cfg(feature = "unicode-gencat")]
2401         assert_eq!(
2402             t(r"[^\p{separator}]"),
2403             hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
2404         );
2405         #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
2406         assert_eq!(
2407             t(r"(?i)[^\p{greek}]"),
2408             hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
2409                 "greek"
2410             ))))
2411         );
2412         #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
2413         assert_eq!(
2414             t(r"(?i)[\P{greek}]"),
2415             hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
2416                 "greek"
2417             ))))
2418         );
2419 
2420         // Test some weird cases.
2421         assert_eq!(t(r"[\[]"), hir_uclass(&[('[', '[')]));
2422 
2423         assert_eq!(t(r"[&]"), hir_uclass(&[('&', '&')]));
2424         assert_eq!(t(r"[\&]"), hir_uclass(&[('&', '&')]));
2425         assert_eq!(t(r"[\&\&]"), hir_uclass(&[('&', '&')]));
2426         assert_eq!(t(r"[\x00-&]"), hir_uclass(&[('\0', '&')]));
2427         assert_eq!(t(r"[&-\xFF]"), hir_uclass(&[('&', '\u{FF}')]));
2428 
2429         assert_eq!(t(r"[~]"), hir_uclass(&[('~', '~')]));
2430         assert_eq!(t(r"[\~]"), hir_uclass(&[('~', '~')]));
2431         assert_eq!(t(r"[\~\~]"), hir_uclass(&[('~', '~')]));
2432         assert_eq!(t(r"[\x00-~]"), hir_uclass(&[('\0', '~')]));
2433         assert_eq!(t(r"[~-\xFF]"), hir_uclass(&[('~', '\u{FF}')]));
2434 
2435         assert_eq!(t(r"[-]"), hir_uclass(&[('-', '-')]));
2436         assert_eq!(t(r"[\-]"), hir_uclass(&[('-', '-')]));
2437         assert_eq!(t(r"[\-\-]"), hir_uclass(&[('-', '-')]));
2438         assert_eq!(t(r"[\x00-\-]"), hir_uclass(&[('\0', '-')]));
2439         assert_eq!(t(r"[\--\xFF]"), hir_uclass(&[('-', '\u{FF}')]));
2440 
2441         assert_eq!(
2442             t_err("(?-u)[^a]"),
2443             TestError {
2444                 kind: hir::ErrorKind::InvalidUtf8,
2445                 span: Span::new(
2446                     Position::new(5, 1, 6),
2447                     Position::new(9, 1, 10)
2448                 ),
2449             }
2450         );
2451         #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
2452         assert_eq!(
2453             t_err(r"[^\s\S]"),
2454             TestError {
2455                 kind: hir::ErrorKind::EmptyClassNotAllowed,
2456                 span: Span::new(
2457                     Position::new(0, 1, 1),
2458                     Position::new(7, 1, 8)
2459                 ),
2460             }
2461         );
2462         #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
2463         assert_eq!(
2464             t_err(r"(?-u)[^\s\S]"),
2465             TestError {
2466                 kind: hir::ErrorKind::EmptyClassNotAllowed,
2467                 span: Span::new(
2468                     Position::new(5, 1, 6),
2469                     Position::new(12, 1, 13)
2470                 ),
2471             }
2472         );
2473     }
2474 
2475     #[test]
class_bracketed_union()2476     fn class_bracketed_union() {
2477         assert_eq!(t("[a-zA-Z]"), hir_uclass(&[('A', 'Z'), ('a', 'z')]));
2478         #[cfg(feature = "unicode-gencat")]
2479         assert_eq!(
2480             t(r"[a\pZb]"),
2481             hir_union(
2482                 hir_uclass(&[('a', 'b')]),
2483                 hir_uclass_query(ClassQuery::Binary("separator"))
2484             )
2485         );
2486         #[cfg(all(feature = "unicode-gencat", feature = "unicode-script"))]
2487         assert_eq!(
2488             t(r"[\pZ\p{Greek}]"),
2489             hir_union(
2490                 hir_uclass_query(ClassQuery::Binary("greek")),
2491                 hir_uclass_query(ClassQuery::Binary("separator"))
2492             )
2493         );
2494         #[cfg(all(
2495             feature = "unicode-age",
2496             feature = "unicode-gencat",
2497             feature = "unicode-script"
2498         ))]
2499         assert_eq!(
2500             t(r"[\p{age:3.0}\pZ\p{Greek}]"),
2501             hir_union(
2502                 hir_uclass_query(ClassQuery::ByValue {
2503                     property_name: "age",
2504                     property_value: "3.0",
2505                 }),
2506                 hir_union(
2507                     hir_uclass_query(ClassQuery::Binary("greek")),
2508                     hir_uclass_query(ClassQuery::Binary("separator"))
2509                 )
2510             )
2511         );
2512         #[cfg(all(
2513             feature = "unicode-age",
2514             feature = "unicode-gencat",
2515             feature = "unicode-script"
2516         ))]
2517         assert_eq!(
2518             t(r"[[[\p{age:3.0}\pZ]\p{Greek}][\p{Cyrillic}]]"),
2519             hir_union(
2520                 hir_uclass_query(ClassQuery::ByValue {
2521                     property_name: "age",
2522                     property_value: "3.0",
2523                 }),
2524                 hir_union(
2525                     hir_uclass_query(ClassQuery::Binary("cyrillic")),
2526                     hir_union(
2527                         hir_uclass_query(ClassQuery::Binary("greek")),
2528                         hir_uclass_query(ClassQuery::Binary("separator"))
2529                     )
2530                 )
2531             )
2532         );
2533 
2534         #[cfg(all(
2535             feature = "unicode-age",
2536             feature = "unicode-case",
2537             feature = "unicode-gencat",
2538             feature = "unicode-script"
2539         ))]
2540         assert_eq!(
2541             t(r"(?i)[\p{age:3.0}\pZ\p{Greek}]"),
2542             hir_case_fold(hir_union(
2543                 hir_uclass_query(ClassQuery::ByValue {
2544                     property_name: "age",
2545                     property_value: "3.0",
2546                 }),
2547                 hir_union(
2548                     hir_uclass_query(ClassQuery::Binary("greek")),
2549                     hir_uclass_query(ClassQuery::Binary("separator"))
2550                 )
2551             ))
2552         );
2553         #[cfg(all(
2554             feature = "unicode-age",
2555             feature = "unicode-gencat",
2556             feature = "unicode-script"
2557         ))]
2558         assert_eq!(
2559             t(r"[^\p{age:3.0}\pZ\p{Greek}]"),
2560             hir_negate(hir_union(
2561                 hir_uclass_query(ClassQuery::ByValue {
2562                     property_name: "age",
2563                     property_value: "3.0",
2564                 }),
2565                 hir_union(
2566                     hir_uclass_query(ClassQuery::Binary("greek")),
2567                     hir_uclass_query(ClassQuery::Binary("separator"))
2568                 )
2569             ))
2570         );
2571         #[cfg(all(
2572             feature = "unicode-age",
2573             feature = "unicode-case",
2574             feature = "unicode-gencat",
2575             feature = "unicode-script"
2576         ))]
2577         assert_eq!(
2578             t(r"(?i)[^\p{age:3.0}\pZ\p{Greek}]"),
2579             hir_negate(hir_case_fold(hir_union(
2580                 hir_uclass_query(ClassQuery::ByValue {
2581                     property_name: "age",
2582                     property_value: "3.0",
2583                 }),
2584                 hir_union(
2585                     hir_uclass_query(ClassQuery::Binary("greek")),
2586                     hir_uclass_query(ClassQuery::Binary("separator"))
2587                 )
2588             )))
2589         );
2590     }
2591 
2592     #[test]
class_bracketed_nested()2593     fn class_bracketed_nested() {
2594         assert_eq!(t(r"[a[^c]]"), hir_negate(hir_uclass(&[('c', 'c')])));
2595         assert_eq!(t(r"[a-b[^c]]"), hir_negate(hir_uclass(&[('c', 'c')])));
2596         assert_eq!(t(r"[a-c[^c]]"), hir_negate(hir_uclass(&[])));
2597 
2598         assert_eq!(t(r"[^a[^c]]"), hir_uclass(&[('c', 'c')]));
2599         assert_eq!(t(r"[^a-b[^c]]"), hir_uclass(&[('c', 'c')]));
2600 
2601         #[cfg(feature = "unicode-case")]
2602         assert_eq!(
2603             t(r"(?i)[a[^c]]"),
2604             hir_negate(hir_case_fold(hir_uclass(&[('c', 'c')])))
2605         );
2606         #[cfg(feature = "unicode-case")]
2607         assert_eq!(
2608             t(r"(?i)[a-b[^c]]"),
2609             hir_negate(hir_case_fold(hir_uclass(&[('c', 'c')])))
2610         );
2611 
2612         #[cfg(feature = "unicode-case")]
2613         assert_eq!(t(r"(?i)[^a[^c]]"), hir_uclass(&[('C', 'C'), ('c', 'c')]));
2614         #[cfg(feature = "unicode-case")]
2615         assert_eq!(
2616             t(r"(?i)[^a-b[^c]]"),
2617             hir_uclass(&[('C', 'C'), ('c', 'c')])
2618         );
2619 
2620         assert_eq!(
2621             t_err(r"[^a-c[^c]]"),
2622             TestError {
2623                 kind: hir::ErrorKind::EmptyClassNotAllowed,
2624                 span: Span::new(
2625                     Position::new(0, 1, 1),
2626                     Position::new(10, 1, 11)
2627                 ),
2628             }
2629         );
2630         #[cfg(feature = "unicode-case")]
2631         assert_eq!(
2632             t_err(r"(?i)[^a-c[^c]]"),
2633             TestError {
2634                 kind: hir::ErrorKind::EmptyClassNotAllowed,
2635                 span: Span::new(
2636                     Position::new(4, 1, 5),
2637                     Position::new(14, 1, 15)
2638                 ),
2639             }
2640         );
2641     }
2642 
2643     #[test]
class_bracketed_intersect()2644     fn class_bracketed_intersect() {
2645         assert_eq!(t("[abc&&b-c]"), hir_uclass(&[('b', 'c')]));
2646         assert_eq!(t("[abc&&[b-c]]"), hir_uclass(&[('b', 'c')]));
2647         assert_eq!(t("[[abc]&&[b-c]]"), hir_uclass(&[('b', 'c')]));
2648         assert_eq!(t("[a-z&&b-y&&c-x]"), hir_uclass(&[('c', 'x')]));
2649         assert_eq!(t("[c-da-b&&a-d]"), hir_uclass(&[('a', 'd')]));
2650         assert_eq!(t("[a-d&&c-da-b]"), hir_uclass(&[('a', 'd')]));
2651         assert_eq!(t(r"[a-z&&a-c]"), hir_uclass(&[('a', 'c')]));
2652         assert_eq!(t(r"[[a-z&&a-c]]"), hir_uclass(&[('a', 'c')]));
2653         assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
2654 
2655         assert_eq!(t("(?-u)[abc&&b-c]"), hir_bclass(&[(b'b', b'c')]));
2656         assert_eq!(t("(?-u)[abc&&[b-c]]"), hir_bclass(&[(b'b', b'c')]));
2657         assert_eq!(t("(?-u)[[abc]&&[b-c]]"), hir_bclass(&[(b'b', b'c')]));
2658         assert_eq!(t("(?-u)[a-z&&b-y&&c-x]"), hir_bclass(&[(b'c', b'x')]));
2659         assert_eq!(t("(?-u)[c-da-b&&a-d]"), hir_bclass(&[(b'a', b'd')]));
2660         assert_eq!(t("(?-u)[a-d&&c-da-b]"), hir_bclass(&[(b'a', b'd')]));
2661 
2662         #[cfg(feature = "unicode-case")]
2663         assert_eq!(
2664             t("(?i)[abc&&b-c]"),
2665             hir_case_fold(hir_uclass(&[('b', 'c')]))
2666         );
2667         #[cfg(feature = "unicode-case")]
2668         assert_eq!(
2669             t("(?i)[abc&&[b-c]]"),
2670             hir_case_fold(hir_uclass(&[('b', 'c')]))
2671         );
2672         #[cfg(feature = "unicode-case")]
2673         assert_eq!(
2674             t("(?i)[[abc]&&[b-c]]"),
2675             hir_case_fold(hir_uclass(&[('b', 'c')]))
2676         );
2677         #[cfg(feature = "unicode-case")]
2678         assert_eq!(
2679             t("(?i)[a-z&&b-y&&c-x]"),
2680             hir_case_fold(hir_uclass(&[('c', 'x')]))
2681         );
2682         #[cfg(feature = "unicode-case")]
2683         assert_eq!(
2684             t("(?i)[c-da-b&&a-d]"),
2685             hir_case_fold(hir_uclass(&[('a', 'd')]))
2686         );
2687         #[cfg(feature = "unicode-case")]
2688         assert_eq!(
2689             t("(?i)[a-d&&c-da-b]"),
2690             hir_case_fold(hir_uclass(&[('a', 'd')]))
2691         );
2692 
2693         assert_eq!(
2694             t("(?i-u)[abc&&b-c]"),
2695             hir_case_fold(hir_bclass(&[(b'b', b'c')]))
2696         );
2697         assert_eq!(
2698             t("(?i-u)[abc&&[b-c]]"),
2699             hir_case_fold(hir_bclass(&[(b'b', b'c')]))
2700         );
2701         assert_eq!(
2702             t("(?i-u)[[abc]&&[b-c]]"),
2703             hir_case_fold(hir_bclass(&[(b'b', b'c')]))
2704         );
2705         assert_eq!(
2706             t("(?i-u)[a-z&&b-y&&c-x]"),
2707             hir_case_fold(hir_bclass(&[(b'c', b'x')]))
2708         );
2709         assert_eq!(
2710             t("(?i-u)[c-da-b&&a-d]"),
2711             hir_case_fold(hir_bclass(&[(b'a', b'd')]))
2712         );
2713         assert_eq!(
2714             t("(?i-u)[a-d&&c-da-b]"),
2715             hir_case_fold(hir_bclass(&[(b'a', b'd')]))
2716         );
2717 
2718         // In `[a^]`, `^` does not need to be escaped, so it makes sense that
2719         // `^` is also allowed to be unescaped after `&&`.
2720         assert_eq!(t(r"[\^&&^]"), hir_uclass(&[('^', '^')]));
2721         // `]` needs to be escaped after `&&` since it's not at start of class.
2722         assert_eq!(t(r"[]&&\]]"), hir_uclass(&[(']', ']')]));
2723         assert_eq!(t(r"[-&&-]"), hir_uclass(&[('-', '-')]));
2724         assert_eq!(t(r"[\&&&&]"), hir_uclass(&[('&', '&')]));
2725         assert_eq!(t(r"[\&&&\&]"), hir_uclass(&[('&', '&')]));
2726         // Test precedence.
2727         assert_eq!(
2728             t(r"[a-w&&[^c-g]z]"),
2729             hir_uclass(&[('a', 'b'), ('h', 'w')])
2730         );
2731     }
2732 
2733     #[test]
class_bracketed_intersect_negate()2734     fn class_bracketed_intersect_negate() {
2735         #[cfg(feature = "unicode-perl")]
2736         assert_eq!(
2737             t(r"[^\w&&\d]"),
2738             hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
2739         );
2740         assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
2741         #[cfg(feature = "unicode-perl")]
2742         assert_eq!(
2743             t(r"[^[\w&&\d]]"),
2744             hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
2745         );
2746         #[cfg(feature = "unicode-perl")]
2747         assert_eq!(
2748             t(r"[^[^\w&&\d]]"),
2749             hir_uclass_query(ClassQuery::Binary("digit"))
2750         );
2751         #[cfg(feature = "unicode-perl")]
2752         assert_eq!(t(r"[[[^\w]&&[^\d]]]"), hir_negate(hir_uclass_perl_word()));
2753 
2754         #[cfg(feature = "unicode-perl")]
2755         assert_eq!(
2756             t_bytes(r"(?-u)[^\w&&\d]"),
2757             hir_negate(hir_bclass_from_char(ascii_class(
2758                 &ast::ClassAsciiKind::Digit
2759             )))
2760         );
2761         assert_eq!(
2762             t_bytes(r"(?-u)[^[a-z&&a-c]]"),
2763             hir_negate(hir_bclass(&[(b'a', b'c')]))
2764         );
2765         assert_eq!(
2766             t_bytes(r"(?-u)[^[\w&&\d]]"),
2767             hir_negate(hir_bclass_from_char(ascii_class(
2768                 &ast::ClassAsciiKind::Digit
2769             )))
2770         );
2771         assert_eq!(
2772             t_bytes(r"(?-u)[^[^\w&&\d]]"),
2773             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
2774         );
2775         assert_eq!(
2776             t_bytes(r"(?-u)[[[^\w]&&[^\d]]]"),
2777             hir_negate(hir_bclass_from_char(ascii_class(
2778                 &ast::ClassAsciiKind::Word
2779             )))
2780         );
2781     }
2782 
2783     #[test]
class_bracketed_difference()2784     fn class_bracketed_difference() {
2785         #[cfg(feature = "unicode-gencat")]
2786         assert_eq!(
2787             t(r"[\pL--[:ascii:]]"),
2788             hir_difference(
2789                 hir_uclass_query(ClassQuery::Binary("letter")),
2790                 hir_uclass(&[('\0', '\x7F')])
2791             )
2792         );
2793 
2794         assert_eq!(
2795             t(r"(?-u)[[:alpha:]--[:lower:]]"),
2796             hir_bclass(&[(b'A', b'Z')])
2797         );
2798     }
2799 
2800     #[test]
class_bracketed_symmetric_difference()2801     fn class_bracketed_symmetric_difference() {
2802         #[cfg(feature = "unicode-script")]
2803         assert_eq!(
2804             t(r"[\p{sc:Greek}~~\p{scx:Greek}]"),
2805             hir_uclass(&[
2806                 ('\u{0342}', '\u{0342}'),
2807                 ('\u{0345}', '\u{0345}'),
2808                 ('\u{1DC0}', '\u{1DC1}'),
2809             ])
2810         );
2811         assert_eq!(t(r"[a-g~~c-j]"), hir_uclass(&[('a', 'b'), ('h', 'j')]));
2812 
2813         assert_eq!(
2814             t(r"(?-u)[a-g~~c-j]"),
2815             hir_bclass(&[(b'a', b'b'), (b'h', b'j')])
2816         );
2817     }
2818 
2819     #[test]
ignore_whitespace()2820     fn ignore_whitespace() {
2821         assert_eq!(t(r"(?x)\12 3"), hir_lit("\n3"));
2822         assert_eq!(t(r"(?x)\x { 53 }"), hir_lit("S"));
2823         assert_eq!(
2824             t(r"(?x)\x # comment
2825 { # comment
2826     53 # comment
2827 } #comment"),
2828             hir_lit("S")
2829         );
2830 
2831         assert_eq!(t(r"(?x)\x 53"), hir_lit("S"));
2832         assert_eq!(
2833             t(r"(?x)\x # comment
2834         53 # comment"),
2835             hir_lit("S")
2836         );
2837         assert_eq!(t(r"(?x)\x5 3"), hir_lit("S"));
2838 
2839         #[cfg(feature = "unicode-gencat")]
2840         assert_eq!(
2841             t(r"(?x)\p # comment
2842 { # comment
2843     Separator # comment
2844 } # comment"),
2845             hir_uclass_query(ClassQuery::Binary("separator"))
2846         );
2847 
2848         assert_eq!(
2849             t(r"(?x)a # comment
2850 { # comment
2851     5 # comment
2852     , # comment
2853     10 # comment
2854 } # comment"),
2855             hir_range(
2856                 true,
2857                 hir::RepetitionRange::Bounded(5, 10),
2858                 hir_lit("a")
2859             )
2860         );
2861 
2862         assert_eq!(t(r"(?x)a\  # hi there"), hir_lit("a "));
2863     }
2864 
2865     #[test]
analysis_is_always_utf8()2866     fn analysis_is_always_utf8() {
2867         // Positive examples.
2868         assert!(t_bytes(r"a").is_always_utf8());
2869         assert!(t_bytes(r"ab").is_always_utf8());
2870         assert!(t_bytes(r"(?-u)a").is_always_utf8());
2871         assert!(t_bytes(r"(?-u)ab").is_always_utf8());
2872         assert!(t_bytes(r"\xFF").is_always_utf8());
2873         assert!(t_bytes(r"\xFF\xFF").is_always_utf8());
2874         assert!(t_bytes(r"[^a]").is_always_utf8());
2875         assert!(t_bytes(r"[^a][^a]").is_always_utf8());
2876         assert!(t_bytes(r"\b").is_always_utf8());
2877         assert!(t_bytes(r"\B").is_always_utf8());
2878         assert!(t_bytes(r"(?-u)\b").is_always_utf8());
2879 
2880         // Negative examples.
2881         assert!(!t_bytes(r"(?-u)\xFF").is_always_utf8());
2882         assert!(!t_bytes(r"(?-u)\xFF\xFF").is_always_utf8());
2883         assert!(!t_bytes(r"(?-u)[^a]").is_always_utf8());
2884         assert!(!t_bytes(r"(?-u)[^a][^a]").is_always_utf8());
2885         assert!(!t_bytes(r"(?-u)\B").is_always_utf8());
2886     }
2887 
2888     #[test]
analysis_is_all_assertions()2889     fn analysis_is_all_assertions() {
2890         // Positive examples.
2891         assert!(t(r"\b").is_all_assertions());
2892         assert!(t(r"\B").is_all_assertions());
2893         assert!(t(r"^").is_all_assertions());
2894         assert!(t(r"$").is_all_assertions());
2895         assert!(t(r"\A").is_all_assertions());
2896         assert!(t(r"\z").is_all_assertions());
2897         assert!(t(r"$^\z\A\b\B").is_all_assertions());
2898         assert!(t(r"$|^|\z|\A|\b|\B").is_all_assertions());
2899         assert!(t(r"^$|$^").is_all_assertions());
2900         assert!(t(r"((\b)+())*^").is_all_assertions());
2901 
2902         // Negative examples.
2903         assert!(!t(r"^a").is_all_assertions());
2904     }
2905 
2906     #[test]
analysis_is_anchored()2907     fn analysis_is_anchored() {
2908         // Positive examples.
2909         assert!(t(r"^").is_anchored_start());
2910         assert!(t(r"$").is_anchored_end());
2911         assert!(t(r"^").is_line_anchored_start());
2912         assert!(t(r"$").is_line_anchored_end());
2913 
2914         assert!(t(r"^^").is_anchored_start());
2915         assert!(t(r"$$").is_anchored_end());
2916         assert!(t(r"^^").is_line_anchored_start());
2917         assert!(t(r"$$").is_line_anchored_end());
2918 
2919         assert!(t(r"^$").is_anchored_start());
2920         assert!(t(r"^$").is_anchored_end());
2921         assert!(t(r"^$").is_line_anchored_start());
2922         assert!(t(r"^$").is_line_anchored_end());
2923 
2924         assert!(t(r"^foo").is_anchored_start());
2925         assert!(t(r"foo$").is_anchored_end());
2926         assert!(t(r"^foo").is_line_anchored_start());
2927         assert!(t(r"foo$").is_line_anchored_end());
2928 
2929         assert!(t(r"^foo|^bar").is_anchored_start());
2930         assert!(t(r"foo$|bar$").is_anchored_end());
2931         assert!(t(r"^foo|^bar").is_line_anchored_start());
2932         assert!(t(r"foo$|bar$").is_line_anchored_end());
2933 
2934         assert!(t(r"^(foo|bar)").is_anchored_start());
2935         assert!(t(r"(foo|bar)$").is_anchored_end());
2936         assert!(t(r"^(foo|bar)").is_line_anchored_start());
2937         assert!(t(r"(foo|bar)$").is_line_anchored_end());
2938 
2939         assert!(t(r"^+").is_anchored_start());
2940         assert!(t(r"$+").is_anchored_end());
2941         assert!(t(r"^+").is_line_anchored_start());
2942         assert!(t(r"$+").is_line_anchored_end());
2943         assert!(t(r"^++").is_anchored_start());
2944         assert!(t(r"$++").is_anchored_end());
2945         assert!(t(r"^++").is_line_anchored_start());
2946         assert!(t(r"$++").is_line_anchored_end());
2947         assert!(t(r"(^)+").is_anchored_start());
2948         assert!(t(r"($)+").is_anchored_end());
2949         assert!(t(r"(^)+").is_line_anchored_start());
2950         assert!(t(r"($)+").is_line_anchored_end());
2951 
2952         assert!(t(r"$^").is_anchored_start());
2953         assert!(t(r"$^").is_anchored_start());
2954         assert!(t(r"$^").is_line_anchored_end());
2955         assert!(t(r"$^").is_line_anchored_end());
2956         assert!(t(r"$^|^$").is_anchored_start());
2957         assert!(t(r"$^|^$").is_anchored_end());
2958         assert!(t(r"$^|^$").is_line_anchored_start());
2959         assert!(t(r"$^|^$").is_line_anchored_end());
2960 
2961         assert!(t(r"\b^").is_anchored_start());
2962         assert!(t(r"$\b").is_anchored_end());
2963         assert!(t(r"\b^").is_line_anchored_start());
2964         assert!(t(r"$\b").is_line_anchored_end());
2965         assert!(t(r"^(?m:^)").is_anchored_start());
2966         assert!(t(r"(?m:$)$").is_anchored_end());
2967         assert!(t(r"^(?m:^)").is_line_anchored_start());
2968         assert!(t(r"(?m:$)$").is_line_anchored_end());
2969         assert!(t(r"(?m:^)^").is_anchored_start());
2970         assert!(t(r"$(?m:$)").is_anchored_end());
2971         assert!(t(r"(?m:^)^").is_line_anchored_start());
2972         assert!(t(r"$(?m:$)").is_line_anchored_end());
2973 
2974         // Negative examples.
2975         assert!(!t(r"(?m)^").is_anchored_start());
2976         assert!(!t(r"(?m)$").is_anchored_end());
2977         assert!(!t(r"(?m:^$)|$^").is_anchored_start());
2978         assert!(!t(r"(?m:^$)|$^").is_anchored_end());
2979         assert!(!t(r"$^|(?m:^$)").is_anchored_start());
2980         assert!(!t(r"$^|(?m:^$)").is_anchored_end());
2981 
2982         assert!(!t(r"a^").is_anchored_start());
2983         assert!(!t(r"$a").is_anchored_start());
2984         assert!(!t(r"a^").is_line_anchored_start());
2985         assert!(!t(r"$a").is_line_anchored_start());
2986 
2987         assert!(!t(r"a^").is_anchored_end());
2988         assert!(!t(r"$a").is_anchored_end());
2989         assert!(!t(r"a^").is_line_anchored_end());
2990         assert!(!t(r"$a").is_line_anchored_end());
2991 
2992         assert!(!t(r"^foo|bar").is_anchored_start());
2993         assert!(!t(r"foo|bar$").is_anchored_end());
2994         assert!(!t(r"^foo|bar").is_line_anchored_start());
2995         assert!(!t(r"foo|bar$").is_line_anchored_end());
2996 
2997         assert!(!t(r"^*").is_anchored_start());
2998         assert!(!t(r"$*").is_anchored_end());
2999         assert!(!t(r"^*").is_line_anchored_start());
3000         assert!(!t(r"$*").is_line_anchored_end());
3001         assert!(!t(r"^*+").is_anchored_start());
3002         assert!(!t(r"$*+").is_anchored_end());
3003         assert!(!t(r"^*+").is_line_anchored_start());
3004         assert!(!t(r"$*+").is_line_anchored_end());
3005         assert!(!t(r"^+*").is_anchored_start());
3006         assert!(!t(r"$+*").is_anchored_end());
3007         assert!(!t(r"^+*").is_line_anchored_start());
3008         assert!(!t(r"$+*").is_line_anchored_end());
3009         assert!(!t(r"(^)*").is_anchored_start());
3010         assert!(!t(r"($)*").is_anchored_end());
3011         assert!(!t(r"(^)*").is_line_anchored_start());
3012         assert!(!t(r"($)*").is_line_anchored_end());
3013     }
3014 
3015     #[test]
analysis_is_line_anchored()3016     fn analysis_is_line_anchored() {
3017         assert!(t(r"(?m)^(foo|bar)").is_line_anchored_start());
3018         assert!(t(r"(?m)(foo|bar)$").is_line_anchored_end());
3019 
3020         assert!(t(r"(?m)^foo|^bar").is_line_anchored_start());
3021         assert!(t(r"(?m)foo$|bar$").is_line_anchored_end());
3022 
3023         assert!(t(r"(?m)^").is_line_anchored_start());
3024         assert!(t(r"(?m)$").is_line_anchored_end());
3025 
3026         assert!(t(r"(?m:^$)|$^").is_line_anchored_start());
3027         assert!(t(r"(?m:^$)|$^").is_line_anchored_end());
3028 
3029         assert!(t(r"$^|(?m:^$)").is_line_anchored_start());
3030         assert!(t(r"$^|(?m:^$)").is_line_anchored_end());
3031     }
3032 
3033     #[test]
analysis_is_any_anchored()3034     fn analysis_is_any_anchored() {
3035         // Positive examples.
3036         assert!(t(r"^").is_any_anchored_start());
3037         assert!(t(r"$").is_any_anchored_end());
3038         assert!(t(r"\A").is_any_anchored_start());
3039         assert!(t(r"\z").is_any_anchored_end());
3040 
3041         // Negative examples.
3042         assert!(!t(r"(?m)^").is_any_anchored_start());
3043         assert!(!t(r"(?m)$").is_any_anchored_end());
3044         assert!(!t(r"$").is_any_anchored_start());
3045         assert!(!t(r"^").is_any_anchored_end());
3046     }
3047 
3048     #[test]
analysis_is_match_empty()3049     fn analysis_is_match_empty() {
3050         // Positive examples.
3051         assert!(t(r"").is_match_empty());
3052         assert!(t(r"()").is_match_empty());
3053         assert!(t(r"()*").is_match_empty());
3054         assert!(t(r"()+").is_match_empty());
3055         assert!(t(r"()?").is_match_empty());
3056         assert!(t(r"a*").is_match_empty());
3057         assert!(t(r"a?").is_match_empty());
3058         assert!(t(r"a{0}").is_match_empty());
3059         assert!(t(r"a{0,}").is_match_empty());
3060         assert!(t(r"a{0,1}").is_match_empty());
3061         assert!(t(r"a{0,10}").is_match_empty());
3062         #[cfg(feature = "unicode-gencat")]
3063         assert!(t(r"\pL*").is_match_empty());
3064         assert!(t(r"a*|b").is_match_empty());
3065         assert!(t(r"b|a*").is_match_empty());
3066         assert!(t(r"a*a?(abcd)*").is_match_empty());
3067         assert!(t(r"^").is_match_empty());
3068         assert!(t(r"$").is_match_empty());
3069         assert!(t(r"(?m)^").is_match_empty());
3070         assert!(t(r"(?m)$").is_match_empty());
3071         assert!(t(r"\A").is_match_empty());
3072         assert!(t(r"\z").is_match_empty());
3073         assert!(t(r"\B").is_match_empty());
3074         assert!(t_bytes(r"(?-u)\B").is_match_empty());
3075 
3076         // Negative examples.
3077         assert!(!t(r"a+").is_match_empty());
3078         assert!(!t(r"a{1}").is_match_empty());
3079         assert!(!t(r"a{1,}").is_match_empty());
3080         assert!(!t(r"a{1,2}").is_match_empty());
3081         assert!(!t(r"a{1,10}").is_match_empty());
3082         assert!(!t(r"b|a").is_match_empty());
3083         assert!(!t(r"a*a+(abcd)*").is_match_empty());
3084         assert!(!t(r"\b").is_match_empty());
3085         assert!(!t(r"(?-u)\b").is_match_empty());
3086     }
3087 
3088     #[test]
analysis_is_literal()3089     fn analysis_is_literal() {
3090         // Positive examples.
3091         assert!(t(r"").is_literal());
3092         assert!(t(r"a").is_literal());
3093         assert!(t(r"ab").is_literal());
3094         assert!(t(r"abc").is_literal());
3095         assert!(t(r"(?m)abc").is_literal());
3096 
3097         // Negative examples.
3098         assert!(!t(r"^").is_literal());
3099         assert!(!t(r"a|b").is_literal());
3100         assert!(!t(r"(a)").is_literal());
3101         assert!(!t(r"a+").is_literal());
3102         assert!(!t(r"foo(a)").is_literal());
3103         assert!(!t(r"(a)foo").is_literal());
3104         assert!(!t(r"[a]").is_literal());
3105     }
3106 
3107     #[test]
analysis_is_alternation_literal()3108     fn analysis_is_alternation_literal() {
3109         // Positive examples.
3110         assert!(t(r"").is_alternation_literal());
3111         assert!(t(r"a").is_alternation_literal());
3112         assert!(t(r"ab").is_alternation_literal());
3113         assert!(t(r"abc").is_alternation_literal());
3114         assert!(t(r"(?m)abc").is_alternation_literal());
3115         assert!(t(r"a|b").is_alternation_literal());
3116         assert!(t(r"a|b|c").is_alternation_literal());
3117         assert!(t(r"foo|bar").is_alternation_literal());
3118         assert!(t(r"foo|bar|baz").is_alternation_literal());
3119 
3120         // Negative examples.
3121         assert!(!t(r"^").is_alternation_literal());
3122         assert!(!t(r"(a)").is_alternation_literal());
3123         assert!(!t(r"a+").is_alternation_literal());
3124         assert!(!t(r"foo(a)").is_alternation_literal());
3125         assert!(!t(r"(a)foo").is_alternation_literal());
3126         assert!(!t(r"[a]").is_alternation_literal());
3127         assert!(!t(r"[a]|b").is_alternation_literal());
3128         assert!(!t(r"a|[b]").is_alternation_literal());
3129         assert!(!t(r"(a)|b").is_alternation_literal());
3130         assert!(!t(r"a|(b)").is_alternation_literal());
3131     }
3132 }
3133