1 /*!
2 Defines a translator that converts an `Ast` to an `Hir`.
3 */
4
5 use std::cell::{Cell, RefCell};
6 use std::result;
7
8 use ast::{self, Ast, Span, Visitor};
9 use hir::{self, Error, ErrorKind, Hir};
10 use unicode::{self, ClassQuery};
11
12 type Result<T> = result::Result<T, Error>;
13
14 /// A builder for constructing an AST->HIR translator.
15 #[derive(Clone, Debug)]
16 pub struct TranslatorBuilder {
17 allow_invalid_utf8: bool,
18 flags: Flags,
19 }
20
21 impl Default for TranslatorBuilder {
default() -> TranslatorBuilder22 fn default() -> TranslatorBuilder {
23 TranslatorBuilder::new()
24 }
25 }
26
27 impl TranslatorBuilder {
28 /// Create a new translator builder with a default c onfiguration.
new() -> TranslatorBuilder29 pub fn new() -> TranslatorBuilder {
30 TranslatorBuilder {
31 allow_invalid_utf8: false,
32 flags: Flags::default(),
33 }
34 }
35
36 /// Build a translator using the current configuration.
build(&self) -> Translator37 pub fn build(&self) -> Translator {
38 Translator {
39 stack: RefCell::new(vec![]),
40 flags: Cell::new(self.flags),
41 allow_invalid_utf8: self.allow_invalid_utf8,
42 }
43 }
44
45 /// When enabled, translation will permit the construction of a regular
46 /// expression that may match invalid UTF-8.
47 ///
48 /// When disabled (the default), the translator is guaranteed to produce
49 /// an expression that will only ever match valid UTF-8 (otherwise, the
50 /// translator will return an error).
51 ///
52 /// Perhaps surprisingly, when invalid UTF-8 isn't allowed, a negated ASCII
53 /// word boundary (uttered as `(?-u:\B)` in the concrete syntax) will cause
54 /// the parser to return an error. Namely, a negated ASCII word boundary
55 /// can result in matching positions that aren't valid UTF-8 boundaries.
allow_invalid_utf8(&mut self, yes: bool) -> &mut TranslatorBuilder56 pub fn allow_invalid_utf8(&mut self, yes: bool) -> &mut TranslatorBuilder {
57 self.allow_invalid_utf8 = yes;
58 self
59 }
60
61 /// Enable or disable the case insensitive flag (`i`) by default.
case_insensitive(&mut self, yes: bool) -> &mut TranslatorBuilder62 pub fn case_insensitive(&mut self, yes: bool) -> &mut TranslatorBuilder {
63 self.flags.case_insensitive = if yes { Some(true) } else { None };
64 self
65 }
66
67 /// Enable or disable the multi-line matching flag (`m`) by default.
multi_line(&mut self, yes: bool) -> &mut TranslatorBuilder68 pub fn multi_line(&mut self, yes: bool) -> &mut TranslatorBuilder {
69 self.flags.multi_line = if yes { Some(true) } else { None };
70 self
71 }
72
73 /// Enable or disable the "dot matches any character" flag (`s`) by
74 /// default.
dot_matches_new_line( &mut self, yes: bool, ) -> &mut TranslatorBuilder75 pub fn dot_matches_new_line(
76 &mut self,
77 yes: bool,
78 ) -> &mut TranslatorBuilder {
79 self.flags.dot_matches_new_line = if yes { Some(true) } else { None };
80 self
81 }
82
83 /// Enable or disable the "swap greed" flag (`U`) by default.
swap_greed(&mut self, yes: bool) -> &mut TranslatorBuilder84 pub fn swap_greed(&mut self, yes: bool) -> &mut TranslatorBuilder {
85 self.flags.swap_greed = if yes { Some(true) } else { None };
86 self
87 }
88
89 /// Enable or disable the Unicode flag (`u`) by default.
unicode(&mut self, yes: bool) -> &mut TranslatorBuilder90 pub fn unicode(&mut self, yes: bool) -> &mut TranslatorBuilder {
91 self.flags.unicode = if yes { None } else { Some(false) };
92 self
93 }
94 }
95
96 /// A translator maps abstract syntax to a high level intermediate
97 /// representation.
98 ///
99 /// A translator may be benefit from reuse. That is, a translator can translate
100 /// many abstract syntax trees.
101 ///
102 /// A `Translator` can be configured in more detail via a
103 /// [`TranslatorBuilder`](struct.TranslatorBuilder.html).
104 #[derive(Clone, Debug)]
105 pub struct Translator {
106 /// Our call stack, but on the heap.
107 stack: RefCell<Vec<HirFrame>>,
108 /// The current flag settings.
109 flags: Cell<Flags>,
110 /// Whether we're allowed to produce HIR that can match arbitrary bytes.
111 allow_invalid_utf8: bool,
112 }
113
114 impl Translator {
115 /// Create a new translator using the default configuration.
new() -> Translator116 pub fn new() -> Translator {
117 TranslatorBuilder::new().build()
118 }
119
120 /// Translate the given abstract syntax tree (AST) into a high level
121 /// intermediate representation (HIR).
122 ///
123 /// If there was a problem doing the translation, then an HIR-specific
124 /// error is returned.
125 ///
126 /// The original pattern string used to produce the `Ast` *must* also be
127 /// provided. The translator does not use the pattern string during any
128 /// correct translation, but is used for error reporting.
translate(&mut self, pattern: &str, ast: &Ast) -> Result<Hir>129 pub fn translate(&mut self, pattern: &str, ast: &Ast) -> Result<Hir> {
130 ast::visit(ast, TranslatorI::new(self, pattern))
131 }
132 }
133
134 /// An HirFrame is a single stack frame, represented explicitly, which is
135 /// created for each item in the Ast that we traverse.
136 ///
137 /// Note that technically, this type doesn't represent our entire stack
138 /// frame. In particular, the Ast visitor represents any state associated with
139 /// traversing the Ast itself.
140 #[derive(Clone, Debug)]
141 enum HirFrame {
142 /// An arbitrary HIR expression. These get pushed whenever we hit a base
143 /// case in the Ast. They get popped after an inductive (i.e., recursive)
144 /// step is complete.
145 Expr(Hir),
146 /// A Unicode character class. This frame is mutated as we descend into
147 /// the Ast of a character class (which is itself its own mini recursive
148 /// structure).
149 ClassUnicode(hir::ClassUnicode),
150 /// A byte-oriented character class. This frame is mutated as we descend
151 /// into the Ast of a character class (which is itself its own mini
152 /// recursive structure).
153 ///
154 /// Byte character classes are created when Unicode mode (`u`) is disabled.
155 /// If `allow_invalid_utf8` is disabled (the default), then a byte
156 /// character is only permitted to match ASCII text.
157 ClassBytes(hir::ClassBytes),
158 /// This is pushed on to the stack upon first seeing any kind of group,
159 /// indicated by parentheses (including non-capturing groups). It is popped
160 /// upon leaving a group.
161 Group {
162 /// The old active flags when this group was opened.
163 ///
164 /// If this group sets flags, then the new active flags are set to the
165 /// result of merging the old flags with the flags introduced by this
166 /// group. If the group doesn't set any flags, then this is simply
167 /// equivalent to whatever flags were set when the group was opened.
168 ///
169 /// When this group is popped, the active flags should be restored to
170 /// the flags set here.
171 ///
172 /// The "active" flags correspond to whatever flags are set in the
173 /// Translator.
174 old_flags: Flags,
175 },
176 /// This is pushed whenever a concatenation is observed. After visiting
177 /// every sub-expression in the concatenation, the translator's stack is
178 /// popped until it sees a Concat frame.
179 Concat,
180 /// This is pushed whenever an alternation is observed. After visiting
181 /// every sub-expression in the alternation, the translator's stack is
182 /// popped until it sees an Alternation frame.
183 Alternation,
184 }
185
186 impl HirFrame {
187 /// Assert that the current stack frame is an Hir expression and return it.
unwrap_expr(self) -> Hir188 fn unwrap_expr(self) -> Hir {
189 match self {
190 HirFrame::Expr(expr) => expr,
191 _ => panic!("tried to unwrap expr from HirFrame, got: {:?}", self),
192 }
193 }
194
195 /// Assert that the current stack frame is a Unicode class expression and
196 /// return it.
unwrap_class_unicode(self) -> hir::ClassUnicode197 fn unwrap_class_unicode(self) -> hir::ClassUnicode {
198 match self {
199 HirFrame::ClassUnicode(cls) => cls,
200 _ => panic!(
201 "tried to unwrap Unicode class \
202 from HirFrame, got: {:?}",
203 self
204 ),
205 }
206 }
207
208 /// Assert that the current stack frame is a byte class expression and
209 /// return it.
unwrap_class_bytes(self) -> hir::ClassBytes210 fn unwrap_class_bytes(self) -> hir::ClassBytes {
211 match self {
212 HirFrame::ClassBytes(cls) => cls,
213 _ => panic!(
214 "tried to unwrap byte class \
215 from HirFrame, got: {:?}",
216 self
217 ),
218 }
219 }
220
221 /// Assert that the current stack frame is a group indicator and return
222 /// its corresponding flags (the flags that were active at the time the
223 /// group was entered).
unwrap_group(self) -> Flags224 fn unwrap_group(self) -> Flags {
225 match self {
226 HirFrame::Group { old_flags } => old_flags,
227 _ => {
228 panic!("tried to unwrap group from HirFrame, got: {:?}", self)
229 }
230 }
231 }
232 }
233
234 impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
235 type Output = Hir;
236 type Err = Error;
237
finish(self) -> Result<Hir>238 fn finish(self) -> Result<Hir> {
239 // ... otherwise, we should have exactly one HIR on the stack.
240 assert_eq!(self.trans().stack.borrow().len(), 1);
241 Ok(self.pop().unwrap().unwrap_expr())
242 }
243
visit_pre(&mut self, ast: &Ast) -> Result<()>244 fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
245 match *ast {
246 Ast::Class(ast::Class::Bracketed(_)) => {
247 if self.flags().unicode() {
248 let cls = hir::ClassUnicode::empty();
249 self.push(HirFrame::ClassUnicode(cls));
250 } else {
251 let cls = hir::ClassBytes::empty();
252 self.push(HirFrame::ClassBytes(cls));
253 }
254 }
255 Ast::Group(ref x) => {
256 let old_flags = x
257 .flags()
258 .map(|ast| self.set_flags(ast))
259 .unwrap_or_else(|| self.flags());
260 self.push(HirFrame::Group { old_flags });
261 }
262 Ast::Concat(ref x) if x.asts.is_empty() => {}
263 Ast::Concat(_) => {
264 self.push(HirFrame::Concat);
265 }
266 Ast::Alternation(ref x) if x.asts.is_empty() => {}
267 Ast::Alternation(_) => {
268 self.push(HirFrame::Alternation);
269 }
270 _ => {}
271 }
272 Ok(())
273 }
274
visit_post(&mut self, ast: &Ast) -> Result<()>275 fn visit_post(&mut self, ast: &Ast) -> Result<()> {
276 match *ast {
277 Ast::Empty(_) => {
278 self.push(HirFrame::Expr(Hir::empty()));
279 }
280 Ast::Flags(ref x) => {
281 self.set_flags(&x.flags);
282 // Flags in the AST are generally considered directives and
283 // not actual sub-expressions. However, they can be used in
284 // the concrete syntax like `((?i))`, and we need some kind of
285 // indication of an expression there, and Empty is the correct
286 // choice.
287 //
288 // There can also be things like `(?i)+`, but we rule those out
289 // in the parser. In the future, we might allow them for
290 // consistency sake.
291 self.push(HirFrame::Expr(Hir::empty()));
292 }
293 Ast::Literal(ref x) => {
294 self.push(HirFrame::Expr(self.hir_literal(x)?));
295 }
296 Ast::Dot(span) => {
297 self.push(HirFrame::Expr(self.hir_dot(span)?));
298 }
299 Ast::Assertion(ref x) => {
300 self.push(HirFrame::Expr(self.hir_assertion(x)?));
301 }
302 Ast::Class(ast::Class::Perl(ref x)) => {
303 if self.flags().unicode() {
304 let cls = self.hir_perl_unicode_class(x)?;
305 let hcls = hir::Class::Unicode(cls);
306 self.push(HirFrame::Expr(Hir::class(hcls)));
307 } else {
308 let cls = self.hir_perl_byte_class(x);
309 let hcls = hir::Class::Bytes(cls);
310 self.push(HirFrame::Expr(Hir::class(hcls)));
311 }
312 }
313 Ast::Class(ast::Class::Unicode(ref x)) => {
314 let cls = hir::Class::Unicode(self.hir_unicode_class(x)?);
315 self.push(HirFrame::Expr(Hir::class(cls)));
316 }
317 Ast::Class(ast::Class::Bracketed(ref ast)) => {
318 if self.flags().unicode() {
319 let mut cls = self.pop().unwrap().unwrap_class_unicode();
320 self.unicode_fold_and_negate(
321 &ast.span,
322 ast.negated,
323 &mut cls,
324 )?;
325 if cls.iter().next().is_none() {
326 return Err(self.error(
327 ast.span,
328 ErrorKind::EmptyClassNotAllowed,
329 ));
330 }
331 let expr = Hir::class(hir::Class::Unicode(cls));
332 self.push(HirFrame::Expr(expr));
333 } else {
334 let mut cls = self.pop().unwrap().unwrap_class_bytes();
335 self.bytes_fold_and_negate(
336 &ast.span,
337 ast.negated,
338 &mut cls,
339 )?;
340 if cls.iter().next().is_none() {
341 return Err(self.error(
342 ast.span,
343 ErrorKind::EmptyClassNotAllowed,
344 ));
345 }
346
347 let expr = Hir::class(hir::Class::Bytes(cls));
348 self.push(HirFrame::Expr(expr));
349 }
350 }
351 Ast::Repetition(ref x) => {
352 let expr = self.pop().unwrap().unwrap_expr();
353 self.push(HirFrame::Expr(self.hir_repetition(x, expr)));
354 }
355 Ast::Group(ref x) => {
356 let expr = self.pop().unwrap().unwrap_expr();
357 let old_flags = self.pop().unwrap().unwrap_group();
358 self.trans().flags.set(old_flags);
359 self.push(HirFrame::Expr(self.hir_group(x, expr)));
360 }
361 Ast::Concat(_) => {
362 let mut exprs = vec![];
363 while let Some(HirFrame::Expr(expr)) = self.pop() {
364 if !expr.kind().is_empty() {
365 exprs.push(expr);
366 }
367 }
368 exprs.reverse();
369 self.push(HirFrame::Expr(Hir::concat(exprs)));
370 }
371 Ast::Alternation(_) => {
372 let mut exprs = vec![];
373 while let Some(HirFrame::Expr(expr)) = self.pop() {
374 exprs.push(expr);
375 }
376 exprs.reverse();
377 self.push(HirFrame::Expr(Hir::alternation(exprs)));
378 }
379 }
380 Ok(())
381 }
382
visit_class_set_item_pre( &mut self, ast: &ast::ClassSetItem, ) -> Result<()>383 fn visit_class_set_item_pre(
384 &mut self,
385 ast: &ast::ClassSetItem,
386 ) -> Result<()> {
387 match *ast {
388 ast::ClassSetItem::Bracketed(_) => {
389 if self.flags().unicode() {
390 let cls = hir::ClassUnicode::empty();
391 self.push(HirFrame::ClassUnicode(cls));
392 } else {
393 let cls = hir::ClassBytes::empty();
394 self.push(HirFrame::ClassBytes(cls));
395 }
396 }
397 // We needn't handle the Union case here since the visitor will
398 // do it for us.
399 _ => {}
400 }
401 Ok(())
402 }
403
visit_class_set_item_post( &mut self, ast: &ast::ClassSetItem, ) -> Result<()>404 fn visit_class_set_item_post(
405 &mut self,
406 ast: &ast::ClassSetItem,
407 ) -> Result<()> {
408 match *ast {
409 ast::ClassSetItem::Empty(_) => {}
410 ast::ClassSetItem::Literal(ref x) => {
411 if self.flags().unicode() {
412 let mut cls = self.pop().unwrap().unwrap_class_unicode();
413 cls.push(hir::ClassUnicodeRange::new(x.c, x.c));
414 self.push(HirFrame::ClassUnicode(cls));
415 } else {
416 let mut cls = self.pop().unwrap().unwrap_class_bytes();
417 let byte = self.class_literal_byte(x)?;
418 cls.push(hir::ClassBytesRange::new(byte, byte));
419 self.push(HirFrame::ClassBytes(cls));
420 }
421 }
422 ast::ClassSetItem::Range(ref x) => {
423 if self.flags().unicode() {
424 let mut cls = self.pop().unwrap().unwrap_class_unicode();
425 cls.push(hir::ClassUnicodeRange::new(x.start.c, x.end.c));
426 self.push(HirFrame::ClassUnicode(cls));
427 } else {
428 let mut cls = self.pop().unwrap().unwrap_class_bytes();
429 let start = self.class_literal_byte(&x.start)?;
430 let end = self.class_literal_byte(&x.end)?;
431 cls.push(hir::ClassBytesRange::new(start, end));
432 self.push(HirFrame::ClassBytes(cls));
433 }
434 }
435 ast::ClassSetItem::Ascii(ref x) => {
436 if self.flags().unicode() {
437 let mut cls = self.pop().unwrap().unwrap_class_unicode();
438 for &(s, e) in ascii_class(&x.kind) {
439 cls.push(hir::ClassUnicodeRange::new(s, e));
440 }
441 self.unicode_fold_and_negate(
442 &x.span, x.negated, &mut cls,
443 )?;
444 self.push(HirFrame::ClassUnicode(cls));
445 } else {
446 let mut cls = self.pop().unwrap().unwrap_class_bytes();
447 for &(s, e) in ascii_class(&x.kind) {
448 cls.push(hir::ClassBytesRange::new(s as u8, e as u8));
449 }
450 self.bytes_fold_and_negate(&x.span, x.negated, &mut cls)?;
451 self.push(HirFrame::ClassBytes(cls));
452 }
453 }
454 ast::ClassSetItem::Unicode(ref x) => {
455 let xcls = self.hir_unicode_class(x)?;
456 let mut cls = self.pop().unwrap().unwrap_class_unicode();
457 cls.union(&xcls);
458 self.push(HirFrame::ClassUnicode(cls));
459 }
460 ast::ClassSetItem::Perl(ref x) => {
461 if self.flags().unicode() {
462 let xcls = self.hir_perl_unicode_class(x)?;
463 let mut cls = self.pop().unwrap().unwrap_class_unicode();
464 cls.union(&xcls);
465 self.push(HirFrame::ClassUnicode(cls));
466 } else {
467 let xcls = self.hir_perl_byte_class(x);
468 let mut cls = self.pop().unwrap().unwrap_class_bytes();
469 cls.union(&xcls);
470 self.push(HirFrame::ClassBytes(cls));
471 }
472 }
473 ast::ClassSetItem::Bracketed(ref ast) => {
474 if self.flags().unicode() {
475 let mut cls1 = self.pop().unwrap().unwrap_class_unicode();
476 self.unicode_fold_and_negate(
477 &ast.span,
478 ast.negated,
479 &mut cls1,
480 )?;
481
482 let mut cls2 = self.pop().unwrap().unwrap_class_unicode();
483 cls2.union(&cls1);
484 self.push(HirFrame::ClassUnicode(cls2));
485 } else {
486 let mut cls1 = self.pop().unwrap().unwrap_class_bytes();
487 self.bytes_fold_and_negate(
488 &ast.span,
489 ast.negated,
490 &mut cls1,
491 )?;
492
493 let mut cls2 = self.pop().unwrap().unwrap_class_bytes();
494 cls2.union(&cls1);
495 self.push(HirFrame::ClassBytes(cls2));
496 }
497 }
498 // This is handled automatically by the visitor.
499 ast::ClassSetItem::Union(_) => {}
500 }
501 Ok(())
502 }
503
visit_class_set_binary_op_pre( &mut self, _op: &ast::ClassSetBinaryOp, ) -> Result<()>504 fn visit_class_set_binary_op_pre(
505 &mut self,
506 _op: &ast::ClassSetBinaryOp,
507 ) -> Result<()> {
508 if self.flags().unicode() {
509 let cls = hir::ClassUnicode::empty();
510 self.push(HirFrame::ClassUnicode(cls));
511 } else {
512 let cls = hir::ClassBytes::empty();
513 self.push(HirFrame::ClassBytes(cls));
514 }
515 Ok(())
516 }
517
visit_class_set_binary_op_in( &mut self, _op: &ast::ClassSetBinaryOp, ) -> Result<()>518 fn visit_class_set_binary_op_in(
519 &mut self,
520 _op: &ast::ClassSetBinaryOp,
521 ) -> Result<()> {
522 if self.flags().unicode() {
523 let cls = hir::ClassUnicode::empty();
524 self.push(HirFrame::ClassUnicode(cls));
525 } else {
526 let cls = hir::ClassBytes::empty();
527 self.push(HirFrame::ClassBytes(cls));
528 }
529 Ok(())
530 }
531
visit_class_set_binary_op_post( &mut self, op: &ast::ClassSetBinaryOp, ) -> Result<()>532 fn visit_class_set_binary_op_post(
533 &mut self,
534 op: &ast::ClassSetBinaryOp,
535 ) -> Result<()> {
536 use ast::ClassSetBinaryOpKind::*;
537
538 if self.flags().unicode() {
539 let mut rhs = self.pop().unwrap().unwrap_class_unicode();
540 let mut lhs = self.pop().unwrap().unwrap_class_unicode();
541 let mut cls = self.pop().unwrap().unwrap_class_unicode();
542 if self.flags().case_insensitive() {
543 rhs.try_case_fold_simple().map_err(|_| {
544 self.error(
545 op.rhs.span().clone(),
546 ErrorKind::UnicodeCaseUnavailable,
547 )
548 })?;
549 lhs.try_case_fold_simple().map_err(|_| {
550 self.error(
551 op.lhs.span().clone(),
552 ErrorKind::UnicodeCaseUnavailable,
553 )
554 })?;
555 }
556 match op.kind {
557 Intersection => lhs.intersect(&rhs),
558 Difference => lhs.difference(&rhs),
559 SymmetricDifference => lhs.symmetric_difference(&rhs),
560 }
561 cls.union(&lhs);
562 self.push(HirFrame::ClassUnicode(cls));
563 } else {
564 let mut rhs = self.pop().unwrap().unwrap_class_bytes();
565 let mut lhs = self.pop().unwrap().unwrap_class_bytes();
566 let mut cls = self.pop().unwrap().unwrap_class_bytes();
567 if self.flags().case_insensitive() {
568 rhs.case_fold_simple();
569 lhs.case_fold_simple();
570 }
571 match op.kind {
572 Intersection => lhs.intersect(&rhs),
573 Difference => lhs.difference(&rhs),
574 SymmetricDifference => lhs.symmetric_difference(&rhs),
575 }
576 cls.union(&lhs);
577 self.push(HirFrame::ClassBytes(cls));
578 }
579 Ok(())
580 }
581 }
582
583 /// The internal implementation of a translator.
584 ///
585 /// This type is responsible for carrying around the original pattern string,
586 /// which is not tied to the internal state of a translator.
587 ///
588 /// A TranslatorI exists for the time it takes to translate a single Ast.
589 #[derive(Clone, Debug)]
590 struct TranslatorI<'t, 'p> {
591 trans: &'t Translator,
592 pattern: &'p str,
593 }
594
595 impl<'t, 'p> TranslatorI<'t, 'p> {
596 /// Build a new internal translator.
new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p>597 fn new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p> {
598 TranslatorI { trans: trans, pattern: pattern }
599 }
600
601 /// Return a reference to the underlying translator.
trans(&self) -> &Translator602 fn trans(&self) -> &Translator {
603 &self.trans
604 }
605
606 /// Push the given frame on to the call stack.
push(&self, frame: HirFrame)607 fn push(&self, frame: HirFrame) {
608 self.trans().stack.borrow_mut().push(frame);
609 }
610
611 /// Pop the top of the call stack. If the call stack is empty, return None.
pop(&self) -> Option<HirFrame>612 fn pop(&self) -> Option<HirFrame> {
613 self.trans().stack.borrow_mut().pop()
614 }
615
616 /// Create a new error with the given span and error type.
error(&self, span: Span, kind: ErrorKind) -> Error617 fn error(&self, span: Span, kind: ErrorKind) -> Error {
618 Error { kind: kind, pattern: self.pattern.to_string(), span: span }
619 }
620
621 /// Return a copy of the active flags.
flags(&self) -> Flags622 fn flags(&self) -> Flags {
623 self.trans().flags.get()
624 }
625
626 /// Set the flags of this translator from the flags set in the given AST.
627 /// Then, return the old flags.
set_flags(&self, ast_flags: &ast::Flags) -> Flags628 fn set_flags(&self, ast_flags: &ast::Flags) -> Flags {
629 let old_flags = self.flags();
630 let mut new_flags = Flags::from_ast(ast_flags);
631 new_flags.merge(&old_flags);
632 self.trans().flags.set(new_flags);
633 old_flags
634 }
635
hir_literal(&self, lit: &ast::Literal) -> Result<Hir>636 fn hir_literal(&self, lit: &ast::Literal) -> Result<Hir> {
637 let ch = match self.literal_to_char(lit)? {
638 byte @ hir::Literal::Byte(_) => return Ok(Hir::literal(byte)),
639 hir::Literal::Unicode(ch) => ch,
640 };
641 if self.flags().case_insensitive() {
642 self.hir_from_char_case_insensitive(lit.span, ch)
643 } else {
644 self.hir_from_char(lit.span, ch)
645 }
646 }
647
648 /// Convert an Ast literal to its scalar representation.
649 ///
650 /// When Unicode mode is enabled, then this always succeeds and returns a
651 /// `char` (Unicode scalar value).
652 ///
653 /// When Unicode mode is disabled, then a raw byte is returned. If that
654 /// byte is not ASCII and invalid UTF-8 is not allowed, then this returns
655 /// an error.
literal_to_char(&self, lit: &ast::Literal) -> Result<hir::Literal>656 fn literal_to_char(&self, lit: &ast::Literal) -> Result<hir::Literal> {
657 if self.flags().unicode() {
658 return Ok(hir::Literal::Unicode(lit.c));
659 }
660 let byte = match lit.byte() {
661 None => return Ok(hir::Literal::Unicode(lit.c)),
662 Some(byte) => byte,
663 };
664 if byte <= 0x7F {
665 return Ok(hir::Literal::Unicode(byte as char));
666 }
667 if !self.trans().allow_invalid_utf8 {
668 return Err(self.error(lit.span, ErrorKind::InvalidUtf8));
669 }
670 Ok(hir::Literal::Byte(byte))
671 }
672
hir_from_char(&self, span: Span, c: char) -> Result<Hir>673 fn hir_from_char(&self, span: Span, c: char) -> Result<Hir> {
674 if !self.flags().unicode() && c.len_utf8() > 1 {
675 return Err(self.error(span, ErrorKind::UnicodeNotAllowed));
676 }
677 Ok(Hir::literal(hir::Literal::Unicode(c)))
678 }
679
hir_from_char_case_insensitive( &self, span: Span, c: char, ) -> Result<Hir>680 fn hir_from_char_case_insensitive(
681 &self,
682 span: Span,
683 c: char,
684 ) -> Result<Hir> {
685 if self.flags().unicode() {
686 // If case folding won't do anything, then don't bother trying.
687 let map =
688 unicode::contains_simple_case_mapping(c, c).map_err(|_| {
689 self.error(span, ErrorKind::UnicodeCaseUnavailable)
690 })?;
691 if !map {
692 return self.hir_from_char(span, c);
693 }
694 let mut cls =
695 hir::ClassUnicode::new(vec![hir::ClassUnicodeRange::new(
696 c, c,
697 )]);
698 cls.try_case_fold_simple().map_err(|_| {
699 self.error(span, ErrorKind::UnicodeCaseUnavailable)
700 })?;
701 Ok(Hir::class(hir::Class::Unicode(cls)))
702 } else {
703 if c.len_utf8() > 1 {
704 return Err(self.error(span, ErrorKind::UnicodeNotAllowed));
705 }
706 // If case folding won't do anything, then don't bother trying.
707 match c {
708 'A'..='Z' | 'a'..='z' => {}
709 _ => return self.hir_from_char(span, c),
710 }
711 let mut cls =
712 hir::ClassBytes::new(vec![hir::ClassBytesRange::new(
713 c as u8, c as u8,
714 )]);
715 cls.case_fold_simple();
716 Ok(Hir::class(hir::Class::Bytes(cls)))
717 }
718 }
719
hir_dot(&self, span: Span) -> Result<Hir>720 fn hir_dot(&self, span: Span) -> Result<Hir> {
721 let unicode = self.flags().unicode();
722 if !unicode && !self.trans().allow_invalid_utf8 {
723 return Err(self.error(span, ErrorKind::InvalidUtf8));
724 }
725 Ok(if self.flags().dot_matches_new_line() {
726 Hir::any(!unicode)
727 } else {
728 Hir::dot(!unicode)
729 })
730 }
731
hir_assertion(&self, asst: &ast::Assertion) -> Result<Hir>732 fn hir_assertion(&self, asst: &ast::Assertion) -> Result<Hir> {
733 let unicode = self.flags().unicode();
734 let multi_line = self.flags().multi_line();
735 Ok(match asst.kind {
736 ast::AssertionKind::StartLine => Hir::anchor(if multi_line {
737 hir::Anchor::StartLine
738 } else {
739 hir::Anchor::StartText
740 }),
741 ast::AssertionKind::EndLine => Hir::anchor(if multi_line {
742 hir::Anchor::EndLine
743 } else {
744 hir::Anchor::EndText
745 }),
746 ast::AssertionKind::StartText => {
747 Hir::anchor(hir::Anchor::StartText)
748 }
749 ast::AssertionKind::EndText => Hir::anchor(hir::Anchor::EndText),
750 ast::AssertionKind::WordBoundary => {
751 Hir::word_boundary(if unicode {
752 hir::WordBoundary::Unicode
753 } else {
754 hir::WordBoundary::Ascii
755 })
756 }
757 ast::AssertionKind::NotWordBoundary => {
758 Hir::word_boundary(if unicode {
759 hir::WordBoundary::UnicodeNegate
760 } else {
761 // It is possible for negated ASCII word boundaries to
762 // match at invalid UTF-8 boundaries, even when searching
763 // valid UTF-8.
764 if !self.trans().allow_invalid_utf8 {
765 return Err(
766 self.error(asst.span, ErrorKind::InvalidUtf8)
767 );
768 }
769 hir::WordBoundary::AsciiNegate
770 })
771 }
772 })
773 }
774
hir_group(&self, group: &ast::Group, expr: Hir) -> Hir775 fn hir_group(&self, group: &ast::Group, expr: Hir) -> Hir {
776 let kind = match group.kind {
777 ast::GroupKind::CaptureIndex(idx) => {
778 hir::GroupKind::CaptureIndex(idx)
779 }
780 ast::GroupKind::CaptureName(ref capname) => {
781 hir::GroupKind::CaptureName {
782 name: capname.name.clone(),
783 index: capname.index,
784 }
785 }
786 ast::GroupKind::NonCapturing(_) => hir::GroupKind::NonCapturing,
787 };
788 Hir::group(hir::Group { kind: kind, hir: Box::new(expr) })
789 }
790
hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir791 fn hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir {
792 let kind = match rep.op.kind {
793 ast::RepetitionKind::ZeroOrOne => hir::RepetitionKind::ZeroOrOne,
794 ast::RepetitionKind::ZeroOrMore => hir::RepetitionKind::ZeroOrMore,
795 ast::RepetitionKind::OneOrMore => hir::RepetitionKind::OneOrMore,
796 ast::RepetitionKind::Range(ast::RepetitionRange::Exactly(m)) => {
797 hir::RepetitionKind::Range(hir::RepetitionRange::Exactly(m))
798 }
799 ast::RepetitionKind::Range(ast::RepetitionRange::AtLeast(m)) => {
800 hir::RepetitionKind::Range(hir::RepetitionRange::AtLeast(m))
801 }
802 ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(
803 m,
804 n,
805 )) => {
806 hir::RepetitionKind::Range(hir::RepetitionRange::Bounded(m, n))
807 }
808 };
809 let greedy =
810 if self.flags().swap_greed() { !rep.greedy } else { rep.greedy };
811 Hir::repetition(hir::Repetition {
812 kind: kind,
813 greedy: greedy,
814 hir: Box::new(expr),
815 })
816 }
817
hir_unicode_class( &self, ast_class: &ast::ClassUnicode, ) -> Result<hir::ClassUnicode>818 fn hir_unicode_class(
819 &self,
820 ast_class: &ast::ClassUnicode,
821 ) -> Result<hir::ClassUnicode> {
822 use ast::ClassUnicodeKind::*;
823
824 if !self.flags().unicode() {
825 return Err(
826 self.error(ast_class.span, ErrorKind::UnicodeNotAllowed)
827 );
828 }
829 let query = match ast_class.kind {
830 OneLetter(name) => ClassQuery::OneLetter(name),
831 Named(ref name) => ClassQuery::Binary(name),
832 NamedValue { ref name, ref value, .. } => ClassQuery::ByValue {
833 property_name: name,
834 property_value: value,
835 },
836 };
837 let mut result = self.convert_unicode_class_error(
838 &ast_class.span,
839 unicode::class(query),
840 );
841 if let Ok(ref mut class) = result {
842 self.unicode_fold_and_negate(
843 &ast_class.span,
844 ast_class.negated,
845 class,
846 )?;
847 }
848 result
849 }
850
hir_perl_unicode_class( &self, ast_class: &ast::ClassPerl, ) -> Result<hir::ClassUnicode>851 fn hir_perl_unicode_class(
852 &self,
853 ast_class: &ast::ClassPerl,
854 ) -> Result<hir::ClassUnicode> {
855 use ast::ClassPerlKind::*;
856
857 assert!(self.flags().unicode());
858 let result = match ast_class.kind {
859 Digit => unicode::perl_digit(),
860 Space => unicode::perl_space(),
861 Word => unicode::perl_word(),
862 };
863 let mut class =
864 self.convert_unicode_class_error(&ast_class.span, result)?;
865 // We needn't apply case folding here because the Perl Unicode classes
866 // are already closed under Unicode simple case folding.
867 if ast_class.negated {
868 class.negate();
869 }
870 Ok(class)
871 }
872
hir_perl_byte_class( &self, ast_class: &ast::ClassPerl, ) -> hir::ClassBytes873 fn hir_perl_byte_class(
874 &self,
875 ast_class: &ast::ClassPerl,
876 ) -> hir::ClassBytes {
877 use ast::ClassPerlKind::*;
878
879 assert!(!self.flags().unicode());
880 let mut class = match ast_class.kind {
881 Digit => hir_ascii_class_bytes(&ast::ClassAsciiKind::Digit),
882 Space => hir_ascii_class_bytes(&ast::ClassAsciiKind::Space),
883 Word => hir_ascii_class_bytes(&ast::ClassAsciiKind::Word),
884 };
885 // We needn't apply case folding here because the Perl ASCII classes
886 // are already closed (under ASCII case folding).
887 if ast_class.negated {
888 class.negate();
889 }
890 class
891 }
892
893 /// Converts the given Unicode specific error to an HIR translation error.
894 ///
895 /// The span given should approximate the position at which an error would
896 /// occur.
convert_unicode_class_error( &self, span: &Span, result: unicode::Result<hir::ClassUnicode>, ) -> Result<hir::ClassUnicode>897 fn convert_unicode_class_error(
898 &self,
899 span: &Span,
900 result: unicode::Result<hir::ClassUnicode>,
901 ) -> Result<hir::ClassUnicode> {
902 result.map_err(|err| {
903 let sp = span.clone();
904 match err {
905 unicode::Error::PropertyNotFound => {
906 self.error(sp, ErrorKind::UnicodePropertyNotFound)
907 }
908 unicode::Error::PropertyValueNotFound => {
909 self.error(sp, ErrorKind::UnicodePropertyValueNotFound)
910 }
911 unicode::Error::PerlClassNotFound => {
912 self.error(sp, ErrorKind::UnicodePerlClassNotFound)
913 }
914 }
915 })
916 }
917
unicode_fold_and_negate( &self, span: &Span, negated: bool, class: &mut hir::ClassUnicode, ) -> Result<()>918 fn unicode_fold_and_negate(
919 &self,
920 span: &Span,
921 negated: bool,
922 class: &mut hir::ClassUnicode,
923 ) -> Result<()> {
924 // Note that we must apply case folding before negation!
925 // Consider `(?i)[^x]`. If we applied negation field, then
926 // the result would be the character class that matched any
927 // Unicode scalar value.
928 if self.flags().case_insensitive() {
929 class.try_case_fold_simple().map_err(|_| {
930 self.error(span.clone(), ErrorKind::UnicodeCaseUnavailable)
931 })?;
932 }
933 if negated {
934 class.negate();
935 }
936 Ok(())
937 }
938
bytes_fold_and_negate( &self, span: &Span, negated: bool, class: &mut hir::ClassBytes, ) -> Result<()>939 fn bytes_fold_and_negate(
940 &self,
941 span: &Span,
942 negated: bool,
943 class: &mut hir::ClassBytes,
944 ) -> Result<()> {
945 // Note that we must apply case folding before negation!
946 // Consider `(?i)[^x]`. If we applied negation field, then
947 // the result would be the character class that matched any
948 // Unicode scalar value.
949 if self.flags().case_insensitive() {
950 class.case_fold_simple();
951 }
952 if negated {
953 class.negate();
954 }
955 if !self.trans().allow_invalid_utf8 && !class.is_all_ascii() {
956 return Err(self.error(span.clone(), ErrorKind::InvalidUtf8));
957 }
958 Ok(())
959 }
960
961 /// Return a scalar byte value suitable for use as a literal in a byte
962 /// character class.
class_literal_byte(&self, ast: &ast::Literal) -> Result<u8>963 fn class_literal_byte(&self, ast: &ast::Literal) -> Result<u8> {
964 match self.literal_to_char(ast)? {
965 hir::Literal::Byte(byte) => Ok(byte),
966 hir::Literal::Unicode(ch) => {
967 if ch <= 0x7F as char {
968 Ok(ch as u8)
969 } else {
970 // We can't feasibly support Unicode in
971 // byte oriented classes. Byte classes don't
972 // do Unicode case folding.
973 Err(self.error(ast.span, ErrorKind::UnicodeNotAllowed))
974 }
975 }
976 }
977 }
978 }
979
/// A translator's representation of a regular expression's flags at any given
/// moment in time.
///
/// Each flag can be in one of three states: absent, present but disabled or
/// present but enabled. These are stored as `None`, `Some(false)` and
/// `Some(true)` respectively.
#[derive(Clone, Copy, Debug, Default)]
struct Flags {
    /// State of the case insensitive flag.
    case_insensitive: Option<bool>,
    /// State of the multi-line flag.
    multi_line: Option<bool>,
    /// State of the "dot matches new line" flag.
    dot_matches_new_line: Option<bool>,
    /// State of the swap-greed flag.
    swap_greed: Option<bool>,
    /// State of the Unicode flag.
    unicode: Option<bool>,
    // Note that `ignore_whitespace` is omitted here because it is handled
    // entirely in the parser.
}
995
996 impl Flags {
from_ast(ast: &ast::Flags) -> Flags997 fn from_ast(ast: &ast::Flags) -> Flags {
998 let mut flags = Flags::default();
999 let mut enable = true;
1000 for item in &ast.items {
1001 match item.kind {
1002 ast::FlagsItemKind::Negation => {
1003 enable = false;
1004 }
1005 ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive) => {
1006 flags.case_insensitive = Some(enable);
1007 }
1008 ast::FlagsItemKind::Flag(ast::Flag::MultiLine) => {
1009 flags.multi_line = Some(enable);
1010 }
1011 ast::FlagsItemKind::Flag(ast::Flag::DotMatchesNewLine) => {
1012 flags.dot_matches_new_line = Some(enable);
1013 }
1014 ast::FlagsItemKind::Flag(ast::Flag::SwapGreed) => {
1015 flags.swap_greed = Some(enable);
1016 }
1017 ast::FlagsItemKind::Flag(ast::Flag::Unicode) => {
1018 flags.unicode = Some(enable);
1019 }
1020 ast::FlagsItemKind::Flag(ast::Flag::IgnoreWhitespace) => {}
1021 }
1022 }
1023 flags
1024 }
1025
merge(&mut self, previous: &Flags)1026 fn merge(&mut self, previous: &Flags) {
1027 if self.case_insensitive.is_none() {
1028 self.case_insensitive = previous.case_insensitive;
1029 }
1030 if self.multi_line.is_none() {
1031 self.multi_line = previous.multi_line;
1032 }
1033 if self.dot_matches_new_line.is_none() {
1034 self.dot_matches_new_line = previous.dot_matches_new_line;
1035 }
1036 if self.swap_greed.is_none() {
1037 self.swap_greed = previous.swap_greed;
1038 }
1039 if self.unicode.is_none() {
1040 self.unicode = previous.unicode;
1041 }
1042 }
1043
case_insensitive(&self) -> bool1044 fn case_insensitive(&self) -> bool {
1045 self.case_insensitive.unwrap_or(false)
1046 }
1047
multi_line(&self) -> bool1048 fn multi_line(&self) -> bool {
1049 self.multi_line.unwrap_or(false)
1050 }
1051
dot_matches_new_line(&self) -> bool1052 fn dot_matches_new_line(&self) -> bool {
1053 self.dot_matches_new_line.unwrap_or(false)
1054 }
1055
swap_greed(&self) -> bool1056 fn swap_greed(&self) -> bool {
1057 self.swap_greed.unwrap_or(false)
1058 }
1059
unicode(&self) -> bool1060 fn unicode(&self) -> bool {
1061 self.unicode.unwrap_or(true)
1062 }
1063 }
1064
hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes1065 fn hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes {
1066 let ranges: Vec<_> = ascii_class(kind)
1067 .iter()
1068 .cloned()
1069 .map(|(s, e)| hir::ClassBytesRange::new(s as u8, e as u8))
1070 .collect();
1071 hir::ClassBytes::new(ranges)
1072 }
1073
ascii_class(kind: &ast::ClassAsciiKind) -> &'static [(char, char)]1074 fn ascii_class(kind: &ast::ClassAsciiKind) -> &'static [(char, char)] {
1075 use ast::ClassAsciiKind::*;
1076 match *kind {
1077 Alnum => &[('0', '9'), ('A', 'Z'), ('a', 'z')],
1078 Alpha => &[('A', 'Z'), ('a', 'z')],
1079 Ascii => &[('\x00', '\x7F')],
1080 Blank => &[('\t', '\t'), (' ', ' ')],
1081 Cntrl => &[('\x00', '\x1F'), ('\x7F', '\x7F')],
1082 Digit => &[('0', '9')],
1083 Graph => &[('!', '~')],
1084 Lower => &[('a', 'z')],
1085 Print => &[(' ', '~')],
1086 Punct => &[('!', '/'), (':', '@'), ('[', '`'), ('{', '~')],
1087 Space => &[
1088 ('\t', '\t'),
1089 ('\n', '\n'),
1090 ('\x0B', '\x0B'),
1091 ('\x0C', '\x0C'),
1092 ('\r', '\r'),
1093 (' ', ' '),
1094 ],
1095 Upper => &[('A', 'Z')],
1096 Word => &[('0', '9'), ('A', 'Z'), ('_', '_'), ('a', 'z')],
1097 Xdigit => &[('0', '9'), ('A', 'F'), ('a', 'f')],
1098 }
1099 }
1100
1101 #[cfg(test)]
1102 mod tests {
1103 use ast::parse::ParserBuilder;
1104 use ast::{self, Ast, Position, Span};
1105 use hir::{self, Hir, HirKind};
1106 use unicode::{self, ClassQuery};
1107
1108 use super::{ascii_class, TranslatorBuilder};
1109
// We create these errors to compare with real hir::Errors in the tests.
// We define equality between TestError and hir::Error to disregard the
// pattern string in hir::Error, which is annoying to provide in tests.
#[derive(Clone, Debug)]
struct TestError {
    // The span at which the error is expected to be reported.
    span: Span,
    // The kind of error that is expected.
    kind: hir::ErrorKind,
}
1118
1119 impl PartialEq<hir::Error> for TestError {
eq(&self, other: &hir::Error) -> bool1120 fn eq(&self, other: &hir::Error) -> bool {
1121 self.span == other.span && self.kind == other.kind
1122 }
1123 }
1124
1125 impl PartialEq<TestError> for hir::Error {
eq(&self, other: &TestError) -> bool1126 fn eq(&self, other: &TestError) -> bool {
1127 self.span == other.span && self.kind == other.kind
1128 }
1129 }
1130
parse(pattern: &str) -> Ast1131 fn parse(pattern: &str) -> Ast {
1132 ParserBuilder::new().octal(true).build().parse(pattern).unwrap()
1133 }
1134
t(pattern: &str) -> Hir1135 fn t(pattern: &str) -> Hir {
1136 TranslatorBuilder::new()
1137 .allow_invalid_utf8(false)
1138 .build()
1139 .translate(pattern, &parse(pattern))
1140 .unwrap()
1141 }
1142
t_err(pattern: &str) -> hir::Error1143 fn t_err(pattern: &str) -> hir::Error {
1144 TranslatorBuilder::new()
1145 .allow_invalid_utf8(false)
1146 .build()
1147 .translate(pattern, &parse(pattern))
1148 .unwrap_err()
1149 }
1150
t_bytes(pattern: &str) -> Hir1151 fn t_bytes(pattern: &str) -> Hir {
1152 TranslatorBuilder::new()
1153 .allow_invalid_utf8(true)
1154 .build()
1155 .translate(pattern, &parse(pattern))
1156 .unwrap()
1157 }
1158
hir_lit(s: &str) -> Hir1159 fn hir_lit(s: &str) -> Hir {
1160 match s.len() {
1161 0 => Hir::empty(),
1162 _ => {
1163 let lits = s
1164 .chars()
1165 .map(hir::Literal::Unicode)
1166 .map(Hir::literal)
1167 .collect();
1168 Hir::concat(lits)
1169 }
1170 }
1171 }
1172
hir_blit(s: &[u8]) -> Hir1173 fn hir_blit(s: &[u8]) -> Hir {
1174 match s.len() {
1175 0 => Hir::empty(),
1176 1 => Hir::literal(hir::Literal::Byte(s[0])),
1177 _ => {
1178 let lits = s
1179 .iter()
1180 .cloned()
1181 .map(hir::Literal::Byte)
1182 .map(Hir::literal)
1183 .collect();
1184 Hir::concat(lits)
1185 }
1186 }
1187 }
1188
hir_group(i: u32, expr: Hir) -> Hir1189 fn hir_group(i: u32, expr: Hir) -> Hir {
1190 Hir::group(hir::Group {
1191 kind: hir::GroupKind::CaptureIndex(i),
1192 hir: Box::new(expr),
1193 })
1194 }
1195
hir_group_name(i: u32, name: &str, expr: Hir) -> Hir1196 fn hir_group_name(i: u32, name: &str, expr: Hir) -> Hir {
1197 Hir::group(hir::Group {
1198 kind: hir::GroupKind::CaptureName {
1199 name: name.to_string(),
1200 index: i,
1201 },
1202 hir: Box::new(expr),
1203 })
1204 }
1205
hir_group_nocap(expr: Hir) -> Hir1206 fn hir_group_nocap(expr: Hir) -> Hir {
1207 Hir::group(hir::Group {
1208 kind: hir::GroupKind::NonCapturing,
1209 hir: Box::new(expr),
1210 })
1211 }
1212
hir_quest(greedy: bool, expr: Hir) -> Hir1213 fn hir_quest(greedy: bool, expr: Hir) -> Hir {
1214 Hir::repetition(hir::Repetition {
1215 kind: hir::RepetitionKind::ZeroOrOne,
1216 greedy: greedy,
1217 hir: Box::new(expr),
1218 })
1219 }
1220
hir_star(greedy: bool, expr: Hir) -> Hir1221 fn hir_star(greedy: bool, expr: Hir) -> Hir {
1222 Hir::repetition(hir::Repetition {
1223 kind: hir::RepetitionKind::ZeroOrMore,
1224 greedy: greedy,
1225 hir: Box::new(expr),
1226 })
1227 }
1228
hir_plus(greedy: bool, expr: Hir) -> Hir1229 fn hir_plus(greedy: bool, expr: Hir) -> Hir {
1230 Hir::repetition(hir::Repetition {
1231 kind: hir::RepetitionKind::OneOrMore,
1232 greedy: greedy,
1233 hir: Box::new(expr),
1234 })
1235 }
1236
hir_range(greedy: bool, range: hir::RepetitionRange, expr: Hir) -> Hir1237 fn hir_range(greedy: bool, range: hir::RepetitionRange, expr: Hir) -> Hir {
1238 Hir::repetition(hir::Repetition {
1239 kind: hir::RepetitionKind::Range(range),
1240 greedy: greedy,
1241 hir: Box::new(expr),
1242 })
1243 }
1244
/// Builds an alternation of `alts` by forwarding to `Hir::alternation`.
fn hir_alt(alts: Vec<Hir>) -> Hir {
    Hir::alternation(alts)
}
1248
/// Builds a concatenation of `exprs` by forwarding to `Hir::concat`.
fn hir_cat(exprs: Vec<Hir>) -> Hir {
    Hir::concat(exprs)
}
1252
1253 #[allow(dead_code)]
hir_uclass_query(query: ClassQuery) -> Hir1254 fn hir_uclass_query(query: ClassQuery) -> Hir {
1255 Hir::class(hir::Class::Unicode(unicode::class(query).unwrap()))
1256 }
1257
1258 #[allow(dead_code)]
hir_uclass_perl_word() -> Hir1259 fn hir_uclass_perl_word() -> Hir {
1260 Hir::class(hir::Class::Unicode(unicode::perl_word().unwrap()))
1261 }
1262
hir_uclass(ranges: &[(char, char)]) -> Hir1263 fn hir_uclass(ranges: &[(char, char)]) -> Hir {
1264 let ranges: Vec<hir::ClassUnicodeRange> = ranges
1265 .iter()
1266 .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e))
1267 .collect();
1268 Hir::class(hir::Class::Unicode(hir::ClassUnicode::new(ranges)))
1269 }
1270
hir_bclass(ranges: &[(u8, u8)]) -> Hir1271 fn hir_bclass(ranges: &[(u8, u8)]) -> Hir {
1272 let ranges: Vec<hir::ClassBytesRange> = ranges
1273 .iter()
1274 .map(|&(s, e)| hir::ClassBytesRange::new(s, e))
1275 .collect();
1276 Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges)))
1277 }
1278
hir_bclass_from_char(ranges: &[(char, char)]) -> Hir1279 fn hir_bclass_from_char(ranges: &[(char, char)]) -> Hir {
1280 let ranges: Vec<hir::ClassBytesRange> = ranges
1281 .iter()
1282 .map(|&(s, e)| {
1283 assert!(s as u32 <= 0x7F);
1284 assert!(e as u32 <= 0x7F);
1285 hir::ClassBytesRange::new(s as u8, e as u8)
1286 })
1287 .collect();
1288 Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges)))
1289 }
1290
hir_case_fold(expr: Hir) -> Hir1291 fn hir_case_fold(expr: Hir) -> Hir {
1292 match expr.into_kind() {
1293 HirKind::Class(mut cls) => {
1294 cls.case_fold_simple();
1295 Hir::class(cls)
1296 }
1297 _ => panic!("cannot case fold non-class Hir expr"),
1298 }
1299 }
1300
hir_negate(expr: Hir) -> Hir1301 fn hir_negate(expr: Hir) -> Hir {
1302 match expr.into_kind() {
1303 HirKind::Class(mut cls) => {
1304 cls.negate();
1305 Hir::class(cls)
1306 }
1307 _ => panic!("cannot negate non-class Hir expr"),
1308 }
1309 }
1310
1311 #[allow(dead_code)]
hir_union(expr1: Hir, expr2: Hir) -> Hir1312 fn hir_union(expr1: Hir, expr2: Hir) -> Hir {
1313 use hir::Class::{Bytes, Unicode};
1314
1315 match (expr1.into_kind(), expr2.into_kind()) {
1316 (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
1317 c1.union(&c2);
1318 Hir::class(hir::Class::Unicode(c1))
1319 }
1320 (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
1321 c1.union(&c2);
1322 Hir::class(hir::Class::Bytes(c1))
1323 }
1324 _ => panic!("cannot union non-class Hir exprs"),
1325 }
1326 }
1327
1328 #[allow(dead_code)]
hir_difference(expr1: Hir, expr2: Hir) -> Hir1329 fn hir_difference(expr1: Hir, expr2: Hir) -> Hir {
1330 use hir::Class::{Bytes, Unicode};
1331
1332 match (expr1.into_kind(), expr2.into_kind()) {
1333 (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
1334 c1.difference(&c2);
1335 Hir::class(hir::Class::Unicode(c1))
1336 }
1337 (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
1338 c1.difference(&c2);
1339 Hir::class(hir::Class::Bytes(c1))
1340 }
1341 _ => panic!("cannot difference non-class Hir exprs"),
1342 }
1343 }
1344
/// Builds an anchor HIR by forwarding `anchor` to `Hir::anchor`.
fn hir_anchor(anchor: hir::Anchor) -> Hir {
    Hir::anchor(anchor)
}
1348
/// Builds a word boundary HIR by forwarding `wb` to `Hir::word_boundary`.
fn hir_word(wb: hir::WordBoundary) -> Hir {
    Hir::word_boundary(wb)
}
1352
/// Patterns without any literal content: the empty pattern, a bare flag
/// directive, empty groups and alternations with empty branches.
#[test]
fn empty() {
    let nil = Hir::empty;
    let cases: Vec<(&str, Hir)> = vec![
        ("", nil()),
        ("(?i)", nil()),
        ("()", hir_group(1, nil())),
        ("(?:)", hir_group_nocap(nil())),
        ("(?P<wat>)", hir_group_name(1, "wat", nil())),
        ("|", hir_alt(vec![nil(), nil()])),
        (
            "()|()",
            hir_alt(vec![hir_group(1, nil()), hir_group(2, nil())]),
        ),
        ("(|b)", hir_group(1, hir_alt(vec![nil(), hir_lit("b")]))),
        ("(a|)", hir_group(1, hir_alt(vec![hir_lit("a"), nil()]))),
        (
            "(a||c)",
            hir_group(
                1,
                hir_alt(vec![hir_lit("a"), nil(), hir_lit("c")]),
            ),
        ),
        ("(||)", hir_group(1, hir_alt(vec![nil(), nil(), nil()]))),
    ];
    for (pattern, expected) in cases {
        assert_eq!(t(pattern), expected, "pattern: {:?}", pattern);
    }
}
1391
/// Translation of plain literals, in Unicode mode and in byte mode.
#[test]
fn literal() {
    // (pattern, expected literal text) with invalid UTF-8 disallowed.
    let utf8_cases = [
        ("a", "a"),
        ("(?-u)a", "a"),
        ("☃", "☃"),
        ("abcd", "abcd"),
    ];
    for &(pattern, lit) in utf8_cases.iter() {
        assert_eq!(t(pattern), hir_lit(lit), "pattern: {:?}", pattern);
    }

    // With invalid UTF-8 allowed, these patterns all still produce the
    // Unicode literal `a`.
    let byte_mode_cases = ["(?-u)a", "(?-u)\x61", r"(?-u)\x61"];
    for &pattern in byte_mode_cases.iter() {
        assert_eq!(
            t_bytes(pattern),
            hir_lit("a"),
            "pattern: {:?}",
            pattern
        );
    }
    // A non-ASCII hex escape produces a byte literal.
    assert_eq!(t_bytes(r"(?-u)\xFF"), hir_blit(b"\xFF"));

    // A non-ASCII character is rejected once Unicode mode is disabled.
    let unicode_not_allowed = TestError {
        kind: hir::ErrorKind::UnicodeNotAllowed,
        span: Span::new(Position::new(5, 1, 6), Position::new(8, 1, 7)),
    };
    assert_eq!(t_err("(?-u)☃"), unicode_not_allowed);

    // A non-ASCII byte is rejected when invalid UTF-8 isn't allowed.
    let invalid_utf8 = TestError {
        kind: hir::ErrorKind::InvalidUtf8,
        span: Span::new(Position::new(5, 1, 6), Position::new(9, 1, 10)),
    };
    assert_eq!(t_err(r"(?-u)\xFF"), invalid_utf8);
}
1425
/// Translation of literals while the case insensitive flag is set.
///
/// Assertions that expect a Unicode class are gated on the `unicode-case`
/// feature.
#[test]
fn literal_case_insensitive() {
    // In Unicode mode, a case insensitive letter becomes a Unicode class
    // holding both of its cases.
    #[cfg(feature = "unicode-case")]
    assert_eq!(t("(?i)a"), hir_uclass(&[('A', 'A'), ('a', 'a'),]));
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i:a)"),
        hir_group_nocap(hir_uclass(&[('A', 'A'), ('a', 'a')],))
    );
    // The flag applies only between `(?i)` and `(?-i)`.
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("a(?i)a(?-i)a"),
        hir_cat(vec![
            hir_lit("a"),
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
            hir_lit("a"),
        ])
    );
    // `@` is expected to stay a plain literal.
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)ab@c"),
        hir_cat(vec![
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
            hir_uclass(&[('B', 'B'), ('b', 'b')]),
            hir_lit("@"),
            hir_uclass(&[('C', 'C'), ('c', 'c')]),
        ])
    );
    // A non-ASCII letter whose expected class has three members.
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)β"),
        hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),])
    );

    // With Unicode mode disabled, the result is a byte class instead.
    assert_eq!(t("(?i-u)a"), hir_bclass(&[(b'A', b'A'), (b'a', b'a'),]));
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?-u)a(?i)a(?-i)a"),
        hir_cat(vec![
            hir_lit("a"),
            hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
            hir_lit("a"),
        ])
    );
    assert_eq!(
        t("(?i-u)ab@c"),
        hir_cat(vec![
            hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
            hir_bclass(&[(b'B', b'B'), (b'b', b'b')]),
            hir_lit("@"),
            hir_bclass(&[(b'C', b'C'), (b'c', b'c')]),
        ])
    );

    // Same byte mode checks, but with invalid UTF-8 allowed.
    assert_eq!(
        t_bytes("(?i-u)a"),
        hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
    );
    assert_eq!(
        t_bytes("(?i-u)\x61"),
        hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
    );
    assert_eq!(
        t_bytes(r"(?i-u)\x61"),
        hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
    );
    // The byte `\xFF` is expected to stay a single byte literal.
    assert_eq!(t_bytes(r"(?i-u)\xFF"), hir_blit(b"\xFF"));

    // A non-ASCII character is rejected once Unicode mode is disabled.
    assert_eq!(
        t_err("(?i-u)β"),
        TestError {
            kind: hir::ErrorKind::UnicodeNotAllowed,
            span: Span::new(
                Position::new(6, 1, 7),
                Position::new(8, 1, 8),
            ),
        }
    );
}
1505
/// Translation of `.` under the `s` and `u` flags.
#[test]
fn dot() {
    // Unicode mode: everything except `\n`, or everything with `(?s)`.
    let unicode_no_newline =
        hir_uclass(&[('\0', '\t'), ('\x0B', '\u{10FFFF}')]);
    assert_eq!(t("."), unicode_no_newline);
    let unicode_any = hir_uclass(&[('\0', '\u{10FFFF}')]);
    assert_eq!(t("(?s)."), unicode_any);

    // Byte mode: every byte except `\n`, or every byte with `(?s)`.
    let bytes_no_newline =
        hir_bclass(&[(b'\0', b'\t'), (b'\x0B', b'\xFF')]);
    assert_eq!(t_bytes("(?-u)."), bytes_no_newline);
    let bytes_any = hir_bclass(&[(b'\0', b'\xFF')]);
    assert_eq!(t_bytes("(?s-u)."), bytes_any);

    // If invalid UTF-8 isn't allowed, then non-Unicode `.` isn't allowed.
    let invalid_utf8 = |start: Position, end: Position| TestError {
        kind: hir::ErrorKind::InvalidUtf8,
        span: Span::new(start, end),
    };
    assert_eq!(
        t_err("(?-u)."),
        invalid_utf8(Position::new(5, 1, 6), Position::new(6, 1, 7))
    );
    assert_eq!(
        t_err("(?s-u)."),
        invalid_utf8(Position::new(6, 1, 7), Position::new(7, 1, 8))
    );
}
1541
/// Translation of anchors and word boundaries.
#[test]
fn assertions() {
    // (pattern, expected HIR) with invalid UTF-8 disallowed.
    let cases: Vec<(&str, Hir)> = vec![
        ("^", hir_anchor(hir::Anchor::StartText)),
        ("$", hir_anchor(hir::Anchor::EndText)),
        (r"\A", hir_anchor(hir::Anchor::StartText)),
        (r"\z", hir_anchor(hir::Anchor::EndText)),
        ("(?m)^", hir_anchor(hir::Anchor::StartLine)),
        ("(?m)$", hir_anchor(hir::Anchor::EndLine)),
        (r"(?m)\A", hir_anchor(hir::Anchor::StartText)),
        (r"(?m)\z", hir_anchor(hir::Anchor::EndText)),
        (r"\b", hir_word(hir::WordBoundary::Unicode)),
        (r"\B", hir_word(hir::WordBoundary::UnicodeNegate)),
        (r"(?-u)\b", hir_word(hir::WordBoundary::Ascii)),
    ];
    for (pattern, expected) in cases {
        assert_eq!(t(pattern), expected, "pattern: {:?}", pattern);
    }

    // A negated ASCII word boundary translates only when invalid UTF-8 is
    // allowed; otherwise it is an error.
    let ascii_negate = hir_word(hir::WordBoundary::AsciiNegate);
    assert_eq!(t_bytes(r"(?-u)\B"), ascii_negate);

    let invalid_utf8 = TestError {
        kind: hir::ErrorKind::InvalidUtf8,
        span: Span::new(Position::new(5, 1, 6), Position::new(7, 1, 8)),
    };
    assert_eq!(t_err(r"(?-u)\B"), invalid_utf8);
}
1572
/// Translation of capturing, named and non-capturing groups, including
/// how capture indices are assigned.
#[test]
fn group() {
    let cases: Vec<(&str, Hir)> = vec![
        ("(a)", hir_group(1, hir_lit("a"))),
        (
            "(a)(b)",
            hir_cat(vec![
                hir_group(1, hir_lit("a")),
                hir_group(2, hir_lit("b")),
            ]),
        ),
        (
            "(a)|(b)",
            hir_alt(vec![
                hir_group(1, hir_lit("a")),
                hir_group(2, hir_lit("b")),
            ]),
        ),
        ("(?P<foo>)", hir_group_name(1, "foo", Hir::empty())),
        ("(?P<foo>a)", hir_group_name(1, "foo", hir_lit("a"))),
        (
            "(?P<foo>a)(?P<bar>b)",
            hir_cat(vec![
                hir_group_name(1, "foo", hir_lit("a")),
                hir_group_name(2, "bar", hir_lit("b")),
            ]),
        ),
        ("(?:)", hir_group_nocap(Hir::empty())),
        ("(?:a)", hir_group_nocap(hir_lit("a"))),
        // Non-capturing groups do not consume a capture index.
        (
            "(?:a)(b)",
            hir_cat(vec![
                hir_group_nocap(hir_lit("a")),
                hir_group(1, hir_lit("b")),
            ]),
        ),
        (
            "(a)(?:b)(c)",
            hir_cat(vec![
                hir_group(1, hir_lit("a")),
                hir_group_nocap(hir_lit("b")),
                hir_group(2, hir_lit("c")),
            ]),
        ),
        // Named groups share the index sequence with unnamed ones.
        (
            "(a)(?P<foo>b)(c)",
            hir_cat(vec![
                hir_group(1, hir_lit("a")),
                hir_group_name(2, "foo", hir_lit("b")),
                hir_group(3, hir_lit("c")),
            ]),
        ),
        // Groups holding nothing but a flag directive are empty.
        ("()", hir_group(1, Hir::empty())),
        ("((?i))", hir_group(1, Hir::empty())),
        ("((?x))", hir_group(1, Hir::empty())),
        ("(((?x)))", hir_group(1, hir_group(2, Hir::empty()))),
    ];
    for (pattern, expected) in cases {
        assert_eq!(t(pattern), expected, "pattern: {:?}", pattern);
    }
}
1629
/// Checks how far flag settings reach: inside flag groups, after flag
/// directives, and across group boundaries.
///
/// Assertions that expect a Unicode class are gated on the `unicode-case`
/// feature.
#[test]
fn flags() {
    // Flags set by `(?i:...)` do not apply after the group.
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i:a)a"),
        hir_cat(vec![
            hir_group_nocap(hir_uclass(&[('A', 'A'), ('a', 'a')])),
            hir_lit("a"),
        ])
    );
    // `-u` inside the group does not affect the `β` after it.
    assert_eq!(
        t("(?i-u:a)β"),
        hir_cat(vec![
            hir_group_nocap(hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
            hir_lit("β"),
        ])
    );
    // A flag directive inside a group does not apply after the group.
    assert_eq!(
        t("(?:(?i-u)a)b"),
        hir_cat(vec![
            hir_group_nocap(hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
            hir_lit("b"),
        ])
    );
    assert_eq!(
        t("((?i-u)a)b"),
        hir_cat(vec![
            hir_group(1, hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
            hir_lit("b"),
        ])
    );
    // `(?-i:...)` turns the flag off inside the group only.
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)(?-i:a)a"),
        hir_cat(vec![
            hir_group_nocap(hir_lit("a")),
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
        ])
    );
    // Several flags can be set by one directive.
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?im)a^"),
        hir_cat(vec![
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
            hir_anchor(hir::Anchor::StartLine),
        ])
    );
    // A later directive can disable one flag and keep another.
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?im)a^(?i-m)a^"),
        hir_cat(vec![
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
            hir_anchor(hir::Anchor::StartLine),
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
            hir_anchor(hir::Anchor::StartText),
        ])
    );
    // `U` swaps the greediness of `*` and `*?`.
    assert_eq!(
        t("(?U)a*a*?(?-U)a*a*?"),
        hir_cat(vec![
            hir_star(false, hir_lit("a")),
            hir_star(true, hir_lit("a")),
            hir_star(true, hir_lit("a")),
            hir_star(false, hir_lit("a")),
        ])
    );
    // A directive in the middle of a group applies to the rest of that
    // group, but not to what follows the group.
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?:a(?i)a)a"),
        hir_cat(vec![
            hir_group_nocap(hir_cat(vec![
                hir_lit("a"),
                hir_uclass(&[('A', 'A'), ('a', 'a')]),
            ])),
            hir_lit("a"),
        ])
    );
    // Flags set outside a group are restored once the group ends.
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)(?:a(?-i)a)a"),
        hir_cat(vec![
            hir_group_nocap(hir_cat(vec![
                hir_uclass(&[('A', 'A'), ('a', 'a')]),
                hir_lit("a"),
            ])),
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
        ])
    );
}
1719
/// Escaped meta characters translate to the corresponding literals.
#[test]
fn escape() {
    let pattern = r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#";
    let expected = hir_lit(r"\.+*?()|[]{}^$#");
    assert_eq!(t(pattern), expected);
}
1727
/// Translation of repetition operators, greedy and lazy, and how they
/// bind relative to concatenation, groups and alternation.
#[test]
fn repetition() {
    let a = || hir_lit("a");
    // A counted repetition of the literal `a`.
    let counted = |greedy: bool, range: hir::RepetitionRange| {
        hir_range(greedy, range, hir_lit("a"))
    };

    let cases: Vec<(&str, Hir)> = vec![
        ("a?", hir_quest(true, a())),
        ("a*", hir_star(true, a())),
        ("a+", hir_plus(true, a())),
        ("a??", hir_quest(false, a())),
        ("a*?", hir_star(false, a())),
        ("a+?", hir_plus(false, a())),
        ("a{1}", counted(true, hir::RepetitionRange::Exactly(1))),
        ("a{1,}", counted(true, hir::RepetitionRange::AtLeast(1))),
        ("a{1,2}", counted(true, hir::RepetitionRange::Bounded(1, 2))),
        ("a{1}?", counted(false, hir::RepetitionRange::Exactly(1))),
        ("a{1,}?", counted(false, hir::RepetitionRange::AtLeast(1))),
        ("a{1,2}?", counted(false, hir::RepetitionRange::Bounded(1, 2))),
        // The operator applies to the nearest preceding expression only.
        (
            "ab?",
            hir_cat(vec![hir_lit("a"), hir_quest(true, hir_lit("b"))]),
        ),
        (
            "(ab)?",
            hir_quest(
                true,
                hir_group(1, hir_cat(vec![hir_lit("a"), hir_lit("b")])),
            ),
        ),
        (
            "a|b?",
            hir_alt(vec![hir_lit("a"), hir_quest(true, hir_lit("b"))]),
        ),
    ];
    for (pattern, expected) in cases {
        assert_eq!(t(pattern), expected, "pattern: {:?}", pattern);
    }
}
1782
/// Translation of concatenations and alternations, bare and inside
/// (possibly nested) groups.
#[test]
fn cat_alt() {
    let cases: Vec<(&str, Hir)> = vec![
        (
            "(ab)",
            hir_group(1, hir_cat(vec![hir_lit("a"), hir_lit("b")])),
        ),
        ("a|b", hir_alt(vec![hir_lit("a"), hir_lit("b")])),
        (
            "a|b|c",
            hir_alt(vec![hir_lit("a"), hir_lit("b"), hir_lit("c")]),
        ),
        (
            "ab|bc|cd",
            hir_alt(vec![hir_lit("ab"), hir_lit("bc"), hir_lit("cd")]),
        ),
        (
            "(a|b)",
            hir_group(1, hir_alt(vec![hir_lit("a"), hir_lit("b")])),
        ),
        (
            "(a|b|c)",
            hir_group(
                1,
                hir_alt(vec![hir_lit("a"), hir_lit("b"), hir_lit("c")]),
            ),
        ),
        (
            "(ab|bc|cd)",
            hir_group(
                1,
                hir_alt(vec![hir_lit("ab"), hir_lit("bc"), hir_lit("cd")]),
            ),
        ),
        (
            "(ab|(bc|(cd)))",
            hir_group(
                1,
                hir_alt(vec![
                    hir_lit("ab"),
                    hir_group(
                        2,
                        hir_alt(vec![
                            hir_lit("bc"),
                            hir_group(3, hir_lit("cd")),
                        ]),
                    ),
                ]),
            ),
        ),
    ];
    for (pattern, expected) in cases {
        assert_eq!(t(pattern), expected, "pattern: {:?}", pattern);
    }
}
1833
/// Translation of the named ASCII classes (`[[:name:]]`), including
/// negation, case folding and byte mode.
#[test]
fn class_ascii() {
    // In Unicode mode, every named class becomes a Unicode class holding
    // exactly the ranges that `ascii_class` lists for it.
    let named = [
        ("[[:alnum:]]", ast::ClassAsciiKind::Alnum),
        ("[[:alpha:]]", ast::ClassAsciiKind::Alpha),
        ("[[:ascii:]]", ast::ClassAsciiKind::Ascii),
        ("[[:blank:]]", ast::ClassAsciiKind::Blank),
        ("[[:cntrl:]]", ast::ClassAsciiKind::Cntrl),
        ("[[:digit:]]", ast::ClassAsciiKind::Digit),
        ("[[:graph:]]", ast::ClassAsciiKind::Graph),
        ("[[:lower:]]", ast::ClassAsciiKind::Lower),
        ("[[:print:]]", ast::ClassAsciiKind::Print),
        ("[[:punct:]]", ast::ClassAsciiKind::Punct),
        ("[[:space:]]", ast::ClassAsciiKind::Space),
        ("[[:upper:]]", ast::ClassAsciiKind::Upper),
        ("[[:word:]]", ast::ClassAsciiKind::Word),
        ("[[:xdigit:]]", ast::ClassAsciiKind::Xdigit),
    ];
    for &(pattern, ref kind) in named.iter() {
        assert_eq!(
            t(pattern),
            hir_uclass(ascii_class(kind)),
            "pattern: {:?}",
            pattern
        );
    }

    let lower = ascii_class(&ast::ClassAsciiKind::Lower);

    // Negation and case folding in Unicode mode.
    assert_eq!(t("[[:^lower:]]"), hir_negate(hir_uclass(lower)));
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)[[:lower:]]"),
        hir_uclass(&[
            ('A', 'Z'),
            ('a', 'z'),
            ('\u{17F}', '\u{17F}'),
            ('\u{212A}', '\u{212A}'),
        ])
    );

    // Byte mode, with and without case folding.
    assert_eq!(t("(?-u)[[:lower:]]"), hir_bclass_from_char(lower));
    assert_eq!(
        t("(?i-u)[[:lower:]]"),
        hir_case_fold(hir_bclass_from_char(lower))
    );

    // A negated class in byte mode is rejected when invalid UTF-8 isn't
    // allowed.
    let invalid_utf8 = |start: Position, end: Position| TestError {
        kind: hir::ErrorKind::InvalidUtf8,
        span: Span::new(start, end),
    };
    assert_eq!(
        t_err("(?-u)[[:^lower:]]"),
        invalid_utf8(Position::new(6, 1, 7), Position::new(16, 1, 17))
    );
    assert_eq!(
        t_err("(?i-u)[[:^lower:]]"),
        invalid_utf8(Position::new(7, 1, 8), Position::new(17, 1, 18))
    );
}
1940
/// Translation of the Perl classes `\d`, `\s`, `\w` and their negations,
/// in Unicode mode and in ASCII-only (`-u`) mode.
///
/// Every case is also checked with the case insensitive flag set, where
/// the expected class is the same as without it. The Unicode variants of
/// those checks are gated on the `unicode-case` feature.
#[test]
#[cfg(feature = "unicode-perl")]
fn class_perl() {
    // Unicode
    assert_eq!(t(r"\d"), hir_uclass_query(ClassQuery::Binary("digit")));
    assert_eq!(t(r"\s"), hir_uclass_query(ClassQuery::Binary("space")));
    assert_eq!(t(r"\w"), hir_uclass_perl_word());
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t(r"(?i)\d"),
        hir_uclass_query(ClassQuery::Binary("digit"))
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t(r"(?i)\s"),
        hir_uclass_query(ClassQuery::Binary("space"))
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)\w"), hir_uclass_perl_word());

    // Unicode, negated
    assert_eq!(
        t(r"\D"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
    );
    assert_eq!(
        t(r"\S"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("space")))
    );
    assert_eq!(t(r"\W"), hir_negate(hir_uclass_perl_word()));
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t(r"(?i)\D"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t(r"(?i)\S"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("space")))
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)\W"), hir_negate(hir_uclass_perl_word()));

    // ASCII only
    assert_eq!(
        t(r"(?-u)\d"),
        hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
    );
    assert_eq!(
        t(r"(?-u)\s"),
        hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Space))
    );
    assert_eq!(
        t(r"(?-u)\w"),
        hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word))
    );
    assert_eq!(
        t(r"(?i-u)\d"),
        hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
    );
    assert_eq!(
        t(r"(?i-u)\s"),
        hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Space))
    );
    assert_eq!(
        t(r"(?i-u)\w"),
        hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word))
    );

    // ASCII only, negated
    assert_eq!(
        t(r"(?-u)\D"),
        hir_negate(hir_bclass_from_char(ascii_class(
            &ast::ClassAsciiKind::Digit
        )))
    );
    assert_eq!(
        t(r"(?-u)\S"),
        hir_negate(hir_bclass_from_char(ascii_class(
            &ast::ClassAsciiKind::Space
        )))
    );
    assert_eq!(
        t(r"(?-u)\W"),
        hir_negate(hir_bclass_from_char(ascii_class(
            &ast::ClassAsciiKind::Word
        )))
    );
    assert_eq!(
        t(r"(?i-u)\D"),
        hir_negate(hir_bclass_from_char(ascii_class(
            &ast::ClassAsciiKind::Digit
        )))
    );
    assert_eq!(
        t(r"(?i-u)\S"),
        hir_negate(hir_bclass_from_char(ascii_class(
            &ast::ClassAsciiKind::Space
        )))
    );
    assert_eq!(
        t(r"(?i-u)\W"),
        hir_negate(hir_bclass_from_char(ascii_class(
            &ast::ClassAsciiKind::Word
        )))
    );
}
2048
/// When the `unicode-perl` feature is off, `\w` fails to translate with a
/// `UnicodePerlClassNotFound` error at the span given below.
#[test]
#[cfg(not(feature = "unicode-perl"))]
fn class_perl_word_disabled() {
    let span = Span::new(Position::new(0, 1, 1), Position::new(2, 1, 3));
    let expected = TestError {
        kind: hir::ErrorKind::UnicodePerlClassNotFound,
        span: span,
    };
    assert_eq!(t_err(r"\w"), expected);
}
2063
/// When both the `unicode-perl` and `unicode-bool` features are off, `\s`
/// fails to translate with a `UnicodePerlClassNotFound` error at the span
/// given below.
#[test]
#[cfg(all(not(feature = "unicode-perl"), not(feature = "unicode-bool")))]
fn class_perl_space_disabled() {
    let span = Span::new(Position::new(0, 1, 1), Position::new(2, 1, 3));
    let expected = TestError {
        kind: hir::ErrorKind::UnicodePerlClassNotFound,
        span: span,
    };
    assert_eq!(t_err(r"\s"), expected);
}
2078
/// When both the `unicode-perl` and `unicode-gencat` features are off,
/// `\d` fails to translate with a `UnicodePerlClassNotFound` error at the
/// span given below.
#[test]
#[cfg(all(
    not(feature = "unicode-perl"),
    not(feature = "unicode-gencat")
))]
fn class_perl_digit_disabled() {
    let span = Span::new(Position::new(0, 1, 1), Position::new(2, 1, 3));
    let expected = TestError {
        kind: hir::ErrorKind::UnicodePerlClassNotFound,
        span: span,
    };
    assert_eq!(t_err(r"\d"), expected);
}
2096
// Exercises `\p{..}`/`\P{..}` general-category classes (plus the special
// `any`, `assigned` and `ascii` names): spelling variants, negation, and
// the errors produced for disallowed or unknown properties.
#[test]
#[cfg(feature = "unicode-gencat")]
fn class_unicode_gencat() {
    let separator = || hir_uclass_query(ClassQuery::Binary("Z"));
    // All patterns here sit on line 1, so column == offset + 1.
    let one_line_span = |start: usize, end: usize| {
        Span::new(
            Position::new(start, 1, start + 1),
            Position::new(end, 1, end + 1),
        )
    };

    // Every spelling of the separator category translates identically.
    let separator_spellings = [
        r"\pZ",
        r"\pz",
        r"\p{Separator}",
        r"\p{se PaRa ToR}",
        r"\p{gc:Separator}",
        r"\p{gc=Separator}",
    ];
    for &pattern in separator_spellings.iter() {
        assert_eq!(t(pattern), separator(), "pattern: {:?}", pattern);
    }
    for &pattern in [r"\p{Other}", r"\pC"].iter() {
        assert_eq!(
            t(pattern),
            hir_uclass_query(ClassQuery::Binary("Other")),
            "pattern: {:?}",
            pattern
        );
    }

    // Negated spellings of the separator category.
    let negated_spellings = [r"\PZ", r"\P{separator}", r"\P{gc!=separator}"];
    for &pattern in negated_spellings.iter() {
        assert_eq!(
            t(pattern),
            hir_negate(separator()),
            "pattern: {:?}",
            pattern
        );
    }

    // The special names, with and without an explicit `gc:` prefix.
    let special_names = [
        (r"\p{any}", "Any"),
        (r"\p{assigned}", "Assigned"),
        (r"\p{ascii}", "ASCII"),
        (r"\p{gc:any}", "Any"),
        (r"\p{gc:assigned}", "Assigned"),
        (r"\p{gc:ascii}", "ASCII"),
    ];
    for &(pattern, name) in special_names.iter() {
        assert_eq!(
            t(pattern),
            hir_uclass_query(ClassQuery::Binary(name)),
            "pattern: {:?}",
            pattern
        );
    }

    // Error cases: (pattern, expected kind, span start, span end).
    let failures = vec![
        (r"(?-u)\pZ", hir::ErrorKind::UnicodeNotAllowed, 5, 8),
        (r"(?-u)\p{Separator}", hir::ErrorKind::UnicodeNotAllowed, 5, 18),
        (r"\pE", hir::ErrorKind::UnicodePropertyNotFound, 0, 3),
        (r"\p{Foo}", hir::ErrorKind::UnicodePropertyNotFound, 0, 7),
        (
            r"\p{gc:Foo}",
            hir::ErrorKind::UnicodePropertyValueNotFound,
            0,
            10,
        ),
    ];
    for (pattern, kind, start, end) in failures {
        let expected =
            TestError { kind: kind, span: one_line_span(start, end) };
        assert_eq!(t_err(pattern), expected, "pattern: {:?}", pattern);
    }
}
2210
// Without the `unicode-gencat` feature, general-category lookups (and the
// special `Any` name) must fail with `UnicodePropertyNotFound`, reported
// over the whole `\p{..}` item.
#[test]
#[cfg(not(feature = "unicode-gencat"))]
fn class_unicode_gencat_disabled() {
    // (pattern, end offset of the reported span); spans start at offset 0.
    let cases = [(r"\p{Separator}", 13), (r"\p{Any}", 7)];
    for &(pattern, end) in cases.iter() {
        let expected = TestError {
            kind: hir::ErrorKind::UnicodePropertyNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(end, 1, end + 1),
            ),
        };
        assert_eq!(t_err(pattern), expected, "pattern: {:?}", pattern);
    }
}
2236
// Exercises script classes: a plain lookup, case-insensitive variants (only
// when `unicode-case` is enabled), and unknown `sc`/`scx` values.
#[test]
#[cfg(feature = "unicode-script")]
fn class_unicode_script() {
    let greek = || hir_uclass_query(ClassQuery::Binary("Greek"));

    assert_eq!(t(r"\p{Greek}"), greek());
    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)\p{Greek}"), hir_case_fold(greek()));
    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)\P{Greek}"), hir_negate(hir_case_fold(greek())));

    // (pattern, end offset of the reported span); spans start at offset 0.
    let unknown_values = [(r"\p{sc:Foo}", 10), (r"\p{scx:Foo}", 11)];
    for &(pattern, end) in unknown_values.iter() {
        let expected = TestError {
            kind: hir::ErrorKind::UnicodePropertyValueNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(end, 1, end + 1),
            ),
        };
        assert_eq!(t_err(pattern), expected, "pattern: {:?}", pattern);
    }
}
2278
// Without the `unicode-script` feature, script lookups (bare or via `scx:`)
// must fail with `UnicodePropertyNotFound`, reported over the whole item.
#[test]
#[cfg(not(feature = "unicode-script"))]
fn class_unicode_script_disabled() {
    // (pattern, end offset of the reported span); spans start at offset 0.
    let cases = [(r"\p{Greek}", 9), (r"\p{scx:Greek}", 13)];
    for &(pattern, end) in cases.iter() {
        let expected = TestError {
            kind: hir::ErrorKind::UnicodePropertyNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(end, 1, end + 1),
            ),
        };
        assert_eq!(t_err(pattern), expected, "pattern: {:?}", pattern);
    }
}
2304
// With `unicode-age` enabled, an unknown age value must be reported as
// `UnicodePropertyValueNotFound` over the whole `\p{..}` item.
#[test]
#[cfg(feature = "unicode-age")]
fn class_unicode_age() {
    let whole_item =
        Span::new(Position::new(0, 1, 1), Position::new(11, 1, 12));
    let expected = TestError {
        kind: hir::ErrorKind::UnicodePropertyValueNotFound,
        span: whole_item,
    };
    assert_eq!(t_err(r"\p{age:Foo}"), expected);
}
2319
// Without the `unicode-age` feature, an `age:` lookup must fail with
// `UnicodePropertyNotFound`, reported over the whole `\p{..}` item.
#[test]
#[cfg(not(feature = "unicode-age"))]
fn class_unicode_age_disabled() {
    let whole_item =
        Span::new(Position::new(0, 1, 1), Position::new(11, 1, 12));
    let expected = TestError {
        kind: hir::ErrorKind::UnicodePropertyNotFound,
        span: whole_item,
    };
    assert_eq!(t_err(r"\p{age:3.0}"), expected);
}
2334
// Covers bracketed classes: literal items and ranges, Perl and Unicode
// property classes nested in brackets, byte-oriented (`(?-u)`) classes,
// case-insensitive classes, negation, unusual punctuation, and errors.
#[test]
fn class_bracketed() {
    // Expected-value builders; each is gated exactly like the assertions
    // that use it so no configuration leaves one unused.
    #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
    let digit = || hir_uclass_query(ClassQuery::Binary("digit"));
    #[cfg(feature = "unicode-gencat")]
    let separator = || hir_uclass_query(ClassQuery::Binary("separator"));
    #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
    let greek_folded =
        || hir_case_fold(hir_uclass_query(ClassQuery::Binary("greek")));
    // All patterns here sit on line 1, so column == offset + 1.
    let one_line_span = |start: usize, end: usize| {
        Span::new(
            Position::new(start, 1, start + 1),
            Position::new(end, 1, end + 1),
        )
    };

    // Classes that collapse to a single Unicode range.
    let single_ranges = [
        ("[a]", 'a', 'a'),
        ("[a-z]", 'a', 'z'),
        ("[a-fd-h]", 'a', 'h'),
        ("[a-fg-m]", 'a', 'm'),
        (r"[\x00]", '\0', '\0'),
        (r"[\n]", '\n', '\n'),
        ("[\n]", '\n', '\n'),
    ];
    for &(pattern, lo, hi) in single_ranges.iter() {
        assert_eq!(
            t(pattern),
            hir_uclass(&[(lo, hi)]),
            "pattern: {:?}",
            pattern
        );
    }
    assert_eq!(t("[^[a]]"), hir_negate(hir_uclass(&[('a', 'a')])));

    // Perl and Unicode property classes inside brackets.
    #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
    assert_eq!(t(r"[\d]"), digit());
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(t(r"[\pZ]"), separator());
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(t(r"[\p{separator}]"), separator());

    // A negated class around a negated item yields the positive class.
    #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
    assert_eq!(t(r"[^\D]"), digit());
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(t(r"[^\PZ]"), separator());
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(t(r"[^\P{separator}]"), separator());
    #[cfg(all(
        feature = "unicode-case",
        any(feature = "unicode-perl", feature = "unicode-gencat")
    ))]
    assert_eq!(t(r"(?i)[^\D]"), digit());
    #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
    assert_eq!(t(r"(?i)[^\P{greek}]"), greek_folded());

    // Byte-oriented classes.
    assert_eq!(t("(?-u)[a]"), hir_bclass(&[(b'a', b'a')]));
    assert_eq!(t(r"(?-u)[\x00]"), hir_bclass(&[(b'\0', b'\0')]));
    assert_eq!(t_bytes(r"(?-u)[\xFF]"), hir_bclass(&[(b'\xFF', b'\xFF')]));

    // Case-insensitive classes.
    #[cfg(feature = "unicode-case")]
    assert_eq!(t("(?i)[a]"), hir_uclass(&[('A', 'A'), ('a', 'a')]));
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)[k]"),
        hir_uclass(&[('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}')])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)[β]"),
        hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ')])
    );
    assert_eq!(t("(?i-u)[k]"), hir_bclass(&[(b'K', b'K'), (b'k', b'k')]));

    // Negated classes.
    assert_eq!(t("[^a]"), hir_negate(hir_uclass(&[('a', 'a')])));
    assert_eq!(t(r"[^\x00]"), hir_negate(hir_uclass(&[('\0', '\0')])));
    assert_eq!(
        t_bytes("(?-u)[^a]"),
        hir_negate(hir_bclass(&[(b'a', b'a')]))
    );
    #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
    assert_eq!(t(r"[^\d]"), hir_negate(digit()));
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(t(r"[^\pZ]"), hir_negate(separator()));
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(t(r"[^\p{separator}]"), hir_negate(separator()));
    #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
    assert_eq!(t(r"(?i)[^\p{greek}]"), hir_negate(greek_folded()));
    #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
    assert_eq!(t(r"(?i)[\P{greek}]"), hir_negate(greek_folded()));

    // Test some weird cases: `[`, `&`, `~` and `-` as items or range ends.
    let punctuation = [
        (r"[\[]", '[', '['),
        (r"[&]", '&', '&'),
        (r"[\&]", '&', '&'),
        (r"[\&\&]", '&', '&'),
        (r"[\x00-&]", '\0', '&'),
        (r"[&-\xFF]", '&', '\u{FF}'),
        (r"[~]", '~', '~'),
        (r"[\~]", '~', '~'),
        (r"[\~\~]", '~', '~'),
        (r"[\x00-~]", '\0', '~'),
        (r"[~-\xFF]", '~', '\u{FF}'),
        (r"[-]", '-', '-'),
        (r"[\-]", '-', '-'),
        (r"[\-\-]", '-', '-'),
        (r"[\x00-\-]", '\0', '-'),
        (r"[\--\xFF]", '-', '\u{FF}'),
    ];
    for &(pattern, lo, hi) in punctuation.iter() {
        assert_eq!(
            t(pattern),
            hir_uclass(&[(lo, hi)]),
            "pattern: {:?}",
            pattern
        );
    }

    // Error cases.
    assert_eq!(
        t_err("(?-u)[^a]"),
        TestError {
            kind: hir::ErrorKind::InvalidUtf8,
            span: one_line_span(5, 9),
        }
    );
    #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
    assert_eq!(
        t_err(r"[^\s\S]"),
        TestError {
            kind: hir::ErrorKind::EmptyClassNotAllowed,
            span: one_line_span(0, 7),
        }
    );
    #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
    assert_eq!(
        t_err(r"(?-u)[^\s\S]"),
        TestError {
            kind: hir::ErrorKind::EmptyClassNotAllowed,
            span: one_line_span(5, 12),
        }
    );
}
2491
// Covers unions of items inside a bracketed class, including nested
// classes and the interaction with negation and case-insensitivity.
#[test]
fn class_bracketed_union() {
    // Expected-value builders; each is gated on the features required by
    // the assertions that use it.
    #[cfg(feature = "unicode-gencat")]
    let separator = || hir_uclass_query(ClassQuery::Binary("separator"));
    #[cfg(all(feature = "unicode-gencat", feature = "unicode-script"))]
    let greek_or_separator = || {
        hir_union(
            hir_uclass_query(ClassQuery::Binary("greek")),
            separator(),
        )
    };
    #[cfg(all(
        feature = "unicode-age",
        feature = "unicode-gencat",
        feature = "unicode-script"
    ))]
    let age_3_0 = || {
        hir_uclass_query(ClassQuery::ByValue {
            property_name: "age",
            property_value: "3.0",
        })
    };
    #[cfg(all(
        feature = "unicode-age",
        feature = "unicode-gencat",
        feature = "unicode-script"
    ))]
    let age_greek_or_separator =
        || hir_union(age_3_0(), greek_or_separator());

    assert_eq!(t("[a-zA-Z]"), hir_uclass(&[('A', 'Z'), ('a', 'z')]));
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(
        t(r"[a\pZb]"),
        hir_union(hir_uclass(&[('a', 'b')]), separator())
    );
    #[cfg(all(feature = "unicode-gencat", feature = "unicode-script"))]
    assert_eq!(t(r"[\pZ\p{Greek}]"), greek_or_separator());
    #[cfg(all(
        feature = "unicode-age",
        feature = "unicode-gencat",
        feature = "unicode-script"
    ))]
    assert_eq!(t(r"[\p{age:3.0}\pZ\p{Greek}]"), age_greek_or_separator());
    #[cfg(all(
        feature = "unicode-age",
        feature = "unicode-gencat",
        feature = "unicode-script"
    ))]
    assert_eq!(
        t(r"[[[\p{age:3.0}\pZ]\p{Greek}][\p{Cyrillic}]]"),
        hir_union(
            age_3_0(),
            hir_union(
                hir_uclass_query(ClassQuery::Binary("cyrillic")),
                greek_or_separator()
            )
        )
    );

    // The same three-way union under `(?i)` and/or negation.
    #[cfg(all(
        feature = "unicode-age",
        feature = "unicode-case",
        feature = "unicode-gencat",
        feature = "unicode-script"
    ))]
    assert_eq!(
        t(r"(?i)[\p{age:3.0}\pZ\p{Greek}]"),
        hir_case_fold(age_greek_or_separator())
    );
    #[cfg(all(
        feature = "unicode-age",
        feature = "unicode-gencat",
        feature = "unicode-script"
    ))]
    assert_eq!(
        t(r"[^\p{age:3.0}\pZ\p{Greek}]"),
        hir_negate(age_greek_or_separator())
    );
    #[cfg(all(
        feature = "unicode-age",
        feature = "unicode-case",
        feature = "unicode-gencat",
        feature = "unicode-script"
    ))]
    assert_eq!(
        t(r"(?i)[^\p{age:3.0}\pZ\p{Greek}]"),
        hir_negate(hir_case_fold(age_greek_or_separator()))
    );
}
2608
// Covers negated classes nested inside other (possibly negated) classes,
// with and without case-insensitivity, plus the resulting empty-class
// errors.
#[test]
fn class_bracketed_nested() {
    let only_c = || hir_uclass(&[('c', 'c')]);
    // All patterns here sit on line 1, so column == offset + 1.
    let empty_class_at = |start: usize, end: usize| TestError {
        kind: hir::ErrorKind::EmptyClassNotAllowed,
        span: Span::new(
            Position::new(start, 1, start + 1),
            Position::new(end, 1, end + 1),
        ),
    };

    for &pattern in [r"[a[^c]]", r"[a-b[^c]]"].iter() {
        assert_eq!(
            t(pattern),
            hir_negate(only_c()),
            "pattern: {:?}",
            pattern
        );
    }
    assert_eq!(t(r"[a-c[^c]]"), hir_negate(hir_uclass(&[])));

    for &pattern in [r"[^a[^c]]", r"[^a-b[^c]]"].iter() {
        assert_eq!(t(pattern), only_c(), "pattern: {:?}", pattern);
    }

    // The Unicode case-insensitive variants only run when case folding
    // support is compiled in.
    if cfg!(feature = "unicode-case") {
        for &pattern in [r"(?i)[a[^c]]", r"(?i)[a-b[^c]]"].iter() {
            assert_eq!(
                t(pattern),
                hir_negate(hir_case_fold(only_c())),
                "pattern: {:?}",
                pattern
            );
        }
        for &pattern in [r"(?i)[^a[^c]]", r"(?i)[^a-b[^c]]"].iter() {
            assert_eq!(
                t(pattern),
                hir_uclass(&[('C', 'C'), ('c', 'c')]),
                "pattern: {:?}",
                pattern
            );
        }
    }

    assert_eq!(t_err(r"[^a-c[^c]]"), empty_class_at(0, 10));
    if cfg!(feature = "unicode-case") {
        assert_eq!(t_err(r"(?i)[^a-c[^c]]"), empty_class_at(4, 14));
    }
}
2659
// Covers the `&&` intersection operator in bracketed classes, in Unicode
// and byte mode, with and without case-insensitivity, plus escaping and
// precedence corner cases.
#[test]
fn class_bracketed_intersect() {
    // (class, low, high): each class intersects down to one ASCII range.
    // The same classes are re-checked below under each flag prefix.
    let shared = [
        ("[abc&&b-c]", 'b', 'c'),
        ("[abc&&[b-c]]", 'b', 'c'),
        ("[[abc]&&[b-c]]", 'b', 'c'),
        ("[a-z&&b-y&&c-x]", 'c', 'x'),
        ("[c-da-b&&a-d]", 'a', 'd'),
        ("[a-d&&c-da-b]", 'a', 'd'),
    ];

    for &(class, lo, hi) in shared.iter() {
        assert_eq!(t(class), hir_uclass(&[(lo, hi)]), "class: {:?}", class);
    }
    assert_eq!(t(r"[a-z&&a-c]"), hir_uclass(&[('a', 'c')]));
    assert_eq!(t(r"[[a-z&&a-c]]"), hir_uclass(&[('a', 'c')]));
    assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));

    for &(class, lo, hi) in shared.iter() {
        let pattern = format!("(?-u){}", class);
        assert_eq!(
            t(&pattern),
            hir_bclass(&[(lo as u8, hi as u8)]),
            "pattern: {:?}",
            pattern
        );
    }

    // Unicode case folding is only checked when it is compiled in.
    if cfg!(feature = "unicode-case") {
        for &(class, lo, hi) in shared.iter() {
            let pattern = format!("(?i){}", class);
            assert_eq!(
                t(&pattern),
                hir_case_fold(hir_uclass(&[(lo, hi)])),
                "pattern: {:?}",
                pattern
            );
        }
    }

    for &(class, lo, hi) in shared.iter() {
        let pattern = format!("(?i-u){}", class);
        assert_eq!(
            t(&pattern),
            hir_case_fold(hir_bclass(&[(lo as u8, hi as u8)])),
            "pattern: {:?}",
            pattern
        );
    }

    // (pattern, the single character the class must match)
    let single_chars = [
        // In `[a^]`, `^` does not need to be escaped, so it makes sense
        // that `^` is also allowed to be unescaped after `&&`.
        (r"[\^&&^]", '^'),
        // `]` needs to be escaped after `&&` since it's not at start of
        // class.
        (r"[]&&\]]", ']'),
        (r"[-&&-]", '-'),
        (r"[\&&&&]", '&'),
        (r"[\&&&\&]", '&'),
    ];
    for &(pattern, ch) in single_chars.iter() {
        assert_eq!(
            t(pattern),
            hir_uclass(&[(ch, ch)]),
            "pattern: {:?}",
            pattern
        );
    }

    // Test precedence.
    assert_eq!(
        t(r"[a-w&&[^c-g]z]"),
        hir_uclass(&[('a', 'b'), ('h', 'w')])
    );
}
2749
// Covers the interaction of `&&` with negation, both on the enclosing
// class and on nested classes, in Unicode and byte mode.
//
// Only the Unicode-mode assertions that use `\w`/`\d` are gated on
// `unicode-perl`. The `(?-u)` assertions are all ungated, so they run in
// every feature configuration.
#[test]
fn class_bracketed_intersect_negate() {
    let ascii_digit =
        || hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit));
    let ascii_word =
        || hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word));
    #[cfg(feature = "unicode-perl")]
    let unicode_digit = || hir_uclass_query(ClassQuery::Binary("digit"));

    // Unicode mode.
    #[cfg(feature = "unicode-perl")]
    assert_eq!(t(r"[^\w&&\d]"), hir_negate(unicode_digit()));
    assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
    #[cfg(feature = "unicode-perl")]
    assert_eq!(t(r"[^[\w&&\d]]"), hir_negate(unicode_digit()));
    #[cfg(feature = "unicode-perl")]
    assert_eq!(t(r"[^[^\w&&\d]]"), unicode_digit());
    #[cfg(feature = "unicode-perl")]
    assert_eq!(t(r"[[[^\w]&&[^\d]]]"), hir_negate(hir_uclass_perl_word()));

    // Byte mode.
    assert_eq!(t_bytes(r"(?-u)[^\w&&\d]"), hir_negate(ascii_digit()));
    assert_eq!(
        t_bytes(r"(?-u)[^[a-z&&a-c]]"),
        hir_negate(hir_bclass(&[(b'a', b'c')]))
    );
    assert_eq!(t_bytes(r"(?-u)[^[\w&&\d]]"), hir_negate(ascii_digit()));
    assert_eq!(t_bytes(r"(?-u)[^[^\w&&\d]]"), ascii_digit());
    assert_eq!(t_bytes(r"(?-u)[[[^\w]&&[^\d]]]"), hir_negate(ascii_word()));
}
2799
// Covers the `--` difference operator in bracketed classes, in Unicode
// mode (needs `unicode-gencat` for `\pL`) and in byte mode.
#[test]
fn class_bracketed_difference() {
    #[cfg(feature = "unicode-gencat")]
    let letters_minus_ascii = hir_difference(
        hir_uclass_query(ClassQuery::Binary("letter")),
        hir_uclass(&[('\0', '\x7F')]),
    );
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(t(r"[\pL--[:ascii:]]"), letters_minus_ascii);

    let ascii_upper = hir_bclass(&[(b'A', b'Z')]);
    assert_eq!(t(r"(?-u)[[:alpha:]--[:lower:]]"), ascii_upper);
}
2816
// Covers the `~~` symmetric difference operator in bracketed classes, in
// Unicode mode and in byte mode.
#[test]
fn class_bracketed_symmetric_difference() {
    // The code points expected to differ between `sc:Greek` and `scx:Greek`.
    #[cfg(feature = "unicode-script")]
    let greek_sc_vs_scx = hir_uclass(&[
        ('\u{0342}', '\u{0342}'),
        ('\u{0345}', '\u{0345}'),
        ('\u{1DC0}', '\u{1DC1}'),
    ]);
    #[cfg(feature = "unicode-script")]
    assert_eq!(t(r"[\p{sc:Greek}~~\p{scx:Greek}]"), greek_sc_vs_scx);

    // `a-g` and `c-j` overlap on `c-g`, leaving `a-b` and `h-j`.
    let unicode_expected = hir_uclass(&[('a', 'b'), ('h', 'j')]);
    assert_eq!(t(r"[a-g~~c-j]"), unicode_expected);

    let byte_expected = hir_bclass(&[(b'a', b'b'), (b'h', b'j')]);
    assert_eq!(t(r"(?-u)[a-g~~c-j]"), byte_expected);
}
2835
// Covers translation under the `x` flag: whitespace and `#` comments are
// accepted inside escapes, property names and counted repetitions, while
// an escaped space stays a literal.
#[test]
fn ignore_whitespace() {
    let capital_s = || hir_lit("S");

    assert_eq!(t(r"(?x)\12 3"), hir_lit("\n3"));

    // Braced hex escape, on one line and spread across commented lines.
    assert_eq!(t(r"(?x)\x { 53 }"), capital_s());
    let braced_hex = r"(?x)\x # comment
{ # comment
53 # comment
} #comment";
    assert_eq!(t(braced_hex), capital_s());

    // Unbraced hex escape with its digits separated from `\x` or from
    // each other.
    assert_eq!(t(r"(?x)\x 53"), capital_s());
    let split_hex = r"(?x)\x # comment
53 # comment";
    assert_eq!(t(split_hex), capital_s());
    assert_eq!(t(r"(?x)\x5 3"), capital_s());

    #[cfg(feature = "unicode-gencat")]
    assert_eq!(
        t(r"(?x)\p # comment
{ # comment
Separator # comment
} # comment"),
        hir_uclass_query(ClassQuery::Binary("separator"))
    );

    let counted_repetition = r"(?x)a # comment
{ # comment
5 # comment
, # comment
10 # comment
} # comment";
    assert_eq!(
        t(counted_repetition),
        hir_range(
            true,
            hir::RepetitionRange::Bounded(5, 10),
            hir_lit("a")
        )
    );

    assert_eq!(t(r"(?x)a\ # hi there"), hir_lit("a "));
}
2881
// Checks `Hir::is_always_utf8` on expressions translated with `t_bytes`.
#[test]
fn analysis_is_always_utf8() {
    // Positive examples.
    let always_utf8 = [
        r"a",
        r"ab",
        r"(?-u)a",
        r"(?-u)ab",
        r"\xFF",
        r"\xFF\xFF",
        r"[^a]",
        r"[^a][^a]",
        r"\b",
        r"\B",
        r"(?-u)\b",
    ];
    for &pattern in always_utf8.iter() {
        assert!(
            t_bytes(pattern).is_always_utf8(),
            "expected always-UTF-8: {:?}",
            pattern
        );
    }

    // Negative examples.
    let maybe_invalid = [
        r"(?-u)\xFF",
        r"(?-u)\xFF\xFF",
        r"(?-u)[^a]",
        r"(?-u)[^a][^a]",
        r"(?-u)\B",
    ];
    for &pattern in maybe_invalid.iter() {
        assert!(
            !t_bytes(pattern).is_always_utf8(),
            "expected not always-UTF-8: {:?}",
            pattern
        );
    }
}
2904
// Checks `Hir::is_all_assertions` on anchors, word boundaries and
// combinations of them, plus one pattern that also contains a literal.
#[test]
fn analysis_is_all_assertions() {
    // Positive examples.
    let only_assertions = [
        r"\b",
        r"\B",
        r"^",
        r"$",
        r"\A",
        r"\z",
        r"$^\z\A\b\B",
        r"$|^|\z|\A|\b|\B",
        r"^$|$^",
        r"((\b)+())*^",
    ];
    for &pattern in only_assertions.iter() {
        assert!(
            t(pattern).is_all_assertions(),
            "expected all assertions: {:?}",
            pattern
        );
    }

    // Negative examples.
    assert!(!t(r"^a").is_all_assertions());
}
2922
// Checks the text-anchor predicates (`is_anchored_start`/`is_anchored_end`)
// and the line-anchor predicates (`is_line_anchored_start`/
// `is_line_anchored_end`).
//
// Every positive pattern is checked against both the text and the line
// predicate for its side. `$^` appears in both positive lists so that all
// four predicates are checked for it.
#[test]
fn analysis_is_anchored() {
    // Positive examples: anchored at the start.
    let start_anchored = [
        r"^",
        r"^^",
        r"^$",
        r"^foo",
        r"^foo|^bar",
        r"^(foo|bar)",
        r"^+",
        r"^++",
        r"(^)+",
        r"$^",
        r"$^|^$",
        r"\b^",
        r"^(?m:^)",
        r"(?m:^)^",
    ];
    for &pattern in start_anchored.iter() {
        let expr = t(pattern);
        assert!(
            expr.is_anchored_start(),
            "expected anchored start: {:?}",
            pattern
        );
        assert!(
            expr.is_line_anchored_start(),
            "expected line anchored start: {:?}",
            pattern
        );
    }

    // Positive examples: anchored at the end.
    let end_anchored = [
        r"$",
        r"$$",
        r"^$",
        r"foo$",
        r"foo$|bar$",
        r"(foo|bar)$",
        r"$+",
        r"$++",
        r"($)+",
        r"$^",
        r"$^|^$",
        r"$\b",
        r"(?m:$)$",
        r"$(?m:$)",
    ];
    for &pattern in end_anchored.iter() {
        let expr = t(pattern);
        assert!(
            expr.is_anchored_end(),
            "expected anchored end: {:?}",
            pattern
        );
        assert!(
            expr.is_line_anchored_end(),
            "expected line anchored end: {:?}",
            pattern
        );
    }

    // Negative examples: multi-line anchors are not text anchors. Only the
    // text predicates are checked for these patterns.
    let not_text_start = [r"(?m)^", r"(?m:^$)|$^", r"$^|(?m:^$)"];
    for &pattern in not_text_start.iter() {
        assert!(
            !t(pattern).is_anchored_start(),
            "expected no anchored start: {:?}",
            pattern
        );
    }
    let not_text_end = [r"(?m)$", r"(?m:^$)|$^", r"$^|(?m:^$)"];
    for &pattern in not_text_end.iter() {
        assert!(
            !t(pattern).is_anchored_end(),
            "expected no anchored end: {:?}",
            pattern
        );
    }

    // Negative examples: a literal on the wrong side of the anchor defeats
    // all four predicates.
    for &pattern in [r"a^", r"$a"].iter() {
        let expr = t(pattern);
        assert!(
            !expr.is_anchored_start(),
            "expected no anchored start: {:?}",
            pattern
        );
        assert!(
            !expr.is_line_anchored_start(),
            "expected no line anchored start: {:?}",
            pattern
        );
        assert!(
            !expr.is_anchored_end(),
            "expected no anchored end: {:?}",
            pattern
        );
        assert!(
            !expr.is_line_anchored_end(),
            "expected no line anchored end: {:?}",
            pattern
        );
    }

    // Negative examples: an unanchored alternate, or an anchor that may be
    // repeated zero times, at the start.
    let not_start = [r"^foo|bar", r"^*", r"^*+", r"^+*", r"(^)*"];
    for &pattern in not_start.iter() {
        let expr = t(pattern);
        assert!(
            !expr.is_anchored_start(),
            "expected no anchored start: {:?}",
            pattern
        );
        assert!(
            !expr.is_line_anchored_start(),
            "expected no line anchored start: {:?}",
            pattern
        );
    }

    // ... and the same shapes at the end.
    let not_end = [r"foo|bar$", r"$*", r"$*+", r"$+*", r"($)*"];
    for &pattern in not_end.iter() {
        let expr = t(pattern);
        assert!(
            !expr.is_anchored_end(),
            "expected no anchored end: {:?}",
            pattern
        );
        assert!(
            !expr.is_line_anchored_end(),
            "expected no line anchored end: {:?}",
            pattern
        );
    }
}
3031
// Checks the line-anchor predicates on patterns that use multi-line
// (`(?m)`) anchors.
#[test]
fn analysis_is_line_anchored() {
    // (pattern expected to be line anchored at the start,
    //  pattern expected to be line anchored at the end)
    let cases = [
        (r"(?m)^(foo|bar)", r"(?m)(foo|bar)$"),
        (r"(?m)^foo|^bar", r"(?m)foo$|bar$"),
        (r"(?m)^", r"(?m)$"),
        (r"(?m:^$)|$^", r"(?m:^$)|$^"),
        (r"$^|(?m:^$)", r"$^|(?m:^$)"),
    ];
    for &(start, end) in cases.iter() {
        assert!(
            t(start).is_line_anchored_start(),
            "expected line anchored start: {:?}",
            start
        );
        assert!(
            t(end).is_line_anchored_end(),
            "expected line anchored end: {:?}",
            end
        );
    }
}
3049
// Checks `Hir::is_any_anchored_start` and `Hir::is_any_anchored_end`.
#[test]
fn analysis_is_any_anchored() {
    // Positive examples.
    for &pattern in [r"^", r"\A"].iter() {
        assert!(
            t(pattern).is_any_anchored_start(),
            "expected any-anchored start: {:?}",
            pattern
        );
    }
    for &pattern in [r"$", r"\z"].iter() {
        assert!(
            t(pattern).is_any_anchored_end(),
            "expected any-anchored end: {:?}",
            pattern
        );
    }

    // Negative examples.
    for &pattern in [r"(?m)^", r"$"].iter() {
        assert!(
            !t(pattern).is_any_anchored_start(),
            "expected no any-anchored start: {:?}",
            pattern
        );
    }
    for &pattern in [r"(?m)$", r"^"].iter() {
        assert!(
            !t(pattern).is_any_anchored_end(),
            "expected no any-anchored end: {:?}",
            pattern
        );
    }
}
3064
// Checks `Hir::is_match_empty` on a range of patterns.
#[test]
fn analysis_is_match_empty() {
    // Positive examples.
    let matches_empty = [
        r"",
        r"()",
        r"()*",
        r"()+",
        r"()?",
        r"a*",
        r"a?",
        r"a{0}",
        r"a{0,}",
        r"a{0,1}",
        r"a{0,10}",
        r"a*|b",
        r"b|a*",
        r"a*a?(abcd)*",
        r"^",
        r"$",
        r"(?m)^",
        r"(?m)$",
        r"\A",
        r"\z",
        r"\B",
    ];
    for &pattern in matches_empty.iter() {
        assert!(
            t(pattern).is_match_empty(),
            "expected to match empty: {:?}",
            pattern
        );
    }
    #[cfg(feature = "unicode-gencat")]
    assert!(t(r"\pL*").is_match_empty());
    assert!(t_bytes(r"(?-u)\B").is_match_empty());

    // Negative examples.
    let never_empty = [
        r"a+",
        r"a{1}",
        r"a{1,}",
        r"a{1,2}",
        r"a{1,10}",
        r"b|a",
        r"a*a+(abcd)*",
        r"\b",
        r"(?-u)\b",
    ];
    for &pattern in never_empty.iter() {
        assert!(
            !t(pattern).is_match_empty(),
            "expected not to match empty: {:?}",
            pattern
        );
    }
}
3104
// Checks `Hir::is_literal`: the positive examples are plain literal
// strings; the negative ones are empty or contain an anchor, alternation,
// group, repetition or class.
#[test]
fn analysis_is_literal() {
    // Positive examples.
    for &pattern in [r"a", r"ab", r"abc", r"(?m)abc"].iter() {
        assert!(
            t(pattern).is_literal(),
            "expected a literal: {:?}",
            pattern
        );
    }

    // Negative examples.
    let not_literal = [
        r"",
        r"^",
        r"a|b",
        r"(a)",
        r"a+",
        r"foo(a)",
        r"(a)foo",
        r"[a]",
    ];
    for &pattern in not_literal.iter() {
        assert!(
            !t(pattern).is_literal(),
            "expected not a literal: {:?}",
            pattern
        );
    }
}
3123
// Checks `Hir::is_alternation_literal`: the positive examples are literals
// or alternations of literals; the negative ones are empty or contain an
// anchor, group, repetition or class.
#[test]
fn analysis_is_alternation_literal() {
    // Positive examples.
    let alternation_literals = [
        r"a",
        r"ab",
        r"abc",
        r"(?m)abc",
        r"a|b",
        r"a|b|c",
        r"foo|bar",
        r"foo|bar|baz",
    ];
    for &pattern in alternation_literals.iter() {
        assert!(
            t(pattern).is_alternation_literal(),
            "expected an alternation of literals: {:?}",
            pattern
        );
    }

    // Negative examples.
    let others = [
        r"",
        r"^",
        r"(a)",
        r"a+",
        r"foo(a)",
        r"(a)foo",
        r"[a]",
        r"[a]|b",
        r"a|[b]",
        r"(a)|b",
        r"a|(b)",
    ];
    for &pattern in others.iter() {
        assert!(
            !t(pattern).is_alternation_literal(),
            "expected not an alternation of literals: {:?}",
            pattern
        );
    }
}
3149 }
3150