1 /*!
2 Defines a translator that converts an `Ast` to an `Hir`.
3 */
4
5 use std::cell::{Cell, RefCell};
6 use std::result;
7
8 use ast::{self, Ast, Span, Visitor};
9 use hir::{self, Error, ErrorKind, Hir};
10 use unicode::{self, ClassQuery};
11
12 type Result<T> = result::Result<T, Error>;
13
14 /// A builder for constructing an AST->HIR translator.
15 #[derive(Clone, Debug)]
16 pub struct TranslatorBuilder {
17 allow_invalid_utf8: bool,
18 flags: Flags,
19 }
20
21 impl Default for TranslatorBuilder {
default() -> TranslatorBuilder22 fn default() -> TranslatorBuilder {
23 TranslatorBuilder::new()
24 }
25 }
26
27 impl TranslatorBuilder {
28 /// Create a new translator builder with a default c onfiguration.
new() -> TranslatorBuilder29 pub fn new() -> TranslatorBuilder {
30 TranslatorBuilder {
31 allow_invalid_utf8: false,
32 flags: Flags::default(),
33 }
34 }
35
36 /// Build a translator using the current configuration.
build(&self) -> Translator37 pub fn build(&self) -> Translator {
38 Translator {
39 stack: RefCell::new(vec![]),
40 flags: Cell::new(self.flags),
41 allow_invalid_utf8: self.allow_invalid_utf8,
42 }
43 }
44
45 /// When enabled, translation will permit the construction of a regular
46 /// expression that may match invalid UTF-8.
47 ///
48 /// When disabled (the default), the translator is guaranteed to produce
49 /// an expression that will only ever match valid UTF-8 (otherwise, the
50 /// translator will return an error).
51 ///
52 /// Perhaps surprisingly, when invalid UTF-8 isn't allowed, a negated ASCII
53 /// word boundary (uttered as `(?-u:\B)` in the concrete syntax) will cause
54 /// the parser to return an error. Namely, a negated ASCII word boundary
55 /// can result in matching positions that aren't valid UTF-8 boundaries.
allow_invalid_utf8(&mut self, yes: bool) -> &mut TranslatorBuilder56 pub fn allow_invalid_utf8(&mut self, yes: bool) -> &mut TranslatorBuilder {
57 self.allow_invalid_utf8 = yes;
58 self
59 }
60
61 /// Enable or disable the case insensitive flag (`i`) by default.
case_insensitive(&mut self, yes: bool) -> &mut TranslatorBuilder62 pub fn case_insensitive(&mut self, yes: bool) -> &mut TranslatorBuilder {
63 self.flags.case_insensitive = if yes { Some(true) } else { None };
64 self
65 }
66
67 /// Enable or disable the multi-line matching flag (`m`) by default.
multi_line(&mut self, yes: bool) -> &mut TranslatorBuilder68 pub fn multi_line(&mut self, yes: bool) -> &mut TranslatorBuilder {
69 self.flags.multi_line = if yes { Some(true) } else { None };
70 self
71 }
72
73 /// Enable or disable the "dot matches any character" flag (`s`) by
74 /// default.
dot_matches_new_line( &mut self, yes: bool, ) -> &mut TranslatorBuilder75 pub fn dot_matches_new_line(
76 &mut self,
77 yes: bool,
78 ) -> &mut TranslatorBuilder {
79 self.flags.dot_matches_new_line = if yes { Some(true) } else { None };
80 self
81 }
82
83 /// Enable or disable the "swap greed" flag (`U`) by default.
swap_greed(&mut self, yes: bool) -> &mut TranslatorBuilder84 pub fn swap_greed(&mut self, yes: bool) -> &mut TranslatorBuilder {
85 self.flags.swap_greed = if yes { Some(true) } else { None };
86 self
87 }
88
89 /// Enable or disable the Unicode flag (`u`) by default.
unicode(&mut self, yes: bool) -> &mut TranslatorBuilder90 pub fn unicode(&mut self, yes: bool) -> &mut TranslatorBuilder {
91 self.flags.unicode = if yes { None } else { Some(false) };
92 self
93 }
94 }
95
96 /// A translator maps abstract syntax to a high level intermediate
97 /// representation.
98 ///
99 /// A translator may be benefit from reuse. That is, a translator can translate
100 /// many abstract syntax trees.
101 ///
102 /// A `Translator` can be configured in more detail via a
103 /// [`TranslatorBuilder`](struct.TranslatorBuilder.html).
104 #[derive(Clone, Debug)]
105 pub struct Translator {
106 /// Our call stack, but on the heap.
107 stack: RefCell<Vec<HirFrame>>,
108 /// The current flag settings.
109 flags: Cell<Flags>,
110 /// Whether we're allowed to produce HIR that can match arbitrary bytes.
111 allow_invalid_utf8: bool,
112 }
113
114 impl Translator {
115 /// Create a new translator using the default configuration.
new() -> Translator116 pub fn new() -> Translator {
117 TranslatorBuilder::new().build()
118 }
119
120 /// Translate the given abstract syntax tree (AST) into a high level
121 /// intermediate representation (HIR).
122 ///
123 /// If there was a problem doing the translation, then an HIR-specific
124 /// error is returned.
125 ///
126 /// The original pattern string used to produce the `Ast` *must* also be
127 /// provided. The translator does not use the pattern string during any
128 /// correct translation, but is used for error reporting.
translate(&mut self, pattern: &str, ast: &Ast) -> Result<Hir>129 pub fn translate(&mut self, pattern: &str, ast: &Ast) -> Result<Hir> {
130 ast::visit(ast, TranslatorI::new(self, pattern))
131 }
132 }
133
134 /// An HirFrame is a single stack frame, represented explicitly, which is
135 /// created for each item in the Ast that we traverse.
136 ///
137 /// Note that technically, this type doesn't represent our entire stack
138 /// frame. In particular, the Ast visitor represents any state associated with
139 /// traversing the Ast itself.
140 #[derive(Clone, Debug)]
141 enum HirFrame {
142 /// An arbitrary HIR expression. These get pushed whenever we hit a base
143 /// case in the Ast. They get popped after an inductive (i.e., recursive)
144 /// step is complete.
145 Expr(Hir),
146 /// A Unicode character class. This frame is mutated as we descend into
147 /// the Ast of a character class (which is itself its own mini recursive
148 /// structure).
149 ClassUnicode(hir::ClassUnicode),
150 /// A byte-oriented character class. This frame is mutated as we descend
151 /// into the Ast of a character class (which is itself its own mini
152 /// recursive structure).
153 ///
154 /// Byte character classes are created when Unicode mode (`u`) is disabled.
155 /// If `allow_invalid_utf8` is disabled (the default), then a byte
156 /// character is only permitted to match ASCII text.
157 ClassBytes(hir::ClassBytes),
158 /// This is pushed on to the stack upon first seeing any kind of group,
159 /// indicated by parentheses (including non-capturing groups). It is popped
160 /// upon leaving a group.
161 Group {
162 /// The old active flags, if any, when this group was opened.
163 ///
164 /// If this group sets flags, then the new active flags are set to the
165 /// result of merging the old flags with the flags introduced by this
166 /// group.
167 ///
168 /// When this group is popped, the active flags should be restored to
169 /// the flags set here.
170 ///
171 /// The "active" flags correspond to whatever flags are set in the
172 /// Translator.
173 old_flags: Option<Flags>,
174 },
175 /// This is pushed whenever a concatenation is observed. After visiting
176 /// every sub-expression in the concatenation, the translator's stack is
177 /// popped until it sees a Concat frame.
178 Concat,
179 /// This is pushed whenever an alternation is observed. After visiting
180 /// every sub-expression in the alternation, the translator's stack is
181 /// popped until it sees an Alternation frame.
182 Alternation,
183 }
184
185 impl HirFrame {
186 /// Assert that the current stack frame is an Hir expression and return it.
unwrap_expr(self) -> Hir187 fn unwrap_expr(self) -> Hir {
188 match self {
189 HirFrame::Expr(expr) => expr,
190 _ => panic!("tried to unwrap expr from HirFrame, got: {:?}", self),
191 }
192 }
193
194 /// Assert that the current stack frame is a Unicode class expression and
195 /// return it.
unwrap_class_unicode(self) -> hir::ClassUnicode196 fn unwrap_class_unicode(self) -> hir::ClassUnicode {
197 match self {
198 HirFrame::ClassUnicode(cls) => cls,
199 _ => panic!(
200 "tried to unwrap Unicode class \
201 from HirFrame, got: {:?}",
202 self
203 ),
204 }
205 }
206
207 /// Assert that the current stack frame is a byte class expression and
208 /// return it.
unwrap_class_bytes(self) -> hir::ClassBytes209 fn unwrap_class_bytes(self) -> hir::ClassBytes {
210 match self {
211 HirFrame::ClassBytes(cls) => cls,
212 _ => panic!(
213 "tried to unwrap byte class \
214 from HirFrame, got: {:?}",
215 self
216 ),
217 }
218 }
219
220 /// Assert that the current stack frame is a group indicator and return
221 /// its corresponding flags (the flags that were active at the time the
222 /// group was entered) if they exist.
unwrap_group(self) -> Option<Flags>223 fn unwrap_group(self) -> Option<Flags> {
224 match self {
225 HirFrame::Group { old_flags } => old_flags,
226 _ => {
227 panic!("tried to unwrap group from HirFrame, got: {:?}", self)
228 }
229 }
230 }
231 }
232
233 impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
234 type Output = Hir;
235 type Err = Error;
236
finish(self) -> Result<Hir>237 fn finish(self) -> Result<Hir> {
238 // ... otherwise, we should have exactly one HIR on the stack.
239 assert_eq!(self.trans().stack.borrow().len(), 1);
240 Ok(self.pop().unwrap().unwrap_expr())
241 }
242
visit_pre(&mut self, ast: &Ast) -> Result<()>243 fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
244 match *ast {
245 Ast::Class(ast::Class::Bracketed(_)) => {
246 if self.flags().unicode() {
247 let cls = hir::ClassUnicode::empty();
248 self.push(HirFrame::ClassUnicode(cls));
249 } else {
250 let cls = hir::ClassBytes::empty();
251 self.push(HirFrame::ClassBytes(cls));
252 }
253 }
254 Ast::Group(ref x) => {
255 let old_flags = x.flags().map(|ast| self.set_flags(ast));
256 self.push(HirFrame::Group { old_flags: old_flags });
257 }
258 Ast::Concat(ref x) if x.asts.is_empty() => {}
259 Ast::Concat(_) => {
260 self.push(HirFrame::Concat);
261 }
262 Ast::Alternation(ref x) if x.asts.is_empty() => {}
263 Ast::Alternation(_) => {
264 self.push(HirFrame::Alternation);
265 }
266 _ => {}
267 }
268 Ok(())
269 }
270
visit_post(&mut self, ast: &Ast) -> Result<()>271 fn visit_post(&mut self, ast: &Ast) -> Result<()> {
272 match *ast {
273 Ast::Empty(_) => {
274 self.push(HirFrame::Expr(Hir::empty()));
275 }
276 Ast::Flags(ref x) => {
277 self.set_flags(&x.flags);
278 // Flags in the AST are generally considered directives and
279 // not actual sub-expressions. However, they can be used in
280 // the concrete syntax like `((?i))`, and we need some kind of
281 // indication of an expression there, and Empty is the correct
282 // choice.
283 //
284 // There can also be things like `(?i)+`, but we rule those out
285 // in the parser. In the future, we might allow them for
286 // consistency sake.
287 self.push(HirFrame::Expr(Hir::empty()));
288 }
289 Ast::Literal(ref x) => {
290 self.push(HirFrame::Expr(self.hir_literal(x)?));
291 }
292 Ast::Dot(span) => {
293 self.push(HirFrame::Expr(self.hir_dot(span)?));
294 }
295 Ast::Assertion(ref x) => {
296 self.push(HirFrame::Expr(self.hir_assertion(x)?));
297 }
298 Ast::Class(ast::Class::Perl(ref x)) => {
299 if self.flags().unicode() {
300 let cls = self.hir_perl_unicode_class(x)?;
301 let hcls = hir::Class::Unicode(cls);
302 self.push(HirFrame::Expr(Hir::class(hcls)));
303 } else {
304 let cls = self.hir_perl_byte_class(x);
305 let hcls = hir::Class::Bytes(cls);
306 self.push(HirFrame::Expr(Hir::class(hcls)));
307 }
308 }
309 Ast::Class(ast::Class::Unicode(ref x)) => {
310 let cls = hir::Class::Unicode(self.hir_unicode_class(x)?);
311 self.push(HirFrame::Expr(Hir::class(cls)));
312 }
313 Ast::Class(ast::Class::Bracketed(ref ast)) => {
314 if self.flags().unicode() {
315 let mut cls = self.pop().unwrap().unwrap_class_unicode();
316 self.unicode_fold_and_negate(
317 &ast.span,
318 ast.negated,
319 &mut cls,
320 )?;
321 if cls.iter().next().is_none() {
322 return Err(self.error(
323 ast.span,
324 ErrorKind::EmptyClassNotAllowed,
325 ));
326 }
327 let expr = Hir::class(hir::Class::Unicode(cls));
328 self.push(HirFrame::Expr(expr));
329 } else {
330 let mut cls = self.pop().unwrap().unwrap_class_bytes();
331 self.bytes_fold_and_negate(
332 &ast.span,
333 ast.negated,
334 &mut cls,
335 )?;
336 if cls.iter().next().is_none() {
337 return Err(self.error(
338 ast.span,
339 ErrorKind::EmptyClassNotAllowed,
340 ));
341 }
342
343 let expr = Hir::class(hir::Class::Bytes(cls));
344 self.push(HirFrame::Expr(expr));
345 }
346 }
347 Ast::Repetition(ref x) => {
348 let expr = self.pop().unwrap().unwrap_expr();
349 self.push(HirFrame::Expr(self.hir_repetition(x, expr)));
350 }
351 Ast::Group(ref x) => {
352 let expr = self.pop().unwrap().unwrap_expr();
353 if let Some(flags) = self.pop().unwrap().unwrap_group() {
354 self.trans().flags.set(flags);
355 }
356 self.push(HirFrame::Expr(self.hir_group(x, expr)));
357 }
358 Ast::Concat(_) => {
359 let mut exprs = vec![];
360 while let Some(HirFrame::Expr(expr)) = self.pop() {
361 if !expr.kind().is_empty() {
362 exprs.push(expr);
363 }
364 }
365 exprs.reverse();
366 self.push(HirFrame::Expr(Hir::concat(exprs)));
367 }
368 Ast::Alternation(_) => {
369 let mut exprs = vec![];
370 while let Some(HirFrame::Expr(expr)) = self.pop() {
371 exprs.push(expr);
372 }
373 exprs.reverse();
374 self.push(HirFrame::Expr(Hir::alternation(exprs)));
375 }
376 }
377 Ok(())
378 }
379
visit_class_set_item_pre( &mut self, ast: &ast::ClassSetItem, ) -> Result<()>380 fn visit_class_set_item_pre(
381 &mut self,
382 ast: &ast::ClassSetItem,
383 ) -> Result<()> {
384 match *ast {
385 ast::ClassSetItem::Bracketed(_) => {
386 if self.flags().unicode() {
387 let cls = hir::ClassUnicode::empty();
388 self.push(HirFrame::ClassUnicode(cls));
389 } else {
390 let cls = hir::ClassBytes::empty();
391 self.push(HirFrame::ClassBytes(cls));
392 }
393 }
394 // We needn't handle the Union case here since the visitor will
395 // do it for us.
396 _ => {}
397 }
398 Ok(())
399 }
400
visit_class_set_item_post( &mut self, ast: &ast::ClassSetItem, ) -> Result<()>401 fn visit_class_set_item_post(
402 &mut self,
403 ast: &ast::ClassSetItem,
404 ) -> Result<()> {
405 match *ast {
406 ast::ClassSetItem::Empty(_) => {}
407 ast::ClassSetItem::Literal(ref x) => {
408 if self.flags().unicode() {
409 let mut cls = self.pop().unwrap().unwrap_class_unicode();
410 cls.push(hir::ClassUnicodeRange::new(x.c, x.c));
411 self.push(HirFrame::ClassUnicode(cls));
412 } else {
413 let mut cls = self.pop().unwrap().unwrap_class_bytes();
414 let byte = self.class_literal_byte(x)?;
415 cls.push(hir::ClassBytesRange::new(byte, byte));
416 self.push(HirFrame::ClassBytes(cls));
417 }
418 }
419 ast::ClassSetItem::Range(ref x) => {
420 if self.flags().unicode() {
421 let mut cls = self.pop().unwrap().unwrap_class_unicode();
422 cls.push(hir::ClassUnicodeRange::new(x.start.c, x.end.c));
423 self.push(HirFrame::ClassUnicode(cls));
424 } else {
425 let mut cls = self.pop().unwrap().unwrap_class_bytes();
426 let start = self.class_literal_byte(&x.start)?;
427 let end = self.class_literal_byte(&x.end)?;
428 cls.push(hir::ClassBytesRange::new(start, end));
429 self.push(HirFrame::ClassBytes(cls));
430 }
431 }
432 ast::ClassSetItem::Ascii(ref x) => {
433 if self.flags().unicode() {
434 let mut cls = self.pop().unwrap().unwrap_class_unicode();
435 for &(s, e) in ascii_class(&x.kind) {
436 cls.push(hir::ClassUnicodeRange::new(s, e));
437 }
438 self.unicode_fold_and_negate(
439 &x.span, x.negated, &mut cls,
440 )?;
441 self.push(HirFrame::ClassUnicode(cls));
442 } else {
443 let mut cls = self.pop().unwrap().unwrap_class_bytes();
444 for &(s, e) in ascii_class(&x.kind) {
445 cls.push(hir::ClassBytesRange::new(s as u8, e as u8));
446 }
447 self.bytes_fold_and_negate(&x.span, x.negated, &mut cls)?;
448 self.push(HirFrame::ClassBytes(cls));
449 }
450 }
451 ast::ClassSetItem::Unicode(ref x) => {
452 let xcls = self.hir_unicode_class(x)?;
453 let mut cls = self.pop().unwrap().unwrap_class_unicode();
454 cls.union(&xcls);
455 self.push(HirFrame::ClassUnicode(cls));
456 }
457 ast::ClassSetItem::Perl(ref x) => {
458 if self.flags().unicode() {
459 let xcls = self.hir_perl_unicode_class(x)?;
460 let mut cls = self.pop().unwrap().unwrap_class_unicode();
461 cls.union(&xcls);
462 self.push(HirFrame::ClassUnicode(cls));
463 } else {
464 let xcls = self.hir_perl_byte_class(x);
465 let mut cls = self.pop().unwrap().unwrap_class_bytes();
466 cls.union(&xcls);
467 self.push(HirFrame::ClassBytes(cls));
468 }
469 }
470 ast::ClassSetItem::Bracketed(ref ast) => {
471 if self.flags().unicode() {
472 let mut cls1 = self.pop().unwrap().unwrap_class_unicode();
473 self.unicode_fold_and_negate(
474 &ast.span,
475 ast.negated,
476 &mut cls1,
477 )?;
478
479 let mut cls2 = self.pop().unwrap().unwrap_class_unicode();
480 cls2.union(&cls1);
481 self.push(HirFrame::ClassUnicode(cls2));
482 } else {
483 let mut cls1 = self.pop().unwrap().unwrap_class_bytes();
484 self.bytes_fold_and_negate(
485 &ast.span,
486 ast.negated,
487 &mut cls1,
488 )?;
489
490 let mut cls2 = self.pop().unwrap().unwrap_class_bytes();
491 cls2.union(&cls1);
492 self.push(HirFrame::ClassBytes(cls2));
493 }
494 }
495 // This is handled automatically by the visitor.
496 ast::ClassSetItem::Union(_) => {}
497 }
498 Ok(())
499 }
500
visit_class_set_binary_op_pre( &mut self, _op: &ast::ClassSetBinaryOp, ) -> Result<()>501 fn visit_class_set_binary_op_pre(
502 &mut self,
503 _op: &ast::ClassSetBinaryOp,
504 ) -> Result<()> {
505 if self.flags().unicode() {
506 let cls = hir::ClassUnicode::empty();
507 self.push(HirFrame::ClassUnicode(cls));
508 } else {
509 let cls = hir::ClassBytes::empty();
510 self.push(HirFrame::ClassBytes(cls));
511 }
512 Ok(())
513 }
514
visit_class_set_binary_op_in( &mut self, _op: &ast::ClassSetBinaryOp, ) -> Result<()>515 fn visit_class_set_binary_op_in(
516 &mut self,
517 _op: &ast::ClassSetBinaryOp,
518 ) -> Result<()> {
519 if self.flags().unicode() {
520 let cls = hir::ClassUnicode::empty();
521 self.push(HirFrame::ClassUnicode(cls));
522 } else {
523 let cls = hir::ClassBytes::empty();
524 self.push(HirFrame::ClassBytes(cls));
525 }
526 Ok(())
527 }
528
visit_class_set_binary_op_post( &mut self, op: &ast::ClassSetBinaryOp, ) -> Result<()>529 fn visit_class_set_binary_op_post(
530 &mut self,
531 op: &ast::ClassSetBinaryOp,
532 ) -> Result<()> {
533 use ast::ClassSetBinaryOpKind::*;
534
535 if self.flags().unicode() {
536 let mut rhs = self.pop().unwrap().unwrap_class_unicode();
537 let mut lhs = self.pop().unwrap().unwrap_class_unicode();
538 let mut cls = self.pop().unwrap().unwrap_class_unicode();
539 if self.flags().case_insensitive() {
540 rhs.try_case_fold_simple().map_err(|_| {
541 self.error(
542 op.rhs.span().clone(),
543 ErrorKind::UnicodeCaseUnavailable,
544 )
545 })?;
546 lhs.try_case_fold_simple().map_err(|_| {
547 self.error(
548 op.lhs.span().clone(),
549 ErrorKind::UnicodeCaseUnavailable,
550 )
551 })?;
552 }
553 match op.kind {
554 Intersection => lhs.intersect(&rhs),
555 Difference => lhs.difference(&rhs),
556 SymmetricDifference => lhs.symmetric_difference(&rhs),
557 }
558 cls.union(&lhs);
559 self.push(HirFrame::ClassUnicode(cls));
560 } else {
561 let mut rhs = self.pop().unwrap().unwrap_class_bytes();
562 let mut lhs = self.pop().unwrap().unwrap_class_bytes();
563 let mut cls = self.pop().unwrap().unwrap_class_bytes();
564 if self.flags().case_insensitive() {
565 rhs.case_fold_simple();
566 lhs.case_fold_simple();
567 }
568 match op.kind {
569 Intersection => lhs.intersect(&rhs),
570 Difference => lhs.difference(&rhs),
571 SymmetricDifference => lhs.symmetric_difference(&rhs),
572 }
573 cls.union(&lhs);
574 self.push(HirFrame::ClassBytes(cls));
575 }
576 Ok(())
577 }
578 }
579
580 /// The internal implementation of a translator.
581 ///
582 /// This type is responsible for carrying around the original pattern string,
583 /// which is not tied to the internal state of a translator.
584 ///
585 /// A TranslatorI exists for the time it takes to translate a single Ast.
586 #[derive(Clone, Debug)]
587 struct TranslatorI<'t, 'p> {
588 trans: &'t Translator,
589 pattern: &'p str,
590 }
591
592 impl<'t, 'p> TranslatorI<'t, 'p> {
593 /// Build a new internal translator.
new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p>594 fn new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p> {
595 TranslatorI { trans: trans, pattern: pattern }
596 }
597
598 /// Return a reference to the underlying translator.
trans(&self) -> &Translator599 fn trans(&self) -> &Translator {
600 &self.trans
601 }
602
603 /// Push the given frame on to the call stack.
push(&self, frame: HirFrame)604 fn push(&self, frame: HirFrame) {
605 self.trans().stack.borrow_mut().push(frame);
606 }
607
608 /// Pop the top of the call stack. If the call stack is empty, return None.
pop(&self) -> Option<HirFrame>609 fn pop(&self) -> Option<HirFrame> {
610 self.trans().stack.borrow_mut().pop()
611 }
612
613 /// Create a new error with the given span and error type.
error(&self, span: Span, kind: ErrorKind) -> Error614 fn error(&self, span: Span, kind: ErrorKind) -> Error {
615 Error { kind: kind, pattern: self.pattern.to_string(), span: span }
616 }
617
618 /// Return a copy of the active flags.
flags(&self) -> Flags619 fn flags(&self) -> Flags {
620 self.trans().flags.get()
621 }
622
623 /// Set the flags of this translator from the flags set in the given AST.
624 /// Then, return the old flags.
set_flags(&self, ast_flags: &ast::Flags) -> Flags625 fn set_flags(&self, ast_flags: &ast::Flags) -> Flags {
626 let old_flags = self.flags();
627 let mut new_flags = Flags::from_ast(ast_flags);
628 new_flags.merge(&old_flags);
629 self.trans().flags.set(new_flags);
630 old_flags
631 }
632
hir_literal(&self, lit: &ast::Literal) -> Result<Hir>633 fn hir_literal(&self, lit: &ast::Literal) -> Result<Hir> {
634 let ch = match self.literal_to_char(lit)? {
635 byte @ hir::Literal::Byte(_) => return Ok(Hir::literal(byte)),
636 hir::Literal::Unicode(ch) => ch,
637 };
638 if self.flags().case_insensitive() {
639 self.hir_from_char_case_insensitive(lit.span, ch)
640 } else {
641 self.hir_from_char(lit.span, ch)
642 }
643 }
644
645 /// Convert an Ast literal to its scalar representation.
646 ///
647 /// When Unicode mode is enabled, then this always succeeds and returns a
648 /// `char` (Unicode scalar value).
649 ///
650 /// When Unicode mode is disabled, then a raw byte is returned. If that
651 /// byte is not ASCII and invalid UTF-8 is not allowed, then this returns
652 /// an error.
literal_to_char(&self, lit: &ast::Literal) -> Result<hir::Literal>653 fn literal_to_char(&self, lit: &ast::Literal) -> Result<hir::Literal> {
654 if self.flags().unicode() {
655 return Ok(hir::Literal::Unicode(lit.c));
656 }
657 let byte = match lit.byte() {
658 None => return Ok(hir::Literal::Unicode(lit.c)),
659 Some(byte) => byte,
660 };
661 if byte <= 0x7F {
662 return Ok(hir::Literal::Unicode(byte as char));
663 }
664 if !self.trans().allow_invalid_utf8 {
665 return Err(self.error(lit.span, ErrorKind::InvalidUtf8));
666 }
667 Ok(hir::Literal::Byte(byte))
668 }
669
hir_from_char(&self, span: Span, c: char) -> Result<Hir>670 fn hir_from_char(&self, span: Span, c: char) -> Result<Hir> {
671 if !self.flags().unicode() && c.len_utf8() > 1 {
672 return Err(self.error(span, ErrorKind::UnicodeNotAllowed));
673 }
674 Ok(Hir::literal(hir::Literal::Unicode(c)))
675 }
676
hir_from_char_case_insensitive( &self, span: Span, c: char, ) -> Result<Hir>677 fn hir_from_char_case_insensitive(
678 &self,
679 span: Span,
680 c: char,
681 ) -> Result<Hir> {
682 if self.flags().unicode() {
683 // If case folding won't do anything, then don't bother trying.
684 let map =
685 unicode::contains_simple_case_mapping(c, c).map_err(|_| {
686 self.error(span, ErrorKind::UnicodeCaseUnavailable)
687 })?;
688 if !map {
689 return self.hir_from_char(span, c);
690 }
691 let mut cls =
692 hir::ClassUnicode::new(vec![hir::ClassUnicodeRange::new(
693 c, c,
694 )]);
695 cls.try_case_fold_simple().map_err(|_| {
696 self.error(span, ErrorKind::UnicodeCaseUnavailable)
697 })?;
698 Ok(Hir::class(hir::Class::Unicode(cls)))
699 } else {
700 if c.len_utf8() > 1 {
701 return Err(self.error(span, ErrorKind::UnicodeNotAllowed));
702 }
703 // If case folding won't do anything, then don't bother trying.
704 match c {
705 'A'..='Z' | 'a'..='z' => {}
706 _ => return self.hir_from_char(span, c),
707 }
708 let mut cls =
709 hir::ClassBytes::new(vec![hir::ClassBytesRange::new(
710 c as u8, c as u8,
711 )]);
712 cls.case_fold_simple();
713 Ok(Hir::class(hir::Class::Bytes(cls)))
714 }
715 }
716
hir_dot(&self, span: Span) -> Result<Hir>717 fn hir_dot(&self, span: Span) -> Result<Hir> {
718 let unicode = self.flags().unicode();
719 if !unicode && !self.trans().allow_invalid_utf8 {
720 return Err(self.error(span, ErrorKind::InvalidUtf8));
721 }
722 Ok(if self.flags().dot_matches_new_line() {
723 Hir::any(!unicode)
724 } else {
725 Hir::dot(!unicode)
726 })
727 }
728
hir_assertion(&self, asst: &ast::Assertion) -> Result<Hir>729 fn hir_assertion(&self, asst: &ast::Assertion) -> Result<Hir> {
730 let unicode = self.flags().unicode();
731 let multi_line = self.flags().multi_line();
732 Ok(match asst.kind {
733 ast::AssertionKind::StartLine => Hir::anchor(if multi_line {
734 hir::Anchor::StartLine
735 } else {
736 hir::Anchor::StartText
737 }),
738 ast::AssertionKind::EndLine => Hir::anchor(if multi_line {
739 hir::Anchor::EndLine
740 } else {
741 hir::Anchor::EndText
742 }),
743 ast::AssertionKind::StartText => {
744 Hir::anchor(hir::Anchor::StartText)
745 }
746 ast::AssertionKind::EndText => Hir::anchor(hir::Anchor::EndText),
747 ast::AssertionKind::WordBoundary => {
748 Hir::word_boundary(if unicode {
749 hir::WordBoundary::Unicode
750 } else {
751 hir::WordBoundary::Ascii
752 })
753 }
754 ast::AssertionKind::NotWordBoundary => {
755 Hir::word_boundary(if unicode {
756 hir::WordBoundary::UnicodeNegate
757 } else {
758 // It is possible for negated ASCII word boundaries to
759 // match at invalid UTF-8 boundaries, even when searching
760 // valid UTF-8.
761 if !self.trans().allow_invalid_utf8 {
762 return Err(
763 self.error(asst.span, ErrorKind::InvalidUtf8)
764 );
765 }
766 hir::WordBoundary::AsciiNegate
767 })
768 }
769 })
770 }
771
hir_group(&self, group: &ast::Group, expr: Hir) -> Hir772 fn hir_group(&self, group: &ast::Group, expr: Hir) -> Hir {
773 let kind = match group.kind {
774 ast::GroupKind::CaptureIndex(idx) => {
775 hir::GroupKind::CaptureIndex(idx)
776 }
777 ast::GroupKind::CaptureName(ref capname) => {
778 hir::GroupKind::CaptureName {
779 name: capname.name.clone(),
780 index: capname.index,
781 }
782 }
783 ast::GroupKind::NonCapturing(_) => hir::GroupKind::NonCapturing,
784 };
785 Hir::group(hir::Group { kind: kind, hir: Box::new(expr) })
786 }
787
hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir788 fn hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir {
789 let kind = match rep.op.kind {
790 ast::RepetitionKind::ZeroOrOne => hir::RepetitionKind::ZeroOrOne,
791 ast::RepetitionKind::ZeroOrMore => hir::RepetitionKind::ZeroOrMore,
792 ast::RepetitionKind::OneOrMore => hir::RepetitionKind::OneOrMore,
793 ast::RepetitionKind::Range(ast::RepetitionRange::Exactly(m)) => {
794 hir::RepetitionKind::Range(hir::RepetitionRange::Exactly(m))
795 }
796 ast::RepetitionKind::Range(ast::RepetitionRange::AtLeast(m)) => {
797 hir::RepetitionKind::Range(hir::RepetitionRange::AtLeast(m))
798 }
799 ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(
800 m,
801 n,
802 )) => {
803 hir::RepetitionKind::Range(hir::RepetitionRange::Bounded(m, n))
804 }
805 };
806 let greedy =
807 if self.flags().swap_greed() { !rep.greedy } else { rep.greedy };
808 Hir::repetition(hir::Repetition {
809 kind: kind,
810 greedy: greedy,
811 hir: Box::new(expr),
812 })
813 }
814
hir_unicode_class( &self, ast_class: &ast::ClassUnicode, ) -> Result<hir::ClassUnicode>815 fn hir_unicode_class(
816 &self,
817 ast_class: &ast::ClassUnicode,
818 ) -> Result<hir::ClassUnicode> {
819 use ast::ClassUnicodeKind::*;
820
821 if !self.flags().unicode() {
822 return Err(
823 self.error(ast_class.span, ErrorKind::UnicodeNotAllowed)
824 );
825 }
826 let query = match ast_class.kind {
827 OneLetter(name) => ClassQuery::OneLetter(name),
828 Named(ref name) => ClassQuery::Binary(name),
829 NamedValue { ref name, ref value, .. } => ClassQuery::ByValue {
830 property_name: name,
831 property_value: value,
832 },
833 };
834 let mut result = self.convert_unicode_class_error(
835 &ast_class.span,
836 unicode::class(query),
837 );
838 if let Ok(ref mut class) = result {
839 self.unicode_fold_and_negate(
840 &ast_class.span,
841 ast_class.negated,
842 class,
843 )?;
844 }
845 result
846 }
847
hir_perl_unicode_class( &self, ast_class: &ast::ClassPerl, ) -> Result<hir::ClassUnicode>848 fn hir_perl_unicode_class(
849 &self,
850 ast_class: &ast::ClassPerl,
851 ) -> Result<hir::ClassUnicode> {
852 use ast::ClassPerlKind::*;
853
854 assert!(self.flags().unicode());
855 let result = match ast_class.kind {
856 Digit => unicode::perl_digit(),
857 Space => unicode::perl_space(),
858 Word => unicode::perl_word(),
859 };
860 let mut class =
861 self.convert_unicode_class_error(&ast_class.span, result)?;
862 // We needn't apply case folding here because the Perl Unicode classes
863 // are already closed under Unicode simple case folding.
864 if ast_class.negated {
865 class.negate();
866 }
867 Ok(class)
868 }
869
hir_perl_byte_class( &self, ast_class: &ast::ClassPerl, ) -> hir::ClassBytes870 fn hir_perl_byte_class(
871 &self,
872 ast_class: &ast::ClassPerl,
873 ) -> hir::ClassBytes {
874 use ast::ClassPerlKind::*;
875
876 assert!(!self.flags().unicode());
877 let mut class = match ast_class.kind {
878 Digit => hir_ascii_class_bytes(&ast::ClassAsciiKind::Digit),
879 Space => hir_ascii_class_bytes(&ast::ClassAsciiKind::Space),
880 Word => hir_ascii_class_bytes(&ast::ClassAsciiKind::Word),
881 };
882 // We needn't apply case folding here because the Perl ASCII classes
883 // are already closed (under ASCII case folding).
884 if ast_class.negated {
885 class.negate();
886 }
887 class
888 }
889
890 /// Converts the given Unicode specific error to an HIR translation error.
891 ///
892 /// The span given should approximate the position at which an error would
893 /// occur.
convert_unicode_class_error( &self, span: &Span, result: unicode::Result<hir::ClassUnicode>, ) -> Result<hir::ClassUnicode>894 fn convert_unicode_class_error(
895 &self,
896 span: &Span,
897 result: unicode::Result<hir::ClassUnicode>,
898 ) -> Result<hir::ClassUnicode> {
899 result.map_err(|err| {
900 let sp = span.clone();
901 match err {
902 unicode::Error::PropertyNotFound => {
903 self.error(sp, ErrorKind::UnicodePropertyNotFound)
904 }
905 unicode::Error::PropertyValueNotFound => {
906 self.error(sp, ErrorKind::UnicodePropertyValueNotFound)
907 }
908 unicode::Error::PerlClassNotFound => {
909 self.error(sp, ErrorKind::UnicodePerlClassNotFound)
910 }
911 }
912 })
913 }
914
unicode_fold_and_negate( &self, span: &Span, negated: bool, class: &mut hir::ClassUnicode, ) -> Result<()>915 fn unicode_fold_and_negate(
916 &self,
917 span: &Span,
918 negated: bool,
919 class: &mut hir::ClassUnicode,
920 ) -> Result<()> {
921 // Note that we must apply case folding before negation!
922 // Consider `(?i)[^x]`. If we applied negation field, then
923 // the result would be the character class that matched any
924 // Unicode scalar value.
925 if self.flags().case_insensitive() {
926 class.try_case_fold_simple().map_err(|_| {
927 self.error(span.clone(), ErrorKind::UnicodeCaseUnavailable)
928 })?;
929 }
930 if negated {
931 class.negate();
932 }
933 Ok(())
934 }
935
bytes_fold_and_negate( &self, span: &Span, negated: bool, class: &mut hir::ClassBytes, ) -> Result<()>936 fn bytes_fold_and_negate(
937 &self,
938 span: &Span,
939 negated: bool,
940 class: &mut hir::ClassBytes,
941 ) -> Result<()> {
942 // Note that we must apply case folding before negation!
943 // Consider `(?i)[^x]`. If we applied negation field, then
944 // the result would be the character class that matched any
945 // Unicode scalar value.
946 if self.flags().case_insensitive() {
947 class.case_fold_simple();
948 }
949 if negated {
950 class.negate();
951 }
952 if !self.trans().allow_invalid_utf8 && !class.is_all_ascii() {
953 return Err(self.error(span.clone(), ErrorKind::InvalidUtf8));
954 }
955 Ok(())
956 }
957
958 /// Return a scalar byte value suitable for use as a literal in a byte
959 /// character class.
class_literal_byte(&self, ast: &ast::Literal) -> Result<u8>960 fn class_literal_byte(&self, ast: &ast::Literal) -> Result<u8> {
961 match self.literal_to_char(ast)? {
962 hir::Literal::Byte(byte) => Ok(byte),
963 hir::Literal::Unicode(ch) => {
964 if ch <= 0x7F as char {
965 Ok(ch as u8)
966 } else {
967 // We can't feasibly support Unicode in
968 // byte oriented classes. Byte classes don't
969 // do Unicode case folding.
970 Err(self.error(ast.span, ErrorKind::UnicodeNotAllowed))
971 }
972 }
973 }
974 }
975 }
976
/// A translator's representation of a regular expression's flags at any given
/// moment in time.
///
/// Each flag can be in one of three states: absent, present but disabled or
/// present but enabled. These are encoded as `None`, `Some(false)` and
/// `Some(true)` respectively. The derived `Default` leaves every flag absent.
///
/// The accessor methods on this type treat an absent flag as disabled, with
/// the exception of `unicode`, which is treated as enabled when absent.
#[derive(Clone, Copy, Debug, Default)]
struct Flags {
    // State of the case insensitive flag (`i`).
    case_insensitive: Option<bool>,
    // State of the multi-line flag (`m`).
    multi_line: Option<bool>,
    // State of the "dot matches new line" flag (`s`).
    dot_matches_new_line: Option<bool>,
    // State of the swap greed flag (`U`).
    swap_greed: Option<bool>,
    // State of the Unicode flag (`u`).
    unicode: Option<bool>,
    // Note that `ignore_whitespace` is omitted here because it is handled
    // entirely in the parser.
}
992
993 impl Flags {
from_ast(ast: &ast::Flags) -> Flags994 fn from_ast(ast: &ast::Flags) -> Flags {
995 let mut flags = Flags::default();
996 let mut enable = true;
997 for item in &ast.items {
998 match item.kind {
999 ast::FlagsItemKind::Negation => {
1000 enable = false;
1001 }
1002 ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive) => {
1003 flags.case_insensitive = Some(enable);
1004 }
1005 ast::FlagsItemKind::Flag(ast::Flag::MultiLine) => {
1006 flags.multi_line = Some(enable);
1007 }
1008 ast::FlagsItemKind::Flag(ast::Flag::DotMatchesNewLine) => {
1009 flags.dot_matches_new_line = Some(enable);
1010 }
1011 ast::FlagsItemKind::Flag(ast::Flag::SwapGreed) => {
1012 flags.swap_greed = Some(enable);
1013 }
1014 ast::FlagsItemKind::Flag(ast::Flag::Unicode) => {
1015 flags.unicode = Some(enable);
1016 }
1017 ast::FlagsItemKind::Flag(ast::Flag::IgnoreWhitespace) => {}
1018 }
1019 }
1020 flags
1021 }
1022
merge(&mut self, previous: &Flags)1023 fn merge(&mut self, previous: &Flags) {
1024 if self.case_insensitive.is_none() {
1025 self.case_insensitive = previous.case_insensitive;
1026 }
1027 if self.multi_line.is_none() {
1028 self.multi_line = previous.multi_line;
1029 }
1030 if self.dot_matches_new_line.is_none() {
1031 self.dot_matches_new_line = previous.dot_matches_new_line;
1032 }
1033 if self.swap_greed.is_none() {
1034 self.swap_greed = previous.swap_greed;
1035 }
1036 if self.unicode.is_none() {
1037 self.unicode = previous.unicode;
1038 }
1039 }
1040
case_insensitive(&self) -> bool1041 fn case_insensitive(&self) -> bool {
1042 self.case_insensitive.unwrap_or(false)
1043 }
1044
multi_line(&self) -> bool1045 fn multi_line(&self) -> bool {
1046 self.multi_line.unwrap_or(false)
1047 }
1048
dot_matches_new_line(&self) -> bool1049 fn dot_matches_new_line(&self) -> bool {
1050 self.dot_matches_new_line.unwrap_or(false)
1051 }
1052
swap_greed(&self) -> bool1053 fn swap_greed(&self) -> bool {
1054 self.swap_greed.unwrap_or(false)
1055 }
1056
unicode(&self) -> bool1057 fn unicode(&self) -> bool {
1058 self.unicode.unwrap_or(true)
1059 }
1060 }
1061
hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes1062 fn hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes {
1063 let ranges: Vec<_> = ascii_class(kind)
1064 .iter()
1065 .cloned()
1066 .map(|(s, e)| hir::ClassBytesRange::new(s as u8, e as u8))
1067 .collect();
1068 hir::ClassBytes::new(ranges)
1069 }
1070
ascii_class(kind: &ast::ClassAsciiKind) -> &'static [(char, char)]1071 fn ascii_class(kind: &ast::ClassAsciiKind) -> &'static [(char, char)] {
1072 use ast::ClassAsciiKind::*;
1073 match *kind {
1074 Alnum => &[('0', '9'), ('A', 'Z'), ('a', 'z')],
1075 Alpha => &[('A', 'Z'), ('a', 'z')],
1076 Ascii => &[('\x00', '\x7F')],
1077 Blank => &[('\t', '\t'), (' ', ' ')],
1078 Cntrl => &[('\x00', '\x1F'), ('\x7F', '\x7F')],
1079 Digit => &[('0', '9')],
1080 Graph => &[('!', '~')],
1081 Lower => &[('a', 'z')],
1082 Print => &[(' ', '~')],
1083 Punct => &[('!', '/'), (':', '@'), ('[', '`'), ('{', '~')],
1084 Space => &[
1085 ('\t', '\t'),
1086 ('\n', '\n'),
1087 ('\x0B', '\x0B'),
1088 ('\x0C', '\x0C'),
1089 ('\r', '\r'),
1090 (' ', ' '),
1091 ],
1092 Upper => &[('A', 'Z')],
1093 Word => &[('0', '9'), ('A', 'Z'), ('_', '_'), ('a', 'z')],
1094 Xdigit => &[('0', '9'), ('A', 'F'), ('a', 'f')],
1095 }
1096 }
1097
1098 #[cfg(test)]
1099 mod tests {
1100 use ast::parse::ParserBuilder;
1101 use ast::{self, Ast, Position, Span};
1102 use hir::{self, Hir, HirKind};
1103 use unicode::{self, ClassQuery};
1104
1105 use super::{ascii_class, TranslatorBuilder};
1106
// We create these errors to compare with real hir::Errors in the tests.
// We define equality between TestError and hir::Error to disregard the
// pattern string in hir::Error, which is annoying to provide in tests.
// Only the span and the kind take part in the comparison.
#[derive(Clone, Debug)]
struct TestError {
    // The span the error is expected to point at.
    span: Span,
    // The kind of error that is expected.
    kind: hir::ErrorKind,
}
1115
1116 impl PartialEq<hir::Error> for TestError {
eq(&self, other: &hir::Error) -> bool1117 fn eq(&self, other: &hir::Error) -> bool {
1118 self.span == other.span && self.kind == other.kind
1119 }
1120 }
1121
1122 impl PartialEq<TestError> for hir::Error {
eq(&self, other: &TestError) -> bool1123 fn eq(&self, other: &TestError) -> bool {
1124 self.span == other.span && self.kind == other.kind
1125 }
1126 }
1127
parse(pattern: &str) -> Ast1128 fn parse(pattern: &str) -> Ast {
1129 ParserBuilder::new().octal(true).build().parse(pattern).unwrap()
1130 }
1131
t(pattern: &str) -> Hir1132 fn t(pattern: &str) -> Hir {
1133 TranslatorBuilder::new()
1134 .allow_invalid_utf8(false)
1135 .build()
1136 .translate(pattern, &parse(pattern))
1137 .unwrap()
1138 }
1139
t_err(pattern: &str) -> hir::Error1140 fn t_err(pattern: &str) -> hir::Error {
1141 TranslatorBuilder::new()
1142 .allow_invalid_utf8(false)
1143 .build()
1144 .translate(pattern, &parse(pattern))
1145 .unwrap_err()
1146 }
1147
t_bytes(pattern: &str) -> Hir1148 fn t_bytes(pattern: &str) -> Hir {
1149 TranslatorBuilder::new()
1150 .allow_invalid_utf8(true)
1151 .build()
1152 .translate(pattern, &parse(pattern))
1153 .unwrap()
1154 }
1155
hir_lit(s: &str) -> Hir1156 fn hir_lit(s: &str) -> Hir {
1157 match s.len() {
1158 0 => Hir::empty(),
1159 _ => {
1160 let lits = s
1161 .chars()
1162 .map(hir::Literal::Unicode)
1163 .map(Hir::literal)
1164 .collect();
1165 Hir::concat(lits)
1166 }
1167 }
1168 }
1169
hir_blit(s: &[u8]) -> Hir1170 fn hir_blit(s: &[u8]) -> Hir {
1171 match s.len() {
1172 0 => Hir::empty(),
1173 1 => Hir::literal(hir::Literal::Byte(s[0])),
1174 _ => {
1175 let lits = s
1176 .iter()
1177 .cloned()
1178 .map(hir::Literal::Byte)
1179 .map(Hir::literal)
1180 .collect();
1181 Hir::concat(lits)
1182 }
1183 }
1184 }
1185
hir_group(i: u32, expr: Hir) -> Hir1186 fn hir_group(i: u32, expr: Hir) -> Hir {
1187 Hir::group(hir::Group {
1188 kind: hir::GroupKind::CaptureIndex(i),
1189 hir: Box::new(expr),
1190 })
1191 }
1192
hir_group_name(i: u32, name: &str, expr: Hir) -> Hir1193 fn hir_group_name(i: u32, name: &str, expr: Hir) -> Hir {
1194 Hir::group(hir::Group {
1195 kind: hir::GroupKind::CaptureName {
1196 name: name.to_string(),
1197 index: i,
1198 },
1199 hir: Box::new(expr),
1200 })
1201 }
1202
hir_group_nocap(expr: Hir) -> Hir1203 fn hir_group_nocap(expr: Hir) -> Hir {
1204 Hir::group(hir::Group {
1205 kind: hir::GroupKind::NonCapturing,
1206 hir: Box::new(expr),
1207 })
1208 }
1209
hir_quest(greedy: bool, expr: Hir) -> Hir1210 fn hir_quest(greedy: bool, expr: Hir) -> Hir {
1211 Hir::repetition(hir::Repetition {
1212 kind: hir::RepetitionKind::ZeroOrOne,
1213 greedy: greedy,
1214 hir: Box::new(expr),
1215 })
1216 }
1217
hir_star(greedy: bool, expr: Hir) -> Hir1218 fn hir_star(greedy: bool, expr: Hir) -> Hir {
1219 Hir::repetition(hir::Repetition {
1220 kind: hir::RepetitionKind::ZeroOrMore,
1221 greedy: greedy,
1222 hir: Box::new(expr),
1223 })
1224 }
1225
hir_plus(greedy: bool, expr: Hir) -> Hir1226 fn hir_plus(greedy: bool, expr: Hir) -> Hir {
1227 Hir::repetition(hir::Repetition {
1228 kind: hir::RepetitionKind::OneOrMore,
1229 greedy: greedy,
1230 hir: Box::new(expr),
1231 })
1232 }
1233
hir_range(greedy: bool, range: hir::RepetitionRange, expr: Hir) -> Hir1234 fn hir_range(greedy: bool, range: hir::RepetitionRange, expr: Hir) -> Hir {
1235 Hir::repetition(hir::Repetition {
1236 kind: hir::RepetitionKind::Range(range),
1237 greedy: greedy,
1238 hir: Box::new(expr),
1239 })
1240 }
1241
/// Builds the expected alternation of `alts` by delegating to
/// `Hir::alternation`.
fn hir_alt(alts: Vec<Hir>) -> Hir {
    Hir::alternation(alts)
}
1245
/// Builds the expected concatenation of `exprs` by delegating to
/// `Hir::concat`.
fn hir_cat(exprs: Vec<Hir>) -> Hir {
    Hir::concat(exprs)
}
1249
1250 #[allow(dead_code)]
hir_uclass_query(query: ClassQuery) -> Hir1251 fn hir_uclass_query(query: ClassQuery) -> Hir {
1252 Hir::class(hir::Class::Unicode(unicode::class(query).unwrap()))
1253 }
1254
1255 #[allow(dead_code)]
hir_uclass_perl_word() -> Hir1256 fn hir_uclass_perl_word() -> Hir {
1257 Hir::class(hir::Class::Unicode(unicode::perl_word().unwrap()))
1258 }
1259
hir_uclass(ranges: &[(char, char)]) -> Hir1260 fn hir_uclass(ranges: &[(char, char)]) -> Hir {
1261 let ranges: Vec<hir::ClassUnicodeRange> = ranges
1262 .iter()
1263 .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e))
1264 .collect();
1265 Hir::class(hir::Class::Unicode(hir::ClassUnicode::new(ranges)))
1266 }
1267
hir_bclass(ranges: &[(u8, u8)]) -> Hir1268 fn hir_bclass(ranges: &[(u8, u8)]) -> Hir {
1269 let ranges: Vec<hir::ClassBytesRange> = ranges
1270 .iter()
1271 .map(|&(s, e)| hir::ClassBytesRange::new(s, e))
1272 .collect();
1273 Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges)))
1274 }
1275
hir_bclass_from_char(ranges: &[(char, char)]) -> Hir1276 fn hir_bclass_from_char(ranges: &[(char, char)]) -> Hir {
1277 let ranges: Vec<hir::ClassBytesRange> = ranges
1278 .iter()
1279 .map(|&(s, e)| {
1280 assert!(s as u32 <= 0x7F);
1281 assert!(e as u32 <= 0x7F);
1282 hir::ClassBytesRange::new(s as u8, e as u8)
1283 })
1284 .collect();
1285 Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges)))
1286 }
1287
hir_case_fold(expr: Hir) -> Hir1288 fn hir_case_fold(expr: Hir) -> Hir {
1289 match expr.into_kind() {
1290 HirKind::Class(mut cls) => {
1291 cls.case_fold_simple();
1292 Hir::class(cls)
1293 }
1294 _ => panic!("cannot case fold non-class Hir expr"),
1295 }
1296 }
1297
hir_negate(expr: Hir) -> Hir1298 fn hir_negate(expr: Hir) -> Hir {
1299 match expr.into_kind() {
1300 HirKind::Class(mut cls) => {
1301 cls.negate();
1302 Hir::class(cls)
1303 }
1304 _ => panic!("cannot negate non-class Hir expr"),
1305 }
1306 }
1307
1308 #[allow(dead_code)]
hir_union(expr1: Hir, expr2: Hir) -> Hir1309 fn hir_union(expr1: Hir, expr2: Hir) -> Hir {
1310 use hir::Class::{Bytes, Unicode};
1311
1312 match (expr1.into_kind(), expr2.into_kind()) {
1313 (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
1314 c1.union(&c2);
1315 Hir::class(hir::Class::Unicode(c1))
1316 }
1317 (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
1318 c1.union(&c2);
1319 Hir::class(hir::Class::Bytes(c1))
1320 }
1321 _ => panic!("cannot union non-class Hir exprs"),
1322 }
1323 }
1324
1325 #[allow(dead_code)]
hir_difference(expr1: Hir, expr2: Hir) -> Hir1326 fn hir_difference(expr1: Hir, expr2: Hir) -> Hir {
1327 use hir::Class::{Bytes, Unicode};
1328
1329 match (expr1.into_kind(), expr2.into_kind()) {
1330 (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
1331 c1.difference(&c2);
1332 Hir::class(hir::Class::Unicode(c1))
1333 }
1334 (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
1335 c1.difference(&c2);
1336 Hir::class(hir::Class::Bytes(c1))
1337 }
1338 _ => panic!("cannot difference non-class Hir exprs"),
1339 }
1340 }
1341
/// Builds the expected anchor expression by delegating to `Hir::anchor`.
fn hir_anchor(anchor: hir::Anchor) -> Hir {
    Hir::anchor(anchor)
}
1345
/// Builds the expected word boundary expression by delegating to
/// `Hir::word_boundary`.
fn hir_word(wb: hir::WordBoundary) -> Hir {
    Hir::word_boundary(wb)
}
1349
#[test]
fn empty() {
    // Patterns that contain nothing to match, bare or nested in groups and
    // alternations, each paired with the HIR it must translate to.
    let cases = vec![
        ("", Hir::empty()),
        ("(?i)", Hir::empty()),
        ("()", hir_group(1, Hir::empty())),
        ("(?:)", hir_group_nocap(Hir::empty())),
        ("(?P<wat>)", hir_group_name(1, "wat", Hir::empty())),
        ("|", hir_alt(vec![Hir::empty(), Hir::empty()])),
        (
            "()|()",
            hir_alt(vec![
                hir_group(1, Hir::empty()),
                hir_group(2, Hir::empty()),
            ]),
        ),
        ("(|b)", hir_group(1, hir_alt(vec![Hir::empty(), hir_lit("b")]))),
        ("(a|)", hir_group(1, hir_alt(vec![hir_lit("a"), Hir::empty()]))),
        (
            "(a||c)",
            hir_group(
                1,
                hir_alt(vec![hir_lit("a"), Hir::empty(), hir_lit("c")]),
            ),
        ),
        (
            "(||)",
            hir_group(
                1,
                hir_alt(vec![Hir::empty(), Hir::empty(), Hir::empty()]),
            ),
        ),
    ];
    for (pattern, expected) in cases {
        assert_eq!(t(pattern), expected);
    }
}
1388
#[test]
fn literal() {
    // With invalid UTF-8 disallowed, each pattern translates to the
    // Unicode literal(s) spelled out next to it.
    let unicode_cases =
        [("a", "a"), ("(?-u)a", "a"), ("☃", "☃"), ("abcd", "abcd")];
    for &(pattern, expected) in &unicode_cases {
        assert_eq!(t(pattern), hir_lit(expected));
    }

    // With invalid UTF-8 allowed, ASCII still comes out as a Unicode
    // literal, whether written directly or as an escape.
    let ascii_patterns = ["(?-u)a", "(?-u)\x61", r"(?-u)\x61"];
    for &pattern in &ascii_patterns {
        assert_eq!(t_bytes(pattern), hir_lit("a"));
    }
    // A non-ASCII byte escape becomes a byte literal.
    assert_eq!(t_bytes(r"(?-u)\xFF"), hir_blit(b"\xFF"));

    // A non-ASCII character is rejected once Unicode mode is disabled.
    let unicode_not_allowed = TestError {
        kind: hir::ErrorKind::UnicodeNotAllowed,
        span: Span::new(Position::new(5, 1, 6), Position::new(8, 1, 7)),
    };
    assert_eq!(t_err("(?-u)☃"), unicode_not_allowed);

    // A non-ASCII byte is rejected when invalid UTF-8 is disallowed.
    let invalid_utf8 = TestError {
        kind: hir::ErrorKind::InvalidUtf8,
        span: Span::new(Position::new(5, 1, 6), Position::new(9, 1, 10)),
    };
    assert_eq!(t_err(r"(?-u)\xFF"), invalid_utf8);
}
1422
// Checks how literals translate when the case insensitive flag is on:
// letters become two-member (or larger) classes while characters without
// case variants stay literals.
#[test]
fn literal_case_insensitive() {
    // Unicode mode. These expectations rely on Unicode case folding, so they
    // only compile when the `unicode-case` feature is enabled.
    #[cfg(feature = "unicode-case")]
    assert_eq!(t("(?i)a"), hir_uclass(&[('A', 'A'), ('a', 'a'),]));
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i:a)"),
        hir_group_nocap(hir_uclass(&[('A', 'A'), ('a', 'a')],))
    );
    // The flag only affects what follows it and can be switched off again.
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("a(?i)a(?-i)a"),
        hir_cat(vec![
            hir_lit("a"),
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
            hir_lit("a"),
        ])
    );
    // `@` has no case variants and stays a plain literal.
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)ab@c"),
        hir_cat(vec![
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
            hir_uclass(&[('B', 'B'), ('b', 'b')]),
            hir_lit("@"),
            hir_uclass(&[('C', 'C'), ('c', 'c')]),
        ])
    );
    // Non-ASCII letters fold too, possibly to more than two members.
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)β"),
        hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),])
    );

    // Unicode mode disabled: letters fold to byte classes instead.
    assert_eq!(t("(?i-u)a"), hir_bclass(&[(b'A', b'A'), (b'a', b'a'),]));
    // NOTE(review): this case is gated on `unicode-case` although its
    // expectation only involves a byte class — confirm the gate is needed.
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?-u)a(?i)a(?-i)a"),
        hir_cat(vec![
            hir_lit("a"),
            hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
            hir_lit("a"),
        ])
    );
    assert_eq!(
        t("(?i-u)ab@c"),
        hir_cat(vec![
            hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
            hir_bclass(&[(b'B', b'B'), (b'b', b'b')]),
            hir_lit("@"),
            hir_bclass(&[(b'C', b'C'), (b'c', b'c')]),
        ])
    );

    // Same byte oriented cases, but with invalid UTF-8 allowed.
    assert_eq!(
        t_bytes("(?i-u)a"),
        hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
    );
    assert_eq!(
        t_bytes("(?i-u)\x61"),
        hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
    );
    assert_eq!(
        t_bytes(r"(?i-u)\x61"),
        hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
    );
    // `\xFF` has no case variants, so it remains a byte literal.
    assert_eq!(t_bytes(r"(?i-u)\xFF"), hir_blit(b"\xFF"));

    // A non-ASCII character is rejected once Unicode mode is disabled.
    assert_eq!(
        t_err("(?i-u)β"),
        TestError {
            kind: hir::ErrorKind::UnicodeNotAllowed,
            span: Span::new(
                Position::new(6, 1, 7),
                Position::new(8, 1, 8),
            ),
        }
    );
}
1502
#[test]
fn dot() {
    // Unicode mode: `.` excludes only `\n` unless the `s` flag is set.
    let unicode_no_newline =
        hir_uclass(&[('\0', '\t'), ('\x0B', '\u{10FFFF}')]);
    assert_eq!(t("."), unicode_no_newline);
    let unicode_any = hir_uclass(&[('\0', '\u{10FFFF}')]);
    assert_eq!(t("(?s)."), unicode_any);

    // Byte mode with invalid UTF-8 allowed: same shape over all bytes.
    let bytes_no_newline = hir_bclass(&[(b'\0', b'\t'), (b'\x0B', b'\xFF')]);
    assert_eq!(t_bytes("(?-u)."), bytes_no_newline);
    let bytes_any = hir_bclass(&[(b'\0', b'\xFF')]);
    assert_eq!(t_bytes("(?s-u)."), bytes_any);

    // If invalid UTF-8 isn't allowed, then non-Unicode `.` isn't allowed.
    let plain_err = TestError {
        kind: hir::ErrorKind::InvalidUtf8,
        span: Span::new(Position::new(5, 1, 6), Position::new(6, 1, 7)),
    };
    assert_eq!(t_err("(?-u)."), plain_err);
    let dotall_err = TestError {
        kind: hir::ErrorKind::InvalidUtf8,
        span: Span::new(Position::new(6, 1, 7), Position::new(7, 1, 8)),
    };
    assert_eq!(t_err("(?s-u)."), dotall_err);
}
1538
#[test]
fn assertions() {
    // Anchors and word boundaries that translate successfully when invalid
    // UTF-8 is disallowed.
    let cases = vec![
        ("^", hir_anchor(hir::Anchor::StartText)),
        ("$", hir_anchor(hir::Anchor::EndText)),
        (r"\A", hir_anchor(hir::Anchor::StartText)),
        (r"\z", hir_anchor(hir::Anchor::EndText)),
        ("(?m)^", hir_anchor(hir::Anchor::StartLine)),
        ("(?m)$", hir_anchor(hir::Anchor::EndLine)),
        (r"(?m)\A", hir_anchor(hir::Anchor::StartText)),
        (r"(?m)\z", hir_anchor(hir::Anchor::EndText)),
        (r"\b", hir_word(hir::WordBoundary::Unicode)),
        (r"\B", hir_word(hir::WordBoundary::UnicodeNegate)),
        (r"(?-u)\b", hir_word(hir::WordBoundary::Ascii)),
    ];
    for (pattern, expected) in cases {
        assert_eq!(t(pattern), expected);
    }

    // A negated ASCII word boundary needs invalid UTF-8 to be allowed...
    let ascii_negate = hir_word(hir::WordBoundary::AsciiNegate);
    assert_eq!(t_bytes(r"(?-u)\B"), ascii_negate);

    // ...and is an error otherwise.
    let expected_err = TestError {
        kind: hir::ErrorKind::InvalidUtf8,
        span: Span::new(Position::new(5, 1, 6), Position::new(7, 1, 8)),
    };
    assert_eq!(t_err(r"(?-u)\B"), expected_err);
}
1569
#[test]
fn group() {
    // Each pattern is paired with the HIR it must translate to. Capture
    // indices count capturing groups only; non-capturing groups are skipped.
    let cases = vec![
        ("(a)", hir_group(1, hir_lit("a"))),
        (
            "(a)(b)",
            hir_cat(vec![
                hir_group(1, hir_lit("a")),
                hir_group(2, hir_lit("b")),
            ]),
        ),
        (
            "(a)|(b)",
            hir_alt(vec![
                hir_group(1, hir_lit("a")),
                hir_group(2, hir_lit("b")),
            ]),
        ),
        ("(?P<foo>)", hir_group_name(1, "foo", Hir::empty())),
        ("(?P<foo>a)", hir_group_name(1, "foo", hir_lit("a"))),
        (
            "(?P<foo>a)(?P<bar>b)",
            hir_cat(vec![
                hir_group_name(1, "foo", hir_lit("a")),
                hir_group_name(2, "bar", hir_lit("b")),
            ]),
        ),
        ("(?:)", hir_group_nocap(Hir::empty())),
        ("(?:a)", hir_group_nocap(hir_lit("a"))),
        (
            "(?:a)(b)",
            hir_cat(vec![
                hir_group_nocap(hir_lit("a")),
                hir_group(1, hir_lit("b")),
            ]),
        ),
        (
            "(a)(?:b)(c)",
            hir_cat(vec![
                hir_group(1, hir_lit("a")),
                hir_group_nocap(hir_lit("b")),
                hir_group(2, hir_lit("c")),
            ]),
        ),
        (
            "(a)(?P<foo>b)(c)",
            hir_cat(vec![
                hir_group(1, hir_lit("a")),
                hir_group_name(2, "foo", hir_lit("b")),
                hir_group(3, hir_lit("c")),
            ]),
        ),
        // Groups that hold nothing but a flag directive are empty groups.
        ("()", hir_group(1, Hir::empty())),
        ("((?i))", hir_group(1, Hir::empty())),
        ("((?x))", hir_group(1, Hir::empty())),
        ("(((?x)))", hir_group(1, hir_group(2, Hir::empty()))),
    ];
    for (pattern, expected) in cases {
        assert_eq!(t(pattern), expected);
    }
}
1626
// Checks how flags scope: a flag set inside a group applies only within that
// group, while a bare flag directive applies to what follows it.
#[test]
fn flags() {
    // A flag set on a group does not leak out of the group.
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i:a)a"),
        hir_cat(vec![
            hir_group_nocap(hir_uclass(&[('A', 'A'), ('a', 'a')])),
            hir_lit("a"),
        ])
    );
    // Unicode mode disabled inside the group is restored after it, so the
    // trailing non-ASCII literal is accepted.
    assert_eq!(
        t("(?i-u:a)β"),
        hir_cat(vec![
            hir_group_nocap(hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
            hir_lit("β"),
        ])
    );
    // A group can switch off a flag that is on around it.
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)(?-i:a)a"),
        hir_cat(vec![
            hir_group_nocap(hir_lit("a")),
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
        ])
    );
    // Several flags can be set at once.
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?im)a^"),
        hir_cat(vec![
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
            hir_anchor(hir::Anchor::StartLine),
        ])
    );
    // A later directive can keep one flag and clear another.
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?im)a^(?i-m)a^"),
        hir_cat(vec![
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
            hir_anchor(hir::Anchor::StartLine),
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
            hir_anchor(hir::Anchor::StartText),
        ])
    );
    // The swap greed flag inverts the greediness of `*` and `*?`.
    assert_eq!(
        t("(?U)a*a*?(?-U)a*a*?"),
        hir_cat(vec![
            hir_star(false, hir_lit("a")),
            hir_star(true, hir_lit("a")),
            hir_star(true, hir_lit("a")),
            hir_star(false, hir_lit("a")),
        ])
    );
    // A bare directive inside a group stops applying when the group ends.
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?:a(?i)a)a"),
        hir_cat(vec![
            hir_group_nocap(hir_cat(vec![
                hir_lit("a"),
                hir_uclass(&[('A', 'A'), ('a', 'a')]),
            ])),
            hir_lit("a"),
        ])
    );
    // Likewise, clearing a flag inside a group does not clear it outside.
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)(?:a(?-i)a)a"),
        hir_cat(vec![
            hir_group_nocap(hir_cat(vec![
                hir_uclass(&[('A', 'A'), ('a', 'a')]),
                hir_lit("a"),
            ])),
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
        ])
    );
}
1702
#[test]
fn escape() {
    // Every escaped meta character translates to the character itself.
    let pattern = r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#";
    let unescaped = r"\.+*?()|[]{}^$#";
    assert_eq!(t(pattern), hir_lit(unescaped));
}
1710
#[test]
fn repetition() {
    use hir::RepetitionRange::{AtLeast, Bounded, Exactly};

    // Shorthand for the repeated sub-expression used by most cases.
    let a = || hir_lit("a");

    let cases = vec![
        // Greedy and lazy forms of the basic operators.
        ("a?", hir_quest(true, a())),
        ("a*", hir_star(true, a())),
        ("a+", hir_plus(true, a())),
        ("a??", hir_quest(false, a())),
        ("a*?", hir_star(false, a())),
        ("a+?", hir_plus(false, a())),
        // Greedy and lazy forms of counted repetitions.
        ("a{1}", hir_range(true, Exactly(1), a())),
        ("a{1,}", hir_range(true, AtLeast(1), a())),
        ("a{1,2}", hir_range(true, Bounded(1, 2), a())),
        ("a{1}?", hir_range(false, Exactly(1), a())),
        ("a{1,}?", hir_range(false, AtLeast(1), a())),
        ("a{1,2}?", hir_range(false, Bounded(1, 2), a())),
        // A repetition operator binds to the expression right before it.
        ("ab?", hir_cat(vec![a(), hir_quest(true, hir_lit("b"))])),
        (
            "(ab)?",
            hir_quest(
                true,
                hir_group(1, hir_cat(vec![a(), hir_lit("b")])),
            ),
        ),
        ("a|b?", hir_alt(vec![a(), hir_quest(true, hir_lit("b"))])),
    ];
    for (pattern, expected) in cases {
        assert_eq!(t(pattern), expected);
    }
}
1765
#[test]
fn cat_alt() {
    // Concatenations and alternations, bare and nested in capture groups,
    // each paired with the HIR they must translate to.
    let cases = vec![
        ("(ab)", hir_group(1, hir_cat(vec![hir_lit("a"), hir_lit("b")]))),
        ("a|b", hir_alt(vec![hir_lit("a"), hir_lit("b")])),
        ("a|b|c", hir_alt(vec![hir_lit("a"), hir_lit("b"), hir_lit("c")])),
        (
            "ab|bc|cd",
            hir_alt(vec![hir_lit("ab"), hir_lit("bc"), hir_lit("cd")]),
        ),
        ("(a|b)", hir_group(1, hir_alt(vec![hir_lit("a"), hir_lit("b")]))),
        (
            "(a|b|c)",
            hir_group(
                1,
                hir_alt(vec![hir_lit("a"), hir_lit("b"), hir_lit("c")]),
            ),
        ),
        (
            "(ab|bc|cd)",
            hir_group(
                1,
                hir_alt(vec![hir_lit("ab"), hir_lit("bc"), hir_lit("cd")]),
            ),
        ),
        (
            "(ab|(bc|(cd)))",
            hir_group(
                1,
                hir_alt(vec![
                    hir_lit("ab"),
                    hir_group(
                        2,
                        hir_alt(vec![
                            hir_lit("bc"),
                            hir_group(3, hir_lit("cd")),
                        ]),
                    ),
                ]),
            ),
        ),
    ];
    for (pattern, expected) in cases {
        assert_eq!(t(pattern), expected);
    }
}
1816
// Checks the translation of bracketed ASCII classes such as `[[:alnum:]]`
// against the ranges listed by `ascii_class`.
#[test]
fn class_ascii() {
    // In Unicode mode (the default), every ASCII class translates to a
    // Unicode class holding exactly the ranges from `ascii_class`.
    assert_eq!(
        t("[[:alnum:]]"),
        hir_uclass(ascii_class(&ast::ClassAsciiKind::Alnum))
    );
    assert_eq!(
        t("[[:alpha:]]"),
        hir_uclass(ascii_class(&ast::ClassAsciiKind::Alpha))
    );
    assert_eq!(
        t("[[:ascii:]]"),
        hir_uclass(ascii_class(&ast::ClassAsciiKind::Ascii))
    );
    assert_eq!(
        t("[[:blank:]]"),
        hir_uclass(ascii_class(&ast::ClassAsciiKind::Blank))
    );
    assert_eq!(
        t("[[:cntrl:]]"),
        hir_uclass(ascii_class(&ast::ClassAsciiKind::Cntrl))
    );
    assert_eq!(
        t("[[:digit:]]"),
        hir_uclass(ascii_class(&ast::ClassAsciiKind::Digit))
    );
    assert_eq!(
        t("[[:graph:]]"),
        hir_uclass(ascii_class(&ast::ClassAsciiKind::Graph))
    );
    assert_eq!(
        t("[[:lower:]]"),
        hir_uclass(ascii_class(&ast::ClassAsciiKind::Lower))
    );
    assert_eq!(
        t("[[:print:]]"),
        hir_uclass(ascii_class(&ast::ClassAsciiKind::Print))
    );
    assert_eq!(
        t("[[:punct:]]"),
        hir_uclass(ascii_class(&ast::ClassAsciiKind::Punct))
    );
    assert_eq!(
        t("[[:space:]]"),
        hir_uclass(ascii_class(&ast::ClassAsciiKind::Space))
    );
    assert_eq!(
        t("[[:upper:]]"),
        hir_uclass(ascii_class(&ast::ClassAsciiKind::Upper))
    );
    assert_eq!(
        t("[[:word:]]"),
        hir_uclass(ascii_class(&ast::ClassAsciiKind::Word))
    );
    assert_eq!(
        t("[[:xdigit:]]"),
        hir_uclass(ascii_class(&ast::ClassAsciiKind::Xdigit))
    );

    // Negation inside the brackets negates the Unicode class.
    assert_eq!(
        t("[[:^lower:]]"),
        hir_negate(hir_uclass(ascii_class(&ast::ClassAsciiKind::Lower)))
    );
    // Unicode case folding of `a-z` adds `A-Z` plus two non-ASCII
    // characters.
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)[[:lower:]]"),
        hir_uclass(&[
            ('A', 'Z'),
            ('a', 'z'),
            ('\u{17F}', '\u{17F}'),
            ('\u{212A}', '\u{212A}'),
        ])
    );

    // With Unicode mode disabled, the same classes become byte classes.
    assert_eq!(
        t("(?-u)[[:lower:]]"),
        hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Lower))
    );
    assert_eq!(
        t("(?i-u)[[:lower:]]"),
        hir_case_fold(hir_bclass_from_char(ascii_class(
            &ast::ClassAsciiKind::Lower
        )))
    );

    // A negated byte class is rejected when invalid UTF-8 is disallowed.
    assert_eq!(
        t_err("(?-u)[[:^lower:]]"),
        TestError {
            kind: hir::ErrorKind::InvalidUtf8,
            span: Span::new(
                Position::new(6, 1, 7),
                Position::new(16, 1, 17)
            ),
        }
    );
    assert_eq!(
        t_err("(?i-u)[[:^lower:]]"),
        TestError {
            kind: hir::ErrorKind::InvalidUtf8,
            span: Span::new(
                Position::new(7, 1, 8),
                Position::new(17, 1, 18)
            ),
        }
    );
}
1923
// Checks the translation of the Perl classes `\d`, `\s`, `\w` and their
// negated forms, in Unicode and ASCII-only mode. Only compiled when the
// `unicode-perl` feature is enabled.
#[test]
#[cfg(feature = "unicode-perl")]
fn class_perl() {
    // Unicode
    assert_eq!(t(r"\d"), hir_uclass_query(ClassQuery::Binary("digit")));
    assert_eq!(t(r"\s"), hir_uclass_query(ClassQuery::Binary("space")));
    assert_eq!(t(r"\w"), hir_uclass_perl_word());
    // The case insensitive flag does not change the expected classes.
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t(r"(?i)\d"),
        hir_uclass_query(ClassQuery::Binary("digit"))
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t(r"(?i)\s"),
        hir_uclass_query(ClassQuery::Binary("space"))
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)\w"), hir_uclass_perl_word());

    // Unicode, negated
    assert_eq!(
        t(r"\D"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
    );
    assert_eq!(
        t(r"\S"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("space")))
    );
    assert_eq!(t(r"\W"), hir_negate(hir_uclass_perl_word()));
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t(r"(?i)\D"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t(r"(?i)\S"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("space")))
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)\W"), hir_negate(hir_uclass_perl_word()));

    // ASCII only
    assert_eq!(
        t(r"(?-u)\d"),
        hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
    );
    assert_eq!(
        t(r"(?-u)\s"),
        hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Space))
    );
    assert_eq!(
        t(r"(?-u)\w"),
        hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word))
    );
    // As above, the case insensitive flag leaves the expectation unchanged.
    assert_eq!(
        t(r"(?i-u)\d"),
        hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
    );
    assert_eq!(
        t(r"(?i-u)\s"),
        hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Space))
    );
    assert_eq!(
        t(r"(?i-u)\w"),
        hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word))
    );

    // ASCII only, negated
    assert_eq!(
        t(r"(?-u)\D"),
        hir_negate(hir_bclass_from_char(ascii_class(
            &ast::ClassAsciiKind::Digit
        )))
    );
    assert_eq!(
        t(r"(?-u)\S"),
        hir_negate(hir_bclass_from_char(ascii_class(
            &ast::ClassAsciiKind::Space
        )))
    );
    assert_eq!(
        t(r"(?-u)\W"),
        hir_negate(hir_bclass_from_char(ascii_class(
            &ast::ClassAsciiKind::Word
        )))
    );
    assert_eq!(
        t(r"(?i-u)\D"),
        hir_negate(hir_bclass_from_char(ascii_class(
            &ast::ClassAsciiKind::Digit
        )))
    );
    assert_eq!(
        t(r"(?i-u)\S"),
        hir_negate(hir_bclass_from_char(ascii_class(
            &ast::ClassAsciiKind::Space
        )))
    );
    assert_eq!(
        t(r"(?i-u)\W"),
        hir_negate(hir_bclass_from_char(ascii_class(
            &ast::ClassAsciiKind::Word
        )))
    );
}
2031
#[test]
#[cfg(not(feature = "unicode-perl"))]
fn class_perl_word_disabled() {
    // Without the `unicode-perl` feature, `\w` must fail with an error
    // spanning the whole two-character escape.
    let expected = TestError {
        kind: hir::ErrorKind::UnicodePerlClassNotFound,
        span: Span::new(Position::new(0, 1, 1), Position::new(2, 1, 3)),
    };
    assert_eq!(t_err(r"\w"), expected);
}
2046
#[test]
#[cfg(all(not(feature = "unicode-perl"), not(feature = "unicode-bool")))]
fn class_perl_space_disabled() {
    // With both `unicode-perl` and `unicode-bool` disabled, `\s` must fail
    // with an error spanning the whole two-character escape.
    let expected = TestError {
        kind: hir::ErrorKind::UnicodePerlClassNotFound,
        span: Span::new(Position::new(0, 1, 1), Position::new(2, 1, 3)),
    };
    assert_eq!(t_err(r"\s"), expected);
}
2061
#[test]
#[cfg(all(
    not(feature = "unicode-perl"),
    not(feature = "unicode-gencat")
))]
fn class_perl_digit_disabled() {
    // With both `unicode-perl` and `unicode-gencat` disabled, `\d` must
    // fail with an error spanning the whole two-character escape.
    let expected = TestError {
        kind: hir::ErrorKind::UnicodePerlClassNotFound,
        span: Span::new(Position::new(0, 1, 1), Position::new(2, 1, 3)),
    };
    assert_eq!(t_err(r"\d"), expected);
}
2079
#[test]
#[cfg(feature = "unicode-gencat")]
fn class_unicode_gencat() {
    // Expected class for a property name, looked up the same way for every
    // case below.
    let query =
        |name: &'static str| hir_uclass_query(ClassQuery::Binary(name));

    // Different spellings of a general category resolve to the same class.
    let named = [
        (r"\pZ", "Z"),
        (r"\pz", "Z"),
        (r"\p{Separator}", "Z"),
        (r"\p{se PaRa ToR}", "Z"),
        (r"\p{gc:Separator}", "Z"),
        (r"\p{gc=Separator}", "Z"),
        (r"\p{Other}", "Other"),
        (r"\pC", "Other"),
    ];
    for &(pattern, name) in &named {
        assert_eq!(t(pattern), query(name));
    }

    // Negated forms.
    let negated = [
        (r"\PZ", "Z"),
        (r"\P{separator}", "Z"),
        (r"\P{gc!=separator}", "Z"),
    ];
    for &(pattern, name) in &negated {
        assert_eq!(t(pattern), hir_negate(query(name)));
    }

    // The `any`, `assigned` and `ascii` classes, with and without `gc:`.
    let special = [
        (r"\p{any}", "Any"),
        (r"\p{assigned}", "Assigned"),
        (r"\p{ascii}", "ASCII"),
        (r"\p{gc:any}", "Any"),
        (r"\p{gc:assigned}", "Assigned"),
        (r"\p{gc:ascii}", "ASCII"),
    ];
    for &(pattern, name) in &special {
        assert_eq!(t(pattern), query(name));
    }

    // Failing patterns with the expected error kind and the start and end
    // byte offsets of the reported span. Every pattern is a single line of
    // ASCII, so each column is its offset plus one.
    let failures = vec![
        (r"(?-u)\pZ", hir::ErrorKind::UnicodeNotAllowed, 5, 8),
        (r"(?-u)\p{Separator}", hir::ErrorKind::UnicodeNotAllowed, 5, 18),
        (r"\pE", hir::ErrorKind::UnicodePropertyNotFound, 0, 3),
        (r"\p{Foo}", hir::ErrorKind::UnicodePropertyNotFound, 0, 7),
        (r"\p{gc:Foo}", hir::ErrorKind::UnicodePropertyValueNotFound, 0, 10),
    ];
    for (pattern, kind, start, end) in failures {
        let expected = TestError {
            kind,
            span: Span::new(
                Position::new(start, 1, start + 1),
                Position::new(end, 1, end + 1),
            ),
        };
        assert_eq!(t_err(pattern), expected);
    }
}
2193
/// Without the `unicode-gencat` feature, general category names (including
/// `Any`) must be reported as `UnicodePropertyNotFound` over the whole
/// `\p{..}` expression.
#[test]
#[cfg(not(feature = "unicode-gencat"))]
fn class_unicode_gencat_disabled() {
    // (pattern, end offset of the error span); spans start at offset 0.
    let cases: [(&str, usize); 2] = [(r"\p{Separator}", 13), (r"\p{Any}", 7)];
    for &(pattern, end) in cases.iter() {
        let actual = t_err(pattern);
        let expected = TestError {
            kind: hir::ErrorKind::UnicodePropertyNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(end, 1, end + 1),
            ),
        };
        assert_eq!(actual, expected);
    }
}
2219
/// Translation of script classes when `unicode-script` is enabled, including
/// case-insensitive forms (only with `unicode-case`) and the error reported
/// for unknown `sc`/`scx` values.
#[test]
#[cfg(feature = "unicode-script")]
fn class_unicode_script() {
    // Expected HIR for the Greek script query.
    let greek = || hir_uclass_query(ClassQuery::Binary("Greek"));
    // Error spans here start at offset 0 on line 1.
    let value_not_found = |end: usize| TestError {
        kind: hir::ErrorKind::UnicodePropertyValueNotFound,
        span: Span::new(
            Position::new(0, 1, 1),
            Position::new(end, 1, end + 1),
        ),
    };

    assert_eq!(t(r"\p{Greek}"), greek());
    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)\p{Greek}"), hir_case_fold(greek()));
    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)\P{Greek}"), hir_negate(hir_case_fold(greek())));

    assert_eq!(t_err(r"\p{sc:Foo}"), value_not_found(10));
    assert_eq!(t_err(r"\p{scx:Foo}"), value_not_found(11));
}
2261
/// Without the `unicode-script` feature, script names and `scx:` queries must
/// be reported as `UnicodePropertyNotFound` over the whole `\p{..}`
/// expression.
#[test]
#[cfg(not(feature = "unicode-script"))]
fn class_unicode_script_disabled() {
    // (pattern, end offset of the error span); spans start at offset 0.
    let cases: [(&str, usize); 2] =
        [(r"\p{Greek}", 9), (r"\p{scx:Greek}", 13)];
    for &(pattern, end) in cases.iter() {
        let actual = t_err(pattern);
        let expected = TestError {
            kind: hir::ErrorKind::UnicodePropertyNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(end, 1, end + 1),
            ),
        };
        assert_eq!(actual, expected);
    }
}
2287
/// With `unicode-age` enabled, an unknown age value must be reported as
/// `UnicodePropertyValueNotFound` over the whole `\p{..}` expression.
#[test]
#[cfg(feature = "unicode-age")]
fn class_unicode_age() {
    let actual = t_err(r"\p{age:Foo}");
    let expected = TestError {
        kind: hir::ErrorKind::UnicodePropertyValueNotFound,
        span: Span::new(Position::new(0, 1, 1), Position::new(11, 1, 12)),
    };
    assert_eq!(actual, expected);
}
2302
/// Without the `unicode-age` feature, an `age:` query must be reported as
/// `UnicodePropertyNotFound` over the whole `\p{..}` expression.
#[test]
#[cfg(not(feature = "unicode-age"))]
fn class_unicode_age_disabled() {
    let actual = t_err(r"\p{age:3.0}");
    let expected = TestError {
        kind: hir::ErrorKind::UnicodePropertyNotFound,
        span: Span::new(Position::new(0, 1, 1), Position::new(11, 1, 12)),
    };
    assert_eq!(actual, expected);
}
2317
/// Translation of bracketed character classes (`[...]`): plain ranges and
/// literals, Perl/Unicode classes nested inside brackets, byte classes under
/// `(?-u)`, case-insensitive classes, negation, a few unusual-but-legal
/// spellings, and the errors reported for classes that cannot be built.
///
/// Assertions that rely on Unicode tables are individually gated on the
/// feature(s) they need, so the test still compiles with features disabled.
#[test]
fn class_bracketed() {
    // Simple literals and ranges; overlapping and adjacent ranges are
    // expected to come back merged.
    assert_eq!(t("[a]"), hir_uclass(&[('a', 'a')]));
    assert_eq!(t("[^[a]]"), hir_negate(hir_uclass(&[('a', 'a')])));
    assert_eq!(t("[a-z]"), hir_uclass(&[('a', 'z')]));
    assert_eq!(t("[a-fd-h]"), hir_uclass(&[('a', 'h')]));
    assert_eq!(t("[a-fg-m]"), hir_uclass(&[('a', 'm')]));
    assert_eq!(t(r"[\x00]"), hir_uclass(&[('\0', '\0')]));
    // Both the escaped `\n` and a literal newline byte in the pattern.
    assert_eq!(t(r"[\n]"), hir_uclass(&[('\n', '\n')]));
    assert_eq!(t("[\n]"), hir_uclass(&[('\n', '\n')]));
    // Perl and Unicode classes inside brackets.
    #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
    assert_eq!(t(r"[\d]"), hir_uclass_query(ClassQuery::Binary("digit")));
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(
        t(r"[\pZ]"),
        hir_uclass_query(ClassQuery::Binary("separator"))
    );
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(
        t(r"[\p{separator}]"),
        hir_uclass_query(ClassQuery::Binary("separator"))
    );
    // A negated bracket around a negated class yields the positive class.
    #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
    assert_eq!(t(r"[^\D]"), hir_uclass_query(ClassQuery::Binary("digit")));
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(
        t(r"[^\PZ]"),
        hir_uclass_query(ClassQuery::Binary("separator"))
    );
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(
        t(r"[^\P{separator}]"),
        hir_uclass_query(ClassQuery::Binary("separator"))
    );
    #[cfg(all(
        feature = "unicode-case",
        any(feature = "unicode-perl", feature = "unicode-gencat")
    ))]
    assert_eq!(
        t(r"(?i)[^\D]"),
        hir_uclass_query(ClassQuery::Binary("digit"))
    );
    #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
    assert_eq!(
        t(r"(?i)[^\P{greek}]"),
        hir_case_fold(hir_uclass_query(ClassQuery::Binary("greek")))
    );

    // With Unicode mode disabled the result is a byte class.
    assert_eq!(t("(?-u)[a]"), hir_bclass(&[(b'a', b'a')]));
    assert_eq!(t(r"(?-u)[\x00]"), hir_bclass(&[(b'\0', b'\0')]));
    assert_eq!(t_bytes(r"(?-u)[\xFF]"), hir_bclass(&[(b'\xFF', b'\xFF')]));

    // Case-insensitive classes include every case variant of the literal.
    #[cfg(feature = "unicode-case")]
    assert_eq!(t("(?i)[a]"), hir_uclass(&[('A', 'A'), ('a', 'a')]));
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)[k]"),
        hir_uclass(&[('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}'),])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)[β]"),
        hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),])
    );
    assert_eq!(t("(?i-u)[k]"), hir_bclass(&[(b'K', b'K'), (b'k', b'k'),]));

    // Negated classes.
    assert_eq!(t("[^a]"), hir_negate(hir_uclass(&[('a', 'a')])));
    assert_eq!(t(r"[^\x00]"), hir_negate(hir_uclass(&[('\0', '\0')])));
    assert_eq!(
        t_bytes("(?-u)[^a]"),
        hir_negate(hir_bclass(&[(b'a', b'a')]))
    );
    #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
    assert_eq!(
        t(r"[^\d]"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
    );
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(
        t(r"[^\pZ]"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
    );
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(
        t(r"[^\p{separator}]"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
    );
    // In both cases below the expected value case-folds first, then negates.
    #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
    assert_eq!(
        t(r"(?i)[^\p{greek}]"),
        hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
            "greek"
        ))))
    );
    #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
    assert_eq!(
        t(r"(?i)[\P{greek}]"),
        hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
            "greek"
        ))))
    );

    // Test some weird cases.
    assert_eq!(t(r"[\[]"), hir_uclass(&[('[', '[')]));

    // `&`, `~` and `-` as literals: bare, escaped, repeated, and as either
    // endpoint of a range.
    assert_eq!(t(r"[&]"), hir_uclass(&[('&', '&')]));
    assert_eq!(t(r"[\&]"), hir_uclass(&[('&', '&')]));
    assert_eq!(t(r"[\&\&]"), hir_uclass(&[('&', '&')]));
    assert_eq!(t(r"[\x00-&]"), hir_uclass(&[('\0', '&')]));
    assert_eq!(t(r"[&-\xFF]"), hir_uclass(&[('&', '\u{FF}')]));

    assert_eq!(t(r"[~]"), hir_uclass(&[('~', '~')]));
    assert_eq!(t(r"[\~]"), hir_uclass(&[('~', '~')]));
    assert_eq!(t(r"[\~\~]"), hir_uclass(&[('~', '~')]));
    assert_eq!(t(r"[\x00-~]"), hir_uclass(&[('\0', '~')]));
    assert_eq!(t(r"[~-\xFF]"), hir_uclass(&[('~', '\u{FF}')]));

    assert_eq!(t(r"[-]"), hir_uclass(&[('-', '-')]));
    assert_eq!(t(r"[\-]"), hir_uclass(&[('-', '-')]));
    assert_eq!(t(r"[\-\-]"), hir_uclass(&[('-', '-')]));
    assert_eq!(t(r"[\x00-\-]"), hir_uclass(&[('\0', '-')]));
    assert_eq!(t(r"[\--\xFF]"), hir_uclass(&[('-', '\u{FF}')]));

    // Through `t_err`, a negated byte class is rejected as `InvalidUtf8`
    // (the same pattern is accepted through `t_bytes` above).
    assert_eq!(
        t_err("(?-u)[^a]"),
        TestError {
            kind: hir::ErrorKind::InvalidUtf8,
            span: Span::new(
                Position::new(5, 1, 6),
                Position::new(9, 1, 10)
            ),
        }
    );
    // `[^\s\S]` is expected to fail with `EmptyClassNotAllowed`, in both
    // Unicode and byte mode.
    #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
    assert_eq!(
        t_err(r"[^\s\S]"),
        TestError {
            kind: hir::ErrorKind::EmptyClassNotAllowed,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(7, 1, 8)
            ),
        }
    );
    #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
    assert_eq!(
        t_err(r"(?-u)[^\s\S]"),
        TestError {
            kind: hir::ErrorKind::EmptyClassNotAllowed,
            span: Span::new(
                Position::new(5, 1, 6),
                Position::new(12, 1, 13)
            ),
        }
    );
}
2474
/// Translation of bracketed classes whose items are unioned together:
/// plain ranges, literals mixed with Unicode classes, several Unicode
/// classes side by side, nested brackets, and the same unions combined with
/// case folding and/or negation.
///
/// Each assertion is gated on exactly the Unicode features its pattern uses.
#[test]
fn class_bracketed_union() {
    assert_eq!(t("[a-zA-Z]"), hir_uclass(&[('A', 'Z'), ('a', 'z')]));
    // The literals `a` and `b` are expected as one range `a-b`, unioned with
    // the separator class.
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(
        t(r"[a\pZb]"),
        hir_union(
            hir_uclass(&[('a', 'b')]),
            hir_uclass_query(ClassQuery::Binary("separator"))
        )
    );
    #[cfg(all(feature = "unicode-gencat", feature = "unicode-script"))]
    assert_eq!(
        t(r"[\pZ\p{Greek}]"),
        hir_union(
            hir_uclass_query(ClassQuery::Binary("greek")),
            hir_uclass_query(ClassQuery::Binary("separator"))
        )
    );
    #[cfg(all(
        feature = "unicode-age",
        feature = "unicode-gencat",
        feature = "unicode-script"
    ))]
    assert_eq!(
        t(r"[\p{age:3.0}\pZ\p{Greek}]"),
        hir_union(
            hir_uclass_query(ClassQuery::ByValue {
                property_name: "age",
                property_value: "3.0",
            }),
            hir_union(
                hir_uclass_query(ClassQuery::Binary("greek")),
                hir_uclass_query(ClassQuery::Binary("separator"))
            )
        )
    );
    // Nested brackets are expected to flatten into one union of all members.
    #[cfg(all(
        feature = "unicode-age",
        feature = "unicode-gencat",
        feature = "unicode-script"
    ))]
    assert_eq!(
        t(r"[[[\p{age:3.0}\pZ]\p{Greek}][\p{Cyrillic}]]"),
        hir_union(
            hir_uclass_query(ClassQuery::ByValue {
                property_name: "age",
                property_value: "3.0",
            }),
            hir_union(
                hir_uclass_query(ClassQuery::Binary("cyrillic")),
                hir_union(
                    hir_uclass_query(ClassQuery::Binary("greek")),
                    hir_uclass_query(ClassQuery::Binary("separator"))
                )
            )
        )
    );

    // Case-insensitive: the expected value case-folds the whole union.
    #[cfg(all(
        feature = "unicode-age",
        feature = "unicode-case",
        feature = "unicode-gencat",
        feature = "unicode-script"
    ))]
    assert_eq!(
        t(r"(?i)[\p{age:3.0}\pZ\p{Greek}]"),
        hir_case_fold(hir_union(
            hir_uclass_query(ClassQuery::ByValue {
                property_name: "age",
                property_value: "3.0",
            }),
            hir_union(
                hir_uclass_query(ClassQuery::Binary("greek")),
                hir_uclass_query(ClassQuery::Binary("separator"))
            )
        ))
    );
    // Negated: the expected value negates the whole union.
    #[cfg(all(
        feature = "unicode-age",
        feature = "unicode-gencat",
        feature = "unicode-script"
    ))]
    assert_eq!(
        t(r"[^\p{age:3.0}\pZ\p{Greek}]"),
        hir_negate(hir_union(
            hir_uclass_query(ClassQuery::ByValue {
                property_name: "age",
                property_value: "3.0",
            }),
            hir_union(
                hir_uclass_query(ClassQuery::Binary("greek")),
                hir_uclass_query(ClassQuery::Binary("separator"))
            )
        ))
    );
    // Negated and case-insensitive: case-fold the union first, then negate.
    #[cfg(all(
        feature = "unicode-age",
        feature = "unicode-case",
        feature = "unicode-gencat",
        feature = "unicode-script"
    ))]
    assert_eq!(
        t(r"(?i)[^\p{age:3.0}\pZ\p{Greek}]"),
        hir_negate(hir_case_fold(hir_union(
            hir_uclass_query(ClassQuery::ByValue {
                property_name: "age",
                property_value: "3.0",
            }),
            hir_union(
                hir_uclass_query(ClassQuery::Binary("greek")),
                hir_uclass_query(ClassQuery::Binary("separator"))
            )
        )))
    );
}
2591
/// Translation of bracketed classes containing a nested negated class,
/// with and without an outer negation and case insensitivity, plus the
/// `EmptyClassNotAllowed` error when the outer negation leaves nothing.
#[test]
fn class_bracketed_nested() {
    // The class containing only `c`.
    let only_c = || hir_uclass(&[('c', 'c')]);
    // Expected error for a class that ends up empty (patterns are on line 1,
    // so column == offset + 1).
    let empty_class = |start: usize, end: usize| TestError {
        kind: hir::ErrorKind::EmptyClassNotAllowed,
        span: Span::new(
            Position::new(start, 1, start + 1),
            Position::new(end, 1, end + 1),
        ),
    };

    assert_eq!(t(r"[a[^c]]"), hir_negate(only_c()));
    assert_eq!(t(r"[a-b[^c]]"), hir_negate(only_c()));
    assert_eq!(t(r"[a-c[^c]]"), hir_negate(hir_uclass(&[])));

    assert_eq!(t(r"[^a[^c]]"), only_c());
    assert_eq!(t(r"[^a-b[^c]]"), only_c());

    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)[a[^c]]"), hir_negate(hir_case_fold(only_c())));
    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)[a-b[^c]]"), hir_negate(hir_case_fold(only_c())));

    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)[^a[^c]]"), hir_uclass(&[('C', 'C'), ('c', 'c')]));
    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)[^a-b[^c]]"), hir_uclass(&[('C', 'C'), ('c', 'c')]));

    assert_eq!(t_err(r"[^a-c[^c]]"), empty_class(0, 10));
    #[cfg(feature = "unicode-case")]
    assert_eq!(t_err(r"(?i)[^a-c[^c]]"), empty_class(4, 14));
}
2642
/// Translation of class intersection (`&&`) in Unicode mode, byte mode and
/// their case-insensitive variants, followed by literal/escaping edge cases
/// around `&&` and a precedence check.
#[test]
fn class_bracketed_intersect() {
    // Unicode mode: (pattern, expected single range).
    let unicode_cases = [
        ("[abc&&b-c]", 'b', 'c'),
        ("[abc&&[b-c]]", 'b', 'c'),
        ("[[abc]&&[b-c]]", 'b', 'c'),
        ("[a-z&&b-y&&c-x]", 'c', 'x'),
        ("[c-da-b&&a-d]", 'a', 'd'),
        ("[a-d&&c-da-b]", 'a', 'd'),
        (r"[a-z&&a-c]", 'a', 'c'),
        (r"[[a-z&&a-c]]", 'a', 'c'),
    ];
    for &(pattern, lo, hi) in unicode_cases.iter() {
        assert_eq!(t(pattern), hir_uclass(&[(lo, hi)]));
    }
    assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));

    // Byte mode: same shapes under `(?-u)`.
    let byte_cases = [
        ("(?-u)[abc&&b-c]", b'b', b'c'),
        ("(?-u)[abc&&[b-c]]", b'b', b'c'),
        ("(?-u)[[abc]&&[b-c]]", b'b', b'c'),
        ("(?-u)[a-z&&b-y&&c-x]", b'c', b'x'),
        ("(?-u)[c-da-b&&a-d]", b'a', b'd'),
        ("(?-u)[a-d&&c-da-b]", b'a', b'd'),
    ];
    for &(pattern, lo, hi) in byte_cases.iter() {
        assert_eq!(t(pattern), hir_bclass(&[(lo, hi)]));
    }

    // Case-insensitive Unicode mode needs the case folding tables.
    #[cfg(feature = "unicode-case")]
    {
        let folded_unicode_cases = [
            ("(?i)[abc&&b-c]", 'b', 'c'),
            ("(?i)[abc&&[b-c]]", 'b', 'c'),
            ("(?i)[[abc]&&[b-c]]", 'b', 'c'),
            ("(?i)[a-z&&b-y&&c-x]", 'c', 'x'),
            ("(?i)[c-da-b&&a-d]", 'a', 'd'),
            ("(?i)[a-d&&c-da-b]", 'a', 'd'),
        ];
        for &(pattern, lo, hi) in folded_unicode_cases.iter() {
            assert_eq!(t(pattern), hir_case_fold(hir_uclass(&[(lo, hi)])));
        }
    }

    // Case-insensitive byte mode.
    let folded_byte_cases = [
        ("(?i-u)[abc&&b-c]", b'b', b'c'),
        ("(?i-u)[abc&&[b-c]]", b'b', b'c'),
        ("(?i-u)[[abc]&&[b-c]]", b'b', b'c'),
        ("(?i-u)[a-z&&b-y&&c-x]", b'c', b'x'),
        ("(?i-u)[c-da-b&&a-d]", b'a', b'd'),
        ("(?i-u)[a-d&&c-da-b]", b'a', b'd'),
    ];
    for &(pattern, lo, hi) in folded_byte_cases.iter() {
        assert_eq!(t(pattern), hir_case_fold(hir_bclass(&[(lo, hi)])));
    }

    // Each pattern below intersects down to a single literal character.
    let single_char_cases = [
        // In `[a^]`, `^` does not need to be escaped, so it makes sense that
        // `^` is also allowed to be unescaped after `&&`.
        (r"[\^&&^]", '^'),
        // `]` needs to be escaped after `&&` since it's not at start of class.
        (r"[]&&\]]", ']'),
        (r"[-&&-]", '-'),
        (r"[\&&&&]", '&'),
        (r"[\&&&\&]", '&'),
    ];
    for &(pattern, ch) in single_char_cases.iter() {
        assert_eq!(t(pattern), hir_uclass(&[(ch, ch)]));
    }

    // Test precedence.
    assert_eq!(
        t(r"[a-w&&[^c-g]z]"),
        hir_uclass(&[('a', 'b'), ('h', 'w')])
    );
}
2732
/// Translation of intersections combined with negation, at the outer level
/// and inside nested brackets, in both Unicode and byte (`(?-u)`) mode.
#[test]
fn class_bracketed_intersect_negate() {
    // Expected Unicode digit class; only needed when `\w`/`\d` have Unicode
    // tables available.
    #[cfg(feature = "unicode-perl")]
    let unicode_digit = || hir_uclass_query(ClassQuery::Binary("digit"));
    // Expected ASCII digit class as a byte class.
    let ascii_digit =
        || hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit));

    #[cfg(feature = "unicode-perl")]
    assert_eq!(t(r"[^\w&&\d]"), hir_negate(unicode_digit()));
    assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
    #[cfg(feature = "unicode-perl")]
    assert_eq!(t(r"[^[\w&&\d]]"), hir_negate(unicode_digit()));
    #[cfg(feature = "unicode-perl")]
    assert_eq!(t(r"[^[^\w&&\d]]"), unicode_digit());
    #[cfg(feature = "unicode-perl")]
    assert_eq!(t(r"[[[^\w]&&[^\d]]]"), hir_negate(hir_uclass_perl_word()));

    // NOTE(review): this case runs under `(?-u)` yet is gated on
    // `unicode-perl`, unlike the ungated `(?-u)` cases below; confirm
    // whether the gate is actually required.
    #[cfg(feature = "unicode-perl")]
    assert_eq!(t_bytes(r"(?-u)[^\w&&\d]"), hir_negate(ascii_digit()));
    assert_eq!(
        t_bytes(r"(?-u)[^[a-z&&a-c]]"),
        hir_negate(hir_bclass(&[(b'a', b'c')]))
    );
    assert_eq!(t_bytes(r"(?-u)[^[\w&&\d]]"), hir_negate(ascii_digit()));
    assert_eq!(t_bytes(r"(?-u)[^[^\w&&\d]]"), ascii_digit());
    assert_eq!(
        t_bytes(r"(?-u)[[[^\w]&&[^\d]]]"),
        hir_negate(hir_bclass_from_char(ascii_class(
            &ast::ClassAsciiKind::Word
        )))
    );
}
2782
/// Translation of class difference (`--`) in Unicode mode (needs
/// `unicode-gencat` for `\pL`) and in byte mode with POSIX-style classes.
#[test]
fn class_bracketed_difference() {
    #[cfg(feature = "unicode-gencat")]
    {
        let actual = t(r"[\pL--[:ascii:]]");
        let letters = hir_uclass_query(ClassQuery::Binary("letter"));
        let ascii = hir_uclass(&[('\0', '\x7F')]);
        assert_eq!(actual, hir_difference(letters, ascii));
    }

    // Alphabetic minus lowercase leaves the uppercase ASCII letters.
    let actual = t(r"(?-u)[[:alpha:]--[:lower:]]");
    let uppercase = hir_bclass(&[(b'A', b'Z')]);
    assert_eq!(actual, uppercase);
}
2799
/// Translation of class symmetric difference (`~~`) for script classes
/// (needs `unicode-script`), plain Unicode ranges and byte ranges.
#[test]
fn class_bracketed_symmetric_difference() {
    #[cfg(feature = "unicode-script")]
    {
        let actual = t(r"[\p{sc:Greek}~~\p{scx:Greek}]");
        let expected = hir_uclass(&[
            ('\u{0342}', '\u{0342}'),
            ('\u{0345}', '\u{0345}'),
            ('\u{1DC0}', '\u{1DC1}'),
        ]);
        assert_eq!(actual, expected);
    }

    // `a-g` vs `c-j`: only the non-overlapping ends remain.
    let unicode_actual = t(r"[a-g~~c-j]");
    assert_eq!(unicode_actual, hir_uclass(&[('a', 'b'), ('h', 'j')]));

    let bytes_actual = t(r"(?-u)[a-g~~c-j]");
    assert_eq!(bytes_actual, hir_bclass(&[(b'a', b'b'), (b'h', b'j')]));
}
2818
/// Translation under the `x` flag: whitespace and `# ...` comments placed
/// inside escapes, Unicode class braces and counted repetitions are expected
/// to have no effect on the resulting HIR, while an escaped space stays a
/// literal space.
#[test]
fn ignore_whitespace() {
    // Whitespace following or inside numeric/hex escapes.
    assert_eq!(t(r"(?x)\12 3"), hir_lit("\n3"));
    assert_eq!(t(r"(?x)\x { 53 }"), hir_lit("S"));
    // Same brace form, split across lines with a comment on every line.
    assert_eq!(
        t(r"(?x)\x # comment
{ # comment
53 # comment
} #comment"),
        hir_lit("S")
    );

    // Two-digit hex escape with whitespace/comments before or between the
    // digits.
    assert_eq!(t(r"(?x)\x 53"), hir_lit("S"));
    assert_eq!(
        t(r"(?x)\x # comment
53 # comment"),
        hir_lit("S")
    );
    assert_eq!(t(r"(?x)\x5 3"), hir_lit("S"));

    // Unicode class braces spread over commented lines.
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(
        t(r"(?x)\p # comment
{ # comment
Separator # comment
} # comment"),
        hir_uclass_query(ClassQuery::Binary("separator"))
    );

    // Counted repetition `{5,10}` spread over commented lines.
    assert_eq!(
        t(r"(?x)a # comment
{ # comment
5 # comment
, # comment
10 # comment
} # comment"),
        hir_range(
            true,
            hir::RepetitionRange::Bounded(5, 10),
            hir_lit("a")
        )
    );

    // An escaped space is kept as a literal; the trailing comment is dropped.
    assert_eq!(t(r"(?x)a\ # hi there"), hir_lit("a "));
}
2864
/// Checks `Hir::is_always_utf8` on patterns translated with `t_bytes`.
#[test]
fn analysis_is_always_utf8() {
    // Positive examples.
    let always_utf8 = [
        r"a",
        r"ab",
        r"(?-u)a",
        r"(?-u)ab",
        r"\xFF",
        r"\xFF\xFF",
        r"[^a]",
        r"[^a][^a]",
        r"\b",
        r"\B",
        r"(?-u)\b",
    ];
    for &pattern in always_utf8.iter() {
        assert!(
            t_bytes(pattern).is_always_utf8(),
            "expected is_always_utf8 for {:?}",
            pattern
        );
    }

    // Negative examples.
    let not_always_utf8 = [
        r"(?-u)\xFF",
        r"(?-u)\xFF\xFF",
        r"(?-u)[^a]",
        r"(?-u)[^a][^a]",
        r"(?-u)\B",
    ];
    for &pattern in not_always_utf8.iter() {
        assert!(
            !t_bytes(pattern).is_always_utf8(),
            "expected !is_always_utf8 for {:?}",
            pattern
        );
    }
}
2887
/// Checks `Hir::is_all_assertions` on patterns made only of anchors and word
/// boundaries, and on one pattern that also contains a literal.
#[test]
fn analysis_is_all_assertions() {
    // Positive examples.
    let only_assertions = [
        r"\b",
        r"\B",
        r"^",
        r"$",
        r"\A",
        r"\z",
        r"$^\z\A\b\B",
        r"$|^|\z|\A|\b|\B",
        r"^$|$^",
        r"((\b)+())*^",
    ];
    for &pattern in only_assertions.iter() {
        assert!(
            t(pattern).is_all_assertions(),
            "expected is_all_assertions for {:?}",
            pattern
        );
    }

    // Negative examples.
    assert!(!t(r"^a").is_all_assertions());
}
2905
/// Checks the four anchoring predicates on `Hir` (`is_anchored_start`,
/// `is_anchored_end`, `is_line_anchored_start`, `is_line_anchored_end`).
///
/// Positive groups assert the start predicates on a start-anchored pattern
/// and the end predicates on its end-anchored mirror. Negative groups cover
/// multi-line anchors, anchors that are not at the edge of the pattern,
/// alternations with an unanchored branch, and anchors under `*`.
#[test]
fn analysis_is_anchored() {
    // Positive examples.
    assert!(t(r"^").is_anchored_start());
    assert!(t(r"$").is_anchored_end());
    assert!(t(r"^").is_line_anchored_start());
    assert!(t(r"$").is_line_anchored_end());

    assert!(t(r"^^").is_anchored_start());
    assert!(t(r"$$").is_anchored_end());
    assert!(t(r"^^").is_line_anchored_start());
    assert!(t(r"$$").is_line_anchored_end());

    assert!(t(r"^$").is_anchored_start());
    assert!(t(r"^$").is_anchored_end());
    assert!(t(r"^$").is_line_anchored_start());
    assert!(t(r"^$").is_line_anchored_end());

    assert!(t(r"^foo").is_anchored_start());
    assert!(t(r"foo$").is_anchored_end());
    assert!(t(r"^foo").is_line_anchored_start());
    assert!(t(r"foo$").is_line_anchored_end());

    // Alternations where every branch is anchored.
    assert!(t(r"^foo|^bar").is_anchored_start());
    assert!(t(r"foo$|bar$").is_anchored_end());
    assert!(t(r"^foo|^bar").is_line_anchored_start());
    assert!(t(r"foo$|bar$").is_line_anchored_end());

    assert!(t(r"^(foo|bar)").is_anchored_start());
    assert!(t(r"(foo|bar)$").is_anchored_end());
    assert!(t(r"^(foo|bar)").is_line_anchored_start());
    assert!(t(r"(foo|bar)$").is_line_anchored_end());

    // Anchors under `+` still count as anchored.
    assert!(t(r"^+").is_anchored_start());
    assert!(t(r"$+").is_anchored_end());
    assert!(t(r"^+").is_line_anchored_start());
    assert!(t(r"$+").is_line_anchored_end());
    assert!(t(r"^++").is_anchored_start());
    assert!(t(r"$++").is_anchored_end());
    assert!(t(r"^++").is_line_anchored_start());
    assert!(t(r"$++").is_line_anchored_end());
    assert!(t(r"(^)+").is_anchored_start());
    assert!(t(r"($)+").is_anchored_end());
    assert!(t(r"(^)+").is_line_anchored_start());
    assert!(t(r"($)+").is_line_anchored_end());

    // `$^` is anchored at both ends. This group previously repeated
    // `is_anchored_start` and `is_line_anchored_end` and never checked
    // `is_anchored_end` or `is_line_anchored_start`; all four are checked
    // now, mirroring the `$^|^$` group below.
    assert!(t(r"$^").is_anchored_start());
    assert!(t(r"$^").is_anchored_end());
    assert!(t(r"$^").is_line_anchored_start());
    assert!(t(r"$^").is_line_anchored_end());
    assert!(t(r"$^|^$").is_anchored_start());
    assert!(t(r"$^|^$").is_anchored_end());
    assert!(t(r"$^|^$").is_line_anchored_start());
    assert!(t(r"$^|^$").is_line_anchored_end());

    // An anchor next to other zero-width assertions is still found.
    assert!(t(r"\b^").is_anchored_start());
    assert!(t(r"$\b").is_anchored_end());
    assert!(t(r"\b^").is_line_anchored_start());
    assert!(t(r"$\b").is_line_anchored_end());
    assert!(t(r"^(?m:^)").is_anchored_start());
    assert!(t(r"(?m:$)$").is_anchored_end());
    assert!(t(r"^(?m:^)").is_line_anchored_start());
    assert!(t(r"(?m:$)$").is_line_anchored_end());
    assert!(t(r"(?m:^)^").is_anchored_start());
    assert!(t(r"$(?m:$)").is_anchored_end());
    assert!(t(r"(?m:^)^").is_line_anchored_start());
    assert!(t(r"$(?m:$)").is_line_anchored_end());

    // Negative examples.
    // Multi-line anchors are not text anchors.
    assert!(!t(r"(?m)^").is_anchored_start());
    assert!(!t(r"(?m)$").is_anchored_end());
    assert!(!t(r"(?m:^$)|$^").is_anchored_start());
    assert!(!t(r"(?m:^$)|$^").is_anchored_end());
    assert!(!t(r"$^|(?m:^$)").is_anchored_start());
    assert!(!t(r"$^|(?m:^$)").is_anchored_end());

    // A literal between the anchor and the edge of the pattern.
    assert!(!t(r"a^").is_anchored_start());
    assert!(!t(r"$a").is_anchored_start());
    assert!(!t(r"a^").is_line_anchored_start());
    assert!(!t(r"$a").is_line_anchored_start());

    assert!(!t(r"a^").is_anchored_end());
    assert!(!t(r"$a").is_anchored_end());
    assert!(!t(r"a^").is_line_anchored_end());
    assert!(!t(r"$a").is_line_anchored_end());

    // Alternations with one unanchored branch.
    assert!(!t(r"^foo|bar").is_anchored_start());
    assert!(!t(r"foo|bar$").is_anchored_end());
    assert!(!t(r"^foo|bar").is_line_anchored_start());
    assert!(!t(r"foo|bar$").is_line_anchored_end());

    // Anchors under `*` may match zero times, so they do not anchor.
    assert!(!t(r"^*").is_anchored_start());
    assert!(!t(r"$*").is_anchored_end());
    assert!(!t(r"^*").is_line_anchored_start());
    assert!(!t(r"$*").is_line_anchored_end());
    assert!(!t(r"^*+").is_anchored_start());
    assert!(!t(r"$*+").is_anchored_end());
    assert!(!t(r"^*+").is_line_anchored_start());
    assert!(!t(r"$*+").is_line_anchored_end());
    assert!(!t(r"^+*").is_anchored_start());
    assert!(!t(r"$+*").is_anchored_end());
    assert!(!t(r"^+*").is_line_anchored_start());
    assert!(!t(r"$+*").is_line_anchored_end());
    assert!(!t(r"(^)*").is_anchored_start());
    assert!(!t(r"($)*").is_anchored_end());
    assert!(!t(r"(^)*").is_line_anchored_start());
    assert!(!t(r"($)*").is_line_anchored_end());
}
3014
/// Checks `is_line_anchored_start`/`is_line_anchored_end` on patterns that
/// use multi-line (`(?m)`) anchors, alone or mixed with text anchors.
#[test]
fn analysis_is_line_anchored() {
    let line_anchored_start = [
        r"(?m)^(foo|bar)",
        r"(?m)^foo|^bar",
        r"(?m)^",
        r"(?m:^$)|$^",
        r"$^|(?m:^$)",
    ];
    for &pattern in line_anchored_start.iter() {
        assert!(
            t(pattern).is_line_anchored_start(),
            "expected is_line_anchored_start for {:?}",
            pattern
        );
    }

    let line_anchored_end = [
        r"(?m)(foo|bar)$",
        r"(?m)foo$|bar$",
        r"(?m)$",
        r"(?m:^$)|$^",
        r"$^|(?m:^$)",
    ];
    for &pattern in line_anchored_end.iter() {
        assert!(
            t(pattern).is_line_anchored_end(),
            "expected is_line_anchored_end for {:?}",
            pattern
        );
    }
}
3032
/// Checks `is_any_anchored_start`/`is_any_anchored_end` for text anchors,
/// multi-line anchors, and anchors of the opposite end.
#[test]
fn analysis_is_any_anchored() {
    // Positive examples.
    for &pattern in [r"^", r"\A"].iter() {
        assert!(
            t(pattern).is_any_anchored_start(),
            "expected is_any_anchored_start for {:?}",
            pattern
        );
    }
    for &pattern in [r"$", r"\z"].iter() {
        assert!(
            t(pattern).is_any_anchored_end(),
            "expected is_any_anchored_end for {:?}",
            pattern
        );
    }

    // Negative examples.
    for &pattern in [r"(?m)^", r"$"].iter() {
        assert!(
            !t(pattern).is_any_anchored_start(),
            "expected !is_any_anchored_start for {:?}",
            pattern
        );
    }
    for &pattern in [r"(?m)$", r"^"].iter() {
        assert!(
            !t(pattern).is_any_anchored_end(),
            "expected !is_any_anchored_end for {:?}",
            pattern
        );
    }
}
3047
/// Checks `Hir::is_match_empty` on patterns that can match the empty string
/// and on patterns that cannot.
#[test]
fn analysis_is_match_empty() {
    // Positive examples.
    let can_match_empty = [
        r"",
        r"()",
        r"()*",
        r"()+",
        r"()?",
        r"a*",
        r"a?",
        r"a{0}",
        r"a{0,}",
        r"a{0,1}",
        r"a{0,10}",
        r"a*|b",
        r"b|a*",
        r"a*a?(abcd)*",
        r"^",
        r"$",
        r"(?m)^",
        r"(?m)$",
        r"\A",
        r"\z",
        r"\B",
    ];
    for &pattern in can_match_empty.iter() {
        assert!(
            t(pattern).is_match_empty(),
            "expected is_match_empty for {:?}",
            pattern
        );
    }
    #[cfg(feature = "unicode-gencat")]
    assert!(t(r"\pL*").is_match_empty());
    // This one goes through `t_bytes` rather than `t`.
    assert!(t_bytes(r"(?-u)\B").is_match_empty());

    // Negative examples.
    let cannot_match_empty = [
        r"a+",
        r"a{1}",
        r"a{1,}",
        r"a{1,2}",
        r"a{1,10}",
        r"b|a",
        r"a*a+(abcd)*",
        r"\b",
        r"(?-u)\b",
    ];
    for &pattern in cannot_match_empty.iter() {
        assert!(
            !t(pattern).is_match_empty(),
            "expected !is_match_empty for {:?}",
            pattern
        );
    }
}
3087
/// Checks `Hir::is_literal`: plain literal text (including the empty
/// pattern) qualifies; anchors, alternations, groups, repetitions and
/// classes do not.
#[test]
fn analysis_is_literal() {
    // Positive examples.
    for &pattern in [r"", r"a", r"ab", r"abc", r"(?m)abc"].iter() {
        assert!(
            t(pattern).is_literal(),
            "expected is_literal for {:?}",
            pattern
        );
    }

    // Negative examples.
    let non_literals =
        [r"^", r"a|b", r"(a)", r"a+", r"foo(a)", r"(a)foo", r"[a]"];
    for &pattern in non_literals.iter() {
        assert!(
            !t(pattern).is_literal(),
            "expected !is_literal for {:?}",
            pattern
        );
    }
}
3106
/// Checks `Hir::is_alternation_literal`: literals and alternations of
/// literals qualify; anything involving anchors, groups, repetitions or
/// classes (even in one branch) does not.
#[test]
fn analysis_is_alternation_literal() {
    // Positive examples.
    let alternation_literals = [
        r"",
        r"a",
        r"ab",
        r"abc",
        r"(?m)abc",
        r"a|b",
        r"a|b|c",
        r"foo|bar",
        r"foo|bar|baz",
    ];
    for &pattern in alternation_literals.iter() {
        assert!(
            t(pattern).is_alternation_literal(),
            "expected is_alternation_literal for {:?}",
            pattern
        );
    }

    // Negative examples.
    let others = [
        r"^",
        r"(a)",
        r"a+",
        r"foo(a)",
        r"(a)foo",
        r"[a]",
        r"[a]|b",
        r"a|[b]",
        r"(a)|b",
        r"a|(b)",
    ];
    for &pattern in others.iter() {
        assert!(
            !t(pattern).is_alternation_literal(),
            "expected !is_alternation_literal for {:?}",
            pattern
        );
    }
}
3132 }
3133