// See: https://github.com/rust-lang/regex/issues/48
//
// Every pattern below is malformed. The test asserts that building each one
// through `regex_new!` yields an error value (rather than, per the test
// name, crashing).
#[test]
fn invalid_regexes_no_crash() {
    assert!(regex_new!("(*)").is_err());
    assert!(regex_new!("(?:?)").is_err());
    assert!(regex_new!("(?)").is_err());
    assert!(regex_new!("*").is_err());
}
9 
// See: https://github.com/rust-lang/regex/issues/98
//
// A large bounded repetition (`{1,2500}`) is built and run against a
// one-byte haystack; the only expected match is the span (0, 1).
#[test]
fn regression_many_repeat_stack_overflow() {
    let re = regex!("^.{1,2500}");
    assert_eq!(vec![(0, 1)], findall!(re, "a"));
}
16 
// See: https://github.com/rust-lang/regex/issues/555
//
// A counted repetition placed directly after a bare flags directive has
// nothing to repeat; building the pattern is asserted to fail.
#[test]
fn regression_invalid_repetition_expr() {
    assert!(regex_new!("(?m){1,1}").is_err());
}
22 
// See: https://github.com/rust-lang/regex/issues/527
//
// A flags directive nested inside otherwise empty groups is asserted to be
// accepted (construction returns an `is_ok()` value).
#[test]
fn regression_invalid_flags_expression() {
    assert!(regex_new!("(((?x)))").is_ok());
}
28 
29 // See: https://github.com/rust-lang/regex/issues/75
30 mat!(regression_unsorted_binary_search_1, r"(?i-u)[a_]+", "A_", Some((0, 2)));
31 mat!(regression_unsorted_binary_search_2, r"(?i-u)[A_]+", "a_", Some((0, 2)));
32 
// See: https://github.com/rust-lang/regex/issues/99
//
// A case-insensitive negated class `[^x]` is expected to find nothing in
// either "x" or "X". Only compiled when the `unicode-case` feature is on.
#[cfg(feature = "unicode-case")]
mat!(regression_negated_char_class_1, r"(?i)[^x]", "x", None);
#[cfg(feature = "unicode-case")]
mat!(regression_negated_char_class_2, r"(?i)[^x]", "X", None);
38 
39 // See: https://github.com/rust-lang/regex/issues/101
40 mat!(regression_ascii_word_underscore, r"[[:word:]]", "_", Some((0, 1)));
41 
// See: https://github.com/rust-lang/regex/issues/129
//
// A named group that follows a repeated capture group must still be
// retrievable by name: for "abx" the group `foo` is expected to hold "x".
#[test]
fn regression_captures_rep() {
    let re = regex!(r"([a-f]){2}(?P<foo>[x-z])");
    let caps = re.captures(text!("abx")).unwrap();
    assert_eq!(match_text!(caps.name("foo").unwrap()), text!("x"));
}
49 
50 // See: https://github.com/rust-lang/regex/issues/153
51 mat!(regression_alt_in_alt1, r"ab?|$", "az", Some((0, 1)));
52 mat!(regression_alt_in_alt2, r"^(.*?)(\n|\r\n?|$)", "ab\rcd", Some((0, 3)));
53 
54 // See: https://github.com/rust-lang/regex/issues/169
55 mat!(regression_leftmost_first_prefix, r"z*azb", "azb", Some((0, 3)));
56 
// See: https://github.com/rust-lang/regex/issues/76
//
// With `(?i)`, the lowercase-letter class `\p{Ll}` is expected to match
// uppercase letters too. The haystack is five Greek letters of two UTF-8
// bytes each, hence the expected span (0, 10).
#[cfg(all(feature = "unicode-case", feature = "unicode-gencat"))]
mat!(uni_case_lower_nocase_flag, r"(?i)\p{Ll}+", "ΛΘΓΔα", Some((0, 10)));
60 
61 // See: https://github.com/rust-lang/regex/issues/191
62 mat!(many_alternates, r"1|2|3|4|5|6|7|8|9|10|int", "int", Some((0, 3)));
63 
64 // burntsushi was bad and didn't create an issue for this bug.
65 mat!(anchored_prefix1, r"^a[[:^space:]]", "a ", None);
66 mat!(anchored_prefix2, r"^a[[:^space:]]", "foo boo a ", None);
67 mat!(anchored_prefix3, r"^-[a-z]", "r-f", None);
68 
// See: https://github.com/rust-lang/regex/issues/204
//
// Splitting on the zero-width word boundary `\b`: the expected pieces
// alternate between word and non-word runs, starting with an empty piece
// because the haystack begins at a boundary.
#[cfg(feature = "unicode-perl")]
split!(
    split_on_word_boundary,
    r"\b",
    r"Should this (work?)",
    &[
        t!(""),
        t!("Should"),
        t!(" "),
        t!("this"),
        t!(" ("),
        t!("work"),
        t!("?)")
    ]
);
// Iterating `\b` over "a b c": every offset from 0 through 5 sits between a
// word byte and a non-word byte (or a haystack edge), so six empty matches
// are expected.
#[cfg(feature = "unicode-perl")]
matiter!(
    word_boundary_dfa,
    r"\b",
    "a b c",
    (0, 0),
    (1, 1),
    (2, 2),
    (3, 3),
    (4, 4),
    (5, 5)
);
97 
98 // See: https://github.com/rust-lang/regex/issues/268
99 matiter!(partial_anchor, r"^a|b", "ba", (0, 1));
100 
101 // See: https://github.com/rust-lang/regex/issues/280
102 ismatch!(partial_anchor_alternate_begin, r"^a|z", "yyyyya", false);
103 ismatch!(partial_anchor_alternate_end, r"a$|z", "ayyyyy", false);
104 
105 // See: https://github.com/rust-lang/regex/issues/289
106 mat!(lits_unambiguous1, r"(ABC|CDA|BC)X", "CDAX", Some((0, 4)));
107 
108 // See: https://github.com/rust-lang/regex/issues/291
109 mat!(
110     lits_unambiguous2,
111     r"((IMG|CAM|MG|MB2)_|(DSCN|CIMG))(?P<n>[0-9]+)$",
112     "CIMG2341",
113     Some((0, 8)),
114     Some((0, 4)),
115     None,
116     Some((0, 4)),
117     Some((4, 8))
118 );
119 
120 // See: https://github.com/rust-lang/regex/issues/271
121 mat!(endl_or_wb, r"(?m:$)|(?-u:\b)", "\u{6084e}", Some((4, 4)));
122 mat!(zero_or_end, r"(?i-u:\x00)|$", "\u{e682f}", Some((4, 4)));
123 mat!(y_or_endl, r"(?i-u:y)|(?m:$)", "\u{b4331}", Some((4, 4)));
124 #[cfg(feature = "unicode-perl")]
125 mat!(wb_start_x, r"(?u:\b)^(?-u:X)", "X", Some((0, 1)));
126 
127 // See: https://github.com/rust-lang/regex/issues/321
128 ismatch!(strange_anchor_non_complete_prefix, r"a^{2}", "", false);
129 ismatch!(strange_anchor_non_complete_suffix, r"${2}a", "", false);
130 
131 // See: https://github.com/BurntSushi/ripgrep/issues/1203
132 ismatch!(reverse_suffix1, r"[0-4][0-4][0-4]000", "153.230000", true);
133 ismatch!(reverse_suffix2, r"[0-9][0-9][0-9]000", "153.230000\n", true);
134 matiter!(reverse_suffix3, r"[0-9][0-9][0-9]000", "153.230000\n", (4, 10));
135 
136 // See: https://github.com/rust-lang/regex/issues/334
137 // See: https://github.com/rust-lang/regex/issues/557
138 mat!(
139     captures_after_dfa_premature_end1,
140     r"a(b*(X|$))?",
141     "abcbX",
142     Some((0, 1)),
143     None,
144     None
145 );
146 mat!(
147     captures_after_dfa_premature_end2,
148     r"a(bc*(X|$))?",
149     "abcbX",
150     Some((0, 1)),
151     None,
152     None
153 );
154 mat!(captures_after_dfa_premature_end3, r"(aa$)?", "aaz", Some((0, 0)));
155 
156 // See: https://github.com/rust-lang/regex/issues/437
157 ismatch!(
158     literal_panic,
159     r"typename type\-parameter\-[0-9]+\-[0-9]+::.+",
160     "test",
161     false
162 );
163 
164 // See: https://github.com/rust-lang/regex/issues/533
165 ismatch!(
166     blank_matches_nothing_between_space_and_tab,
167     r"[[:blank:]]",
168     "\u{a}\u{b}\u{c}\u{d}\u{e}\u{f}\
169      \u{10}\u{11}\u{12}\u{13}\u{14}\u{15}\u{16}\u{17}\
170      \u{18}\u{19}\u{1a}\u{1b}\u{1c}\u{1d}\u{1e}\u{1f}",
171     false
172 );
173 
174 ismatch!(
175     inverted_blank_matches_everything_between_space_and_tab,
176     r"^[[:^blank:]]+$",
177     "\u{a}\u{b}\u{c}\u{d}\u{e}\u{f}\
178      \u{10}\u{11}\u{12}\u{13}\u{14}\u{15}\u{16}\u{17}\
179      \u{18}\u{19}\u{1a}\u{1b}\u{1c}\u{1d}\u{1e}\u{1f}",
180     true
181 );
182 
183 // Tests that our Aho-Corasick optimization works correctly. It only
184 // kicks in when we have >32 literals. By "works correctly," we mean that
185 // leftmost-first match semantics are properly respected. That is, samwise
186 // should match, not sam.
187 mat!(
188     ahocorasick1,
189     "samwise|sam|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|\
190      A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z",
191     "samwise",
192     Some((0, 7))
193 );
194 
// See: https://github.com/BurntSushi/ripgrep/issues/1247
//
// Uses the byte-oriented regex API directly so the haystack can hold the
// byte 0xE4, which is not valid UTF-8 on its own. "s" followed by that byte
// does not satisfy `s(?:[ab])`, so zero matches are expected.
#[test]
#[cfg(feature = "unicode-perl")]
fn regression_nfa_stops1() {
    let re = ::regex::bytes::Regex::new(r"\bs(?:[ab])").unwrap();
    assert_eq!(0, re.find_iter(b"s\xE4").count());
}
202 
// See: https://github.com/rust-lang/regex/issues/640
//
// The `(?i)` flag is set inside the first group only, so it must not leak
// into the `Bar` branch: "foo" and "Foo" are expected to match, lowercase
// "bar" at 8..11 is not, and "Bar" matches at (12, 15).
#[cfg(feature = "unicode-case")]
matiter!(
    flags_are_unset,
    r"((?i)foo)|Bar",
    "foo Foo bar Bar",
    (0, 3),
    (4, 7),
    (12, 15)
);
213 
214 // See: https://github.com/rust-lang/regex/issues/659
215 //
216 // Note that 'Ј' is not 'j', but cyrillic Je
217 // https://en.wikipedia.org/wiki/Je_(Cyrillic)
218 ismatch!(empty_group_match, r"()Ј01", "zЈ01", true);
219 matiter!(empty_group_find, r"()Ј01", "zЈ01", (1, 5));
220