1 use regex::internal::ExecBuilder;
2 
3 /// Given a regex, check if all of the backends produce the same
4 /// results on a number of different inputs.
5 ///
6 /// For now this just throws quickcheck at the problem, which
7 /// is not very good because it only really tests half of the
8 /// problem space. It is pretty unlikely that a random string
9 /// will match any given regex, so this will probably just
10 /// be checking that the different backends fail in the same
11 /// way. This is still worthwhile to test, but is definitely not
12 /// the whole story.
13 ///
14 /// TODO(ethan): In order to cover the other half of the problem
15 /// space, we should generate a random matching string by inspecting
16 /// the AST of the input regex. The right way to do this probably
17 /// involves adding a custom Arbitrary instance around a couple
18 /// of newtypes. That way we can respect the quickcheck size hinting
19 /// and shrinking and whatnot.
backends_are_consistent(re: &str) -> Result<u64, String>20 pub fn backends_are_consistent(re: &str) -> Result<u64, String> {
21     let standard_backends = vec![
22         (
23             "bounded_backtracking_re",
24             ExecBuilder::new(re)
25                 .bounded_backtracking()
26                 .build()
27                 .map(|exec| exec.into_regex())
28                 .map_err(|err| format!("{}", err))?,
29         ),
30         (
31             "pikevm_re",
32             ExecBuilder::new(re)
33                 .nfa()
34                 .build()
35                 .map(|exec| exec.into_regex())
36                 .map_err(|err| format!("{}", err))?,
37         ),
38         (
39             "default_re",
40             ExecBuilder::new(re)
41                 .build()
42                 .map(|exec| exec.into_regex())
43                 .map_err(|err| format!("{}", err))?,
44         ),
45     ];
46 
47     let utf8bytes_backends = vec![
48         (
49             "bounded_backtracking_utf8bytes_re",
50             ExecBuilder::new(re)
51                 .bounded_backtracking()
52                 .bytes(true)
53                 .build()
54                 .map(|exec| exec.into_regex())
55                 .map_err(|err| format!("{}", err))?,
56         ),
57         (
58             "pikevm_utf8bytes_re",
59             ExecBuilder::new(re)
60                 .nfa()
61                 .bytes(true)
62                 .build()
63                 .map(|exec| exec.into_regex())
64                 .map_err(|err| format!("{}", err))?,
65         ),
66         (
67             "default_utf8bytes_re",
68             ExecBuilder::new(re)
69                 .bytes(true)
70                 .build()
71                 .map(|exec| exec.into_regex())
72                 .map_err(|err| format!("{}", err))?,
73         ),
74     ];
75 
76     let bytes_backends = vec![
77         (
78             "bounded_backtracking_bytes_re",
79             ExecBuilder::new(re)
80                 .bounded_backtracking()
81                 .only_utf8(false)
82                 .build()
83                 .map(|exec| exec.into_byte_regex())
84                 .map_err(|err| format!("{}", err))?,
85         ),
86         (
87             "pikevm_bytes_re",
88             ExecBuilder::new(re)
89                 .nfa()
90                 .only_utf8(false)
91                 .build()
92                 .map(|exec| exec.into_byte_regex())
93                 .map_err(|err| format!("{}", err))?,
94         ),
95         (
96             "default_bytes_re",
97             ExecBuilder::new(re)
98                 .only_utf8(false)
99                 .build()
100                 .map(|exec| exec.into_byte_regex())
101                 .map_err(|err| format!("{}", err))?,
102         ),
103     ];
104 
105     Ok(string_checker::check_backends(&standard_backends)?
106         + string_checker::check_backends(&utf8bytes_backends)?
107         + bytes_checker::check_backends(&bytes_backends)?)
108 }
109 
//
// A consistency checker parameterized by the input type (&str or &[u8]).
//
// `checker!(module_name, RegexType, mk_input)` expands to a private module
// `module_name` exposing `check_backends`. `RegexType` is the regex type
// under test and `mk_input` is an expression callable as `mk_input(gen)`
// with a `&mut quickcheck::Gen`; it produces the owned input for one test
// case.
//

macro_rules! checker {
    ($module_name:ident, $regex_type:path, $mk_input:expr) => {
        mod $module_name {
            use quickcheck;
            use quickcheck::{Arbitrary, TestResult};

            /// Compares every backend after the first against the first
            /// one and returns the sum of the per-pair quickcheck counts.
            ///
            /// An empty `backends` slice has nothing to compare and yields
            /// `Ok(0)`. The first inconsistent pair aborts the run with a
            /// message naming both backends.
            pub fn check_backends(
                backends: &[(&str, $regex_type)],
            ) -> Result<u64, String> {
                // `split_first` avoids the out-of-range slice panic that
                // `backends[1..]` would raise on an empty slice.
                let (reference, others) = match backends.split_first() {
                    Some(parts) => parts,
                    None => return Ok(0),
                };

                let mut total_passed = 0;
                for other in others {
                    total_passed += quickcheck_regex_eq(reference, other)?;
                }

                Ok(total_passed)
            }

            /// Runs quickcheck on one pair of named regexes, turning a
            /// failing result into a message naming both backends.
            fn quickcheck_regex_eq(
                &(name1, ref re1): &(&str, $regex_type),
                &(name2, ref re2): &(&str, $regex_type),
            ) -> Result<u64, String> {
                let property =
                    RegexEqualityTest::new(re1.clone(), re2.clone());

                quickcheck::QuickCheck::new().quicktest(property).map_err(
                    |err| {
                        // The trailing `\` swallows the newline and the
                        // next line's indentation, so the separating space
                        // has to come before it.
                        format!(
                            "{}(/{}/) and {}(/{}/) are inconsistent. \
                             QuickCheck Err: {:?}",
                            name1, re1, name2, re2, err
                        )
                    },
                )
            }

            /// A quickcheck property asserting that two regexes behave
            /// identically on a generated input.
            struct RegexEqualityTest {
                re1: $regex_type,
                re2: $regex_type,
            }

            impl RegexEqualityTest {
                fn new(re1: $regex_type, re2: $regex_type) -> Self {
                    RegexEqualityTest { re1, re2 }
                }
            }

            impl quickcheck::Testable for RegexEqualityTest {
                /// Generates one input and compares `find`, `captures`,
                /// `find_iter`, `captures_iter` and `split` across the two
                /// regexes, reporting the first API that disagrees.
                fn result(&self, gen: &mut quickcheck::Gen) -> TestResult {
                    let input = $mk_input(gen);
                    let input = &input;

                    let mismatch = |what: &str| {
                        TestResult::error(format!(
                            "{} mismatch input={:?}",
                            what, input
                        ))
                    };

                    if self.re1.find(input) != self.re2.find(input) {
                        return mismatch("find");
                    }

                    match (self.re1.captures(input), self.re2.captures(input))
                    {
                        (None, None) => {}
                        (Some(cap1), Some(cap2)) => {
                            if !cap1.iter().eq(cap2.iter()) {
                                return mismatch("captures");
                            }
                        }
                        // Only one of the two backends matched.
                        _ => return mismatch("captures"),
                    }

                    // `Iterator::eq` also compares lengths. A plain `zip`
                    // stops at the shorter side and would hide a backend
                    // that reports extra (or too few) matches.
                    if !self.re1.find_iter(input).eq(self.re2.find_iter(input))
                    {
                        return mismatch("find_iter");
                    }

                    // The capture sets are compared group by group, so the
                    // two iterators are advanced in lock-step by hand.
                    let mut ci1 = self.re1.captures_iter(input);
                    let mut ci2 = self.re2.captures_iter(input);
                    loop {
                        match (ci1.next(), ci2.next()) {
                            (None, None) => break,
                            (Some(cap1), Some(cap2)) => {
                                if !cap1.iter().eq(cap2.iter()) {
                                    return mismatch("captures_iter");
                                }
                            }
                            // One backend ran out of matches first.
                            _ => return mismatch("captures_iter"),
                        }
                    }

                    if !self.re1.split(input).eq(self.re2.split(input)) {
                        return mismatch("split");
                    }

                    TestResult::passed()
                }
            }
        } // mod
    }; // rule case
} // macro_rules!
234 
235 checker!(string_checker, ::regex::Regex, |gen| String::arbitrary(gen));
236 checker!(bytes_checker, ::regex::bytes::Regex, |gen| Vec::<u8>::arbitrary(
237     gen
238 ));
239