1 // Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10 
11 use quickcheck::{Arbitrary, Gen, Testable, QuickCheck, StdGen};
12 
13 use {
14     Expr, ExprBuilder,
15     CharClass, ClassRange, ByteClass, ByteRange, Repeater, dec_char,
16 };
17 
qc<T: Testable>(t: T)18 fn qc<T: Testable>(t: T) {
19     QuickCheck::new()
20         .tests(10_000)
21         .max_tests(20_000)
22         .quickcheck(t);
23 }
24 
class(ranges: &[(char, char)]) -> CharClass25 fn class(ranges: &[(char, char)]) -> CharClass {
26     let ranges = ranges.iter().cloned()
27                        .map(|(c1, c2)| ClassRange::new(c1, c2)).collect();
28     CharClass::new(ranges)
29 }
30 
31 // Test invariants for canonicalizing character classes.
32 
33 #[test]
negate()34 fn negate() {
35     fn prop(ranges: Vec<(char, char)>) -> bool {
36         let expected = class(&ranges).canonicalize();
37         let got = class(&ranges).negate().negate();
38         expected == got
39     }
40     qc(prop as fn(Vec<(char, char)>) -> bool);
41 }
42 
43 #[test]
classes_are_sorted_and_nonoverlapping()44 fn classes_are_sorted_and_nonoverlapping() {
45     fn prop(ranges: Vec<(char, char)>) -> bool {
46         class(&ranges)
47             .canonicalize()
48             .windows(2)
49             .all(|w| w[0].end < dec_char(w[1].start))
50     }
51     qc(prop as fn(Vec<(char, char)>) -> bool);
52 }
53 
54 #[test]
valid_class_ranges()55 fn valid_class_ranges() {
56     fn prop(ranges: Vec<(char, char)>) -> bool {
57         class(&ranges).canonicalize().into_iter().all(|r| r.start <= r.end)
58     }
59     qc(prop as fn(Vec<(char, char)>) -> bool);
60 }
61 
62 #[test]
intersection()63 fn intersection() {
64     fn prop(ranges1: Vec<(char, char)>, ranges2: Vec<(char, char)>) -> bool {
65         let class1 = class(&ranges1).canonicalize();
66         let class2 = class(&ranges2).canonicalize();
67 
68         let mut expected = CharClass::empty();
69         // This is inefficient but correct.
70         for range1 in &class1 {
71             for range2 in &class2 {
72                 if let Some(intersection) = range1.intersection(range2) {
73                     expected.ranges.push(intersection);
74                 }
75             }
76         }
77         expected = expected.canonicalize();
78 
79         let got = class1.intersection(&class2);
80         expected == got
81     }
82     qc(prop as fn(Vec<(char, char)>, Vec<(char, char)>) -> bool);
83 }
84 
85 /// A wrapper type for generating "regex-like" Unicode strings.
86 ///
87 /// In particular, this type's `Arbitrary` impl specifically biases toward
88 /// special regex characters to make test cases more interesting.
89 #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
90 struct RegexLikeString(String);
91 
92 impl Arbitrary for RegexLikeString {
arbitrary<G: Gen>(g: &mut G) -> RegexLikeString93     fn arbitrary<G: Gen>(g: &mut G) -> RegexLikeString {
94         const SPECIAL: &'static [char] = &[
95             '\\', '.', '+', '*', '?', '(', ')', '|', '[', ']', '{', '}',
96             '^', '$',
97         ];
98         // Generating random Unicode strings results in mostly uninteresting
99         // regexes. Namely, they'll mostly just be literals.
100         // To make properties using regex strings more interesting, we bias
101         // toward selecting characters of significance to a regex.
102         let size = { let s = g.size(); g.gen_range(0, s) };
103         RegexLikeString((0..size).map(|_| {
104             if g.gen_weighted_bool(3) {
105                 *g.choose(SPECIAL).unwrap()
106             } else {
107                 g.gen()
108             }
109         }).collect())
110     }
111 
shrink(&self) -> Box<Iterator<Item=RegexLikeString>>112     fn shrink(&self) -> Box<Iterator<Item=RegexLikeString>> {
113         // The regular `String` shrinker is good enough.
114         Box::new(self.0.shrink().map(RegexLikeString))
115     }
116 }
117 
118 /// A special type for generating small non-zero sized ASCII strings.
119 #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
120 struct SmallAscii(String);
121 
122 impl Arbitrary for SmallAscii {
arbitrary<G: Gen>(g: &mut G) -> SmallAscii123     fn arbitrary<G: Gen>(g: &mut G) -> SmallAscii {
124         use std::char::from_u32;
125         let size = g.gen_range(1, 5);
126         SmallAscii((0..size)
127                    .map(|_| from_u32(g.gen_range(97, 123)).unwrap())
128                    .collect())
129     }
130 
shrink(&self) -> Box<Iterator<Item=SmallAscii>>131     fn shrink(&self) -> Box<Iterator<Item=SmallAscii>> {
132         Box::new(self.0.shrink().map(SmallAscii))
133     }
134 }
135 
136 #[test]
parser_never_panics()137 fn parser_never_panics() {
138     fn prop(s: RegexLikeString) -> bool {
139         let _ = Expr::parse(&s.0); true
140     }
141     qc(prop as fn(RegexLikeString) -> bool);
142 }
143 
144 // Testing entire expressions.
145 //
146 // We only have one test at the moment, but the machinery could be useful
147 // for other things.
148 //
149 // In particular, Russ Cox writes about testing regexes by comparing the
150 // strings they match with other regex implementations. A fuzzer/shrinker
151 // (which is what's implemented below) would be a great way to drive that
152 // process. ---AG
153 
154 impl Arbitrary for Expr {
arbitrary<G: Gen>(g: &mut G) -> Expr155     fn arbitrary<G: Gen>(g: &mut G) -> Expr {
156         let e = fix_capture_indices(gen_expr(g, 0, ExprType::Anything));
157         e.simplify(200).unwrap()
158     }
159 
shrink(&self) -> Box<Iterator<Item=Expr>>160     fn shrink(&self) -> Box<Iterator<Item=Expr>> {
161         use Expr::*;
162 
163         let nada = || Box::new(None.into_iter());
164         let es: Box<Iterator<Item=Expr>> = match *self {
165             Empty | AnyChar | AnyCharNoNL | AnyByte | AnyByteNoNL
166             | StartLine | EndLine | StartText | EndText
167             | WordBoundary | NotWordBoundary
168             | WordBoundaryAscii | NotWordBoundaryAscii => nada(),
169             Literal { ref chars, .. } if chars.len() == 1 => nada(),
170             Literal { ref chars, casei } => {
171                 Box::new((chars.clone(), casei)
172                          .shrink()
173                          .filter(|&(ref chars, _)| chars.len() > 0)
174                          .map(|(chars, casei)| {
175                              Literal { chars: chars, casei: casei }
176                          }))
177             }
178             LiteralBytes { ref bytes, .. } if bytes.len() == 1 => nada(),
179             LiteralBytes { ref bytes, casei } => {
180                 Box::new((bytes.clone(), casei)
181                          .shrink()
182                          .filter(|&(ref bytes, _)| bytes.len() > 0)
183                          .map(|(bytes, casei)| {
184                              LiteralBytes { bytes: bytes, casei: casei }
185                          }))
186             }
187             Class(ref cls) => Box::new(cls.shrink().map(Class)),
188             ClassBytes(ref cls) => Box::new(cls.shrink().map(ClassBytes)),
189             Group { ref e, ref i, ref name } => {
190                 let (i, name) = (i.clone(), name.clone());
191                 Box::new(e.clone().shrink()
192                           .chain(e.clone().shrink()
193                                   .map(move |e| Group {
194                                       e: Box::new(e),
195                                       i: i.clone(),
196                                       name: name.clone(),
197                                   })))
198             }
199             Repeat { ref e, ref r, greedy } => {
200                 Box::new((*e.clone(), r.clone())
201                          .shrink()
202                          .filter(|&(ref e, _)| e.can_repeat())
203                          .map(move |(e, r)| Repeat {
204                              e: Box::new(e),
205                              r: r,
206                              greedy: greedy,
207                          }))
208             }
209             // Concat(ref es) if es.len() <= 2 => nada(),
210             Concat(ref es) => {
211                 Box::new(es.clone()
212                            .shrink()
213                            .filter(|es| es.len() > 0)
214                            .map(|mut es| if es.len() == 1 {
215                                es.pop().unwrap()
216                            } else {
217                                Concat(es)
218                            }))
219             }
220             // Alternate(ref es) if es.len() <= 2 => nada(),
221             Alternate(ref es) => {
222                 Box::new(es.clone()
223                            .shrink()
224                            .filter(|es| es.len() > 0)
225                            .map(|mut es| if es.len() == 1 {
226                                es.pop().unwrap()
227                            } else {
228                                Alternate(es)
229                            }))
230             }
231         };
232         Box::new(es.map(|e| fix_capture_indices(e).simplify(200).unwrap()))
233     }
234 }
235 
236 enum ExprType {
237     NoSequences, // disallow concat/alternate
238     Anything,
239 }
240 
gen_expr<G: Gen>(g: &mut G, depth: u32, ty: ExprType) -> Expr241 fn gen_expr<G: Gen>(g: &mut G, depth: u32, ty: ExprType) -> Expr {
242     use Expr::*;
243     let ub = match (depth as usize >= g.size(), ty) {
244         (true, _) => 16,
245         (false, ExprType::NoSequences) => 18,
246         (false, ExprType::Anything) => 20,
247     };
248     match g.gen_range(1, ub) {
249         0 => Empty,
250         1 => Literal {
251             chars: SmallAscii::arbitrary(g).0.chars().collect(),
252             casei: g.gen(),
253         },
254         2 => LiteralBytes {
255             bytes: SmallAscii::arbitrary(g).0.as_bytes().to_owned(),
256             casei: g.gen(),
257         },
258         3 => AnyChar,
259         4 => AnyCharNoNL,
260         5 => AnyByte,
261         6 => AnyByteNoNL,
262         7 => Class(CharClass::arbitrary(g)),
263         8 => StartLine,
264         9 => EndLine,
265         10 => StartText,
266         11 => EndText,
267         12 => WordBoundary,
268         13 => NotWordBoundary,
269         14 => WordBoundaryAscii,
270         15 => NotWordBoundaryAscii,
271         16 => gen_group_expr(g, depth + 1),
272         17 => Repeat {
273             e: Box::new(gen_repeatable_expr(g, depth + 1)),
274             r: Repeater::arbitrary(g),
275             greedy: bool::arbitrary(g),
276         },
277         18 => {
278             let size = { let s = g.size(); g.gen_range(2, s) };
279             Concat((0..size)
280                    .map(|_| {
281                        gen_expr(g, depth + 1, ExprType::NoSequences)
282                     })
283                    .collect())
284         }
285         19 => {
286             let size = { let s = g.size(); g.gen_range(2, s) };
287             Alternate((0..size)
288                       .map(|_| {
289                           gen_expr(g, depth + 1, ExprType::NoSequences)
290                       })
291                       .collect())
292         }
293         _ => unreachable!()
294     }
295 }
296 
gen_repeatable_expr<G: Gen>(g: &mut G, depth: u32) -> Expr297 fn gen_repeatable_expr<G: Gen>(g: &mut G, depth: u32) -> Expr {
298     use Expr::*;
299     match g.gen_range(1, 10) {
300         0 => Empty,
301         1 => Literal {
302             chars: vec![Arbitrary::arbitrary(g)],
303             casei: g.gen(),
304         },
305         2 => LiteralBytes {
306             bytes: vec![Arbitrary::arbitrary(g)],
307             casei: g.gen(),
308         },
309         3 => AnyChar,
310         4 => AnyCharNoNL,
311         5 => AnyByte,
312         6 => AnyByteNoNL,
313         7 => Class(CharClass::arbitrary(g)),
314         8 => ClassBytes(ByteClass::arbitrary(g)),
315         9 => gen_group_expr(g, depth + 1),
316         _ => unreachable!(),
317     }
318 }
319 
gen_group_expr<G: Gen>(g: &mut G, depth: u32) -> Expr320 fn gen_group_expr<G: Gen>(g: &mut G, depth: u32) -> Expr {
321     let (i, name) = if g.gen() {
322         (None, None)
323     } else {
324         (Some(0), if g.gen() {
325             Some(SmallAscii::arbitrary(g).0)
326         } else {
327             None
328         })
329     };
330     Expr::Group {
331         e: Box::new(gen_expr(g, depth + 1, ExprType::Anything)),
332         i: i,
333         name: name,
334     }
335 }
336 
fix_capture_indices(e: Expr) -> Expr337 fn fix_capture_indices(e: Expr) -> Expr {
338     fn bx(e: Expr) -> Box<Expr> { Box::new(e) }
339     fn fix(e: Expr, capi: &mut usize, names: &mut Vec<String>) -> Expr {
340         use Expr::*;
341         match e {
342             Group { e, i: Some(_), mut name } => {
343                 *capi += 1;
344                 let i = *capi;
345                 let mut dupe_name = false;
346                 if let Some(ref n1) = name {
347                     if names.iter().any(|n2| n1 == n2) {
348                         dupe_name = true;
349                     } else {
350                         names.push(n1.clone());
351                     }
352                 }
353                 if dupe_name { name = None; }
354                 Group { e: bx(fix(*e, capi, names)), i: Some(i), name: name }
355             }
356             Group { e, i, name } => {
357                 Group { e: bx(fix(*e, capi, names)), i: i, name: name }
358             }
359             Repeat { e, r, greedy } => {
360                 Repeat { e: bx(fix(*e, capi, names)), r: r, greedy: greedy }
361             }
362             Concat(es) =>
363                 Concat(es.into_iter().map(|e| fix(e, capi, names)).collect()),
364             Alternate(es) =>
365                 Alternate(es.into_iter().map(|e| fix(e, capi, names)).collect()),
366             e => e,
367         }
368     }
369     fix(e, &mut 0, &mut vec![])
370 }
371 
372 impl Arbitrary for Repeater {
arbitrary<G: Gen>(g: &mut G) -> Repeater373     fn arbitrary<G: Gen>(g: &mut G) -> Repeater {
374         use Repeater::*;
375         match g.gen_range(0, 4) {
376             0 => ZeroOrOne,
377             1 => ZeroOrMore,
378             2 => OneOrMore,
379             3 => {
380                 use std::cmp::{max, min};
381                 let n1 = Arbitrary::arbitrary(g);
382                 let n2 = Arbitrary::arbitrary(g);
383                 Range {
384                     min: min(n1, n2),
385                     max: if g.gen() { None } else { Some(max(n1, n2)) },
386                 }
387             },
388             _ => unreachable!(),
389         }
390     }
391 
shrink(&self) -> Box<Iterator<Item=Repeater>>392     fn shrink(&self) -> Box<Iterator<Item=Repeater>> {
393         use Repeater::*;
394         match *self {
395             ZeroOrOne | ZeroOrMore | OneOrMore => Box::new(None.into_iter()),
396             Range { min, max } => {
397                 Box::new((min, max)
398                          .shrink()
399                          .map(|(min, max)| Range { min: min, max: max }))
400             }
401         }
402     }
403 }
404 
405 impl Arbitrary for CharClass {
arbitrary<G: Gen>(g: &mut G) -> CharClass406     fn arbitrary<G: Gen>(g: &mut G) -> CharClass {
407         let mut ranges: Vec<ClassRange> = Arbitrary::arbitrary(g);
408         if ranges.is_empty() {
409             ranges.push(Arbitrary::arbitrary(g));
410         }
411         let cls = CharClass { ranges: ranges }.canonicalize();
412         if g.gen() { cls.case_fold() } else { cls }
413     }
414 
shrink(&self) -> Box<Iterator<Item=CharClass>>415     fn shrink(&self) -> Box<Iterator<Item=CharClass>> {
416         Box::new(self.ranges.clone()
417                  .shrink()
418                  .filter(|ranges| ranges.len() > 0)
419                  .map(|ranges| CharClass { ranges: ranges }.canonicalize()))
420     }
421 }
422 
423 impl Arbitrary for ClassRange {
arbitrary<G: Gen>(g: &mut G) -> ClassRange424     fn arbitrary<G: Gen>(g: &mut G) -> ClassRange {
425         use std::char::from_u32;
426         ClassRange::new(
427             from_u32(g.gen_range(97, 123)).unwrap(),
428             from_u32(g.gen_range(97, 123)).unwrap(),
429         )
430     }
431 
shrink(&self) -> Box<Iterator<Item=ClassRange>>432     fn shrink(&self) -> Box<Iterator<Item=ClassRange>> {
433         Box::new((self.start, self.end)
434                  .shrink().map(|(s, e)| ClassRange::new(s, e)))
435     }
436 }
437 
438 impl Arbitrary for ByteClass {
arbitrary<G: Gen>(g: &mut G) -> ByteClass439     fn arbitrary<G: Gen>(g: &mut G) -> ByteClass {
440         let mut ranges: Vec<ByteRange> = Arbitrary::arbitrary(g);
441         if ranges.is_empty() {
442             ranges.push(Arbitrary::arbitrary(g));
443         }
444         let cls = ByteClass { ranges: ranges }.canonicalize();
445         if g.gen() { cls.case_fold() } else { cls }
446     }
447 
shrink(&self) -> Box<Iterator<Item=ByteClass>>448     fn shrink(&self) -> Box<Iterator<Item=ByteClass>> {
449         Box::new(self.ranges.clone()
450                  .shrink()
451                  .filter(|ranges| ranges.len() > 0)
452                  .map(|ranges| ByteClass { ranges: ranges }.canonicalize()))
453     }
454 }
455 
456 impl Arbitrary for ByteRange {
arbitrary<G: Gen>(g: &mut G) -> ByteRange457     fn arbitrary<G: Gen>(g: &mut G) -> ByteRange {
458         ByteRange::new(g.gen_range(97, 123), g.gen_range(97, 123))
459     }
460 
shrink(&self) -> Box<Iterator<Item=ByteRange>>461     fn shrink(&self) -> Box<Iterator<Item=ByteRange>> {
462         Box::new((self.start, self.end)
463                  .shrink().map(|(s, e)| ByteRange::new(s, e)))
464     }
465 }
466 
467 #[test]
display_regex_roundtrips()468 fn display_regex_roundtrips() {
469     // Given an AST, if we print it as a regex and then re-parse it, do we
470     // get back the same AST?
471     // A lot of this relies crucially on regex simplification. So this is
472     // testing `Expr::simplify` as much as it is testing the `Display` impl.
473     fn prop(e: Expr) -> bool {
474         let parser = ExprBuilder::new().allow_bytes(true);
475         e == parser.parse(&e.to_string()).unwrap()
476     }
477     QuickCheck::new()
478         .tests(10_000)
479         .max_tests(20_000)
480         .gen(StdGen::new(::rand::thread_rng(), 50))
481         .quickcheck(prop as fn(Expr) -> bool);
482 }
483