1 // Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10
11 use quickcheck::{Arbitrary, Gen, Testable, QuickCheck, StdGen};
12
13 use {
14 Expr, ExprBuilder,
15 CharClass, ClassRange, ByteClass, ByteRange, Repeater, dec_char,
16 };
17
qc<T: Testable>(t: T)18 fn qc<T: Testable>(t: T) {
19 QuickCheck::new()
20 .tests(10_000)
21 .max_tests(20_000)
22 .quickcheck(t);
23 }
24
class(ranges: &[(char, char)]) -> CharClass25 fn class(ranges: &[(char, char)]) -> CharClass {
26 let ranges = ranges.iter().cloned()
27 .map(|(c1, c2)| ClassRange::new(c1, c2)).collect();
28 CharClass::new(ranges)
29 }
30
31 // Test invariants for canonicalizing character classes.
32
33 #[test]
negate()34 fn negate() {
35 fn prop(ranges: Vec<(char, char)>) -> bool {
36 let expected = class(&ranges).canonicalize();
37 let got = class(&ranges).negate().negate();
38 expected == got
39 }
40 qc(prop as fn(Vec<(char, char)>) -> bool);
41 }
42
43 #[test]
classes_are_sorted_and_nonoverlapping()44 fn classes_are_sorted_and_nonoverlapping() {
45 fn prop(ranges: Vec<(char, char)>) -> bool {
46 class(&ranges)
47 .canonicalize()
48 .windows(2)
49 .all(|w| w[0].end < dec_char(w[1].start))
50 }
51 qc(prop as fn(Vec<(char, char)>) -> bool);
52 }
53
54 #[test]
valid_class_ranges()55 fn valid_class_ranges() {
56 fn prop(ranges: Vec<(char, char)>) -> bool {
57 class(&ranges).canonicalize().into_iter().all(|r| r.start <= r.end)
58 }
59 qc(prop as fn(Vec<(char, char)>) -> bool);
60 }
61
62 #[test]
intersection()63 fn intersection() {
64 fn prop(ranges1: Vec<(char, char)>, ranges2: Vec<(char, char)>) -> bool {
65 let class1 = class(&ranges1).canonicalize();
66 let class2 = class(&ranges2).canonicalize();
67
68 let mut expected = CharClass::empty();
69 // This is inefficient but correct.
70 for range1 in &class1 {
71 for range2 in &class2 {
72 if let Some(intersection) = range1.intersection(range2) {
73 expected.ranges.push(intersection);
74 }
75 }
76 }
77 expected = expected.canonicalize();
78
79 let got = class1.intersection(&class2);
80 expected == got
81 }
82 qc(prop as fn(Vec<(char, char)>, Vec<(char, char)>) -> bool);
83 }
84
85 /// A wrapper type for generating "regex-like" Unicode strings.
86 ///
87 /// In particular, this type's `Arbitrary` impl specifically biases toward
88 /// special regex characters to make test cases more interesting.
89 #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
90 struct RegexLikeString(String);
91
92 impl Arbitrary for RegexLikeString {
arbitrary<G: Gen>(g: &mut G) -> RegexLikeString93 fn arbitrary<G: Gen>(g: &mut G) -> RegexLikeString {
94 const SPECIAL: &'static [char] = &[
95 '\\', '.', '+', '*', '?', '(', ')', '|', '[', ']', '{', '}',
96 '^', '$',
97 ];
98 // Generating random Unicode strings results in mostly uninteresting
99 // regexes. Namely, they'll mostly just be literals.
100 // To make properties using regex strings more interesting, we bias
101 // toward selecting characters of significance to a regex.
102 let size = { let s = g.size(); g.gen_range(0, s) };
103 RegexLikeString((0..size).map(|_| {
104 if g.gen_weighted_bool(3) {
105 *g.choose(SPECIAL).unwrap()
106 } else {
107 g.gen()
108 }
109 }).collect())
110 }
111
shrink(&self) -> Box<Iterator<Item=RegexLikeString>>112 fn shrink(&self) -> Box<Iterator<Item=RegexLikeString>> {
113 // The regular `String` shrinker is good enough.
114 Box::new(self.0.shrink().map(RegexLikeString))
115 }
116 }
117
118 /// A special type for generating small non-zero sized ASCII strings.
119 #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
120 struct SmallAscii(String);
121
122 impl Arbitrary for SmallAscii {
arbitrary<G: Gen>(g: &mut G) -> SmallAscii123 fn arbitrary<G: Gen>(g: &mut G) -> SmallAscii {
124 use std::char::from_u32;
125 let size = g.gen_range(1, 5);
126 SmallAscii((0..size)
127 .map(|_| from_u32(g.gen_range(97, 123)).unwrap())
128 .collect())
129 }
130
shrink(&self) -> Box<Iterator<Item=SmallAscii>>131 fn shrink(&self) -> Box<Iterator<Item=SmallAscii>> {
132 Box::new(self.0.shrink().map(SmallAscii))
133 }
134 }
135
136 #[test]
parser_never_panics()137 fn parser_never_panics() {
138 fn prop(s: RegexLikeString) -> bool {
139 let _ = Expr::parse(&s.0); true
140 }
141 qc(prop as fn(RegexLikeString) -> bool);
142 }
143
144 // Testing entire expressions.
145 //
146 // We only have one test at the moment, but the machinery could be useful
147 // for other things.
148 //
149 // In particular, Russ Cox writes about testing regexes by comparing the
150 // strings they match with other regex implementations. A fuzzer/shrinker
151 // (which is what's implemented below) would be a great way to drive that
152 // process. ---AG
153
154 impl Arbitrary for Expr {
arbitrary<G: Gen>(g: &mut G) -> Expr155 fn arbitrary<G: Gen>(g: &mut G) -> Expr {
156 let e = fix_capture_indices(gen_expr(g, 0, ExprType::Anything));
157 e.simplify(200).unwrap()
158 }
159
shrink(&self) -> Box<Iterator<Item=Expr>>160 fn shrink(&self) -> Box<Iterator<Item=Expr>> {
161 use Expr::*;
162
163 let nada = || Box::new(None.into_iter());
164 let es: Box<Iterator<Item=Expr>> = match *self {
165 Empty | AnyChar | AnyCharNoNL | AnyByte | AnyByteNoNL
166 | StartLine | EndLine | StartText | EndText
167 | WordBoundary | NotWordBoundary
168 | WordBoundaryAscii | NotWordBoundaryAscii => nada(),
169 Literal { ref chars, .. } if chars.len() == 1 => nada(),
170 Literal { ref chars, casei } => {
171 Box::new((chars.clone(), casei)
172 .shrink()
173 .filter(|&(ref chars, _)| chars.len() > 0)
174 .map(|(chars, casei)| {
175 Literal { chars: chars, casei: casei }
176 }))
177 }
178 LiteralBytes { ref bytes, .. } if bytes.len() == 1 => nada(),
179 LiteralBytes { ref bytes, casei } => {
180 Box::new((bytes.clone(), casei)
181 .shrink()
182 .filter(|&(ref bytes, _)| bytes.len() > 0)
183 .map(|(bytes, casei)| {
184 LiteralBytes { bytes: bytes, casei: casei }
185 }))
186 }
187 Class(ref cls) => Box::new(cls.shrink().map(Class)),
188 ClassBytes(ref cls) => Box::new(cls.shrink().map(ClassBytes)),
189 Group { ref e, ref i, ref name } => {
190 let (i, name) = (i.clone(), name.clone());
191 Box::new(e.clone().shrink()
192 .chain(e.clone().shrink()
193 .map(move |e| Group {
194 e: Box::new(e),
195 i: i.clone(),
196 name: name.clone(),
197 })))
198 }
199 Repeat { ref e, ref r, greedy } => {
200 Box::new((*e.clone(), r.clone())
201 .shrink()
202 .filter(|&(ref e, _)| e.can_repeat())
203 .map(move |(e, r)| Repeat {
204 e: Box::new(e),
205 r: r,
206 greedy: greedy,
207 }))
208 }
209 // Concat(ref es) if es.len() <= 2 => nada(),
210 Concat(ref es) => {
211 Box::new(es.clone()
212 .shrink()
213 .filter(|es| es.len() > 0)
214 .map(|mut es| if es.len() == 1 {
215 es.pop().unwrap()
216 } else {
217 Concat(es)
218 }))
219 }
220 // Alternate(ref es) if es.len() <= 2 => nada(),
221 Alternate(ref es) => {
222 Box::new(es.clone()
223 .shrink()
224 .filter(|es| es.len() > 0)
225 .map(|mut es| if es.len() == 1 {
226 es.pop().unwrap()
227 } else {
228 Alternate(es)
229 }))
230 }
231 };
232 Box::new(es.map(|e| fix_capture_indices(e).simplify(200).unwrap()))
233 }
234 }
235
236 enum ExprType {
237 NoSequences, // disallow concat/alternate
238 Anything,
239 }
240
gen_expr<G: Gen>(g: &mut G, depth: u32, ty: ExprType) -> Expr241 fn gen_expr<G: Gen>(g: &mut G, depth: u32, ty: ExprType) -> Expr {
242 use Expr::*;
243 let ub = match (depth as usize >= g.size(), ty) {
244 (true, _) => 16,
245 (false, ExprType::NoSequences) => 18,
246 (false, ExprType::Anything) => 20,
247 };
248 match g.gen_range(1, ub) {
249 0 => Empty,
250 1 => Literal {
251 chars: SmallAscii::arbitrary(g).0.chars().collect(),
252 casei: g.gen(),
253 },
254 2 => LiteralBytes {
255 bytes: SmallAscii::arbitrary(g).0.as_bytes().to_owned(),
256 casei: g.gen(),
257 },
258 3 => AnyChar,
259 4 => AnyCharNoNL,
260 5 => AnyByte,
261 6 => AnyByteNoNL,
262 7 => Class(CharClass::arbitrary(g)),
263 8 => StartLine,
264 9 => EndLine,
265 10 => StartText,
266 11 => EndText,
267 12 => WordBoundary,
268 13 => NotWordBoundary,
269 14 => WordBoundaryAscii,
270 15 => NotWordBoundaryAscii,
271 16 => gen_group_expr(g, depth + 1),
272 17 => Repeat {
273 e: Box::new(gen_repeatable_expr(g, depth + 1)),
274 r: Repeater::arbitrary(g),
275 greedy: bool::arbitrary(g),
276 },
277 18 => {
278 let size = { let s = g.size(); g.gen_range(2, s) };
279 Concat((0..size)
280 .map(|_| {
281 gen_expr(g, depth + 1, ExprType::NoSequences)
282 })
283 .collect())
284 }
285 19 => {
286 let size = { let s = g.size(); g.gen_range(2, s) };
287 Alternate((0..size)
288 .map(|_| {
289 gen_expr(g, depth + 1, ExprType::NoSequences)
290 })
291 .collect())
292 }
293 _ => unreachable!()
294 }
295 }
296
gen_repeatable_expr<G: Gen>(g: &mut G, depth: u32) -> Expr297 fn gen_repeatable_expr<G: Gen>(g: &mut G, depth: u32) -> Expr {
298 use Expr::*;
299 match g.gen_range(1, 10) {
300 0 => Empty,
301 1 => Literal {
302 chars: vec![Arbitrary::arbitrary(g)],
303 casei: g.gen(),
304 },
305 2 => LiteralBytes {
306 bytes: vec![Arbitrary::arbitrary(g)],
307 casei: g.gen(),
308 },
309 3 => AnyChar,
310 4 => AnyCharNoNL,
311 5 => AnyByte,
312 6 => AnyByteNoNL,
313 7 => Class(CharClass::arbitrary(g)),
314 8 => ClassBytes(ByteClass::arbitrary(g)),
315 9 => gen_group_expr(g, depth + 1),
316 _ => unreachable!(),
317 }
318 }
319
gen_group_expr<G: Gen>(g: &mut G, depth: u32) -> Expr320 fn gen_group_expr<G: Gen>(g: &mut G, depth: u32) -> Expr {
321 let (i, name) = if g.gen() {
322 (None, None)
323 } else {
324 (Some(0), if g.gen() {
325 Some(SmallAscii::arbitrary(g).0)
326 } else {
327 None
328 })
329 };
330 Expr::Group {
331 e: Box::new(gen_expr(g, depth + 1, ExprType::Anything)),
332 i: i,
333 name: name,
334 }
335 }
336
fix_capture_indices(e: Expr) -> Expr337 fn fix_capture_indices(e: Expr) -> Expr {
338 fn bx(e: Expr) -> Box<Expr> { Box::new(e) }
339 fn fix(e: Expr, capi: &mut usize, names: &mut Vec<String>) -> Expr {
340 use Expr::*;
341 match e {
342 Group { e, i: Some(_), mut name } => {
343 *capi += 1;
344 let i = *capi;
345 let mut dupe_name = false;
346 if let Some(ref n1) = name {
347 if names.iter().any(|n2| n1 == n2) {
348 dupe_name = true;
349 } else {
350 names.push(n1.clone());
351 }
352 }
353 if dupe_name { name = None; }
354 Group { e: bx(fix(*e, capi, names)), i: Some(i), name: name }
355 }
356 Group { e, i, name } => {
357 Group { e: bx(fix(*e, capi, names)), i: i, name: name }
358 }
359 Repeat { e, r, greedy } => {
360 Repeat { e: bx(fix(*e, capi, names)), r: r, greedy: greedy }
361 }
362 Concat(es) =>
363 Concat(es.into_iter().map(|e| fix(e, capi, names)).collect()),
364 Alternate(es) =>
365 Alternate(es.into_iter().map(|e| fix(e, capi, names)).collect()),
366 e => e,
367 }
368 }
369 fix(e, &mut 0, &mut vec![])
370 }
371
372 impl Arbitrary for Repeater {
arbitrary<G: Gen>(g: &mut G) -> Repeater373 fn arbitrary<G: Gen>(g: &mut G) -> Repeater {
374 use Repeater::*;
375 match g.gen_range(0, 4) {
376 0 => ZeroOrOne,
377 1 => ZeroOrMore,
378 2 => OneOrMore,
379 3 => {
380 use std::cmp::{max, min};
381 let n1 = Arbitrary::arbitrary(g);
382 let n2 = Arbitrary::arbitrary(g);
383 Range {
384 min: min(n1, n2),
385 max: if g.gen() { None } else { Some(max(n1, n2)) },
386 }
387 },
388 _ => unreachable!(),
389 }
390 }
391
shrink(&self) -> Box<Iterator<Item=Repeater>>392 fn shrink(&self) -> Box<Iterator<Item=Repeater>> {
393 use Repeater::*;
394 match *self {
395 ZeroOrOne | ZeroOrMore | OneOrMore => Box::new(None.into_iter()),
396 Range { min, max } => {
397 Box::new((min, max)
398 .shrink()
399 .map(|(min, max)| Range { min: min, max: max }))
400 }
401 }
402 }
403 }
404
405 impl Arbitrary for CharClass {
arbitrary<G: Gen>(g: &mut G) -> CharClass406 fn arbitrary<G: Gen>(g: &mut G) -> CharClass {
407 let mut ranges: Vec<ClassRange> = Arbitrary::arbitrary(g);
408 if ranges.is_empty() {
409 ranges.push(Arbitrary::arbitrary(g));
410 }
411 let cls = CharClass { ranges: ranges }.canonicalize();
412 if g.gen() { cls.case_fold() } else { cls }
413 }
414
shrink(&self) -> Box<Iterator<Item=CharClass>>415 fn shrink(&self) -> Box<Iterator<Item=CharClass>> {
416 Box::new(self.ranges.clone()
417 .shrink()
418 .filter(|ranges| ranges.len() > 0)
419 .map(|ranges| CharClass { ranges: ranges }.canonicalize()))
420 }
421 }
422
423 impl Arbitrary for ClassRange {
arbitrary<G: Gen>(g: &mut G) -> ClassRange424 fn arbitrary<G: Gen>(g: &mut G) -> ClassRange {
425 use std::char::from_u32;
426 ClassRange::new(
427 from_u32(g.gen_range(97, 123)).unwrap(),
428 from_u32(g.gen_range(97, 123)).unwrap(),
429 )
430 }
431
shrink(&self) -> Box<Iterator<Item=ClassRange>>432 fn shrink(&self) -> Box<Iterator<Item=ClassRange>> {
433 Box::new((self.start, self.end)
434 .shrink().map(|(s, e)| ClassRange::new(s, e)))
435 }
436 }
437
438 impl Arbitrary for ByteClass {
arbitrary<G: Gen>(g: &mut G) -> ByteClass439 fn arbitrary<G: Gen>(g: &mut G) -> ByteClass {
440 let mut ranges: Vec<ByteRange> = Arbitrary::arbitrary(g);
441 if ranges.is_empty() {
442 ranges.push(Arbitrary::arbitrary(g));
443 }
444 let cls = ByteClass { ranges: ranges }.canonicalize();
445 if g.gen() { cls.case_fold() } else { cls }
446 }
447
shrink(&self) -> Box<Iterator<Item=ByteClass>>448 fn shrink(&self) -> Box<Iterator<Item=ByteClass>> {
449 Box::new(self.ranges.clone()
450 .shrink()
451 .filter(|ranges| ranges.len() > 0)
452 .map(|ranges| ByteClass { ranges: ranges }.canonicalize()))
453 }
454 }
455
456 impl Arbitrary for ByteRange {
arbitrary<G: Gen>(g: &mut G) -> ByteRange457 fn arbitrary<G: Gen>(g: &mut G) -> ByteRange {
458 ByteRange::new(g.gen_range(97, 123), g.gen_range(97, 123))
459 }
460
shrink(&self) -> Box<Iterator<Item=ByteRange>>461 fn shrink(&self) -> Box<Iterator<Item=ByteRange>> {
462 Box::new((self.start, self.end)
463 .shrink().map(|(s, e)| ByteRange::new(s, e)))
464 }
465 }
466
467 #[test]
display_regex_roundtrips()468 fn display_regex_roundtrips() {
469 // Given an AST, if we print it as a regex and then re-parse it, do we
470 // get back the same AST?
471 // A lot of this relies crucially on regex simplification. So this is
472 // testing `Expr::simplify` as much as it is testing the `Display` impl.
473 fn prop(e: Expr) -> bool {
474 let parser = ExprBuilder::new().allow_bytes(true);
475 e == parser.parse(&e.to_string()).unwrap()
476 }
477 QuickCheck::new()
478 .tests(10_000)
479 .max_tests(20_000)
480 .gen(StdGen::new(::rand::thread_rng(), 50))
481 .quickcheck(prop as fn(Expr) -> bool);
482 }
483