1 // The Computer Language Benchmarks Game
2 // http://benchmarksgame.alioth.debian.org/
3 //
4 // contributed by the Rust Project Developers
5 // contributed by TeXitoi
6 // contributed by BurntSushi
7 
8 // This technically solves the problem posed in the `regex-dna` benchmark, but
9 // it cheats by combining all of the replacements into a single regex and
10 // replacing them with a single linear scan. i.e., it re-implements
11 // `replace_all`. As a result, this is around 25% faster. ---AG
12 
13 extern crate regex;
14 
15 use std::io::{self, Read};
16 use std::sync::Arc;
17 use std::thread;
18 
/// Compiles the given pattern expression into a `regex::Regex`.
///
/// Expands to `Regex::new(pattern).unwrap()`, so an invalid pattern panics
/// at the point of use instead of returning an error.
macro_rules! regex {
    ($pattern:expr) => (
        ::regex::Regex::new($pattern).unwrap()
    );
}
24 
main()25 fn main() {
26     let mut seq = String::with_capacity(50 * (1 << 20));
27     io::stdin().read_to_string(&mut seq).unwrap();
28     let ilen = seq.len();
29 
30     seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned();
31     let clen = seq.len();
32     let seq_arc = Arc::new(seq.clone());
33 
34     let variants = vec![
35         regex!("agggtaaa|tttaccct"),
36         regex!("[cgt]gggtaaa|tttaccc[acg]"),
37         regex!("a[act]ggtaaa|tttacc[agt]t"),
38         regex!("ag[act]gtaaa|tttac[agt]ct"),
39         regex!("agg[act]taaa|ttta[agt]cct"),
40         regex!("aggg[acg]aaa|ttt[cgt]ccct"),
41         regex!("agggt[cgt]aa|tt[acg]accct"),
42         regex!("agggta[cgt]a|t[acg]taccct"),
43         regex!("agggtaa[cgt]|[acg]ttaccct"),
44     ];
45     let mut counts = vec![];
46     for variant in variants {
47         let seq = seq_arc.clone();
48         let restr = variant.to_string();
49         let future = thread::spawn(move || variant.find_iter(&seq).count());
50         counts.push((restr, future));
51     }
52 
53     let substs = vec![
54         (b'B', "(c|g|t)"),
55         (b'D', "(a|g|t)"),
56         (b'H', "(a|c|t)"),
57         (b'K', "(g|t)"),
58         (b'M', "(a|c)"),
59         (b'N', "(a|c|g|t)"),
60         (b'R', "(a|g)"),
61         (b'S', "(c|g)"),
62         (b'V', "(a|c|g)"),
63         (b'W', "(a|t)"),
64         (b'Y', "(c|t)"),
65     ]; // combined into one regex in `replace_all`
66     let seq = replace_all(&seq, substs);
67 
68     for (variant, count) in counts {
69         println!("{} {}", variant, count.join().unwrap());
70     }
71     println!("\n{}\n{}\n{}", ilen, clen, seq.len());
72 }
73 
replace_all(text: &str, substs: Vec<(u8, &str)>) -> String74 fn replace_all(text: &str, substs: Vec<(u8, &str)>) -> String {
75     let mut replacements = vec![""; 256];
76     let mut alternates = vec![];
77     for (re, replacement) in substs {
78         replacements[re as usize] = replacement;
79         alternates.push((re as char).to_string());
80     }
81 
82     let re = regex!(&alternates.join("|"));
83     let mut new = String::with_capacity(text.len());
84     let mut last_match = 0;
85     for m in re.find_iter(text) {
86         new.push_str(&text[last_match..m.start()]);
87         new.push_str(replacements[text.as_bytes()[m.start()] as usize]);
88         last_match = m.end();
89     }
90     new.push_str(&text[last_match..]);
91     new
92 }
93