1 // The Computer Language Benchmarks Game
2 // https://benchmarksgame-team.pages.debian.net/benchmarksgame/
3 //
4 // contributed by the Rust Project Developers
5 // contributed by TeXitoi
6 // contributed by BurntSushi
7
8 // This technically solves the problem posed in the `regex-dna` benchmark, but
9 // it cheats by combining all of the replacements into a single regex and
10 // replacing them with a single linear scan. i.e., it re-implements
11 // `replace_all`. As a result, this is around 25% faster. ---AG
12
13 use std::io::{self, Read};
14 use std::sync::Arc;
15 use std::thread;
16
/// Compiles the given pattern expression into a `::regex::Regex`.
///
/// Expands to an expression; panics (via `unwrap`) if the pattern fails to
/// compile.
macro_rules! regex {
    ($pattern:expr) => {{
        ::regex::Regex::new($pattern).unwrap()
    }};
}
22
main()23 fn main() {
24 let mut seq = String::with_capacity(50 * (1 << 20));
25 io::stdin().read_to_string(&mut seq).unwrap();
26 let ilen = seq.len();
27
28 seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned();
29 let clen = seq.len();
30 let seq_arc = Arc::new(seq.clone());
31
32 let variants = vec![
33 regex!("agggtaaa|tttaccct"),
34 regex!("[cgt]gggtaaa|tttaccc[acg]"),
35 regex!("a[act]ggtaaa|tttacc[agt]t"),
36 regex!("ag[act]gtaaa|tttac[agt]ct"),
37 regex!("agg[act]taaa|ttta[agt]cct"),
38 regex!("aggg[acg]aaa|ttt[cgt]ccct"),
39 regex!("agggt[cgt]aa|tt[acg]accct"),
40 regex!("agggta[cgt]a|t[acg]taccct"),
41 regex!("agggtaa[cgt]|[acg]ttaccct"),
42 ];
43 let mut counts = vec![];
44 for variant in variants {
45 let seq = seq_arc.clone();
46 let restr = variant.to_string();
47 let future = thread::spawn(move || variant.find_iter(&seq).count());
48 counts.push((restr, future));
49 }
50
51 let substs = vec![
52 (b'B', "(c|g|t)"),
53 (b'D', "(a|g|t)"),
54 (b'H', "(a|c|t)"),
55 (b'K', "(g|t)"),
56 (b'M', "(a|c)"),
57 (b'N', "(a|c|g|t)"),
58 (b'R', "(a|g)"),
59 (b'S', "(c|g)"),
60 (b'V', "(a|c|g)"),
61 (b'W', "(a|t)"),
62 (b'Y', "(c|t)"),
63 ]; // combined into one regex in `replace_all`
64 let seq = replace_all(&seq, substs);
65
66 for (variant, count) in counts {
67 println!("{} {}", variant, count.join().unwrap());
68 }
69 println!("\n{}\n{}\n{}", ilen, clen, seq.len());
70 }
71
replace_all(text: &str, substs: Vec<(u8, &str)>) -> String72 fn replace_all(text: &str, substs: Vec<(u8, &str)>) -> String {
73 let mut replacements = vec![""; 256];
74 let mut alternates = vec![];
75 for (re, replacement) in substs {
76 replacements[re as usize] = replacement;
77 alternates.push((re as char).to_string());
78 }
79
80 let re = regex!(&alternates.join("|"));
81 let mut new = String::with_capacity(text.len());
82 let mut last_match = 0;
83 for m in re.find_iter(text) {
84 new.push_str(&text[last_match..m.start()]);
85 new.push_str(replacements[text.as_bytes()[m.start()] as usize]);
86 last_match = m.end();
87 }
88 new.push_str(&text[last_match..]);
89 new
90 }
91