1# Write the benchmarking functions here.
2# See "Writing benchmarks" in the asv docs for more information.
3
4from skbio import DNA, RNA
5import numpy as np
6
# Total number of bases in the long benchmark sequences. Each 4-character
# template below is repeated `size` times to reach that length.
num_bases = 1000000
size = num_bases // 4
# Length of the short sequence used by the k-mer counting benchmarks.
short_len = 100

# ASCII byte values of the repeating 4-character templates. The gapped
# template swaps 'G' and 'T' for the '-' and '.' characters.
dna_template_bytes = list(map(ord, 'ACGT'))
dna_template_bytes_gapped = list(map(ord, 'AC-.'))
rna_template_bytes = list(map(ord, 'ACGU'))

# Raw uint8 buffers handed to the sequence constructors; the short buffer
# is a slice of the first `short_len` bytes of the long DNA buffer.
dna_bytes = np.array(size * dna_template_bytes, dtype=np.uint8)
dna_bytes_short = dna_bytes[:short_len]
dna_bytes_gapped = np.array(size * dna_template_bytes_gapped,
                            dtype=np.uint8)
rna_bytes = np.array(size * rna_template_bytes, dtype=np.uint8)

# Sequence fixtures built once at import time and shared by the benchmarks.
dna_seq = DNA(dna_bytes)
dna_seq_short = DNA(dna_bytes_short)
dna_gapped = DNA(dna_bytes_gapped)
rna_seq = RNA(rna_bytes)

# 20-base motif and the single-capture-group regex used to search for it.
motif_1 = "GGTGCAAGCCGGTGGAAACA"
motif_1_regex = '(' + motif_1 + ')'
27
28
def consume_iterator(iterator):
    """Exhaust *iterator*, discarding every item it yields.

    Used to force lazily evaluated results to be fully computed so that
    the work is included in the measured time. Returns ``None``.
    """
    pending = iter(iterator)
    # A fresh object can never be yielded by the iterable, so it marks
    # exhaustion unambiguously.
    exhausted = object()
    while next(pending, exhausted) is not exhausted:
        pass
32
33
class BenchmarkSuite:
    """Timing benchmarks for skbio DNA/RNA sequence operations.

    Each ``time_*`` method is a benchmark. All of them work on the
    module-level fixtures, which are built once at import time, so fixture
    construction is not part of the measured work (except in the two
    object-creation benchmarks, where construction is the point).
    """

    def time_object_creation(self):
        """Build a DNA sequence from the long byte buffer, no validation."""
        DNA(dna_bytes, validate=False)

    def time_object_creation_validate(self):
        """Build a DNA sequence from the long byte buffer with validation."""
        DNA(dna_bytes)

    def time_reverse_complement(self):
        """Reverse-complement the long DNA sequence."""
        dna_seq.reverse_complement()

    def time_degap_all(self):
        """Degap the long DNA sequence."""
        # NOTE(review): dna_seq is built from 'ACGT' only, so it holds no
        # gap characters; if the removal path itself should be timed,
        # dna_gapped is the fixture to use -- confirm intent.
        dna_seq.degap()

    def time_translate(self):
        """Translate the long RNA sequence."""
        rna_seq.translate()

    def time_search_for_motif(self):
        """Regex-search the long DNA sequence for the motif."""
        # find_with_regex is consumed fully so the search actually runs.
        consume_iterator(dna_seq.find_with_regex(motif_1_regex))

    def time_kmer_count_5(self):
        """Count 5-mers in the short DNA sequence."""
        dna_seq_short.kmer_frequencies(5)

    def time_kmer_count_25(self):
        """Count 25-mers in the short DNA sequence."""
        dna_seq_short.kmer_frequencies(25)

    def time_gc_content(self):
        """Compute GC content of the long DNA sequence."""
        dna_seq.gc_content()

    def time_search_for_motif_in_gapped(self):
        """Regex-search the gapped DNA sequence for the motif, skipping gaps."""
        # Run on the gapped fixture and pass its own gap mask, so that the
        # ``ignore`` handling is exercised on a sequence that really
        # contains gap characters.
        consume_iterator(
            dna_gapped.find_with_regex(motif_1_regex,
                                       ignore=dna_gapped.gaps()))
66