1prelude: |
2  bm_so_fasta = <<'EOS'
3  # The Computer Language Shootout
4  # http://shootout.alioth.debian.org/
5  # Contributed by Sokolov Yura
6
7  $last = 42.0
8  def gen_random(max, im=139968, ia=3877, ic=29573)
9      (max * ($last = ($last * ia + ic) % im)) / im
10  end
11
12  alu =
13     "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG"+
14     "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA"+
15     "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT"+
16     "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA"+
17     "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG"+
18     "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC"+
19     "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"
20
21  iub = [
22      ["a", 0.27],
23      ["c", 0.12],
24      ["g", 0.12],
25      ["t", 0.27],
26
27      ["B", 0.02],
28      ["D", 0.02],
29      ["H", 0.02],
30      ["K", 0.02],
31      ["M", 0.02],
32      ["N", 0.02],
33      ["R", 0.02],
34      ["S", 0.02],
35      ["V", 0.02],
36      ["W", 0.02],
37      ["Y", 0.02],
38  ]
39  homosapiens = [
40      ["a", 0.3029549426680],
41      ["c", 0.1979883004921],
42      ["g", 0.1975473066391],
43      ["t", 0.3015094502008],
44  ]
45
46  def make_repeat_fasta(id, desc, src, n)
47      puts ">#{id} #{desc}"
48      v = nil
49      width = 60
50      l = src.length
51      s = src * ((n / l) + 1)
52      s.slice!(n, l)
53      puts(s.scan(/.{1,#{width}}/).join("\n"))
54  end
55
56  def make_random_fasta(id, desc, table, n)
57      puts ">#{id} #{desc}"
58      rand, v = nil,nil
59      width = 60
60      chunk = 1 * width
61      prob = 0.0
62      table.each{|v| v[1]= (prob += v[1])}
63      for i in 1..(n/width)
64          puts((1..width).collect{
65              rand = gen_random(1.0)
66              table.find{|v| v[1]>rand}[0]
67          }.join)
68      end
69      if n%width != 0
70          puts((1..(n%width)).collect{
71              rand = gen_random(1.0)
72              table.find{|v| v[1]>rand}[0]
73          }.join)
74      end
75  end
76
77
78  n = (ARGV[0] or 250_000).to_i
79
80  make_repeat_fasta('ONE', 'Homo sapiens alu', alu, n*2)
81  make_random_fasta('TWO', 'IUB ambiguity codes', iub, n*3)
82  make_random_fasta('THREE', 'Homo sapiens frequency', homosapiens, n*5)
83  EOS
84benchmark:
85  - name: so_reverse_complement
86    prelude: |
87      script = File.join(File.dirname($0), 'bm_so_fasta.rb')
88      File.write(script, bm_so_fasta)
89
90      def prepare_fasta_output n
91        filebase = File.join(File.dirname($0), 'fasta.output')
92        script = File.join(File.dirname($0), 'bm_so_fasta.rb')
93        file = "#{filebase}.#{n}"
94
95        unless FileTest.exist?(file)
96          STDERR.puts "preparing #{file}"
97
98          open(file, 'w'){|f|
99            ARGV[0] = n
100            $stdout = f
101            load script
102            $stdout = STDOUT
103          }
104        end
105      end
106      prepare_fasta_output(2_500_000)
107    script: |
108      # The Great Computer Language Shootout
109      # http://shootout.alioth.debian.org/
110      #
111      # Contributed by Peter Bjarke Olsen
112      # Modified by Doug King
113
114      seq=Array.new
115
116      def revcomp(seq)
117        seq.reverse!.tr!('wsatugcyrkmbdhvnATUGCYRKMBDHVN','WSTAACGRYMKVHDBNTAACGRYMKVHDBN')
118        stringlen=seq.length
119        0.step(stringlen-1,60) {|x| print seq.slice(x,60) , "\n"}
120      end
121
122      input = open(File.join(File.dirname($0), 'fasta.output.2500000'), 'rb')
123
124      while input.gets
125        if $_ =~ />/
126          if seq.length != 0
127            revcomp(seq.join)
128            seq=Array.new
129          end
130          puts $_
131        else
132          $_.sub(/\n/,'')
133          seq.push $_
134        end
135      end
136      revcomp(seq.join)
137    loop_count: 1
138