1prelude: |
2  bm_so_fasta = <<'EOS'
3  # The Computer Language Shootout
4  # http://shootout.alioth.debian.org/
5  # Contributed by Sokolov Yura
6
7  $last = 42.0
8  def gen_random(max, im=139968, ia=3877, ic=29573)
9      (max * ($last = ($last * ia + ic) % im)) / im
10  end
11
12  alu =
13     "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG"+
14     "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA"+
15     "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT"+
16     "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA"+
17     "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG"+
18     "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC"+
19     "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"
20
21  iub = [
22      ["a", 0.27],
23      ["c", 0.12],
24      ["g", 0.12],
25      ["t", 0.27],
26
27      ["B", 0.02],
28      ["D", 0.02],
29      ["H", 0.02],
30      ["K", 0.02],
31      ["M", 0.02],
32      ["N", 0.02],
33      ["R", 0.02],
34      ["S", 0.02],
35      ["V", 0.02],
36      ["W", 0.02],
37      ["Y", 0.02],
38  ]
39  homosapiens = [
40      ["a", 0.3029549426680],
41      ["c", 0.1979883004921],
42      ["g", 0.1975473066391],
43      ["t", 0.3015094502008],
44  ]
45
46  def make_repeat_fasta(id, desc, src, n)
47      puts ">#{id} #{desc}"
48      v = nil
49      width = 60
50      l = src.length
51      s = src * ((n / l) + 1)
52      s.slice!(n, l)
53      puts(s.scan(/.{1,#{width}}/).join("\n"))
54  end
55
56  def make_random_fasta(id, desc, table, n)
57      puts ">#{id} #{desc}"
58      rand, v = nil,nil
59      width = 60
60      chunk = 1 * width
61      prob = 0.0
62      table.each{|v| v[1]= (prob += v[1])}
63      for i in 1..(n/width)
64          puts((1..width).collect{
65              rand = gen_random(1.0)
66              table.find{|v| v[1]>rand}[0]
67          }.join)
68      end
69      if n%width != 0
70          puts((1..(n%width)).collect{
71              rand = gen_random(1.0)
72              table.find{|v| v[1]>rand}[0]
73          }.join)
74      end
75  end
76
77
78  n = (ARGV[0] or 250_000).to_i
79
80  make_repeat_fasta('ONE', 'Homo sapiens alu', alu, n*2)
81  make_random_fasta('TWO', 'IUB ambiguity codes', iub, n*3)
82  make_random_fasta('THREE', 'Homo sapiens frequency', homosapiens, n*5)
83  EOS
84benchmark:
85  - name: so_k_nucleotide
86    prelude: |
87      script = File.join(File.dirname($0), 'bm_so_fasta.rb')
88      File.write(script, bm_so_fasta)
89
90      def prepare_fasta_output n
91        filebase = File.join(File.dirname($0), 'fasta.output')
92        script = File.join(File.dirname($0), 'bm_so_fasta.rb')
93        file = "#{filebase}.#{n}"
94
95        unless FileTest.exist?(file)
96          STDERR.puts "preparing #{file}"
97
98          open(file, 'w'){|f|
99            ARGV[0] = n
100            $stdout = f
101            load script
102            $stdout = STDOUT
103          }
104        end
105      end
106      prepare_fasta_output(100_000)
107    script: |
108      # The Computer Language Shootout
109      # http://shootout.alioth.debian.org
110      #
111      # contributed by jose fco. gonzalez
112      # modified by Sokolov Yura
113
114      seq = String.new
115
116      def frecuency( seq,length )
117          n, table = seq.length - length + 1, Hash.new(0)
118          f, i = nil, nil
119          (0 ... length).each do |f|
120              (f ... n).step(length) do |i|
121                  table[seq[i,length]] += 1
122              end
123          end
124          [n,table]
125
126      end
127
128      def sort_by_freq( seq,length )
129          n,table = frecuency( seq,length )
130          a, b, v = nil, nil, nil
131          table.sort{|a,b| b[1] <=> a[1]}.each do |v|
132              puts "%s %.3f" % [v[0].upcase,((v[1]*100).to_f/n)]
133          end
134          puts
135      end
136
137      def find_seq( seq,s )
138          n,table = frecuency( seq,s.length )
139          puts "#{table[s].to_s}\t#{s.upcase}"
140      end
141
142      input = open(File.join(File.dirname($0), 'fasta.output.100000'), 'rb')
143
144      line = input.gets while line !~ /^>THREE/
145      line = input.gets
146
147      while (line !~ /^>/) & line do
148          seq << line.chomp
149          line = input.gets
150      end
151
152      [1,2].each {|i| sort_by_freq( seq,i ) }
153
154      %w(ggt ggta ggtatt ggtattttaatt ggtattttaatttatagt).each{|s| find_seq( seq,s) }
155    loop_count: 1
156