1prelude: | 2 bm_so_fasta = <<'EOS' 3 # The Computer Language Shootout 4 # http://shootout.alioth.debian.org/ 5 # Contributed by Sokolov Yura 6 7 $last = 42.0 8 def gen_random(max, im=139968, ia=3877, ic=29573) 9 (max * ($last = ($last * ia + ic) % im)) / im 10 end 11 12 alu = 13 "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG"+ 14 "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA"+ 15 "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT"+ 16 "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA"+ 17 "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG"+ 18 "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC"+ 19 "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA" 20 21 iub = [ 22 ["a", 0.27], 23 ["c", 0.12], 24 ["g", 0.12], 25 ["t", 0.27], 26 27 ["B", 0.02], 28 ["D", 0.02], 29 ["H", 0.02], 30 ["K", 0.02], 31 ["M", 0.02], 32 ["N", 0.02], 33 ["R", 0.02], 34 ["S", 0.02], 35 ["V", 0.02], 36 ["W", 0.02], 37 ["Y", 0.02], 38 ] 39 homosapiens = [ 40 ["a", 0.3029549426680], 41 ["c", 0.1979883004921], 42 ["g", 0.1975473066391], 43 ["t", 0.3015094502008], 44 ] 45 46 def make_repeat_fasta(id, desc, src, n) 47 puts ">#{id} #{desc}" 48 v = nil 49 width = 60 50 l = src.length 51 s = src * ((n / l) + 1) 52 s.slice!(n, l) 53 puts(s.scan(/.{1,#{width}}/).join("\n")) 54 end 55 56 def make_random_fasta(id, desc, table, n) 57 puts ">#{id} #{desc}" 58 rand, v = nil,nil 59 width = 60 60 chunk = 1 * width 61 prob = 0.0 62 table.each{|v| v[1]= (prob += v[1])} 63 for i in 1..(n/width) 64 puts((1..width).collect{ 65 rand = gen_random(1.0) 66 table.find{|v| v[1]>rand}[0] 67 }.join) 68 end 69 if n%width != 0 70 puts((1..(n%width)).collect{ 71 rand = gen_random(1.0) 72 table.find{|v| v[1]>rand}[0] 73 }.join) 74 end 75 end 76 77 78 n = (ARGV[0] or 250_000).to_i 79 80 make_repeat_fasta('ONE', 'Homo sapiens alu', alu, n*2) 81 make_random_fasta('TWO', 'IUB ambiguity codes', iub, n*3) 82 make_random_fasta('THREE', 'Homo sapiens frequency', homosapiens, n*5) 83 EOS 84benchmark: 85 - name: so_k_nucleotide 86 prelude: | 87 script = File.join(File.dirname($0), 'bm_so_fasta.rb') 88 File.write(script, bm_so_fasta) 89 90 def prepare_fasta_output n 91 filebase = File.join(File.dirname($0), 'fasta.output') 92 script = File.join(File.dirname($0), 'bm_so_fasta.rb') 93 file = "#{filebase}.#{n}" 94 95 unless FileTest.exist?(file) 96 STDERR.puts "preparing #{file}" 97 98 open(file, 'w'){|f| 99 ARGV[0] = n 100 $stdout = f 101 load script 102 $stdout = STDOUT 103 } 104 end 105 end 106 prepare_fasta_output(100_000) 107 script: | 108 # The Computer Language Shootout 109 # http://shootout.alioth.debian.org 110 # 111 # contributed by jose fco. gonzalez 112 # modified by Sokolov Yura 113 114 seq = String.new 115 116 def frecuency( seq,length ) 117 n, table = seq.length - length + 1, Hash.new(0) 118 f, i = nil, nil 119 (0 ... length).each do |f| 120 (f ... n).step(length) do |i| 121 table[seq[i,length]] += 1 122 end 123 end 124 [n,table] 125 126 end 127 128 def sort_by_freq( seq,length ) 129 n,table = frecuency( seq,length ) 130 a, b, v = nil, nil, nil 131 table.sort{|a,b| b[1] <=> a[1]}.each do |v| 132 puts "%s %.3f" % [v[0].upcase,((v[1]*100).to_f/n)] 133 end 134 puts 135 end 136 137 def find_seq( seq,s ) 138 n,table = frecuency( seq,s.length ) 139 puts "#{table[s].to_s}\t#{s.upcase}" 140 end 141 142 input = open(File.join(File.dirname($0), 'fasta.output.100000'), 'rb') 143 144 line = input.gets while line !~ /^>THREE/ 145 line = input.gets 146 147 while (line !~ /^>/) & line do 148 seq << line.chomp 149 line = input.gets 150 end 151 152 [1,2].each {|i| sort_by_freq( seq,i ) } 153 154 %w(ggt ggta ggtatt ggtattttaatt ggtattttaatttatagt).each{|s| find_seq( seq,s) } 155 loop_count: 1 156