1#!/usr/bin/env bash 2 3# Runs performance tests - compares Edlib with other aligners (Parasail, Seqan, Myers). 4 5EDLIB=~/git/edlib/build/bin/edlib-aligner 6 7PARASAIL=~/git/parasail/apps/parasail_aligner 8PARASAIL_FLAGS="-t 1 -d -e 1 -o 1 -M 0 -X 1" 9 10# Here I am using Seqan through modified edlib aligner. It can be found in seqan-test branch of edlib repo. 11SEQAN=~/git/edlib-seqan/src/aligner 12 13MYERS=~/Dropbox/Mile/SIMD_alignment/Myers/myers_98_martin/myers 14 15TEST_DATA=. 16 17########################## TEST RUNNERS ######################## 18 19function edlib { 20 mode=$1 21 query=$2 22 target=$3 23 num_tests=$4 24 k=$5 25 26 time_sum=0 27 for i in $(seq $num_tests); do 28 sleep 1 29 output=$($EDLIB -m $mode -k $k $query $target) 30 time=$(echo "$output" | grep "Cpu time of searching" | cut -d " " -f5) 31 score=$(echo "$output" | grep "#0:" | cut -d " " -f2) 32 time_sum=$(python -c "print($time_sum + $time)") 33 echo ">" "#"$i $score $time 34 done 35 avg_time=$(python -c "print($time_sum / $num_tests)") 36 echo "Edlib:" $avg_time $score 37} 38 39function edlib_path { 40 mode=$1 41 query=$2 42 target=$3 43 num_tests=$4 44 45 time_sum=0 46 for i in $(seq $num_tests); do 47 sleep 1 48 output=$($EDLIB -m $mode -p -s $query $target) 49 time=$(echo "$output" | grep "Cpu time of searching" | cut -d " " -f5) 50 time_sum=$(python -c "print($time_sum + $time)") 51 echo ">" "#"$i $time 52 done 53 avg_time=$(python -c "print($time_sum / $num_tests)") 54 echo "Edlib (path):" $avg_time 55} 56 57function seqan { 58 mode=$1 59 query=$2 60 target=$3 61 num_tests=$4 62 63 time_sum=0 64 for i in $(seq $num_tests); do 65 sleep 1 66 output=$($SEQAN -m $mode -t $query $target) 67 time=$(echo "$output" | grep "Cpu time of searching" | cut -d " " -f5) 68 score=$(($(echo "$output" | grep "Seqan Score:" | cut -d " " -f4) * -1)) 69 time_sum=$(python -c "print($time_sum + $time)") 70 echo ">" "#"$i $score $time 71 done 72 avg_time=$(python -c "print($time_sum / $num_tests)") 73 echo "Seqan:" $avg_time $score 74} 75 76function seqan_path { 77 mode=$1 78 query=$2 79 target=$3 80 num_tests=$4 81 82 time_sum=0 83 for i in $(seq $num_tests); do 84 sleep 1 85 output=$($SEQAN -m $mode -t -p -s $query $target) 86 time=$(echo "$output" | grep "Cpu time of searching" | cut -d " " -f5) 87 score=$(($(echo "$output" | grep "Seqan Score:" | cut -d " " -f4) * -1)) 88 time_sum=$(python -c "print($time_sum + $time)") 89 echo ">" "#"$i $score $time 90 done 91 avg_time=$(python -c "print($time_sum / $num_tests)") 92 echo "Seqan (path):" $score $avg_time 93} 94 95function parasail { 96 query=$1 97 target=$2 98 num_tests=$3 99 100 time_sum=0 101 for i in $(seq $num_tests); do 102 sleep 1 103 output=$($PARASAIL $PARASAIL_FLAGS -a nw_striped_32 -f $target -q $query) 104 time=$(echo "$output" | grep "alignment time" | cut -d ":" -f2 | cut -d " " -f2) 105 score=$(($(head -n 1 parasail.csv | cut -d "," -f5) * -1)) 106 rm parasail.csv 107 time_sum=$(python -c "print($time_sum + $time)") 108 echo ">" "#"$i $score $time 109 done 110 avg_time=$(python -c "print($time_sum / $num_tests)") 111 echo "Parasail:" $avg_time $score 112} 113 114function myers { 115 query=$1 116 target=$2 117 num_tests=$3 118 k=$4 119 120 time_sum=0 121 for i in $(seq $num_tests); do 122 sleep 1 123 tail -n +2 $query | tr -d '\n' > queryMyers.fasta 124 tail -n +2 $target | tr -d '\n' > targetMyers.fasta 125 output=$({ time -p $MYERS $(cat queryMyers.fasta) $k targetMyers.fasta; } 2>&1) 126 rm queryMyers.fasta targetMyers.fasta 127 time=$(echo "$output" | grep "real" | cut -d " " -f2) 128 time_sum=$(python -c "print($time_sum + $time)") 129 echo ">" "#"$i $time 130 done 131 avg_time=$(python -c "print($time_sum / $num_tests)") 132 echo "Myers:" $avg_time 133 134} 135 136 137############################ TESTS ############################# 138 139 140#Enterobacteria 141echo -e "\nEnterobacteria, NW" 142target=$TEST_DATA/Enterobacteria_Phage_1/Enterobacteria_phage_1.fasta 143for query in $(ls $TEST_DATA/Enterobacteria_Phage_1/mutated_*_perc.fasta); do 144 echo $query 145 146 edlib NW $query $target 3 -1 147 edlib_path NW $query $target 3 148 seqan NW $query $target 3 149 seqan_path NW $query $target 3 150 parasail $query $target 3 151done 152 153 154#E coli and its illumina read, HW 155echo -e "\nE coli and its illumina read, HW" 156target=$TEST_DATA/E_coli_DH1/e_coli_DH1.fasta 157for query in $(ls $TEST_DATA/E_coli_DH1/mason_illumina_reads/10kbp/*.fasta); do 158 echo $query 159 160 edlib HW $query $target 3 -1 161 edlib_path HW $query $target 3 162 seqan HW $query $target 3 163# seqan_path HW $query $target 3 # Fails because it allocates too much memory. 164done 165 166 167#E coli and its prefix, SHW 168echo -e "\nE coli and its prefix, SHW" 169target=$TEST_DATA/E_coli_DH1/e_coli_DH1.fasta 170for query in $(ls $TEST_DATA/E_coli_DH1/prefixes/10kbp/mutated_*_perc.fasta); do 171 echo $query 172 173 edlib SHW $query $target 3 -1 174 edlib_path SHW $query $target 3 175 seqan SHW $query $target 3 176# seqan_path SHW $query $target 3 # Fails because it allocates too much memory. 177done 178 179 180#Chromosome 181echo -e "\nChromosome, NW" 182target=$TEST_DATA/Chromosome_2890043_3890042_0/Chromosome_2890043_3890042_0.fasta 183for query in $(ls $TEST_DATA/Chromosome_2890043_3890042_0/mutated_*_perc.fasta); do 184 echo $query 185 186 edlib NW $query $target 3 -1 187 edlib_path NW $query $target 3 188 seqan NW $query $target 3 189 seqan_path NW $query $target 3 190 parasail $query $target 3 191done 192 193 194################### Myers ##################### 195echo -e "\nMyers" 196target=$TEST_DATA/E_coli_DH1/e_coli_DH1.fasta 197 198k=100 199for query_file in e_coli_DH1_illumina_1x10000.fasta; do 200 query=$TEST_DATA/E_coli_DH1/mason_illumina_reads/10kbp/$query_file 201 echo $query $k 202 edlib HW $query $target 3 $k 203 myers $query $target 3 $k 204done 205 206k=1000 207for query_file in e_coli_DH1_illumina_1x10000.fasta mutated_97_perc.fasta mutated_94_perc.fasta mutated_90_perc.fasta; do 208 query=$TEST_DATA/E_coli_DH1/mason_illumina_reads/10kbp/$query_file 209 echo $query $k 210 edlib HW $query $target 3 $k 211 myers $query $target 3 $k 212done 213 214k=10000 215for query in $(ls $TEST_DATA/E_coli_DH1/mason_illumina_reads/10kbp/*); do 216 echo $query $k 217 edlib HW $query $target 3 $k 218 myers $query $target 3 $k 219done 220