1# tRNAscanSE/Eufind.pm 2# This class contains parameters and functions for running eufindtRNA used in tRNAscan-SE. 3# 4# -------------------------------------------------------------- 5# This module is part of the tRNAscan-SE program. 6# Copyright (C) 2017 Patricia Chan and Todd Lowe 7# -------------------------------------------------------------- 8# 9 10package tRNAscanSE::Eufind; 11 12use strict; 13use tRNAscanSE::Configuration; 14use tRNAscanSE::Utils; 15use tRNAscanSE::ArraytRNA; 16use tRNAscanSE::tRNA; 17use tRNAscanSE::Stats; 18 19 20sub new 21{ 22 my $class = shift; 23 my $self = {}; 24 25 initialize($self); 26 27 bless ($self, $class); 28 return $self; 29} 30 31sub DESTROY 32{ 33 my $self = shift; 34} 35 36sub initialize 37{ 38 my $self = shift; 39 40 $self->{eufind_bin} = "eufindtRNA"; 41 $self->{eufind_mask} = 2; # Bit-wise masks for source of tRNA hits 42} 43 44sub eufind_params 45{ 46 my $self = shift; 47 if (@_) { $self->{eufind_params} = shift; } 48 return $self->{eufind_params}; 49} 50 51sub eufind_intscore 52{ 53 my $self = shift; 54 if (@_) { $self->{eufind_intscore} = shift; } 55 return $self->{eufind_intscore}; 56} 57 58sub eufind_bin 59{ 60 my $self = shift; 61 if (@_) { $self->{eufind_bin} = shift; } 62 return $self->{eufind_bin}; 63} 64 65sub eufind_mask 66{ 67 my $self = shift; 68 return $self->{eufind_mask}; 69} 70 71sub set_defaults 72{ 73 my $self = shift; 74 my $global_vars = shift; 75 my $global_constants = $global_vars->{global_constants}; 76 77 $self->{eufind_params} = $global_constants->get_subvalue("eufind", "relaxed_param"); 78 $self->{eufind_intscore} = $global_constants->get_subvalue("eufind", "intscore"); 79} 80 81sub set_bin 82{ 83 my $self = shift; 84 my $bindir = shift; 85 86 if ($^O =~ /^MSWin/) 87 { 88 $self->{eufind_bin} .= ".exe"; 89 } 90 91 if (!(-x $self->{eufind_bin})) 92 { 93 $self->{eufind_bin} = $bindir."/".$self->{eufind_bin}; 94 if (!(-x $self->{eufind_bin})) 95 { 96 die "FATAL: Unable to find ".$self->{eufind_bin}." executable\n\n"; 97 } 98 } 99} 100 101sub run_eufind 102{ 103 my $self = shift; 104 my ($tmp_fa, $start_index, $max_int_len, $seq_name) = @_; 105 my $eufind_bin = $self->{eufind_bin}; 106 my $eufind_intscore = $self->{eufind_intscore}; 107 my $eufind_params = $self->{eufind_params}; 108 109 # run default Eufind using selected param set 110 my $eufind_output = `$eufind_bin -i $start_index -F -I $eufind_intscore -l $max_int_len $eufind_params $tmp_fa`; 111 if (&error_exit_status("EufindtRNA",$seq_name)) 112 { 113 $eufind_output = ""; 114 } 115 return $eufind_output; 116} 117 118sub process_Eufind_hits 119{ 120 my $self = shift; 121 my ($global_vars, $eufind_output) = @_; 122 my $global_constants = $global_vars->{global_constants}; 123 my $fp_tRNAs = $global_vars->{fp_tRNAs}; 124 my $stats = $global_vars->{stats}; 125 126 my $trna = undef; 127 my $i = 0; 128 129 my (@eufind_lines) = split(/\n/, $eufind_output); 130 foreach (@eufind_lines) 131 { 132 if (/^(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\S+)/o) 133 { 134 $trna = tRNAscanSE::tRNA->new; 135 $trna->seqname($1); 136# $trnact = $2; 137 $trna->start($3); 138 $trna->end($4); 139 $trna->isotype($5); 140 $trna->anticodon($6); 141 $trna->score($9); 142 143 if ($trna->start() < $trna->end()) 144 { 145 $trna->position($trna->start()); 146 $trna->strand("+"); 147 } 148 else 149 { 150 $trna->position($global_constants->get("really_big_number") - $trna->start() + 1); 151 $trna->strand("-"); 152 } 153 154 if ($trna->start() == $trna->end()) 155 { 156 print STDERR "Error reading EufindtRNA results: tRNA of length 0"; 157 } 158 159 if (!$self->merge_repeat_hit($global_vars->{stats}, $global_vars->{fp_tRNAs}, $trna)) 160 { 161 # insert non-redundant hit in order 162 # 'Merge_repeat_hits' depends on list being in order 163 $i = 0; 164 while (($i < $fp_tRNAs->get_count()) && ($fp_tRNAs->get($i)->position() < $trna->position())) 165 { 166 $i++; 167 } 168 169 $trna->hit_source($self->{eufind_mask}); 170 $fp_tRNAs->insert($trna, $i); 171 $stats->increment_trnatotal(); 172 } 173 } 174 } 175} 176 177# check current hit for redundancy against all previous hits in hitlist 178# 179# if it IS a repeat, merge it with overlapping hit and return 1 180# if it doesn't overlap with any hits, return 0 181sub merge_repeat_hit 182{ 183 my $self = shift; 184 my ($stats, $fp_tRNAs, $trna) = @_; 185 186 for (my $i = 0; $i < $fp_tRNAs->get_count(); $i++) 187 { 188 if ($trna->strand() eq "+") 189 { 190 if (($fp_tRNAs->get($i)->strand() eq "+") && 191 (&seg_overlap($trna->start(), $trna->end(), $fp_tRNAs->get($i)->start(), $fp_tRNAs->get($i)->end()))) 192 { 193 $fp_tRNAs->get($i)->start(&min($trna->start(), $fp_tRNAs->get($i)->start())); 194 $fp_tRNAs->get($i)->end(&max($trna->end(), $fp_tRNAs->get($i)->end())); 195 $fp_tRNAs->get($i)->hit_source($fp_tRNAs->get($i)->hit_source() | $self->{eufind_mask}); 196 $fp_tRNAs->get($i)->isotype($trna->isotype()); 197 $fp_tRNAs->get($i)->score($trna->score()); 198 199 # check to see if extended endpoint overlaps i+1 hit's start boundary 200 # if so, combine hit[i] and hit[i+1] into one hit and delete hit[i+1] 201 if (($i != ($fp_tRNAs->get_count() - 1)) and ($fp_tRNAs->get($i+1)->strand() eq "+") 202 and ($fp_tRNAs->get($i)->end() >= $fp_tRNAs->get($i+1)->start())) 203 { 204 $fp_tRNAs->get($i)->end(&max($fp_tRNAs->get($i)->end(), $fp_tRNAs->get($i+1)->end())); 205 $fp_tRNAs->get($i)->hit_source($fp_tRNAs->get($i)->hit_source() | $fp_tRNAs->get($i+1)->hit_source()); 206 207# splice(@$r_hit_list,$i+1,1); # toss out overlapping hit 208 $fp_tRNAs->remove($i+1); 209# $$r_trnact--; 210 $stats->decrement_trnatotal(); 211 } 212 return 1; # exit loop immediately 213 } 214 } 215 else # else (antisense) strand 216 { 217 if (($fp_tRNAs->get($i)->strand() eq "-") && 218 (&seg_overlap($trna->end(), $trna->start(), $fp_tRNAs->get($i)->end(), $fp_tRNAs->get($i)->start()))) 219 { 220 $fp_tRNAs->get($i)->start(&max($trna->start(), $fp_tRNAs->get($i)->start())); 221 $fp_tRNAs->get($i)->end(&min($trna->end(), $fp_tRNAs->get($i)->end())); 222 $fp_tRNAs->get($i)->hit_source($fp_tRNAs->get($i)->hit_source() | $self->{eufind_mask}); 223 $fp_tRNAs->get($i)->isotype($trna->isotype()); 224 $fp_tRNAs->get($i)->score($trna->score()); 225 226 if (($i != ($fp_tRNAs->get_count() - 1)) and 227 ($fp_tRNAs->get($i)->end() <= $fp_tRNAs->get($i+1)->start())) 228 { 229 $fp_tRNAs->get($i)->end(&min($fp_tRNAs->get($i)->end(), $fp_tRNAs->get($i+1)->end())); 230 $fp_tRNAs->get($i)->hit_source($fp_tRNAs->get($i)->hit_source() | $fp_tRNAs->get($i+1)->hit_source()); 231 232# splice(@$r_hit_list,$i+1,1); # toss out overlapping hit 233 $fp_tRNAs->remove($i+1); 234# $$r_trnact--; 235 $stats->decrement_trnatotal(); 236 } 237 return 1; # exit loop immediately 238 } 239 } # else (antisense) strand 240 241 } # for each (hit) 242 243 return 0; # current hit is not a repeat 244} 2451; 246