1# tRNAscanSE/Tscan.pm 2# This class contains parameters and functions for running tRNAscan used in tRNAscan-SE. 3# 4# -------------------------------------------------------------- 5# This module is part of the tRNAscan-SE program. 6# Copyright (C) 2017 Patricia Chan and Todd Lowe 7# -------------------------------------------------------------- 8# 9 10package tRNAscanSE::Tscan; 11 12use strict; 13use tRNAscanSE::Configuration; 14use tRNAscanSE::Utils; 15use tRNAscanSE::ArraytRNA; 16use tRNAscanSE::tRNA; 17use tRNAscanSE::GeneticCode; 18use tRNAscanSE::Stats; 19 20sub new 21{ 22 my $class = shift; 23 my $self = {}; 24 25 initialize($self); 26 27 bless ($self, $class); 28 return $self; 29} 30 31sub DESTROY 32{ 33 my $self = shift; 34} 35 36sub initialize 37{ 38 my $self = shift; 39 40 # set to non-zero if you do NOT want redundant, overlapping hits 41 # found by tRNAscan merged into one hit 42 $self->{keep_tscan_repeats} = 0; 43 44 $self->{tscan_version} = 1.4; # version of tRNAscan used by tRNAscan-SE 45 $self->{tscan_bin} = "trnascan-1.4"; 46 $self->{tscan_mask} = 1; # Bit-wise masks for source of tRNA hits 47} 48 49sub set_defaults 50{ 51 my $self = shift; 52 my $global_vars = shift; 53 my $global_constants = $global_vars->{global_constants}; 54 55 $self->{tscan_params} = $global_constants->get_subvalue("tscan", "strict_param"); 56} 57 58sub keep_tscan_repeats 59{ 60 my $self = shift; 61 if (@_) { $self->{keep_tscan_repeats} = shift; } 62 return $self->{keep_tscan_repeats}; 63} 64 65sub tscan_params 66{ 67 my $self = shift; 68 if (@_) { $self->{tscan_params} = shift; } 69 return $self->{tscan_params}; 70} 71 72sub tscan_version 73{ 74 my $self = shift; 75 if (@_) { $self->{tscan_version} = shift; } 76 return $self->{tscan_version}; 77} 78 79sub tscan_bin 80{ 81 my $self = shift; 82 if (@_) { $self->{tscan_bin} = shift; } 83 return $self->{tscan_bin}; 84} 85 86sub tscan_mask 87{ 88 my $self = shift; 89 return $self->{tscan_mask}; 90} 91 92sub set_bin 93{ 94 my $self = shift; 95 my $bindir = shift; 96 97 # choose correct name for version being run 98 # only version 1.4 is provided with distribution 99 100 if ($self->{tscan_version} == 1.4) 101 { 102 $self->{tscan_bin} = "trnascan-1.4"; 103 } 104 elsif ($self->{tscan_version} == 1.39) 105 { 106 $self->{tscan_bin} = "trnascan-1.39"; 107 } 108 elsif ($self->{tscan_version} == 2) 109 { 110 $self->{tscan_bin} = "TRNAscan"; 111 } 112 elsif ($self->{tscan_version} == 1.3) 113 { 114 $self->{tscan_bin} = "trnascan-1.3"; 115 } 116 else 117 { 118 die "FATAL: Illegal tRNAscan version.\n\n"; 119 } 120 121 if ($^O =~ /^MSWin/) 122 { 123 $self->{tscan_bin} .= ".exe"; 124 } 125 126 if (!(-x $self->{tscan_bin})) 127 { 128 $self->{tscan_bin} = $bindir."/".$self->{tscan_bin}; 129 if (!(-x $self->{tscan_bin})) 130 { 131 die "FATAL: Unable to find ".$self->{tscan_bin}." executable\n\n"; 132 } 133 } 134} 135 136sub run_tRNAscan 137{ 138 my $self = shift; 139 my ($tmp_fa, $tmp_raw, $start_index, $lib_dir, $seq_name) = @_; 140 my $tscan_version = $self->{tscan_version}; 141 my $tscan_bin = $self->{tscan_bin}; 142 my $tscan_params = $self->{tscan_params}; 143 144 # version provided with distribution 145 if ($tscan_version == 1.4) 146 { 147 # run default tRNAscan 1.4 using selected param set 148 system ("$tscan_bin -i $start_index -c $tscan_params $tmp_fa > $tmp_raw"); 149 if (&error_exit_status("tRNAscan", $seq_name)) 150 { 151 return -1; 152 } 153 } 154 155 # run tRNAscan without conservative ambiguous base pairing rules 156 # not available in distribution version 157 elsif ($tscan_version == 1.39) 158 { 159 system ("$tscan_bin -c $tscan_params $tmp_fa > $tmp_raw"); 160 } 161 162 # run tRNAscan v2.0, not available in distribution version 163 elsif ($tscan_version == 2) 164 { 165 system ("$tscan_bin -SEQ $tmp_fa -TEMPLATE SEtemplate -OUTPUT $tmp_raw > /dev/null"); 166 } 167 168 # run original tRNAscan 1.3, not available in distribution version 169 elsif ($tscan_version == 1.3) 170 { 171 if (!(-r "./TPCsignal")) 172 { 173 system ("ln -s ".$lib_dir."TPCsignal TPCsignal"); 174 } 175 if (!(-r "./Dsignal")) 176 { 177 system ("ln -s ".$lib_dir."Dsignal Dsignal"); 178 } 179 system ("reformat -ld genbank $tmp_fa > tmp.gb"); 180 system ("$tscan_bin tmp.gb $tmp_raw > /dev/null"); 181 system ("rm tmp.gb"); 182 } 183 else 184 { 185 die "FATAL: Illegal tRNAscan version.\n\n"; 186 } 187} 188 189# Append tRNAscan verbose output to 190# result file with header tag 191sub append_verbfile 192{ 193 my $self = shift; 194 my ($verb_file, $tmp_fa, $seq_name) = @_; 195 196 &open_for_append(\*TSCANVERB, $verb_file); 197 print TSCANVERB "\n>>>> tRNA-Scan verbose output for <$seq_name>\n\n"; 198 close TSCANVERB; 199 system ("cat tscan.verb.out >> $verb_file"); 200} 201 202# extract trna hits from raw result file while weeding out repeated hits 203# save non-redundant hits in "hit_list" array 204sub process_tRNAscan_hits 205{ 206 my $self = shift; 207 my ($global_vars, $seq_name) = @_; 208 my $global_constants = $global_vars->{global_constants}; 209 my $gc = $global_vars->{gc}; 210 my $fp_tRNAs = $global_vars->{fp_tRNAs}; 211 my $stats = $global_vars->{stats}; 212 213 my $tmp_raw = $global_constants->get("tmp_raw"); 214 my $trna = tRNAscanSE::tRNA->new; 215 my $i = 0; 216 217 # open trnascan raw output file for current seq 218 open (TSCANRAW, "$tmp_raw") || die ("FATAL: Unable to open temp raw output file $tmp_raw\n\n"); 219 220 # parse one complete hit per call 221 while ($self->parse_tscan_hit($global_constants, $gc, \*TSCANRAW, $trna)) 222 { 223 # if NOT a repeat hit, put it on the hit list 224 if ($self->{keep_tscan_repeats} || 225 (!$self->merge_repeat_hit($global_vars->{stats}, $global_vars->{fp_tRNAs}, $trna))) 226 { 227 # check to see if tscan 1.3 has incorrectly reported 228 # start/end index (happens occassionally) 229 if ((abs($trna->end() - $trna->start()) + 1) != length($trna->seq())) 230 { 231 if ($trna->strand() eq "+") 232 { 233 $trna->end() = $trna->start() + length($trna->seq()) - 1; 234 } 235 else 236 { 237 $trna->end() = $trna->start() - length($trna->seq()) + 1; 238 } 239 } 240 241 $i = 0; 242 while (($i < $fp_tRNAs->get_count()) && ($fp_tRNAs->get($i)->position() < $trna->position())) 243 { 244 $i++; 245 } 246 247 $trna->seqname($seq_name); 248 $trna->hit_source($self->{tscan_mask}); 249 $fp_tRNAs->insert($trna, $i); 250 $stats->increment_trnatotal(); 251 } 252 $trna = tRNAscanSE::tRNA->new; 253 } # while (&Parse_tscan_hit), more hits to process for cur seq 254} 255 256sub parse_tscan_hit 257{ 258 my $self = shift; 259 my ($global_constants, $gc, $TSCANRAW, $trna) = @_; 260 261 if ($self->{tscan_version} <= 1.4) 262 { 263 while (<$TSCANRAW>) 264 { 265 if (/^start position=\s*(\d+)\s*end position=\s*(\d+)/o) 266 { 267 $trna->start($1); 268 $trna->end($2); 269 if ($trna->start() < $trna->end()) 270 { 271 $trna->strand("+"); 272 $trna->position($trna->start()); 273 } 274 else 275 { 276 $trna->strand("-"); 277 $trna->position($global_constants->get("really_big_number") - $trna->start() + 1); 278 } 279 } 280 elsif (/^potential tRNA sequence=\s(.+)\n/o) 281 { 282 $trna->seq($1); 283 } 284 elsif (/^tRNA predict as a tRNA-\s*(\S+)\s*: anticodon (\S+)/o) 285 { 286 $trna->isotype($1); 287 $trna->anticodon($2); 288 } 289 elsif (/^anticodon includes unknown bases/o) 290 { 291 $trna->isotype($gc->undef_isotype()); 292 $trna->anticodon($gc->undef_anticodon()); 293 } 294 elsif (/^potential intron between positions\s*(\d+)\s*(\d+)/o) 295 { 296 $trna->add_intron($1, $2, $1, $2, "CI"); 297 } 298 # flag for end of current tRNA hit info 299 elsif (/^number of base pairing in the anticodon/o) 300 { 301 return 1; 302 } 303 elsif (/^number of predicted tRNA=(\d+)/o) 304 { 305 return 0; # end of hits for this seq 306 } 307 } 308 return 0; # reached end of raw hits file 309 } 310 else 311 { 312 die "FATAL: Illegal tRNAscan version selected.\n\n"; 313 } 314} 315 316# check current hit for redundancy against all previous hits in hitlist 317# if it IS a repeat, merge it with overlapping hit and return 1 318# if it doesn't overlap with any hits, return 0 319sub merge_repeat_hit 320{ 321 my $self = shift; 322 my ($stats, $fp_tRNAs, $trna) = @_; 323 324 for (my $i = 0; $i < $fp_tRNAs->get_count(); $i++) 325 { 326 if ($trna->strand() eq "+") 327 { 328 if (($fp_tRNAs->get($i)->strand() eq "+") && 329 (&seg_overlap($trna->start(), $trna->end(), $fp_tRNAs->get($i)->start(), $fp_tRNAs->get($i)->end()))) 330 { 331 $fp_tRNAs->get($i)->start(&min($trna->start(), $fp_tRNAs->get($i)->start())); 332 $fp_tRNAs->get($i)->end(&max($trna->end(), $fp_tRNAs->get($i)->end())); 333 $fp_tRNAs->get($i)->hit_source($fp_tRNAs->get($i)->hit_source() | $self->{tscan_mask}); 334 $fp_tRNAs->get($i)->isotype($trna->isotype()); 335 $fp_tRNAs->get($i)->score($trna->score()); 336 337 # check to see if extended endpoint overlaps i+1 hit's start boundary 338 # if so, combine hit[i] and hit[i+1] into one hit and delete hit[i+1] 339 if (($i != ($fp_tRNAs->get_count() - 1)) and ($fp_tRNAs->get($i+1)->strand() eq "+") 340 and ($fp_tRNAs->get($i)->end() >= $fp_tRNAs->get($i+1)->start())) 341 { 342 $fp_tRNAs->get($i)->end(&max($fp_tRNAs->get($i)->end(), $fp_tRNAs->get($i+1)->end())); 343 $fp_tRNAs->get($i)->hit_source($fp_tRNAs->get($i)->hit_source() | $fp_tRNAs->get($i+1)->hit_source()); 344 345# splice(@$r_hit_list,$i+1,1); # toss out overlapping hit 346 $fp_tRNAs->remove($i+1); 347# $$r_trnact--; 348 $stats->decrement_trnatotal(); 349 } 350 return 1; # exit loop immediately 351 } 352 } 353 else # else (antisense) strand 354 { 355 if (($fp_tRNAs->get($i)->strand() eq "-") && 356 (&seg_overlap($trna->end(), $trna->start(), $fp_tRNAs->get($i)->end(), $fp_tRNAs->get($i)->start()))) 357 { 358 $fp_tRNAs->get($i)->start(&max($trna->start(), $fp_tRNAs->get($i)->start())); 359 $fp_tRNAs->get($i)->end(&min($trna->end(), $fp_tRNAs->get($i)->end())); 360 $fp_tRNAs->get($i)->hit_source($fp_tRNAs->get($i)->hit_source() | $self->{tscan_mask}); 361 $fp_tRNAs->get($i)->isotype($trna->isotype()); 362 $fp_tRNAs->get($i)->score($trna->score()); 363 364 if (($i != ($fp_tRNAs->get_count() - 1)) and 365 ($fp_tRNAs->get($i)->end() <= $fp_tRNAs->get($i+1)->start())) 366 { 367 $fp_tRNAs->get($i)->end(&min($fp_tRNAs->get($i)->end(), $fp_tRNAs->get($i+1)->end())); 368 $fp_tRNAs->get($i)->hit_source($fp_tRNAs->get($i)->hit_source() | $fp_tRNAs->get($i+1)->hit_source()); 369 370# splice(@$r_hit_list,$i+1,1); # toss out overlapping hit 371 $fp_tRNAs->remove($i+1); 372# $$r_trnact--; 373 $stats->decrement_trnatotal(); 374 } 375 return 1; # exit loop immediately 376 } 377 } # else (antisense) strand 378 379 } # for each (hit) 380 381 return 0; # current hit is not a repeat 382} 383 3841;