1# tRNAscanSE/FpScanResultFile.pm 2# This class defines the first pass scanning result file used in tRNAscan-SE. 3# 4# -------------------------------------------------------------- 5# This module is part of the tRNAscan-SE program. 6# Copyright (C) 2017 Patricia Chan and Todd Lowe 7# -------------------------------------------------------------- 8# 9 10package tRNAscanSE::FpScanResultFile; 11 12use strict; 13use tRNAscanSE::Utils; 14use tRNAscanSE::Sequence; 15use tRNAscanSE::tRNA; 16use tRNAscanSE::ArraytRNA; 17use tRNAscanSE::Options; 18 19sub new 20{ 21 my $class = shift; 22 my $file_name = shift; 23 my $self = { 24 file_name => $file_name, 25 FILE_H => undef, 26 FLANKING_H => undef 27 }; 28 29 $self->{indexes} = []; 30 $self->{flanking_file} = ""; 31 32 bless ($self, $class); 33 $self->reset_current_seq(); 34 return $self; 35} 36 37sub DESTROY 38{ 39 my $self = shift; 40} 41 42sub clear 43{ 44 my $self = shift; 45 $self->{file_name} = ""; 46 @{$self->{indexes}} = (); 47 $self->reset_current_seq(); 48} 49 50sub file_name 51{ 52 my $self = shift; 53 if (@_) { $self->{file_name} = shift; } 54 return $self->{file_name}; 55} 56 57sub flanking_file 58{ 59 my $self = shift; 60 if (@_) { $self->{flanking_file} = shift; } 61 return $self->{flanking_file}; 62} 63 64sub get_indexes 65{ 66 my $self = shift; 67 return @{$self->{indexes}}; 68} 69 70sub clear_indexes 71{ 72 my $self = shift; 73 @{$self->{indexes}} = (); 74} 75 76sub get_hit_count 77{ 78 my $self = shift; 79 my $count = 0; 80 81 for (my $i = 0; $i < scalar(@{$self->{indexes}}); $i++) 82 { 83 $count += $self->{indexes}->[$i]->[2]; 84 } 85 return $count; 86} 87 88sub get_pos 89{ 90 my $self = shift; 91 my $index = shift; 92 my $pos = undef; 93 94 if ($index > -1 and $index < scalar(@{$self->{indexes}})) 95 { 96 $pos = $self->{indexes}->[$index]->[0]; 97 } 98 return $pos; 99} 100 101sub reset_current_seq 102{ 103 my $self = shift; 104 $self->{current_seq} = -1; 105 $self->{current_record} = 0; 106} 107 108sub open_file 109{ 110 my $self = shift; 111 112 my $success = 0; 113 114 if ($self->{file_name} ne "") 115 { 116 &open_for_read(\$self->{FILE_H}, $self->{file_name}); 117 $success = 1; 118 } 119 else 120 { 121 die "First pass result file name is not set.\n" 122 } 123 124 return $success; 125} 126 127sub close_file 128{ 129 my $self = shift; 130 131 if (defined $self->{FILE_H}) 132 { 133 close($self->{FILE_H}); 134 } 135} 136 137sub init_fp_result_file 138{ 139 my $self = shift; 140 my ($file) = @_; 141 $self->{file_name} = $file; 142 143 &open_for_append(\$self->{FILE_H}, $self->{file_name}); 144 my $fh = $self->{FILE_H}; 145 146 print $fh "Sequence\t\ttRNA Bounds\ttRNA\tAnti\t\n"; 147 print $fh "Name \ttRNA #\tBegin\tEnd\tType\tCodon\tSeqID\tSeqLen\tScore\n"; 148 print $fh "--------\t------\t-----\t---\t----\t-----\t-----\t------\t-----\n"; 149 150 $self->close_file(); 151} 152 153sub save_firstpass_output 154{ 155 my $self = shift; 156 my ($opts, $fp_tRNAs, $r_fpass_trna_base_ct, $seq_len, $seq_id) = @_; 157 my $triplet = ""; 158 159 my @source_tab = ('Inf', 'Ts', 'Eu', 'Bo'); 160 161 if (!$opts->CM_mode()) 162 { 163 &open_for_append(\$self->{FILE_H}, $opts->out_file()); 164 } 165 else 166 { 167 &open_for_append(\$self->{FILE_H}, $self->{file_name}); 168 } 169 my $fh = $self->{FILE_H}; 170 171 for (my $i = 0; $i < $fp_tRNAs->get_count(); $i++) 172 { 173 my $trna = $fp_tRNAs->get($i); 174 175 $triplet = uc($trna->anticodon()); 176 if ($opts->output_codon()) 177 { 178 $triplet = &rev_comp_seq($triplet); 179 } 180 181 printf $fh "%-10s\t%d\t%d\t%d\t%s\t%s\t", 182 $trna->seqname(), $i + 1, $trna->start(), $trna->end(), $trna->isotype(), $triplet; 183 184 # save intron bounds if not doing covariance model analysis 185 if (!$opts->CM_mode()) 186 { 187 if ($trna->get_intron_count() > 0) 188 { 189 my @ar_introns = $trna->ar_introns(); 190 printf $fh "%d\t%d\t", $ar_introns[0]->{rel_start}, $ar_introns[0]->{rel_end}; 191 } 192 else 193 { 194 printf $fh "0\t0\t"; 195 } 196 printf $fh "%.2f", $trna->score(); 197 } 198 # save seq id number and source seq length if needed for covariance model analysis 199 else 200 { 201 printf $fh "%d\t%d\t%.2f", $seq_id, $seq_len, $trna->score(); 202 if ($trna->model() ne "") 203 { 204 printf $fh "\t%s", $trna->model(); 205 } 206 else 207 { 208 printf $fh "\tnone"; 209 } 210 } 211 212 if ($opts->save_source()) 213 { 214 print $fh "\t", $source_tab[$trna->hit_source()]; 215 } 216 print $fh "\n"; 217 218 $$r_fpass_trna_base_ct += abs($trna->end() - $trna->start()) + 1; 219 } 220 $self->close_file(); 221} 222 223# Create dummy first-pass result file with all sequences 224sub prep_for_secpass_only 225{ 226 my $self = shift; 227 my ($opts, $stats, $seq_file) = @_; 228 229 $seq_file->open_file($opts->fasta_file(), "read"); 230 &open_for_append(\$self->{FILE_H}, $self->{file_name}); 231 my $fh = $self->{FILE_H}; 232 233 # Don't look for a specific Seq number 234 while ($seq_file->read_fasta($opts, 0)) 235 { 236 print $fh $seq_file->seq_name()."\t1\t1\t".$seq_file->seq_length()."\t???\t???\t".$seq_file->seq_id()."\t".$seq_file->seq_length()." C none\n"; 237 $stats->increment_numscanned(); 238 } 239 240 $self->close_file(); 241 $seq_file->close_file(); 242} 243 244sub index_results 245{ 246 my $self = shift; 247 my ($r_seqinfo_flag) = @_; 248 my $line = ""; 249 my $filepos = undef; 250 my $line_ct = 0; 251 my $prev_seqname = ""; 252 my $seqname = ""; 253 my $record = []; 254 255 if ($self->open_file()) 256 { 257 my $fh = $self->{FILE_H}; 258 $filepos = tell($fh); 259 while ($line = <$fh>) 260 { 261 if ($line =~ /Type\tCodon\tSeqID\tSeqLen/) 262 { 263 # Column header present if we record seqID's and lengths 264 $$r_seqinfo_flag = 1; 265 } 266 elsif ($line =~ /^(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)/o) 267 { 268 $seqname = $1; 269 if ($seqname ne $prev_seqname) 270 { 271 $record = []; 272 push(@$record, $filepos); 273 push(@$record, $seqname); 274 push(@$record, 0); 275 push(@{$self->{indexes}}, $record); 276 277 if ($prev_seqname ne "") 278 { 279 $self->{indexes}->[scalar(@{$self->{indexes}})-2]->[2] = $line_ct; 280 } 281 282 $prev_seqname = $seqname; 283 $line_ct = 0; 284 } 285 $line_ct++; 286 } 287 $filepos = tell($fh); 288 } 289 if ($prev_seqname ne "") 290 { 291 $self->{indexes}->[scalar(@{$self->{indexes}})-1]->[2] = $line_ct; 292 } 293 $self->close_file(); 294 } 295} 296 297sub get_next_tRNA_candidate 298{ 299 my $self = shift; 300 my ($opts, $seqinfo_flag, $seq_ct, $trna) = @_; 301 my $fh = $self->{FILE_H}; 302 my $line = ""; 303 my $padding = $opts->padding(); 304 my ($trnact, $hit_source); 305 306 $trna->clear(); 307 my $record = $self->{indexes}->[$self->{current_seq}]; 308 if ($self->{current_seq} != $seq_ct) 309 { 310 $self->{current_seq} = $seq_ct; 311 $self->{current_record} = 0; 312 $record = $self->{indexes}->[$seq_ct]; 313 seek($fh, $record->[0], 0); 314 } 315 316 if (defined $fh and $line = <$fh> and ($self->{current_record} < $record->[2])) 317 { 318 if ($line =~ /^(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)/o) 319 { 320 $trna->seqname($1); 321 $trnact = $2; 322 $trna->id($trnact); 323 $trna->tRNAscan_id($trna->seqname().".t".$trnact); 324 $trna->start($3); # trna subseq absolute start index 325 $trna->end($4); # trna subseq absolute end index 326 $trna->isotype($5); 327 $trna->anticodon($6); 328 $trna->src_seqid($7); 329 $trna->src_seqlen($8); 330 $trna->score($9); 331 $trna->model($10); 332 $hit_source = $'; 333 $hit_source =~ s/[\s\t\n]//g; 334 $trna->hit_source($hit_source); 335 336 # if seqinfo_flag not set, file does not have SeqID info in 337 # 7th column of output, don't mistake number read for SeqID 338 if (!$seqinfo_flag) 339 { 340 $trna->src_seqid(0); 341 } 342 343 if ($trna->end() > $trna->start()) 344 { 345 $trna->strand("+"); 346 347 # pad ends of sequence only if EufindtRNA or Infernal first-pass is being used 348 # and $seqinfo_flag is set (we know the seq lengths) 349 if (($opts->eufind_mode() || $opts->infernal_fp()) && $seqinfo_flag) 350 { 351 $trna->start(&max(1, $trna->start() - $padding)); 352 $trna->end(&min($trna->src_seqlen(), $trna->end() + $padding)); 353 } 354 } 355 else 356 { 357 $trna->strand("-"); 358 359 if (($opts->eufind_mode() || $opts->infernal_fp()) && $seqinfo_flag) 360 { 361 $trna->start(&min($trna->src_seqlen(), $trna->start() + $padding)); 362 $trna->end(&max(1, $trna->end() - $padding)); 363 } 364 } 365 366 if ($trna->end() == $trna->start()) 367 { 368 print STDERR "Error reading $self->{file_name}: tRNA of length 0\n"; 369 } 370 371 } 372 $self->{current_record}++; 373 } 374} 375 376sub open_flanking 377{ 378 my $self = shift; 379 my $mode = shift; 380 381 my $success = 0; 382 383 if ($self->{flanking_file} ne "") 384 { 385 if ($mode eq "write") 386 { 387 &open_for_write(\$self->{FLANKING_H}, $self->{flanking_file}); 388 } 389 else 390 { 391 &open_for_read(\$self->{FLANKING_H}, $self->{flanking_file}); 392 } 393 $success = 1; 394 } 395 else 396 { 397 die "First pass flanking file name is not set.\n" 398 } 399 400 return $success; 401} 402 403sub close_flanking 404{ 405 my $self = shift; 406 407 if (defined $self->{FLANKING_H}) 408 { 409 close($self->{FLANKING_H}); 410 } 411} 412 413sub write_tRNA_flanking 414{ 415 my $self = shift; 416 my $trna = shift; 417 418 if (defined $self->{FLANKING_H}) 419 { 420 my $fh = $self->{FLANKING_H}; 421 print $fh $trna->seqname()."\t".$trna->id()."\t".$trna->seq()."\t".$trna->upstream()."\t".$trna->downstream()."\n"; 422 } 423} 424 425sub read_tRNA_flanking 426{ 427 my $self = shift; 428 my $trna = shift; 429 430 my $fh = $self->{FLANKING_H}; 431 my $line = ""; 432 433 if (defined $fh and $line = <$fh>) 434 { 435 chomp($line); 436 my @columns = split(/\t/, $line); 437 if ($trna->seqname() eq $columns[0] and $trna->id() == $columns[1]) 438 { 439 $trna->seq($columns[2]); 440 $trna->upstream($columns[3]); 441 $trna->downstream($columns[4]); 442 } 443 } 444} 445 4461; 447