1# tRNAscanSE/ScanResult.pm 2# This class describes the outputs of scan results used in tRNAscan-SE. 3# 4# -------------------------------------------------------------- 5# This module is part of the tRNAscan-SE program. 6# Copyright (C) 2017 Patricia Chan and Todd Lowe 7# -------------------------------------------------------------- 8# 9 10package tRNAscanSE::ScanResult; 11 12use strict; 13use tRNAscanSE::Utils; 14use tRNAscanSE::Sequence; 15use tRNAscanSE::tRNA; 16use tRNAscanSE::ArraytRNA; 17use tRNAscanSE::IntResultFile; 18use tRNAscanSE::MultiResultFile; 19use tRNAscanSE::Options; 20 21require Exporter; 22our @ISA = qw(Exporter); 23our @EXPORT = qw(save_Acedb_from_firstpass write_tRNA write_tRNAs output_tRNA write_bed output_split_fragments print_results_header); 24 25our $printed_header = 0; # keeps track of whether or 26 # or not results column header 27 # has been printed yet 28our ($max_seq_name_width, $max_seq_len_width); 29 30 31sub save_Acedb_from_firstpass 32{ 33 my ($output_codon, $r_one_let_trans_map, $fp_tRNAs, $out_file) = @_; 34 my $triplet = ""; 35 36 &open_for_append(\*FILE_H, $out_file); 37 38 for (my $i = 0; $i < $fp_tRNAs->get_count(); $i++) 39 { 40 my $trna = $fp_tRNAs->get($i); 41 42 printf FILE_H "Sequence\t%s\nSubsequence\t%s.t%d %d %d\n\n", 43 $trna->seqname(), $trna->seqname(), $i + 1, $trna->start(), $trna->end(); 44 printf FILE_H "Sequence\t%s.t%d\nSource\t\t%s\n", $trna->seqname(), $i + 1, $trna->seqname(); 45 46 if ($trna->get_intron_count() > 0) 47 { 48 my @ar_introns = $trna->ar_introns(); 49 50 if ($ar_introns[0]->{rel_start} < $ar_introns[0]->{rel_end}) 51 { 52 printf FILE_H "Source_Exons\t1 %d\n", ($ar_introns[0]->{rel_start} - $trna->start()); 53 printf FILE_H "Source_Exons\t%d %d\n", 54 $ar_introns[0]->{rel_end} - $trna->start() + 2, 55 $trna->end() - $trna->start() + 1; 56 } 57 else 58 { 59 printf FILE_H "Source_Exons\t1 %d\n", ($trna->start() - $ar_introns[0]->{rel_start} + 1); 60 printf FILE_H "Source_Exons\t%d %d\n", 61 $trna->start() - $ar_introns[0]->{rel_end} + 2, 62 $trna->start() - $trna->end() + 1; 63 } 64 } 65 printf FILE_H "Brief_identification tRNA-%s\n", $trna->isotype(); 66 67 # either output Codon or Anticodon for tRNA 68 $triplet = uc($trna->anticodon()); 69 if ($output_codon) 70 { 71 $triplet = &rev_comp_seq($triplet); 72 } 73 74 printf FILE_H "Transcript tRNA \"%s %s %s\"\n\n", 75 $triplet, $trna->isotype(), $r_one_let_trans_map->{$trna->isotype()}; 76 77 } 78 close(FILE_H); 79} 80 81sub print_results_header 82{ 83 my ($TABOUT, $opts, $get_hmm_score, $seq_name_width, $seq_len_width, $fp) = @_; 84 my ($label, $codon_label) = ""; 85 86 if ($opts->cove_mode()) 87 { 88 $label = "\tCove"; 89 } 90 elsif ($opts->infernal_mode()) 91 { 92 $label = "\tInf"; 93 } 94 elsif ($opts->eufind_mode() && !$opts->tscan_mode()) 95 { 96 $label = "\tEufind"; 97 } 98 99 if ($opts->output_codon()) 100 { 101 $codon_label = " "; 102 } 103 else 104 { 105 $codon_label = "Anti"; 106 } 107 108 if (!($opts->ace_output())) 109 { 110 printf {$$TABOUT} "%-".$seq_name_width."s\t\t","Sequence"; 111 printf {$$TABOUT} "%-".$seq_len_width."s\t","tRNA"; 112 printf {$$TABOUT} "%-".$seq_len_width."s\t","Bounds"; 113 print {$$TABOUT} "tRNA\t$codon_label\tIntron Bounds",$label; 114 115 if ($get_hmm_score) 116 { 117 print {$$TABOUT} "\tHMM\t2'Str"; 118 } 119 if ($opts->infernal_score()) 120 { 121 print {$$TABOUT} "\tInf"; 122 } 123 if ($opts->save_source()) 124 { 125 print {$$TABOUT} "\tHit"; 126 } 127 if (!$fp and ((($opts->euk_mode() or $opts->bact_mode() or $opts->arch_mode()) and !$opts->no_isotype() and $opts->detail()) or $opts->metagenome_mode())) 128 { 129 print {$$TABOUT} "\tIsotype\tIsotype"; 130 if ($opts->euk_mode() and $opts->mito_model() ne "") 131 { 132 print {$$TABOUT} "\tType"; 133 } 134 } 135 print {$$TABOUT} "\t "; 136 print {$$TABOUT} "\n"; 137 138 printf {$$TABOUT} "%-".$seq_name_width."s\t","Name"; 139 print {$$TABOUT} "tRNA \#\t"; 140 printf {$$TABOUT} "%-".$seq_len_width."s\t","Begin"; 141 printf {$$TABOUT} "%-".$seq_len_width."s\t","End"; 142 143 print {$$TABOUT} "Type\tCodon\tBegin\tEnd\tScore"; 144 145 if ($get_hmm_score) 146 { 147 print {$$TABOUT} "\tScore\tScore"; 148 } 149 if ($opts->infernal_score()) 150 { 151 print {$$TABOUT} "\tScore"; 152 } 153 if ($opts->save_source()) 154 { 155 print {$$TABOUT} "\tOrigin"; 156 } 157 if (!$fp and ((($opts->euk_mode() or $opts->bact_mode() or $opts->arch_mode()) and !$opts->no_isotype() and $opts->detail()) or $opts->metagenome_mode())) 158 { 159 print {$$TABOUT} "\tCM\tScore"; 160 if ($opts->euk_mode() and $opts->mito_model() ne "") 161 { 162 print {$$TABOUT} "\t "; 163 } 164 } 165 print {$$TABOUT} "\tNote"; 166 print {$$TABOUT} "\n"; 167 168 printf {$$TABOUT} "%-".$seq_name_width."s\t","--------"; 169 print {$$TABOUT} "------\t"; 170 printf {$$TABOUT} "%-".$seq_len_width."s\t","-----"; 171 printf {$$TABOUT} "%-".$seq_len_width."s\t","------"; 172 print {$$TABOUT} "----\t-----\t-----\t----\t------"; 173 174 if ($get_hmm_score) 175 { 176 print {$$TABOUT} "\t-----\t-----"; 177 } 178 if ($opts->infernal_score()) 179 { 180 print {$$TABOUT} "\t-----"; 181 } 182 if ($opts->save_source()) 183 { 184 print {$$TABOUT} "\t------"; 185 } 186 if (!$fp and ((($opts->euk_mode() or $opts->bact_mode() or $opts->arch_mode()) and !$opts->no_isotype() and $opts->detail()) or $opts->metagenome_mode())) 187 { 188 print {$$TABOUT} "\t-------\t-------"; 189 if ($opts->euk_mode() and $opts->mito_model() ne "") 190 { 191 print {$$TABOUT} "\t---------"; 192 } 193 } 194 print {$$TABOUT} "\t------"; 195 print {$$TABOUT} "\n"; 196 } 197} 198 199sub write_tRNA 200{ 201 my ($file_name, $seq_name, $seq_desc, $seq, $overwrite) = @_; 202 203 my $trna_file = tRNAscanSE::Sequence->new; 204 my $write_mode = "append"; 205 if ($overwrite) { 206 $write_mode = "write"; 207 } 208 $trna_file->set_seq_info($seq_name, $seq_desc, length($seq), $seq); 209 $trna_file->open_file($file_name, $write_mode); 210 $trna_file->write_fasta(); 211 $trna_file->close_file(); 212} 213 214sub write_tRNAs 215{ 216 my ($file_name, $sp_int_results, $mat_seq, $overwrite, $model) = @_; 217 218 my $count = 0; 219 my $trna_file = tRNAscanSE::Sequence->new; 220 my $write_mode = "append"; 221 if ($overwrite) 222 { 223 $write_mode = "write"; 224 } 225 my $seq = ""; 226 my $tRNA = tRNAscanSE::tRNA->new; 227 $trna_file->open_file($file_name, $write_mode); 228 if ($sp_int_results->open_file("read")) 229 { 230 my @record_indexes = $sp_int_results->get_indexes(); 231 232 for (my $i = 0; $i < scalar(@record_indexes); $i++) 233 { 234 $sp_int_results->get_tRNA($record_indexes[$i]->[0], $tRNA); 235 if ($model eq "" or $tRNA->model() eq $model) 236 { 237 if ($mat_seq) 238 { 239 $seq = $tRNA->mat_seq(); 240 } 241 else 242 { 243 $seq = $tRNA->seq(); 244 } 245 $trna_file->set_seq_info($tRNA->seqname().".t".&pad_num($tRNA->id(), 6), "", length($seq), $seq); 246 $trna_file->write_fasta(); 247 $count++; 248 } 249 } 250 } 251 $trna_file->close_file(); 252 253 return $count; 254} 255 256# Write final tRNA prediction to various selected output sources/files 257# Sets globals $MaxSeqNameWidth and $MaxSeqLenWidth and $printed_header 258 259sub output_tRNA 260{ 261 my ($global_vars, $cm, $r_tab_results, $get_hmm_score, $program_id) = @_; 262 my $opts = $global_vars->{options}; 263 my $log = $global_vars->{log_file}; 264 my $gc = $global_vars->{gc}; 265 my $sp_int_results = $global_vars->{sp_int_results}; 266 my $iso_int_results = $global_vars->{iso_int_results}; 267 my $tRNA = tRNAscanSE::tRNA->new; 268 269 my $results_line = ""; 270 my $isotype_line = ""; 271 my ($iso_models, $mito_models); 272 273 my @sp_indexes = $sp_int_results->get_indexes(); 274 if ($sp_int_results->open_file("read")) 275 { 276 if (!$opts->no_isotype() and $iso_int_results->open_file("read")) 277 { 278 ($iso_models, $mito_models) = &get_models($opts, $cm); 279 $iso_int_results->read_models(); 280 } 281 if ($opts->isotype_specific_file() ne "") 282 { 283 &open_for_append(\*ISOTYPE, $opts->isotype_specific_file()); 284 } 285 if ($opts->save_all_struct()) 286 { 287 &open_for_append(\*SECSTRUCT, $opts->all_struct_file()); 288 } 289 if ($opts->ace_output()) 290 { 291 &open_for_append(\*ACEOUT, $opts->out_file()); 292 } 293 else 294 { 295 &open_for_append(\*TABOUT, $opts->out_file()); 296 } 297 if ($opts->output_fasta_file() ne "") 298 { 299 &open_for_append(\*FASTA, $opts->output_fasta_file()); 300 } 301 302 for (my $i = 0; $i < scalar(@sp_indexes); $i++) 303 { 304 $sp_int_results->get_tRNA($sp_indexes[$i]->[0], $tRNA); 305 if (!$opts->no_isotype()) 306 { 307 $iso_int_results->get_next_tRNA($tRNA); 308 } 309 310 my ($type, $model, $score, $ss) = $tRNA->get_highest_score_model(); 311 if ($tRNA->isotype() eq "Met" and $type eq "cyto" and ($model eq "iMet" or $model eq "fMet" or $model eq "Ile2")) 312 { 313 $tRNA->isotype($model); 314 } 315 elsif ($tRNA->isotype() eq "Met" and $type eq "cyto" and $model ne "Met" and $model ne "iMet" and $model ne "fMet") 316 { 317 $tRNA->sort_multi_models("model"); 318 my ($met_iso_model, $met_iso_score, $met_iso_ss) = $tRNA->get_model_hit("cyto", $tRNA->isotype()); 319 my ($ile2_iso_model, $ile2_iso_score, $ile2_iso_ss) = $tRNA->get_model_hit("cyto", "Ile2"); 320 if ($ile2_iso_score > 0 and $met_iso_score > 0) 321 { 322 if (($score - $ile2_iso_score) <= 5 and ($ile2_iso_score - $met_iso_score) >= 5 and $tRNA->score() > 50) 323 { 324 $tRNA->isotype("Ile2"); 325 } 326 } 327 } 328 329 if (!$opts->results_to_stdout()) 330 { 331 $log->broadcast($tRNA->tRNAscan_id().": ".$opts->second_pass_label()." type= ".$tRNA->isotype()."\t ". 332 "Score= ".$tRNA->score()); 333 } 334 if ($opts->save_all_struct()) 335 { 336 &save_allStruct_output(\*SECSTRUCT, $opts, $gc, $get_hmm_score, $tRNA); 337 } 338 if ($opts->output_fasta_file() ne "") 339 { 340 &write_tRNA_sequence(\*FASTA, $tRNA); 341 } 342 343 # Create tabular results line, ready for output 344 if (!$printed_header) 345 { 346 $max_seq_name_width = &max(length($tRNA->src_seqid()) + 1, 8); 347 $max_seq_len_width = length($tRNA->src_seqlen()); 348 } 349 $results_line = &construct_tab_output($opts, $get_hmm_score, $tRNA, $max_seq_name_width, $max_seq_len_width); 350 351 # Internal copy of results saved for later uses 352 push(@$r_tab_results, $results_line); 353 354 if ($opts->ace_output()) 355 { 356 &save_Acedb_from_secpass(\*ACEOUT, $opts, $gc, $tRNA, $program_id); 357 } 358 else 359 { 360 if (!($opts->brief_output() || $printed_header)) 361 { 362 &print_results_header(\*TABOUT, $opts, $get_hmm_score, $max_seq_name_width, $max_seq_len_width, 0); 363 if ($opts->isotype_specific_file() ne "") 364 { 365 &print_isotype_specific_header(\*ISOTYPE, $opts, $iso_models, $mito_models); 366 } 367 368 $printed_header = 1; 369 } 370 print TABOUT $results_line; 371 372 if ($opts->isotype_specific_file() ne "") 373 { 374 $isotype_line = &construct_isotype_specific_output($opts, $iso_models, $mito_models, $tRNA); 375 print ISOTYPE $isotype_line; 376 } 377 } 378 } 379 380 if ($opts->ace_output()) 381 { 382 close ACEOUT; 383 } 384 else 385 { 386 close TABOUT; 387 } 388 if ($opts->save_all_struct()) 389 { 390 close SECSTRUCT; 391 } 392 if ($opts->isotype_specific_file() ne "") 393 { 394 close ISOTYPE; 395 } 396 if (!$opts->no_isotype()) 397 { 398 $iso_int_results->close_file(); 399 } 400 if (!$opts->output_fasta_file() ne "") 401 { 402 close FASTA; 403 } 404 $sp_int_results->close_file(); 405 } 406} 407 408sub save_allStruct_output 409{ 410 my ($SECSTRUCT, $opts, $gc, $get_hmm_score, $tRNA) = @_; 411 412 my $ruler = ' * |' x 20; # ruler printed out with 413 # secondary structure output 414 415 my $seqlen = length($tRNA->seq()); 416 my ($type, $model, $score, $ss) = $tRNA->get_highest_score_model(); 417 418 if ($tRNA->strand() eq "+") 419 { 420 print {$$SECSTRUCT} $tRNA->seqname().".trna".$tRNA->id()." (".$tRNA->start()."-".$tRNA->end().")\t", 421 "Length: $seqlen bp\nType: ".$tRNA->isotype()."\t"; 422 } 423 else 424 { 425 print {$$SECSTRUCT} $tRNA->seqname().".trna".$tRNA->id()." (".$tRNA->end()."-".$tRNA->start().")\t", 426 "Length: $seqlen bp\nType: ".$tRNA->isotype()."\t"; 427 } 428 429 if ($opts->output_codon()) 430 { 431 print {$$SECSTRUCT} "Codon: ", &rev_comp_seq($tRNA->anticodon()), " at "; 432 } 433 else 434 { 435 print {$$SECSTRUCT} "Anticodon: ".$tRNA->anticodon()." at "; 436 } 437 438 if ($tRNA->anticodon() eq $gc->undef_anticodon()) 439 { 440 print {$$SECSTRUCT} "0-0 (0-0)\t"; 441 } 442 else 443 { 444 my @ar_ac_pos = $tRNA->ar_ac_pos(); 445 for (my $i = 0; $i < scalar(@ar_ac_pos); $i++) 446 { 447 if ($i > 0) 448 { 449 print {$$SECSTRUCT} ","; 450 } 451 print {$$SECSTRUCT} $ar_ac_pos[$i]->{rel_start}."-".$ar_ac_pos[$i]->{rel_end}; 452 } 453 for (my $i = 0; $i < scalar(@ar_ac_pos); $i++) 454 { 455 if ($i == 0) 456 { 457 print {$$SECSTRUCT} " ("; 458 } 459 elsif ($i > 0) 460 { 461 print {$$SECSTRUCT} ","; 462 } 463 464 if ($tRNA->strand() eq "+") 465 { 466 print {$$SECSTRUCT} $ar_ac_pos[$i]->{rel_start} + $tRNA->start() - 1, "-", 467 $ar_ac_pos[$i]->{rel_start} + $tRNA->start() + 1; 468 } 469 else 470 { 471 print {$$SECSTRUCT} $tRNA->end() - $ar_ac_pos[$i]->{rel_start} + 1, "-", 472 $tRNA->end() - $ar_ac_pos[$i]->{rel_start} - 1; 473 } 474 if ($i == (scalar(@ar_ac_pos) - 1)) 475 { 476 print {$$SECSTRUCT} ")\t"; 477 } 478 479 } 480 } 481 482 print {$$SECSTRUCT} "Score: ".$tRNA->score()."\n"; 483 my @ar_introns = (); 484 my $nci_count = 0; 485 if ($tRNA->get_intron_count() > 0) 486 { 487 @ar_introns = $tRNA->ar_introns(); 488 foreach my $intron (@ar_introns) 489 { 490 if (defined $intron) 491 { 492 print {$$SECSTRUCT} "Possible intron: $intron->{rel_start}-$intron->{rel_end} "; 493 if ($tRNA->strand() eq "+") 494 { 495 print {$$SECSTRUCT} "(".$intron->{start}."-".$intron->{end}.")\n"; 496 } 497 else 498 { 499 print {$$SECSTRUCT} "(".$intron->{end}."-".$intron->{start}.")\n"; 500 } 501 } 502 } 503 } 504 505 my $line = ""; 506 my $note = ""; 507 if ($tRNA->is_pseudo() and $tRNA->is_trunc()) 508 { 509 $note = "Possible truncation, pseudogene"; 510 } 511 elsif ($tRNA->is_pseudo()) 512 { 513 $note = "Possible pseudogene"; 514 } 515 elsif ($tRNA->is_trunc()) 516 { 517 $note = "Possible truncation"; 518 } 519 520 if ($note ne "") 521 { 522 $line = sprintf("%s", $note); 523 } 524 525 if ($get_hmm_score) 526 { 527 if ($note ne "") 528 { 529 $line .= ": "; 530 } 531 532 $line .= sprintf("HMM Sc=%.2f\tSec struct Sc=%.2f", $tRNA->hmm_score(), $tRNA->ss_score()); 533 } 534 535 if ($opts->infernal_score()) 536 { 537 my $inf = $tRNA->get_domain_model("infernal"); 538 if (defined $inf) 539 { 540 $line .= "\tInfernal Sc=".$inf->{score}; 541 } 542 } 543 if ($opts->mito_mode()) 544 { 545 $note = ""; 546 if ($tRNA->category() ne "") 547 { 548 $note = $tRNA->category(); 549 $note =~ s/_/ /g; 550 $note =~ s/mito //g; 551 $note =~ s/iso/type/g; 552 $note =~ s/ac/anticodon/g; 553 $note = uc(substr($note, 0, 1)).substr($note, 1); 554 if ($tRNA->note() ne "") 555 { 556 $note = $note . " ". $tRNA->note(); 557 } 558 $line .= "Note: ".$note; 559 } 560 } 561 562 if ($line ne "") 563 { 564 print {$$SECSTRUCT} $line."\n"; 565 } 566 567 if (!$opts->arch_mode()) 568 { 569 print {$$SECSTRUCT} " ",substr($ruler, 0, $seqlen - 1),"\n"; 570 print {$$SECSTRUCT} "Seq: ".$tRNA->seq()."\nStr: ".$tRNA->ss()."\n\n"; 571 } 572 else 573 { 574 print {$$SECSTRUCT} " ",substr($ruler, 0, length($tRNA->mat_seq()) - 1),"\n"; 575 print {$$SECSTRUCT} "Seq: ".$tRNA->mat_seq()."\nStr: ".$tRNA->mat_ss()."\n"; 576 if (uc($tRNA->seq()) ne uc($tRNA->mat_seq())) 577 { 578 my $precursor_seq = uc($tRNA->seq()); 579 foreach my $intron (@ar_introns) 580 { 581 if (defined $intron) 582 { 583 my $intron_seq = uc(substr($tRNA->seq(), $intron->{rel_start} - 1, $intron->{rel_end} - $intron->{rel_start} + 1)); 584 if ($intron_seq ne "") 585 { 586 $precursor_seq =~ s/$intron_seq/\[$intron_seq\]/; 587 } 588 } 589 } 590 print {$$SECSTRUCT} "Pre: ". $precursor_seq ."\n\n"; 591 } 592 else 593 { 594 print {$$SECSTRUCT} "\n"; 595 } 596 } 597} 598 599sub write_tRNA_sequence 600{ 601 my ($FASTA, $tRNA) = @_; 602 603 my $tRNAscan_id = $tRNA->seqname().".trna".$tRNA->id(); 604 print {$FASTA} ">".$tRNAscan_id." ". 605 $tRNA->seqname().":".$tRNA->start()."-".$tRNA->end()." (".$tRNA->strand().") ". 606 $tRNA->isotype()." (".$tRNA->anticodon().") ".length($tRNA->seq())." bp Sc: ".$tRNA->score(); 607 if ($tRNA->is_pseudo()) 608 { 609 print {$FASTA} " Possible pseudogene\n"; 610 } 611 else 612 { 613 print {$FASTA} "\n"; 614 } 615 my $parts = int(length($tRNA->seq()) / 60); 616 my $remain = length($tRNA->seq()) % 60; 617 for (my $j = 0; $j < $parts; $j++) 618 { 619 print {$FASTA} uc(substr($tRNA->seq(), $j * 60, 60))."\n"; 620 } 621 if ($remain > 0) 622 { 623 print {$FASTA} uc(substr($tRNA->seq(), $parts * 60))."\n"; 624 } 625} 626 627# Save tRNA hits in Tabular output 628sub construct_tab_output 629{ 630 my ($opts, $get_hmm_score, $tRNA, $max_seq_name_width, $max_seq_len_width) = @_; 631 632 my ($result_line, $tRNA_type); 633 my ($type, $model, $score, $ss) = $tRNA->get_highest_score_model(); 634 $tRNA->sort_multi_models("model"); 635 my ($iso_model, $iso_score, $iso_ss) = $tRNA->get_model_hit("cyto", $tRNA->isotype()); 636 637 $result_line = sprintf "%-".$max_seq_name_width."s\t", $tRNA->seqname(); 638 $result_line .= $tRNA->id()."\t"; 639 640 if ($tRNA->strand() eq "+") 641 { 642 $result_line .= sprintf "%-".$max_seq_len_width."d\t", $tRNA->start(); 643 $result_line .= sprintf "%-".$max_seq_len_width."d\t", $tRNA->end(); 644 } 645 else 646 { 647 $result_line .= sprintf "%-".$max_seq_len_width."d\t", $tRNA->end(); 648 $result_line .= sprintf "%-".$max_seq_len_width."d\t", $tRNA->start(); 649 } 650 651 $result_line .= $tRNA->isotype()."\t"; 652 653 if ($opts->output_codon()) 654 { 655 $result_line .= (&rev_comp_seq($tRNA->anticodon()))."\t"; 656 } 657 else 658 { 659 $result_line .= $tRNA->anticodon()."\t"; 660 } 661 662 my @ar_introns = (); 663 if ($tRNA->get_intron_count() == 0) 664 { 665 $result_line .= "0\t0"; 666 } 667 else 668 { 669 my $intron_ct = 0; 670 @ar_introns = $tRNA->ar_introns(); 671 for (my $i = 0; $i < scalar(@ar_introns); $i++) 672 { 673 if (defined $ar_introns[$i]) 674 { 675 if ($intron_ct > 0) 676 { 677 $result_line .= ","; 678 } 679 if ($tRNA->strand() eq "+") 680 { 681 $result_line .= $ar_introns[$i]->{start}; 682 } 683 else 684 { 685 $result_line .= $ar_introns[$i]->{end}; 686 } 687 $intron_ct++; 688 } 689 } 690 $result_line .= "\t"; 691 $intron_ct = 0; 692 for (my $i = 0; $i < scalar(@ar_introns); $i++) 693 { 694 if (defined $ar_introns[$i]) 695 { 696 if ($intron_ct > 0) 697 { 698 $result_line .= ","; 699 } 700 if ($tRNA->strand() eq "+") 701 { 702 $result_line .= $ar_introns[$i]->{end}; 703 } 704 else 705 { 706 $result_line .= $ar_introns[$i]->{start}; 707 } 708 $intron_ct++; 709 } 710 } 711 } 712 $result_line .= "\t".$tRNA->score(); 713 714 if ($get_hmm_score) 715 { 716 $result_line .= sprintf "\t%.2f\t%.2f", $tRNA->hmm_score(), $tRNA->ss_score(); 717 } 718 if ($opts->infernal_score()) 719 { 720 my $inf = $tRNA->get_domain_model("infernal"); 721 if (defined $inf) 722 { 723 $result_line .= "\t".$inf->{score}; 724 } 725 } 726 if ($opts->save_source()) 727 { 728 $result_line .= "\t".$tRNA->hit_source(); 729 } 730 if ((($opts->euk_mode() or $opts->bact_mode() or $opts->arch_mode()) and !$opts->no_isotype() and $opts->detail()) or $opts->metagenome_mode()) 731 { 732 $result_line .= "\t".$model; 733 $result_line .= "\t".$score; 734 if ($opts->euk_mode() and $opts->mito_model() ne "") 735 { 736 $tRNA->category($type); 737 if ($type eq "cyto") 738 { 739 $result_line .= "\tcytosolic"; 740 } 741 else 742 { 743 $result_line .= "\t".$type; 744 } 745 } 746 } 747 if (!$opts->mito_mode() and !$opts->numt_mode()) 748 { 749 my $note = "\t"; 750 if ($tRNA->is_pseudo()) 751 { 752 $note .= "pseudo"; 753 } 754 755 if ($opts->detail()) 756 { 757 if ((($opts->euk_mode() or $opts->bact_mode() or $opts->arch_mode()) and !$opts->no_isotype()) or $opts->metagenome_mode()) 758 { 759 if ($model ne "" and $tRNA->isotype() ne "Undet") 760 { 761 if ($model ne $tRNA->isotype()) 762 { 763 if (($model eq "iMet" or $model eq "fMet" or $model eq "Ile2") and $tRNA->isotype() eq "Met") 764 {} 765 elsif (($model eq "LeuTAA" or $model eq "LeuTAG") and ($tRNA->isotype() eq "Leu" or $tRNA->isotype() eq "Undet") and $type eq "mito") 766 {} 767 elsif (($model eq "SerGCT" or $model eq "SerTGA") and ($tRNA->isotype() eq "Ser" or $tRNA->isotype() eq "Undet") and $type eq "mito") 768 {} 769 else 770 { 771 if ($note ne "\t") 772 { 773 $note .= ","; 774 } 775 $note .= sprintf("IPD:%0.2f", ($iso_score - $score)); 776 } 777 } 778 } 779 } 780 781 if ($tRNA->is_trunc()) 782 { 783 if ($note ne "\t") 784 { 785 $note .= ","; 786 } 787 $note .= $tRNA->trunc(); 788 } 789 790 if ($opts->search_mode() eq "archaea") 791 { 792 if ($tRNA->get_intron_count() > 0) 793 { 794 my $ci_count = 0; 795 my $nci_count = 0; 796 for (my $i = 0; $i < scalar(@ar_introns); $i++) 797 { 798 if ($ar_introns[$i]->{type} eq "CI") 799 { 800 $ci_count++; 801 } 802 elsif ($ar_introns[$i]->{type} eq "NCI") 803 { 804 $nci_count++; 805 } 806 } 807 if ($ci_count > 0) 808 { 809 if ($note ne "\t") 810 { 811 $note .= ","; 812 } 813 $note .= "CI"; 814 } 815 if ($nci_count > 0) 816 { 817 if ($note ne "\t") 818 { 819 $note .= ","; 820 } 821 $note .= "NCI:".$nci_count; 822 } 823 } 824 } 825 } 826 827 $result_line .= $note; 828 } 829 if ($opts->mito_mode()) 830 { 831 my $note = ""; 832 if ($tRNA->category() ne "" and $opts->detail()) 833 { 834 $note = $tRNA->category(); 835 $note =~ s/mito //g; 836 $note =~ s/iso/type/g; 837 if ($tRNA->note() ne "") 838 { 839 $note = $note . $tRNA->note(); 840 } 841 } 842 $result_line .= "\t".$note; 843 } 844 $result_line .= "\n"; 845 846 return $result_line; 847} 848 849sub get_models 850{ 851 my ($opts, $cm) = @_; 852 my %iso_models = (); 853 my %mito_models = (); 854 855 my $models = $cm->get_models_from_db($cm->isotype_cm_db_file_path()); 856 foreach my $cur_iso_cm (sort @$models) 857 { 858 my $model = $cur_iso_cm; 859 if ($cur_iso_cm =~ /^arch-(\S+)/ || $cur_iso_cm =~ /^euk-(\S+)/ || $cur_iso_cm =~ /^bact-(\S+)/) 860 { 861 $model = $1; 862 } 863 $iso_models{$model} = 1; 864 } 865 866 if ($opts->euk_mode() and $opts->mito_model() ne "") 867 { 868 $models = $cm->get_models_from_db($cm->mito_isotype_cm_db_file_path()); 869 foreach my $cur_iso_cm (sort @$models) 870 { 871 $mito_models{$cur_iso_cm} = 1; 872 } 873 } 874 875 return (\%iso_models, \%mito_models); 876} 877 878sub print_isotype_specific_header 879{ 880 my ($ISOTYPE, $opts, $iso_models, $mito_models) = @_; 881 882 print {$$ISOTYPE} "tRNAscanID\tAnticodon_predicted_isotype"; 883 foreach my $cur_iso_cm (sort keys %$iso_models) 884 { 885 print {$$ISOTYPE} "\t".$cur_iso_cm; 886 } 887 888 if ($opts->euk_mode() and $opts->mito_model() ne "") 889 { 890 foreach my $cur_iso_cm (sort keys %$mito_models) 891 { 892 print {$$ISOTYPE} "\tmito_".$cur_iso_cm; 893 } 894 } 895 896 print {$$ISOTYPE} "\n"; 897} 898 899sub construct_isotype_specific_output 900{ 901 my ($opts, $iso_models, $mito_models, $tRNA) = @_; 902 903 my $result_line = $tRNA->seqname().".trna".$tRNA->id(); 904 $result_line .= "\t".$tRNA->isotype(); 905 $tRNA->sort_multi_models("model"); 906 907 foreach my $cur_iso_cm (sort keys %$iso_models) 908 { 909 my ($model, $score, $ss) = $tRNA->get_model_hit("cyto", $cur_iso_cm); 910 if ($model ne "") 911 { 912 $result_line .= "\t".$score; 913 } 914 else 915 { 916 $result_line .= "\t-999"; 917 } 918 } 919 920 if ($opts->euk_mode() and $opts->mito_model() ne "") 921 { 922 foreach my $cur_iso_cm (sort keys %$mito_models) 923 { 924 my ($model, $score, $ss) = $tRNA->get_model_hit("mito", $cur_iso_cm); 925 if ($model ne "") 926 { 927 $result_line .= "\t".$score; 928 } 929 else 930 { 931 $result_line .= "\t-999"; 932 } 933 } 934 } 935 936 $result_line .= "\n"; 937 938 return $result_line; 939} 940 941sub save_Acedb_from_secpass 942{ 943 my ($ACEOUT, $opts, $gc, $tRNA, $program_id) = @_; 944 945 print {$$ACEOUT} "Sequence\t".$tRNA->seqname()."\nSubsequence\t".$tRNA->tRNAscan_id()." ".$tRNA->start()." ".$tRNA->end()."\n\n"; 946 print {$$ACEOUT} "Sequence\t".$tRNA->tRNAscan_id()."\nSource\t\t".$tRNA->seqname()."\n"; 947 if ($tRNA->get_intron_count() > 0) 948 { 949 my @ar_introns = $tRNA->ar_introns(); 950 print {$$ACEOUT} "Source_Exons\t1 ", $ar_introns[0]->{rel_start} - 1,"\n"; 951 print {$$ACEOUT} "Source_Exons\t", $ar_introns[0]->{rel_end} + 1," ", abs($tRNA->end() - $tRNA->start()) + 1,"\n"; 952 } 953 print {$$ACEOUT} "Brief_identification tRNA-".$tRNA->isotype()."\n", 954 "Transcript tRNA \""; 955 956 if ($opts->output_codon()) 957 { 958 print {$$ACEOUT} &rev_comp_seq($tRNA->anticodon()); 959 } 960 else 961 { 962 print {$$ACEOUT} $tRNA->anticodon(); 963 } 964 965 print {$$ACEOUT} " ".$tRNA->isotype()." ", $gc->one_let_trans_map()->{$tRNA->isotype()}, 966 "\"\nScore $program_id ".$tRNA->score()."\n"; 967 968 if ($tRNA->is_pseudo()) 969 { 970 printf {$$ACEOUT} "Remark \"Likely pseudogene (HMM Sc=%.2f / Sec struct Sc=%.2f)\"\n", 971 $tRNA->hmm_score(), $tRNA->ss_score(); 972 } 973 print {$$ACEOUT} "\n"; 974} 975 976sub write_bed 977{ 978 my ($global_vars) = @_; 979 my $opts = $global_vars->{options}; 980 my $sp_int_results = $global_vars->{sp_int_results}; 981 my $iso_int_results = $global_vars->{iso_int_results}; 982 983 $sp_int_results->sort_records("bed_output"); 984 if (!$opts->no_isotype()) 985 { 986 $iso_int_results->sort_records("tRNAscan_id"); 987 } 988 989 my $tRNA = tRNAscanSE::tRNA->new; 990 &open_for_append(\*FILE_OUT, $opts->bed_file()); 991 my @sp_indexes = $sp_int_results->get_indexes(); 992 my @ iso_indexes = $iso_int_results->get_indexes(); 993 if ($sp_int_results->open_file("read")) 994 { 995 if (!$opts->no_isotype()) 996 { 997 $iso_int_results->open_file("read"); 998 } 999 1000 for (my $i = 0; $i < scalar(@sp_indexes); $i++) 1001 { 1002 $sp_int_results->get_tRNA($sp_indexes[$i]->[0], $tRNA); 1003 1004 if (!$opts->no_isotype()) 1005 { 1006 my $id = $tRNA->seqname().".t".&pad_num($tRNA->id(), 6); 1007 my $index = $iso_int_results->bsearch_tRNAscan_id($id); 1008 if ($index > -1) 1009 { 1010 $iso_int_results->get_tRNA($iso_indexes[$index]->[0], $tRNA); 1011 my ($type, $model, $score, $ss) = $tRNA->get_highest_score_model(); 1012 if ($tRNA->isotype() eq "Met" and $type eq "cyto" and ($model eq "iMet" or $model eq "fMet" or $model eq "Ile2")) 1013 { 1014 $tRNA->isotype($model); 1015 $tRNA->tRNAscan_id($tRNA->seqname().".tRNA".$tRNA->id()."-".$tRNA->isotype().$tRNA->anticodon()); 1016 } 1017 elsif ($tRNA->isotype() eq "Met" and $type eq "cyto" and $model ne "Met" and $model ne "iMet" and $model ne "fMet") 1018 { 1019 $tRNA->sort_multi_models("model"); 1020 my ($met_iso_model, $met_iso_score, $met_iso_ss) = $tRNA->get_model_hit("cyto", $tRNA->isotype()); 1021 my ($ile2_iso_model, $ile2_iso_score, $ile2_iso_ss) = $tRNA->get_model_hit("cyto", "Ile2"); 1022 if ($ile2_iso_score > 0 and $met_iso_score > 0) 1023 { 1024 if (($score - $ile2_iso_score) <= 5 and ($ile2_iso_score - $met_iso_score) >= 5 and $tRNA->score() > 50) 1025 { 1026 $tRNA->isotype("Ile2"); 1027 $tRNA->tRNAscan_id($tRNA->seqname().".tRNA".$tRNA->id()."-".$tRNA->isotype().$tRNA->anticodon()); 1028 } 1029 } 1030 } 1031 } 1032 } 1033 1034 print FILE_OUT $tRNA->seqname()."\t".($tRNA->start() - 1)."\t".$tRNA->end()."\t".$tRNA->tRNAscan_id()."\t".&convert_bed_score($tRNA->score())."\t". 1035 $tRNA->strand()."\t".($tRNA->start() - 1)."\t".$tRNA->end()."\t0\t".($tRNA->get_intron_count() + 1)."\t"; 1036 if ($tRNA->get_intron_count() == 0) 1037 { 1038 print FILE_OUT ($tRNA->end() - $tRNA->start() + 1).",\t0,\n"; 1039 } 1040 else 1041 { 1042 my @ar_introns = $tRNA->ar_introns(); 1043 my $block_sizes = ""; 1044 my $block_starts = "0,"; 1045 my $prev_start = 1; 1046 if ($tRNA->strand() eq "+") 1047 { 1048 for (my $i = 0; $i < scalar(@ar_introns); $i++) 1049 { 1050 $block_sizes .= ($ar_introns[$i]->{rel_start} - $prev_start).","; 1051 $block_starts .= $ar_introns[$i]->{rel_end}.","; 1052 $prev_start = $ar_introns[$i]->{rel_end} + 1; 1053 } 1054 $block_sizes .= ($tRNA->end() - $ar_introns[(scalar(@ar_introns)-1)]->{end}).","; 1055 } 1056 else 1057 { 1058 $prev_start = length($tRNA->seq()); 1059 for (my $i = (scalar(@ar_introns)-1); $i >= 0; $i--) 1060 { 1061 $block_sizes .= ($prev_start - $ar_introns[$i]->{rel_end}).","; 1062 $block_starts .= ($prev_start - $ar_introns[$i]->{rel_start} + 1).","; 1063 $prev_start = $ar_introns[$i]->{rel_start}; 1064 } 1065 $block_sizes .= ($ar_introns[0]->{rel_start} - 1).","; 1066 } 1067 print FILE_OUT $block_sizes."\t".$block_starts."\n"; 1068 } 1069 } 1070 1071 if (!$opts->no_isotype()) 1072 { 1073 $iso_int_results->close_file(); 1074 } 1075 $sp_int_results->close_file(); 1076 } 1077 close(FILE_OUT); 1078} 1079 1080sub convert_bed_score 1081{ 1082 my ($cm_score) = @_; 1083 1084 my $bed_score = $cm_score * 10; 1085 if ($bed_score > 1000) 1086 { 1087 $bed_score = 1000; 1088 } 1089 1090 return $bed_score; 1091} 1092 1093sub output_split_fragments 1094{ 1095 my ($opts, $r_pairs, $r_5half_hits, $r_3half_hits) = @_; 1096 1097 my ($r_5half, $r_3half); 1098 1099 &open_for_append(\*SPLITFILE, $opts->split_fragment_file()); 1100 printf SPLITFILE "Fragment1\tFragment2\tSeqName1\tStartPos1\tEndPos1\tSeqName2\tStartPos2\tEndPos2\tScore1\tScore2\n"; 1101 1102 foreach my $r_pair (@$r_pairs) 1103 { 1104 if (defined $r_pair->{"5h"} && defined $r_pair->{"3h"}) 1105 { 1106 $r_5half = $r_5half_hits->[$r_pair->{"5h"}]; 1107 $r_3half = $r_3half_hits->[$r_pair->{"3h"}]; 1108 print SPLITFILE $r_5half->{seq}."\t".$r_3half->{seq}."\t", 1109 $r_5half->{hit_seqname}."\t".$r_5half->{tRNA_start}."\t".$r_5half->{tRNA_end}."\t", 1110 $r_3half->{hit_seqname}."\t".$r_3half->{tRNA_start}."\t".$r_3half->{tRNA_end}."\t", 1111 $r_5half->{score}."\t".$r_3half->{score}."\n"; 1112 print SPLITFILE $r_5half->{ss}."\t".$r_3half->{ss}."\t\t\t\t\t\t\t\t\n"; 1113 } 1114 elsif (defined $r_pair->{"5h"} && !defined $r_pair->{"3h"}) 1115 { 1116 $r_5half = $r_5half_hits->[$r_pair->{"5h"}]; 1117 print SPLITFILE $r_5half->{seq}."\t\t", 1118 $r_5half->{hit_seqname}."\t".$r_5half->{tRNA_start}."\t".$r_5half->{tRNA_end}."\t", 1119 "\t\t\t", 1120 $r_5half->{score}."\t\n"; 1121 print SPLITFILE $r_5half->{ss}."\t\t\t\t\t\t\t\t\t\n"; 1122 } 1123 elsif (!defined $r_pair->{"5h"} && defined $r_pair->{"3h"}) 1124 { 1125 $r_3half = $r_3half_hits->[$r_pair->{"3h"}]; 1126 print SPLITFILE "\t".$r_3half->{seq}."\t", 1127 "\t\t\t", 1128 $r_3half->{hit_seqname}."\t".$r_3half->{tRNA_start}."\t".$r_3half->{tRNA_end}."\t", 1129 "\t".$r_3half->{score}."\n"; 1130 print SPLITFILE "\t".$r_3half->{ss}."\t\t\t\t\t\t\t\t\n"; 1131 } 1132 } 1133 close(SPLITFILE); 1134} 1135 11361; 1137