1#! @PERL@ 2# 3# -------------------------------------------------------------------- 4# tRNAscan-SE: a program for improved detection of transfer RNA 5# genes in genomic sequence 6# 7# Version 2.0.5 8# 9# Copyright (C) 2019 Patricia Chan and Todd Lowe 10# 11# School of Engineering, University of California, Santa Cruz 12# lowe@soe.ucsc.edu 13# http://lowelab.ucsc.edu/ 14# -------------------------------------------------------------------- 15# 16# Usage: 17# tRNAscan-SE [options] <FASTA file(s)> 18# 19 20use strict; 21use lib "@libdir@/tRNAscan-SE"; 22use Getopt::Long; 23use tRNAscanSE::Configuration; 24use tRNAscanSE::tRNA; 25use tRNAscanSE::SprinzlPos; 26use tRNAscanSE::ArraytRNA; 27use tRNAscanSE::Utils; 28use tRNAscanSE::GeneticCode; 29use tRNAscanSE::Options; 30use tRNAscanSE::Eufind; 31use tRNAscanSE::Tscan; 32use tRNAscanSE::CM; 33use tRNAscanSE::LogFile; 34use tRNAscanSE::Stats; 35use tRNAscanSE::Sequence; 36use tRNAscanSE::FpScanResultFile; 37use tRNAscanSE::ScanResult; 38use tRNAscanSE::IntResultFile; 39use tRNAscanSE::MultiResultFile; 40use tRNAscanSE::SS; 41 42our $version = "2.0.5"; 43our $release_date = "October 2019"; 44our $program_id = "tRNAscan-SE-".$version; 45 46# modified by 'make' 47our $default_conf = "@sysconfdir@/tRNAscan-SE.conf"; 48 49# Signal handling 50$SIG{'TERM'} = 'error_handler'; 51$SIG{'QUIT'} = 'error_handler'; 52$SIG{'INT'} = 'error_handler'; 53 54# Global variables 55our @fp_start_time; 56our $opts = tRNAscanSE::Options->new; 57our $global_constants = tRNAscanSE::Configuration->new(); 58our $log = tRNAscanSE::LogFile->new("default"); 59our $sprinzl = tRNAscanSE::SprinzlPos->new; 60our $fp_tRNAs = tRNAscanSE::ArraytRNA->new; 61our $sp_tRNAs = tRNAscanSE::ArraytRNA->new; 62our $fp_result_file = tRNAscanSE::FpScanResultFile->new(""); 63our $sp_int_results = tRNAscanSE::IntResultFile->new; 64our $iso_int_results = tRNAscanSE::MultiResultFile->new; 65our $gc = tRNAscanSE::GeneticCode->new; 66our $stats = tRNAscan::Stats->new; 67our $seq_file = tRNAscanSE::Sequence->new; 68our $eufind = tRNAscanSE::Eufind->new; 69our $tscan = tRNAscanSE::Tscan->new; 70our $cm = tRNAscanSE::CM->new; 71 72$global_constants->config_file($default_conf); 73 74our %global_vars = (global_constants => $global_constants, 75 log_file => $log, 76 options => $opts, 77 sprinzl => $sprinzl, 78 fp_tRNAs => $fp_tRNAs, 79 sp_tRNAs => $sp_tRNAs, 80 fp_result_file => $fp_result_file, 81 sp_int_results => $sp_int_results, 82 iso_int_results => $iso_int_results, 83 sequence => $seq_file, 84 gc => $gc, 85 stats => $stats 86 ); 87 88# set user-selectable options 89&set_options(); 90 91# set location of binaries & data files, 92# plus, check to make sure they are there 93$cm->set_file_paths(\%global_vars); 94$cm->check_lib_files($opts); 95$cm->set_bin($global_constants->get("bin_dir")); 96$cm->set_infernal_bin($global_constants->get("infernal_dir")); 97$eufind->set_bin($global_constants->get("bin_dir")); 98$tscan->set_bin($global_constants->get("bin_dir")); 99 100# initialize variables 101$gc->read_transl_table($opts); 102if ($opts->save_stats()) 103{ 104 $stats->file_name($opts->stats_file()); 105} 106 107# Start processing 108&initialize_process(); 109 110# prescan with either tRNAscan/eufind or both 111if ($opts->tscan_mode() || $opts->eufind_mode() || $opts->infernal_fp()) 112{ 113 &first_pass_prescan(); 114} 115 116# Check to see if no sequences were read from input file(s) 117if (($stats->numscanned() == 0) && ($opts->eufind_mode() || $opts->tscan_mode() || $opts->infernal_fp())) 118{ 119 if ($opts->seq_key() ne '\S*') 120 { 121 die "\nNo FASTA sequences matching \'".$opts->raw_seq_key()."\' key found\n\n"; 122 } 123 elsif ($opts->multiple_files()) 124 { 125 die "\nFATAL: No sequences in FASTA format found in ", join(', ',@ARGV),"\n\n"; 126 } 127 else 128 { 129 die "\nFATAL: No sequences in FASTA format found in file ".$opts->fasta_file()."\n\n"; 130 } 131} 132 133# Run Cove or Infernal on candidate tRNAs picked in first pass, 134# or by itself on seqs if no first pass searches 135elsif ($opts->cove_mode() || $opts->infernal_mode()) 136{ 137 $sp_int_results->file_name($opts->secondpass_int_result_file()); 138 $iso_int_results->file_name($opts->isotype_int_result_file()); 139 &run_cm_scan(); 140} 141 142$stats->end_sp_timer(); 143 144if ($opts->save_stats()) 145{ 146 $stats->open_file(); 147 $stats->save_final_stats($opts, $gc, $fp_result_file->get_hit_count(), $cm->tab_results()); 148 $stats->close_file(); 149} 150 151$log->finish_process(); 152 153&cleanup(); # clean up temp files 154exit(0); 155 156# END main 157 158 159sub initialize_process 160{ 161 # print program info header, credits, & selected run options 162 if (!$opts->quiet_mode()) 163 { 164 print STDERR "\ntRNAscan-SE v.$version ($release_date) -", 165 " scan sequences for transfer RNAs\n"; 166 &display_credits(); 167 $opts->display_run_options($cm, $tscan, $eufind, $global_constants, *STDERR); 168 } 169 170 $stats->start_fp_timer(); # save starting time 171 172 # if statistics are being saved, write run options in stats file 173 if ($opts->save_stats()) 174 { 175 my $host = `hostname`; 176 chomp($host); 177 $stats->open_file(); 178 $stats->write_line("\ntRNAscan-SE v.$version ($release_date) scan results (on host $host)\nStarted: ".`date`); 179 $opts->display_run_options($cm, $tscan, $eufind, $global_constants, $stats->FILE_H()); 180 $stats->close_file(); 181 } 182} 183 184# Running tRNAscan and/or EufindtRNA 185sub first_pass_prescan 186{ 187 if ($opts->infernal_fp()) 188 { 189 $log->status("Phase I: Searching for tRNAs with HMM-enabled Infernal"); 190 } 191 else 192 { 193 $log->status("Phase I: Searching for tRNAs with tRNAscan and/or EufindtRNA"); 194 } 195 196 # open seq file to search 197 $seq_file->open_file($opts->fasta_file(), "read"); 198 199 # Main loop for reading seqs & scanning with tRNAscan and/or EufindtRNA 200 my $targ_seq_id = 0; # Don't look for a specific Seq number 201 my $start_index = 1; 202 my $sequence_scanned = 0; 203 my $printed_header = 0; 204 my $eufind_output; 205 my @hit_list = (); 206 my $tmp_raw = $global_constants->get("tmp_raw"); 207 my $tmp_fa = $global_constants->get("tmp_fa"); 208 my $tmp_fa_file = tRNAscanSE::Sequence->new; 209 my $missing_fa_file = tRNAscanSE::Sequence->new; 210 211 while ($seq_file->read_fasta($opts, $targ_seq_id)) 212 { 213 if ($opts->cove_mode() || $opts->infernal_mode()) 214 { 215 $log->broadcast("Scanned seqs: ".$stats->numscanned()." (at ".$seq_file->seq_name().")"); 216 } 217 $stats->increment_numscanned(); 218 $stats->increment_first_pass_base_ct($seq_file->seq_length()); 219 220 do 221 { 222 # Write one input sequence / seq buffer to tmp_fa file 223 $tmp_fa_file->open_file($tmp_fa, "write"); 224 $tmp_fa_file->set_seq_info($seq_file->seq_name(), $seq_file->seq_description(), 225 $seq_file->seq_length(), $seq_file->sequence()); 226 $tmp_fa_file->write_fasta(); 227 $tmp_fa_file->close_file(); 228 229 if ($opts->infernal_fp()) 230 { 231 $cm->first_pass_scan(\%global_vars, $start_index, $seq_file->seq_name()); 232 } 233 else 234 { 235 # Run tRNAscan on $tmp_fa file & write results to $tmp_raw output file 236 if ($opts->tscan_mode()) 237 { 238 $tscan->run_tRNAscan($tmp_fa, $tmp_raw, $start_index, $global_constants->get("lib_dir"), $seq_file->seq_name()); 239 if ($opts->save_verbose()) 240 { 241 $tscan->append_verbfile($opts->verb_file(), $tmp_fa, $seq_file->seq_name()); 242 } 243 $tscan->process_tRNAscan_hits(\%global_vars, $seq_file->seq_name()); 244 } 245 246 # Run eufindtRNA program & save results in memory in $Eufind_output array 247 if ($opts->eufind_mode()) 248 { 249 $eufind_output = $eufind->run_eufind($tmp_fa, $start_index, $opts->max_int_len(), $seq_file->seq_name()); 250 if ($eufind_output ne "") 251 { 252 $eufind->process_Eufind_hits(\%global_vars, $eufind_output); 253 $eufind_output = ""; 254 } 255 } 256 } 257 $sequence_scanned = 1; # Flag indicating current sequence has been scanned 258 259 # Check to see if all of sequence was read in last buffer-sized chunck 260 if ($seq_file->seq_buf_overrun()) 261 { 262 $start_index = $seq_file->buffer_end_index() + 1; 263 if ($seq_file->read_more_fasta($opts)) 264 { 265 $sequence_scanned = 0; 266 } 267 } 268 269 } 270 until ($sequence_scanned); 271 272 if ($fp_tRNAs->get_count() > 0) 273 { 274 $stats->increment_seqs_hit(); 275 276 # save results in ACeDB format now if not using Cove analysis 277 if ($opts->ace_output() && (!$opts->CM_mode())) 278 { 279 &save_Acedb_from_firstpass($opts->output_codon(), $gc->one_let_trans_map(), $fp_tRNAs, $opts->out_file()); 280 } 281 else 282 { 283 # save all hits for this seq 284 my $fpass_trna_base_ct = $stats->fpass_trna_base_ct(); 285 if (!$opts->CM_mode()) 286 { 287 if (!($opts->brief_output() || $printed_header)) 288 { 289 &open_for_append(\*TABOUT, $opts->out_file()); 290 &print_results_header(\*TABOUT, $opts, 0, 8, 8, 1); 291 close (TABOUT); 292 $printed_header = 1; 293 } 294 } 295 $fp_result_file->save_firstpass_output($opts, $fp_tRNAs, \$fpass_trna_base_ct, $seq_file->seq_length(), $seq_file->seq_id()); 296 $stats->fpass_trna_base_ct($fpass_trna_base_ct); 297 } 298 299 # clear hit array 300 $fp_tRNAs->clear(); 301 } 302 elsif ($opts->save_missed()) 303 { 304 # save sequence that had no tRNA hits if -M param set 305 # NOTE: only writes last frame of seq buffer if seq length > max_seq_buffer 306 $missing_fa_file->open_file($opts->missed_seq_file(), "append"); 307 $missing_fa_file->set_seq_info($seq_file->seq_name(), $seq_file->seq_description(), $seq_file->seq_length(), $seq_file->sequence()); 308 $missing_fa_file->write_fasta(); 309 $missing_fa_file->close_file(); 310 } 311 312 $seq_file->reset_buffer_ct(); 313 $start_index = 1; 314 315 } # while (read_fasta()) - still more seqs to scan 316 317 $seq_file->close_file(); 318 # remove temporary files 319 system("rm -f $tmp_raw $tmp_fa"); 320 $seq_file->release_memory(); # release memory 321 322 $log->broadcast("\n".$stats->numscanned()." seqs scanned, ".$stats->seqs_hit()." seqs had at ". 323 "least one hit.\n".$stats->trnatotal()." total tRNAs predicted in first pass scans"); 324 325 if ((!$opts->CM_mode()) && ($stats->trnatotal() == 0) && (!$opts->quiet_mode())) 326 { 327 $log->status("No tRNAs found."); 328 } 329 330 $stats->end_fp_timer(); # save time first-pass scans are done 331 332 if ($opts->save_stats()) 333 { 334 $stats->open_file(); 335 $stats->save_firstpass_stats(); 336 $stats->close_file(); 337 } 338} 339 340# Run Cove or Infernal 341sub run_cm_scan 342{ 343 $stats->start_sp_timer(); 344 345 if ($opts->tscan_mode() || $opts->eufind_mode() || $opts->infernal_fp()) 346 { 347 $log->status("Phase II: ".$opts->second_pass_label()." verification of candidate ". 348 "tRNAs detected with first-pass scan"); 349 } 350 else 351 { 352 $log->status("Running ".$opts->second_pass_label()." analysis"); 353 if (!$opts->use_prev_ts_run()) 354 { 355 $fp_result_file->prep_for_secpass_only($opts, $stats, $seq_file); 356 } 357 } 358 359 # Name of tRNA sequence currently in memory 360 my $prev_seq_name = ''; 361 362 # flag indicates if seqid and seqlen are saved in firstpass result file 363 my $seqinfo_flag = 0; 364 365 my $curseq_trnact = 0; 366 my $prescan_trna = tRNAscanSE::tRNA->new; 367 my $tRNAs_found = 0; 368 my $index = -1; 369 370 $seq_file->open_file($opts->fasta_file(), "read"); 371 $fp_result_file->index_results(\$seqinfo_flag); 372 my @fp_result_file_indexes = $fp_result_file->get_indexes(); 373 $fp_result_file->open_file(); 374 375 for (my $seq_ct = 0; $seq_ct < scalar(@fp_result_file_indexes); $seq_ct++) 376 { 377 $sp_int_results->file_name($opts->secondpass_int_result_file()); 378 $global_vars{sp_int_results} = $sp_int_results; 379 $sp_int_results->open_file("write"); 380 $sp_tRNAs->clear(); 381 $log->broadcast("Scanning ".$fp_result_file_indexes[$seq_ct]->[1]); 382 383 if ($opts->cove_mode()) 384 { 385 $fp_result_file->reset_current_seq(); 386 $fp_result_file->get_next_tRNA_candidate($opts, $seqinfo_flag, $seq_ct, $prescan_trna); 387 while ($prescan_trna->seqname() ne "") 388 { 389 # Retrieve tRNA sequence and write to tmp_trnaseq_file 390 if (!&prepare_tRNA_to_scan($seq_file, $prescan_trna)) 391 { 392 next; 393 } 394 $tRNAs_found = $cm->analyze_with_cove(\%global_vars, $prescan_trna, \$curseq_trnact); 395 if (!$cm->CM_check_for_introns()) 396 { 397 $stats->increment_total_secpass_ct($tRNAs_found); 398 } 399 400 $fp_result_file->get_next_tRNA_candidate($opts, $seqinfo_flag, $seq_ct, $prescan_trna); 401 } 402 } 403 else 404 { 405 # Retrieve tRNA sequences and write to tmp_trnaseq_file 406 if (!&prepare_multi_tRNAs_to_scan($seqinfo_flag, $seq_file, $seq_ct)) 407 { 408 next; 409 } 410 411 if ($opts->mito_mode()) 412 { 413 $tRNAs_found = $cm->analyze_mito(\%global_vars, $seqinfo_flag, $seq_ct, $fp_result_file_indexes[$seq_ct]->[1], \$curseq_trnact); 414 } 415 elsif($opts->alternate_mode()) 416 { 417 $tRNAs_found = $cm->analyze_alternate(\%global_vars, $seqinfo_flag, $seq_ct, $fp_result_file_indexes[$seq_ct]->[1], \$curseq_trnact); 418 } 419 elsif ($opts->metagenome_mode()) 420 { 421 422 } 423 elsif ($opts->numt_mode()) 424 { 425 426 } 427 elsif ($opts->infernal_mode()) 428 { 429 $tRNAs_found = $cm->analyze_with_cmsearch(\%global_vars, $seqinfo_flag, $seq_ct, $fp_result_file_indexes[$seq_ct]->[1], \$curseq_trnact); 430 } 431 432 $stats->increment_total_secpass_ct($tRNAs_found); 433 } 434 435 $sp_int_results->close_file(); 436 437 if (($curseq_trnact > 0) and $cm->CM_check_for_introns()) 438 { 439 if (&prepare_intron_scan($seq_file)) 440 { 441 $cm->scan_noncanonical_introns(\%global_vars, $fp_result_file_indexes[$seq_ct]->[1]); 442 } 443 } 444 445 if ($curseq_trnact > 0) 446 { 447 if ($opts->euk_mode() or $opts->bact_mode() or $opts->arch_mode()) 448 { 449 $cm->truncated_tRNA_search(\%global_vars, $fp_result_file_indexes[$seq_ct]->[1]); 450 451 if (!$opts->no_isotype()) 452 { 453 $cm->isotype_cmsearch(\%global_vars); 454 } 455 } 456 457 &output_tRNA(\%global_vars, $cm, $cm->tab_results(), $cm->get_hmm_score(), $program_id); 458 } 459 460 if (($sp_int_results->get_count() > 0) and $cm->CM_check_for_split_halves()) 461 { 462 my @sp_indexes = $sp_int_results->get_indexes(); 463 if ($sp_int_results->open_file("read")) 464 { 465 for (my $i = 0; $i < scalar(@sp_indexes); $i++) 466 { 467 my $cm_tRNA = tRNAscanSE::tRNA->new; 468 $sp_int_results->get_tRNA($sp_indexes[$i]->[0], $cm_tRNA); 469 $sp_tRNAs->put($cm_tRNA); 470 } 471 $sp_int_results->close_file(); 472 473 $cm->scan_split_tRNAs(\%global_vars); 474 } 475 } 476 477 if ($opts->bed_file() ne "") 478 { 479 if ($curseq_trnact > 0) 480 { 481 &write_bed(\%global_vars); 482 } 483 } 484 485 $sp_int_results->clear_index(); 486 $curseq_trnact = 0; 487 } 488 489 $fp_result_file->close_file(); 490 $seq_file->close_file(); 491 492 if (($stats->total_secpass_ct() == 0) && (!$opts->quiet_mode())) 493 { 494 print STDERR "No tRNAs found.\n\n"; 495 } 496} 497 498# Extracts tRNA sequences with given coordinates, and writes to $tmp_ 499sub prepare_multi_tRNAs_to_scan 500{ 501 my ($seqinfo_flag, $seq_file, $seq_ct) = @_; 502 503 system("rm -f ".$global_constants->get("tmp_trnaseq_file")); 504 505 my $trna_file = tRNAscanSE::Sequence->new; 506 $trna_file->open_file($global_constants->get("tmp_trnaseq_file"), "write"); 507 508 my $flanking = 0; 509 my $trna = tRNAscanSE::tRNA->new; 510 $fp_result_file->reset_current_seq(); 511 $fp_result_file->get_next_tRNA_candidate($opts, $seqinfo_flag, $seq_ct, $trna); 512 if ($fp_result_file->open_flanking("write")) 513 { 514 $flanking = 1; 515 } 516 while ($trna->seqname() ne "") 517 { 518 $seq_file->get_tRNA_sequence(\%global_vars, $trna); 519 $stats->increment_secpass_base_ct($trna->len()); 520 $trna_file->set_seq_info($trna->seqname().".t".&pad_num($trna->id(), 6), $seq_file->seq_description(), length($trna->seq()), $trna->seq()); 521 $trna_file->write_fasta(); 522 if ($flanking) 523 { 524 $fp_result_file->write_tRNA_flanking($trna); 525 } 526 527 $seq_file->release_memory(); 528 529 $fp_result_file->get_next_tRNA_candidate($opts, $seqinfo_flag, $seq_ct, $trna); 530 } 531 $trna_file->close_file(); 532 $fp_result_file->close_flanking(); 533 534 return 1; 535} 536 537# Extracts tRNA sequence with given coordinates, and writes to $tmp_ 538sub prepare_tRNA_to_scan 539{ 540 my ($seq_file, $trna) = @_; 541 542 $seq_file->get_tRNA_sequence(\%global_vars, $trna); 543 $stats->increment_secpass_base_ct($trna->len()); 544 545 &write_tRNA($global_constants->get("tmp_trnaseq_file"), $seq_file->seq_name(), $seq_file->seq_description(), $trna->seq(), 1); 546 547 $seq_file->release_memory(); 548 549 return 1; 550} 551 552# Extracts tRNA sequences with given coordinates, and writes to $tmp_ 553sub prepare_intron_scan 554{ 555 my ($seq_file) = @_; 556 my $ret_value = 1; 557 558 system("rm -f ".$global_constants->get("tmp_trnaseq_file")); 559 560 my $trna_file = tRNAscanSE::Sequence->new; 561 my $cm_tRNA = undef; 562 $trna_file->open_file($global_constants->get("tmp_trnaseq_file"), "write"); 563 564 my $trna = tRNAscanSE::tRNA->new; 565 my $padded_seq = ""; 566 $sp_tRNAs->clear(); 567 my @sp_indexes = $sp_int_results->get_indexes(); 568 if ($sp_int_results->open_file("read")) 569 { 570 for (my $i = 0; $i < scalar(@sp_indexes); $i++) 571 { 572 $cm_tRNA = tRNAscanSE::tRNA->new; 573 $sp_int_results->get_tRNA($sp_indexes[$i]->[0], $cm_tRNA); 574 my $orig_seq = $cm_tRNA->seq(); 575 $seq_file->get_tRNA_sequence(\%global_vars, $cm_tRNA); 576 if (uc($orig_seq) ne uc($cm_tRNA->seq())) 577 { 578 $ret_value = 0; 579 $log->error("tRNA sequence does not match for intron scan: ".$cm_tRNA->tRNAscan_id()." ".$cm_tRNA->seqname().":".$cm_tRNA->start()."-".$cm_tRNA->end()); 580 } 581 $padded_seq = $cm_tRNA->upstream().$cm_tRNA->seq().$cm_tRNA->downstream(); 582 $trna_file->set_seq_info($cm_tRNA->seqname().".trna".&pad_num($cm_tRNA->id(), 6), $cm_tRNA->tRNAscan_id(), length($padded_seq), $padded_seq); 583 $trna_file->write_fasta(); 584 $sp_tRNAs->put($cm_tRNA); 585 $seq_file->release_memory(); 586 } 587 $sp_int_results->close_file(); 588 } 589 $trna_file->close_file(); 590 591 return $ret_value; 592} 593 594# clean up temp files 595sub cleanup 596{ 597 system("rm -f ".$global_constants->get("temp_dir")."/tscan$$".'*'); 598 system("rm -f ".$opts->fafile().".pid"); 599} 600 601sub error_handler 602{ 603 print "\nAborting tRNAscan-SE\n\n"; 604 605 my $ppid = $$; 606 my $psout = `ps -ef`; 607 my @ps_lines = split(/\n/,$psout); 608 foreach my $line (0..$#ps_lines) 609 { 610 if ($ps_lines[$line] =~/^\s+\S+\s+(\d+)\s+($ppid)\s/) 611 { 612 print STDERR "Killing process $1:\n",$ps_lines[$line],"\n"; 613 my $killct = kill 'KILL', $1; 614 print STDERR "$killct jobs received the kill signal\n"; 615 } 616 } 617 618 &cleanup(); 619 exit(1); 620} 621 622sub display_credits 623{ 624 print STDERR "Copyright (C) 2019 Patricia Chan and Todd Lowe\n", 625 " University of California Santa Cruz\n", 626 "Freely distributed under the GNU General Public License (GPLv3)\n\n"; 627} 628 629sub print_usage 630{ 631 print STDERR "\nUsage: tRNAscan-SE [-options] <FASTA file(s)>\n\n"; 632 print STDERR " Scan a sequence file for tRNAs \n", 633 " -- default: use Infernal & tRNA covariance models\n", 634 " with eukaryotic sequences \n", 635 " (use -B, -A, -M, -O or -G to scan other types of sequences)\n\n", 636 "Basic Options\n", 637 " -E : search for eukaryotic tRNAs (default)\n", 638 " -B : search for bacterial tRNAs\n", 639 " -A : search for archaeal tRNAs\n", 640 " -M <model> : search for mitochondrial tRNAs\n", 641 " options: mammal, vert\n", 642 " -O : search for other organellar tRNAs\n", 643 " -G : use general tRNA model (cytoslic tRNAs from all 3 domains included)\n", 644 " -L : search using the legacy method (tRNAscan, EufindtRNA, and COVE)\n", 645 " use with -E, -B, -A, -O, or -G\n", 646 " -I : search using Infernal (default)\n", 647 " use with -E, -B, -A, -O, or -G\n", 648# " -T : search for tRNAs in metagenome\n", 649# " -N : search for tRNAs in nuclear mitochondrial DNA regions (NUMTs)\n", 650 " -o <file> : save final results in <file>\n", 651 " -f <file> : save tRNA secondary structures to <file>\n", 652 " -m <file> : save statistics summary for run in <file>\n", 653 " (speed, # tRNAs found in each part of search, etc)\n", 654 " -H : show both primary and secondary structure components to\n", 655 " covariance model bit scores\n", 656 " -q : quiet mode (credits & run option selections suppressed)\n\n", 657 " -h : print full list (long) of available options\n\n"; 658} 659 660sub print_all_options 661{ 662 print "\nUsage: tRNAscan-SE [-options] <FASTA file(s)>\n\n"; 663 print " Scan a sequence file for tRNAs \n", 664 " -- default: use Infernal & tRNA covariance models\n", 665 " with eukaryotic sequences \n", 666 " (use 'Search Mode Options' below to scan other types of sequences)\n\n", 667 "Search Mode Options:\n\n", 668 " -E : search for eukaryotic tRNAs (default)\n", 669 " -B : search for bacterial tRNAs\n", 670 " -A : search for archaeal tRNAs\n", 671 " -M <model> : search for mitochondrial tRNAs\n", 672 " options: mammal, vert\n", 673 " -O : search for other organellar tRNAs\n", 674 " -G : use general tRNA model (cytoslic tRNAs from all 3 domains included)\n", 675 " --mt <model> : use mito tRNA models for cytosolic/mito detemination\n", 676 " (if not specified, only cytosolic isotype-specific model scan will be performed)\n", 677# " -T : search for tRNAs in metagenome\n", 678# " -N : search for tRNAs in nuclear mitochondrial DNA regions (NUMTs)\n", 679 " -I : search using Infernal\n", 680 " default use with -E, -B, -A, or -G; optional for -O\n", 681 " --max : maximum sensitivity mode - search using Infernal without hmm filter (very slow)\n", 682 " -L : search using the legacy method (tRNAscan, EufindtRNA, and COVE)\n", 683 " use with -E, -B, -A or -G\n", 684 " -C --cove : search using COVE analysis only (legacy, extremely slow)\n", 685 " default use with -O\n", 686 " -H --breakdown : show breakdown of primary and secondary structure components to\n", 687 " covariance model bit scores\n", 688 " -D --nopseudo : disable pseudogene checking\n\n", 689 690 "Output options:\n\n", 691 " -o --output <file> : save final results in <file>\n", 692 " -f --struct <file> : save tRNA secondary structures to <file>\n", 693 " -s --isospecific <file> : save results using isotype-specific models in <file>\n", 694 " -m --stats <file> : save statistics summary for run in <file>\n", 695 " (speed, # tRNAs found in each part of search, etc)\n", 696 " -b --bed <file> : save results in BED file format of <file>\n", 697 " -a --fasta <file> : save predicted tRNA sequences in FASTA file format of <file>\n", 698 " -l --log <file> : save log of program progress in <file>\n", 699 " --detail : display prediction outputs in detailed view\n", 700 " --brief : brief output format (no column headers)\n\n", 701 " -? \# : '#' in place of <file> chooses default name for output files\n", 702 " -p --prefix <label> : use <label> prefix for all default output file names\n\n", 703 " -d --progress : display program progress messages\n", 704 " -q --quiet : quiet mode (credits & run option selections suppressed)\n", 705 " -y --hitsrc : show origin of hits (Ts=tRNAscan 1.4, Eu=EufindtRNA, \n", 706 " Bo=Both Ts and Eu, Inf=Infernal)\n\n", 707 708 "Specify Alternate Cutoffs / Data Files:\n\n", 709 " -X --score <score> : set cutoff score (in bits) for reporting tRNAs (default=20)\n", 710 " -g --gencode <file> : use alternate genetic codes specified in <file> for\n", 711 " determining tRNA type\n", 712 " -z --pad <number> : use <number> nucleotides padding when passing first-pass\n", 713 " tRNA bounds predictions to CM analysis (default=8)\n", 714 " --len <length> : set max length of tRNA intron+variable region for legacy search mode\n", 715 " (default=116bp)\n", 716 717 "Misc Options:\n\n", 718 " -h --help : print this help message\n", 719 " -c --conf <file> : tRNAscan-SE configuration file (default: tRNAscan-SE.conf)\n", 720 " -Q --forceow : do not prompt user before overwriting pre-existing\n", 721 " result files (for batch processing)\n\n", 722 " --match <EXPR> : search only sequences with names matching <EXPR> string\n", 723 " (<EXPR> may contain * or ? wildcard chars)\n", 724 " --search <EXPR> : start search at sequence with name matching <EXPR> string\n", 725 " and continue to end of input sequence file(s)\n", 726 727 "Special Advanced Options (for testing & special purposes)\n\n", 728 " -U : search for tRNAs with alternate models defined in configuration file\n\n", 729 " -t --tscan : search using tRNAscan only (defaults to strict params)\n", 730 " --tmode <mode> : explicitly set tRNAscan params, where <mode>=R or S\n", 731 " (R=relaxed, S=strict tRNAscan v1.3 params)\n\n", 732 " -v --verbose <file> : save verbose tRNAscan 1.3 output to <file>\n", 733 " --nomerge : Keep redundant tRNAscan 1.3 hits (don't filter out multiple\n", 734 " predictions per tRNA identification)\n", 735 " -e --eufind : search using Eukaryotic tRNA finder (EufindtRNA) only\n", 736 " (defaults to Normal seach parameters when run alone,\n", 737 " or to Relaxed search params when run with Cove)\n", 738 " --emode <mode> : explicitly set EufindtRNA params, where <mode>=R, N, or S\n", 739 " (relaxed, normal, or strict)\n\n", 740 " --iscore <score> : manually set \"intermediate\" cutoff score for EufindtRNA\n", 741 " -r --fsres <file> : save first-pass scan results from EufindtRNA, tRNAscan, or\n", 742 " Infernal hmm in <file> in tabular results format\n", 743 " --mid : fast scan mode - search using Infernal with mid-level strictness of hmm filter\n", 744 " -F --falsepos <file> : save first-pass candidate tRNAs in <file> that were then\n", 745 " found to be false positives by second-pass analysis\n", 746 " --missed <file> : save all seqs that do NOT have at least one\n", 747 " tRNA prediction in them (aka \"missed\" seqs)\n", 748 " --thread <number> : number of threads used for running infernal (default is to use available threads)\n", 749 "\n\n"; 750} 751 752sub set_options 753{ 754 # clear option vars 755 our $opt_conf= ''; 756 our $opt_acedb=0; our $opt_quiet=0; our $opt_progress=0; our $opt_log=""; 757 our $opt_euk=1; our $opt_bact=0; our $opt_arch=0; our $opt_organ=0; our $opt_general=0; our $opt_mito=''; 758 our $opt_legacy=0; our $opt_inf=0; our $opt_isocm=''; our $opt_mt = ''; 759 our $opt_metagenome=0; our $opt_numt=0; 760 our $opt_alt=0; 761 our $opt_cove=0; our $opt_mid=0; our $opt_max=0; our $opt_eufind=0; our $opt_tscan=0; 762 our $opt_ncintron=0; our $opt_frag=''; 763 our $opt_breakdown=0; our $opt_nopseudo=0; our $opt_nomerge=0; our $opt_hitsrc=0; 764 our $opt_output=''; our $opt_struct=''; our $opt_stats=''; our $opt_isospecific=''; our $opt_bed=''; our $opt_fasta=''; our $opt_brief=0; 765 our $opt_detail=0; 766 our $opt_prefix=''; our $opt_match=''; our $opt_search=''; 767 our $opt_gencode=''; our $opt_codons=0; 768 our $opt_tmode=''; our $opt_emode=''; our $opt_fsres=''; our $opt_filter=''; our $opt_falsepos=''; our $opt_missed=''; 769 our $opt_score=1000; our $opt_iscore=1000; our $opt_len=-1; our $opt_pad=1000; 770 our $opt_help=0; our $opt_verbose=''; our $opt_forceow=0; 771 our $opt_w=''; our $opt_U=0; our $opt_Y=0; our $opt_thread=999; 772 773 Getopt::Long::Configure("bundling", "no_ignore_case", "no_auto_abbrev"); 774 my $result = &GetOptions( 775 # Configuration 776 "conf|c=s","log|l=s", 777 # Misc option switches 778 "help|h", 779 "quiet|q","hitsrc|y","breakdown|H", 780 "Y", 781 "progress|d","nopseudo|D","codons","forceow|Q","nomerge", 782 # Search mode switches 783 "euk|E", "bact|B", "arch|A", "organ|O", "general|G", "mito|M=s", 784 "legacy|L", "inf|I", "isocm|S=s", 785# "metagenome|T", "numt|N", 786 "alt|U", "mt=s", 787 "eufind|e", "tscan|t", "cove|C", "mid", "max", 788 # file name input specifiers 789 "gencode|g=s", 790 # file name output specifiers 791 "output|o=s", "stats|m=s", "struct|f=s", "bed|b=s", "fasta|a=s", "isospecific|s=s", "acedb", "brief", 792 "fsres|r=s","verbose|v=s","w=s","falsepos|F=s","missed=s", "detail", 793 #string parameters 794 "prefix|p=s","match=s","search=s","emode=s","tmode=s", 795 #numerical parameters 796 "score|X=f","iscore=f","pad|z=i","len=i", 797 "thread=i"); 798 799 if ($opt_help) 800 { 801 print STDERR "\ntRNAscan-SE $version ($release_date)\n"; 802 &display_credits; 803 &print_all_options; 804 exit(0); 805 } 806 if ($#ARGV < 0) 807 { 808 print STDERR "\ntRNAscan-SE $version ($release_date)\n"; 809 print STDERR "\nFATAL: No sequence file(s) specified.\n"; 810 &print_usage(); 811 exit(1); 812 } 813 814 # set location of temp and lib files 815 if ($ENV{TMPDIR}) 816 { 817 $global_constants->set_temp_dir($ENV{TMPDIR}); 818 } 819 820 # set defaults 821 if ($opt_conf ne "") 822 { 823 $global_constants->config_file($opt_conf); 824 } 825 $global_constants->read_configuration_file(); 826 $cm->set_defaults(\%global_vars); 827 $eufind->set_defaults(\%global_vars); 828 $tscan->set_defaults(\%global_vars); 829 830 # use input seq file name as prefix for default output file names 831 my $fafile = $ARGV[0]; 832 $fafile =~ s/\.fa|\.seq$//; 833 834 # use specified prefix for default output file names, take .seq or .fa extensions off 835 if ($opt_prefix ne '') 836 { 837 $fafile = $opt_prefix; 838 } 839 $opts->fafile($fafile); 840 $opts->secondpass_int_result_file($global_constants->get("temp_dir")."/tscan$$"."_sp.out"); 841 $opts->isotype_int_result_file($global_constants->get("temp_dir")."/tscan$$"."_iso.out"); 842 $opts->truncated_int_result_file($global_constants->get("temp_dir")."/tscan$$"."_sp_trunc.out"); 843 844 if ($opt_detail) 845 { 846 $opts->detail(1); 847 } 848 849 # Do NOT prompt before overwriting pre-existing output files; good for use in batch-mode jobs 850 if ($opt_forceow != 0) 851 { 852 $opts->prompt_for_overwrite(0); 853 } 854 855 # set name of result file 856 if ($opt_output ne '') 857 { 858 $opts->results_to_stdout(0); 859 if ($opt_output eq "#") 860 { 861 $opts->out_file("$fafile.out"); 862 } 863 else 864 { 865 $opts->out_file($opt_output); 866 } 867 &check_output_file($opts->out_file(), $opts->prompt_for_overwrite()); 868 } 869 870 # save results in ACeDB output 871 if ($opt_acedb != 0) 872 { 873 $opts->ace_output(1); 874 } 875 876 # use brief output (suppress column header) 877 if ($opt_brief != 0) 878 { 879 $opts->brief_output(1); 880 } 881 882 # use quite mode (suppress credits & user-selected options) 883 if ($opt_quiet != 0) 884 { 885 $opts->quiet_mode(1); 886 $log->quiet_mode(1); 887 } 888 889 # save source of tRNA hit 890 if ($opt_hitsrc != 0) 891 { 892 if ($opt_mito ne "" || $opt_numt) 893 { 894 die "FATAL: Conflicting search options have been selected. -y cannot be combined with -M.\n"; 895 } 896 $opts->save_source(1); 897 } 898 899 # disable pseudogene filtering 900 if ($opt_nopseudo != 0) 901 { 902 $cm->skip_pseudo_filter(1); 903 } 904 905 # translate anticodon to codon for output 906 if ($opt_codons != 0) 907 { 908 $opts->output_codon(1); 909 } 910 911 # search only sequences matching KEY name 912 # save original KEY expr 913 # turning KEY into regular expression notation 914 if ($opt_match ne '') 915 { 916 $opts->seq_key($opt_match); 917 $opts->raw_seq_key($opts->seq_key()); 918 my $key = $opts->seq_key(); 919 $key =~ s/(\W)/\\$1/g; 920 $key =~ s/\\\*/\\S\*/g; 921 $key =~ s/\\\?/\\S/g; 922 $key =~ s/[\"\']//g; 923 $opts->seq_key($key); 924 } 925 # search all sequences after matching KEY 926 # save original KEY expr 927 # turning KEY into regular expression notation 928 elsif ($opt_search ne '') 929 { 930 $opts->start_at_key(1); 931 $opts->seq_key($opt_search); 932 $opts->raw_seq_key($opts->seq_key()); 933 my $key = $opts->seq_key(); 934 $key =~ s/(\W)/\\$1/g; 935 $key =~ s/\\\*/\\S\*/g; 936 $key =~ s/\\\?/\\S/g; 937 $key =~ s/[\"\']//g; 938 $opts->seq_key($key); 939 } 940 else 941 { 942 $opts->seq_key('\S*'); 943 } 944 945 if ($opt_isocm ne "" and $opt_isocm ne "on" and $opt_isocm ne "off") 946 { 947 die "FATAL: Invalid value for --isocm. Please use on or off or leave out the option for default setting\n"; 948 } 949 950 if ($opt_alt != 0) 951 { 952 if ($opt_bact || $opt_arch || $opt_general || $opt_mito ne "" || $opt_metagenome || $opt_numt) 953 { 954 die "FATAL: Conflicting search options have been selected. -U cannot be combined with other sequence type search option.\n"; 955 } 956 if ($opt_isocm eq "on") 957 { 958 die "FATAL: Conflicting search options have been selected. -U cannot be combined with --isocm.\n"; 959 } 960 961 my $cms = $global_constants->get("alt_cm"); 962 if (!defined $cms or scalar(keys %$cms) == 0) 963 { 964 die "FATAL: Alternate covariance models are not defined in the configuration file when using -U.\n"; 965 } 966 967 $opt_inf = 1; 968 $opt_eufind = 0; 969 $opt_tscan = 0; 970 $opts->search_mode("alt"); 971 $opt_euk = 0; 972 $opt_mid = 1; 973 $cm->skip_pseudo_filter(1); 974 975 $opts->no_isotype(1); 976 if ($opt_ncintron != 0) 977 { 978 die "FATAL: Conflicting search options have been selected. -U and --ncintron cannot be used simultaneously.\n"; 979 } 980 if ($opt_frag ne '') 981 { 982 die "FATAL: Conflicting search options have been selected. -U and --frag cannot be used simultaneously.\n"; 983 } 984 985 $opts->CM_mode("infernal"); 986 } 987 988 if ($opt_bact != 0) 989 { 990 if ($opt_arch || $opt_general || $opt_mito ne "" || $opt_metagenome || $opt_numt) 991 { 992 die "FATAL: Conflicting search options have been selected. -B cannot be combined with other sequence type search option.\n"; 993 } 994 995 $eufind->eufind_intscore($global_constants->get_subvalue("eufind", "bact_intscore")); 996 $opts->search_mode("bacteria"); 997 $opt_euk = 0; 998 $opts->CM_mode("infernal"); 999 $opt_inf = 1; 1000 1001 $opts->no_isotype(0); 1002 if ($opt_isocm eq "off") 1003 { 1004 $opts->no_isotype(1); 1005 } 1006 1007 if ($opt_mt ne '') 1008 { 1009 die "FATAL: Conflicting search options have been selected. -B and --mt cannot be used simultaneously.\n"; 1010 } 1011 if ($opt_ncintron != 0) 1012 { 1013 die "FATAL: Conflicting search options have been selected. -B and --ncintron cannot be used simultaneously.\n"; 1014 } 1015 if ($opt_frag ne '') 1016 { 1017 die "FATAL: Conflicting search options have been selected. -B and --frag cannot be used simultaneously.\n"; 1018 } 1019 } 1020 1021 if ($opt_arch != 0) 1022 { 1023 if ($opt_bact || $opt_general || $opt_mito ne "" || $opt_metagenome || $opt_numt) 1024 { 1025 die "FATAL: Conflicting search options have been selected. -A cannot be combined with other sequence type search option.\n"; 1026 } 1027 1028 $eufind->eufind_intscore($global_constants->get_subvalue("eufind", "arch_intscore")); 1029 $opts->search_mode("archaea"); 1030 $opt_euk = 0; 1031 $opts->CM_mode("infernal"); 1032 $opt_inf = 1; 1033 1034 # check for non-canonical introns 1035 $cm->CM_check_for_introns(1); 1036 1037 # check for tRNA fragments of split tRNAs 1038 if ($opt_frag ne '') 1039 { 1040 $cm->CM_check_for_split_halves(1); 1041 if ($opt_frag eq "#") 1042 { 1043 $opts->split_fragment_file("$fafile.frag"); 1044 } 1045 elsif (($opt_frag eq "\$") || ($opt_frag eq "-")) 1046 { 1047 $opts->split_fragment_file("-"); 1048 1049 # sends output to stdout instead of tabular output 1050 if ($opts->results_to_stdout()) 1051 { 1052 $opts->results_to_stdout(0); 1053 $opts->out_file("/dev/null"); 1054 } 1055 } 1056 else 1057 { 1058 $opts->split_fragment_file($opt_frag); 1059 } 1060 &check_output_file($opts->split_fragment_file(), $opts->prompt_for_overwrite()); 1061 } 1062 1063 $opts->no_isotype(0); 1064 if ($opt_isocm eq "off") 1065 { 1066 $opts->no_isotype(0); 1067 } 1068 1069 if ($opt_mt ne '') 1070 { 1071 die "FATAL: Conflicting search options have been selected. -A and --mt cannot be used simultaneously.\n"; 1072 } 1073 } 1074 1075 # use original general cove model with all tRNAs from 3 domains 1076 if ($opt_general != 0) 1077 { 1078 if ($opt_bact || $opt_arch || $opt_mito ne "" || $opt_metagenome || $opt_numt) 1079 { 1080 die "FATAL: Conflicting search options have been selected. -G cannot be combined with other sequence type search option.\n"; 1081 } 1082 if ($opt_isocm eq "on") 1083 { 1084 die "FATAL: Conflicting search options have been selected. -G cannot be combined with --isocm.\n"; 1085 } 1086 1087 $opts->search_mode("general"); 1088 $opt_euk = 0; 1089 $opt_mid = 1; 1090 $opts->CM_mode("infernal"); 1091 $opt_inf = 1; 1092 $opts->no_isotype(1); 1093 1094 if ($opt_ncintron != 0) 1095 { 1096 die "FATAL: Conflicting search options have been selected. -G and --ncintron cannot be used simultaneously.\n"; 1097 } 1098 if ($opt_frag ne '') 1099 { 1100 die "FATAL: Conflicting search options have been selected. -G and --frag cannot be used simultaneously.\n"; 1101 } 1102 } 1103 1104 if ($opt_organ != 0) 1105 { 1106 $opt_eufind = 0; 1107 $opt_tscan = 0; 1108 1109 $opts->search_mode("organelle"); 1110 $opt_euk = 0; 1111 $opts->no_isotype(1); 1112 $opt_inf = 1; 1113 if ($opt_cove) 1114 { 1115 $opt_inf = 0; 1116 } 1117 1118 $cm->cm_cutoff($global_constants->get("organelle_cm_cutoff")); 1119 1120 # disable psuedogene checking 1121 $cm->skip_pseudo_filter(1); 1122 1123 if ($opt_ncintron != 0) 1124 { 1125 die "FATAL: Conflicting search options have been selected. -O and --ncintron cannot be used simultaneously.\n"; 1126 } 1127 if ($opt_frag ne '') 1128 { 1129 die "FATAL: Conflicting search options have been selected. -O and --frag cannot be used simultaneously.\n"; 1130 } 1131 if ($opt_isocm eq "on") 1132 { 1133 die "FATAL: Conflicting search options have been selected. -O cannot be combined with --isocm.\n"; 1134 } 1135 } 1136 1137 if ($opt_mito ne "") 1138 { 1139 if ($opt_bact || $opt_arch || $opt_general || $opt_metagenome || $opt_numt) 1140 { 1141 die "FATAL: Conflicting search options have been selected. -M cannot be combined with other sequence type search option.\n"; 1142 } 1143 if ($opt_isocm eq "on") 1144 { 1145 die "FATAL: Conflicting search options have been selected. -M cannot be combined with --isocm.\n"; 1146 } 1147 1148 my $cms = $global_constants->get("mito_cm_".lc($opt_mito)); 1149 if (!defined $cms or scalar(keys %$cms) == 0) 1150 { 1151 die "FATAL: Mt-tRNA covariance models $opt_mito are not defined in the configuration file when using -M.\n"; 1152 } 1153 1154 $opts->search_mode("mito"); 1155 $opts->mito_model(lc($opt_mito)); 1156 $opt_euk = 0; 1157 $opt_mid = 1; 1158 $opt_inf = 1; 1159 $opts->no_isotype(1); 1160 1161 if ($opt_eufind || $opt_tscan || $opt_cove) 1162 { 1163 die "FATAL: Conflicting search options have been selected. -M is only supported by using Infernal search.\n"; 1164 } 1165 if ($opt_ncintron != 0) 1166 { 1167 die "FATAL: Conflicting search options have been selected. -M and --ncintron cannot be used simultaneously.\n"; 1168 } 1169 if ($opt_frag ne '') 1170 { 1171 die "FATAL: Conflicting search options have been selected. -M and --frag cannot be used simultaneously.\n"; 1172 } 1173 1174 $opts->CM_mode("infernal"); 1175 1176 if ($opts->mito_model() eq "mammal" or $opts->mito_model() eq "vert") 1177 { 1178 $opts->gc_file($global_constants->get("gc_vert_mito")); 1179 $opts->alt_gcode(1); 1180 } 1181 elsif ($opts->mito_model() eq "invert") 1182 { 1183 $opts->gc_file($global_constants->get("gc_invert_mito")); 1184 $opts->alt_gcode(1); 1185 } 1186 elsif ($opts->mito_model() eq "fungi") 1187 { 1188 $opts->gc_file($global_constants->get("gc_yeast_mito")); 1189 $opts->alt_gcode(1); 1190 } 1191 } 1192 1193 if ($opt_metagenome != 0) 1194 { 1195 if ($opt_bact || $opt_arch || $opt_general || $opt_mito ne "" || $opt_numt) 1196 { 1197 die "FATAL: Conflicting search options have been selected. -T cannot be combined with other sequence type search option.\n"; 1198 } 1199 if ($opt_isocm eq "on") 1200 { 1201 die "FATAL: Conflicting search options have been selected. -T cannot be combined with --isocm.\n"; 1202 } 1203 1204 $opts->search_mode("metagenome"); 1205 $opt_euk = 0; 1206 $opt_inf = 1; 1207 $opts->no_isotype(1); 1208 1209 if ($opt_eufind || $opt_tscan || $opt_cove) 1210 { 1211 die "FATAL: Conflicting search options have been selected. -T is only supported by using Infernal search.\n"; 1212 } 1213 1214 $opts->infernal_fp(1); 1215 $opts->CM_mode("infernal"); 1216 } 1217 1218 if ($opt_numt != 0) 1219 { 1220 if ($opt_bact || $opt_arch || $opt_general || $opt_mito ne "" || $opt_metagenome) 1221 { 1222 die "FATAL: Conflicting search options have been selected. -N cannot be combined with other sequence type search option.\n"; 1223 } 1224 if ($opt_isocm eq "on") 1225 { 1226 die "FATAL: Conflicting search options have been selected. -N cannot be combined with --isocm.\n"; 1227 } 1228 1229 $opts->search_mode("numt"); 1230 $opt_euk = 0; 1231 $opt_inf = 1; 1232 $opts->no_isotype(1); 1233 1234 if ($opt_eufind || $opt_tscan || $opt_cove) 1235 { 1236 die "FATAL: Conflicting search options have been selected. -N is only supported by using Infernal search.\n"; 1237 } 1238 if ($opt_ncintron != 0) 1239 { 1240 die "FATAL: Conflicting search options have been selected. -N and --ncintron cannot be used simultaneously.\n"; 1241 } 1242 if ($opt_frag ne '') 1243 { 1244 die "FATAL: Conflicting search options have been selected. -N and --frag cannot be used simultaneously.\n"; 1245 } 1246 1247 $opts->infernal_fp(1); 1248 $opts->CM_mode("infernal"); 1249 } 1250 1251 if ($opt_euk != 0) 1252 { 1253 $opts->search_mode("euk"); 1254 1255 $opts->no_isotype(0); 1256 if ($opt_isocm eq "off") 1257 { 1258 $opts->no_isotype(1); 1259 if ($opt_mt ne '') 1260 { 1261 die "FATAL: Conflicting search options have been selected. -S and --mt cannot be used simultaneously.\n"; 1262 } 1263 } 1264 else 1265 { 1266 if ($opt_mt ne '') 1267 { 1268 my $cms = $global_constants->get("mito_cm_".lc($opt_mt)); 1269 if (!defined $cms or scalar(keys %$cms) == 0) 1270 { 1271 die "FATAL: Mt-tRNA covariance models $opt_mt are not defined in the configuration file when using --mt.\n"; 1272 } 1273 $opts->mito_model($opt_mt); 1274 } 1275 } 1276 if ($opt_ncintron != 0) 1277 { 1278 die "FATAL: Conflicting search options have been selected. --ncintron cannot be used simultaneously with default search mode.\n"; 1279 } 1280 if ($opt_frag ne '') 1281 { 1282 die "FATAL: Conflicting search options have been selected. --frag cannot be used simultaneously with default search mode.\n"; 1283 } 1284 $opt_inf = 1; 1285 $opts->CM_mode("infernal"); 1286 } 1287 1288 if ($opt_legacy) 1289 { 1290 if ($opt_max or $opt_mid) 1291 { 1292 die "FATAL: Conflicting search options have been selected. -L, --fast, and --max cannot be used simultaneously.\n"; 1293 } 1294 if ($opt_arch && ($opt_ncintron != 0 || $opt_frag ne '')) 1295 { 1296 die "FATAL: Conflicting search options have been selected. --ncintron and --frag are not supported with legacy mode.\n"; 1297 } 1298 if ($opt_mito ne "" || $opt_metagenome || $opt_numt) 1299 { 1300 die "FATAL: Conflicting search options have been selected. -L cannot be combined with -M.\n"; 1301 } 1302 if ($opt_isocm eq "on") 1303 { 1304 die "FATAL: Conflicting search options have been selected. -L cannot be combined with --isocm.\n"; 1305 } 1306 1307 $opt_inf = 0; 1308 $opts->no_isotype(1); 1309 $opts->infernal_fp(0); 1310 $opts->CM_mode("cove"); 1311 if ($opt_arch) 1312 { 1313 $cm->CM_check_for_introns(0); 1314 } 1315 } 1316 1317 # do Cove scan only 1318 if ($opt_cove != 0) 1319 { 1320 if ($opt_eufind || $opt_tscan || $opt_max || $opt_mid) 1321 { 1322 die "FATAL: Conflicting search options have been selected. -t, -e, --fast, --max, and -C cannot be used simultaneously.\n"; 1323 } 1324 if ($opt_arch && ($opt_ncintron != 0 || $opt_frag ne '')) 1325 { 1326 die "FATAL: Conflicting search options have been selected. --ncintron and --frag are not supported with COVE only mode.\n"; 1327 } 1328 if ($opt_mito ne "" || $opt_metagenome || $opt_numt) 1329 { 1330 die "FATAL: Conflicting search options have been selected. -C cannot be combined with -M.\n"; 1331 } 1332 if ($opt_isocm eq "on") 1333 { 1334 die "FATAL: Conflicting search options have been selected. -C cannot be combined with --isocm.\n"; 1335 } 1336 1337 $opt_inf = 0; 1338 $opts->no_isotype(1); 1339 $opts->CM_mode("cove"); 1340 if ($opt_euk) 1341 { 1342 $opts->infernal_fp(0); 1343 } 1344 if ($opt_arch) 1345 { 1346 $cm->CM_check_for_introns(0); 1347 } 1348 } 1349 1350 # don't use tRNAscan unless also specified by -T option 1351 # don't use eufindtRNA unless also specified by -E option 1352 if ($opt_cove) 1353 { 1354 $opts->tscan_mode(0); 1355 $opts->eufind_mode(0); 1356 } 1357 1358 # do tRNAscan only 1359 if ($opt_tscan != 0) 1360 { 1361 # if only using tRNAscan, use strict tRNAscan 1.3 params 1362 # since Cove won't eliminate high false pos rate with default params 1363 $opts->tscan_mode(1); 1364 $tscan->tscan_params($global_constants->get_subvalue("tscan", "strict_param")); 1365 $opts->strict_params(1); 1366 $opt_inf = 0; 1367 $opts->no_isotype(1); 1368 1369 # if -C isn't also specified, turn off Cove filtering 1370 # if -i isn't also specified, turn off infernal filtering 1371 if (($opt_cove == 0) || ($opt_max == 0)) 1372 { 1373 $opts->CM_mode(""); 1374 } 1375 1376 # if -E isn't also specified, turn off eufindtRNA 1377 if ($opt_eufind == 0) 1378 { 1379 $opts->eufind_mode(0); 1380 } 1381 $opts->secondpass_int_result_file(""); 1382 $opts->isotype_int_result_file(""); 1383 $opts->truncated_int_result_file(""); 1384 } 1385 1386 # set tRNAscan search params 1387 if ($opt_tmode ne '') 1388 { 1389 if (!$opt_tscan and !$opt_legacy) 1390 { 1391 die "FATAL: Conflicting search options have been selected. --tmode can only be used with -t or -L.\n"; 1392 } 1393 1394 $opt_tmode = uc($opt_tmode); 1395 # use relaxed tRNAscan params 1396 if ($opt_tmode eq "R") 1397 { 1398 $tscan->tscan_params($global_constants->get_subvalue("tscan", "relaxed_param")); 1399 $opts->strict_params(0); 1400 } 1401 # use strict tRNAscan v1.3 params 1402 elsif ($opt_tmode eq "S") 1403 { 1404 $tscan->tscan_params($global_constants->get_subvalue("tscan", "strict_param")); 1405 $opts->strict_params(1); 1406 } 1407 # use alternate tRNAscan params 1408 elsif ($opt_tmode eq "A") 1409 { 1410 $tscan->tscan_params($global_constants->get_subvalue("tscan", "alt_param")); 1411 $opts->strict_params(0); 1412 } 1413 else 1414 { 1415 print STDERR "\nWARNING: tRNAscan parameter specified", 1416 " with -t option not recognized.\n", 1417 " Defaulting to strict tRNAscan params\n\n"; 1418 $tscan->tscan_params($global_constants->get_subvalue("tscan", "strict_param")); 1419 $opts->strict_params(1); 1420 } 1421 } 1422 1423 # don't merge redundant tRNAscan hits option only for diagnostic purposes 1424 if ($opt_nomerge != 0) 1425 { 1426 $tscan->keep_tscan_repeats(1); 1427 } 1428 1429 # use eufindtRNA 1430 if ($opt_eufind != 0) 1431 { 1432 $opts->eufind_mode(1); 1433 1434 # if -C isn't also specified, turn off Cove filtering 1435 # if -i isn't also specified, turn off infernal filtering 1436 if (($opt_cove == 0) || ($opt_max == 0)) 1437 { 1438 $opts->CM_mode(""); 1439 } 1440 $opt_inf = 0; 1441 $opts->no_isotype(1); 1442 1443 if (!$opts->cove_mode() && !$opts->infernal_mode()) 1444 { 1445 # use more strict default params if no second-pass filtering 1446 $eufind->eufind_params(""); 1447 } 1448 else 1449 { 1450 # use more relaxed params if using second-pass filtering 1451 $eufind->eufind_params($global_constants->get_subvalue("eufind", "relaxed_param")); 1452 } 1453 1454 # turn off tRNAscan if not specified on command line 1455 if ($opt_tscan == 0) 1456 { 1457 $opts->tscan_mode(0); 1458 } 1459 $opts->secondpass_int_result_file(""); 1460 $opts->isotype_int_result_file(""); 1461 $opts->truncated_int_result_file(""); 1462 } 1463 1464 # set eufindtRNA search params 1465 if ($opt_emode ne '') 1466 { 1467 if (!$opt_eufind and !$opt_legacy) 1468 { 1469 die "FATAL: Conflicting search options have been selected. --emode can only be used with -e or -L.\n"; 1470 } 1471 1472 $opt_emode = uc($opt_emode); 1473 # use relaxed params that does not look for poly T 1474 if ($opt_emode eq "R") 1475 { 1476 $eufind->eufind_params($global_constants->get_subvalue("eufind", "relaxed_param")); 1477 } 1478 # use default params and penalizes for no poly T 1479 elsif ($opt_emode eq "N") 1480 { 1481 $eufind->eufind_params(""); 1482 } 1483 # use strict params and requires poly T 1484 # default intermediate cutoff for original algorithm 1485 elsif ($opt_emode eq "S") 1486 { 1487 $eufind->eufind_params($global_constants->get_subvalue("eufind", "strict_param")); 1488 $eufind->eufind_intscore($global_constants->get_subvalue("eufind", "orig_intscore")); 1489 } 1490 else 1491 { 1492 print STDERR "\nWARNING: EufindtRNA parameter specified", 1493 " with -e option not recognized.\n", 1494 " Defaulting to relaxed EufindtRNA params\n\n"; 1495 $eufind->eufind_params($global_constants->get_subvalue("eufind", "relaxed_param")); 1496 } 1497 } 1498 1499 if ($opt_inf) 1500 { 1501 if ($opt_legacy) 1502 { 1503 die "FATAL: Conflicting search options have been selected. -L and -I cannot be used simultaneously.\n"; 1504 } 1505 if ($opt_eufind || $opt_tscan || $opt_cove) 1506 { 1507 die "FATAL: Conflicting search options have been selected. -I, -t, -e, and -C cannot be used simultaneously.\n"; 1508 } 1509 1510 $opts->tscan_mode(0); 1511 $opts->eufind_mode(0); 1512 if ($opt_euk || $opt_bact || $opt_arch) 1513 { 1514 $opts->infernal_fp(1); 1515 } 1516 $opts->CM_mode("infernal"); 1517 $opt_legacy = 0; 1518 } 1519 1520 # do max scan 1521 if ($opt_max != 0) 1522 { 1523 if ($opt_eufind || $opt_tscan || $opt_cove || $opt_legacy || ($opt_mid and $opt_mito != 0)) 1524 { 1525 die "FATAL: Conflicting search options have been selected. --max, -t, -e, -C, and -L cannot be used simultaneously.\n"; 1526 } 1527 1528 $opts->tscan_mode(0); 1529 $opts->eufind_mode(0); 1530 if ($opt_euk || $opt_bact || $opt_arch) 1531 { 1532 $opts->infernal_fp(0); 1533 } 1534 elsif ($opt_mito) 1535 { 1536 $opt_mid = 0; 1537 } 1538 } 1539 1540 # set hmm filter flag for infernal 1541 if ($opt_mid) 1542 { 1543 if ($opt_eufind || $opt_tscan || $opt_cove || $opt_legacy || $opt_max) 1544 { 1545 die "FATAL: Conflicting search options have been selected. --fast, -t, -e, -C, and -L cannot be used simultaneously.\n"; 1546 } 1547 1548 if ($opt_euk || $opt_bact || $opt_arch) 1549 { 1550 $opts->infernal_fp(0); 1551 } 1552 $opts->hmm_filter(1); 1553 } 1554 1555 if ($opt_isospecific ne "") 1556 { 1557 if ($opts->no_isotype()) 1558 { 1559 die "FATAL: Conflicting search options have been selected. -s cannot be used when isotype model scan is disabled.\n"; 1560 } 1561 if ($opt_mito ne "" || $opt_numt) 1562 { 1563 die "FATAL: Conflicting search options have been selected. -s cannot be combined with -M.\n"; 1564 } 1565 1566 if ($opt_isospecific eq "#") 1567 { 1568 $opts->isotype_specific_file("$fafile.iso"); 1569 } 1570 elsif (($opt_isospecific eq "\$") || ($opt_isospecific eq "-")) 1571 { 1572 $opts->isotype_specific_file("-"); 1573 1574 # sends output to stdout instead of tabular output 1575 if ($opts->results_to_stdout()) 1576 { 1577 $opts->results_to_stdout(0); 1578 $opts->out_file("/dev/null"); 1579 } 1580 } 1581 else 1582 { 1583 $opts->isotype_specific_file($opt_isospecific); 1584 } 1585 &check_output_file($opts->isotype_specific_file(), $opts->prompt_for_overwrite()); 1586 } 1587 1588 if ($opts->no_isotype()) 1589 { 1590 $opts->isotype_specific_file(""); 1591 } 1592 1593 if ($opt_bed ne "") 1594 { 1595 if ($opt_bed eq "#") 1596 { 1597 $opts->bed_file("$fafile.bed"); 1598 } 1599 elsif (($opt_bed eq "\$") || ($opt_bed eq "-")) 1600 { 1601 $opts->bed_file("-"); 1602 1603 # sends output to stdout instead of tabular output 1604 if ($opts->results_to_stdout()) 1605 { 1606 $opts->results_to_stdout(0); 1607 $opts->out_file("/dev/null"); 1608 } 1609 } 1610 else 1611 { 1612 $opts->bed_file($opt_bed); 1613 } 1614 &check_output_file($opts->bed_file(), $opts->prompt_for_overwrite()); 1615 } 1616 1617 if ($opt_fasta ne "") 1618 { 1619 if ($opt_fasta eq "#") 1620 { 1621 $opts->output_fasta_file("$fafile.fa"); 1622 } 1623 elsif (($opt_fasta eq "\$") || ($opt_fasta eq "-")) 1624 { 1625 $opts->output_fasta_file("-"); 1626 1627 # sends output to stdout instead of tabular output 1628 if ($opts->results_to_stdout()) 1629 { 1630 $opts->results_to_stdout(0); 1631 $opts->out_file("/dev/null"); 1632 } 1633 } 1634 else 1635 { 1636 $opts->output_fasta_file($opt_fasta); 1637 } 1638 &check_output_file($opts->output_fasta_file(), $opts->prompt_for_overwrite()); 1639 } 1640 1641 if ($opt_iscore != 1000) 1642 { 1643 if (!$opt_eufind and !$opt_legacy) 1644 { 1645 die "FATAL: Conflicting search options have been selected. --iscore can only be used with -e or -L.\n"; 1646 } 1647 $eufind->eufind_intscore($opt_iscore); 1648 } 1649 1650 # pad both ends of first-pass hits with this 1651 # many extra bases before passing to Cove 1652 if ($opt_pad != 1000) 1653 { 1654 $opts->padding($opt_pad); 1655 } 1656 1657 # use alternate genetic code table 1658 if ($opt_gencode ne '') 1659 { 1660 $opts->gc_file($opt_gencode); 1661 $opts->alt_gcode(1); 1662 } 1663 1664 # get HMM score for tRNA hits 1665 if ($opt_breakdown != 0) 1666 { 1667 if ($opt_mito ne "" || $opt_numt) 1668 { 1669 die "FATAL: Conflicting search options have been selected. -H cannot be combined with -M.\n"; 1670 } 1671 $cm->get_hmm_score(1); 1672 } 1673 1674 # save stats summary file 1675 if ($opt_stats ne '') 1676 { 1677 $opts->save_stats(1); 1678 if ($opt_stats eq "#") 1679 { 1680 $opts->stats_file("$fafile.stats"); 1681 } 1682 else 1683 { 1684 $opts->stats_file($opt_stats); 1685 } 1686 &check_output_file($opts->stats_file(), $opts->prompt_for_overwrite()); 1687 } 1688 1689 # save coves secondary structures for tRNA's whose acodons it couldn't call 1690 if ($opt_w ne '') 1691 { 1692 $opts->save_odd_struct(1); 1693 if ($opt_w eq "#") 1694 { 1695 $opts->odd_struct_file("$fafile.oddstruct"); 1696 } 1697 else 1698 { 1699 $opts->odd_struct_file($opt_w); 1700 } 1701 &check_output_file($opts->odd_struct_file(), $opts->prompt_for_overwrite()); 1702 } 1703 1704 # save all coves secondary structures 1705 if ($opt_struct ne '') 1706 { 1707 $opts->save_all_struct(1); 1708 if ($opt_struct eq "#") 1709 { 1710 $opts->all_struct_file("$fafile.ss"); 1711 } 1712 # sends structure output to stdout instead of tabular output 1713 elsif (($opt_struct eq "\$") || ($opt_struct eq "-")) 1714 { 1715 $opts->all_struct_file("-"); 1716 if ($opts->results_to_stdout()) 1717 { 1718 $opts->results_to_stdout(0); 1719 $opts->out_file("/dev/null"); 1720 } 1721 } 1722 else 1723 { 1724 $opts->all_struct_file($opt_struct); 1725 } 1726 &check_output_file($opts->all_struct_file(), $opts->prompt_for_overwrite()); 1727 } 1728 1729 # save only seqs without a tRNA hit 1730 if ($opt_missed ne '') 1731 { 1732 $opts->save_missed(1); 1733 if ($opt_missed eq "#") 1734 { 1735 $opts->missed_seq_file("$fafile.missed"); 1736 } 1737 else 1738 { 1739 $opts->missed_seq_file($opt_missed); 1740 } 1741 &check_output_file($opts->missed_seq_file(),$opts->prompt_for_overwrite()); 1742 } 1743 1744 # outputs PID number in file for tRNAscan-SE web server program 1745 if ($opt_Y != 0) 1746 { 1747 &check_output_file("$fafile.pid", $opts->prompt_for_overwrite()); 1748 &open_for_write(\*TESTF, "$fafile.pid"); 1749 print TESTF "PID=$$\n"; 1750 close(TESTF); 1751 } 1752 1753 # save verbose tRNAscan output 1754 if ($opt_verbose ne '') 1755 { 1756 $opts->save_verbose(1); 1757 my $tmp_verb = &tempname($global_constants->get("temp_dir"),".vb"); # get temp output file name 1758 &check_output_file($tmp_verb, $opts->prompt_for_overwrite()); 1759 $tscan->tscan_params($tscan->tscan_params() . "-v $tmp_verb"); 1760 if ($opt_verbose eq "#") 1761 { 1762 $opts->verb_file("$fafile.verb"); 1763 } 1764 else 1765 { 1766 $opts->verb_file($opt_verbose); 1767 } 1768 &check_output_file($opts->verb_file(),$opts->prompt_for_overwrite()); 1769 } 1770 1771 # use previous results output file 1772 if ($opt_filter ne '') 1773 { 1774 $opts->tscan_mode(0); 1775 $opts->eufind_mode(0); 1776 $opts->use_prev_ts_run(1); 1777 $opts->firstpass_result_file($opt_filter); 1778 if (!(-e $opts->firstpass_result_file())) { 1779 die "FATAL: Can't find formatted tRNA output file ", 1780 $opts->firstpass_result_file()."\n\n"; 1781 } 1782 } 1783 # create named file for first pass results 1784 elsif ($opt_fsres ne '') 1785 { 1786 $opts->save_firstpass_res(1); 1787 if ($opt_fsres eq "#") 1788 { 1789 $opts->firstpass_result_file("$fafile.fpass.out"); 1790 } 1791 else 1792 { 1793 $opts->firstpass_result_file($opt_fsres); 1794 } 1795 &check_output_file($opts->firstpass_result_file(), $opts->prompt_for_overwrite()); 1796 $fp_result_file->init_fp_result_file($opts->firstpass_result_file()); 1797 } 1798 # create temp file for firstpass output 1799 else 1800 { 1801 $opts->firstpass_result_file(&tempname($global_constants->get("temp_dir"), ".fpass")); 1802 &check_output_file($opts->firstpass_result_file(), $opts->prompt_for_overwrite()); 1803 $fp_result_file->init_fp_result_file($opts->firstpass_result_file()); 1804 } 1805 $opts->firstpass_flanking_file($global_constants->get("temp_dir")."/tscan$$"."_fp_flanking.out"); 1806 $fp_result_file->flanking_file($opts->firstpass_flanking_file()); 1807 1808 # save false positive tRNAs from first-pass scans that Cove bonked 1809 # save source of tRNA hit (-y option) if not using infernal 1810 if ($opt_falsepos ne '') 1811 { 1812 if ($opt_mito ne "" || $opt_numt) 1813 { 1814 die "FATAL: Conflicting search options have been selected. -F cannot be combined with -M.\n"; 1815 } 1816 $opts->save_falsepos(1); 1817 if (!$opts->infernal_mode()) 1818 { 1819 $opts->save_source(1); 1820 } 1821 if ($opt_falsepos eq "#") 1822 { 1823 $opts->falsepos_file("$fafile.fpos"); 1824 } 1825 else 1826 { 1827 $opts->falsepos_file($opt_falsepos); 1828 } 1829 &check_output_file($opts->falsepos_file(), $opts->prompt_for_overwrite()); 1830 } 1831 1832 # set MAX intron+variable loop region size used in EufindtRNA & Cove 1833 if ($opt_len > 0) 1834 { 1835 $opts->max_int_len($opt_len); 1836 1837 # look for long tRNAs if needed 1838 if ($opts->use_prev_ts_run() || $opts->eufind_mode()) 1839 { 1840 $opts->find_long_tRNAs(1); 1841 } 1842 else 1843 { 1844 $cm->max_cove_tRNA_length($opts->max_int_len() + $cm->min_tRNA_no_intron()); 1845 } 1846 } 1847 1848 if ($opt_progress != 0) 1849 { 1850 $log->open_file("-") || die "FATAL: Unable to open standard out to display program progress\n\n"; 1851 $opts->display_progress(1); 1852 } 1853 elsif ($opt_log ne "") 1854 { 1855 if ($opt_log eq "#") 1856 { 1857 $opts->log_file("$fafile.log"); 1858 } 1859 else 1860 { 1861 $opts->log_file($opt_log); 1862 } 1863 &check_output_file($opts->log_file(), $opts->prompt_for_overwrite()); 1864 1865 $log->file_name($opts->log_file()); 1866 $log->initialize_log("tRNAscan-SE", "", ""); 1867 1868 $opts->save_progress(1); 1869 } 1870 else 1871 { 1872 $log->open_file("/dev/null") || die "FATAL: Unable to open /dev/null to record program progress\n\n"; 1873 } 1874 1875 # use different Cove-score cutoff for reporting "real" tRNAs 1876 # dummy opt_X val is 10,000 to avoid overlap with a real value a user might specify 1877 if ($opt_score != 1000) 1878 { 1879 $cm->cm_cutoff($opt_score); 1880 if ($opt_score < $cm->infernal_fp_cutoff()) 1881 { 1882 $cm->infernal_fp_cutoff($opt_score); 1883 } 1884 } 1885 1886 if ($opt_thread != 999) 1887 { 1888 if ($opt_thread < 0) 1889 { 1890 die "FATAL: Number of threads for running Infernal must be at least 0.\n"; 1891 } 1892 if ($opt_eufind || $opt_tscan || $opt_cove || $opt_legacy) 1893 { 1894 die "FATAL: Conflicting search options have been selected. --thread, -t, -e, -C, and -L cannot be used simultaneously.\n"; 1895 } 1896 $cm->infernal_thread($opt_thread); 1897 } 1898 1899 # only one seq file on command line 1900 if ($#ARGV == 0) 1901 { 1902 $opts->multiple_files(0); 1903 $opts->fasta_file($ARGV[0]); 1904 } 1905 else 1906 { 1907 $opts->multiple_files(1); 1908 my $tmp_multiseq_file = &tempname($global_constants->get("temp_dir"), ".mseq"); 1909 &check_output_file($tmp_multiseq_file, $opts->prompt_for_overwrite()); 1910 foreach my $filename (@ARGV) 1911 { 1912 system("cat $filename >> $tmp_multiseq_file"); 1913 } 1914 $opts->fasta_file($tmp_multiseq_file); 1915 } 1916 1917 if ($opts->cove_mode()) 1918 { 1919 $opts->second_pass_label("Cove"); 1920 } 1921 if ($opts->infernal_mode()) 1922 { 1923 $opts->second_pass_label("Infernal"); 1924 } 1925 1926 $cm->CM_mode($opts->CM_mode()); 1927} 1928