1#! @PERL@
2#
3# --------------------------------------------------------------------
4# tRNAscan-SE: a program for improved detection of transfer RNA
5#              genes in genomic sequence
6#
7# Version 2.0.5
8#
9# Copyright (C) 2019 Patricia Chan and Todd Lowe
10#
11# School of Engineering, University of California, Santa Cruz
12# lowe@soe.ucsc.edu
13# http://lowelab.ucsc.edu/
14# --------------------------------------------------------------------
15#
16# Usage:
17# tRNAscan-SE [options] <FASTA file(s)>
18#
19
20use strict;
21use lib "@libdir@/tRNAscan-SE";
22use Getopt::Long;
23use tRNAscanSE::Configuration;
24use tRNAscanSE::tRNA;
25use tRNAscanSE::SprinzlPos;
26use tRNAscanSE::ArraytRNA;
27use tRNAscanSE::Utils;
28use tRNAscanSE::GeneticCode;
29use tRNAscanSE::Options;
30use tRNAscanSE::Eufind;
31use tRNAscanSE::Tscan;
32use tRNAscanSE::CM;
33use tRNAscanSE::LogFile;
34use tRNAscanSE::Stats;
35use tRNAscanSE::Sequence;
36use tRNAscanSE::FpScanResultFile;
37use tRNAscanSE::ScanResult;
38use tRNAscanSE::IntResultFile;
39use tRNAscanSE::MultiResultFile;
40use tRNAscanSE::SS;
41
42our $version = "2.0.5";
43our $release_date = "October 2019";
44our $program_id = "tRNAscan-SE-".$version;
45
46# modified by 'make'
47our $default_conf = "@sysconfdir@/tRNAscan-SE.conf";
48
49# Signal handling
50$SIG{'TERM'} = 'error_handler';
51$SIG{'QUIT'} = 'error_handler';
52$SIG{'INT'} = 'error_handler';
53
54# Global variables
55our @fp_start_time;
56our $opts = tRNAscanSE::Options->new;
57our $global_constants = tRNAscanSE::Configuration->new();
58our $log = tRNAscanSE::LogFile->new("default");
59our $sprinzl = tRNAscanSE::SprinzlPos->new;
60our $fp_tRNAs = tRNAscanSE::ArraytRNA->new;
61our $sp_tRNAs = tRNAscanSE::ArraytRNA->new;
62our $fp_result_file = tRNAscanSE::FpScanResultFile->new("");
63our $sp_int_results = tRNAscanSE::IntResultFile->new;
64our $iso_int_results = tRNAscanSE::MultiResultFile->new;
65our $gc = tRNAscanSE::GeneticCode->new;
66our $stats = tRNAscan::Stats->new;
67our $seq_file = tRNAscanSE::Sequence->new;
68our $eufind = tRNAscanSE::Eufind->new;
69our $tscan = tRNAscanSE::Tscan->new;
70our $cm = tRNAscanSE::CM->new;
71
72$global_constants->config_file($default_conf);
73
74our %global_vars = (global_constants => $global_constants,
75					log_file => $log,
76					options => $opts,
77                    sprinzl => $sprinzl,
78                    fp_tRNAs => $fp_tRNAs,
79                    sp_tRNAs => $sp_tRNAs,
80                    fp_result_file => $fp_result_file,
81                    sp_int_results => $sp_int_results,
82                    iso_int_results => $iso_int_results,
83                    sequence => $seq_file,
84                    gc => $gc,
85                    stats => $stats
86                   );
87
88# set user-selectable options
89&set_options();
90
91# set location of binaries & data files,
92# plus, check to make sure they are there
93$cm->set_file_paths(\%global_vars);
94$cm->check_lib_files($opts);
95$cm->set_bin($global_constants->get("bin_dir"));
96$cm->set_infernal_bin($global_constants->get("infernal_dir"));
97$eufind->set_bin($global_constants->get("bin_dir"));
98$tscan->set_bin($global_constants->get("bin_dir"));
99
100# initialize variables
101$gc->read_transl_table($opts);
102if ($opts->save_stats())
103{
104    $stats->file_name($opts->stats_file());
105}
106
107# Start processing
108&initialize_process();
109
110# prescan with either tRNAscan/eufind or both
111if ($opts->tscan_mode() || $opts->eufind_mode() || $opts->infernal_fp())
112{
113    &first_pass_prescan();
114}
115
116# Check to see if no sequences were read from input file(s)
117if (($stats->numscanned() == 0) && ($opts->eufind_mode() || $opts->tscan_mode() || $opts->infernal_fp()))
118{
119    if ($opts->seq_key() ne '\S*')
120    {
121        die "\nNo FASTA sequences matching \'".$opts->raw_seq_key()."\' key found\n\n";
122    }
123    elsif ($opts->multiple_files())
124    {
125        die "\nFATAL: No sequences in FASTA format found in ", join(', ',@ARGV),"\n\n";
126    }
127    else
128    {
129        die "\nFATAL: No sequences in FASTA format found in file ".$opts->fasta_file()."\n\n";
130    }
131}
132
133# Run Cove or Infernal on candidate tRNAs picked in first pass,
134#  or by itself on seqs if no first pass searches
135elsif ($opts->cove_mode() || $opts->infernal_mode())
136{
137    $sp_int_results->file_name($opts->secondpass_int_result_file());
138    $iso_int_results->file_name($opts->isotype_int_result_file());
139    &run_cm_scan();
140}
141
142$stats->end_sp_timer();
143
144if ($opts->save_stats())
145{
146    $stats->open_file();
147    $stats->save_final_stats($opts, $gc, $fp_result_file->get_hit_count(), $cm->tab_results());
148    $stats->close_file();
149}
150
151$log->finish_process();
152
153&cleanup();                        # clean up temp files
154exit(0);
155
156# END main
157
158
159sub initialize_process
160{
161    # print program info header, credits, & selected run options
162    if (!$opts->quiet_mode())
163    {
164        print STDERR "\ntRNAscan-SE v.$version ($release_date) -",
165            " scan sequences for transfer RNAs\n";
166        &display_credits();
167        $opts->display_run_options($cm, $tscan, $eufind, $global_constants, *STDERR);
168    }
169
170    $stats->start_fp_timer();                    # save starting time
171
172    # if statistics are being saved, write run options in stats file
173    if ($opts->save_stats())
174    {
175        my $host = `hostname`;
176        chomp($host);
177        $stats->open_file();
178        $stats->write_line("\ntRNAscan-SE v.$version ($release_date) scan results (on host $host)\nStarted: ".`date`);
179        $opts->display_run_options($cm, $tscan, $eufind, $global_constants, $stats->FILE_H());
180        $stats->close_file();
181    }
182}
183
184# Running tRNAscan and/or EufindtRNA
185sub first_pass_prescan
186{
187    if ($opts->infernal_fp())
188    {
189        $log->status("Phase I: Searching for tRNAs with HMM-enabled Infernal");
190    }
191    else
192    {
193        $log->status("Phase I: Searching for tRNAs with tRNAscan and/or EufindtRNA");
194    }
195
196    # open seq file to search
197    $seq_file->open_file($opts->fasta_file(), "read");
198
199    # Main loop for reading seqs & scanning with tRNAscan and/or EufindtRNA
200    my $targ_seq_id = 0;      # Don't look for a specific Seq number
201    my $start_index = 1;
202    my $sequence_scanned = 0;
203    my $printed_header = 0;
204    my $eufind_output;
205    my @hit_list = ();
206    my $tmp_raw = $global_constants->get("tmp_raw");
207    my $tmp_fa = $global_constants->get("tmp_fa");
208    my $tmp_fa_file = tRNAscanSE::Sequence->new;
209    my $missing_fa_file = tRNAscanSE::Sequence->new;
210
211    while ($seq_file->read_fasta($opts, $targ_seq_id))
212    {
213        if ($opts->cove_mode() || $opts->infernal_mode())
214        {
215            $log->broadcast("Scanned seqs: ".$stats->numscanned()." (at ".$seq_file->seq_name().")");
216        }
217        $stats->increment_numscanned();
218        $stats->increment_first_pass_base_ct($seq_file->seq_length());
219
220        do
221        {
222            # Write one input sequence / seq buffer to tmp_fa file
223            $tmp_fa_file->open_file($tmp_fa, "write");
224            $tmp_fa_file->set_seq_info($seq_file->seq_name(), $seq_file->seq_description(),
225                                       $seq_file->seq_length(), $seq_file->sequence());
226            $tmp_fa_file->write_fasta();
227            $tmp_fa_file->close_file();
228
229            if ($opts->infernal_fp())
230            {
231                $cm->first_pass_scan(\%global_vars, $start_index, $seq_file->seq_name());
232            }
233            else
234            {
235                # Run tRNAscan on $tmp_fa file & write results to $tmp_raw output file
236                if ($opts->tscan_mode())
237                {
238                    $tscan->run_tRNAscan($tmp_fa, $tmp_raw, $start_index, $global_constants->get("lib_dir"), $seq_file->seq_name());
239                    if ($opts->save_verbose())
240                    {
241                        $tscan->append_verbfile($opts->verb_file(), $tmp_fa, $seq_file->seq_name());
242                    }
243                    $tscan->process_tRNAscan_hits(\%global_vars, $seq_file->seq_name());
244                }
245
246                # Run eufindtRNA program & save results in memory in $Eufind_output array
247                if ($opts->eufind_mode())
248                {
249                    $eufind_output = $eufind->run_eufind($tmp_fa, $start_index, $opts->max_int_len(), $seq_file->seq_name());
250                    if ($eufind_output ne "")
251                    {
252                        $eufind->process_Eufind_hits(\%global_vars, $eufind_output);
253                        $eufind_output = "";
254                    }
255                }
256            }
257            $sequence_scanned = 1;    # Flag indicating current sequence has been scanned
258
259            # Check to see if all of sequence was read in last buffer-sized chunck
260            if ($seq_file->seq_buf_overrun())
261            {
262                $start_index = $seq_file->buffer_end_index() + 1;
263                if ($seq_file->read_more_fasta($opts))
264                {
265                    $sequence_scanned = 0;
266                }
267            }
268
269        }
270        until ($sequence_scanned);
271
272        if ($fp_tRNAs->get_count() > 0)
273        {
274            $stats->increment_seqs_hit();
275
276            # save results in ACeDB format now if not using Cove analysis
277            if ($opts->ace_output() && (!$opts->CM_mode()))
278            {
279                &save_Acedb_from_firstpass($opts->output_codon(), $gc->one_let_trans_map(), $fp_tRNAs, $opts->out_file());
280            }
281            else
282            {
283                # save all hits for this seq
284                my $fpass_trna_base_ct = $stats->fpass_trna_base_ct();
285                if (!$opts->CM_mode())
286                {
287                    if (!($opts->brief_output() || $printed_header))
288                    {
289                        &open_for_append(\*TABOUT, $opts->out_file());
290                        &print_results_header(\*TABOUT, $opts, 0, 8, 8, 1);
291                        close (TABOUT);
292                        $printed_header = 1;
293                    }
294                }
295                $fp_result_file->save_firstpass_output($opts, $fp_tRNAs, \$fpass_trna_base_ct, $seq_file->seq_length(), $seq_file->seq_id());
296                $stats->fpass_trna_base_ct($fpass_trna_base_ct);
297            }
298
299            # clear hit array
300            $fp_tRNAs->clear();
301        }
302        elsif ($opts->save_missed())
303        {
304            # save sequence that had no tRNA hits if -M param set
305            # NOTE: only writes last frame of seq buffer if seq length > max_seq_buffer
306            $missing_fa_file->open_file($opts->missed_seq_file(), "append");
307            $missing_fa_file->set_seq_info($seq_file->seq_name(), $seq_file->seq_description(), $seq_file->seq_length(), $seq_file->sequence());
308            $missing_fa_file->write_fasta();
309            $missing_fa_file->close_file();
310        }
311
312        $seq_file->reset_buffer_ct();
313        $start_index = 1;
314
315    }     # while (read_fasta()) - still more seqs to scan
316
317    $seq_file->close_file();
318                                        # remove temporary files
319    system("rm -f $tmp_raw $tmp_fa");
320    $seq_file->release_memory();        # release memory
321
322    $log->broadcast("\n".$stats->numscanned()." seqs scanned, ".$stats->seqs_hit()." seqs had at ".
323        "least one hit.\n".$stats->trnatotal()." total tRNAs predicted in first pass scans");
324
325    if ((!$opts->CM_mode()) && ($stats->trnatotal() == 0)  && (!$opts->quiet_mode()))
326    {
327        $log->status("No tRNAs found.");
328    }
329
330    $stats->end_fp_timer();             # save time first-pass scans are done
331
332    if ($opts->save_stats())
333    {
334        $stats->open_file();
335        $stats->save_firstpass_stats();
336        $stats->close_file();
337    }
338}
339
340# Run Cove or Infernal
341sub run_cm_scan
342{
343    $stats->start_sp_timer();
344
345    if ($opts->tscan_mode() || $opts->eufind_mode() || $opts->infernal_fp())
346    {
347        $log->status("Phase II: ".$opts->second_pass_label()." verification of candidate ".
348            "tRNAs detected with first-pass scan");
349    }
350    else
351    {
352        $log->status("Running ".$opts->second_pass_label()." analysis");
353        if (!$opts->use_prev_ts_run())
354        {
355            $fp_result_file->prep_for_secpass_only($opts, $stats, $seq_file);
356        }
357    }
358
359    # Name of tRNA sequence currently in memory
360    my $prev_seq_name = '';
361
362    # flag indicates if seqid and seqlen are saved in firstpass result file
363    my $seqinfo_flag = 0;
364
365    my $curseq_trnact = 0;
366    my $prescan_trna = tRNAscanSE::tRNA->new;
367    my $tRNAs_found = 0;
368    my $index = -1;
369
370    $seq_file->open_file($opts->fasta_file(), "read");
371    $fp_result_file->index_results(\$seqinfo_flag);
372    my @fp_result_file_indexes = $fp_result_file->get_indexes();
373    $fp_result_file->open_file();
374
375    for (my $seq_ct = 0; $seq_ct < scalar(@fp_result_file_indexes); $seq_ct++)
376    {
377        $sp_int_results->file_name($opts->secondpass_int_result_file());
378        $global_vars{sp_int_results} = $sp_int_results;
379        $sp_int_results->open_file("write");
380        $sp_tRNAs->clear();
381        $log->broadcast("Scanning ".$fp_result_file_indexes[$seq_ct]->[1]);
382
383        if ($opts->cove_mode())
384        {
385            $fp_result_file->reset_current_seq();
386            $fp_result_file->get_next_tRNA_candidate($opts, $seqinfo_flag, $seq_ct, $prescan_trna);
387            while ($prescan_trna->seqname() ne "")
388            {
389                # Retrieve tRNA sequence and write to tmp_trnaseq_file
390                if (!&prepare_tRNA_to_scan($seq_file, $prescan_trna))
391                {
392                    next;
393                }
394                $tRNAs_found = $cm->analyze_with_cove(\%global_vars, $prescan_trna, \$curseq_trnact);
395                if (!$cm->CM_check_for_introns())
396                {
397                    $stats->increment_total_secpass_ct($tRNAs_found);
398                }
399
400                $fp_result_file->get_next_tRNA_candidate($opts, $seqinfo_flag, $seq_ct, $prescan_trna);
401            }
402        }
403        else
404        {
405            # Retrieve tRNA sequences and write to tmp_trnaseq_file
406            if (!&prepare_multi_tRNAs_to_scan($seqinfo_flag, $seq_file, $seq_ct))
407            {
408                next;
409            }
410
411            if ($opts->mito_mode())
412            {
413                $tRNAs_found = $cm->analyze_mito(\%global_vars, $seqinfo_flag, $seq_ct, $fp_result_file_indexes[$seq_ct]->[1], \$curseq_trnact);
414            }
415            elsif($opts->alternate_mode())
416            {
417                $tRNAs_found = $cm->analyze_alternate(\%global_vars, $seqinfo_flag, $seq_ct, $fp_result_file_indexes[$seq_ct]->[1], \$curseq_trnact);
418            }
419            elsif ($opts->metagenome_mode())
420            {
421
422            }
423            elsif ($opts->numt_mode())
424            {
425
426            }
427            elsif ($opts->infernal_mode())
428            {
429                $tRNAs_found = $cm->analyze_with_cmsearch(\%global_vars, $seqinfo_flag, $seq_ct, $fp_result_file_indexes[$seq_ct]->[1], \$curseq_trnact);
430            }
431
432            $stats->increment_total_secpass_ct($tRNAs_found);
433        }
434
435        $sp_int_results->close_file();
436
437        if (($curseq_trnact > 0) and $cm->CM_check_for_introns())
438        {
439            if (&prepare_intron_scan($seq_file))
440            {
441                $cm->scan_noncanonical_introns(\%global_vars, $fp_result_file_indexes[$seq_ct]->[1]);
442            }
443        }
444
445        if ($curseq_trnact > 0)
446        {
447            if ($opts->euk_mode() or $opts->bact_mode() or $opts->arch_mode())
448            {
449                $cm->truncated_tRNA_search(\%global_vars, $fp_result_file_indexes[$seq_ct]->[1]);
450
451                if (!$opts->no_isotype())
452                {
453                    $cm->isotype_cmsearch(\%global_vars);
454                }
455            }
456
457            &output_tRNA(\%global_vars, $cm, $cm->tab_results(), $cm->get_hmm_score(), $program_id);
458        }
459
460        if (($sp_int_results->get_count() > 0) and $cm->CM_check_for_split_halves())
461        {
462            my @sp_indexes = $sp_int_results->get_indexes();
463            if ($sp_int_results->open_file("read"))
464            {
465                for (my $i = 0; $i < scalar(@sp_indexes); $i++)
466                {
467                    my $cm_tRNA = tRNAscanSE::tRNA->new;
468                    $sp_int_results->get_tRNA($sp_indexes[$i]->[0], $cm_tRNA);
469                    $sp_tRNAs->put($cm_tRNA);
470                }
471                $sp_int_results->close_file();
472
473                $cm->scan_split_tRNAs(\%global_vars);
474            }
475        }
476
477        if ($opts->bed_file() ne "")
478        {
479            if ($curseq_trnact > 0)
480            {
481                &write_bed(\%global_vars);
482            }
483        }
484
485        $sp_int_results->clear_index();
486        $curseq_trnact = 0;
487    }
488
489    $fp_result_file->close_file();
490    $seq_file->close_file();
491
492    if (($stats->total_secpass_ct() == 0) && (!$opts->quiet_mode()))
493    {
494        print STDERR "No tRNAs found.\n\n";
495    }
496}
497
498# Extracts tRNA sequences with given coordinates, and writes to $tmp_
499sub prepare_multi_tRNAs_to_scan
500{
501    my ($seqinfo_flag, $seq_file, $seq_ct) = @_;
502
503    system("rm -f ".$global_constants->get("tmp_trnaseq_file"));
504
505    my $trna_file = tRNAscanSE::Sequence->new;
506    $trna_file->open_file($global_constants->get("tmp_trnaseq_file"), "write");
507
508    my $flanking = 0;
509    my $trna = tRNAscanSE::tRNA->new;
510    $fp_result_file->reset_current_seq();
511    $fp_result_file->get_next_tRNA_candidate($opts, $seqinfo_flag, $seq_ct, $trna);
512    if ($fp_result_file->open_flanking("write"))
513    {
514        $flanking = 1;
515    }
516    while ($trna->seqname() ne "")
517    {
518        $seq_file->get_tRNA_sequence(\%global_vars, $trna);
519        $stats->increment_secpass_base_ct($trna->len());
520		$trna_file->set_seq_info($trna->seqname().".t".&pad_num($trna->id(), 6), $seq_file->seq_description(), length($trna->seq()), $trna->seq());
521		$trna_file->write_fasta();
522        if ($flanking)
523        {
524            $fp_result_file->write_tRNA_flanking($trna);
525        }
526
527        $seq_file->release_memory();
528
529        $fp_result_file->get_next_tRNA_candidate($opts, $seqinfo_flag, $seq_ct, $trna);
530    }
531    $trna_file->close_file();
532    $fp_result_file->close_flanking();
533
534    return 1;
535}
536
537# Extracts tRNA sequence with given coordinates, and writes to $tmp_
538sub prepare_tRNA_to_scan
539{
540    my ($seq_file, $trna) = @_;
541
542    $seq_file->get_tRNA_sequence(\%global_vars, $trna);
543    $stats->increment_secpass_base_ct($trna->len());
544
545    &write_tRNA($global_constants->get("tmp_trnaseq_file"), $seq_file->seq_name(), $seq_file->seq_description(), $trna->seq(), 1);
546
547    $seq_file->release_memory();
548
549    return 1;
550}
551
552# Extracts tRNA sequences with given coordinates, and writes to $tmp_
553sub prepare_intron_scan
554{
555    my ($seq_file) = @_;
556    my $ret_value = 1;
557
558    system("rm -f ".$global_constants->get("tmp_trnaseq_file"));
559
560    my $trna_file = tRNAscanSE::Sequence->new;
561    my $cm_tRNA = undef;
562    $trna_file->open_file($global_constants->get("tmp_trnaseq_file"), "write");
563
564    my $trna = tRNAscanSE::tRNA->new;
565    my $padded_seq = "";
566    $sp_tRNAs->clear();
567    my @sp_indexes = $sp_int_results->get_indexes();
568    if ($sp_int_results->open_file("read"))
569    {
570        for (my $i = 0; $i < scalar(@sp_indexes); $i++)
571        {
572            $cm_tRNA = tRNAscanSE::tRNA->new;
573            $sp_int_results->get_tRNA($sp_indexes[$i]->[0], $cm_tRNA);
574            my $orig_seq = $cm_tRNA->seq();
575            $seq_file->get_tRNA_sequence(\%global_vars, $cm_tRNA);
576            if (uc($orig_seq) ne uc($cm_tRNA->seq()))
577            {
578				$ret_value = 0;
579                $log->error("tRNA sequence does not match for intron scan: ".$cm_tRNA->tRNAscan_id()." ".$cm_tRNA->seqname().":".$cm_tRNA->start()."-".$cm_tRNA->end());
580			}
581            $padded_seq = $cm_tRNA->upstream().$cm_tRNA->seq().$cm_tRNA->downstream();
582            $trna_file->set_seq_info($cm_tRNA->seqname().".trna".&pad_num($cm_tRNA->id(), 6), $cm_tRNA->tRNAscan_id(), length($padded_seq), $padded_seq);
583            $trna_file->write_fasta();
584            $sp_tRNAs->put($cm_tRNA);
585            $seq_file->release_memory();
586        }
587        $sp_int_results->close_file();
588    }
589    $trna_file->close_file();
590
591    return $ret_value;
592}
593
594# clean up temp files
595sub cleanup
596{
597    system("rm -f ".$global_constants->get("temp_dir")."/tscan$$".'*');
598    system("rm -f ".$opts->fafile().".pid");
599}
600
601sub error_handler
602{
603    print "\nAborting tRNAscan-SE\n\n";
604
605    my $ppid = $$;
606    my $psout = `ps -ef`;
607    my @ps_lines = split(/\n/,$psout);
608    foreach my $line (0..$#ps_lines)
609    {
610        if ($ps_lines[$line] =~/^\s+\S+\s+(\d+)\s+($ppid)\s/)
611        {
612           print STDERR "Killing process $1:\n",$ps_lines[$line],"\n";
613            my $killct = kill 'KILL', $1;
614           print STDERR "$killct jobs received the kill signal\n";
615        }
616    }
617
618    &cleanup();
619    exit(1);
620}
621
622sub display_credits
623{
624    print STDERR "Copyright (C) 2019 Patricia Chan and Todd Lowe\n",
625                 "                   University of California Santa Cruz\n",
626                 "Freely distributed under the GNU General Public License (GPLv3)\n\n";
627}
628
629sub print_usage
630{
631    print STDERR "\nUsage: tRNAscan-SE [-options] <FASTA file(s)>\n\n";
632    print STDERR "  Scan a sequence file for tRNAs \n",
633    "          -- default: use Infernal & tRNA covariance models\n",
634    "             with eukaryotic sequences \n",
635    "             (use -B, -A, -M, -O or -G to scan other types of sequences)\n\n",
636    "Basic Options\n",
637    "  -E         : search for eukaryotic tRNAs (default)\n",
638    "  -B         : search for bacterial tRNAs\n",
639    "  -A         : search for archaeal tRNAs\n",
640    "  -M <model> : search for mitochondrial tRNAs\n",
641    "                 options: mammal, vert\n",
642    "  -O         : search for other organellar tRNAs\n",
643    "  -G         : use general tRNA model (cytoslic tRNAs from all 3 domains included)\n",
644    "  -L         : search using the legacy method (tRNAscan, EufindtRNA, and COVE)\n",
645    "                 use with -E, -B, -A, -O, or -G\n",
646    "  -I         : search using Infernal (default)\n",
647    "                 use with -E, -B, -A, -O, or -G\n",
648#    "  -T         : search for tRNAs in metagenome\n",
649#    "  -N         : search for tRNAs in nuclear mitochondrial DNA regions (NUMTs)\n",
650    "  -o <file>  : save final results in <file>\n",
651    "  -f <file>  : save tRNA secondary structures to <file>\n",
652    "  -m <file>  : save statistics summary for run in <file>\n",
653    "               (speed, # tRNAs found in each part of search, etc)\n",
654    "  -H         : show both primary and secondary structure components to\n",
655    "               covariance model bit scores\n",
656    "  -q         : quiet mode (credits & run option selections suppressed)\n\n",
657    "  -h         : print full list (long) of available options\n\n";
658}
659
660sub print_all_options
661{
662    print "\nUsage: tRNAscan-SE [-options] <FASTA file(s)>\n\n";
663    print "  Scan a sequence file for tRNAs \n",
664    "   -- default: use Infernal & tRNA covariance models\n",
665    "      with eukaryotic sequences \n",
666    "      (use 'Search Mode Options' below to scan other types of sequences)\n\n",
667    "Search Mode Options:\n\n",
668    "  -E                          : search for eukaryotic tRNAs (default)\n",
669    "  -B                          : search for bacterial tRNAs\n",
670    "  -A                          : search for archaeal tRNAs\n",
671    "  -M <model>                  : search for mitochondrial tRNAs\n",
672    "                                  options: mammal, vert\n",
673    "  -O                          : search for other organellar tRNAs\n",
674    "  -G                          : use general tRNA model (cytoslic tRNAs from all 3 domains included)\n",
675    "  --mt <model>                : use mito tRNA models for cytosolic/mito detemination\n",
676    "                                  (if not specified, only cytosolic isotype-specific model scan will be performed)\n",
677#    "  -T                          : search for tRNAs in metagenome\n",
678#    "  -N                          : search for tRNAs in nuclear mitochondrial DNA regions (NUMTs)\n",
679    "  -I                          : search using Infernal\n",
680    "                                  default use with -E, -B, -A, or -G; optional for -O\n",
681    "      --max                   : maximum sensitivity mode - search using Infernal without hmm filter (very slow)\n",
682    "  -L                          : search using the legacy method (tRNAscan, EufindtRNA, and COVE)\n",
683    "                                  use with -E, -B, -A or -G\n",
684    "  -C  --cove                  : search using COVE analysis only (legacy, extremely slow)\n",
685    "                                  default use with -O\n",
686    "  -H  --breakdown             : show breakdown of primary and secondary structure components to\n",
687    "                                  covariance model bit scores\n",
688    "  -D  --nopseudo              : disable pseudogene checking\n\n",
689
690    "Output options:\n\n",
691    "  -o  --output <file>         : save final results in <file>\n",
692    "  -f  --struct <file>         : save tRNA secondary structures to <file>\n",
693    "  -s  --isospecific <file>    : save results using isotype-specific models in <file>\n",
694    "  -m  --stats <file>          : save statistics summary for run in <file>\n",
695    "                                  (speed, # tRNAs found in each part of search, etc)\n",
696    "  -b  --bed <file>            : save results in BED file format of <file>\n",
697    "  -a  --fasta <file>          : save predicted tRNA sequences in FASTA file format of <file>\n",
698    "  -l  --log <file>            : save log of program progress in <file>\n",
699    "  --detail                    : display prediction outputs in detailed view\n",
700    "  --brief                     : brief output format (no column headers)\n\n",
701    "  -? \#                       : '#' in place of <file> chooses default name for output files\n",
702    "  -p  --prefix <label>        : use <label> prefix for all default output file names\n\n",
703    "  -d  --progress              : display program progress messages\n",
704    "  -q  --quiet                 : quiet mode (credits & run option selections suppressed)\n",
705    "  -y  --hitsrc                : show origin of hits (Ts=tRNAscan 1.4, Eu=EufindtRNA, \n",
706    "                                  Bo=Both Ts and Eu, Inf=Infernal)\n\n",
707
708    "Specify Alternate Cutoffs / Data Files:\n\n",
709    "  -X  --score <score>         : set cutoff score (in bits) for reporting tRNAs (default=20)\n",
710    "  -g  --gencode <file>        : use alternate genetic codes specified in <file> for\n",
711    "                                  determining tRNA type\n",
712    "  -z  --pad <number>          : use <number> nucleotides padding when passing first-pass\n",
713    "                                  tRNA bounds predictions to CM analysis (default=8)\n",
714    "  --len <length>              : set max length of tRNA intron+variable region for legacy search mode\n",
715    "                                  (default=116bp)\n",
716
717    "Misc Options:\n\n",
718    "  -h  --help                  : print this help message\n",
719    "  -c  --conf <file>           : tRNAscan-SE configuration file (default: tRNAscan-SE.conf)\n",
720    "  -Q  --forceow               : do not prompt user before overwriting pre-existing\n",
721    "                                  result files  (for batch processing)\n\n",
722    "  --match <EXPR>              : search only sequences with names matching <EXPR> string\n",
723    "                                  (<EXPR> may contain * or ? wildcard chars)\n",
724    "  --search <EXPR>             : start search at sequence with name matching <EXPR> string\n",
725    "                                  and continue to end of input sequence file(s)\n",
726
727    "Special Advanced Options (for testing & special purposes)\n\n",
728    "  -U                          : search for tRNAs with alternate models defined in configuration file\n\n",
729    "  -t  --tscan                 : search using tRNAscan only (defaults to strict params)\n",
730    "  --tmode <mode>              : explicitly set tRNAscan params, where <mode>=R or S\n",
731    "                                  (R=relaxed, S=strict tRNAscan v1.3 params)\n\n",
732    "  -v  --verbose <file>        : save verbose tRNAscan 1.3 output to <file>\n",
733    "  --nomerge                   : Keep redundant tRNAscan 1.3 hits (don't filter out multiple\n",
734    "                                  predictions per tRNA identification)\n",
735    "  -e  --eufind                : search using Eukaryotic tRNA finder (EufindtRNA) only\n",
736    "                                  (defaults to Normal seach parameters when run alone,\n",
737    "                                  or to Relaxed search params when run with Cove)\n",
738    "  --emode <mode>              : explicitly set EufindtRNA params, where <mode>=R, N, or S\n",
739    "                                  (relaxed, normal, or strict)\n\n",
740    "  --iscore <score>            : manually set \"intermediate\" cutoff score for EufindtRNA\n",
741    "  -r  --fsres <file>          : save first-pass scan results from EufindtRNA, tRNAscan, or\n",
742    "                                  Infernal hmm in <file> in tabular results format\n",
743    "  --mid                       : fast scan mode - search using Infernal with mid-level strictness of hmm filter\n",
744    "  -F  --falsepos <file>       : save first-pass candidate tRNAs in <file> that were then\n",
745    "                                  found to be false positives by second-pass analysis\n",
746    "  --missed <file>             : save all seqs that do NOT have at least one\n",
747    "                                  tRNA prediction in them (aka \"missed\" seqs)\n",
748    "  --thread <number>           : number of threads used for running infernal (default is to use available threads)\n",
749    "\n\n";
750}
751
752sub set_options
753{
754    # clear option vars
755    our $opt_conf= '';
756    our $opt_acedb=0; our $opt_quiet=0; our $opt_progress=0; our $opt_log="";
757    our $opt_euk=1; our $opt_bact=0; our $opt_arch=0; our $opt_organ=0; our $opt_general=0; our $opt_mito='';
758    our $opt_legacy=0; our $opt_inf=0; our $opt_isocm=''; our $opt_mt = '';
759    our $opt_metagenome=0; our $opt_numt=0;
760    our $opt_alt=0;
761    our $opt_cove=0; our $opt_mid=0; our $opt_max=0; our $opt_eufind=0; our $opt_tscan=0;
762    our $opt_ncintron=0; our $opt_frag='';
763    our $opt_breakdown=0; our $opt_nopseudo=0; our $opt_nomerge=0; our $opt_hitsrc=0;
764    our $opt_output=''; our $opt_struct=''; our $opt_stats=''; our $opt_isospecific=''; our $opt_bed=''; our $opt_fasta=''; our $opt_brief=0;
765    our $opt_detail=0;
766    our $opt_prefix=''; our $opt_match=''; our $opt_search='';
767    our $opt_gencode=''; our $opt_codons=0;
768    our $opt_tmode=''; our $opt_emode=''; our $opt_fsres=''; our $opt_filter=''; our $opt_falsepos=''; our $opt_missed='';
769    our $opt_score=1000; our $opt_iscore=1000; our $opt_len=-1; our $opt_pad=1000;
770    our $opt_help=0; our $opt_verbose=''; our $opt_forceow=0;
771    our $opt_w=''; our $opt_U=0; our $opt_Y=0; our $opt_thread=999;
772
773    Getopt::Long::Configure("bundling", "no_ignore_case", "no_auto_abbrev");
774    my $result = &GetOptions(
775                            # Configuration
776                            "conf|c=s","log|l=s",
777                            # Misc option switches
778                            "help|h",
779                            "quiet|q","hitsrc|y","breakdown|H",
780                            "Y",
781                            "progress|d","nopseudo|D","codons","forceow|Q","nomerge",
782                            # Search mode switches
783                            "euk|E", "bact|B", "arch|A", "organ|O", "general|G", "mito|M=s",
784                            "legacy|L", "inf|I", "isocm|S=s",
785#                            "metagenome|T", "numt|N",
786                            "alt|U", "mt=s",
787                            "eufind|e", "tscan|t", "cove|C", "mid", "max",
788                            # file name input specifiers
789                            "gencode|g=s",
790                            # file name output specifiers
791                            "output|o=s", "stats|m=s", "struct|f=s", "bed|b=s", "fasta|a=s", "isospecific|s=s", "acedb", "brief",
792                            "fsres|r=s","verbose|v=s","w=s","falsepos|F=s","missed=s", "detail",
793                            #string parameters
794                            "prefix|p=s","match=s","search=s","emode=s","tmode=s",
795                            #numerical parameters
796                            "score|X=f","iscore=f","pad|z=i","len=i",
797                            "thread=i");
798
799    if ($opt_help)
800    {
801        print STDERR "\ntRNAscan-SE $version ($release_date)\n";
802        &display_credits;
803        &print_all_options;
804        exit(0);
805    }
806    if ($#ARGV < 0)
807    {
808        print STDERR "\ntRNAscan-SE $version ($release_date)\n";
809        print STDERR "\nFATAL: No sequence file(s) specified.\n";
810        &print_usage();
811        exit(1);
812    }
813
814    # set location of temp and lib files
815    if ($ENV{TMPDIR})
816    {
817        $global_constants->set_temp_dir($ENV{TMPDIR});
818    }
819
820    # set defaults
821	if ($opt_conf ne "")
822	{
823		$global_constants->config_file($opt_conf);
824	}
825	$global_constants->read_configuration_file();
826    $cm->set_defaults(\%global_vars);
827    $eufind->set_defaults(\%global_vars);
828    $tscan->set_defaults(\%global_vars);
829
830    # use input seq file name as prefix for default output file names
831    my $fafile =  $ARGV[0];
832    $fafile =~ s/\.fa|\.seq$//;
833
834    # use specified prefix for default output file names, take .seq or .fa extensions off
835    if ($opt_prefix ne '')
836    {
837        $fafile = $opt_prefix;
838    }
839    $opts->fafile($fafile);
840    $opts->secondpass_int_result_file($global_constants->get("temp_dir")."/tscan$$"."_sp.out");
841    $opts->isotype_int_result_file($global_constants->get("temp_dir")."/tscan$$"."_iso.out");
842    $opts->truncated_int_result_file($global_constants->get("temp_dir")."/tscan$$"."_sp_trunc.out");
843
844    if ($opt_detail)
845    {
846		$opts->detail(1);
847	}
848
849    # Do NOT prompt before overwriting pre-existing output files;  good for use in batch-mode jobs
850    if ($opt_forceow != 0)
851    {
852        $opts->prompt_for_overwrite(0);
853    }
854
855    # set name of result file
856    if ($opt_output ne '')
857    {
858        $opts->results_to_stdout(0);
859        if ($opt_output eq "#")
860        {
861            $opts->out_file("$fafile.out");
862        }
863        else
864        {
865            $opts->out_file($opt_output);
866        }
867        &check_output_file($opts->out_file(), $opts->prompt_for_overwrite());
868    }
869
870    # save results in ACeDB output
871    if ($opt_acedb != 0)
872    {
873        $opts->ace_output(1);
874    }
875
876    # use brief output (suppress column header)
877    if ($opt_brief != 0)
878    {
879        $opts->brief_output(1);
880    }
881
882    # use quite mode (suppress credits & user-selected options)
883    if ($opt_quiet != 0)
884    {
885        $opts->quiet_mode(1);
886        $log->quiet_mode(1);
887    }
888
889    # save source of tRNA hit
890    if ($opt_hitsrc != 0)
891    {
892        if ($opt_mito ne "" || $opt_numt)
893        {
894            die "FATAL: Conflicting search options have been selected. -y cannot be combined with -M.\n";
895        }
896        $opts->save_source(1);
897    }
898
899    # disable pseudogene filtering
900    if ($opt_nopseudo != 0)
901    {
902        $cm->skip_pseudo_filter(1);
903    }
904
905    # translate anticodon to codon for output
906    if ($opt_codons != 0)
907    {
908        $opts->output_codon(1);
909    }
910
911    # search only sequences matching KEY name
912    # save original KEY expr
913    # turning KEY into regular expression notation
914    if ($opt_match ne '')
915    {
916        $opts->seq_key($opt_match);
917        $opts->raw_seq_key($opts->seq_key());
918        my $key = $opts->seq_key();
919        $key =~ s/(\W)/\\$1/g;
920        $key =~ s/\\\*/\\S\*/g;
921        $key =~ s/\\\?/\\S/g;
922        $key =~ s/[\"\']//g;
923        $opts->seq_key($key);
924    }
925    # search all sequences after matching KEY
926    # save original KEY expr
927    # turning KEY into regular expression notation
928    elsif ($opt_search ne '')
929    {
930        $opts->start_at_key(1);
931        $opts->seq_key($opt_search);
932        $opts->raw_seq_key($opts->seq_key());
933        my $key = $opts->seq_key();
934        $key =~ s/(\W)/\\$1/g;
935        $key =~ s/\\\*/\\S\*/g;
936        $key =~ s/\\\?/\\S/g;
937        $key =~ s/[\"\']//g;
938        $opts->seq_key($key);
939    }
940    else
941    {
942        $opts->seq_key('\S*');
943    }
944
945    if ($opt_isocm ne "" and $opt_isocm ne "on" and $opt_isocm ne "off")
946    {
947        die "FATAL: Invalid value for --isocm. Please use on or off or leave out the option for default setting\n";
948	}
949
950    if ($opt_alt != 0)
951    {
952        if ($opt_bact || $opt_arch || $opt_general || $opt_mito ne "" || $opt_metagenome || $opt_numt)
953        {
954            die "FATAL: Conflicting search options have been selected. -U cannot be combined with other sequence type search option.\n";
955        }
956        if ($opt_isocm eq "on")
957        {
958            die "FATAL: Conflicting search options have been selected. -U cannot be combined with --isocm.\n";
959        }
960
961        my $cms = $global_constants->get("alt_cm");
962        if (!defined $cms or scalar(keys %$cms) == 0)
963        {
964            die "FATAL: Alternate covariance models are not defined in the configuration file when using -U.\n";
965        }
966
967        $opt_inf = 1;
968        $opt_eufind = 0;
969        $opt_tscan = 0;
970        $opts->search_mode("alt");
971        $opt_euk = 0;
972        $opt_mid = 1;
973        $cm->skip_pseudo_filter(1);
974
975        $opts->no_isotype(1);
976        if ($opt_ncintron != 0)
977        {
978            die "FATAL: Conflicting search options have been selected. -U and --ncintron cannot be used simultaneously.\n";
979        }
980        if ($opt_frag ne '')
981        {
982            die "FATAL: Conflicting search options have been selected. -U and --frag cannot be used simultaneously.\n";
983        }
984
985        $opts->CM_mode("infernal");
986    }
987
988    if ($opt_bact != 0)
989    {
990        if ($opt_arch || $opt_general || $opt_mito ne "" || $opt_metagenome || $opt_numt)
991        {
992            die "FATAL: Conflicting search options have been selected. -B cannot be combined with other sequence type search option.\n";
993        }
994
995        $eufind->eufind_intscore($global_constants->get_subvalue("eufind", "bact_intscore"));
996        $opts->search_mode("bacteria");
997        $opt_euk = 0;
998        $opts->CM_mode("infernal");
999        $opt_inf = 1;
1000
1001        $opts->no_isotype(0);
1002        if ($opt_isocm eq "off")
1003        {
1004            $opts->no_isotype(1);
1005        }
1006
1007        if ($opt_mt ne '')
1008        {
1009            die "FATAL: Conflicting search options have been selected. -B and --mt cannot be used simultaneously.\n";
1010        }
1011        if ($opt_ncintron != 0)
1012        {
1013            die "FATAL: Conflicting search options have been selected. -B and --ncintron cannot be used simultaneously.\n";
1014        }
1015        if ($opt_frag ne '')
1016        {
1017            die "FATAL: Conflicting search options have been selected. -B and --frag cannot be used simultaneously.\n";
1018        }
1019    }
1020
1021    if ($opt_arch != 0)
1022    {
1023        if ($opt_bact || $opt_general || $opt_mito ne "" || $opt_metagenome || $opt_numt)
1024        {
1025            die "FATAL: Conflicting search options have been selected. -A cannot be combined with other sequence type search option.\n";
1026        }
1027
1028        $eufind->eufind_intscore($global_constants->get_subvalue("eufind", "arch_intscore"));
1029        $opts->search_mode("archaea");
1030        $opt_euk = 0;
1031        $opts->CM_mode("infernal");
1032        $opt_inf = 1;
1033
1034        # check for non-canonical introns
1035        $cm->CM_check_for_introns(1);
1036
1037        # check for tRNA fragments of split tRNAs
1038        if ($opt_frag ne '')
1039        {
1040            $cm->CM_check_for_split_halves(1);
1041            if ($opt_frag eq "#")
1042            {
1043                $opts->split_fragment_file("$fafile.frag");
1044            }
1045            elsif (($opt_frag eq "\$") || ($opt_frag eq "-"))
1046            {
1047                $opts->split_fragment_file("-");
1048
1049                # sends output to stdout instead of tabular output
1050                if ($opts->results_to_stdout())
1051                {
1052                    $opts->results_to_stdout(0);
1053                    $opts->out_file("/dev/null");
1054                }
1055            }
1056            else
1057            {
1058                $opts->split_fragment_file($opt_frag);
1059            }
1060            &check_output_file($opts->split_fragment_file(), $opts->prompt_for_overwrite());
1061        }
1062
1063        $opts->no_isotype(0);
1064        if ($opt_isocm eq "off")
1065        {
1066            $opts->no_isotype(0);
1067        }
1068
1069        if ($opt_mt ne '')
1070        {
1071            die "FATAL: Conflicting search options have been selected. -A and --mt cannot be used simultaneously.\n";
1072        }
1073    }
1074
1075    # use original general cove model with all tRNAs from 3 domains
1076    if ($opt_general != 0)
1077    {
1078        if ($opt_bact || $opt_arch || $opt_mito ne "" || $opt_metagenome || $opt_numt)
1079        {
1080            die "FATAL: Conflicting search options have been selected. -G cannot be combined with other sequence type search option.\n";
1081        }
1082        if ($opt_isocm eq "on")
1083        {
1084            die "FATAL: Conflicting search options have been selected. -G cannot be combined with --isocm.\n";
1085        }
1086
1087        $opts->search_mode("general");
1088        $opt_euk = 0;
1089        $opt_mid = 1;
1090        $opts->CM_mode("infernal");
1091        $opt_inf = 1;
1092        $opts->no_isotype(1);
1093
1094        if ($opt_ncintron != 0)
1095        {
1096            die "FATAL: Conflicting search options have been selected. -G and --ncintron cannot be used simultaneously.\n";
1097        }
1098        if ($opt_frag ne '')
1099        {
1100            die "FATAL: Conflicting search options have been selected. -G and --frag cannot be used simultaneously.\n";
1101        }
1102    }
1103
1104    if ($opt_organ != 0)
1105    {
1106        $opt_eufind = 0;
1107        $opt_tscan = 0;
1108
1109        $opts->search_mode("organelle");
1110        $opt_euk = 0;
1111        $opts->no_isotype(1);
1112        $opt_inf = 1;
1113        if ($opt_cove)
1114        {
1115            $opt_inf = 0;
1116        }
1117
1118        $cm->cm_cutoff($global_constants->get("organelle_cm_cutoff"));
1119
1120        # disable psuedogene checking
1121        $cm->skip_pseudo_filter(1);
1122
1123        if ($opt_ncintron != 0)
1124        {
1125            die "FATAL: Conflicting search options have been selected. -O and --ncintron cannot be used simultaneously.\n";
1126        }
1127        if ($opt_frag ne '')
1128        {
1129            die "FATAL: Conflicting search options have been selected. -O and --frag cannot be used simultaneously.\n";
1130        }
1131        if ($opt_isocm eq "on")
1132        {
1133            die "FATAL: Conflicting search options have been selected. -O cannot be combined with --isocm.\n";
1134        }
1135    }
1136
1137    if ($opt_mito ne "")
1138    {
1139        if ($opt_bact || $opt_arch || $opt_general || $opt_metagenome || $opt_numt)
1140        {
1141            die "FATAL: Conflicting search options have been selected. -M cannot be combined with other sequence type search option.\n";
1142        }
1143        if ($opt_isocm eq "on")
1144        {
1145            die "FATAL: Conflicting search options have been selected. -M cannot be combined with --isocm.\n";
1146        }
1147
1148        my $cms = $global_constants->get("mito_cm_".lc($opt_mito));
1149        if (!defined $cms or scalar(keys %$cms) == 0)
1150        {
1151            die "FATAL: Mt-tRNA covariance models $opt_mito are not defined in the configuration file when using -M.\n";
1152        }
1153
1154        $opts->search_mode("mito");
1155        $opts->mito_model(lc($opt_mito));
1156        $opt_euk = 0;
1157        $opt_mid = 1;
1158        $opt_inf = 1;
1159        $opts->no_isotype(1);
1160
1161        if ($opt_eufind || $opt_tscan || $opt_cove)
1162        {
1163            die "FATAL: Conflicting search options have been selected. -M is only supported by using Infernal search.\n";
1164        }
1165        if ($opt_ncintron != 0)
1166        {
1167            die "FATAL: Conflicting search options have been selected. -M and --ncintron cannot be used simultaneously.\n";
1168        }
1169        if ($opt_frag ne '')
1170        {
1171            die "FATAL: Conflicting search options have been selected. -M and --frag cannot be used simultaneously.\n";
1172        }
1173
1174        $opts->CM_mode("infernal");
1175
1176        if ($opts->mito_model() eq "mammal" or $opts->mito_model() eq "vert")
1177        {
1178            $opts->gc_file($global_constants->get("gc_vert_mito"));
1179            $opts->alt_gcode(1);
1180        }
1181        elsif ($opts->mito_model() eq "invert")
1182        {
1183            $opts->gc_file($global_constants->get("gc_invert_mito"));
1184            $opts->alt_gcode(1);
1185        }
1186        elsif ($opts->mito_model() eq "fungi")
1187        {
1188            $opts->gc_file($global_constants->get("gc_yeast_mito"));
1189            $opts->alt_gcode(1);
1190        }
1191    }
1192
1193    if ($opt_metagenome != 0)
1194    {
1195        if ($opt_bact || $opt_arch || $opt_general || $opt_mito ne "" || $opt_numt)
1196        {
1197            die "FATAL: Conflicting search options have been selected. -T cannot be combined with other sequence type search option.\n";
1198        }
1199        if ($opt_isocm eq "on")
1200        {
1201            die "FATAL: Conflicting search options have been selected. -T cannot be combined with --isocm.\n";
1202        }
1203
1204        $opts->search_mode("metagenome");
1205        $opt_euk = 0;
1206        $opt_inf = 1;
1207        $opts->no_isotype(1);
1208
1209        if ($opt_eufind || $opt_tscan || $opt_cove)
1210        {
1211            die "FATAL: Conflicting search options have been selected. -T is only supported by using Infernal search.\n";
1212        }
1213
1214        $opts->infernal_fp(1);
1215        $opts->CM_mode("infernal");
1216    }
1217
1218    if ($opt_numt != 0)
1219    {
1220        if ($opt_bact || $opt_arch || $opt_general || $opt_mito ne "" || $opt_metagenome)
1221        {
1222            die "FATAL: Conflicting search options have been selected. -N cannot be combined with other sequence type search option.\n";
1223        }
1224        if ($opt_isocm eq "on")
1225        {
1226            die "FATAL: Conflicting search options have been selected. -N cannot be combined with --isocm.\n";
1227        }
1228
1229        $opts->search_mode("numt");
1230        $opt_euk = 0;
1231        $opt_inf = 1;
1232        $opts->no_isotype(1);
1233
1234        if ($opt_eufind || $opt_tscan || $opt_cove)
1235        {
1236            die "FATAL: Conflicting search options have been selected. -N is only supported by using Infernal search.\n";
1237        }
1238        if ($opt_ncintron != 0)
1239        {
1240            die "FATAL: Conflicting search options have been selected. -N and --ncintron cannot be used simultaneously.\n";
1241        }
1242        if ($opt_frag ne '')
1243        {
1244            die "FATAL: Conflicting search options have been selected. -N and --frag cannot be used simultaneously.\n";
1245        }
1246
1247        $opts->infernal_fp(1);
1248        $opts->CM_mode("infernal");
1249    }
1250
1251    if ($opt_euk != 0)
1252    {
1253        $opts->search_mode("euk");
1254
1255        $opts->no_isotype(0);
1256        if ($opt_isocm eq "off")
1257        {
1258            $opts->no_isotype(1);
1259            if ($opt_mt ne '')
1260            {
1261                die "FATAL: Conflicting search options have been selected. -S and --mt cannot be used simultaneously.\n";
1262            }
1263        }
1264        else
1265        {
1266            if ($opt_mt ne '')
1267            {
1268                my $cms = $global_constants->get("mito_cm_".lc($opt_mt));
1269                if (!defined $cms or scalar(keys %$cms) == 0)
1270                {
1271                    die "FATAL: Mt-tRNA covariance models $opt_mt are not defined in the configuration file when using --mt.\n";
1272                }
1273                $opts->mito_model($opt_mt);
1274            }
1275        }
1276        if ($opt_ncintron != 0)
1277        {
1278            die "FATAL: Conflicting search options have been selected. --ncintron cannot be used simultaneously with default search mode.\n";
1279        }
1280        if ($opt_frag ne '')
1281        {
1282            die "FATAL: Conflicting search options have been selected. --frag cannot be used simultaneously with default search mode.\n";
1283        }
1284        $opt_inf = 1;
1285        $opts->CM_mode("infernal");
1286    }
1287
1288    if ($opt_legacy)
1289    {
1290        if ($opt_max or $opt_mid)
1291        {
1292            die "FATAL: Conflicting search options have been selected. -L, --fast, and --max cannot be used simultaneously.\n";
1293        }
1294        if ($opt_arch && ($opt_ncintron != 0 || $opt_frag ne ''))
1295        {
1296            die "FATAL: Conflicting search options have been selected. --ncintron and --frag are not supported with legacy mode.\n";
1297        }
1298        if ($opt_mito ne "" || $opt_metagenome || $opt_numt)
1299        {
1300            die "FATAL: Conflicting search options have been selected. -L cannot be combined with -M.\n";
1301        }
1302        if ($opt_isocm eq "on")
1303        {
1304            die "FATAL: Conflicting search options have been selected. -L cannot be combined with --isocm.\n";
1305        }
1306
1307        $opt_inf = 0;
1308        $opts->no_isotype(1);
1309        $opts->infernal_fp(0);
1310        $opts->CM_mode("cove");
1311        if ($opt_arch)
1312        {
1313			$cm->CM_check_for_introns(0);
1314		}
1315    }
1316
1317    # do Cove scan only
1318    if ($opt_cove != 0)
1319    {
1320        if ($opt_eufind || $opt_tscan || $opt_max || $opt_mid)
1321        {
1322            die "FATAL: Conflicting search options have been selected. -t, -e, --fast, --max, and -C cannot be used simultaneously.\n";
1323        }
1324        if ($opt_arch && ($opt_ncintron != 0 || $opt_frag ne ''))
1325        {
1326            die "FATAL: Conflicting search options have been selected. --ncintron and --frag are not supported with COVE only mode.\n";
1327        }
1328        if ($opt_mito ne "" || $opt_metagenome || $opt_numt)
1329        {
1330            die "FATAL: Conflicting search options have been selected. -C cannot be combined with -M.\n";
1331        }
1332        if ($opt_isocm eq "on")
1333        {
1334            die "FATAL: Conflicting search options have been selected. -C cannot be combined with --isocm.\n";
1335        }
1336
1337        $opt_inf = 0;
1338        $opts->no_isotype(1);
1339        $opts->CM_mode("cove");
1340        if ($opt_euk)
1341        {
1342            $opts->infernal_fp(0);
1343        }
1344        if ($opt_arch)
1345        {
1346			$cm->CM_check_for_introns(0);
1347		}
1348    }
1349
1350    # don't use tRNAscan unless also specified by -T option
1351    # don't use eufindtRNA unless also specified by -E option
1352    if ($opt_cove)
1353    {
1354        $opts->tscan_mode(0);
1355        $opts->eufind_mode(0);
1356    }
1357
1358    # do tRNAscan only
1359    if ($opt_tscan != 0)
1360    {
1361        # if only using tRNAscan, use strict tRNAscan 1.3 params
1362        # since Cove won't eliminate high false pos rate with default params
1363        $opts->tscan_mode(1);
1364        $tscan->tscan_params($global_constants->get_subvalue("tscan", "strict_param"));
1365        $opts->strict_params(1);
1366        $opt_inf = 0;
1367        $opts->no_isotype(1);
1368
1369        # if -C isn't also specified, turn off Cove filtering
1370        # if -i isn't also specified, turn off infernal filtering
1371        if (($opt_cove == 0) || ($opt_max == 0))
1372        {
1373            $opts->CM_mode("");
1374        }
1375
1376        # if -E isn't also specified, turn off eufindtRNA
1377        if ($opt_eufind == 0)
1378        {
1379            $opts->eufind_mode(0);
1380        }
1381        $opts->secondpass_int_result_file("");
1382        $opts->isotype_int_result_file("");
1383        $opts->truncated_int_result_file("");
1384    }
1385
1386     # set tRNAscan search params
1387    if ($opt_tmode ne '')
1388    {
1389        if (!$opt_tscan and !$opt_legacy)
1390        {
1391            die "FATAL: Conflicting search options have been selected. --tmode can only be used with -t or -L.\n";
1392        }
1393
1394        $opt_tmode = uc($opt_tmode);
1395        # use relaxed tRNAscan params
1396        if ($opt_tmode eq "R")
1397        {
1398            $tscan->tscan_params($global_constants->get_subvalue("tscan", "relaxed_param"));
1399            $opts->strict_params(0);
1400        }
1401        # use strict tRNAscan v1.3 params
1402        elsif ($opt_tmode eq "S")
1403        {
1404            $tscan->tscan_params($global_constants->get_subvalue("tscan", "strict_param"));
1405            $opts->strict_params(1);
1406        }
1407        # use alternate tRNAscan params
1408        elsif ($opt_tmode eq "A")
1409        {
1410            $tscan->tscan_params($global_constants->get_subvalue("tscan", "alt_param"));
1411            $opts->strict_params(0);
1412        }
1413        else
1414        {
1415            print STDERR "\nWARNING: tRNAscan parameter specified",
1416            " with -t option not recognized.\n",
1417            "         Defaulting to strict tRNAscan params\n\n";
1418            $tscan->tscan_params($global_constants->get_subvalue("tscan", "strict_param"));
1419            $opts->strict_params(1);
1420        }
1421    }
1422
1423    # don't merge redundant tRNAscan hits option only for diagnostic purposes
1424    if ($opt_nomerge != 0)
1425    {
1426        $tscan->keep_tscan_repeats(1);
1427    }
1428
1429    # use eufindtRNA
1430    if ($opt_eufind != 0)
1431    {
1432        $opts->eufind_mode(1);
1433
1434        # if -C isn't also specified, turn off Cove filtering
1435        # if -i isn't also specified, turn off infernal filtering
1436        if (($opt_cove == 0) || ($opt_max == 0))
1437        {
1438            $opts->CM_mode("");
1439        }
1440        $opt_inf = 0;
1441        $opts->no_isotype(1);
1442
1443        if (!$opts->cove_mode() && !$opts->infernal_mode())
1444        {
1445            # use more strict default params if no second-pass filtering
1446            $eufind->eufind_params("");
1447        }
1448        else
1449        {
1450            # use more relaxed params if using second-pass filtering
1451            $eufind->eufind_params($global_constants->get_subvalue("eufind", "relaxed_param"));
1452        }
1453
1454        # turn off tRNAscan if not specified on command line
1455        if ($opt_tscan == 0)
1456        {
1457            $opts->tscan_mode(0);
1458        }
1459        $opts->secondpass_int_result_file("");
1460        $opts->isotype_int_result_file("");
1461        $opts->truncated_int_result_file("");
1462    }
1463
1464    # set eufindtRNA search params
1465    if ($opt_emode ne '')
1466    {
1467        if (!$opt_eufind and !$opt_legacy)
1468        {
1469            die "FATAL: Conflicting search options have been selected. --emode can only be used with -e or -L.\n";
1470        }
1471
1472        $opt_emode = uc($opt_emode);
1473        # use relaxed params that does not look for poly T
1474        if ($opt_emode eq "R")
1475        {
1476            $eufind->eufind_params($global_constants->get_subvalue("eufind", "relaxed_param"));
1477        }
1478        # use default params and penalizes for no poly T
1479        elsif ($opt_emode eq "N")
1480        {
1481            $eufind->eufind_params("");
1482        }
1483        # use strict params and requires poly T
1484        # default intermediate cutoff for original algorithm
1485        elsif ($opt_emode eq "S")
1486        {
1487            $eufind->eufind_params($global_constants->get_subvalue("eufind", "strict_param"));
1488            $eufind->eufind_intscore($global_constants->get_subvalue("eufind", "orig_intscore"));
1489        }
1490        else
1491        {
1492            print STDERR "\nWARNING: EufindtRNA parameter specified",
1493            " with -e option not recognized.\n",
1494            "         Defaulting to relaxed EufindtRNA params\n\n";
1495            $eufind->eufind_params($global_constants->get_subvalue("eufind", "relaxed_param"));
1496        }
1497    }
1498
1499    if ($opt_inf)
1500    {
1501        if ($opt_legacy)
1502        {
1503            die "FATAL: Conflicting search options have been selected. -L and -I cannot be used simultaneously.\n";
1504        }
1505        if ($opt_eufind || $opt_tscan || $opt_cove)
1506        {
1507            die "FATAL: Conflicting search options have been selected. -I, -t, -e, and -C cannot be used simultaneously.\n";
1508        }
1509
1510        $opts->tscan_mode(0);
1511        $opts->eufind_mode(0);
1512        if ($opt_euk || $opt_bact || $opt_arch)
1513        {
1514            $opts->infernal_fp(1);
1515        }
1516        $opts->CM_mode("infernal");
1517        $opt_legacy = 0;
1518    }
1519
1520    # do max scan
1521    if ($opt_max != 0)
1522    {
1523        if ($opt_eufind || $opt_tscan || $opt_cove || $opt_legacy || ($opt_mid and $opt_mito != 0))
1524        {
1525            die "FATAL: Conflicting search options have been selected. --max, -t, -e, -C, and -L cannot be used simultaneously.\n";
1526        }
1527
1528        $opts->tscan_mode(0);
1529        $opts->eufind_mode(0);
1530        if ($opt_euk || $opt_bact || $opt_arch)
1531        {
1532            $opts->infernal_fp(0);
1533        }
1534	elsif ($opt_mito)
1535	{
1536	    $opt_mid = 0;
1537	}
1538    }
1539
1540    # set hmm filter flag for infernal
1541    if ($opt_mid)
1542    {
1543        if ($opt_eufind || $opt_tscan || $opt_cove || $opt_legacy || $opt_max)
1544        {
1545            die "FATAL: Conflicting search options have been selected. --fast, -t, -e, -C, and -L cannot be used simultaneously.\n";
1546        }
1547
1548        if ($opt_euk || $opt_bact || $opt_arch)
1549        {
1550            $opts->infernal_fp(0);
1551        }
1552        $opts->hmm_filter(1);
1553    }
1554
1555    if ($opt_isospecific ne "")
1556    {
1557        if ($opts->no_isotype())
1558        {
1559            die "FATAL: Conflicting search options have been selected. -s cannot be used when isotype model scan is disabled.\n";
1560        }
1561        if ($opt_mito ne "" || $opt_numt)
1562        {
1563            die "FATAL: Conflicting search options have been selected. -s cannot be combined with -M.\n";
1564        }
1565
1566        if ($opt_isospecific eq "#")
1567        {
1568            $opts->isotype_specific_file("$fafile.iso");
1569        }
1570        elsif (($opt_isospecific eq "\$") || ($opt_isospecific eq "-"))
1571        {
1572            $opts->isotype_specific_file("-");
1573
1574            # sends output to stdout instead of tabular output
1575            if ($opts->results_to_stdout())
1576            {
1577                $opts->results_to_stdout(0);
1578                $opts->out_file("/dev/null");
1579            }
1580        }
1581        else
1582        {
1583            $opts->isotype_specific_file($opt_isospecific);
1584        }
1585        &check_output_file($opts->isotype_specific_file(), $opts->prompt_for_overwrite());
1586    }
1587
1588    if ($opts->no_isotype())
1589    {
1590        $opts->isotype_specific_file("");
1591    }
1592
1593    if ($opt_bed ne "")
1594    {
1595        if ($opt_bed eq "#")
1596        {
1597            $opts->bed_file("$fafile.bed");
1598        }
1599        elsif (($opt_bed eq "\$") || ($opt_bed eq "-"))
1600        {
1601            $opts->bed_file("-");
1602
1603            # sends output to stdout instead of tabular output
1604            if ($opts->results_to_stdout())
1605            {
1606                $opts->results_to_stdout(0);
1607                $opts->out_file("/dev/null");
1608            }
1609        }
1610        else
1611        {
1612            $opts->bed_file($opt_bed);
1613        }
1614        &check_output_file($opts->bed_file(), $opts->prompt_for_overwrite());
1615    }
1616
1617    if ($opt_fasta ne "")
1618    {
1619        if ($opt_fasta eq "#")
1620        {
1621            $opts->output_fasta_file("$fafile.fa");
1622        }
1623        elsif (($opt_fasta eq "\$") || ($opt_fasta eq "-"))
1624        {
1625            $opts->output_fasta_file("-");
1626
1627            # sends output to stdout instead of tabular output
1628            if ($opts->results_to_stdout())
1629            {
1630                $opts->results_to_stdout(0);
1631                $opts->out_file("/dev/null");
1632            }
1633        }
1634        else
1635        {
1636            $opts->output_fasta_file($opt_fasta);
1637        }
1638        &check_output_file($opts->output_fasta_file(), $opts->prompt_for_overwrite());
1639    }
1640
1641    if ($opt_iscore != 1000)
1642    {
1643        if (!$opt_eufind and !$opt_legacy)
1644        {
1645            die "FATAL: Conflicting search options have been selected. --iscore can only be used with -e or -L.\n";
1646        }
1647        $eufind->eufind_intscore($opt_iscore);
1648    }
1649
1650    # pad both ends of first-pass hits with this
1651    # many extra bases before passing to Cove
1652    if ($opt_pad != 1000)
1653    {
1654        $opts->padding($opt_pad);
1655    }
1656
1657    # use alternate genetic code table
1658    if ($opt_gencode ne '')
1659    {
1660        $opts->gc_file($opt_gencode);
1661        $opts->alt_gcode(1);
1662    }
1663
1664    # get HMM score for tRNA hits
1665    if ($opt_breakdown != 0)
1666    {
1667        if ($opt_mito ne "" || $opt_numt)
1668        {
1669            die "FATAL: Conflicting search options have been selected. -H cannot be combined with -M.\n";
1670        }
1671        $cm->get_hmm_score(1);
1672    }
1673
1674    # save stats summary file
1675    if ($opt_stats ne '')
1676    {
1677        $opts->save_stats(1);
1678        if ($opt_stats eq "#")
1679        {
1680            $opts->stats_file("$fafile.stats");
1681        }
1682        else
1683        {
1684            $opts->stats_file($opt_stats);
1685        }
1686        &check_output_file($opts->stats_file(), $opts->prompt_for_overwrite());
1687    }
1688
1689    # save coves secondary structures for tRNA's whose acodons it couldn't call
1690    if ($opt_w ne '')
1691    {
1692        $opts->save_odd_struct(1);
1693        if ($opt_w eq "#")
1694        {
1695            $opts->odd_struct_file("$fafile.oddstruct");
1696        }
1697        else
1698        {
1699            $opts->odd_struct_file($opt_w);
1700        }
1701        &check_output_file($opts->odd_struct_file(), $opts->prompt_for_overwrite());
1702    }
1703
1704    # save all coves secondary structures
1705    if ($opt_struct ne '')
1706    {
1707        $opts->save_all_struct(1);
1708        if ($opt_struct eq "#")
1709        {
1710            $opts->all_struct_file("$fafile.ss");
1711        }
1712         # sends structure output to stdout instead of tabular output
1713        elsif (($opt_struct eq "\$") || ($opt_struct eq "-"))
1714        {
1715            $opts->all_struct_file("-");
1716            if ($opts->results_to_stdout())
1717            {
1718                $opts->results_to_stdout(0);
1719                $opts->out_file("/dev/null");
1720            }
1721        }
1722        else
1723        {
1724            $opts->all_struct_file($opt_struct);
1725        }
1726        &check_output_file($opts->all_struct_file(), $opts->prompt_for_overwrite());
1727    }
1728
1729    # save only seqs without a tRNA hit
1730    if ($opt_missed ne '')
1731    {
1732        $opts->save_missed(1);
1733        if ($opt_missed eq "#")
1734        {
1735            $opts->missed_seq_file("$fafile.missed");
1736        }
1737        else
1738        {
1739            $opts->missed_seq_file($opt_missed);
1740        }
1741        &check_output_file($opts->missed_seq_file(),$opts->prompt_for_overwrite());
1742    }
1743
1744    # outputs PID number in file for tRNAscan-SE web server program
1745    if ($opt_Y != 0)
1746    {
1747        &check_output_file("$fafile.pid", $opts->prompt_for_overwrite());
1748        &open_for_write(\*TESTF, "$fafile.pid");
1749        print TESTF "PID=$$\n";
1750        close(TESTF);
1751    }
1752
1753    # save verbose tRNAscan output
1754    if ($opt_verbose ne '')
1755    {
1756        $opts->save_verbose(1);
1757        my $tmp_verb = &tempname($global_constants->get("temp_dir"),".vb");  # get temp output file name
1758        &check_output_file($tmp_verb, $opts->prompt_for_overwrite());
1759        $tscan->tscan_params($tscan->tscan_params() . "-v $tmp_verb");
1760        if ($opt_verbose eq "#")
1761        {
1762            $opts->verb_file("$fafile.verb");
1763        }
1764        else
1765        {
1766            $opts->verb_file($opt_verbose);
1767        }
1768        &check_output_file($opts->verb_file(),$opts->prompt_for_overwrite());
1769    }
1770
1771    # use previous results output file
1772    if ($opt_filter ne '')
1773    {
1774        $opts->tscan_mode(0);
1775        $opts->eufind_mode(0);
1776        $opts->use_prev_ts_run(1);
1777        $opts->firstpass_result_file($opt_filter);
1778        if (!(-e $opts->firstpass_result_file())) {
1779            die "FATAL: Can't find formatted tRNA output file ",
1780                $opts->firstpass_result_file()."\n\n";
1781        }
1782    }
1783    # create named file for first pass results
1784    elsif ($opt_fsres ne '')
1785    {
1786        $opts->save_firstpass_res(1);
1787        if ($opt_fsres eq "#")
1788        {
1789            $opts->firstpass_result_file("$fafile.fpass.out");
1790        }
1791        else
1792        {
1793            $opts->firstpass_result_file($opt_fsres);
1794        }
1795        &check_output_file($opts->firstpass_result_file(), $opts->prompt_for_overwrite());
1796        $fp_result_file->init_fp_result_file($opts->firstpass_result_file());
1797    }
1798    # create temp file for firstpass output
1799    else
1800    {
1801        $opts->firstpass_result_file(&tempname($global_constants->get("temp_dir"), ".fpass"));
1802        &check_output_file($opts->firstpass_result_file(), $opts->prompt_for_overwrite());
1803        $fp_result_file->init_fp_result_file($opts->firstpass_result_file());
1804    }
1805    $opts->firstpass_flanking_file($global_constants->get("temp_dir")."/tscan$$"."_fp_flanking.out");
1806    $fp_result_file->flanking_file($opts->firstpass_flanking_file());
1807
1808    # save false positive tRNAs from first-pass scans that Cove bonked
1809    # save source of tRNA hit (-y option) if not using infernal
1810    if ($opt_falsepos ne '')
1811    {
1812        if ($opt_mito ne "" || $opt_numt)
1813        {
1814            die "FATAL: Conflicting search options have been selected. -F cannot be combined with -M.\n";
1815        }
1816        $opts->save_falsepos(1);
1817        if (!$opts->infernal_mode())
1818        {
1819            $opts->save_source(1);
1820        }
1821        if ($opt_falsepos eq "#")
1822        {
1823            $opts->falsepos_file("$fafile.fpos");
1824        }
1825        else
1826        {
1827            $opts->falsepos_file($opt_falsepos);
1828        }
1829        &check_output_file($opts->falsepos_file(), $opts->prompt_for_overwrite());
1830    }
1831
1832    # set MAX intron+variable loop region size used in EufindtRNA & Cove
1833    if ($opt_len > 0)
1834    {
1835        $opts->max_int_len($opt_len);
1836
1837        # look for long tRNAs if needed
1838        if ($opts->use_prev_ts_run() || $opts->eufind_mode())
1839        {
1840            $opts->find_long_tRNAs(1);
1841        }
1842        else
1843        {
1844            $cm->max_cove_tRNA_length($opts->max_int_len() + $cm->min_tRNA_no_intron());
1845        }
1846    }
1847
1848    if ($opt_progress != 0)
1849    {
1850        $log->open_file("-") || die "FATAL: Unable to open standard out to display program progress\n\n";
1851        $opts->display_progress(1);
1852    }
1853    elsif ($opt_log ne "")
1854    {
1855        if ($opt_log eq "#")
1856        {
1857            $opts->log_file("$fafile.log");
1858        }
1859        else
1860        {
1861            $opts->log_file($opt_log);
1862        }
1863        &check_output_file($opts->log_file(), $opts->prompt_for_overwrite());
1864
1865        $log->file_name($opts->log_file());
1866        $log->initialize_log("tRNAscan-SE", "", "");
1867
1868        $opts->save_progress(1);
1869    }
1870    else
1871    {
1872        $log->open_file("/dev/null") || die "FATAL: Unable to open /dev/null to record program progress\n\n";
1873    }
1874
1875    # use different Cove-score cutoff for reporting "real" tRNAs
1876    # dummy opt_X val is 10,000 to avoid overlap with a real value a user might specify
1877    if ($opt_score != 1000)
1878    {
1879        $cm->cm_cutoff($opt_score);
1880        if ($opt_score < $cm->infernal_fp_cutoff())
1881        {
1882		$cm->infernal_fp_cutoff($opt_score);
1883	}
1884    }
1885
1886    if ($opt_thread != 999)
1887    {
1888		if ($opt_thread < 0)
1889        {
1890            die "FATAL: Number of threads for running Infernal must be at least 0.\n";
1891		}
1892        if ($opt_eufind || $opt_tscan || $opt_cove || $opt_legacy)
1893        {
1894            die "FATAL: Conflicting search options have been selected. --thread, -t, -e, -C, and -L cannot be used simultaneously.\n";
1895        }
1896		$cm->infernal_thread($opt_thread);
1897	}
1898
1899    # only one seq file on command line
1900    if ($#ARGV == 0)
1901    {
1902        $opts->multiple_files(0);
1903        $opts->fasta_file($ARGV[0]);
1904    }
1905    else
1906    {
1907        $opts->multiple_files(1);
1908        my $tmp_multiseq_file = &tempname($global_constants->get("temp_dir"), ".mseq");
1909        &check_output_file($tmp_multiseq_file, $opts->prompt_for_overwrite());
1910        foreach my $filename (@ARGV)
1911        {
1912            system("cat $filename >> $tmp_multiseq_file");
1913        }
1914        $opts->fasta_file($tmp_multiseq_file);
1915    }
1916
1917    if ($opts->cove_mode())
1918    {
1919        $opts->second_pass_label("Cove");
1920    }
1921    if ($opts->infernal_mode())
1922    {
1923        $opts->second_pass_label("Infernal");
1924    }
1925
1926    $cm->CM_mode($opts->CM_mode());
1927}
1928