1# tRNAscanSE/CM.pm
2# This class contains parameters and functions for running CM tRNA search used in tRNAscan-SE.
3#
4# --------------------------------------------------------------
5# This module is part of the tRNAscan-SE program.
6# Copyright (C) 2017 Patricia Chan and Todd Lowe
7# --------------------------------------------------------------
8#
9
10package tRNAscanSE::CM;
11
12use strict;
13use File::Copy;
14use tRNAscanSE::Configuration;
15use tRNAscanSE::Options;
16use tRNAscanSE::Utils;
17use tRNAscanSE::LogFile;
18use tRNAscanSE::ScanResult;
19use tRNAscanSE::SS;
20use tRNAscanSE::Sequence;
21use tRNAscanSE::tRNA;
22use tRNAscanSE::ArraytRNA;
23use tRNAscanSE::IntResultFile;
24use tRNAscanSE::MultiResultFile;
25use tRNAscanSE::CMscanResultFile;
26use tRNAscanSE::ArrayCMscanResults;
27use tRNAscanSE::FpScanResultFile;
28
29sub new
30{
31    my $class = shift;
32    my $self = {};
33
34    initialize($self);
35
36    bless ($self, $class);
37    return $self;
38}
39
40sub DESTROY
41{
42    my $self = shift;
43}
44
45sub initialize
46{
47    my $self = shift;
48
49    $self->{CM_check_for_introns} = 0;          # check for non-canonical introns
50    $self->{CM_check_for_split_halves} = 0;     # check for split tRNA fragments
51    $self->{skip_pseudo_filter} = 0;            # enable filter for psuedogenes (Cove score <40,
52                                                # primary struct score <10 bits, secondary
53                                                # structure score < 5 bits)
54
55    $self->{get_hmm_score} = 0;                 # also score tRNA with covariance model
56                                                #  without sec structure info, similar
57                                                #  to getting hmm score for match of
58                                                #  seq to tRNA hmm  (-H option)
59
60
61    # Convariance model file path
62    $self->{main_cm_file_path} = {};
63    $self->{mainNS_cm_file_path} = {};
64    $self->{cove_cm_file_path} = '';
65    $self->{intron_cm_file_path} = {};
66    $self->{arch_five_half_cm_file_path} = '';
67    $self->{arch_three_half_cm_file_path} = '';
68    $self->{Pselc_cm_file_path} = '';
69    $self->{Eselc_cm_file_path} = '';
70    $self->{isotype_cm_db_file_path} = '';
71    $self->{mito_isotype_cm_db_file_path} = '';
72    $self->{isotype_cm_file_paths} = {};
73    $self->{mito_isotype_cm_file_paths} = {};
74
75    $self->{isotype_cm_cutoff} = {};
76
77    $self->{covels_bin} = "covels-SE";          # Application executable name
78    $self->{coves_bin} = "coves-SE";
79    $self->{cmsearch_bin} = "cmsearch";
80    $self->{cmscan_bin} = "cmscan";
81
82    $self->{infernal_thread} = -1;
83
84    $self->{tab_results} = +[];
85}
86
87sub CM_mode
88{
89    my $self = shift;
90    if (@_) { $self->{CM_mode} = shift; }
91    return $self->{CM_mode};
92}
93
94sub cove_mode
95{
96    my $self = shift;
97    return ($self->{CM_mode} eq 'cove');
98}
99
100sub infernal_mode
101{
102    my $self = shift;
103    return ($self->{CM_mode} eq 'infernal');
104}
105
106sub cm_cutoff
107{
108    my $self = shift;
109    if (@_) { $self->{cm_cutoff} = shift; }
110    return $self->{cm_cutoff};
111}
112
113sub organelle_cm_cutoff
114{
115    my $self = shift;
116    if (@_) { $self->{organelle_cm_cutoff} = shift; }
117    return $self->{organelle_cm_cutoff};
118}
119
120sub infernal_fp_cutoff
121{
122    my $self = shift;
123    if (@_) { $self->{infernal_fp_cutoff} = shift; }
124    return $self->{infernal_fp_cutoff};
125}
126
127sub BHB_cm_cutoff
128{
129    my $self = shift;
130    if (@_) { $self->{BHB_cm_cutoff} = shift; }
131    return $self->{BHB_cm_cutoff};
132}
133
134sub max_tRNA_length
135{
136    my $self = shift;
137    if (@_) { $self->{max_tRNA_length} = shift; }
138    return $self->{max_tRNA_length};
139}
140
141sub max_cove_tRNA_length
142{
143    my $self = shift;
144    if (@_) { $self->{max_cove_tRNA_length} = shift; }
145    return $self->{max_cove_tRNA_length};
146}
147
148sub max_cmsearch_tRNA_length
149{
150    my $self = shift;
151    if (@_) { $self->{max_cmsearch_tRNA_length} = shift; }
152    return $self->{max_cmsearch_tRNA_length};
153}
154
155sub CM_check_for_introns
156{
157    my $self = shift;
158    if (@_) { $self->{CM_check_for_introns} = shift; }
159    return $self->{CM_check_for_introns};
160}
161
162sub CM_check_for_split_halves
163{
164    my $self = shift;
165    if (@_) { $self->{CM_check_for_split_halves} = shift; }
166    return $self->{CM_check_for_split_halves};
167}
168
169sub min_tRNA_no_intron
170{
171    my $self = shift;
172    if (@_) { $self->{min_tRNA_no_intron} = shift; }
173    return $self->{min_tRNA_no_intron};
174}
175
176sub min_intron_length
177{
178    my $self = shift;
179    if (@_) { $self->{min_intron_length} = shift; }
180    return $self->{min_intron_length};
181}
182
183sub skip_pseudo_filter
184{
185    my $self = shift;
186    if (@_) { $self->{skip_pseudo_filter} = shift; }
187    return $self->{skip_pseudo_filter};
188}
189
190sub min_pseudo_filter_score
191{
192    my $self = shift;
193    if (@_) { $self->{min_pseudo_filter_score} = shift; }
194    return $self->{min_pseudo_filter_score};
195}
196
197sub min_ss_score
198{
199    my $self = shift;
200    if (@_) { $self->{min_ss_score} = shift; }
201    return $self->{min_ss_score};
202}
203
204sub min_hmm_score
205{
206    my $self = shift;
207    if (@_) { $self->{min_hmm_score} = shift; }
208    return $self->{min_hmm_score};
209}
210
211sub get_hmm_score
212{
213    my $self = shift;
214    if (@_) { $self->{get_hmm_score} = shift; }
215    return $self->{get_hmm_score};
216}
217
218sub main_cm_file_path
219{
220    my $self = shift;
221    if (@_) { %{$self->{main_cm_file_path}} = shift; }
222    return %{$self->{main_cm_file_path}};
223}
224
225sub add_main_cm_file_path
226{
227    my $self = shift;
228    my ($key, $file) = @_;
229    $self->{main_cm_file_path}->{$key} = $file;
230}
231
232sub mainNS_cm_file_path
233{
234    my $self = shift;
235    if (@_) { %{$self->{mainNS_cm_file_path}} = shift; }
236    return %{$self->{mainNS_cm_file_path}};
237}
238
239sub add_mainNS_cm_file_path
240{
241    my $self = shift;
242    my ($key, $file) = @_;
243    $self->{mainNS_cm_file_path}->{$key} = $file;
244}
245
246sub cove_cm_file_path
247{
248    my $self = shift;
249    if (@_) { $self->{cove_cm_file_path} = shift; }
250    return $self->{cove_cm_file_path};
251}
252
253sub isotype_cm_db_file_path
254{
255    my $self = shift;
256    if (@_) { $self->{isotype_cm_db_file_path} = shift; }
257    return $self->{isotype_cm_db_file_path};
258}
259
260sub mito_isotype_cm_db_file_path
261{
262    my $self = shift;
263    if (@_) { $self->{mito_isotype_cm_db_file_path} = shift; }
264    return $self->{mito_isotype_cm_db_file_path};
265}
266
267sub isotype_cm_file_paths
268{
269    my $self = shift;
270    if (@_) { %{$self->{isotype_cm_file_paths}} = shift; }
271    return %{$self->{isotype_cm_file_paths}};
272}
273
274sub add_isotype_cm_file_path
275{
276    my $self = shift;
277    my ($isotype, $file) = @_;
278    $self->{isotype_cm_file_paths}->{$isotype} = $file;
279}
280
281sub mito_isotype_cm_file_paths
282{
283    my $self = shift;
284    if (@_) { %{$self->{mito_isotype_cm_file_paths}} = shift; }
285    return %{$self->{mito_isotype_cm_file_paths}};
286}
287
288sub add_mito_isotype_cm_file_path
289{
290    my $self = shift;
291    my ($isotype, $file) = @_;
292    $self->{mito_isotype_cm_file_paths}->{$isotype} = $file;
293}
294
295sub intron_cm_file_path
296{
297    my $self = shift;
298    if (@_) { %{$self->{intron_cm_file_path}} = shift; }
299    return %{$self->{intron_cm_file_path}};
300}
301
302sub add_intron_cm_file_path
303{
304    my $self = shift;
305    my ($key, $file) = @_;
306    $self->{intron_cm_file_path}->{$key} = $file;
307}
308
309sub Pselc_cm_file_path
310{
311    my $self = shift;
312    if (@_) { $self->{Pselc_cm_file_path} = shift; }
313    return $self->{Pselc_cm_file_path};
314}
315
316sub Eselc_cm_file_path
317{
318    my $self = shift;
319    if (@_) { $self->{Eselc_cm_file_path} = shift; }
320    return $self->{Eselc_cm_file_path};
321}
322
323sub add_isotype_cm_cutoff
324{
325    my $self = shift;
326    my ($type, $cutoff) = @_;
327    $self->{isotype_cm_cutoff}->{$type} = $cutoff;
328}
329
330sub covels_bin
331{
332    my $self = shift;
333    if (@_) { $self->{covels_bin} = shift; }
334    return $self->{covels_bin};
335}
336
337sub coves_bin
338{
339    my $self = shift;
340    if (@_) { $self->{coves_bin} = shift; }
341    return $self->{coves_bin};
342}
343
344sub cmsearch_bin
345{
346    my $self = shift;
347    if (@_) { $self->{cmsearch_bin} = shift; }
348    return $self->{cmsearch_bin};
349}
350
351sub infernal_thread
352{
353    my $self = shift;
354    if (@_) { $self->{infernal_thread} = shift; }
355    return $self->{infernal_thread};
356}
357
358sub tab_results
359{
360    my $self = shift;
361    if (@_) { $self->{tab_results} = shift; }
362    return $self->{tab_results};
363}
364
365sub set_defaults
366{
367    my $self = shift;
368    my $global_vars = shift;
369    my $global_constants = $global_vars->{global_constants};
370
371    $self->{cm_mode} = $global_constants->get("cm_mode");
372    $self->{cm_cutoff} = $global_constants->get("cm_cutoff");
373    $self->{organelle_cm_cutoff} = $global_constants->get("organelle_cm_cutoff");
374    $self->{infernal_fp_cutoff} = $global_constants->get("infernal_fp_cutoff");
375    $self->{max_tRNA_length} = $global_constants->get("max_tRNA_length");
376    $self->{max_cove_tRNA_length} = $global_constants->get("max_cove_tRNA_length");
377    $self->{max_cmsearch_tRNA_length} = $global_constants->get("max_cmsearch_tRNA_length");
378    $self->{min_tRNA_no_intron} = $global_constants->get("min_tRNA_no_intron");
379    $self->{min_intron_length} = $global_constants->get("min_intron_length");
380    $self->{min_cove_pseudo_filter_score} = $global_constants->get("min_cove_pseudo_filter_score");
381    $self->{min_cmsearch_pseudo_filter_score} = $global_constants->get("min_cmsearch_pseudo_filter_score");
382    $self->{min_ss_score} = $global_constants->get("min_ss_score");
383    $self->{min_hmm_score} = $global_constants->get("min_hmm_score");
384    $self->{nci_scan_cutoff} = $global_constants->get("nci_scan_cutoff");
385    $self->{BHB_cm_cutoff} = $global_constants->get("BHB_cm_cutoff");
386    $self->{split_tRNA_scan_cutoff} = $global_constants->get("split_tRNA_scan_cutoff");
387    $self->{half_tRNA_cutoff} = $global_constants->get("half_tRNA_cutoff");
388    $self->{left_splicing_len} = $global_constants->get("left_splicing_len");
389    $self->{right_splicing_len} = $global_constants->get("right_splicing_len");
390    my $search_types = $global_constants->get("isotype_cm_cutoff");
391    foreach my $type (sort keys %$search_types)
392    {
393        $self->add_isotype_cm_cutoff($type, $search_types->{$type});
394    }
395}
396
397sub set_file_paths
398{
399    my $self = shift;
400    my $global_vars = shift;
401    my $opts = $global_vars->{options};
402    my $global_constants = $global_vars->{global_constants};
403    my $isotype_cms = "";
404
405    if ($opts->euk_mode())
406    {
407        if ($self->infernal_mode())
408        {
409            $self->{main_cm_file_path}->{Domain} = $global_constants->get_subvalue("cm", "eukaryota");     # default to eukar model
410            $self->{main_cm_file_path}->{SeC} = $global_constants->get_subvalue("euk_cm", "SeC");          # Euk SeC model
411            $self->{mainNS_cm_file_path}->{Domain} = $global_constants->get_subvalue("cm", "eukaryota-ns");          # no secondary struct
412            $self->{cove_cm_file_path} = $global_constants->get_subvalue("cove_cm", "eukaryota");          # default to eukar cove model
413        }
414        elsif ($self->cove_mode())
415        {
416            $self->{main_cm_file_path}->{Domain} = $global_constants->get_subvalue("cove_cm", "eukaryota");          # default to eukar cove model
417            $self->{mainNS_cm_file_path}->{Domain} = $global_constants->get_subvalue("cove_cm", "eukaryota-ns");     # no secondary struct
418        }
419        $self->{isotype_cm_db_file_path} = $global_constants->get_subvalue("isotype_cm", "eukaryota");
420        if ($opts->mito_model() ne "")
421        {
422            $self->{mito_isotype_cm_db_file_path} = $global_constants->get_subvalue("mito_cm", $opts->mito_model());
423        }
424    }
425    elsif ($opts->bact_mode())
426    {
427        if ($self->infernal_mode())
428        {
429            $self->{main_cm_file_path}->{Domain} = $global_constants->get_subvalue("cm", "bacteria");               # use bacterial covariance model
430            $self->{main_cm_file_path}->{SeC} = $global_constants->get_subvalue("bact_cm", "SeC");        # Bacterial SeC model
431            $self->{mainNS_cm_file_path}->{Domain} = $global_constants->get_subvalue("cm", "bacteria-ns");          # no sec struct
432            $self->{cove_cm_file_path} = $global_constants->get_subvalue("cove_cm", "bacteria");          # use bacterial covariance model
433        }
434        elsif ($self->cove_mode())
435        {
436            $self->{main_cm_file_path}->{Domain} = $global_constants->get_subvalue("cove_cm", "bacteria");          # use bacterial covariance model
437            $self->{mainNS_cm_file_path}->{Domain} = $global_constants->get_subvalue("cove_cm", "bacteria-ns");     # no sec struct
438        }
439        $self->{isotype_cm_db_file_path} = $global_constants->get_subvalue("isotype_cm", "bacteria");
440    }
441    elsif ($opts->arch_mode())
442    {
443        my $nci_cms = $global_constants->get("nci_cm");
444        foreach my $type (sort keys %$nci_cms)
445        {
446            $self->add_intron_cm_file_path($type, $nci_cms->{$type});
447        }
448        $self->{arch_five_half_cm_file_path} = $global_constants->get_subvalue("cm", "arch_5h");          # model for finding 5'half
449        $self->{arch_three_half_cm_file_path} = $global_constants->get_subvalue("cm", "arch_3h");         # model for finding 3'half
450        if ($self->infernal_mode())
451        {
452            $self->{main_cm_file_path}->{Domain} = $global_constants->get_subvalue("cm", "archaea");                # use archaea covariance model
453            $self->{main_cm_file_path}->{SeC} = $global_constants->get_subvalue("arch_cm", "SeC");        # Archaeal SeC model
454            $self->{mainNS_cm_file_path}->{Domain} = $global_constants->get_subvalue("cm", "archaea-ns");           # no sec struct
455            $self->{cove_cm_file_path} = $global_constants->get_subvalue("cove_cm", "archaea");           # use archaea covariance model
456        }
457        elsif ($opts->cove_mode())
458        {
459            $self->{main_cm_file_path}->{Domain} = $global_constants->get_subvalue("cove_cm", "archaea");           # use archaea covariance model
460            $self->{mainNS_cm_file_path}->{Domain} = $global_constants->get_subvalue("cove_cm", "archaea-ns");      # no sec struct
461        }
462        $self->{isotype_cm_db_file_path} = $global_constants->get_subvalue("isotype_cm", "archaea");
463    }
464    elsif ($opts->mito_mode())
465    {
466        if ($self->infernal_mode())
467        {
468            $isotype_cms = $global_constants->get("mito_cm_".$opts->mito_model());
469            foreach my $isotype (sort keys %$isotype_cms)
470            {
471                $self->{main_cm_file_path}->{$isotype} = $isotype_cms->{$isotype};
472            }
473        }
474    }
475    elsif ($opts->metagenome_mode())
476    {
477        if ($self->infernal_mode())
478        {
479            $self->{main_cm_file_path}->{general} = $global_constants->get_subvalue("cm", "general");               # use general covariance model
480            $self->{mainNS_cm_file_path}->{general} = $global_constants->get_subvalue("cm", "general-ns");          # no sec struct
481            $self->{main_cm_file_path}->{euk} = $global_constants->get_subvalue("cm", "eukayota");                # use eukayota covariance model
482            $self->{mainNS_cm_file_path}->{euk} = $global_constants->get_subvalue("cm", "eukayota-ns");           # no sec struct
483            $self->{main_cm_file_path}->{arch} = $global_constants->get_subvalue("cm", "archaea");                  # use archaea covariance model
484            $self->{mainNS_cm_file_path}->{arch} = $global_constants->get_subvalue("cm", "archaea-ns");             # no sec struct
485            $self->{main_cm_file_path}->{bact} = $global_constants->get_subvalue("cm", "bacteria");                # use bacteria covariance model
486            $self->{mainNS_cm_file_path}->{bact} = $global_constants->get_subvalue("cm", "bacteria-ns");           # no sec struct
487        }
488    }
489    elsif ($opts->numt_mode())
490    {
491        $isotype_cms = $global_constants->get("mito_cm_mammal");
492        foreach my $isotype (sort keys %$isotype_cms)
493        {
494            $self->add_isotype_cm_file_path($isotype, $isotype_cms->{$isotype});
495        }
496    }
497    elsif ($opts->general_mode())
498    {
499        if ($self->infernal_mode())
500        {
501            $self->{main_cm_file_path}->{general} = $global_constants->get_subvalue("cm", "general");                # use original covariance model
502            $self->{mainNS_cm_file_path}->{general} = $global_constants->get_subvalue("cm", "general-ns");           # no sec struct
503            $self->{cove_cm_file_path} = $global_constants->get_subvalue("cove_cm", "general");           # use original covariance model
504        }
505        elsif ($self->cove_mode())
506        {
507            $self->{main_cm_file_path}->{general} = $global_constants->get_subvalue("cove_cm", "general");           # use original covariance model
508            $self->{mainNS_cm_file_path}->{general} = $global_constants->get_subvalue("cove_cm", "general-ns");      # no sec struct
509        }
510    }
511    elsif ($opts->organelle_mode())
512    {
513        if ($self->infernal_mode())
514        {
515            $self->{main_cm_file_path}->{general} = $global_constants->get_subvalue("cm", "general1415");                # use original covariance model
516            $self->{mainNS_cm_file_path}->{general} = $global_constants->get_subvalue("cm", "general1415-ns");           # no sec struct
517            $self->{cove_cm_file_path} = $global_constants->get_subvalue("cove_cm", "general");           # use original covariance model
518        }
519        elsif ($self->cove_mode())
520        {
521            $self->{main_cm_file_path}->{general} = $global_constants->get_subvalue("cove_cm", "general");           # use original covariance model
522            $self->{mainNS_cm_file_path}->{general} = $global_constants->get_subvalue("cove_cm", "general-ns");      # no sec struct
523        }
524    }
525    elsif ($opts->alternate_mode())
526    {
527        my $cms = $global_constants->get("alt_cm");
528        foreach my $key (sort keys %$cms)
529        {
530            $self->add_main_cm_file_path($key, $cms->{$key});
531        }
532    }
533
534    if ($self->infernal_mode())
535    {
536        $self->{Pselc_cm_file_path} = $self->{main_cm_file_path}->{SeC};
537        $self->{Eselc_cm_file_path} = $self->{main_cm_file_path}->{SeC};
538    }
539    elsif ($self->cove_mode())
540    {
541        $self->{Pselc_cm_file_path} = $global_constants->get_subvalue("cove_cm", "PSELC");
542        $self->{Eselc_cm_file_path} = $global_constants->get_subvalue("cove_cm", "ESELC");
543    }
544}
545
546sub check_lib_files
547{
548    my $self = shift;
549    my ($opts) = @_;
550
551    foreach my $cur_cm (sort keys %{$self->{main_cm_file_path}})
552    {
553        if ($self->{main_cm_file_path}->{$cur_cm} ne "" and !-r $self->{main_cm_file_path}->{$cur_cm})
554        {
555            die "FATAL: Unable to open ".$self->{main_cm_file_path}->{$cur_cm}." covariance model file\n\n";
556        }
557    }
558
559    foreach my $cur_cm (sort keys %{$self->{mainNS_cm_file_path}})
560    {
561        if ($self->{mainNS_cm_file_path}->{$cur_cm} ne "" and !-r $self->{mainNS_cm_file_path}->{$cur_cm})
562        {
563            die "FATAL: Unable to open ".$self->{mainNS_cm_file_path}->{$cur_cm}." covariance model file\n\n";
564        }
565    }
566    if ($self->{cove_cm_file_path} ne "" and !-r $self->{cove_cm_file_path})
567    {
568        die "FATAL: Unable to open ".$self->{cove_cm_file_path}." covariance model file\n\n";
569    }
570
571    if ($self->{Pselc_cm_file_path} ne "" and !-r $self->{Pselc_cm_file_path})
572    {
573        die "FATAL: Unable to open ".$self->{Pselc_cm_file_path}." covariance model file\n\n";
574    }
575
576    if ($self->{Eselc_cm_file_path} ne "" and !-r $self->{Eselc_cm_file_path})
577    {
578        die "FATAL: Unable to open ".$self->{Eselc_cm_file_path}." covariance model file\n\n";
579    }
580
581    if ($self->{isotype_cm_db_file_path} ne "" and (!-r $self->{isotype_cm_db_file_path} || !-e "$self->{isotype_cm_db_file_path}".".i1f"))
582    {
583        die "FATAL: Unable to open ".$self->{isotype_cm_db_file_path}." covariance model file\n\n";
584    }
585
586    if ($self->{mito_isotype_cm_db_file_path} ne "" and (!-r $self->{mito_isotype_cm_db_file_path} || !-e "$self->{mito_isotype_cm_db_file_path}".".i1f"))
587    {
588        die "FATAL: Unable to open ".$self->{mito_isotype_cm_db_file_path}." covariance model file\n\n";
589    }
590
591    foreach my $isotype (sort keys %{$self->{isotype_cm_file_paths}})
592    {
593        if ($self->{isotype_cm_file_paths}->{$isotype} ne "" and !-r $self->{isotype_cm_file_paths}->{$isotype})
594        {
595            die "FATAL: Unable to open ".$self->{isotype_cm_file_paths}->{$isotype}." covariance model file\n\n";
596        }
597    }
598
599    if ($opts->arch_mode() && ($self->infernal_mode() ||  $self->cove_mode()))
600    {
601        foreach my $cur_cm (sort keys %{$self->{intron_cm_file_path}})
602        {
603            if ($self->{intron_cm_file_path}->{$cur_cm} ne "" and !-r $self->{intron_cm_file_path}->{$cur_cm})
604            {
605                die "FATAL: Unable to open ".$self->{intron_cm_file_path}->{$cur_cm}." covariance model file\n\n";
606            }
607        }
608        if ($self->{arch_five_half_cm_file_path} ne "" and !-r $self->{arch_five_half_cm_file_path})
609        {
610            die "FATAL: Unable to open ".$self->{arch_five_half_cm_file_path}." covariance model file\n\n";
611        }
612        if ($self->{arch_three_half_cm_file_path} ne "" and !-r $self->{arch_three_half_cm_file_path})
613        {
614            die "FATAL: Unable to open ".$self->{arch_three_half_cm_file_path}." covariance model file\n\n";
615        }
616    }
617}
618
619sub set_bin
620{
621    my $self = shift;
622    my $bindir = shift;
623
624    if ($^O =~ /^MSWin/)
625    {
626        $self->{cmsearch_bin} .= ".exe";
627        $self->{covels_bin} .= ".exe";
628        $self->{coves_bin} .= ".exe";
629    }
630    if (!(-x $self->{covels_bin}))
631    {
632        $self->{covels_bin} = $bindir."/".$self->{covels_bin};
633        if (!(-x $self->{covels_bin}))
634        {
635            die "FATAL: Unable to find ".$self->{covels_bin}." executable\n\n";
636        }
637    }
638    if (!(-x $self->{coves_bin}))
639    {
640        $self->{coves_bin} = $bindir."/".$self->{coves_bin};
641        if (!(-x $self->{coves_bin}))
642        {
643            die "FATAL: Unable to find ".$self->{coves_bin}." executable\n\n";
644        }
645    }
646}
647
648sub set_infernal_bin
649{
650    my $self = shift;
651    my $bindir = shift;
652
653    $self->{cmsearch_bin} = $bindir."/".$self->{cmsearch_bin};
654    if (!(-x $self->{cmsearch_bin}))
655    {
656        die "FATAL: Unable to find ".$self->{cmsearch_bin}." executable\n\n";
657    }
658
659    $self->{cmscan_bin} = $bindir."/".$self->{cmscan_bin};
660    if (!(-x $self->{cmscan_bin}))
661    {
662        die "FATAL: Unable to find ".$self->{cmscan_bin}." executable\n\n";
663    }
664}
665
666sub set_search_params
667{
668    my $self = shift;
669    my ($opts, $r_scan_len, $r_cur_cm_file, $max_search_tRNA_length, $trna) = @_;
670
671    # don't set '-W' param over 200 bp if a pre-scanner is being used,
672    #  use max window of 150 bp if cmsearch only (too slow otherwise)
673    if ($opts->eufind_mode() || $opts->tscan_mode() || $opts->use_prev_ts_run())
674    {
675        $$r_scan_len = &min($trna->len(), $self->{max_tRNA_length});
676    }
677    else
678    {
679        $$r_scan_len = $max_search_tRNA_length;
680    }
681
682    # set correct CM file for current tRNA
683    $$r_cur_cm_file = $self->{main_cm_file_path}->{Domain};
684    if ($opts->general_mode() or $opts->organelle_mode())
685    {
686        $$r_cur_cm_file = $self->{main_cm_file_path}->{general};
687    }
688
689    if ($opts->eufind_mode())
690    {
691        # use prok selcys model
692        if ($trna->isotype() eq "SeCp")
693        {
694            $$r_cur_cm_file = $self->{Pselc_cm_file_path};
695        }
696        # use euk selcys model
697        elsif  ($trna->isotype() eq "SeCe")
698        {
699            $$r_cur_cm_file = $self->{Eselc_cm_file_path};
700        }
701    }
702}
703
704# find anticodon loop & a-codon from coves or cmsearch output
705sub find_anticodon
706{
707    my $self = shift;
708    my ($opts, $trna, $gc) = @_;
709    my ($antiloop, $antiloop_end, $ac_index, $anticodon, $verify_ac);
710
711    my $seq = $trna->seq();
712    my $ss = $trna->ss();
713    my $model = $trna->model();
714    my $undef_anticodon = $gc->undef_anticodon();
715
716    my $antiloop_index = 0;
717    my $antiloop_len = 0;
718
719    # Match pattern in secondary structure output,
720    # looking for second stem-loop structure ">>>>...<<<<"
721    # that should be the anitocodon stem-loop
722
723    if ($opts->mito_mode() and $model eq "SerGCT")
724    {
725        if ($ss =~ /^([>.]+)>([.]{4,})<+.+[>.]+<[<.]+/o)
726        {
727            # set to index position of first base in anticodon loop
728            $antiloop_index = length($1) + 1;
729            $antiloop_len = length($2);   # anticodon loop length
730        }
731    }
732    elsif ($opts->mito_mode())
733    {
734        if ($ss =~ /^([>.]+<[<.]+[>.]*)>([.]{4,})<+.+[>.]+<[<.]+/o)
735        {
736            # set to index position of first base in anticodon loop
737            $antiloop_index = length($1) + 1;
738            $antiloop_len = length($2);   # anticodon loop length
739        }
740    }
741    elsif ($ss =~ /^([>.]+<[<.]+>[>.]*)>([.]{4,})<+.+[>.]+<[<.]+/o)
742    {
743        # set to index position of first base in anticodon loop
744        $antiloop_index = length($1) + 1;
745        $antiloop_len = length($2);   # anticodon loop length
746    }
747
748    if ($antiloop_index != 0 and $antiloop_len != 0)
749    {
750        # index of end of anticodon loop
751        $antiloop_end = $antiloop_index + $antiloop_len - 1;
752
753        $antiloop = substr($seq, $antiloop_index, $antiloop_len);
754
755        # remove '-' gaps from loop
756        $antiloop =~ s/[\-]//g;
757
758        # Don't guess if even number of bp in
759        # anticodon loop
760        if (!$opts->mito_mode())
761        {
762            # remove introns & non-canonical bases
763            $antiloop =~ s/[a-z]//g;
764
765            if ((length($antiloop) < 5) || ((length($antiloop) % 2) == 0))
766            {
767                return ($undef_anticodon, -1, -1, -1);
768            }
769            # get anticodon
770            $ac_index = (length($antiloop) - 3) / 2;
771            $anticodon = substr($antiloop, $ac_index, 3);
772            $verify_ac = substr($seq, $ac_index + $antiloop_index, 3);
773        }
774        else
775        {
776            $antiloop = uc($antiloop);
777            if (length($antiloop) < 5)
778            {
779                $trna->category("undetermined_ac");
780                return ($undef_anticodon, -1, -1, -1);
781            }
782            elsif ((length($antiloop) % 2) == 0)
783            {
784                $ac_index = int((length($antiloop) - 3) / 2);
785                $anticodon = substr($antiloop, $ac_index, 3);
786                my $isotype = $gc->get_tRNA_type($self, $anticodon, $self->{main_cm_file_path}->{$model}, $model, $self->cove_mode());
787                if ($model eq $isotype or ($model eq "SerGCT" and $isotype eq "Ser") or ($model eq "SerTGA" and $isotype eq "Ser")
788                    or ($model eq "LeuTAG" and $isotype eq "Leu") or ($model eq "LeuTAA" and $isotype eq "Leu"))
789                {
790                    $trna->category("mito_ac_mislocation");
791                    $trna->note("(".$anticodon.")");
792                }
793                else
794                {
795                    $ac_index = int((length($antiloop) - 3) / 2 + 1);
796                    $anticodon = substr($antiloop, $ac_index, 3);
797                    $isotype = $gc->get_tRNA_type($self, $anticodon, $self->{main_cm_file_path}->{$model}, $model, $self->cove_mode());
798                    if ($model eq $isotype or ($model eq "SerGCT" and $isotype eq "Ser") or ($model eq "SerTGA" and $isotype eq "Ser")
799                        or ($model eq "LeuTAG" and $isotype eq "Leu") or ($model eq "LeuTAA" and $isotype eq "Leu"))
800                    {
801                        $trna->category("mito_ac_mislocation");
802                        $trna->note("(".$anticodon.")");
803                    }
804                    else
805                    {
806                        $trna->category("mito_mismatch_ac");
807                        $trna->note("(".$anticodon.")");
808                    }
809                }
810                return ($undef_anticodon, -1, -1, -1);
811            }
812            else
813            {
814                # get anticodon
815                $ac_index = (length($antiloop) - 3) / 2;
816                $anticodon = substr($antiloop, $ac_index, 3);
817                $verify_ac = uc(substr($seq, $ac_index + $antiloop_index, 3));
818            }
819        }
820
821        # check to see if anticodon extracted from the entire
822        #  trna sequence (coveseq) is same as that extracted from
823        #  just the anticodon loop sequence (antiloop)
824
825        if ($verify_ac ne $anticodon)
826        {
827            $trna->category("undetermined_ac");
828            return ($undef_anticodon, -1, -1, -1);
829        }
830        return ($anticodon, $antiloop_index, $antiloop_end, $ac_index + $antiloop_index + 1);
831    }
832    else
833    {
834        $trna->category("undetermined_ac");
835        return ($undef_anticodon, -1, -1, -1);
836    }
837}
838
839sub find_intron
840{
841    my $self = shift;
842    my ($trna_seq, $antiloop_index, $antiloop_end) = @_;
843    my ($intron, $istart, $iend, $tmpstr, $antiloop_seq);
844    my $min_intron_length = $self->{min_intron_length};
845
846    # check to see if it was unable
847    # to determine the anticodon loop
848    if ($antiloop_index == -1)
849    {
850        return(0, 0, 0);
851    }
852    # get subsequence from start of anticodon loop
853    # to end of anticodon loop -- look for intron in it
854    $antiloop_seq = substr($trna_seq, $antiloop_index, $antiloop_end - $antiloop_index + 1);
855
856    if ($antiloop_seq =~ /^(.*[^a-z]+)([a-z]{$min_intron_length,})[^a-z]+/o)
857    {
858        $intron = $2;
859
860        # make sure to get the base index for the last (not nec. only) occurrence
861        # of the intron sequence string up to end of anticodon loop
862        $tmpstr = substr($trna_seq, 0, $antiloop_end+1);
863        $istart = index($tmpstr, $intron) + 1;
864        $iend = length($intron) + $istart - 1;
865    }
866    else
867    {
868        $intron = 0;
869    }
870    return ($intron, $istart, $iend);
871}
872
873# is_pseudo_gene
874#
875# Runs a covariance model without secondary structure information on predicted tRNA, puts this value
876# in "hmm_score". Contribution to total score from secondary structure derived by subtracting hmm_score from total score
877# Returns non-zero if tRNA scores fall below minima for either primary or secondary structure components of score
878sub is_pseudo_gene
879{
880    my $self = shift;
881    my $global_constants = shift;
882    my $opts = shift;
883    my $log = shift;
884    my ($trna, $prescan_tRNA_id, $tmp_trnaseq_file) = @_;
885    my ($dummy1, $dummy2, $hmm_score, $hit_start, $hit_end, $hit_ct, $min_pseudo_filter_score);
886
887    $dummy1 = $dummy2 = "";                 # return values not used
888
889    # skip check for pseudo gene if score is above minimum
890    # -D (disable pseudogene checking) is specified
891    # AND -H option (get hmm scores) is NOT specified
892    if ($self->cove_mode())
893    {
894        $min_pseudo_filter_score = $self->{min_cove_pseudo_filter_score};
895    }
896    elsif ($self->infernal_mode())
897    {
898        $min_pseudo_filter_score = $self->{min_cmsearch_pseudo_filter_score};
899    }
900
901    if ((($trna->score() >= $min_pseudo_filter_score) || $self->{skip_pseudo_filter}) && !$self->{get_hmm_score})
902    {
903        return 0;
904    }
905
906    $hmm_score = 0;
907    my $ss_score = 0;
908    my $score = 0;
909    my $ns_cm = "";
910    if ($self->cove_mode())
911    {
912        $ns_cm = $self->{mainNS_cm_file_path}->{Domain};
913        if ($opts->general_mode() or $opts->organelle_mode())
914        {
915            $ns_cm = $self->{mainNS_cm_file_path}->{general}
916        }
917
918        ($dummy1, $dummy2, $hmm_score) =
919            $self->run_coves($log, $tmp_trnaseq_file, $prescan_tRNA_id, $ns_cm);
920        $score = $trna->get_domain_model("cove")->{score};
921        $ss_score = $score - $hmm_score;
922        $trna->update_domain_model("cove", $score, $score, $hmm_score, $ss_score);
923    }
924    elsif ($self->infernal_mode())
925    {
926        $ns_cm = $self->{mainNS_cm_file_path}->{$trna->model()};
927        my $cms_output_file = $global_constants->get("temp_dir")."/tscan$$"."_cm_hmm.out";
928
929       ($hmm_score, $hit_start, $hit_end, $hit_ct) =
930            $self->cmsearch_scoring($opts, $log, $tmp_trnaseq_file, $prescan_tRNA_id, $ns_cm, $cms_output_file);
931        $score = $trna->get_domain_model("infernal")->{score};
932        $ss_score = $score - $hmm_score;
933        $trna->update_domain_model("infernal", $score, $score, $hmm_score, $ss_score);
934    }
935    else
936    {
937        return -1;                              # Error - no second pass scanner selected
938    }
939
940    if ((($ss_score < $self->{min_ss_score}) || ($hmm_score < $self->{min_hmm_score})) &&
941        ($score < $min_pseudo_filter_score))
942    {
943        return 1;
944    }
945}
946
947# Get anticodon, isotype, intron location, and pseudogene status for tRNA with noncanonical introns
948sub decode_nci_tRNA_properties
949{
950    my $self = shift;
951    my ($global_vars, $trna) = @_;
952    my $global_constants = $global_vars->{global_constants};
953    my $opts = $global_vars->{options};
954    my $gc = $global_vars->{gc};
955    my $log = $global_vars->{log_file};
956
957    my ($anticodon, $acodon_index, $acodon_end1, $acodon_index2, $trna_type, $intron, $istart, $iend,
958          $antiloop_index, $antiloop_end, $trna_len, $scan_len);
959
960    $anticodon = "ERR";
961
962    my $mat_seq = "";
963    my $precursor_seq = $trna->seq();
964    my @introns = $trna->ar_introns();
965    for (my $i = 0; $i < scalar(@introns); $i++)
966    {
967        if ($i == 0)
968        {
969            $mat_seq = substr($precursor_seq, 0, $introns[$i]->{rel_start} - 1);
970        }
971        else
972        {
973            $mat_seq .= substr($precursor_seq, $introns[$i-1]->{rel_end}, $introns[$i]->{rel_start} - $introns[$i-1]->{rel_end} - 1);
974        }
975    }
976    $mat_seq .= substr($precursor_seq, $introns[scalar(@introns)-1]->{rel_end});
977
978    if (uc($mat_seq) eq uc($trna->mat_seq()))
979    {
980		$trna->seq($trna->mat_seq());
981	}
982	else
983    {
984        $log->warning("Mismatch mature sequence for tRNA for noncanonical intron\t". $trna->tRNAscan_id());
985    }
986
987    if ($self->cove_mode() || $self->infernal_mode())
988    {
989        ($anticodon, $antiloop_index, $antiloop_end, $acodon_index) = $self->find_anticodon($opts, $trna, $gc);
990        $acodon_index2 = 0;
991        $trna->anticodon($anticodon);
992
993        for (my $i = 0; $i < scalar(@introns); $i++)
994        {
995            if ($acodon_index > $introns[$i]->{rel_start})
996            {
997				$acodon_index += ($introns[$i]->{rel_end} - $introns[$i]->{rel_start} + 1);
998			}
999			elsif ($acodon_index < $introns[$i]->{rel_start} and ($acodon_index + 2) >= $introns[$i]->{rel_start})
1000            {
1001                $acodon_end1 = $introns[$i]->{rel_start} - 1;
1002                $acodon_index2 = $introns[$i]->{rel_end} + 1;
1003            }
1004            elsif ($acodon_index + 2 < $introns[$i]->{rel_start})
1005            {
1006                last;
1007            }
1008        }
1009        if ($trna->get_ac_pos_count() > 0)
1010        {
1011            $trna->remove_ac_pos(0);
1012        }
1013        if ($acodon_index2 == 0)
1014        {
1015            $trna->add_ac_pos($acodon_index, $acodon_index + 2);
1016        }
1017        else
1018        {
1019            $trna->add_ac_pos($acodon_index, $acodon_end1);
1020            $trna->add_ac_pos($acodon_index2, (3 - ($acodon_end1 - $acodon_index + 1)) + $acodon_index2 - 1);
1021        }
1022    }
1023    else
1024    {
1025        die "Second pass mode not selected -- can't decode tRNA type\n\n";
1026    }
1027
1028    # check for problem parsing anticodon loop
1029    if (($anticodon eq $gc->undef_anticodon()) || ($trna->seq()  eq 'Error'))
1030    {
1031        $trna->anticodon($gc->undef_anticodon());
1032        $trna->isotype($gc->undef_isotype());
1033        $intron = 0;
1034
1035        if ($opts->save_odd_struct())
1036        {
1037            open(ODDTRNA, ">>".$opts->odd_struct_file()) ||
1038                die "FATAL: Can't open ".$opts->odd_struct_file()." to save seconary structures\n\n";
1039            print ODDTRNA $trna->tRNAscan_id()." (".$trna->start()."-".$trna->end()."):\n".$trna->seq()."\n".$trna->ss()."\n\n";
1040            close(ODDTRNA);
1041        }
1042    }
1043    # continue tRNA struct parsing
1044    else
1045    {
1046        ($intron, $istart, $iend) = $self->find_intron($trna->seq(), $antiloop_index, $antiloop_end);
1047
1048        if ($intron)
1049        {
1050            $mat_seq = substr($trna->seq(), 0, $istart - 1).substr($trna->seq(), $iend);
1051            $trna->mat_ss(substr($trna->ss(), 0, $istart - 1).substr($trna->ss(), $iend));
1052            $trna->ss($trna->mat_ss());
1053
1054            for (my $i = 0; $i < scalar(@introns); $i++)
1055            {
1056                if ($istart > $introns[$i]->{rel_start})
1057                {
1058                    $istart += ($introns[$i]->{rel_end} - $introns[$i]->{rel_start} + 1);
1059                    $iend += ($introns[$i]->{rel_end} - $introns[$i]->{rel_start} + 1);
1060                }
1061                elsif ($iend < $introns[$i]->{rel_start})
1062                {
1063                    last;
1064                }
1065            }
1066            my $intron_start = 0;
1067            my $intron_end = 0;
1068            if ($trna->strand() eq "+")
1069            {
1070                $intron_start = $istart + $trna->start() - 1;
1071                $intron_end = $iend + $trna->start() - 1
1072            }
1073            else
1074            {
1075                $intron_start = $trna->end() - $iend + 1;
1076                $intron_end = $trna->end() - $istart + 1;
1077            }
1078            $trna->add_intron($istart, $iend, $intron_start, $intron_end, "CI", $intron);
1079            $trna->merge_introns();
1080        }
1081
1082        $trna->isotype($gc->get_tRNA_type($self, $trna->anticodon(), $self->{main_cm_file_path}->{$trna->model()}, $trna->model(), $self->cove_mode()));
1083    }
1084
1085    # Reset CI type if marked as NCI
1086    @introns = $trna->ar_introns();
1087    my $has_CI = 0;
1088    for (my $i = 0; $i < scalar(@introns); $i++)
1089    {
1090        if ($introns[$i]->{type} eq "CI")
1091        {
1092			$has_CI = 1;
1093            last;
1094		}
1095    }
1096
1097    for (my $i = 0; !$has_CI and $i < scalar(@introns); $i++)
1098    {
1099        if ($acodon_index2 == 0)
1100        {
1101            if ($acodon_index + 2 + 2 == $introns[$i]->{rel_start} and $introns[$i]->{type} eq "NCI")
1102            {
1103                $trna->set_intron($i, $introns[$i]->{rel_start}, $introns[$i]->{rel_end}, "CI", $introns[$i]->{seq});
1104                last;
1105            }
1106        }
1107        else
1108        {
1109            if (((3 - ($acodon_end1 - $acodon_index + 1)) + $acodon_index2 - 1 + 2) == $introns[$i]->{rel_start} and $introns[$i]->{type} eq "NCI")
1110            {
1111                $trna->set_intron($i, $introns[$i]->{rel_start}, $introns[$i]->{rel_end}, "CI", $introns[$i]->{seq});
1112                last;
1113            }
1114        }
1115    }
1116
1117    # Write current tRNA to temp file for re-analysis with other models
1118    $trna_len = length($trna->seq());
1119    &write_tRNA($global_constants->get("tmp_trnaseq_file"), $trna->tRNAscan_id(), "", $trna->seq(), 1);
1120
1121    if (($trna->model() ne "SeC") &&
1122        ($self->is_pseudo_gene($global_constants, $opts, $log, $trna, $trna->tRNAscan_id(), $global_constants->get("tmp_trnaseq_file"))) &&
1123        (!$self->{skip_pseudo_filter}))
1124    {
1125        $trna->pseudo(1);
1126    }
1127    else
1128    {
1129        $trna->pseudo(0);
1130    }
1131
1132    $trna->mat_seq($mat_seq);
1133    $trna->seq($precursor_seq);
1134}
1135
1136# Get tRNA anticodon, isotype, intron location, and pseudogene status
1137sub decode_tRNA_properties
1138{
1139    my $self = shift;
1140    my ($global_vars, $trna, $prescan_tRNA, $cur_cm_file) = @_;
1141    my $global_constants = $global_vars->{global_constants};
1142    my $opts = $global_vars->{options};
1143    my $gc = $global_vars->{gc};
1144    my $log = $global_vars->{log_file};
1145
1146    my ($anticodon, $acodon_index, $trna_type, $intron, $istart, $iend,
1147          $antiloop_index, $antiloop_end, $trna_len, $scan_len);
1148
1149    $anticodon = "ERR";
1150
1151    if ($self->cove_mode() || $self->infernal_mode())
1152    {
1153        ($anticodon, $antiloop_index, $antiloop_end, $acodon_index) = $self->find_anticodon($opts, $trna, $gc);
1154        $trna->anticodon($anticodon);
1155        $trna->add_ac_pos($acodon_index, $acodon_index + 2);
1156    }
1157    else
1158    {
1159        die "Second pass mode not selected -- can't decode tRNA type\n\n";
1160    }
1161
1162    # check for problem parsing anticodon loop
1163    if (($anticodon eq $gc->undef_anticodon()) || ($trna->seq()  eq 'Error'))
1164    {
1165        $trna->anticodon($gc->undef_anticodon());
1166        $trna->isotype($gc->undef_isotype());
1167        $intron = 0;
1168
1169        if ($opts->save_odd_struct())
1170        {
1171            open(ODDTRNA, ">>".$opts->odd_struct_file()) ||
1172                die "FATAL: Can't open ".$opts->odd_struct_file()." to save seconary structures\n\n";
1173            print ODDTRNA $prescan_tRNA->tRNAscan_id()." (".$trna->start()."-".$trna->end()."):\n".$trna->seq()."\n".$trna->ss()."\n\n";
1174            close(ODDTRNA);
1175        }
1176    }
1177    # continue tRNA struct parsing
1178    else
1179    {
1180        ($intron, $istart, $iend) = $self->find_intron($trna->seq(), $antiloop_index, $antiloop_end);
1181
1182        if ($intron)
1183        {
1184            my $intron_start = 0;
1185            my $intron_end = 0;
1186            if ($trna->strand() eq "+")
1187            {
1188                $intron_start = $istart + $trna->start() - 1;
1189                $intron_end = $iend + $trna->start() - 1
1190            }
1191            else
1192            {
1193                $intron_start = $trna->end() - $iend + 1;
1194                $intron_end = $trna->end() - $istart + 1;
1195            }
1196            $trna->add_intron($istart, $iend, $intron_start, $intron_end, "CI", $intron);
1197        }
1198
1199        if (defined $prescan_tRNA->anticodon() and ($prescan_tRNA->anticodon() ne "???"))
1200        {
1201            if (($anticodon ne (uc($prescan_tRNA->anticodon()))) &&
1202                ($opts->tscan_mode() || $opts->eufind_mode()) && ($opts->strict_params()))
1203            {
1204                $log->broadcast($prescan_tRNA->tRNAscan_id()." - anticondon conflict\t".$opts->second_pass_label().": $anticodon\tfirstpass (".$prescan_tRNA->hit_source().")".
1205                    ": ".$prescan_tRNA->anticodon()."\n".$trna->seq()."\n".$trna->ss()."\n");
1206            }
1207        }
1208
1209        $trna->isotype($gc->get_tRNA_type($self, $trna->anticodon(), $cur_cm_file, $trna->model(), $self->cove_mode()));
1210
1211        if ($anticodon ne "TCA" and $trna->isotype() eq "SeC")
1212        {
1213            $trna->anticodon($gc->undef_anticodon());
1214            $trna->isotype($gc->undef_isotype());
1215        }
1216    }
1217
1218    # Write current tRNA to temp file for re-analysis with other models
1219    $trna_len = length($trna->seq());
1220    &write_tRNA($global_constants->get("tmp_trnaseq_file"), $prescan_tRNA->tRNAscan_id(), "", $trna->seq(), 1);
1221
1222    if (($trna->model() ne "SeC") &&
1223        ($self->is_pseudo_gene($global_constants, $opts, $log, $trna, $prescan_tRNA->tRNAscan_id(), $global_constants->get("tmp_trnaseq_file"))) &&
1224        (!$self->{skip_pseudo_filter}))
1225    {
1226        $trna->pseudo(1);
1227    }
1228}
1229
1230# Fix fMet with missing first base
1231sub fix_fMet
1232{
1233    my $self = shift;
1234    my ($global_vars, $trna) = @_;
1235    my $opts = $global_vars->{options};
1236    my $rescore = 0;
1237
1238    my $inf = $trna->get_domain_model("infernal");
1239
1240    if ($inf->{score} > 40 and $trna->isotype() eq "Met" and $opts->bact_mode())
1241    {
1242		if ((substr($trna->seq(), length($trna->seq())-3) eq "CCA" and substr($trna->ss(), length($trna->ss())-5) eq ".....") or
1243            (substr($trna->ss(), length($trna->ss())-5) eq "<<<.."))
1244        {
1245			if (substr($trna->ss(), 0, 1) ne ".")
1246            {
1247                if (($trna->strand() eq "+" and $trna->start() > 1) or
1248                    ($trna->strand() eq "-" and $trna->end() < $trna->src_seqlen()))
1249                {
1250                    $trna->seq(substr($trna->upstream(), length($trna->upstream())-1) . $trna->seq());
1251                    $trna->upstream(substr($trna->upstream(), 0, length($trna->upstream())-1));
1252                    $trna->ss(".".$trna->ss());
1253                    if ($trna->strand() eq "+")
1254                    {
1255                        $trna->start($trna->start() - 1);
1256                    }
1257                    else
1258                    {
1259                        $trna->end($trna->end() + 1);
1260                    }
1261                    my @ar_ac_pos = $trna->ar_ac_pos();
1262                    if (scalar(@ar_ac_pos) > 0)
1263                    {
1264                        $ar_ac_pos[0]->{rel_start} += 1;
1265                        $ar_ac_pos[0]->{rel_end} += 1;
1266                        $trna->ar_ac_pos(@ar_ac_pos);
1267                    }
1268
1269                    $rescore = 1;
1270                }
1271            }
1272            elsif (substr($trna->ss(), 0, 4) eq ".>.>" and substr($trna->seq(), 0, 2) eq "CG")
1273            {
1274                my $pos71 = "";
1275                if (substr($trna->seq(), length($trna->seq())-3) eq "CCA" and substr($trna->ss(), length($trna->ss())-5) eq ".....")
1276                {
1277					$pos71 = substr($trna->seq(), length($trna->ss())-6, 1);
1278				}
1279				elsif (substr($trna->ss(), length($trna->ss())-5) eq "<<<..")
1280                {
1281                    $pos71 = substr($trna->seq(), length($trna->ss())-3, 1);
1282                }
1283                if (&rev_comp_seq(uc($pos71)) eq uc(substr($trna->seq(), 2, 1)))
1284                {
1285					$trna->seq(substr($trna->seq(), 1, 1).uc(substr($trna->seq(), 2, 1)).substr($trna->seq(), 3));
1286                    $trna->ss(substr($trna->ss(), 0, 2).substr($trna->ss(), 3));
1287                    if ($trna->strand() eq "+")
1288                    {
1289                        $trna->start($trna->start() + 1);
1290                    }
1291                    else
1292                    {
1293                        $trna->end($trna->end() - 1);
1294                    }
1295                    my @ar_ac_pos = $trna->ar_ac_pos();
1296                    if (scalar(@ar_ac_pos) > 0)
1297                    {
1298                        $ar_ac_pos[0]->{rel_start} -= 1;
1299                        $ar_ac_pos[0]->{rel_end} -= 1;
1300                        $trna->ar_ac_pos(@ar_ac_pos);
1301                    }
1302                    $rescore = 1;
1303				}
1304            }
1305		}
1306	}
1307
1308    if ($rescore)
1309    {
1310		$self->rescore_tRNA($global_vars, $trna, $trna);
1311	}
1312}
1313
1314# Fix archaeal His for incorrect bulge
1315sub fix_His
1316{
1317    my $self = shift;
1318    my ($global_vars, $trna) = @_;
1319    my $opts = $global_vars->{options};
1320    my $rescore = 0;
1321
1322    my $inf = $trna->get_domain_model("infernal");
1323
1324    if ($inf->{score} > 35 and $trna->isotype() eq "His" and $opts->arch_mode())
1325    {
1326        my $pos5 = "";
1327        my $pos68 = "";
1328        my $mid = "";
1329		if (substr($trna->ss(), 0, 9) eq ">>>>.>>>." and substr($trna->ss(), length($trna->ss())-9) eq "<<<.<<<<.")
1330        {
1331            $pos5 = uc(substr($trna->seq(), 4, 1));
1332            $pos68 = uc(substr($trna->seq(), length($trna->seq())-6, 1));
1333            $mid = substr($trna->seq(), 5, length($trna->seq())-11);
1334		}
1335		if (($pos5 eq "A" and $pos68 eq "T") or ($pos5 eq "T" and $pos68 eq "A") or
1336            ($pos5 eq "G" and $pos68 eq "C") or ($pos5 eq "C" and $pos68 eq "G") or
1337            ($pos5 eq "G" and $pos68 eq "T") or ($pos5 eq "T" and $pos68 eq "G"))
1338        {
1339            $trna->upstream($trna->upstream().substr($trna->seq(), 0, 1));
1340            $trna->downstream(substr($trna->seq(), length($trna->seq()) - 1).$trna->downstream());
1341			$trna->seq(substr($trna->seq(), 1, 3).$pos5.$mid.$pos68.substr($trna->seq(), length($trna->seq())-5, 4));
1342			$trna->ss(substr($trna->ss(), 1, 3).">".substr($trna->ss(), 5, length($trna->ss())-11)."<".substr($trna->ss(), length($trna->ss())-5, 3).".");
1343            $trna->start($trna->start() + 1);
1344            $trna->end($trna->end() - 1);
1345            my @ar_ac_pos = $trna->ar_ac_pos();
1346            if (scalar(@ar_ac_pos) > 0)
1347            {
1348                for (my $i = 0; $i < scalar(@ar_ac_pos); $i++)
1349                {
1350                    $ar_ac_pos[$i]->{rel_start} -= 1;
1351                    $ar_ac_pos[$i]->{rel_end} -= 1;
1352                }
1353                $trna->ar_ac_pos(@ar_ac_pos);
1354            }
1355            $self->rescore_tRNA($global_vars, $trna, $trna);
1356		}
1357	}
1358}
1359
1360# Get tRNA anticodon, isotype, and intron location
1361sub decode_mito_tRNA_properties
1362{
1363    my $self = shift;
1364    my ($global_vars, $trna, $prescan_tRNA, $cur_cm_file) = @_;
1365    my $global_constants = $global_vars->{global_constants};
1366    my $opts = $global_vars->{options};
1367    my $gc = $global_vars->{gc};
1368    my $log = $global_vars->{log_file};
1369
1370    my ($anticodon, $acodon_index, $trna_type, $intron, $istart, $iend,
1371          $antiloop_index, $antiloop_end, $trna_len, $scan_len);
1372
1373    $anticodon = "ERR";
1374
1375    ($anticodon, $antiloop_index, $antiloop_end, $acodon_index) = $self->find_anticodon($opts, $trna, $gc);
1376    $trna->anticodon($anticodon);
1377    $trna->add_ac_pos($acodon_index, $acodon_index + 2);
1378
1379    # check for problem parsing anticodon loop
1380    if (($anticodon eq $gc->undef_anticodon()) || ($trna->seq() eq 'Error'))
1381    {
1382        $trna->anticodon($gc->undef_anticodon());
1383        $intron = 0;
1384
1385        if ($opts->save_odd_struct())
1386        {
1387            open(ODDTRNA, ">>".$opts->odd_struct_file()) ||
1388                die "FATAL: Can't open ".$opts->odd_struct_file()." to save seconary structures\n\n";
1389            print ODDTRNA $prescan_tRNA->tRNAscan_id()." (".$trna->start()."-".$trna->end()."):\n".$trna->seq()."\n".$trna->ss()."\n\n";
1390            close(ODDTRNA);
1391        }
1392    }
1393
1394    ($intron, $istart, $iend) = $self->find_intron($trna->seq(), $antiloop_index, $antiloop_end);
1395
1396    if ($intron)
1397    {
1398        my $intron_start = 0;
1399        my $intron_end = 0;
1400        if ($trna->strand() eq "+")
1401        {
1402            $intron_start = $istart + $trna->start() - 1;
1403            $intron_end = $iend + $trna->start() - 1
1404        }
1405        else
1406        {
1407            $intron_start = $trna->end() - $iend + 1;
1408            $intron_end = $trna->end() - $istart + 1;
1409        }
1410    }
1411
1412    my $isotype = $gc->get_tRNA_type($self, $trna->anticodon(), $cur_cm_file, $trna->model(), 0);
1413    my $vert_mito_isotype = $gc->get_vert_mito_type($trna->anticodon());
1414
1415    my $model_iso = $trna->model();
1416    my $model_ac = "";
1417
1418    if (length($trna->model()) > 3)
1419    {
1420        $model_iso = substr($trna->model(), 0, 3);
1421        $model_ac = substr($trna->model(), 3);
1422    }
1423    if ($model_iso ne $isotype)
1424    {
1425        if ($trna->category() eq "")
1426        {
1427            $trna->category("mito_iso_conflict");
1428            $trna->note("(".$isotype.")");
1429        }
1430        $log->broadcast($prescan_tRNA->tRNAscan_id().".trna".$trna->id()." - isotype/anticondon conflict\t Model: ".$trna->model()." Detected: ".$isotype.$anticodon."\n".
1431                        $trna->seq()."\n".$trna->ss()."\n");
1432    }
1433    if ($model_ac ne "" and $model_ac ne $anticodon)
1434    {
1435        if ($trna->category() eq "")
1436        {
1437            $trna->category("mito_mismatch_ac");
1438            $trna->note("(".$model_ac.")");
1439        }
1440    }
1441    if ($vert_mito_isotype eq "")
1442    {
1443        if ($trna->category() eq "")
1444        {
1445            $trna->category("mito_noncanonical_ac");
1446        }
1447    }
1448
1449    $trna->isotype($model_iso);
1450}
1451
1452sub scan_noncanonical_introns
1453{
1454    my $self = shift;
1455    my ($global_vars, $seq_name) = @_;
1456    my $global_constants = $global_vars->{global_constants};
1457    my $opts = $global_vars->{options};
1458    my $log = $global_vars->{log_file};
1459    my $sp_int_results = $global_vars->{sp_int_results};
1460    my $sp_tRNAs = $global_vars->{sp_tRNAs};
1461    my $tmp_trnaseq_file = $global_constants->get("tmp_trnaseq_file");
1462
1463    my $tRNA = tRNAscanSE::tRNA->new;
1464    my $tRNA_copy = tRNAscanSE::tRNA->new;
1465    my $cm_intron = tRNAscanSE::tRNA->new;
1466    my $cm_intron2 = tRNAscanSE::tRNA->new;
1467    my $previous_intron_len = 0;
1468    my $rnd2 = 0;
1469    my $add_ci = 0;
1470
1471    my $arrayCMscanResults = tRNAscanSE::ArrayCMscanResults->new;
1472    my $cms_merge_file_rnd1 = $global_constants->get("temp_dir")."/tscan$$"."_intron_merge.out";
1473    my $count = $self->run_cmsearch_intron($global_vars, $seq_name, $arrayCMscanResults, 0, $cms_merge_file_rnd1, 1);
1474    if ($cms_merge_file_rnd1 eq "")
1475    {
1476        $log->error("Fail to run infernal for $seq_name");
1477        return 0;
1478    }
1479
1480    $sp_int_results->clear_index();
1481    $sp_int_results->open_file("write");
1482
1483    $arrayCMscanResults->open_file($cms_merge_file_rnd1);
1484    $arrayCMscanResults->get_next_cmsearch_hit($cm_intron);
1485
1486    for (my $i = 0; $i < $sp_tRNAs->get_count(); $i++)
1487    {
1488        $rnd2 = 0;
1489        $add_ci = 0;
1490        $previous_intron_len = 0;
1491        $tRNA = $sp_tRNAs->get($i);
1492        $tRNA_copy->copy($tRNA);
1493        my $padded_seq = $tRNA->upstream().$tRNA->seq().$tRNA->downstream();
1494        if ($cm_intron->seqname() ne $tRNA->seqname().".trna".&pad_num($tRNA->id(), 6))
1495        {
1496			if ($tRNA->get_intron_count() == 0)
1497            {
1498                $sp_int_results->write_tRNA($tRNA);
1499                next;
1500			}
1501			else
1502            {
1503                $padded_seq = $tRNA->upstream().$tRNA->mat_seq().$tRNA->downstream();
1504                $rnd2 = 1;
1505                $add_ci = 1;
1506            }
1507		}
1508        else
1509        {
1510            while (($cm_intron->seqname() ne "") and ($cm_intron->seqname() eq $tRNA->seqname().".trna".&pad_num($tRNA->id(), 6)))
1511            {
1512                my ($ret_value, $duplicate, $clip_seq, $intron_len) = $self->check_intron_validity($global_vars, $tRNA, $cm_intron, $padded_seq, $previous_intron_len);
1513                if ($ret_value)
1514                {
1515					$padded_seq = $clip_seq;
1516                    $previous_intron_len = $intron_len;
1517                    $rnd2 = 1;
1518                    if ($duplicate)
1519                    {
1520                        $add_ci = 1;
1521                    }
1522				}
1523
1524                $arrayCMscanResults->get_next_cmsearch_hit($cm_intron);
1525            }
1526        }
1527
1528        if ($rnd2)
1529        {
1530            my $trna_file = tRNAscanSE::Sequence->new;
1531            $trna_file->open_file($global_constants->get("tmp_trnaseq_file"), "write");
1532            $trna_file->set_seq_info($tRNA->seqname().".trna".&pad_num($tRNA->id(), 6), $tRNA->tRNAscan_id(), length($padded_seq), $padded_seq);
1533            $trna_file->write_fasta();
1534            $trna_file->close_file();
1535
1536            my $rnd2IntronScanResults = tRNAscanSE::ArrayCMscanResults->new;
1537            my $cms_merge_file_rnd2 = $global_constants->get("temp_dir")."/tscan$$"."_intron_rnd2_merge.out";
1538            my $count = $self->run_cmsearch_intron($global_vars, $tRNA->tRNAscan_id(), $rnd2IntronScanResults, 0, $cms_merge_file_rnd2, 2);
1539            if ($cms_merge_file_rnd2 eq "")
1540            {
1541                $log->error("Fail to run infernal for ".$tRNA->tRNAscan_id());
1542                return 0;
1543            }
1544
1545            $previous_intron_len = 0;
1546            $rnd2IntronScanResults->open_file($cms_merge_file_rnd2);
1547            $rnd2IntronScanResults->get_next_cmsearch_hit($cm_intron2);
1548            while ($cm_intron2->seqname() ne "")
1549            {
1550                my ($ret_value, $duplicate, $clip_seq, $intron_len) = $self->check_intron_validity($global_vars, $tRNA, $cm_intron2, $padded_seq, $previous_intron_len);
1551                if ($ret_value)
1552                {
1553                    $padded_seq = $clip_seq;
1554                    $previous_intron_len = $intron_len;
1555                    if ($duplicate)
1556                    {
1557                        $add_ci = 1;
1558                    }
1559                }
1560
1561                $rnd2IntronScanResults->get_next_cmsearch_hit($cm_intron2);
1562            }
1563            $rnd2IntronScanResults->close_file();
1564        }
1565
1566        my @ar_introns = $tRNA->ar_introns();
1567        my $nci_count = 0;
1568        my $ci_index = -1;
1569        for (my $i = 0; $i < scalar(@ar_introns); $i++)
1570        {
1571            if ($ar_introns[$i]->{type} eq "NCI")
1572            {
1573                $nci_count++;
1574            }
1575            elsif ($ar_introns[$i]->{type} eq "CI")
1576            {
1577                $ci_index = $i;
1578            }
1579        }
1580
1581        if ($nci_count > 0)
1582        {
1583            my $ci_seq = "";
1584			if ($ci_index > -1)
1585            {
1586                if ($add_ci)
1587                {
1588					$ci_seq = $ar_introns[$ci_index]->{seq};
1589				}
1590				$tRNA->remove_intron($ci_index);
1591			}
1592			$tRNA->sort_introns();
1593
1594            if ($add_ci)
1595            {
1596                $self->add_canonical_intron($global_vars, $tRNA, $ci_seq);
1597            }
1598            $self->decode_nci_tRNA_properties($global_vars, $tRNA);
1599
1600            $tRNA->tRNAscan_id($tRNA->seqname().".tRNA".$tRNA->id()."-".$tRNA->isotype().$tRNA->anticodon());
1601            $tRNA->set_default_scores();
1602            $sp_int_results->write_tRNA($tRNA);
1603		}
1604		else
1605        {
1606            $sp_int_results->write_tRNA($tRNA_copy);
1607        }
1608    }
1609
1610    $sp_int_results->close_file();
1611    $arrayCMscanResults->close_file();
1612}
1613
1614sub add_canonical_intron
1615{
1616    my $self = shift;
1617    my ($global_vars, $tRNA, $ci_seq) = @_;
1618    my $global_constants = $global_vars->{global_constants};
1619    my $log = $global_vars->{log_file};
1620    my $ret_value = 1;
1621
1622    my $mat_seq = "";
1623    my $precursor_seq = $tRNA->seq();
1624    my @introns = $tRNA->ar_introns();
1625
1626    my $index = index(uc($precursor_seq), uc($ci_seq));
1627    if ($index > -1)
1628    {
1629        $precursor_seq = substr($precursor_seq, 0, $index).lc($ci_seq).substr($precursor_seq, $index + length($ci_seq));
1630    }
1631    for (my $i = 0; $i < scalar(@introns); $i++)
1632    {
1633        if ($i == 0)
1634        {
1635            $mat_seq = substr($precursor_seq, 0, $introns[$i]->{rel_start} - 1);
1636        }
1637        else
1638        {
1639            $mat_seq .= substr($precursor_seq, $introns[$i-1]->{rel_end}, $introns[$i]->{rel_start} - $introns[$i-1]->{rel_end} - 1);
1640        }
1641    }
1642    $mat_seq .= substr($precursor_seq, $introns[scalar(@introns)-1]->{rel_end});
1643
1644    if (uc($mat_seq) ne uc($tRNA->mat_seq()))
1645    {
1646        my $trna_file = tRNAscanSE::Sequence->new;
1647        $trna_file->open_file($global_constants->get("tmp_trnaseq_file"), "write");
1648        $trna_file->set_seq_info($tRNA->seqname().".trna".&pad_num($tRNA->id(), 6), $tRNA->tRNAscan_id(), length($mat_seq), $mat_seq);
1649        $trna_file->write_fasta();
1650        $trna_file->close_file();
1651
1652        my $scan_flag = 0;
1653        my $cms_output_file = "";
1654        my $file_idx = -1;
1655        my $arrayCMscanResults = tRNAscanSE::ArrayCMscanResults->new;
1656        my $cms_merge_file = $global_constants->get("temp_dir")."/tscan$$"."_add_intron_merge.out";
1657        my $cm_tRNA = tRNAscanSE::tRNA->new;
1658
1659        foreach my $cur_cm (sort keys %{$self->{main_cm_file_path}})
1660        {
1661            $cms_output_file = $global_constants->get("temp_dir")."/tscan$$"."_add_intron_".$cur_cm.".out";
1662            if ($self->exec_cmsearch($self->{main_cm_file_path}->{$cur_cm}, $scan_flag, $global_constants->get("tmp_trnaseq_file"), $tRNA->tRNAscan_id(), $cms_output_file, $log, $self->{cm_cutoff}))
1663            {
1664                $file_idx = $arrayCMscanResults->add_file($cms_output_file, $cur_cm);
1665                $arrayCMscanResults->merge_result_file($file_idx, 0);
1666            }
1667            else
1668            {
1669                $ret_value = 0;
1670            }
1671        }
1672        $arrayCMscanResults->write_merge_file($cms_merge_file, 1);
1673
1674        if ($arrayCMscanResults->get_result_count() > 0)
1675        {
1676            $arrayCMscanResults->open_file($cms_merge_file);
1677            $arrayCMscanResults->get_next_cmsearch_hit($cm_tRNA);
1678
1679            if ($cm_tRNA->seqname() ne "")
1680            {
1681                if ((uc(substr($cm_tRNA->seq(), length($cm_tRNA->seq()) - 3)) ne "CCA") and
1682                    (substr($cm_tRNA->ss(), length($cm_tRNA->ss()) - 4) eq "...."))
1683                {
1684                    if ($cm_tRNA->strand() eq "+")
1685                    {
1686                        $cm_tRNA->end($cm_tRNA->end() - 3);
1687                    }
1688                    else
1689                    {
1690                        $cm_tRNA->start($cm_tRNA->start() + 3);
1691                    }
1692                    $cm_tRNA->seq(substr($cm_tRNA->seq(), 0, length($cm_tRNA->seq()) - 3));
1693                    $cm_tRNA->ss(substr($cm_tRNA->ss(), 0, length($cm_tRNA->ss()) - 3));
1694                }
1695
1696                $tRNA->ss($cm_tRNA->ss());
1697                $tRNA->mat_seq($cm_tRNA->seq());
1698                $tRNA->mat_ss($tRNA->ss());
1699                $tRNA->update_domain_model("infernal", $cm_tRNA->score(), $cm_tRNA->score(), 0, 0);
1700                $tRNA->model($cm_tRNA->model());
1701            }
1702            $arrayCMscanResults->close_file();
1703       }
1704    }
1705    return $ret_value;
1706}
1707
1708# Run Infernal cmsearch for noncanonical introns
1709sub run_cmsearch_intron
1710{
1711    my $self = shift;
1712    my ($global_vars, $seq_name, $arrayCMscanResults, $merge_range, $cms_merge_file, $round) = @_;
1713    my $global_constants = $global_vars->{global_constants};
1714    my $log = $global_vars->{log_file};
1715
1716    my $scan_flag = 5;
1717    my $cms_output_file = "";
1718    my $file_idx = -1;
1719
1720    # run cmsearch
1721    foreach my $cur_cm (sort keys %{$self->{intron_cm_file_path}})
1722    {
1723        $cms_output_file = $global_constants->get("temp_dir")."/tscan$$"."_intron_".$cur_cm."_rnd".$round.".out";
1724        if ($self->exec_cmsearch($self->{intron_cm_file_path}->{$cur_cm}, $scan_flag, $global_constants->get("tmp_trnaseq_file"), $seq_name, $cms_output_file, $log, $self->{BHB_cm_cutoff}))
1725        {
1726            $file_idx = $arrayCMscanResults->add_file($cms_output_file, $cur_cm);
1727            $arrayCMscanResults->merge_result_file($file_idx, $merge_range);
1728        }
1729        else
1730        {
1731            return ("", 0);
1732        }
1733    }
1734    $arrayCMscanResults->write_merge_file($cms_merge_file, 0);
1735    my $count = $arrayCMscanResults->get_result_count();
1736
1737    return $count;
1738}
1739
1740sub check_intron_validity
1741{
1742    my $self = shift;
1743    my ($global_vars, $tRNA, $cm_intron, $padded_seq, $previous_intron_len) = @_;
1744    my $global_constants = $global_vars->{global_constants};
1745    my $log = $global_vars->{log_file};
1746    my $ret_value = 1;
1747    my $duplicate = 0;
1748
1749    my ($pre_intron, $intron, $post_intron, $pre_intron_seq, $intron_seq, $post_intron_seq);
1750    if ($cm_intron->ss() =~ /^([\<\-\.]{11,})(\-\<[<.]+[_.]{4,}[>.]{9,}\-[.]*\-)([-.>]+)$/)
1751    {
1752        $pre_intron  = $1;
1753        $intron      = $2;
1754        $post_intron = $3;
1755        my $full_intron_seq = $cm_intron->seq();
1756        $full_intron_seq =~ s/U/T/g;
1757        $full_intron_seq =~ s/u/t/g;
1758        $cm_intron->seq($full_intron_seq);
1759        $intron_seq = substr($cm_intron->seq(), length($pre_intron), length($intron));
1760        $pre_intron_seq = substr($cm_intron->seq(), 0, length($pre_intron));
1761        $post_intron_seq = substr($cm_intron->seq(), length($pre_intron) + length($intron));
1762        $intron_seq =~ s/-//g;
1763        $pre_intron_seq =~ s/-//g;
1764        $post_intron_seq =~ s/-//g;
1765
1766        $log->debug("Found intron ".$intron_seq." for ".$tRNA->tRNAscan_id());
1767    }
1768    else
1769    {
1770        $log->warning("Fail to parse intron sequence for ".$cm_intron->seqname()." from ".$cm_intron->seq()."\t".$cm_intron->ss());
1771        return 0;
1772    }
1773
1774    my $padded_full_seq = $tRNA->upstream().$tRNA->seq().$tRNA->downstream();
1775    my ($upstream_start, $upstream_end, $downstream_start, $downstream_end);
1776    if ($tRNA->strand() eq "+")
1777    {
1778        $upstream_start = $tRNA->start() - length($tRNA->upstream());
1779        $downstream_end = $tRNA->end() + length($tRNA->downstream());
1780    }
1781    else
1782    {
1783        $downstream_start = $tRNA->start() - length($tRNA->downstream());
1784        $upstream_end = $tRNA->end() + length($tRNA->upstream());
1785    }
1786
1787    my $clip_seq = substr($padded_seq, 0, $cm_intron->start() - $previous_intron_len + length($pre_intron_seq) - 1) . substr($padded_seq, $cm_intron->end() - $previous_intron_len - length($post_intron_seq));
1788    my $trna_file = tRNAscanSE::Sequence->new;
1789    $trna_file->open_file($global_constants->get("tmp_trnaseq_file"), "write");
1790    $trna_file->set_seq_info($tRNA->seqname().".trna".&pad_num($tRNA->id(), 6), $tRNA->tRNAscan_id(), length($clip_seq), $clip_seq);
1791    $trna_file->write_fasta();
1792    $trna_file->close_file();
1793
1794    my $scan_flag = 0;
1795    my $cms_output_file = "";
1796    my $file_idx = -1;
1797    my $arrayCMscanResults = tRNAscanSE::ArrayCMscanResults->new;
1798    my $cms_merge_file = $global_constants->get("temp_dir")."/tscan$$"."_clip_intron_merge.out";
1799    my $cm_tRNA = tRNAscanSE::tRNA->new;
1800
1801    foreach my $cur_cm (sort keys %{$self->{main_cm_file_path}})
1802    {
1803        $cms_output_file = $global_constants->get("temp_dir")."/tscan$$"."_clip_intron_".$cur_cm.".out";
1804        if ($self->exec_cmsearch($self->{main_cm_file_path}->{$cur_cm}, $scan_flag, $global_constants->get("tmp_trnaseq_file"), $tRNA->tRNAscan_id(), $cms_output_file, $log, $self->{cm_cutoff}))
1805        {
1806            $file_idx = $arrayCMscanResults->add_file($cms_output_file, $cur_cm);
1807            $arrayCMscanResults->merge_result_file($file_idx, 0);
1808        }
1809        else
1810        {
1811            $ret_value = 0;
1812        }
1813    }
1814    $arrayCMscanResults->write_merge_file($cms_merge_file, 1);
1815
1816    if ($arrayCMscanResults->get_result_count() > 0 and $ret_value)
1817    {
1818        $arrayCMscanResults->open_file($cms_merge_file);
1819        $arrayCMscanResults->get_next_cmsearch_hit($cm_tRNA);
1820
1821        while ($cm_tRNA->seqname() ne "")
1822        {
1823            $duplicate = 0;
1824            $ret_value = 1;
1825
1826            if ((uc(substr($cm_tRNA->seq(), length($cm_tRNA->seq()) - 3)) ne "CCA") and
1827                (substr($cm_tRNA->ss(), length($cm_tRNA->ss()) - 4) eq "...."))
1828            {
1829                if ($cm_tRNA->strand() eq "+")
1830                {
1831                    $cm_tRNA->end($cm_tRNA->end() - 3);
1832                }
1833                else
1834                {
1835                    $cm_tRNA->start($cm_tRNA->start() + 3);
1836                }
1837                $cm_tRNA->seq(substr($cm_tRNA->seq(), 0, length($cm_tRNA->seq()) - 3));
1838                $cm_tRNA->ss(substr($cm_tRNA->ss(), 0, length($cm_tRNA->ss()) - 3));
1839            }
1840
1841            my ($intron_start, $intron_end);
1842            my $upstream_len = $cm_tRNA->start() - 1;
1843            my $downstream_len = length($clip_seq) - $cm_tRNA->end();
1844            my $seq = substr($padded_full_seq, $cm_tRNA->start() - 1, length($padded_full_seq) - $upstream_len - $downstream_len);
1845            my $pos = index(uc($seq), uc($intron_seq));
1846            if ($pos == -1)
1847            {
1848                $ret_value = 0;
1849                $log->debug("Intron out of boundary for ".$tRNA->tRNAscan_id()." ".$intron_seq);
1850            }
1851            else
1852            {
1853                $intron_start = $pos + 1;
1854                $intron_end = length($intron_seq) + $pos;
1855                my @ar_introns = $tRNA->ar_introns();
1856                for (my $i = 0; $i < scalar(@ar_introns); $i++)
1857                {
1858                    if ($ar_introns[$i]->{rel_start} == $intron_start and $ar_introns[$i]->{rel_end} == $intron_end)
1859                    {
1860                        $duplicate = 1;
1861                        $log->debug("Duplicate intron detected for ".$tRNA->tRNAscan_id()." ".$intron_seq);
1862                        last;
1863					}
1864                    elsif ($ar_introns[$i]->{type} eq "CI" and
1865                           length($ar_introns[$i]->{seq}) > 40 and
1866                           $ar_introns[$i]->{rel_start} < $intron_start and $ar_introns[$i]->{rel_end} > $intron_start and
1867                           $ar_introns[$i]->{rel_start} < $intron_end and $ar_introns[$i]->{rel_end} > $intron_end)
1868                    {
1869                        $ret_value = 0;
1870                        $log->debug("Inclusion of intron ".$tRNA->tRNAscan_id()." ".$intron_seq);
1871                        last;
1872                    }
1873                    elsif ($ar_introns[$i]->{type} eq "CI" and
1874#                           length($ar_introns[$i]->{seq}) > 40 and
1875                            $ar_introns[$i]->{rel_start} == $intron_start and
1876                           &seg_overlap($ar_introns[$i]->{rel_start}, $ar_introns[$i]->{rel_end}, $intron_start, $intron_end, 0))
1877                    {
1878                        $ret_value = 0;
1879                        $log->debug("Overlap with intron ".$tRNA->tRNAscan_id()." ".$intron_seq);
1880                        last;
1881                    }
1882                }
1883            }
1884
1885            my ($start, $end);
1886            if ($tRNA->strand() eq "+")
1887            {
1888                $upstream_end = $upstream_start + $upstream_len - 1;
1889                $downstream_start = $downstream_end - $downstream_len + 1;
1890                $start = $upstream_end + 1;
1891                $end = $downstream_start - 1;
1892            }
1893            else
1894            {
1895                $downstream_end = $downstream_start + $downstream_len - 1;
1896                $upstream_start = $upstream_end - $upstream_len + 1;
1897                $start = $downstream_end + 1;
1898                $end = $upstream_start - 1;
1899            }
1900
1901            my $hit_overlap = &seg_overlap($tRNA->start(), $tRNA->end(), $start, $end, 40);
1902            if ($ret_value and $hit_overlap and $cm_tRNA->score() > $tRNA->score() and length($cm_tRNA->seq()) >= $self->{min_tRNA_no_intron})
1903            {
1904                $tRNA->upstream(substr($clip_seq, 0, $upstream_len));
1905                $tRNA->downstream(substr($clip_seq, $cm_tRNA->end()));
1906                $tRNA->seq($seq);
1907                $tRNA->ss($cm_tRNA->ss());
1908                $tRNA->mat_seq($cm_tRNA->seq());
1909                $tRNA->mat_ss($tRNA->ss());
1910                $tRNA->start($start);
1911                $tRNA->end($end);
1912                $tRNA->update_domain_model("infernal", $cm_tRNA->score(), $cm_tRNA->score(), 0, 0);
1913                $tRNA->set_default_scores();
1914                $tRNA->model($cm_tRNA->model());
1915                if (!$duplicate)
1916                {
1917                    $tRNA->add_rel_intron($intron_start, $intron_end, "NCI", $intron_seq);
1918                }
1919				last;
1920			}
1921            else
1922            {
1923                $ret_value = 0;
1924            }
1925            $arrayCMscanResults->get_next_cmsearch_hit($cm_tRNA);
1926        }
1927        $arrayCMscanResults->close_file();
1928    }
1929
1930    return ($ret_value, $duplicate, $clip_seq, length($intron_seq));
1931}
1932
1933sub parse_covels_hit
1934{
1935    my $self = shift;
1936    my ($opts, $covels_hit, $r_covels_hit_elements, $prescan_tRNA) = @_;
1937
1938    my $covels_hit_found = 0;
1939
1940    if ($covels_hit =~ /^\s*(\S+)\s+(\d+)\s+(\d+).+: (\S+)\s*/o)
1941    {
1942        $r_covels_hit_elements->{score} = $1;
1943        $r_covels_hit_elements->{subseq_start} = $2;
1944        $r_covels_hit_elements->{subseq_end} = $3;
1945        $r_covels_hit_elements->{hit_seqname} = $4;
1946        $covels_hit_found = 1;
1947    }
1948
1949    if ($covels_hit_found)
1950    {
1951        if ($prescan_tRNA->strand() eq "+")
1952        {
1953            $r_covels_hit_elements->{tRNA_start} = $prescan_tRNA->start() + $r_covels_hit_elements->{subseq_start} - 1;
1954            $r_covels_hit_elements->{tRNA_end} = $prescan_tRNA->start() + $r_covels_hit_elements->{subseq_end} - 1;
1955        }
1956        else
1957        {
1958            $r_covels_hit_elements->{tRNA_start} = $prescan_tRNA->start() - $r_covels_hit_elements->{subseq_start} + 1;
1959            $r_covels_hit_elements->{tRNA_end} = $prescan_tRNA->start() - $r_covels_hit_elements->{subseq_end} + 1;
1960        }
1961
1962        return 1;
1963    }
1964    else
1965    {
1966        return 0;
1967    }
1968}
1969
1970# Run covels, return hits in $covels_hit_list array
1971sub run_covels
1972{
1973    my $self = shift;
1974    my ($global_vars, $r_covels_hit_list, $r_cur_cm_file, $prescan_tRNA) = @_;
1975    my $global_constants = $global_vars->{global_constants};
1976    my $opts = $global_vars->{options};
1977    my $stats = $global_vars->{stats};
1978    my $log = $global_vars->{log_file};
1979
1980    my $scan_len = 0;
1981    my %covels_hit_elements = ();
1982
1983    $self->set_search_params($opts, \$scan_len, $r_cur_cm_file, $self->{max_cove_tRNA_length}, $prescan_tRNA);
1984
1985    # set covels reporting threshold below 0 (default) if -X param is
1986    # set below 0 by user
1987    my $report_cutoff = &min(0, $self->{cm_cutoff});
1988
1989    my $complement = "-c";
1990    if ($opts->tscan_mode() || $opts->eufind_mode() || $opts->infernal_fp())
1991    {
1992        $complement = "";
1993    }
1994
1995    # run Covels
1996    my $covels_cmd = $self->covels_bin()." $complement -w$scan_len -t$report_cutoff $$r_cur_cm_file ".$global_constants->get("tmp_trnaseq_file");
1997    $log->broadcast($covels_cmd);
1998    my $covels_output = `$covels_cmd`;
1999
2000    if ($? != 0)
2001    {
2002        $log->error("Covels-SE cannot be completed successfully for ".$prescan_tRNA->seqname().". (Exit Code: $?)");
2003        print "Exit first loop at 1\n";
2004        return 0;
2005    }
2006
2007    my ($junk, $allhits) = split(/----------\n\n/, $covels_output);
2008    @$r_covels_hit_list = split(/\n/, $allhits);
2009
2010    # count no. of hits over cutoff
2011    my $total_hits = 0;
2012    foreach my $covels_hit (@$r_covels_hit_list)
2013    {
2014        %covels_hit_elements = ();
2015        if (($self->parse_covels_hit($opts, $covels_hit, \%covels_hit_elements, $prescan_tRNA)) &&
2016            ($covels_hit_elements{score} >= $self->{cm_cutoff}))
2017        {
2018            $total_hits++;
2019        }
2020    }
2021
2022    # if no tRNAs detected when using a selenocysteine cove model,
2023    #  try main model and run again before giving up
2024    if (($total_hits == 0) &&
2025        (($$r_cur_cm_file eq $self->{Pselc_cm_file_path}) || ($$r_cur_cm_file eq $self->{Eselc_cm_file_path})))
2026    {
2027        $$r_cur_cm_file = $self->{main_cm_file_path}->{Domain};
2028
2029        # re-run Covels with main model
2030        $covels_cmd = $self->covels_bin()." -w$scan_len -t$report_cutoff $$r_cur_cm_file ".$global_constants->get("tmp_trnaseq_file");
2031        $covels_output = `$covels_cmd`;
2032        if ($? != 0)
2033        {
2034            $log->error("Covels-SE cannot be completed successfully for ".$prescan_tRNA->seqname().". (Exit Code: $?)");
2035            print "Exit first loop at 2\n";
2036            return 0;
2037        }
2038
2039        ($junk,$allhits) = split(/----------\n\n/, $covels_output);
2040        @$r_covels_hit_list = split(/\n/, $allhits);
2041    }
2042
2043    # Go thru hit list, save info for tRNA hits with sub-cutoff scores
2044    my $ct = 0;
2045    my $over_cutoff = 0;
2046    my $trna_desc = "";
2047
2048    foreach my $covels_hit (@$r_covels_hit_list)
2049    {
2050        %covels_hit_elements = ();
2051        if ($self->parse_covels_hit($opts, $covels_hit, \%covels_hit_elements, $prescan_tRNA))
2052        {
2053            $ct++;
2054            if ($covels_hit_elements{score} >= $self->{cm_cutoff})
2055            {
2056                $over_cutoff++;
2057            }
2058            else
2059            {
2060                $log->broadcast("Low covels score for ".$prescan_tRNA->tRNAscan_id().".$ct: $covels_hit_elements{score}");
2061                $trna_desc .= "(Cove Hit#$ct: $covels_hit_elements{tRNA_start}-$covels_hit_elements{tRNA_end},".
2062                    " Sc: $covels_hit_elements{score},  Len: ".(abs($covels_hit_elements{tRNA_start} - $covels_hit_elements{tRNA_end}) + 1).") ";
2063            }
2064        }
2065    }
2066
2067    # report if no scores over 0 bit reporting threshold
2068    if ($over_cutoff == 0)
2069    {
2070        if ((!$opts->results_to_stdout()) &&
2071            ($opts->eufind_mode() || $opts->tscan_mode() || $opts->use_prev_ts_run()))
2072        {
2073            $log->broadcast("Covels score(s) below cutoff for ".$prescan_tRNA->tRNAscan_id().". Skipping...");
2074        }
2075        if ($opts->save_falsepos())
2076        {
2077            my $fulltrnaDesc = "(Fp Hit: ".$prescan_tRNA->start()."-".$prescan_tRNA->end().", ".
2078                (abs($prescan_tRNA->start() - $prescan_tRNA->end()) + 1)." bp, Src: ".$prescan_tRNA->hit_source().") ".$trna_desc;
2079
2080            $stats->increment_fpos_base_ct(length($prescan_tRNA->seq()));
2081            &write_tRNA($opts->falsepos_file(), $prescan_tRNA->tRNAscan_id(), $fulltrnaDesc, $prescan_tRNA->seq(), 0);
2082        }
2083    }
2084
2085    return 1;
2086}
2087
2088sub run_coves
2089{
2090    my $self = shift;
2091    my ($log, $tmp_trnaseq_file, $seq_name, $cm_file) = @_;
2092
2093    my ($covseq, $covss, $coves_output, $junk, @coves_lines, $sec_struct, $coves_score);
2094
2095    my $coves_cmd = $self->{coves_bin}." -s $cm_file $tmp_trnaseq_file";
2096    $log->broadcast($coves_cmd);
2097    $coves_output = `$coves_cmd`;
2098    if ($? != 0)
2099    {
2100        $log->error("Coves-SE cannot be completed successfully for $seq_name. (Exit Code: $?)");
2101        return ("Error", "", -1);
2102    }
2103
2104    ($junk, $sec_struct) = split(/----------\n\n/, $coves_output);
2105    @coves_lines = split(/\n/,$sec_struct);
2106    $covseq = '';
2107    $covss = '';
2108    $coves_score = -1000;
2109    $seq_name =~ s/(\W)/\\$1/g;
2110
2111    foreach (@coves_lines)
2112    {
2113        if (/^\s+$seq_name\s([a-zA-Z\-]{1,60})\s*/)
2114        {
2115            $covseq .= $1;
2116        }
2117        if (/^\s+$seq_name\s([\.\<\>\ ]{1,60})/)
2118        {
2119            $covss .= $1;
2120        }
2121        if (/^\s*(\S+)\sbits\s:\s$seq_name/)
2122        {
2123            $coves_score = $1;
2124        }
2125    }
2126    $covss =~ s/\s//g;     #  take spaces out of alignment
2127    $covseq =~ s/-//g;     #  take '-' gaps out of seq
2128
2129    if (($covseq eq '') || ($covss eq ''))
2130    {
2131        print STDERR "Could not complete coves successfully for $seq_name ",
2132        "because unable to parse coves secondary structure string. ",
2133        "Skipping tRNA anticodon & type prediction\n";
2134        return ("Error", "", -1);
2135    }
2136
2137    return ($covseq, $covss, $coves_score);
2138}
2139
2140sub analyze_with_cove
2141{
2142    my $self = shift;
2143    my ($global_vars, $prescan_tRNA, $r_curseq_trnact) = @_;
2144    my $global_constants = $global_vars->{global_constants};
2145    my $opts = $global_vars->{options};
2146    my $stats = $global_vars->{stats};
2147    my $log = $global_vars->{log_file};
2148
2149    my (@covels_hit_list, $cur_cm_file, $cove_confirmed_ct);
2150    my ($covseq, $covss, $coves_score);
2151    my %covels_hit_elements = ();
2152    my $covels_tRNA = undef;
2153    my $cov_hit = {};
2154    my $rescore = 0;
2155
2156    $cove_confirmed_ct = 0;
2157    if (!$self->run_covels($global_vars, \@covels_hit_list, \$cur_cm_file, $prescan_tRNA))
2158    {
2159        return 0;
2160    }
2161
2162    # Loop to parse covels tRNA hit(s) and run Coves on each tRNA
2163    foreach my $covels_hit (@covels_hit_list)
2164    {
2165        $rescore = 0;
2166        %covels_hit_elements = ();
2167        if ((!$self->parse_covels_hit($opts, $covels_hit, \%covels_hit_elements, $prescan_tRNA)) ||
2168            ($covels_hit_elements{score} < $self->{cm_cutoff}))
2169        {
2170            next;
2171        }
2172        $covels_tRNA = tRNAscanSE::tRNA->new;
2173        $$r_curseq_trnact++;
2174
2175        $covels_tRNA->id($$r_curseq_trnact);
2176        $covels_tRNA->set_covels_hit(\%covels_hit_elements);
2177        $covels_tRNA->upstream($prescan_tRNA->upstream());
2178        $covels_tRNA->downstream($prescan_tRNA->downstream());
2179        if (($covels_hit_elements{subseq_start} == 1) && ($covels_hit_elements{subseq_end} == $prescan_tRNA->len()))
2180        {
2181            $covels_hit_elements{tRNA_len} = $prescan_tRNA->len();
2182        }
2183        else
2184        {
2185            # get correct subseq for coves & save to file
2186            if ($covels_hit_elements{subseq_start} < $covels_hit_elements{subseq_end})
2187            {
2188                $covels_hit_elements{tRNA_len} = $covels_hit_elements{subseq_end} - $covels_hit_elements{subseq_start} + 1;
2189                &write_tRNA($global_constants->get("tmp_trnaseq_file"), $covels_tRNA->seqname(), " ",
2190                        substr($prescan_tRNA->seq(), $covels_hit_elements{subseq_start} - 1, $covels_hit_elements{tRNA_len}), 1);
2191                if ($covels_hit_elements{subseq_start} > 1)
2192                {
2193                    $covels_tRNA->upstream($covels_tRNA->upstream() . substr($prescan_tRNA->seq(), 0, $covels_hit_elements{subseq_start} - 1));
2194                }
2195                if ($covels_hit_elements{subseq_end} < $prescan_tRNA->len())
2196                {
2197                    $covels_tRNA->downstream(substr($prescan_tRNA->seq(), $covels_hit_elements{subseq_end}) . $covels_tRNA->downstream());
2198                }
2199            }
2200            else
2201            {
2202                $covels_hit_elements{tRNA_len} = $covels_hit_elements{subseq_start} - $covels_hit_elements{subseq_end} + 1;
2203                &write_tRNA($global_constants->get("tmp_trnaseq_file"), $covels_tRNA->seqname(), " ",
2204                        &rev_comp_seq(substr($prescan_tRNA->seq(), $covels_hit_elements{subseq_end} - 1, $covels_hit_elements{tRNA_len})), 1);
2205                if ($covels_hit_elements{subseq_end} > 1)
2206                {
2207                    $covels_tRNA->upstream($covels_tRNA->upstream() . &rev_comp_seq(substr($prescan_tRNA->seq(), $covels_hit_elements{subseq_start} - 1, $global_constants->get("upstream_len"))));
2208                }
2209                if ($covels_hit_elements{subseq_start} < $prescan_tRNA->len())
2210                {
2211                    $covels_tRNA->downstream(&rev_comp_seq(substr($prescan_tRNA->seq(), $covels_hit_elements{subseq_end} - $global_constants->get("downstream_len"), $global_constants->get("downstream_len")) . $covels_tRNA->downstream()));
2212                }
2213            }
2214        }
2215        $stats->increment_coves_base_ct($covels_hit_elements{tRNA_len});
2216
2217        ($covseq, $covss, $coves_score) = $self->run_coves($log, $global_constants->get("tmp_trnaseq_file"), $prescan_tRNA->seqname(), $cur_cm_file);
2218        $covels_tRNA->seq($covseq);
2219        $covels_tRNA->ss($covss);
2220        $covels_tRNA->update_domain_model("cove", $coves_score, $coves_score, 0, 0);
2221
2222        if ((uc(substr($covels_tRNA->seq(), length($covels_tRNA->seq()) - 3)) ne "CCA") &&
2223            (substr($covels_tRNA->ss(), length($covels_tRNA->ss()) - 4) eq "....") &&
2224            ($self->{main_cm_file_path}->{Domain} ne $global_constants->get_subvalue("cove_cm", "eukaryota")) &&
2225            ($self->{main_cm_file_path}->{Domain} ne $global_constants->get_subvalue("cove_cm", "general")))
2226        {
2227            $covels_hit_elements{subseq_end} -= 3;
2228            if ($covels_tRNA->strand() eq "+")
2229            {
2230                $covels_tRNA->end($covels_tRNA->end() - 3);
2231            }
2232            else
2233            {
2234                $covels_tRNA->start($covels_tRNA->start() + 3);
2235            }
2236            $covels_tRNA->seq(substr($covels_tRNA->seq(), 0, length($covels_tRNA->seq()) - 3));
2237            $covels_tRNA->ss(substr($covels_tRNA->ss(), 0, length($covels_tRNA->ss()) - 3));
2238            $rescore = 1;
2239        }
2240        elsif ((uc(substr($covels_tRNA->seq(), length($covels_tRNA->seq()) - 3)) eq "CCA") &&
2241            (((substr($covels_tRNA->ss(), length($covels_tRNA->ss()) - 6) eq "<.....") &&
2242            (substr($covels_tRNA->seq(), length($covels_tRNA->seq()) - 5, 2) =~ /[acgtn][ACGTN]/)) ||
2243            ((substr($covels_tRNA->ss(), length($covels_tRNA->ss()) - 7) eq "<......") &&
2244            (substr($covels_tRNA->seq(), length($covels_tRNA->seq()) - 6, 3) =~ /[ACGTN][acgtn][ACGTN]/)) ||
2245            ((substr($covels_tRNA->ss(), length($covels_tRNA->ss()) - 7) eq "<......") &&
2246            (substr($covels_tRNA->seq(), length($covels_tRNA->seq()) - 6, 3) =~ /[acgtn]{2}[ACGTN]/))) &&
2247            ($self->{main_cm_file_path}->{Domain} ne $global_constants->get_subvalue("cove_cm", "eukaryota")) &&
2248            ($self->{main_cm_file_path}->{Domain} ne $global_constants->get_subvalue("cove_cm", "general")))
2249        {
2250            my $trim_len = 4;
2251            if (substr($covels_tRNA->ss(), length($covels_tRNA->ss()) - 7) eq "<......" && substr($covels_tRNA->seq(), length($covels_tRNA->seq()) - 6, 3) =~ /[acgtn]{2}[ACGTN]/)
2252            {
2253                $trim_len = 5;
2254            }
2255
2256            if ($covels_tRNA->strand() eq "+")
2257            {
2258                $covels_tRNA->end($covels_tRNA->end() - $trim_len);
2259            }
2260            else
2261            {
2262                $covels_tRNA->start($covels_tRNA->start() + $trim_len);
2263            }
2264            $covels_tRNA->seq(substr($covels_tRNA->seq(), 0, length($covels_tRNA->seq()) - $trim_len));
2265            $covels_tRNA->seq(substr($covels_tRNA->seq(), 0, length($covels_tRNA->seq()) - 1).uc(substr($covels_tRNA->seq(), length($covels_tRNA->seq()) - 1, 1)));
2266            $covels_tRNA->ss(substr($covels_tRNA->ss(), 0, length($covels_tRNA->ss()) - $trim_len));
2267            $rescore = 1;
2268        }
2269
2270        if ($rescore)
2271        {
2272            my $trna_file = tRNAscanSE::Sequence->new;
2273            $trna_file->open_file($global_constants->get("tmp_trnaseq_file"), "write");
2274            $trna_file->set_seq_info($prescan_tRNA->seqname(), $prescan_tRNA->seqname(), length($covels_tRNA->seq()), $covels_tRNA->seq());
2275            $trna_file->write_fasta();
2276            $trna_file->close_file();
2277            ($covseq, $covss, $coves_score) = $self->run_coves($log, $global_constants->get("tmp_trnaseq_file"), $prescan_tRNA->seqname(), $cur_cm_file);
2278            $covels_tRNA->update_domain_model("cove", $coves_score, $coves_score, 0, 0);
2279		}
2280
2281        # look for intron
2282        $self->decode_tRNA_properties($global_vars, $covels_tRNA, $prescan_tRNA, $cur_cm_file);
2283        $covels_tRNA->set_mature_tRNA();
2284
2285        $covels_tRNA->tRNAscan_id($covels_tRNA->seqname().".tRNA".$$r_curseq_trnact."-".$covels_tRNA->isotype().$covels_tRNA->anticodon());
2286        $covels_tRNA->src_seqlen($prescan_tRNA->src_seqlen());
2287        $covels_tRNA->src_seqid($covels_hit_elements{hit_seqname});
2288        $covels_tRNA->hit_source($prescan_tRNA->hit_source());
2289        $covels_tRNA->model($cur_cm_file);
2290        $covels_tRNA->ordered_seqname($prescan_tRNA->src_seqid());
2291        $covels_tRNA->set_default_scores();
2292
2293        if (!$self->{CM_check_for_introns})
2294        {
2295            $cove_confirmed_ct++;
2296        }
2297        $global_vars->{sp_int_results}->write_tRNA($covels_tRNA);
2298    }            # while more covels_hits
2299
2300    return $cove_confirmed_ct;
2301}
2302
2303# Format command and run Infernal cmscan
2304sub exec_cmscan
2305{
2306    my $self = shift;
2307    my ($cm_db, $scan_flag, $tmp_trnaseq_file, $seq_name, $cms_output_file, $cms_tab_file, $log) = @_;
2308
2309    my $cm_options = "-g --nohmm --toponly --notrunc";
2310    if ($scan_flag == 1)
2311    {
2312        $cm_options = "-g --mid --notrunc";
2313    }
2314    elsif ($scan_flag == 2)
2315    {
2316        $cm_options = "-g --mid --toponly --notrunc";
2317    }
2318    elsif ($scan_flag == 3)
2319    {
2320        $cm_options = "-g --mid --notrunc -T ".$self->{infernal_fp_cutoff};
2321    }
2322    elsif ($scan_flag == 4)
2323    {
2324        $cm_options = "-g --nohmm --notrunc";
2325    }
2326    if ($self->{infernal_thread} != -1)
2327    {
2328		$cm_options .= " --cpu ".$self->{infernal_thread};
2329	}
2330
2331    my $cms_cmd = "$self->{cmscan_bin} $cm_options --fmt 2 --tblout $cms_tab_file -o $cms_output_file $cm_db $tmp_trnaseq_file";
2332    $log->broadcast($cms_cmd);
2333    system($cms_cmd);
2334
2335    if ($? != 0)
2336    {
2337        $log->error("Infernal cmscan cannot be completed successfully for ".$seq_name.". $cms_cmd (Exit Code: $? $!)");
2338        return 0;
2339    }
2340    return 1;
2341}
2342
2343# Format command and run Infernal cmsearch
2344sub exec_cmsearch
2345{
2346    my $self = shift;
2347    my ($cm_file, $scan_flag, $tmp_trnaseq_file, $seq_name, $cms_output_file, $log, $score_cutoff) = @_;
2348
2349    my $cm_options = "-g --nohmm --toponly --notrunc";
2350    if ($scan_flag == 1)
2351    {
2352        $cm_options = "-g --mid --notrunc";
2353    }
2354    elsif ($scan_flag == 2)
2355    {
2356        $cm_options = "-g --mid --toponly --notrunc";
2357    }
2358    elsif ($scan_flag == 3)
2359    {
2360        $cm_options = "-g --mid --notrunc -T ".$self->{infernal_fp_cutoff};
2361    }
2362    elsif ($scan_flag == 4)
2363    {
2364        $cm_options = "-g --nohmm --notrunc";
2365    }
2366    elsif ($scan_flag == 5)
2367    {
2368        $cm_options = "-g --max --toponly --notrunc --notextw -T ".$self->{BHB_cm_cutoff};
2369    }
2370    elsif ($scan_flag == 6)
2371    {
2372        $cm_options = "-g --toponly --notextw";
2373    }
2374    elsif ($scan_flag == 7)
2375    {
2376        $cm_options = "-g --max --toponly --notrunc --notextw -T 0";
2377    }
2378
2379    if ($scan_flag != 3 and $scan_flag != 5 and $scan_flag != 7)
2380    {
2381		if ($score_cutoff <= 10)
2382        {
2383			$cm_options .= " -T ".$score_cutoff;
2384		}
2385	}
2386    if ($self->{infernal_thread} != -1)
2387    {
2388		$cm_options .= " --cpu ".$self->{infernal_thread};
2389	}
2390
2391    my $cms_cmd = "$self->{cmsearch_bin} $cm_options $cm_file $tmp_trnaseq_file > $cms_output_file";
2392    $log->broadcast($cms_cmd);
2393    system($cms_cmd);
2394
2395    if ($? != 0)
2396    {
2397        $log->error("Infernal cmsearch cannot be completed successfully for ".$seq_name.". $cms_cmd (Exit Code: $? $!)");
2398        return 0;
2399    }
2400
2401    return 1;
2402}
2403
2404# Run Infernal cmsearch, return results in $r_cms_hit_list array reference
2405sub run_cmsearch
2406{
2407    my $self = shift;
2408    my ($global_vars, $seq_name, $arrayCMscanResults, $merge_range) = @_;
2409    my $global_constants = $global_vars->{global_constants};
2410    my $opts = $global_vars->{options};
2411    my $stats = $global_vars->{stats};
2412    my $log = $global_vars->{log_file};
2413
2414    my $score_cutoff = -1;
2415    my $scan_flag = -1;
2416    my $cms_output_file = "";
2417    my $file_idx = -1;
2418    my $cms_merge_file = $global_constants->get("temp_dir")."/tscan$$"."_cm_merge.out";
2419
2420    # run cmsearch
2421    if ($opts->tscan_mode() || $opts->eufind_mode() || $opts->infernal_fp())
2422    {
2423        $scan_flag = 0;
2424        if ($opts->hmm_filter())
2425        {
2426            $scan_flag = 2;
2427        }
2428    }
2429    else
2430    {
2431        $scan_flag = 4;
2432        if ($opts->hmm_filter())
2433        {
2434            $scan_flag = 1;
2435        }
2436    }
2437
2438#    if ($opts->mito_mode())
2439#    {
2440#		$score_cutoff = $self->{organelle_cm_cutoff};
2441#	}
2442#	else
2443#   {
2444        $score_cutoff = $self->{cm_cutoff};
2445#   }
2446
2447    foreach my $cur_cm (sort keys %{$self->{main_cm_file_path}})
2448    {
2449        $cms_output_file = $global_constants->get("temp_dir")."/tscan$$"."_cm_".$cur_cm.".out";
2450        if ($self->exec_cmsearch($self->{main_cm_file_path}->{$cur_cm}, $scan_flag, $global_constants->get("tmp_trnaseq_file"), $seq_name, $cms_output_file, $log, $score_cutoff))
2451        {
2452            $file_idx = $arrayCMscanResults->add_file($cms_output_file, $cur_cm);
2453            $arrayCMscanResults->merge_result_file($file_idx, $merge_range);
2454        }
2455        else
2456        {
2457            return "";
2458        }
2459    }
2460    $arrayCMscanResults->write_merge_file($cms_merge_file, 1);
2461    my $count = $arrayCMscanResults->get_result_count();
2462
2463    return ($cms_merge_file, $count);
2464}
2465
2466# Run Infernal for scanning truncation in predicited tRNA
2467sub truncated_tRNA_search
2468{
2469    my $self = shift;
2470    my ($global_vars, $seq_name) = @_;
2471    my $global_constants = $global_vars->{global_constants};
2472    my $opts = $global_vars->{options};
2473    my $log = $global_vars->{log_file};
2474    my $arrayCMscanResults = tRNAscanSE::ArrayCMscanResults->new;
2475    my $sp_int_results = $global_vars->{sp_int_results};
2476    my $tmp_trnaseq_file = $global_constants->get("tmp_trnaseq_file");
2477    my $tRNA = tRNAscanSE::tRNA->new;
2478    my $cm_tRNA = tRNAscanSE::tRNA->new;
2479
2480    my %cms_output_file = ();
2481    my $scan_flag = 6;
2482    my $file_idx = -1;
2483    my $cms_merge_file = $global_constants->get("temp_dir")."/tscan$$"."_trunc_cm_merge.out";
2484
2485    my $sp_trunc_int_results = tRNAscanSE::IntResultFile->new;
2486    $sp_trunc_int_results->file_name($opts->truncated_int_result_file());
2487
2488    foreach my $cur_cm (sort keys %{$self->{main_cm_file_path}})
2489    {
2490        my $count = &write_tRNAs($tmp_trnaseq_file, $sp_int_results, 1, 1, $cur_cm);
2491
2492        if ($count > 0)
2493        {
2494            $cms_output_file{$cur_cm} = $global_constants->get("temp_dir")."/tscan$$"."_trunc_cm_".$cur_cm.".out";
2495            if ($self->exec_cmsearch($self->{main_cm_file_path}->{$cur_cm}, $scan_flag, $global_constants->get("tmp_trnaseq_file"), $seq_name, $cms_output_file{$cur_cm}, $log, $self->{cm_cutoff}))
2496            {
2497                $file_idx = $arrayCMscanResults->add_file($cms_output_file{$cur_cm}, $cur_cm);
2498                $arrayCMscanResults->merge_result_file($file_idx, 0);
2499            }
2500            else
2501            {
2502                return "";
2503            }
2504        }
2505    }
2506    $arrayCMscanResults->write_merge_file($cms_merge_file, 0);
2507
2508    my @sp_indexes = $sp_int_results->get_indexes();
2509    if ($sp_int_results->open_file("read") and $sp_trunc_int_results->open_file("write"))
2510    {
2511        $arrayCMscanResults->open_file($cms_merge_file);
2512        my $cm_model = $arrayCMscanResults->get_next_cmsearch_hit($cm_tRNA);
2513
2514        for (my $i = 0; $i < scalar(@sp_indexes); $i++)
2515        {
2516            $sp_int_results->get_tRNA($sp_indexes[$i]->[0], $tRNA);
2517            if (($cm_tRNA->seqname() ne "") and ($cm_tRNA->seqname() eq $tRNA->seqname().".t".&pad_num($tRNA->id(), 6)))
2518            {
2519                my $trunc_label = $self->check_truncation($tRNA, $cm_tRNA, $cm_model, $global_constants);
2520                $tRNA->trunc($trunc_label);
2521                $cm_model = $arrayCMscanResults->get_next_cmsearch_hit($cm_tRNA);
2522            }
2523            $sp_trunc_int_results->write_tRNA($tRNA);
2524        }
2525        $sp_int_results->close_file();
2526        $sp_trunc_int_results->close_file();
2527        $sp_int_results->clear_index();
2528        $global_vars->{sp_int_results} = $sp_trunc_int_results;
2529    }
2530}
2531
2532sub check_truncation
2533{
2534    my $self = shift;
2535    my ($tRNA, $trunc_tRNA, $cm_model, $global_constants) = @_;
2536
2537    my $label = "";
2538    if ($trunc_tRNA->trunc() eq "" or $trunc_tRNA->trunc() eq "no")
2539    {}
2540    else
2541    {
2542        if (index($trunc_tRNA->trunc(),"5'") > -1)
2543        {
2544            if ($cm_model =~/^\<\[(\d+)\]\*/)
2545            {
2546                $label = "trunc_start:".$1;
2547            }
2548        }
2549        if (index($trunc_tRNA->trunc(), "3'") > -1)
2550        {
2551            if ($cm_model =~/\*\[(\d+)\]\>$/)
2552            {
2553                my $diff = $1;
2554                if ($diff <= 3 and ((uc(substr($tRNA->mat_seq(), length($tRNA->mat_seq()) - 3)) ne "CCA" and index($trunc_tRNA->trunc(),"5'") == -1) or
2555                        index($trunc_tRNA->trunc(),"5'") > -1) and
2556                    ($self->{main_cm_file_path}->{Domain} ne $global_constants->get_subvalue("cm", "eukaryota")) and
2557                    ($self->{main_cm_file_path}->{Domain} ne $global_constants->get_subvalue("cm", "general")))
2558                {}
2559                else
2560                {
2561                    if ($label ne "")
2562                    {
2563						$label .= ",";
2564					}
2565                    $label .= "trunc_end:".$diff;
2566                }
2567            }
2568        }
2569    }
2570
2571    return $label;
2572}
2573
2574# Run Infernal with isotype specific CMs for predicited tRNA
2575sub isotype_cmsearch
2576{
2577    my $self = shift;
2578    my ($global_vars) = @_;
2579    my $global_constants = $global_vars->{global_constants};
2580    my $opts = $global_vars->{options};
2581    my $sp_int_results = $global_vars->{sp_int_results};
2582    my $iso_int_results = $global_vars->{iso_int_results};
2583    my $tmp_trnaseq_file = $global_constants->get("tmp_trnaseq_file");
2584    my $tRNA = tRNAscanSE::tRNA->new;
2585    my $seqname = "";
2586
2587    &write_tRNAs($tmp_trnaseq_file, $sp_int_results, 1, 1, "");
2588
2589    my @sp_indexes = $sp_int_results->get_indexes();
2590    if ($sp_int_results->open_file("read") && $iso_int_results->open_file("write"))
2591    {
2592        $iso_int_results->write_line("");
2593        for (my $i = 0; $i < scalar(@sp_indexes); $i++)
2594        {
2595            $sp_int_results->get_tRNA($sp_indexes[$i]->[0], $tRNA);
2596            $seqname = $tRNA->seqname();
2597            $iso_int_results->write_line($tRNA->seqname().".t".&pad_num($tRNA->id(), 6));
2598        }
2599        $iso_int_results->close_file();
2600        $sp_int_results->close_file();
2601    }
2602
2603    # run cmscan
2604    $self->scan_isotype_cm($global_vars, $self->{isotype_cm_db_file_path}, "cyto", $seqname);
2605    if ($opts->euk_mode() and $opts->mito_model() ne "")
2606    {
2607        $self->scan_isotype_cm($global_vars, $self->{mito_isotype_cm_db_file_path}, "mito", $seqname);
2608    }
2609}
2610
2611sub scan_isotype_cm
2612{
2613    my $self = shift;
2614    my ($global_vars, $cm_db, $type, $seq_name) = @_;
2615    my $global_constants = $global_vars->{global_constants};
2616    my $opts = $global_vars->{options};
2617    my $log = $global_vars->{log_file};
2618    my $iso_int_results = $global_vars->{iso_int_results};
2619    my $tmp_trnaseq_file = $global_constants->get("tmp_trnaseq_file");
2620    my $cmscanResults = undef;
2621    my $tRNA = tRNAscanSE::tRNA->new;
2622
2623    my $scan_flag = 2;
2624
2625    my $iso_result_file = $global_constants->get("temp_dir")."/tscan$$"."_iso_temp.out";
2626    my $iso_results_out = tRNAscanSE::MultiResultFile->new;
2627    my ($cur_iso_cm, $cur_iso_cm_file);
2628    my $cms_output_file = "";
2629    my $cms_tab_file = "";
2630    my $line = "";
2631    my $tRNAid = "";
2632    my $iso_index_ct = -1;
2633    my %found_models = ();
2634
2635    my $models = $self->get_models_from_db($cm_db);
2636
2637    $iso_results_out->file_name($iso_result_file);
2638
2639    $cms_output_file = $global_constants->get("temp_dir")."/tscan$$"."_iso_cm.out";
2640    $cms_tab_file = $global_constants->get("temp_dir")."/tscan$$"."_iso_cm.tab";
2641    if ($self->exec_cmscan($cm_db, $scan_flag, $global_constants->get("tmp_trnaseq_file"), $seq_name, $cms_output_file, $cms_tab_file, $log))
2642    {
2643        $cmscanResults = tRNAscanSE::CMscanResultFile->new($cms_tab_file, "multi");
2644    }
2645    else
2646    {
2647        return 0;
2648    }
2649
2650    if ($iso_int_results->open_file("read") && $iso_results_out->open_file("write"))
2651    {
2652        # header line
2653        $line = $iso_int_results->get_line();
2654        foreach $cur_iso_cm (sort @$models)
2655        {
2656			my $model = $cur_iso_cm;
2657        	if ($cur_iso_cm =~ /^arch-(\S+)/ || $cur_iso_cm =~ /^euk-(\S+)/ || $cur_iso_cm =~ /^bact-(\S+)/)
2658            {
2659				$model = $1;
2660			}
2661
2662            if ($type eq "mito")
2663            {
2664                $line = $line."\tmito_".$model;
2665            }
2666            else
2667            {
2668                $line = $line."\t".$model;
2669            }
2670        }
2671        $iso_results_out->write_line($line);
2672
2673        #content
2674        if ($cmscanResults->open_file())
2675        {
2676            my $hits = $cmscanResults->get_next_tab_seq_hits();
2677            ($tRNAid, $line) = $iso_int_results->get_next_record();
2678            while ($tRNAid ne "")
2679            {
2680                %found_models = ();
2681                if (scalar(@$hits) > 0)
2682                {
2683                    $iso_index_ct = 0;
2684                    if ($tRNAid eq $hits->[0]->[1])
2685                    {
2686                        foreach $cur_iso_cm (sort @$models)
2687                        {
2688                            if ($iso_index_ct < scalar(@$hits))
2689                            {
2690                                if ($cur_iso_cm eq $hits->[$iso_index_ct]->[0])
2691                                {
2692                                    if (!defined $found_models{$cur_iso_cm})
2693                                    {
2694                                        $line .= "\t".$hits->[$iso_index_ct]->[5];
2695                                        $found_models{$cur_iso_cm} = 1;
2696                                    }
2697                                    $iso_index_ct++;
2698                                }
2699                                elsif ($cur_iso_cm lt $hits->[$iso_index_ct]->[0])
2700                                {
2701                                    $line .= "\t";
2702                                }
2703                                elsif ($cur_iso_cm gt $hits->[$iso_index_ct]->[0])
2704                                {
2705                                    $iso_index_ct++;
2706                                }
2707                            }
2708                            else
2709                            {
2710                                $line .= "\t";
2711                            }
2712                        }
2713                        $iso_results_out->write_line($line);
2714
2715                        $hits = $cmscanResults->get_next_tab_seq_hits();
2716                        ($tRNAid, $line) = $iso_int_results->get_next_record();
2717                    }
2718                    elsif ($tRNAid lt $hits->[0]->[1])
2719                    {
2720                        for (my $i = 0; $i < scalar(@$models); $i++)
2721                        {
2722                            $line .= "\t";
2723                        }
2724                        $iso_results_out->write_line($line);
2725                        ($tRNAid, $line) = $iso_int_results->get_next_record();
2726                    }
2727                    elsif ($tRNAid gt $hits->[0]->[1])
2728                    {
2729                        $hits = $cmscanResults->get_next_tab_seq_hits();
2730                    }
2731                }
2732                else
2733                {
2734                    for (my $i = 0; $i < scalar(@$models); $i++)
2735                    {
2736                        $line .= "\t";
2737                    }
2738                    $iso_results_out->write_line($line);
2739                    ($tRNAid, $line) = $iso_int_results->get_next_record();
2740                }
2741            }
2742            $cmscanResults->close_file();
2743        }
2744        $iso_results_out->close_file();
2745        $iso_int_results->close_file();
2746        copy($iso_results_out->file_name(), $iso_int_results->file_name());
2747    }
2748}
2749
2750sub get_models_from_db
2751{
2752    my $self = shift;
2753    my ($cm_db) = @_;
2754    my $line = "";
2755    my @models = ();
2756    my $start = 0;
2757
2758    open(FILE_IN, "$cm_db") or die "Fail to open $cm_db\n";
2759    while ($line = <FILE_IN>)
2760    {
2761		chomp($line);
2762        if ($line =~ /^INFERNAL/)
2763        {
2764			$start = 1;
2765		}
2766        elsif ($start and $line =~ /^NAME\s+(\S+)/)
2767        {
2768			push(@models, $1);
2769            $start = 0;
2770		}
2771	}
2772	close(FILE_IN);
2773
2774    my @sorted_models = sort @models;
2775    return \@sorted_models;
2776}
2777
2778sub rescore_tRNA_cove
2779{
2780    my $self = shift;
2781    my ($global_vars, $sp_int_results) = @_;
2782    my $global_constants = $global_vars->{global_constants};
2783    my $opts = $global_vars->{options};
2784    my $log = $global_vars->{log_file};
2785    my $tmp_trnaseq_file = $global_constants->get("tmp_trnaseq_file");
2786
2787#    $sp_int_results->sort_records("tRNAscan_id");
2788
2789    &write_tRNAs($tmp_trnaseq_file, $sp_int_results, 1, 1, "");
2790
2791    # run cove
2792    my $cove_output_file = $global_constants->get("temp_dir")."/tscan$$"."_cove.out";
2793    my $coves_cmd = $self->{coves_bin}." -s ".$self->{cove_cm_file_path}." ".$tmp_trnaseq_file." > ".$cove_output_file;
2794    system($coves_cmd);
2795    if ($? != 0)
2796    {
2797        $log->error("Coves-SE for rescoring tRNAs cannot be completed successfully. (Exit Code: $?)");
2798        return ("Error", "", -1);
2799    }
2800
2801    my $line = "";
2802    my ($score, $trna_id);
2803    my $index = -1;
2804    my $cove_model = undef;
2805    my $new_int_result_file = $global_constants->get("temp_dir")."/tscan$$"."_sp_rescore.out";
2806    my $int_results_out = tRNAscanSE::IntResultFile->new;
2807    my $tRNA = tRNAscanSE::tRNA->new;
2808
2809    $int_results_out->file_name($new_int_result_file);
2810    &open_for_read(\*FILE_H, $cove_output_file);
2811    if ($sp_int_results->open_file("read") && $int_results_out->open_file("write"))
2812    {
2813        while ($line = <FILE_H>)
2814        {
2815            chomp($line);
2816            if ($line =~ /^\s*([0-9\.]+) bits : (.+)$/)
2817            {
2818                $score = $1;
2819                $trna_id = $2;
2820                $index = $sp_int_results->bsearch_tRNAscan_id($trna_id);
2821                if ($index > -1)
2822                {
2823                    $sp_int_results->get_tRNA($sp_int_results->get_pos($index), $tRNA);
2824                    $tRNA->tRNAscan_id($tRNA->seqname().".tRNA".$tRNA->id()."-".$tRNA->isotype().$tRNA->anticodon());
2825                    if ($tRNA->model() ne $self->{Pselc_cm_file_path} and $tRNA->model() ne $self->{Eselc_cm_file_path} and
2826                        $tRNA->model() ne $self->{isotype_cm_file_paths}->{SeC})
2827                    {
2828                        $cove_model = $tRNA->get_domain_model("cove");
2829                        if (!defined $cove_model)
2830                        {
2831                            $tRNA->set_domain_model("cove", $score);
2832                        }
2833                        elsif ($score > $cove_model->{score})
2834                        {
2835                            $tRNA->update_domain_model("cove", $score, $score, 0, 0);
2836                        }
2837                    }
2838                    $tRNA->set_default_scores();
2839                    $int_results_out->write_tRNA($tRNA);
2840                }
2841            }
2842        }
2843        $sp_int_results->close_file();
2844        $int_results_out->close_file();
2845        $opts->secondpass_int_result_file($new_int_result_file);
2846        $sp_int_results->clear();
2847        $sp_int_results->file_name($new_int_result_file);
2848    }
2849    close(FILE_H);
2850}
2851
2852sub rescore_tRNA
2853{
2854    my $self = shift;
2855    my ($global_vars, $cm_tRNA, $prescan_tRNA) = @_;
2856    my $global_constants = $global_vars->{global_constants};
2857    my $opts = $global_vars->{options};
2858    my $log = $global_vars->{log_file};
2859
2860    my $trna_file = tRNAscanSE::Sequence->new;
2861    $trna_file->open_file($global_constants->get("tmp_trnaseq_file"), "write");
2862    $trna_file->set_seq_info($cm_tRNA->seqname().".trna".&pad_num($cm_tRNA->id(), 6), $cm_tRNA->seqname(), length($cm_tRNA->seq()), $cm_tRNA->seq());
2863    $trna_file->write_fasta();
2864    $trna_file->close_file();
2865
2866    my $cm_file = $self->{main_cm_file_path}->{$cm_tRNA->model()};
2867    my $cms_output_file = $global_constants->get("temp_dir")."/tscan$$"."_cm_rescore.out";
2868
2869    my ($hit_score, $hit_start, $hit_end, $hit_ct) =
2870        $self->cmsearch_scoring($opts, $log, $global_constants->get("tmp_trnaseq_file"), $prescan_tRNA->tRNAscan_id(), $cm_file, $cms_output_file);
2871    $cm_tRNA->update_domain_model("infernal", $hit_score, $hit_score, 0, 0);
2872}
2873
2874# Runs Infernal cmsearch, and returns the score
2875sub cmsearch_scoring
2876{
2877    my $self = shift;
2878    my ($opts, $log, $tmp_trnaseq_file, $seq_name, $cur_cm_file, $cms_output_file) = @_;
2879
2880    my (@cms_lines, $subseq_start, $subseq_end, $evalue, $score, $besthit_score, $line,
2881        $besthit_start, $besthit_end, $hit_ct);
2882
2883    my $scan_flag = 0;
2884    if ($opts->arch_mode() or $opts->bact_mode())
2885    {
2886        $scan_flag = 7;
2887    }
2888    if (!$self->exec_cmsearch($cur_cm_file, $scan_flag, $tmp_trnaseq_file, $seq_name, $cms_output_file, $log, 0))
2889    {
2890        return 0;
2891    }
2892
2893    @cms_lines = split(/\n/, `cat $cms_output_file`);
2894    $hit_ct = 0;
2895    $score = 0;
2896    $besthit_score = 0;
2897
2898    foreach $line (@cms_lines)
2899    {
2900        if ($line =~ /^\s+\(\d+\)\s+\S+\s+([e0-9.\-]+)\s+([0-9.\-]+)\s+\S+\s+\S+\s+(\d+)\s+(\d+)\s+\S+\s+(\d+)\s+(\d+)\s+([+-])\s+\S+\s+\S+\s+(\S+)\s+([0-9.]+)/)
2901        {
2902            $evalue = $1;
2903            $score = $2;
2904            $subseq_start  = $5;
2905            $subseq_end    = $6;
2906            $hit_ct++;
2907
2908            if ($score > $besthit_score)
2909            {
2910                $besthit_score  = $score;
2911                $besthit_start  = $subseq_start;
2912                $besthit_end    = $subseq_end;
2913            }
2914        }
2915    }
2916    return ($besthit_score, $besthit_start, $besthit_end, $hit_ct);
2917}
2918
2919sub run_first_pass_cmsearch
2920{
2921    my $self = shift;
2922    my ($global_vars, $seq_name) = @_;
2923    my $global_constants = $global_vars->{global_constants};
2924    my $log = $global_vars->{log_file};
2925
2926    my $scan_flag = 3;
2927    my $cms_output_file = "";
2928    my $cms_merge_file = $global_constants->get("temp_dir")."/tscan$$"."_fp_cm_merge.out";
2929    my $arrayCMscanResults = tRNAscanSE::ArrayCMscanResults->new;
2930    my $file_idx = -1;
2931
2932    foreach my $cur_cm (sort keys %{$self->{main_cm_file_path}})
2933    {
2934        $cms_output_file = $global_constants->get("temp_dir")."/tscan$$"."_fp_cm_".$cur_cm.".out";
2935        if ($self->exec_cmsearch($self->{main_cm_file_path}->{$cur_cm}, $scan_flag, $global_constants->get("tmp_fa"), $seq_name, $cms_output_file, $log, $self->{infernal_fp_cutoff}))
2936        {
2937            $file_idx = $arrayCMscanResults->add_file($cms_output_file, $cur_cm);
2938            $arrayCMscanResults->merge_result_file($file_idx, 0);
2939        }
2940        else
2941        {
2942            return "";
2943        }
2944    }
2945    $arrayCMscanResults->write_merge_file($cms_merge_file, 1);
2946    my $count = $arrayCMscanResults->get_result_count();
2947
2948    return ($cms_merge_file, $count);
2949}
2950
2951sub process_fp_cmsearch_hits
2952{
2953    my $self = shift;
2954    my ($global_vars, $start_index, $cms_merge_file, $result_count) = @_;
2955    my $global_constants = $global_vars->{global_constants};
2956    my $fp_tRNAs = $global_vars->{fp_tRNAs};
2957    my $stats = $global_vars->{stats};
2958
2959    my $line = "";
2960    my @columns = ();
2961    my $trna = undef;
2962    my $i = 0;
2963    my $ct = 0;
2964
2965    &open_for_read(\*FILE_H, $cms_merge_file);
2966    while ($line = <FILE_H>)
2967    {
2968        $ct++;
2969        chomp($line);
2970        @columns = split(/\t/, $line);
2971
2972        $trna = tRNAscanSE::tRNA->new;
2973        $trna->seqname($columns[0]);
2974        $trna->score($columns[4]);
2975        if ($columns[3] eq "+")
2976        {
2977            $trna->start($columns[1] + $start_index);
2978            $trna->end($columns[2] + $start_index);
2979        }
2980        else
2981        {
2982            $trna->start($columns[2] + $start_index);
2983            $trna->end($columns[1] + $start_index);
2984        }
2985        $trna->strand($columns[3]);
2986        $trna->isotype("???");
2987        $trna->anticodon("???");
2988        $trna->ss($columns[5]);
2989        $trna->seq($columns[6]);
2990        $trna->model($columns[10]);
2991
2992        if ($trna->start() < $trna->end())
2993        {
2994            $trna->position($trna->start());
2995        }
2996        else
2997        {
2998            $trna->position($global_constants->get("really_big_number") - $trna->start() + 1);
2999        }
3000
3001        $trna->hit_source(0);
3002
3003        my $merge = 0;
3004        if ($ct < 3 or $ct > ($result_count - 3))
3005        {
3006            $merge = $self->merge_repeat_hit($global_vars->{stats}, $global_vars->{fp_tRNAs}, $trna);
3007        }
3008        if (!$merge)
3009        {
3010            # insert non-redundant hit in order
3011            # 'Merge_repeat_hits' depends on list being in order
3012            $i = &max(0, $i - 10);
3013            while (($i < $fp_tRNAs->get_count()) && ($fp_tRNAs->get($i)->position() < $trna->position()))
3014            {
3015                $i++;
3016            }
3017
3018            $fp_tRNAs->insert($trna, $i);
3019            $stats->increment_trnatotal();
3020        }
3021    }
3022    close(FILE_H);
3023}
3024
3025# check current hit for redundancy against all previous hits in hitlist
3026#
3027# if it IS a repeat, merge it with overlapping hit and return 1
3028# if it doesn't overlap with any hits, return 0
3029sub merge_repeat_hit
3030{
3031    my $self = shift;
3032    my ($stats, $fp_tRNAs, $trna) = @_;
3033
3034    for (my $i = 0; $i < $fp_tRNAs->get_count(); $i++)
3035    {
3036        if ($trna->strand() eq "+")
3037        {
3038            if (($fp_tRNAs->get($i)->strand() eq "+") &&
3039                (&seg_overlap($trna->start(), $trna->end(), $fp_tRNAs->get($i)->start(), $fp_tRNAs->get($i)->end(), 0)))
3040            {
3041                $fp_tRNAs->get($i)->start(&min($trna->start(), $fp_tRNAs->get($i)->start()));
3042                $fp_tRNAs->get($i)->end(&max($trna->end(), $fp_tRNAs->get($i)->end()));
3043                $fp_tRNAs->get($i)->hit_source($fp_tRNAs->get($i)->hit_source());
3044                $fp_tRNAs->get($i)->isotype($trna->isotype());
3045                $fp_tRNAs->get($i)->score($trna->score());
3046
3047                # check to see if extended endpoint overlaps i+1 hit's start boundary
3048                # if so, combine hit[i] and hit[i+1] into one hit and delete hit[i+1]
3049                if (($i != ($fp_tRNAs->get_count() - 1)) and ($fp_tRNAs->get($i+1)->strand() eq "+")
3050                    and ($fp_tRNAs->get($i)->end() >= $fp_tRNAs->get($i+1)->start()))
3051                {
3052                    $fp_tRNAs->get($i)->end(&max($fp_tRNAs->get($i)->end(), $fp_tRNAs->get($i+1)->end()));
3053                    $fp_tRNAs->get($i)->hit_source($fp_tRNAs->get($i)->hit_source() | $fp_tRNAs->get($i+1)->hit_source());
3054
3055                    $fp_tRNAs->remove($i+1);
3056                    $stats->decrement_trnatotal();
3057                }
3058                return 1;                                 # exit loop immediately
3059            }
3060        }
3061        else         # else (antisense) strand
3062        {
3063            if (($fp_tRNAs->get($i)->strand() eq "-") &&
3064                (&seg_overlap($trna->end(), $trna->start(), $fp_tRNAs->get($i)->end(), $fp_tRNAs->get($i)->start(), 0)))
3065            {
3066                $fp_tRNAs->get($i)->start(&max($trna->start(), $fp_tRNAs->get($i)->start()));
3067                $fp_tRNAs->get($i)->end(&min($trna->end(), $fp_tRNAs->get($i)->end()));
3068                $fp_tRNAs->get($i)->hit_source($fp_tRNAs->get($i)->hit_source() | $self->{eufind_mask});
3069                $fp_tRNAs->get($i)->isotype($trna->isotype());
3070                $fp_tRNAs->get($i)->score($trna->score());
3071
3072                if (($i != ($fp_tRNAs->get_count() - 1)) and
3073                    ($fp_tRNAs->get($i)->end() <= $fp_tRNAs->get($i+1)->start()))
3074                {
3075                    $fp_tRNAs->get($i)->end(&min($fp_tRNAs->get($i)->end(), $fp_tRNAs->get($i+1)->end()));
3076                    $fp_tRNAs->get($i)->hit_source($fp_tRNAs->get($i)->hit_source() | $fp_tRNAs->get($i+1)->hit_source());
3077
3078                    $fp_tRNAs->remove($i+1);
3079                    $stats->decrement_trnatotal();
3080                }
3081                return 1;                                 # exit loop immediately
3082            }
3083        } # else (antisense) strand
3084
3085    } # for each (hit)
3086
3087    return 0;                                             # current hit is not a repeat
3088}
3089
3090sub first_pass_scan
3091{
3092    my $self = shift;
3093    my ($global_vars, $start_index, $seq_name) = @_;
3094    my $opts = $global_vars->{options};
3095    my $log = $global_vars->{log_file};
3096
3097    if (!$opts->numt_mode())
3098    {
3099        my ($cms_merge_file, $count) = $self->run_first_pass_cmsearch($global_vars, $seq_name);
3100        if ($cms_merge_file eq "")
3101        {
3102            $log->error("Fail to run infernal for first pass scan");
3103            return 0;
3104        }
3105        else
3106        {
3107            $self->process_fp_cmsearch_hits($global_vars, $start_index, $cms_merge_file, $count);
3108        }
3109    }
3110
3111    return 1;
3112}
3113
3114sub analyze_alternate
3115{
3116    my $self = shift;
3117    my ($global_vars, $seqinfo_flag, $seq_ct, $seq_name, $r_curseq_trnact) = @_;
3118    my $global_constants = $global_vars->{global_constants};
3119    my $opts = $global_vars->{options};
3120    my $stats = $global_vars->{stats};
3121    my $log = $global_vars->{log_file};
3122    my $fp_result_file = $global_vars->{fp_result_file};
3123    my $arrayCMscanResults = tRNAscanSE::ArrayCMscanResults->new;
3124    my $prescan_tRNA = tRNAscanSE::tRNA->new;
3125    my $cm_tRNA = tRNAscanSE::tRNA->new;
3126
3127    my $over_cutoff = 0;
3128    my $cms_hit_count = 0;
3129    my ($trnaDesc, $fulltrnaDesc, $subseq_start, $subseq_end);
3130    my ($cms_confirmed_ct, $cur_cm_file);
3131    my $cm_hit = {};
3132
3133    $cms_confirmed_ct = 0;
3134
3135    if (scalar(keys %{$self->{main_cm_file_path}}) == 0)
3136    {
3137        $log->error("No covariance models are found for scanning $seq_name. Please check the configuration file.");
3138        return 0;
3139    }
3140
3141    my ($cms_merge_file, $count) = $self->run_cmsearch($global_vars, $seq_name, $arrayCMscanResults, 0);
3142    if ($cms_merge_file eq "")
3143    {
3144        $log->error("Fail to run infernal for $seq_name");
3145        return 0;
3146    }
3147
3148    $arrayCMscanResults->open_file($cms_merge_file);
3149    $fp_result_file->reset_current_seq();
3150    $fp_result_file->get_next_tRNA_candidate($opts, $seqinfo_flag, $seq_ct, $prescan_tRNA);
3151    $arrayCMscanResults->get_next_cmsearch_hit($cm_tRNA);
3152    while ($cm_tRNA->seqname() ne "")
3153    {
3154        $over_cutoff = 0;
3155        while (($cm_tRNA->seqname() ne "") and ($cm_tRNA->seqname() eq $prescan_tRNA->seqname().".t".&pad_num($prescan_tRNA->id(), 6)))
3156        {
3157            $cms_hit_count++;
3158
3159            $subseq_start = $cm_tRNA->start();
3160            $subseq_end = $cm_tRNA->end();
3161            $cm_tRNA->start($cm_tRNA->start() + $prescan_tRNA->start() - 1);
3162            $cm_tRNA->end($cm_tRNA->end() + $prescan_tRNA->start() - 1);
3163
3164            if ($cm_tRNA->score() < $self->{cm_cutoff})
3165            {
3166                $log->broadcast("Low cmsearch score for ".$prescan_tRNA->tRNAscan_id().".$cms_hit_count: ".$cm_tRNA->score());
3167                $trnaDesc .= "(CMSearch Hit#$cms_hit_count: ".$cm_tRNA->start()."-".$cm_tRNA->end().",".
3168                    " Sc: ".$cm_tRNA->score().",  Len: ".(abs($cm_tRNA->start() - $cm_tRNA->end()) + 1).") ";
3169            }
3170            else
3171            {
3172                $over_cutoff++;
3173                $$r_curseq_trnact++;
3174
3175                $cm_tRNA->id($$r_curseq_trnact);
3176                $cm_tRNA->seqname($prescan_tRNA->seqname());
3177                $cm_tRNA->set_domain_model("infernal", $cm_tRNA->score());
3178                if ($cm_tRNA->strand() eq "-")
3179                {
3180                    my $temp = $cm_tRNA->start();
3181                    $cm_tRNA->start($cm_tRNA->end());
3182                    $cm_tRNA->end($temp);
3183                }
3184
3185                if ((length($prescan_tRNA->seq()) >= $subseq_end + 2) &&
3186                    (uc(substr($prescan_tRNA->seq(), $subseq_end, 3)) eq "CCA") &&
3187                    (substr($cm_tRNA->ss(), length($cm_tRNA->ss()) - 4) ne "...."))
3188                {
3189                    $subseq_end += 3;
3190                    if ($cm_tRNA->strand() eq "+")
3191                    {
3192                        $cm_tRNA->end($cm_tRNA->end() + 3);
3193                    }
3194                    else
3195                    {
3196                        $cm_tRNA->start($cm_tRNA->start() - 3);
3197                    }
3198                    $cm_tRNA->seq($cm_tRNA->seq()."CCA");
3199                    $cm_tRNA->ss($cm_tRNA->ss()."...");
3200                }
3201
3202                $cm_tRNA->upstream($prescan_tRNA->upstream());
3203                $cm_tRNA->downstream($prescan_tRNA->downstream());
3204                if ($subseq_start > 1)
3205                {
3206                    $cm_tRNA->upstream($cm_tRNA->upstream() . substr($prescan_tRNA->seq(), 0, $subseq_start - 1));
3207                }
3208                if ($subseq_end < $prescan_tRNA->len())
3209                {
3210                    $cm_tRNA->downstream(substr($prescan_tRNA->seq(), $subseq_end) . $cm_tRNA->downstream());
3211                }
3212                my $upstream_len   = $global_constants->get("upstream_len");
3213                my $downstream_len = $global_constants->get("downstream_len");
3214
3215                $cm_tRNA->upstream(substr($cm_tRNA->upstream(), &max((length($cm_tRNA->upstream()) - $upstream_len), 0)));
3216                $cm_tRNA->downstream(substr($cm_tRNA->downstream(), 0, &min(length($cm_tRNA->downstream()), $downstream_len)));
3217                $self->decode_tRNA_properties($global_vars, $cm_tRNA, $prescan_tRNA, $self->{main_cm_file_path}->{$cm_tRNA->model()});
3218
3219                $cm_tRNA->tRNAscan_id($cm_tRNA->seqname().".tRNA".$$r_curseq_trnact."-".$cm_tRNA->isotype().$cm_tRNA->anticodon());
3220                $cm_tRNA->set_mature_tRNA();
3221                $cm_tRNA->src_seqlen($prescan_tRNA->src_seqlen());
3222                $cm_tRNA->src_seqid($cm_tRNA->seqname());
3223                $cm_tRNA->ordered_seqname($prescan_tRNA->src_seqid());
3224                $cm_tRNA->set_default_scores();
3225
3226                $cms_confirmed_ct++;
3227                $global_vars->{sp_int_results}->write_tRNA($cm_tRNA);
3228            }
3229
3230            $arrayCMscanResults->get_next_cmsearch_hit($cm_tRNA);
3231        }
3232
3233        if ($over_cutoff == 0)
3234        {
3235            if ((!$opts->results_to_stdout()) && ($opts->eufind_mode() || $opts->tscan_mode() || $opts->use_prev_ts_run()))
3236            {
3237                $log->broadcast("CMSearch score(s) below cutoff for ".$prescan_tRNA->tRNAscan_id().". Skipping...");
3238            }
3239            if ($opts->save_falsepos())
3240            {
3241                $fulltrnaDesc = "(Fp Hit: ".$prescan_tRNA->start()."-".$prescan_tRNA->end().", ".
3242                    (abs($prescan_tRNA->start() - $prescan_tRNA->end()) + 1)." bp, Src: ".$prescan_tRNA->hit_source().") ".$trnaDesc;
3243
3244                $stats->increment_fpos_base_ct(length($prescan_tRNA->seq()));
3245                &write_tRNA($opts->falsepos_file(), $prescan_tRNA->tRNAscan_id(), $fulltrnaDesc, $prescan_tRNA->seq(), 0);
3246            }
3247        }
3248
3249        $fp_result_file->get_next_tRNA_candidate($opts, $seqinfo_flag, $seq_ct, $prescan_tRNA);
3250    }
3251    $arrayCMscanResults->close_file();
3252
3253    return $cms_confirmed_ct;
3254}
3255
3256sub analyze_mito
3257{
3258    my $self = shift;
3259    my ($global_vars, $seqinfo_flag, $seq_ct, $seq_name, $r_curseq_trnact) = @_;
3260    my $global_constants = $global_vars->{global_constants};
3261    my $opts = $global_vars->{options};
3262    my $stats = $global_vars->{stats};
3263    my $log = $global_vars->{log_file};
3264    my $fp_result_file = $global_vars->{fp_result_file};
3265    my $arrayCMscanResults = tRNAscanSE::ArrayCMscanResults->new;
3266    my $prescan_tRNA = tRNAscanSE::tRNA->new;
3267    my $cm_tRNA = tRNAscanSE::tRNA->new;
3268
3269    my $over_cutoff = 0;
3270    my $cms_hit_count = 0;
3271    my ($trnaDesc, $fulltrnaDesc, $subseq_start, $subseq_end);
3272    my ($cms_confirmed_ct, $cur_cm_file);
3273    my $cm_hit = {};
3274
3275    $cms_confirmed_ct = 0;
3276
3277    if (scalar(keys %{$self->{main_cm_file_path}}) == 0)
3278    {
3279        $log->error("No covariance models are found for scanning $seq_name. Please check the configuration file.");
3280        return 0;
3281    }
3282
3283    my ($cms_merge_file, $count) = $self->run_cmsearch($global_vars, $seq_name, $arrayCMscanResults, 10);
3284    if ($cms_merge_file eq "")
3285    {
3286        $log->error("Fail to run infernal for $seq_name");
3287        return 0;
3288    }
3289
3290    $arrayCMscanResults->open_file($cms_merge_file);
3291    $fp_result_file->reset_current_seq();
3292    $fp_result_file->get_next_tRNA_candidate($opts, $seqinfo_flag, $seq_ct, $prescan_tRNA);
3293    $arrayCMscanResults->get_next_cmsearch_hit($cm_tRNA);
3294    while ($cm_tRNA->seqname() ne "")
3295    {
3296        $over_cutoff = 0;
3297        while (($cm_tRNA->seqname() ne "") and ($cm_tRNA->seqname() eq $prescan_tRNA->seqname().".t".&pad_num($prescan_tRNA->id(), 6)))
3298        {
3299            $cms_hit_count++;
3300
3301            $subseq_start = $cm_tRNA->start();
3302            $subseq_end = $cm_tRNA->end();
3303            $cm_tRNA->start($cm_tRNA->start() + $prescan_tRNA->start() - 1);
3304            $cm_tRNA->end($cm_tRNA->end() + $prescan_tRNA->start() - 1);
3305
3306            if ($cm_tRNA->score() < $self->{cm_cutoff})
3307            {
3308                $log->broadcast("Low cmsearch score for ".$prescan_tRNA->tRNAscan_id().".$cms_hit_count: ".$cm_tRNA->score());
3309                $trnaDesc .= "(CMSearch Hit#$cms_hit_count: ".$cm_tRNA->start()."-".$cm_tRNA->end().",".
3310                    " Sc: ".$cm_tRNA->score().",  Len: ".(abs($cm_tRNA->start() - $cm_tRNA->end()) + 1).") ";
3311            }
3312            else
3313            {
3314                $over_cutoff++;
3315                $$r_curseq_trnact++;
3316
3317                $cm_tRNA->id($$r_curseq_trnact);
3318                $cm_tRNA->seqname($prescan_tRNA->seqname());
3319                $cm_tRNA->set_domain_model("infernal", $cm_tRNA->score());
3320                if ($cm_tRNA->strand() eq "-")
3321                {
3322                    my $temp = $cm_tRNA->start();
3323                    $cm_tRNA->start($cm_tRNA->end());
3324                    $cm_tRNA->end($temp);
3325                }
3326
3327                if ((length($prescan_tRNA->seq()) >= $subseq_end + 2) &&
3328                    (uc(substr($prescan_tRNA->seq(), $subseq_end, 3)) eq "CCA") &&
3329                    (substr($cm_tRNA->ss(), length($cm_tRNA->ss()) - 4) ne "...."))
3330                {
3331                    $subseq_end += 3;
3332                    if ($cm_tRNA->strand() eq "+")
3333                    {
3334                        $cm_tRNA->end($cm_tRNA->end() + 3);
3335                    }
3336                    else
3337                    {
3338                        $cm_tRNA->start($cm_tRNA->start() - 3);
3339                    }
3340                    $cm_tRNA->seq($cm_tRNA->seq()."CCA");
3341                    $cm_tRNA->ss($cm_tRNA->ss()."...");
3342                }
3343
3344                $cm_tRNA->upstream($prescan_tRNA->upstream());
3345                $cm_tRNA->downstream($prescan_tRNA->downstream());
3346                if ($subseq_start > 1)
3347                {
3348                    $cm_tRNA->upstream($cm_tRNA->upstream() . substr($prescan_tRNA->seq(), 0, $subseq_start - 1));
3349                }
3350                if ($subseq_end < $prescan_tRNA->len())
3351                {
3352                    $cm_tRNA->downstream(substr($prescan_tRNA->seq(), $subseq_end) . $cm_tRNA->downstream());
3353                }
3354                my $upstream_len   = $global_constants->get("upstream_len");
3355                my $downstream_len = $global_constants->get("downstream_len");
3356
3357                $cm_tRNA->upstream(substr($cm_tRNA->upstream(), &max((length($cm_tRNA->upstream()) - $upstream_len), 0)));
3358                $cm_tRNA->downstream(substr($cm_tRNA->downstream(), 0, &min(length($cm_tRNA->downstream()), $downstream_len)));
3359                $self->decode_mito_tRNA_properties($global_vars, $cm_tRNA, $prescan_tRNA, $self->{main_cm_file_path}->{$cm_tRNA->model()});
3360
3361                $cm_tRNA->tRNAscan_id($cm_tRNA->seqname().".tRNA".$$r_curseq_trnact."-".$cm_tRNA->isotype().$cm_tRNA->anticodon());
3362                $cm_tRNA->set_mature_tRNA();
3363                $cm_tRNA->src_seqlen($prescan_tRNA->src_seqlen());
3364                $cm_tRNA->src_seqid($cm_tRNA->seqname());
3365                $cm_tRNA->ordered_seqname($prescan_tRNA->src_seqid());
3366                $cm_tRNA->set_default_scores();
3367
3368                $cms_confirmed_ct++;
3369                $global_vars->{sp_int_results}->write_tRNA($cm_tRNA);
3370            }
3371
3372            $arrayCMscanResults->get_next_cmsearch_hit($cm_tRNA);
3373        }
3374
3375        if ($over_cutoff == 0)
3376        {
3377            if ((!$opts->results_to_stdout()) && ($opts->eufind_mode() || $opts->tscan_mode() || $opts->use_prev_ts_run()))
3378            {
3379                $log->broadcast("CMSearch score(s) below cutoff for ".$prescan_tRNA->tRNAscan_id().". Skipping...");
3380            }
3381            if ($opts->save_falsepos())
3382            {
3383                $fulltrnaDesc = "(Fp Hit: ".$prescan_tRNA->start()."-".$prescan_tRNA->end().", ".
3384                    (abs($prescan_tRNA->start() - $prescan_tRNA->end()) + 1)." bp, Src: ".$prescan_tRNA->hit_source().") ".$trnaDesc;
3385
3386                $stats->increment_fpos_base_ct(length($prescan_tRNA->seq()));
3387                &write_tRNA($opts->falsepos_file(), $prescan_tRNA->tRNAscan_id(), $fulltrnaDesc, $prescan_tRNA->seq(), 0);
3388            }
3389        }
3390
3391        $fp_result_file->get_next_tRNA_candidate($opts, $seqinfo_flag, $seq_ct, $prescan_tRNA);
3392    }
3393    $arrayCMscanResults->close_file();
3394
3395    return $cms_confirmed_ct;
3396}
3397
3398sub analyze_with_cmsearch
3399{
3400    my $self = shift;
3401    my ($global_vars, $seqinfo_flag, $seq_ct, $seq_name, $r_curseq_trnact) = @_;
3402    my $global_constants = $global_vars->{global_constants};
3403    my $opts = $global_vars->{options};
3404    my $stats = $global_vars->{stats};
3405    my $log = $global_vars->{log_file};
3406    my $fp_result_file = $global_vars->{fp_result_file};
3407    my $arrayCMscanResults = tRNAscanSE::ArrayCMscanResults->new;
3408    my $prescan_tRNA = tRNAscanSE::tRNA->new;
3409    my $cm_tRNA = tRNAscanSE::tRNA->new;
3410    my $flanking_exist = 0;
3411
3412    my $over_cutoff = 0;
3413    my $cms_hit_count = 0;
3414    my $rescore = 0;
3415    my ($trnaDesc, $fulltrnaDesc, $subseq_start, $subseq_end);
3416    my ($cms_confirmed_ct, $cur_cm_file);
3417    my $cm_hit = {};
3418
3419    $cms_confirmed_ct = 0;
3420
3421    my ($cms_merge_file, $count) = $self->run_cmsearch($global_vars, $seq_name, $arrayCMscanResults, 0);
3422    if ($cms_merge_file eq "")
3423    {
3424        $log->error("Fail to run infernal for $seq_name");
3425        return 0;
3426    }
3427
3428    $arrayCMscanResults->open_file($cms_merge_file);
3429    $fp_result_file->reset_current_seq();
3430    $fp_result_file->get_next_tRNA_candidate($opts, $seqinfo_flag, $seq_ct, $prescan_tRNA);
3431    if ($fp_result_file->open_flanking("read"))
3432    {
3433        $fp_result_file->read_tRNA_flanking($prescan_tRNA);
3434        $flanking_exist = 1;
3435    }
3436    $arrayCMscanResults->get_next_cmsearch_hit($cm_tRNA);
3437    while ($cm_tRNA->seqname() ne "")
3438    {
3439        $over_cutoff = 0;
3440        while (($cm_tRNA->seqname() ne "") and ($cm_tRNA->seqname() eq $prescan_tRNA->seqname().".t".&pad_num($prescan_tRNA->id(), 6)))
3441        {
3442            $rescore = 0;
3443            $cms_hit_count++;
3444            if ($prescan_tRNA->hit_source() ne "")
3445            {
3446                $cm_tRNA->hit_source($prescan_tRNA->hit_source());
3447            }
3448            $subseq_start = $cm_tRNA->start();
3449            $subseq_end = $cm_tRNA->end();
3450            if ($opts->infernal_fp())
3451            {
3452                $cm_tRNA->strand($prescan_tRNA->strand());
3453                if ($prescan_tRNA->strand() eq "+")
3454                {
3455                    $cm_tRNA->start($cm_tRNA->start() + $prescan_tRNA->start() - 1);
3456                    $cm_tRNA->end($cm_tRNA->end() + $prescan_tRNA->start() - 1);
3457                }
3458                else
3459                {
3460                    if ($prescan_tRNA->start() > $prescan_tRNA->end())
3461                    {
3462                        $cm_tRNA->start($prescan_tRNA->start() - $cm_tRNA->start() + 1);
3463                        $cm_tRNA->end($prescan_tRNA->start() - $cm_tRNA->end() + 1);
3464                    }
3465                    else
3466                    {
3467                        $cm_tRNA->start($prescan_tRNA->end() - $cm_tRNA->start() + 1);
3468                        $cm_tRNA->end($prescan_tRNA->end() - $cm_tRNA->end() + 1);
3469                    }
3470                }
3471            }
3472            else
3473            {
3474                $cm_tRNA->start($cm_tRNA->start() + $prescan_tRNA->start() - 1);
3475                $cm_tRNA->end($cm_tRNA->end() + $prescan_tRNA->start() - 1);
3476            }
3477
3478            if ($cm_tRNA->score() < $self->{cm_cutoff})
3479            {
3480                $log->broadcast("Low cmsearch score for ".$prescan_tRNA->tRNAscan_id().".$cms_hit_count: ".$cm_tRNA->score());
3481                $trnaDesc .= "(CMSearch Hit#$cms_hit_count: ".$cm_tRNA->start()."-".$cm_tRNA->end().",".
3482                    " Sc: ".$cm_tRNA->score().",  Len: ".(abs($cm_tRNA->start() - $cm_tRNA->end()) + 1).") ";
3483            }
3484            else
3485            {
3486                $over_cutoff++;
3487                $$r_curseq_trnact++;
3488
3489                $cm_tRNA->id($$r_curseq_trnact);
3490                $cm_tRNA->seqname($prescan_tRNA->seqname());
3491                $cm_tRNA->set_domain_model("infernal", $cm_tRNA->score());
3492                if ($cm_tRNA->strand() eq "-")
3493                {
3494                    my $temp = $cm_tRNA->start();
3495                    $cm_tRNA->start($cm_tRNA->end());
3496                    $cm_tRNA->end($temp);
3497                }
3498
3499                if ((length($prescan_tRNA->seq()) >= $subseq_end + 2) &&
3500                    (uc(substr($prescan_tRNA->seq(), $subseq_end, 3)) eq "CCA") &&
3501                    (substr($cm_tRNA->ss(), length($cm_tRNA->ss()) - 4) ne "....") &&
3502                    ($self->{main_cm_file_path}->{Domain} ne $global_constants->get_subvalue("cm", "eukaryota")) &&
3503                    ($self->{main_cm_file_path}->{Domain} ne $global_constants->get_subvalue("cm", "general")))
3504                {
3505                    $subseq_end += 3;
3506                    if ($cm_tRNA->strand() eq "+")
3507                    {
3508                        $cm_tRNA->end($cm_tRNA->end() + 3);
3509                    }
3510                    else
3511                    {
3512                        $cm_tRNA->start($cm_tRNA->start() - 3);
3513                    }
3514                    $cm_tRNA->seq($cm_tRNA->seq()."CCA");
3515                    $cm_tRNA->ss($cm_tRNA->ss()."...");
3516                    $rescore = 1;
3517                }
3518
3519                if ((uc(substr($cm_tRNA->seq(), length($cm_tRNA->seq()) - 3)) ne "CCA") &&
3520                    (substr($cm_tRNA->ss(), length($cm_tRNA->ss()) - 4) eq "....") &&
3521                    ($self->{main_cm_file_path}->{Domain} ne $global_constants->get_subvalue("cm", "eukaryota")) &&
3522                    ($self->{main_cm_file_path}->{Domain} ne $global_constants->get_subvalue("cm", "general")))
3523                {
3524                    $subseq_end -= 3;
3525                    if ($cm_tRNA->strand() eq "+")
3526                    {
3527                        $cm_tRNA->end($cm_tRNA->end() - 3);
3528                    }
3529                    else
3530                    {
3531                        $cm_tRNA->start($cm_tRNA->start() + 3);
3532                    }
3533                    $cm_tRNA->seq(substr($cm_tRNA->seq(), 0, length($cm_tRNA->seq()) - 3));
3534                    $cm_tRNA->ss(substr($cm_tRNA->ss(), 0, length($cm_tRNA->ss()) - 3));
3535                    $rescore = 1;
3536                }
3537                elsif ((uc(substr($cm_tRNA->seq(), length($cm_tRNA->seq()) - 3)) eq "CCA") &&
3538                    (((substr($cm_tRNA->ss(), length($cm_tRNA->ss()) - 6) eq "<.....") &&
3539                    (substr($cm_tRNA->seq(), length($cm_tRNA->seq()) - 5, 2) =~ /[acgtn][ACGTN]/)) ||
3540                    ((substr($cm_tRNA->ss(), length($cm_tRNA->ss()) - 7) eq "<......") &&
3541                    (substr($cm_tRNA->seq(), length($cm_tRNA->seq()) - 6, 3) =~ /[ACGTN][acgtn][ACGTN]/)) ||
3542                    ((substr($cm_tRNA->ss(), length($cm_tRNA->ss()) - 7) eq "<......") &&
3543                    (substr($cm_tRNA->seq(), length($cm_tRNA->seq()) - 6, 3) =~ /[acgtn]{2}[ACGTN]/))) &&
3544                    ($self->{main_cm_file_path}->{Domain} ne $global_constants->get_subvalue("cm", "eukaryota")) &&
3545                    ($self->{main_cm_file_path}->{Domain} ne $global_constants->get_subvalue("cm", "general")))
3546                {
3547                    my $trim_len = 4;
3548                    if (substr($cm_tRNA->ss(), length($cm_tRNA->ss()) - 7) eq "<......" && substr($cm_tRNA->seq(), length($cm_tRNA->seq()) - 6, 3) =~ /[acgtn]{2}[ACGTN]/)
3549                    {
3550						$trim_len = 5;
3551					}
3552
3553                    $subseq_end -= $trim_len;
3554                    if ($cm_tRNA->strand() eq "+")
3555                    {
3556                        $cm_tRNA->end($cm_tRNA->end() - $trim_len);
3557                    }
3558                    else
3559                    {
3560                        $cm_tRNA->start($cm_tRNA->start() + $trim_len);
3561                    }
3562                    $cm_tRNA->seq(substr($cm_tRNA->seq(), 0, length($cm_tRNA->seq()) - $trim_len));
3563                    $cm_tRNA->seq(substr($cm_tRNA->seq(), 0, length($cm_tRNA->seq()) - 1).uc(substr($cm_tRNA->seq(), length($cm_tRNA->seq()) - 1, 1)));
3564                    $cm_tRNA->ss(substr($cm_tRNA->ss(), 0, length($cm_tRNA->ss()) - $trim_len));
3565                    $rescore = 1;
3566                }
3567
3568                if ($rescore)
3569                {
3570                    $self->rescore_tRNA($global_vars, $cm_tRNA, $prescan_tRNA);
3571                }
3572
3573                $cm_tRNA->upstream($prescan_tRNA->upstream());
3574                $cm_tRNA->downstream($prescan_tRNA->downstream());
3575                if ($subseq_start > 1)
3576                {
3577                    $cm_tRNA->upstream($cm_tRNA->upstream() . substr($prescan_tRNA->seq(), 0, $subseq_start - 1));
3578                }
3579                if ($subseq_end < $prescan_tRNA->len())
3580                {
3581                    $cm_tRNA->downstream(substr($prescan_tRNA->seq(), $subseq_end) . $cm_tRNA->downstream());
3582                }
3583                if (!$opts->tscan_mode() and !$opts->eufind_mode() and !$opts->infernal_fp())
3584                {
3585                    my $upstream_len   = $global_constants->get("upstream_len");
3586                    my $downstream_len = $global_constants->get("downstream_len");
3587
3588                    $cm_tRNA->upstream(substr($cm_tRNA->upstream(), &max((length($cm_tRNA->upstream()) - $upstream_len), 0)));
3589                    $cm_tRNA->downstream(substr($cm_tRNA->downstream(), 0, &min(length($cm_tRNA->downstream()), $downstream_len)));
3590                }
3591                $self->decode_tRNA_properties($global_vars, $cm_tRNA, $prescan_tRNA, $self->{main_cm_file_path}->{$cm_tRNA->model()});
3592
3593                $cm_tRNA->tRNAscan_id($cm_tRNA->seqname().".tRNA".$$r_curseq_trnact."-".$cm_tRNA->isotype().$cm_tRNA->anticodon());
3594                $cm_tRNA->src_seqlen($prescan_tRNA->src_seqlen());
3595                $cm_tRNA->src_seqid($cm_tRNA->seqname());
3596                $cm_tRNA->ordered_seqname($prescan_tRNA->src_seqid());
3597                $cm_tRNA->set_default_scores();
3598                $self->fix_fMet($global_vars, $cm_tRNA);
3599                $self->fix_His($global_vars, $cm_tRNA);
3600                $cm_tRNA->set_mature_tRNA();
3601
3602                $cms_confirmed_ct++;
3603                $global_vars->{sp_int_results}->write_tRNA($cm_tRNA);
3604            }
3605
3606            $arrayCMscanResults->get_next_cmsearch_hit($cm_tRNA);
3607        }
3608
3609        if ($over_cutoff == 0)
3610        {
3611            if ((!$opts->results_to_stdout()) && ($opts->eufind_mode() || $opts->tscan_mode() || $opts->use_prev_ts_run()))
3612            {
3613                $log->broadcast("CMSearch score(s) below cutoff for ".$prescan_tRNA->tRNAscan_id().". Skipping...");
3614            }
3615            if ($opts->save_falsepos())
3616            {
3617                $fulltrnaDesc = "(Fp Hit: ".$prescan_tRNA->start()."-".$prescan_tRNA->end().", ".
3618                    (abs($prescan_tRNA->start() - $prescan_tRNA->end()) + 1)." bp, Src: ".$prescan_tRNA->hit_source().") ".$trnaDesc;
3619
3620                $stats->increment_fpos_base_ct(length($prescan_tRNA->seq()));
3621                &write_tRNA($opts->falsepos_file(), $prescan_tRNA->tRNAscan_id(), $fulltrnaDesc, $prescan_tRNA->seq(), 0);
3622            }
3623        }
3624
3625        $fp_result_file->get_next_tRNA_candidate($opts, $seqinfo_flag, $seq_ct, $prescan_tRNA);
3626        if ($flanking_exist)
3627        {
3628			$fp_result_file->read_tRNA_flanking($prescan_tRNA);
3629		}
3630    }
3631    $arrayCMscanResults->close_file();
3632
3633    return $cms_confirmed_ct;
3634}
3635
3636sub sort_cm_hits_by_start
3637{
3638    my $self = shift;
3639    my $cms_hits = shift;
3640
3641    my $a_start = $a->{start};
3642    my $b_start = $b->{start};
3643
3644    if ($a->{strand} == 0) {
3645        $a_start = $a->{end};
3646    }
3647    if ($b->{strand} == 0) {
3648        $b_start = $b->{end};
3649    }
3650
3651    return ($a->{seqname} cmp $b->{seqname} ||
3652            $a_start <=> $b_start);
3653}
3654
36551;
3656