1# tRNAscanSE/Eufind.pm
2# This class contains parameters and functions for running eufindtRNA used in tRNAscan-SE.
3#
4# --------------------------------------------------------------
5# This module is part of the tRNAscan-SE program.
6# Copyright (C) 2017 Patricia Chan and Todd Lowe
7# --------------------------------------------------------------
8#
9
10package tRNAscanSE::Eufind;
11
12use strict;
13use tRNAscanSE::Configuration;
14use tRNAscanSE::Utils;
15use tRNAscanSE::ArraytRNA;
16use tRNAscanSE::tRNA;
17use tRNAscanSE::Stats;
18
19
20sub new
21{
22    my $class = shift;
23    my $self = {};
24
25    initialize($self);
26
27    bless ($self, $class);
28    return $self;
29}
30
31sub DESTROY
32{
33    my $self = shift;
34}
35
36sub initialize
37{
38    my $self = shift;
39
40    $self->{eufind_bin} = "eufindtRNA";
41    $self->{eufind_mask} = 2;           # Bit-wise masks for source of tRNA hits
42}
43
44sub eufind_params
45{
46    my $self = shift;
47    if (@_) { $self->{eufind_params} = shift; }
48    return $self->{eufind_params};
49}
50
51sub eufind_intscore
52{
53    my $self = shift;
54    if (@_) { $self->{eufind_intscore} = shift; }
55    return $self->{eufind_intscore};
56}
57
58sub eufind_bin
59{
60    my $self = shift;
61    if (@_) { $self->{eufind_bin} = shift; }
62    return $self->{eufind_bin};
63}
64
65sub eufind_mask
66{
67    my $self = shift;
68    return $self->{eufind_mask};
69}
70
71sub set_defaults
72{
73    my $self = shift;
74    my $global_vars = shift;
75    my $global_constants = $global_vars->{global_constants};
76
77    $self->{eufind_params} = $global_constants->get_subvalue("eufind", "relaxed_param");
78    $self->{eufind_intscore} = $global_constants->get_subvalue("eufind", "intscore");
79}
80
81sub set_bin
82{
83    my $self = shift;
84    my $bindir = shift;
85
86    if ($^O =~ /^MSWin/)
87    {
88        $self->{eufind_bin} .= ".exe";
89    }
90
91    if (!(-x $self->{eufind_bin}))
92    {
93        $self->{eufind_bin} = $bindir."/".$self->{eufind_bin};
94        if (!(-x $self->{eufind_bin}))
95        {
96            die "FATAL: Unable to find ".$self->{eufind_bin}." executable\n\n";
97        }
98    }
99}
100
101sub run_eufind
102{
103    my $self = shift;
104    my ($tmp_fa, $start_index, $max_int_len, $seq_name) = @_;
105    my $eufind_bin = $self->{eufind_bin};
106    my $eufind_intscore = $self->{eufind_intscore};
107    my $eufind_params = $self->{eufind_params};
108
109    # run default Eufind using selected param set
110    my $eufind_output = `$eufind_bin -i $start_index -F -I $eufind_intscore -l $max_int_len $eufind_params $tmp_fa`;
111    if (&error_exit_status("EufindtRNA",$seq_name))
112    {
113        $eufind_output = "";
114    }
115    return $eufind_output;
116}
117
118sub process_Eufind_hits
119{
120    my $self = shift;
121    my ($global_vars, $eufind_output) = @_;
122    my $global_constants = $global_vars->{global_constants};
123    my $fp_tRNAs = $global_vars->{fp_tRNAs};
124    my $stats = $global_vars->{stats};
125
126    my $trna = undef;
127    my $i = 0;
128
129    my (@eufind_lines) = split(/\n/, $eufind_output);
130    foreach (@eufind_lines)
131    {
132        if (/^(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\S+)/o)
133        {
134            $trna = tRNAscanSE::tRNA->new;
135            $trna->seqname($1);
136#            $trnact = $2;
137            $trna->start($3);
138            $trna->end($4);
139            $trna->isotype($5);
140            $trna->anticodon($6);
141            $trna->score($9);
142
143            if ($trna->start() < $trna->end())
144            {
145                $trna->position($trna->start());
146                $trna->strand("+");
147            }
148            else
149            {
150                $trna->position($global_constants->get("really_big_number") - $trna->start() + 1);
151                $trna->strand("-");
152            }
153
154            if ($trna->start() == $trna->end())
155            {
156                print STDERR "Error reading EufindtRNA results: tRNA of length 0";
157            }
158
159            if (!$self->merge_repeat_hit($global_vars->{stats}, $global_vars->{fp_tRNAs}, $trna))
160            {
161                # insert non-redundant hit in order
162                # 'Merge_repeat_hits' depends on list being in order
163                $i = 0;
164                while (($i < $fp_tRNAs->get_count()) && ($fp_tRNAs->get($i)->position() < $trna->position()))
165                {
166                    $i++;
167                }
168
169                $trna->hit_source($self->{eufind_mask});
170                $fp_tRNAs->insert($trna, $i);
171                $stats->increment_trnatotal();
172            }
173        }
174    }
175}
176
177# check current hit for redundancy against all previous hits in hitlist
178#
179# if it IS a repeat, merge it with overlapping hit and return 1
180# if it doesn't overlap with any hits, return 0
181sub merge_repeat_hit
182{
183    my $self = shift;
184    my ($stats, $fp_tRNAs, $trna) = @_;
185
186    for (my $i = 0; $i < $fp_tRNAs->get_count(); $i++)
187    {
188        if ($trna->strand() eq "+")
189        {
190            if (($fp_tRNAs->get($i)->strand() eq "+") &&
191                (&seg_overlap($trna->start(), $trna->end(), $fp_tRNAs->get($i)->start(), $fp_tRNAs->get($i)->end())))
192            {
193                $fp_tRNAs->get($i)->start(&min($trna->start(), $fp_tRNAs->get($i)->start()));
194                $fp_tRNAs->get($i)->end(&max($trna->end(), $fp_tRNAs->get($i)->end()));
195                $fp_tRNAs->get($i)->hit_source($fp_tRNAs->get($i)->hit_source() | $self->{eufind_mask});
196                $fp_tRNAs->get($i)->isotype($trna->isotype());
197                $fp_tRNAs->get($i)->score($trna->score());
198
199                # check to see if extended endpoint overlaps i+1 hit's start boundary
200                # if so, combine hit[i] and hit[i+1] into one hit and delete hit[i+1]
201                if (($i != ($fp_tRNAs->get_count() - 1)) and ($fp_tRNAs->get($i+1)->strand() eq "+")
202                    and ($fp_tRNAs->get($i)->end() >= $fp_tRNAs->get($i+1)->start()))
203                {
204                    $fp_tRNAs->get($i)->end(&max($fp_tRNAs->get($i)->end(), $fp_tRNAs->get($i+1)->end()));
205                    $fp_tRNAs->get($i)->hit_source($fp_tRNAs->get($i)->hit_source() | $fp_tRNAs->get($i+1)->hit_source());
206
207#                    splice(@$r_hit_list,$i+1,1);          # toss out overlapping hit
208                    $fp_tRNAs->remove($i+1);
209#                    $$r_trnact--;
210                    $stats->decrement_trnatotal();
211                }
212                return 1;                                 # exit loop immediately
213            }
214        }
215        else         # else (antisense) strand
216        {
217            if (($fp_tRNAs->get($i)->strand() eq "-") &&
218                (&seg_overlap($trna->end(), $trna->start(), $fp_tRNAs->get($i)->end(), $fp_tRNAs->get($i)->start())))
219            {
220                $fp_tRNAs->get($i)->start(&max($trna->start(), $fp_tRNAs->get($i)->start()));
221                $fp_tRNAs->get($i)->end(&min($trna->end(), $fp_tRNAs->get($i)->end()));
222                $fp_tRNAs->get($i)->hit_source($fp_tRNAs->get($i)->hit_source() | $self->{eufind_mask});
223                $fp_tRNAs->get($i)->isotype($trna->isotype());
224                $fp_tRNAs->get($i)->score($trna->score());
225
226                if (($i != ($fp_tRNAs->get_count() - 1)) and
227                    ($fp_tRNAs->get($i)->end() <= $fp_tRNAs->get($i+1)->start()))
228                {
229                    $fp_tRNAs->get($i)->end(&min($fp_tRNAs->get($i)->end(), $fp_tRNAs->get($i+1)->end()));
230                    $fp_tRNAs->get($i)->hit_source($fp_tRNAs->get($i)->hit_source() | $fp_tRNAs->get($i+1)->hit_source());
231
232#                    splice(@$r_hit_list,$i+1,1);          # toss out overlapping hit
233                    $fp_tRNAs->remove($i+1);
234#                    $$r_trnact--;
235                    $stats->decrement_trnatotal();
236                }
237                return 1;                                 # exit loop immediately
238            }
239        } # else (antisense) strand
240
241    } # for each (hit)
242
243    return 0;                                             # current hit is not a repeat
244}
2451;
246