1# tRNAscanSE/Tscan.pm
2# This class contains parameters and functions for running tRNAscan used in tRNAscan-SE.
3#
4# --------------------------------------------------------------
5# This module is part of the tRNAscan-SE program.
6# Copyright (C) 2017 Patricia Chan and Todd Lowe
7# --------------------------------------------------------------
8#
9
10package tRNAscanSE::Tscan;
11
12use strict;
13use tRNAscanSE::Configuration;
14use tRNAscanSE::Utils;
15use tRNAscanSE::ArraytRNA;
16use tRNAscanSE::tRNA;
17use tRNAscanSE::GeneticCode;
18use tRNAscanSE::Stats;
19
20sub new
21{
22    my $class = shift;
23    my $self = {};
24
25    initialize($self);
26
27    bless ($self, $class);
28    return $self;
29}
30
31sub DESTROY
32{
33    my $self = shift;
34}
35
36sub initialize
37{
38    my $self = shift;
39
40    # set to non-zero if you do NOT want redundant, overlapping hits
41    #  found by tRNAscan merged into one hit
42    $self->{keep_tscan_repeats} = 0;
43
44    $self->{tscan_version} = 1.4;   # version of tRNAscan used by tRNAscan-SE
45    $self->{tscan_bin} = "trnascan-1.4";
46    $self->{tscan_mask} = 1;        # Bit-wise masks for source of tRNA hits
47}
48
49sub set_defaults
50{
51    my $self = shift;
52    my $global_vars = shift;
53    my $global_constants = $global_vars->{global_constants};
54
55    $self->{tscan_params} = $global_constants->get_subvalue("tscan", "strict_param");
56}
57
58sub keep_tscan_repeats
59{
60    my $self = shift;
61    if (@_) { $self->{keep_tscan_repeats} = shift; }
62    return $self->{keep_tscan_repeats};
63}
64
65sub tscan_params
66{
67    my $self = shift;
68    if (@_) { $self->{tscan_params} = shift; }
69    return $self->{tscan_params};
70}
71
72sub tscan_version
73{
74    my $self = shift;
75    if (@_) { $self->{tscan_version} = shift; }
76    return $self->{tscan_version};
77}
78
79sub tscan_bin
80{
81    my $self = shift;
82    if (@_) { $self->{tscan_bin} = shift; }
83    return $self->{tscan_bin};
84}
85
86sub tscan_mask
87{
88    my $self = shift;
89    return $self->{tscan_mask};
90}
91
92sub set_bin
93{
94    my $self = shift;
95    my $bindir = shift;
96
97    # choose correct name for version being run
98    # only version 1.4 is provided with distribution
99
100    if ($self->{tscan_version} == 1.4)
101    {
102        $self->{tscan_bin} = "trnascan-1.4";
103    }
104    elsif ($self->{tscan_version} == 1.39)
105    {
106        $self->{tscan_bin} = "trnascan-1.39";
107    }
108    elsif ($self->{tscan_version} == 2)
109    {
110        $self->{tscan_bin} = "TRNAscan";
111    }
112    elsif ($self->{tscan_version} == 1.3)
113    {
114        $self->{tscan_bin} = "trnascan-1.3";
115    }
116    else
117    {
118        die "FATAL:  Illegal tRNAscan version.\n\n";
119    }
120
121    if ($^O =~ /^MSWin/)
122    {
123        $self->{tscan_bin} .= ".exe";
124    }
125
126    if (!(-x $self->{tscan_bin}))
127    {
128        $self->{tscan_bin} = $bindir."/".$self->{tscan_bin};
129        if (!(-x $self->{tscan_bin}))
130        {
131            die "FATAL: Unable to find ".$self->{tscan_bin}." executable\n\n";
132        }
133    }
134}
135
136sub run_tRNAscan
137{
138    my $self = shift;
139    my ($tmp_fa, $tmp_raw, $start_index, $lib_dir, $seq_name) = @_;
140    my $tscan_version = $self->{tscan_version};
141    my $tscan_bin = $self->{tscan_bin};
142    my $tscan_params = $self->{tscan_params};
143
144    # version provided with distribution
145    if ($tscan_version == 1.4)
146    {
147        # run default tRNAscan 1.4 using selected param set
148        system ("$tscan_bin -i $start_index -c $tscan_params $tmp_fa > $tmp_raw");
149        if (&error_exit_status("tRNAscan", $seq_name))
150        {
151            return -1;
152        }
153    }
154
155    # run tRNAscan without conservative ambiguous base pairing rules
156    # not available in distribution version
157    elsif ($tscan_version == 1.39)
158    {
159        system ("$tscan_bin -c $tscan_params $tmp_fa > $tmp_raw");
160    }
161
162    # run tRNAscan v2.0, not available in distribution version
163    elsif ($tscan_version == 2)
164    {
165        system ("$tscan_bin -SEQ $tmp_fa -TEMPLATE SEtemplate -OUTPUT $tmp_raw > /dev/null");
166    }
167
168    # run original tRNAscan 1.3, not available in distribution version
169    elsif ($tscan_version == 1.3)
170    {
171        if (!(-r "./TPCsignal"))
172        {
173            system ("ln -s ".$lib_dir."TPCsignal TPCsignal");
174        }
175        if (!(-r "./Dsignal"))
176        {
177            system ("ln -s ".$lib_dir."Dsignal Dsignal");
178        }
179        system ("reformat -ld genbank $tmp_fa > tmp.gb");
180        system ("$tscan_bin tmp.gb $tmp_raw > /dev/null");
181        system ("rm tmp.gb");
182    }
183    else
184    {
185        die "FATAL:  Illegal tRNAscan version.\n\n";
186    }
187}
188
189# Append tRNAscan verbose output to
190#   result file with header tag
191sub append_verbfile
192{
193    my $self = shift;
194    my ($verb_file, $tmp_fa, $seq_name) = @_;
195
196    &open_for_append(\*TSCANVERB, $verb_file);
197    print TSCANVERB "\n>>>> tRNA-Scan verbose output for <$seq_name>\n\n";
198    close TSCANVERB;
199    system ("cat tscan.verb.out >> $verb_file");
200}
201
202# extract trna hits from raw result file while weeding out repeated hits
203# save non-redundant hits in "hit_list" array
204sub process_tRNAscan_hits
205{
206    my $self = shift;
207    my ($global_vars, $seq_name) = @_;
208    my $global_constants = $global_vars->{global_constants};
209    my $gc = $global_vars->{gc};
210    my $fp_tRNAs = $global_vars->{fp_tRNAs};
211    my $stats = $global_vars->{stats};
212
213    my $tmp_raw = $global_constants->get("tmp_raw");
214    my $trna = tRNAscanSE::tRNA->new;
215    my $i = 0;
216
217    # open trnascan raw output file for current seq
218    open (TSCANRAW, "$tmp_raw")  || die ("FATAL: Unable to open temp raw output file $tmp_raw\n\n");
219
220    # parse one complete hit per call
221    while ($self->parse_tscan_hit($global_constants, $gc, \*TSCANRAW, $trna))
222    {
223        # if NOT a repeat hit, put it on the hit list
224        if ($self->{keep_tscan_repeats} ||
225            (!$self->merge_repeat_hit($global_vars->{stats}, $global_vars->{fp_tRNAs}, $trna)))
226        {
227            # check to see if tscan 1.3 has incorrectly reported
228            #  start/end index (happens occassionally)
229            if ((abs($trna->end() - $trna->start()) + 1) != length($trna->seq()))
230            {
231                if ($trna->strand() eq "+")
232                {
233                    $trna->end() = $trna->start() + length($trna->seq()) - 1;
234                }
235                else
236                {
237                    $trna->end() = $trna->start() - length($trna->seq()) + 1;
238                }
239            }
240
241            $i = 0;
242            while (($i < $fp_tRNAs->get_count()) && ($fp_tRNAs->get($i)->position() < $trna->position()))
243            {
244                $i++;
245            }
246
247            $trna->seqname($seq_name);
248            $trna->hit_source($self->{tscan_mask});
249            $fp_tRNAs->insert($trna, $i);
250            $stats->increment_trnatotal();
251        }
252        $trna = tRNAscanSE::tRNA->new;
253    }        # while (&Parse_tscan_hit), more hits to process for cur seq
254}
255
256sub parse_tscan_hit
257{
258    my $self = shift;
259    my ($global_constants, $gc, $TSCANRAW, $trna) = @_;
260
261    if ($self->{tscan_version} <= 1.4)
262    {
263        while (<$TSCANRAW>)
264        {
265            if (/^start position=\s*(\d+)\s*end position=\s*(\d+)/o)
266            {
267                $trna->start($1);
268                $trna->end($2);
269                if ($trna->start() < $trna->end())
270                {
271                    $trna->strand("+");
272                    $trna->position($trna->start());
273                }
274                else
275                {
276                    $trna->strand("-");
277                    $trna->position($global_constants->get("really_big_number") - $trna->start() + 1);
278                }
279            }
280            elsif (/^potential tRNA sequence=\s(.+)\n/o)
281            {
282                $trna->seq($1);
283            }
284            elsif (/^tRNA predict as a tRNA-\s*(\S+)\s*: anticodon (\S+)/o)
285            {
286                $trna->isotype($1);
287                $trna->anticodon($2);
288            }
289            elsif (/^anticodon includes unknown bases/o)
290            {
291                $trna->isotype($gc->undef_isotype());
292                $trna->anticodon($gc->undef_anticodon());
293            }
294            elsif (/^potential intron between positions\s*(\d+)\s*(\d+)/o)
295            {
296                $trna->add_intron($1, $2, $1, $2, "CI");
297            }
298            # flag for end of current tRNA hit info
299            elsif (/^number of base pairing in the anticodon/o)
300            {
301                return 1;
302            }
303            elsif (/^number of predicted tRNA=(\d+)/o)
304            {
305                return 0;        # end of hits for this seq
306            }
307        }
308        return 0;                # reached end of raw hits file
309    }
310    else
311    {
312        die "FATAL: Illegal tRNAscan version selected.\n\n";
313    }
314}
315
316# check current hit for redundancy against all previous hits in hitlist
317# if it IS a repeat, merge it with overlapping hit and return 1
318# if it doesn't overlap with any hits, return 0
319sub merge_repeat_hit
320{
321    my $self = shift;
322    my ($stats, $fp_tRNAs, $trna) = @_;
323
324    for (my $i = 0; $i < $fp_tRNAs->get_count(); $i++)
325    {
326        if ($trna->strand() eq "+")
327        {
328            if (($fp_tRNAs->get($i)->strand() eq "+") &&
329                (&seg_overlap($trna->start(), $trna->end(), $fp_tRNAs->get($i)->start(), $fp_tRNAs->get($i)->end())))
330            {
331                $fp_tRNAs->get($i)->start(&min($trna->start(), $fp_tRNAs->get($i)->start()));
332                $fp_tRNAs->get($i)->end(&max($trna->end(), $fp_tRNAs->get($i)->end()));
333                $fp_tRNAs->get($i)->hit_source($fp_tRNAs->get($i)->hit_source() | $self->{tscan_mask});
334                $fp_tRNAs->get($i)->isotype($trna->isotype());
335                $fp_tRNAs->get($i)->score($trna->score());
336
337                # check to see if extended endpoint overlaps i+1 hit's start boundary
338                # if so, combine hit[i] and hit[i+1] into one hit and delete hit[i+1]
339                if (($i != ($fp_tRNAs->get_count() - 1)) and ($fp_tRNAs->get($i+1)->strand() eq "+")
340                    and ($fp_tRNAs->get($i)->end() >= $fp_tRNAs->get($i+1)->start()))
341                {
342                    $fp_tRNAs->get($i)->end(&max($fp_tRNAs->get($i)->end(), $fp_tRNAs->get($i+1)->end()));
343                    $fp_tRNAs->get($i)->hit_source($fp_tRNAs->get($i)->hit_source() | $fp_tRNAs->get($i+1)->hit_source());
344
345#                    splice(@$r_hit_list,$i+1,1);          # toss out overlapping hit
346                    $fp_tRNAs->remove($i+1);
347#                    $$r_trnact--;
348                    $stats->decrement_trnatotal();
349                }
350                return 1;                                 # exit loop immediately
351            }
352        }
353        else         # else (antisense) strand
354        {
355            if (($fp_tRNAs->get($i)->strand() eq "-") &&
356                (&seg_overlap($trna->end(), $trna->start(), $fp_tRNAs->get($i)->end(), $fp_tRNAs->get($i)->start())))
357            {
358                $fp_tRNAs->get($i)->start(&max($trna->start(), $fp_tRNAs->get($i)->start()));
359                $fp_tRNAs->get($i)->end(&min($trna->end(), $fp_tRNAs->get($i)->end()));
360                $fp_tRNAs->get($i)->hit_source($fp_tRNAs->get($i)->hit_source() | $self->{tscan_mask});
361                $fp_tRNAs->get($i)->isotype($trna->isotype());
362                $fp_tRNAs->get($i)->score($trna->score());
363
364                if (($i != ($fp_tRNAs->get_count() - 1)) and
365                    ($fp_tRNAs->get($i)->end() <= $fp_tRNAs->get($i+1)->start()))
366                {
367                    $fp_tRNAs->get($i)->end(&min($fp_tRNAs->get($i)->end(), $fp_tRNAs->get($i+1)->end()));
368                    $fp_tRNAs->get($i)->hit_source($fp_tRNAs->get($i)->hit_source() | $fp_tRNAs->get($i+1)->hit_source());
369
370#                    splice(@$r_hit_list,$i+1,1);          # toss out overlapping hit
371                    $fp_tRNAs->remove($i+1);
372#                    $$r_trnact--;
373                    $stats->decrement_trnatotal();
374                }
375                return 1;                                 # exit loop immediately
376            }
377        } # else (antisense) strand
378
379    }  # for each (hit)
380
381    return 0;                                             # current hit is not a repeat
382}
383
3841;