1# tRNAscanSE/FpScanResultFile.pm
2# This class defines the first pass scanning result file used in tRNAscan-SE.
3#
4# --------------------------------------------------------------
5# This module is part of the tRNAscan-SE program.
6# Copyright (C) 2017 Patricia Chan and Todd Lowe
7# --------------------------------------------------------------
8#
9
10package tRNAscanSE::FpScanResultFile;
11
12use strict;
13use tRNAscanSE::Utils;
14use tRNAscanSE::Sequence;
15use tRNAscanSE::tRNA;
16use tRNAscanSE::ArraytRNA;
17use tRNAscanSE::Options;
18
19sub new
20{
21    my $class = shift;
22    my $file_name = shift;
23    my $self = {
24        file_name => $file_name,
25        FILE_H => undef,
26        FLANKING_H => undef
27    };
28
29    $self->{indexes} = [];
30    $self->{flanking_file} = "";
31
32    bless ($self, $class);
33    $self->reset_current_seq();
34    return $self;
35}
36
37sub DESTROY
38{
39    my $self = shift;
40}
41
42sub clear
43{
44    my $self = shift;
45    $self->{file_name} = "";
46    @{$self->{indexes}} = ();
47    $self->reset_current_seq();
48}
49
50sub file_name
51{
52    my $self = shift;
53    if (@_) { $self->{file_name} = shift; }
54    return $self->{file_name};
55}
56
57sub flanking_file
58{
59    my $self = shift;
60    if (@_) { $self->{flanking_file} = shift; }
61    return $self->{flanking_file};
62}
63
64sub get_indexes
65{
66    my $self = shift;
67    return @{$self->{indexes}};
68}
69
70sub clear_indexes
71{
72    my $self = shift;
73    @{$self->{indexes}} = ();
74}
75
76sub get_hit_count
77{
78    my $self = shift;
79    my $count = 0;
80
81    for (my $i = 0; $i < scalar(@{$self->{indexes}}); $i++)
82    {
83        $count += $self->{indexes}->[$i]->[2];
84    }
85    return $count;
86}
87
88sub get_pos
89{
90    my $self = shift;
91    my $index = shift;
92    my $pos = undef;
93
94    if ($index > -1 and $index < scalar(@{$self->{indexes}}))
95    {
96        $pos = $self->{indexes}->[$index]->[0];
97    }
98    return $pos;
99}
100
101sub reset_current_seq
102{
103    my $self = shift;
104    $self->{current_seq} = -1;
105    $self->{current_record} = 0;
106}
107
108sub open_file
109{
110    my $self = shift;
111
112    my $success = 0;
113
114    if ($self->{file_name} ne "")
115    {
116        &open_for_read(\$self->{FILE_H}, $self->{file_name});
117        $success = 1;
118    }
119    else
120    {
121        die "First pass result file name is not set.\n"
122    }
123
124    return $success;
125}
126
127sub close_file
128{
129    my $self = shift;
130
131    if (defined $self->{FILE_H})
132    {
133        close($self->{FILE_H});
134    }
135}
136
137sub init_fp_result_file
138{
139    my $self = shift;
140    my ($file) = @_;
141    $self->{file_name} = $file;
142
143    &open_for_append(\$self->{FILE_H}, $self->{file_name});
144    my $fh = $self->{FILE_H};
145
146    print $fh "Sequence\t\ttRNA Bounds\ttRNA\tAnti\t\n";
147	print $fh "Name     \ttRNA #\tBegin\tEnd\tType\tCodon\tSeqID\tSeqLen\tScore\n";
148	print $fh "--------\t------\t-----\t---\t----\t-----\t-----\t------\t-----\n";
149
150    $self->close_file();
151}
152
153sub save_firstpass_output
154{
155    my $self = shift;
156    my ($opts, $fp_tRNAs, $r_fpass_trna_base_ct, $seq_len, $seq_id) = @_;
157    my $triplet = "";
158
159    my @source_tab = ('Inf', 'Ts', 'Eu', 'Bo');
160
161    if (!$opts->CM_mode())
162	{
163		&open_for_append(\$self->{FILE_H}, $opts->out_file());
164    }
165    else
166	{
167		&open_for_append(\$self->{FILE_H}, $self->{file_name});
168    }
169    my $fh = $self->{FILE_H};
170
171	for (my $i = 0; $i < $fp_tRNAs->get_count(); $i++)
172	{
173		my $trna = $fp_tRNAs->get($i);
174
175		$triplet = uc($trna->anticodon());
176		if ($opts->output_codon())
177		{
178			$triplet = &rev_comp_seq($triplet);
179		}
180
181		printf $fh "%-10s\t%d\t%d\t%d\t%s\t%s\t",
182			$trna->seqname(), $i + 1, $trna->start(), $trna->end(), $trna->isotype(), $triplet;
183
184		# save intron bounds if not doing covariance model analysis
185		if (!$opts->CM_mode())
186		{
187			if ($trna->get_intron_count() > 0)
188			{
189				my @ar_introns = $trna->ar_introns();
190				printf $fh "%d\t%d\t", $ar_introns[0]->{rel_start}, $ar_introns[0]->{rel_end};
191			}
192			else
193			{
194				printf $fh "0\t0\t";
195			}
196			printf $fh "%.2f", $trna->score();
197		}
198		# save seq id number and source seq length if needed for covariance model analysis
199		else
200		{
201			printf $fh "%d\t%d\t%.2f", $seq_id, $seq_len, $trna->score();
202			if ($trna->model() ne "")
203			{
204				printf $fh "\t%s", $trna->model();
205			}
206			else
207			{
208				printf $fh "\tnone";
209			}
210		}
211
212		if ($opts->save_source())
213		{
214			print $fh "\t", $source_tab[$trna->hit_source()];
215		}
216		print $fh "\n";
217
218		$$r_fpass_trna_base_ct += abs($trna->end() - $trna->start()) + 1;
219    }
220    $self->close_file();
221}
222
223# Create dummy first-pass result file with all sequences
224sub prep_for_secpass_only
225{
226    my $self = shift;
227    my ($opts, $stats, $seq_file) = @_;
228
229    $seq_file->open_file($opts->fasta_file(), "read");
230    &open_for_append(\$self->{FILE_H}, $self->{file_name});
231    my $fh = $self->{FILE_H};
232
233    # Don't look for a specific Seq number
234    while ($seq_file->read_fasta($opts, 0))
235	{
236		print $fh $seq_file->seq_name()."\t1\t1\t".$seq_file->seq_length()."\t???\t???\t".$seq_file->seq_id()."\t".$seq_file->seq_length()." C none\n";
237		$stats->increment_numscanned();
238    }
239
240    $self->close_file();
241    $seq_file->close_file();
242}
243
244sub index_results
245{
246    my $self = shift;
247    my ($r_seqinfo_flag) = @_;
248    my $line = "";
249    my $filepos = undef;
250    my $line_ct = 0;
251    my $prev_seqname = "";
252    my $seqname = "";
253    my $record = [];
254
255    if ($self->open_file())
256    {
257        my $fh = $self->{FILE_H};
258        $filepos = tell($fh);
259        while ($line = <$fh>)
260        {
261            if ($line =~ /Type\tCodon\tSeqID\tSeqLen/)
262            {
263                # Column header present if we record seqID's and lengths
264                $$r_seqinfo_flag = 1;
265            }
266            elsif ($line =~ /^(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)/o)
267            {
268                $seqname = $1;
269                if ($seqname ne $prev_seqname)
270                {
271                    $record = [];
272                    push(@$record, $filepos);
273                    push(@$record, $seqname);
274                    push(@$record, 0);
275                    push(@{$self->{indexes}}, $record);
276
277                    if ($prev_seqname ne "")
278                    {
279                        $self->{indexes}->[scalar(@{$self->{indexes}})-2]->[2] = $line_ct;
280                    }
281
282                    $prev_seqname = $seqname;
283                    $line_ct = 0;
284                }
285                $line_ct++;
286            }
287            $filepos = tell($fh);
288        }
289        if ($prev_seqname ne "")
290        {
291            $self->{indexes}->[scalar(@{$self->{indexes}})-1]->[2] = $line_ct;
292        }
293        $self->close_file();
294    }
295}
296
297sub get_next_tRNA_candidate
298{
299    my $self = shift;
300    my ($opts, $seqinfo_flag, $seq_ct, $trna) = @_;
301    my $fh = $self->{FILE_H};
302    my $line = "";
303    my $padding = $opts->padding();
304    my ($trnact, $hit_source);
305
306    $trna->clear();
307    my $record = $self->{indexes}->[$self->{current_seq}];
308    if ($self->{current_seq} != $seq_ct)
309    {
310        $self->{current_seq} = $seq_ct;
311        $self->{current_record} = 0;
312        $record = $self->{indexes}->[$seq_ct];
313        seek($fh, $record->[0], 0);
314    }
315
316    if (defined $fh and $line = <$fh> and ($self->{current_record} < $record->[2]))
317    {
318		if ($line =~ /^(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)/o)
319		{
320			$trna->seqname($1);
321			$trnact = $2;
322            $trna->id($trnact);
323			$trna->tRNAscan_id($trna->seqname().".t".$trnact);
324			$trna->start($3);	        # trna subseq absolute start index
325			$trna->end($4);				# trna subseq absolute end index
326			$trna->isotype($5);
327			$trna->anticodon($6);
328			$trna->src_seqid($7);
329			$trna->src_seqlen($8);
330			$trna->score($9);
331			$trna->model($10);
332			$hit_source = $';
333			$hit_source =~ s/[\s\t\n]//g;
334			$trna->hit_source($hit_source);
335
336			# if seqinfo_flag not set, file does not have SeqID info in
337			#  7th column of output, don't mistake number read for SeqID
338			if (!$seqinfo_flag)
339			{
340				$trna->src_seqid(0);
341			}
342
343			if ($trna->end() > $trna->start())
344			{
345				$trna->strand("+");
346
347				# pad ends of sequence only if EufindtRNA or Infernal first-pass is being used
348				#  and $seqinfo_flag is set (we know the seq lengths)
349				if (($opts->eufind_mode() || $opts->infernal_fp()) && $seqinfo_flag)
350				{
351					$trna->start(&max(1, $trna->start() - $padding));
352					$trna->end(&min($trna->src_seqlen(), $trna->end() + $padding));
353				}
354			}
355			else
356			{
357				$trna->strand("-");
358
359				if (($opts->eufind_mode() || $opts->infernal_fp()) && $seqinfo_flag)
360				{
361					$trna->start(&min($trna->src_seqlen(), $trna->start() + $padding));
362					$trna->end(&max(1, $trna->end() - $padding));
363				}
364		    }
365
366			if ($trna->end() == $trna->start())
367			{
368				print STDERR "Error reading $self->{file_name}: tRNA of length 0\n";
369			}
370
371		}
372        $self->{current_record}++;
373    }
374}
375
376sub open_flanking
377{
378    my $self = shift;
379    my $mode = shift;
380
381    my $success = 0;
382
383    if ($self->{flanking_file} ne "")
384    {
385        if ($mode eq "write")
386        {
387            &open_for_write(\$self->{FLANKING_H}, $self->{flanking_file});
388        }
389        else
390        {
391            &open_for_read(\$self->{FLANKING_H}, $self->{flanking_file});
392        }
393        $success = 1;
394    }
395    else
396    {
397        die "First pass flanking file name is not set.\n"
398    }
399
400    return $success;
401}
402
403sub close_flanking
404{
405    my $self = shift;
406
407    if (defined $self->{FLANKING_H})
408    {
409        close($self->{FLANKING_H});
410    }
411}
412
413sub write_tRNA_flanking
414{
415    my $self = shift;
416    my $trna = shift;
417
418    if (defined $self->{FLANKING_H})
419    {
420        my $fh = $self->{FLANKING_H};
421        print $fh $trna->seqname()."\t".$trna->id()."\t".$trna->seq()."\t".$trna->upstream()."\t".$trna->downstream()."\n";
422    }
423}
424
425sub read_tRNA_flanking
426{
427    my $self = shift;
428    my $trna = shift;
429
430    my $fh = $self->{FLANKING_H};
431    my $line = "";
432
433    if (defined $fh and $line = <$fh>)
434    {
435        chomp($line);
436        my @columns = split(/\t/, $line);
437        if ($trna->seqname() eq $columns[0] and $trna->id() == $columns[1])
438        {
439            $trna->seq($columns[2]);
440            $trna->upstream($columns[3]);
441            $trna->downstream($columns[4]);
442        }
443    }
444}
445
4461;
447