1# tRNAscanSE/ArrayCMscanResultFile.pm
2# This class defines the array of covariance model scanning result files used in tRNAscan-SE.
3#
4# --------------------------------------------------------------
5# This module is part of the tRNAscan-SE program.
6# Copyright (C) 2017 Patricia Chan and Todd Lowe
7# --------------------------------------------------------------
8#
9
10package tRNAscanSE::ArrayCMscanResults;
11
12use strict;
13use tRNAscanSE::CMscanResultFile;
14use tRNAscanSE::Utils;
15use tRNAscanSE::LogFile;
16
17sub new
18{
19    my $class = shift;
20    my $log = shift;
21    my $self = {};
22
23    $self->{files} = [];
24    $self->{indexes} = [];
25    $self->{FILE_H} = undef;
26    $self->{file_name} = "";
27    $self->{log} = $log;
28
29    bless ($self, $class);
30    return $self;
31}
32
33sub DESTROY
34{
35    my $self = shift;
36}
37
38sub clear
39{
40    my $self = shift;
41    $self->{file_name} = "";
42    @{$self->{files}} = ();
43    @{$self->{indexes}} = ();
44}
45
46sub file_name
47{
48    my $self = shift;
49    if (@_) { $self->{file_name} = shift; }
50    return $self->{file_name};
51}
52
53sub add_file
54{
55    my $self = shift;
56    my ($file_name, $type) = @_;
57    my $file = tRNAscanSE::CMscanResultFile->new($file_name, $type);
58    push(@{$self->{files}}, $file);
59    return (scalar(@{$self->{files}}) - 1);
60}
61
62sub get_files
63{
64    my $self = shift;
65    return @{$self->{files}};
66}
67
68sub get_indexes
69{
70    my $self = shift;
71    return @{$self->{indexes}};
72}
73
74sub append_index
75{
76    my $self = shift;
77    my ($file_index, $record_indexes) = @_;
78    my $record = [];
79
80    for (my $i = 0; $i < scalar(@$record_indexes); $i++)
81    {
82        $record = [];
83        push(@$record, $file_index);
84        push(@$record, @{$record_indexes->[$i]});
85        push(@{$self->{indexes}}, $record);
86    }
87}
88
89sub get_result_count
90{
91    my $self = shift;
92    return scalar(@{$self->{indexes}});
93}
94
95sub merge_results
96{
97    my $self = shift;
98    my $overlap_range = shift;
99    $self->sort_result_files();
100    $self->set_initial_indexes();
101    $self->merge_result_files($overlap_range);
102}
103
104sub sort_result_file
105{
106    my $self = shift;
107    my $file_idx = shift;
108
109    if ($file_idx > -1 and $file_idx < scalar(@{$self->{files}}))
110    {
111        $self->{files}->[$file_idx]->sort_cmsearch_records();
112    }
113}
114
115sub sort_result_files
116{
117    my $self = shift;
118
119    for (my $i = 0; $i < scalar(@{$self->{files}}); $i++)
120    {
121        $self->{files}->[$i]->sort_cmsearch_records();
122    }
123}
124
125sub set_initial_indexes
126{
127    my $self = shift;
128    my $record = [];
129
130    if (scalar(@{$self->{files}}) > 0)
131    {
132        my @record_indexes = $self->{files}->[0]->get_indexes();
133        $self->append_index(0, \@record_indexes);
134        $self->{files}->[0]->clear_indexes();
135    }
136}
137
138sub merge_result_file
139{
140    my $self = shift;
141    my $file_idx = shift;
142    my $overlap_range = shift;
143    my @record_indexes = ();
144
145    if ($file_idx > -1 and $file_idx < scalar(@{$self->{files}}))
146    {
147        $self->sort_result_file($file_idx);
148        if ($file_idx == 0)
149        {
150            $self->set_initial_indexes();
151        }
152        else
153        {
154            @record_indexes = $self->{files}->[$file_idx]->get_indexes();
155            $self->append_index($file_idx, \@record_indexes);
156            @{$self->{indexes}} = sort sort_by_tRNAscanSE_output @{$self->{indexes}};
157            $self->{files}->[$file_idx]->clear_indexes();
158        }
159        $self->merge_indexes($overlap_range);
160    }
161}
162
163sub merge_result_files
164{
165    my $self = shift;
166    my $overlap_range = shift;
167    my @record_indexes = ();
168
169    for (my $i = 1; $i < scalar(@{$self->{files}}); $i++)
170    {
171        @record_indexes = $self->{files}->[$i]->get_indexes();
172        $self->append_index($i, \@record_indexes);
173        @{$self->{indexes}} = sort sort_by_tRNAscanSE_output @{$self->{indexes}};
174        $self->merge_indexes($overlap_range);
175        $self->{files}->[$i]->clear_indexes();
176    }
177}
178
179sub merge_indexes
180{
181    my $self = shift;
182    my $overlap_range = shift;
183
184    my $hit_overlap = 0;
185    for (my $i = scalar(@{$self->{indexes}}) - 2; $i >= 0; $i--)
186    {
187        $hit_overlap = eval(($self->{indexes}->[$i]->[2] eq $self->{indexes}->[$i+1]->[2]) &&
188                 (&seg_overlap($self->{indexes}->[$i]->[3], $self->{indexes}->[$i]->[4], $self->{indexes}->[$i+1]->[3], $self->{indexes}->[$i+1]->[4], $overlap_range)));
189        if ($hit_overlap)
190        {
191            if ($self->{indexes}->[$i]->[6] >= $self->{indexes}->[$i+1]->[6])
192            {
193                splice(@{$self->{indexes}}, $i + 1, 1);
194            }
195            else
196            {
197                splice(@{$self->{indexes}}, $i, 1);
198            }
199        }
200    }
201}
202
203sub sort_by_tRNAscanSE_output
204{
205    if ((($a->[5] eq $b->[5]) && ($a->[5] eq "+")) ||
206        ($a->[5] ne $b->[5]))
207    {
208        return ($a->[2] cmp $b->[2] ||
209                $a->[5] cmp $b->[5] ||
210                $a->[3] <=> $b->[3] ||
211                $b->[6] <=> $a->[6]);
212    }
213    if (($a->[5] eq $b->[5]) && ($a->[5] eq "-"))
214    {
215        return ($a->[2] cmp $b->[2] ||
216                $b->[5] cmp $a->[5] ||
217                $b->[4] <=> $a->[4] ||
218                $b->[6] <=> $a->[6]);
219    }
220}
221
222sub write_merge_file
223{
224    my $self = shift;
225    my $merge_file = shift;
226    my $format = shift;
227
228    $self->{file_name} = $merge_file;
229    &open_for_write(\$self->{FILE_H}, $self->{file_name});
230    my $fh = $self->{FILE_H};
231    $self->open_result_files();
232    $self->parse_result_files($format);
233    $self->close_result_files();
234    close($fh);
235}
236
237sub open_result_files
238{
239    my $self = shift;
240
241    for (my $i = 0; $i < scalar(@{$self->{files}}); $i++)
242    {
243        if (!$self->{files}->[$i]->open_file())
244        {
245            $self->{log}->error("Fail to open cmsearch output file ".$self->{files}->[$i]->file_name());
246        }
247    }
248}
249
250sub close_result_files
251{
252    my $self = shift;
253
254    for (my $i = 0; $i < scalar(@{$self->{files}}); $i++)
255    {
256        $self->{files}->[$i]->close_file();
257    }
258}
259
260sub parse_result_files
261{
262    my $self = shift;
263    my $format = shift;
264    my $fh = $self->{FILE_H};
265    my $file = undef;
266    my ($ss, $seq, $model, $nc);
267
268    for (my $i = 0; $i < scalar(@{$self->{indexes}}); $i++)
269    {
270        $file = $self->{files}->[$self->{indexes}->[$i]->[0]];
271        ($ss, $seq, $model, $nc) = $file->get_cmsearch_record($self->{indexes}->[$i]->[1], $format);
272
273        for (my $j = 2; $j < scalar(@{$self->{indexes}->[$i]}); $j++)
274        {
275            print $fh $self->{indexes}->[$i]->[$j]."\t";
276        }
277        print $fh $ss."\t".$seq."\t".$model."\t".$nc."\t".$file->type()."\n";
278    }
279}
280
281
282sub open_file
283{
284    my $self = shift;
285    my $file_name = shift;
286    my $success = 0;
287
288    if ($file_name ne "")
289    {
290        $self->{file_name} = $file_name;
291        &open_for_read(\$self->{FILE_H}, $self->{file_name});
292        $success = 1;
293    }
294    else
295    {
296        die "Merge result file name is not set.\n"
297    }
298
299    return $success;
300}
301
302sub close_file
303{
304    my $self = shift;
305
306    if (defined $self->{FILE_H})
307    {
308        close($self->{FILE_H});
309    }
310}
311
312sub get_next_cmsearch_hit
313{
314    my $self = shift;
315    my ($trna) = @_;
316    my $fh = $self->{FILE_H};
317
318    my $line = "";
319    my $cm_model = "";
320    my @columns = ();
321    $trna->clear();
322
323    if (defined $fh and $line = <$fh>)
324    {
325        chomp($line);
326        @columns = split(/\t/, $line);
327
328        $trna->seqname($columns[0]);
329        $trna->score($columns[4]);
330        if ($columns[3] eq "+")
331        {
332            $trna->start($columns[1]);
333            $trna->end($columns[2]);
334        }
335        else
336        {
337            $trna->start($columns[2]);
338            $trna->end($columns[1]);
339        }
340        $trna->strand($columns[3]);
341        $trna->trunc($columns[5]);
342        $trna->ss($columns[6]);
343        $trna->seq($columns[7]);
344        $trna->model($columns[10]);
345        $trna->hit_source("Inf");
346        $cm_model = $columns[8];
347    }
348
349    return $cm_model;
350}
351
3521;
353