1# tRNAscanSE/ArrayCMscanResultFile.pm 2# This class defines the array of covariance model scanning result files used in tRNAscan-SE. 3# 4# -------------------------------------------------------------- 5# This module is part of the tRNAscan-SE program. 6# Copyright (C) 2017 Patricia Chan and Todd Lowe 7# -------------------------------------------------------------- 8# 9 10package tRNAscanSE::ArrayCMscanResults; 11 12use strict; 13use tRNAscanSE::CMscanResultFile; 14use tRNAscanSE::Utils; 15use tRNAscanSE::LogFile; 16 17sub new 18{ 19 my $class = shift; 20 my $log = shift; 21 my $self = {}; 22 23 $self->{files} = []; 24 $self->{indexes} = []; 25 $self->{FILE_H} = undef; 26 $self->{file_name} = ""; 27 $self->{log} = $log; 28 29 bless ($self, $class); 30 return $self; 31} 32 33sub DESTROY 34{ 35 my $self = shift; 36} 37 38sub clear 39{ 40 my $self = shift; 41 $self->{file_name} = ""; 42 @{$self->{files}} = (); 43 @{$self->{indexes}} = (); 44} 45 46sub file_name 47{ 48 my $self = shift; 49 if (@_) { $self->{file_name} = shift; } 50 return $self->{file_name}; 51} 52 53sub add_file 54{ 55 my $self = shift; 56 my ($file_name, $type) = @_; 57 my $file = tRNAscanSE::CMscanResultFile->new($file_name, $type); 58 push(@{$self->{files}}, $file); 59 return (scalar(@{$self->{files}}) - 1); 60} 61 62sub get_files 63{ 64 my $self = shift; 65 return @{$self->{files}}; 66} 67 68sub get_indexes 69{ 70 my $self = shift; 71 return @{$self->{indexes}}; 72} 73 74sub append_index 75{ 76 my $self = shift; 77 my ($file_index, $record_indexes) = @_; 78 my $record = []; 79 80 for (my $i = 0; $i < scalar(@$record_indexes); $i++) 81 { 82 $record = []; 83 push(@$record, $file_index); 84 push(@$record, @{$record_indexes->[$i]}); 85 push(@{$self->{indexes}}, $record); 86 } 87} 88 89sub get_result_count 90{ 91 my $self = shift; 92 return scalar(@{$self->{indexes}}); 93} 94 95sub merge_results 96{ 97 my $self = shift; 98 my $overlap_range = shift; 99 $self->sort_result_files(); 100 $self->set_initial_indexes(); 101 $self->merge_result_files($overlap_range); 102} 103 104sub sort_result_file 105{ 106 my $self = shift; 107 my $file_idx = shift; 108 109 if ($file_idx > -1 and $file_idx < scalar(@{$self->{files}})) 110 { 111 $self->{files}->[$file_idx]->sort_cmsearch_records(); 112 } 113} 114 115sub sort_result_files 116{ 117 my $self = shift; 118 119 for (my $i = 0; $i < scalar(@{$self->{files}}); $i++) 120 { 121 $self->{files}->[$i]->sort_cmsearch_records(); 122 } 123} 124 125sub set_initial_indexes 126{ 127 my $self = shift; 128 my $record = []; 129 130 if (scalar(@{$self->{files}}) > 0) 131 { 132 my @record_indexes = $self->{files}->[0]->get_indexes(); 133 $self->append_index(0, \@record_indexes); 134 $self->{files}->[0]->clear_indexes(); 135 } 136} 137 138sub merge_result_file 139{ 140 my $self = shift; 141 my $file_idx = shift; 142 my $overlap_range = shift; 143 my @record_indexes = (); 144 145 if ($file_idx > -1 and $file_idx < scalar(@{$self->{files}})) 146 { 147 $self->sort_result_file($file_idx); 148 if ($file_idx == 0) 149 { 150 $self->set_initial_indexes(); 151 } 152 else 153 { 154 @record_indexes = $self->{files}->[$file_idx]->get_indexes(); 155 $self->append_index($file_idx, \@record_indexes); 156 @{$self->{indexes}} = sort sort_by_tRNAscanSE_output @{$self->{indexes}}; 157 $self->{files}->[$file_idx]->clear_indexes(); 158 } 159 $self->merge_indexes($overlap_range); 160 } 161} 162 163sub merge_result_files 164{ 165 my $self = shift; 166 my $overlap_range = shift; 167 my @record_indexes = (); 168 169 for (my $i = 1; $i < scalar(@{$self->{files}}); $i++) 170 { 171 @record_indexes = $self->{files}->[$i]->get_indexes(); 172 $self->append_index($i, \@record_indexes); 173 @{$self->{indexes}} = sort sort_by_tRNAscanSE_output @{$self->{indexes}}; 174 $self->merge_indexes($overlap_range); 175 $self->{files}->[$i]->clear_indexes(); 176 } 177} 178 179sub merge_indexes 180{ 181 my $self = shift; 182 my $overlap_range = shift; 183 184 my $hit_overlap = 0; 185 for (my $i = scalar(@{$self->{indexes}}) - 2; $i >= 0; $i--) 186 { 187 $hit_overlap = eval(($self->{indexes}->[$i]->[2] eq $self->{indexes}->[$i+1]->[2]) && 188 (&seg_overlap($self->{indexes}->[$i]->[3], $self->{indexes}->[$i]->[4], $self->{indexes}->[$i+1]->[3], $self->{indexes}->[$i+1]->[4], $overlap_range))); 189 if ($hit_overlap) 190 { 191 if ($self->{indexes}->[$i]->[6] >= $self->{indexes}->[$i+1]->[6]) 192 { 193 splice(@{$self->{indexes}}, $i + 1, 1); 194 } 195 else 196 { 197 splice(@{$self->{indexes}}, $i, 1); 198 } 199 } 200 } 201} 202 203sub sort_by_tRNAscanSE_output 204{ 205 if ((($a->[5] eq $b->[5]) && ($a->[5] eq "+")) || 206 ($a->[5] ne $b->[5])) 207 { 208 return ($a->[2] cmp $b->[2] || 209 $a->[5] cmp $b->[5] || 210 $a->[3] <=> $b->[3] || 211 $b->[6] <=> $a->[6]); 212 } 213 if (($a->[5] eq $b->[5]) && ($a->[5] eq "-")) 214 { 215 return ($a->[2] cmp $b->[2] || 216 $b->[5] cmp $a->[5] || 217 $b->[4] <=> $a->[4] || 218 $b->[6] <=> $a->[6]); 219 } 220} 221 222sub write_merge_file 223{ 224 my $self = shift; 225 my $merge_file = shift; 226 my $format = shift; 227 228 $self->{file_name} = $merge_file; 229 &open_for_write(\$self->{FILE_H}, $self->{file_name}); 230 my $fh = $self->{FILE_H}; 231 $self->open_result_files(); 232 $self->parse_result_files($format); 233 $self->close_result_files(); 234 close($fh); 235} 236 237sub open_result_files 238{ 239 my $self = shift; 240 241 for (my $i = 0; $i < scalar(@{$self->{files}}); $i++) 242 { 243 if (!$self->{files}->[$i]->open_file()) 244 { 245 $self->{log}->error("Fail to open cmsearch output file ".$self->{files}->[$i]->file_name()); 246 } 247 } 248} 249 250sub close_result_files 251{ 252 my $self = shift; 253 254 for (my $i = 0; $i < scalar(@{$self->{files}}); $i++) 255 { 256 $self->{files}->[$i]->close_file(); 257 } 258} 259 260sub parse_result_files 261{ 262 my $self = shift; 263 my $format = shift; 264 my $fh = $self->{FILE_H}; 265 my $file = undef; 266 my ($ss, $seq, $model, $nc); 267 268 for (my $i = 0; $i < scalar(@{$self->{indexes}}); $i++) 269 { 270 $file = $self->{files}->[$self->{indexes}->[$i]->[0]]; 271 ($ss, $seq, $model, $nc) = $file->get_cmsearch_record($self->{indexes}->[$i]->[1], $format); 272 273 for (my $j = 2; $j < scalar(@{$self->{indexes}->[$i]}); $j++) 274 { 275 print $fh $self->{indexes}->[$i]->[$j]."\t"; 276 } 277 print $fh $ss."\t".$seq."\t".$model."\t".$nc."\t".$file->type()."\n"; 278 } 279} 280 281 282sub open_file 283{ 284 my $self = shift; 285 my $file_name = shift; 286 my $success = 0; 287 288 if ($file_name ne "") 289 { 290 $self->{file_name} = $file_name; 291 &open_for_read(\$self->{FILE_H}, $self->{file_name}); 292 $success = 1; 293 } 294 else 295 { 296 die "Merge result file name is not set.\n" 297 } 298 299 return $success; 300} 301 302sub close_file 303{ 304 my $self = shift; 305 306 if (defined $self->{FILE_H}) 307 { 308 close($self->{FILE_H}); 309 } 310} 311 312sub get_next_cmsearch_hit 313{ 314 my $self = shift; 315 my ($trna) = @_; 316 my $fh = $self->{FILE_H}; 317 318 my $line = ""; 319 my $cm_model = ""; 320 my @columns = (); 321 $trna->clear(); 322 323 if (defined $fh and $line = <$fh>) 324 { 325 chomp($line); 326 @columns = split(/\t/, $line); 327 328 $trna->seqname($columns[0]); 329 $trna->score($columns[4]); 330 if ($columns[3] eq "+") 331 { 332 $trna->start($columns[1]); 333 $trna->end($columns[2]); 334 } 335 else 336 { 337 $trna->start($columns[2]); 338 $trna->end($columns[1]); 339 } 340 $trna->strand($columns[3]); 341 $trna->trunc($columns[5]); 342 $trna->ss($columns[6]); 343 $trna->seq($columns[7]); 344 $trna->model($columns[10]); 345 $trna->hit_source("Inf"); 346 $cm_model = $columns[8]; 347 } 348 349 return $cm_model; 350} 351 3521; 353