1# tRNAscanSE/ScanResult.pm
2# This class describes the outputs of scan results used in tRNAscan-SE.
3#
4# --------------------------------------------------------------
5# This module is part of the tRNAscan-SE program.
6# Copyright (C) 2017 Patricia Chan and Todd Lowe
7# --------------------------------------------------------------
8#
9
10package tRNAscanSE::ScanResult;
11
12use strict;
13use tRNAscanSE::Utils;
14use tRNAscanSE::Sequence;
15use tRNAscanSE::tRNA;
16use tRNAscanSE::ArraytRNA;
17use tRNAscanSE::IntResultFile;
18use tRNAscanSE::MultiResultFile;
19use tRNAscanSE::Options;
20
21require Exporter;
22our @ISA = qw(Exporter);
23our @EXPORT = qw(save_Acedb_from_firstpass write_tRNA write_tRNAs output_tRNA write_bed output_split_fragments print_results_header);
24
25our $printed_header = 0;            # keeps track of whether or
26                                    # or not results column header
27                                    # has been printed yet
28our ($max_seq_name_width, $max_seq_len_width);
29
30
31sub save_Acedb_from_firstpass
32{
33    my ($output_codon, $r_one_let_trans_map, $fp_tRNAs, $out_file) = @_;
34    my $triplet = "";
35
36    &open_for_append(\*FILE_H, $out_file);
37
38	for (my $i = 0; $i < $fp_tRNAs->get_count(); $i++)
39	{
40		my $trna = $fp_tRNAs->get($i);
41
42		printf FILE_H "Sequence\t%s\nSubsequence\t%s.t%d %d %d\n\n",
43			$trna->seqname(), $trna->seqname(), $i + 1, $trna->start(), $trna->end();
44		printf FILE_H "Sequence\t%s.t%d\nSource\t\t%s\n", $trna->seqname(), $i + 1, $trna->seqname();
45
46		if ($trna->get_intron_count() > 0)
47		{
48			my @ar_introns = $trna->ar_introns();
49
50			if ($ar_introns[0]->{rel_start} < $ar_introns[0]->{rel_end})
51			{
52				printf FILE_H "Source_Exons\t1 %d\n", ($ar_introns[0]->{rel_start} - $trna->start());
53				printf FILE_H "Source_Exons\t%d %d\n",
54					$ar_introns[0]->{rel_end} - $trna->start() + 2,
55					$trna->end() - $trna->start() + 1;
56			}
57			else
58			{
59				printf FILE_H "Source_Exons\t1 %d\n", ($trna->start() - $ar_introns[0]->{rel_start} + 1);
60				printf FILE_H "Source_Exons\t%d %d\n",
61					$trna->start() - $ar_introns[0]->{rel_end} + 2,
62					$trna->start() - $trna->end() + 1;
63			}
64		}
65		printf FILE_H "Brief_identification tRNA-%s\n", $trna->isotype();
66
67		# either output Codon or Anticodon for tRNA
68		$triplet = uc($trna->anticodon());
69		if ($output_codon)
70		{
71			$triplet = &rev_comp_seq($triplet);
72		}
73
74		printf FILE_H "Transcript tRNA \"%s %s %s\"\n\n",
75			$triplet, $trna->isotype(), $r_one_let_trans_map->{$trna->isotype()};
76
77    }
78    close(FILE_H);
79}
80
81sub print_results_header
82{
83    my ($TABOUT, $opts, $get_hmm_score, $seq_name_width, $seq_len_width, $fp) = @_;
84    my ($label, $codon_label) = "";
85
86    if ($opts->cove_mode())
87	{
88		$label = "\tCove";
89    }
90    elsif ($opts->infernal_mode())
91	{
92		$label = "\tInf";
93    }
94    elsif ($opts->eufind_mode() && !$opts->tscan_mode())
95	{
96		$label = "\tEufind";
97    }
98
99    if ($opts->output_codon())
100	{
101		$codon_label = "   ";
102    }
103    else
104	{
105		$codon_label = "Anti";
106    }
107
108    if (!($opts->ace_output()))
109	{
110		printf {$$TABOUT} "%-".$seq_name_width."s\t\t","Sequence";
111		printf {$$TABOUT} "%-".$seq_len_width."s\t","tRNA";
112		printf {$$TABOUT} "%-".$seq_len_width."s\t","Bounds";
113		print  {$$TABOUT} "tRNA\t$codon_label\tIntron Bounds",$label;
114
115		if ($get_hmm_score)
116		{
117			print {$$TABOUT} "\tHMM\t2'Str";
118		}
119		if ($opts->infernal_score())
120		{
121			print {$$TABOUT} "\tInf";
122		}
123		if ($opts->save_source())
124		{
125			print {$$TABOUT} "\tHit";
126		}
127		if (!$fp and ((($opts->euk_mode() or $opts->bact_mode() or $opts->arch_mode()) and !$opts->no_isotype() and $opts->detail()) or $opts->metagenome_mode()))
128		{
129			print {$$TABOUT} "\tIsotype\tIsotype";
130			if ($opts->euk_mode() and $opts->mito_model() ne "")
131			{
132				print {$$TABOUT} "\tType";
133			}
134		}
135		print {$$TABOUT} "\t      ";
136		print {$$TABOUT} "\n";
137
138		printf {$$TABOUT} "%-".$seq_name_width."s\t","Name";
139		print  {$$TABOUT} "tRNA \#\t";
140		printf {$$TABOUT} "%-".$seq_len_width."s\t","Begin";
141		printf {$$TABOUT} "%-".$seq_len_width."s\t","End";
142
143		print {$$TABOUT} "Type\tCodon\tBegin\tEnd\tScore";
144
145		if  ($get_hmm_score)
146		{
147			print {$$TABOUT} "\tScore\tScore";
148		}
149		if ($opts->infernal_score())
150		{
151			print {$$TABOUT} "\tScore";
152		}
153		if ($opts->save_source())
154		{
155			print {$$TABOUT} "\tOrigin";
156		}
157		if (!$fp and ((($opts->euk_mode() or $opts->bact_mode() or $opts->arch_mode()) and !$opts->no_isotype() and $opts->detail()) or $opts->metagenome_mode()))
158		{
159			print {$$TABOUT} "\tCM\tScore";
160			if ($opts->euk_mode() and $opts->mito_model() ne "")
161			{
162				print {$$TABOUT} "\t         ";
163			}
164		}
165		print {$$TABOUT} "\tNote";
166		print {$$TABOUT} "\n";
167
168		printf {$$TABOUT} "%-".$seq_name_width."s\t","--------";
169		print  {$$TABOUT} "------\t";
170		printf {$$TABOUT} "%-".$seq_len_width."s\t","-----";
171		printf {$$TABOUT} "%-".$seq_len_width."s\t","------";
172		print  {$$TABOUT} "----\t-----\t-----\t----\t------";
173
174		if  ($get_hmm_score)
175		{
176			print {$$TABOUT} "\t-----\t-----";
177		}
178		if ($opts->infernal_score())
179		{
180			print {$$TABOUT} "\t-----";
181		}
182		if ($opts->save_source())
183		{
184			print {$$TABOUT} "\t------";
185		}
186		if (!$fp and ((($opts->euk_mode() or $opts->bact_mode() or $opts->arch_mode()) and !$opts->no_isotype() and $opts->detail()) or $opts->metagenome_mode()))
187		{
188			print {$$TABOUT} "\t-------\t-------";
189			if ($opts->euk_mode() and $opts->mito_model() ne "")
190			{
191				print {$$TABOUT} "\t---------";
192			}
193		}
194		print {$$TABOUT} "\t------";
195		print {$$TABOUT} "\n";
196    }
197}
198
199sub write_tRNA
200{
201    my ($file_name, $seq_name, $seq_desc, $seq, $overwrite) = @_;
202
203    my $trna_file = tRNAscanSE::Sequence->new;
204    my $write_mode = "append";
205    if ($overwrite) {
206		$write_mode = "write";
207    }
208    $trna_file->set_seq_info($seq_name, $seq_desc, length($seq), $seq);
209    $trna_file->open_file($file_name, $write_mode);
210    $trna_file->write_fasta();
211    $trna_file->close_file();
212}
213
214sub write_tRNAs
215{
216    my ($file_name, $sp_int_results, $mat_seq, $overwrite, $model) = @_;
217
218	my $count = 0;
219    my $trna_file = tRNAscanSE::Sequence->new;
220    my $write_mode = "append";
221    if ($overwrite)
222	{
223		$write_mode = "write";
224    }
225	my $seq = "";
226	my $tRNA = tRNAscanSE::tRNA->new;
227    $trna_file->open_file($file_name, $write_mode);
228	if ($sp_int_results->open_file("read"))
229	{
230        my @record_indexes = $sp_int_results->get_indexes();
231
232        for (my $i = 0; $i < scalar(@record_indexes); $i++)
233        {
234			$sp_int_results->get_tRNA($record_indexes[$i]->[0], $tRNA);
235			if ($model eq "" or $tRNA->model() eq $model)
236			{
237				if ($mat_seq)
238				{
239					$seq = $tRNA->mat_seq();
240				}
241				else
242				{
243					$seq = $tRNA->seq();
244				}
245				$trna_file->set_seq_info($tRNA->seqname().".t".&pad_num($tRNA->id(), 6), "", length($seq), $seq);
246				$trna_file->write_fasta();
247				$count++;
248			}
249		}
250	}
251    $trna_file->close_file();
252
253	return $count;
254}
255
256# Write final tRNA prediction to various selected output sources/files
257# Sets globals $MaxSeqNameWidth and $MaxSeqLenWidth and $printed_header
258
259sub output_tRNA
260{
261    my ($global_vars, $cm, $r_tab_results, $get_hmm_score, $program_id) = @_;
262	my $opts = $global_vars->{options};
263	my $log = $global_vars->{log_file};
264	my $gc = $global_vars->{gc};
265    my $sp_int_results = $global_vars->{sp_int_results};
266    my $iso_int_results = $global_vars->{iso_int_results};
267    my $tRNA = tRNAscanSE::tRNA->new;
268
269    my $results_line = "";
270	my $isotype_line = "";
271	my ($iso_models, $mito_models);
272
273    my @sp_indexes = $sp_int_results->get_indexes();
274    if ($sp_int_results->open_file("read"))
275    {
276		if (!$opts->no_isotype() and $iso_int_results->open_file("read"))
277		{
278			($iso_models, $mito_models) = &get_models($opts, $cm);
279			$iso_int_results->read_models();
280		}
281		if ($opts->isotype_specific_file() ne "")
282		{
283			&open_for_append(\*ISOTYPE, $opts->isotype_specific_file());
284		}
285		if ($opts->save_all_struct())
286		{
287			&open_for_append(\*SECSTRUCT, $opts->all_struct_file());
288		}
289		if ($opts->ace_output())
290		{
291		    &open_for_append(\*ACEOUT, $opts->out_file());
292		}
293		else
294		{
295			&open_for_append(\*TABOUT, $opts->out_file());
296		}
297		if ($opts->output_fasta_file() ne "")
298		{
299			&open_for_append(\*FASTA, $opts->output_fasta_file());
300		}
301
302		for (my $i = 0; $i < scalar(@sp_indexes); $i++)
303		{
304			$sp_int_results->get_tRNA($sp_indexes[$i]->[0], $tRNA);
305			if (!$opts->no_isotype())
306			{
307				$iso_int_results->get_next_tRNA($tRNA);
308			}
309
310			my ($type, $model, $score, $ss) = $tRNA->get_highest_score_model();
311			if ($tRNA->isotype() eq "Met" and $type eq "cyto" and ($model eq "iMet" or $model eq "fMet" or $model eq "Ile2"))
312			{
313				$tRNA->isotype($model);
314			}
315			elsif ($tRNA->isotype() eq "Met" and $type eq "cyto" and $model ne "Met" and $model ne "iMet" and $model ne "fMet")
316			{
317				$tRNA->sort_multi_models("model");
318				my ($met_iso_model, $met_iso_score, $met_iso_ss) = $tRNA->get_model_hit("cyto", $tRNA->isotype());
319				my ($ile2_iso_model, $ile2_iso_score, $ile2_iso_ss) = $tRNA->get_model_hit("cyto", "Ile2");
320				if ($ile2_iso_score > 0 and $met_iso_score > 0)
321				{
322					if (($score - $ile2_iso_score) <= 5 and ($ile2_iso_score - $met_iso_score) >= 5 and $tRNA->score() > 50)
323					{
324						$tRNA->isotype("Ile2");
325					}
326				}
327			}
328
329			if (!$opts->results_to_stdout())
330			{
331				$log->broadcast($tRNA->tRNAscan_id().":  ".$opts->second_pass_label()." type= ".$tRNA->isotype()."\t ".
332				"Score= ".$tRNA->score());
333			}
334			if ($opts->save_all_struct())
335			{
336				&save_allStruct_output(\*SECSTRUCT, $opts, $gc, $get_hmm_score, $tRNA);
337			}
338			if ($opts->output_fasta_file() ne "")
339			{
340				&write_tRNA_sequence(\*FASTA, $tRNA);
341			}
342
343			# Create tabular results line, ready for output
344			if (!$printed_header)
345			{
346				$max_seq_name_width = &max(length($tRNA->src_seqid()) + 1, 8);
347				$max_seq_len_width  = length($tRNA->src_seqlen());
348			}
349		    $results_line = &construct_tab_output($opts, $get_hmm_score, $tRNA, $max_seq_name_width, $max_seq_len_width);
350
351			# Internal copy of results saved for later uses
352			push(@$r_tab_results, $results_line);
353
354			if ($opts->ace_output())
355			{
356				&save_Acedb_from_secpass(\*ACEOUT, $opts, $gc, $tRNA, $program_id);
357			}
358			else
359			{
360				if (!($opts->brief_output() || $printed_header))
361				{
362					&print_results_header(\*TABOUT, $opts, $get_hmm_score, $max_seq_name_width, $max_seq_len_width, 0);
363					if ($opts->isotype_specific_file() ne "")
364					{
365						&print_isotype_specific_header(\*ISOTYPE, $opts, $iso_models, $mito_models);
366					}
367
368					$printed_header = 1;
369				}
370				print TABOUT $results_line;
371
372				if ($opts->isotype_specific_file() ne "")
373				{
374					$isotype_line = &construct_isotype_specific_output($opts, $iso_models, $mito_models, $tRNA);
375					print ISOTYPE $isotype_line;
376				}
377			}
378		}
379
380		if ($opts->ace_output())
381		{
382			close ACEOUT;
383		}
384		else
385		{
386			close TABOUT;
387		}
388		if ($opts->save_all_struct())
389		{
390			close SECSTRUCT;
391		}
392		if ($opts->isotype_specific_file() ne "")
393		{
394			close ISOTYPE;
395		}
396		if (!$opts->no_isotype())
397		{
398			$iso_int_results->close_file();
399		}
400		if (!$opts->output_fasta_file() ne "")
401		{
402			close FASTA;
403		}
404        $sp_int_results->close_file();
405	}
406}
407
408sub save_allStruct_output
409{
410    my ($SECSTRUCT, $opts, $gc, $get_hmm_score, $tRNA) = @_;
411
412    my $ruler = '    *    |' x 20;     # ruler printed out with
413                                       #  secondary structure output
414
415    my $seqlen = length($tRNA->seq());
416	my ($type, $model, $score, $ss) = $tRNA->get_highest_score_model();
417
418	if ($tRNA->strand() eq "+")
419	{
420		print {$$SECSTRUCT} $tRNA->seqname().".trna".$tRNA->id()." (".$tRNA->start()."-".$tRNA->end().")\t",
421			"Length: $seqlen bp\nType: ".$tRNA->isotype()."\t";
422	}
423	else
424	{
425		print {$$SECSTRUCT} $tRNA->seqname().".trna".$tRNA->id()." (".$tRNA->end()."-".$tRNA->start().")\t",
426			"Length: $seqlen bp\nType: ".$tRNA->isotype()."\t";
427	}
428
429    if ($opts->output_codon())
430	{
431		print {$$SECSTRUCT} "Codon: ", &rev_comp_seq($tRNA->anticodon()), " at ";
432    }
433    else
434	{
435		print {$$SECSTRUCT} "Anticodon: ".$tRNA->anticodon()." at ";
436    }
437
438    if ($tRNA->anticodon() eq $gc->undef_anticodon())
439	{
440		print {$$SECSTRUCT} "0-0 (0-0)\t";
441    }
442    else
443	{
444		my @ar_ac_pos = $tRNA->ar_ac_pos();
445		for (my $i = 0; $i < scalar(@ar_ac_pos); $i++)
446		{
447			if ($i > 0)
448			{
449				print {$$SECSTRUCT} ",";
450			}
451			print {$$SECSTRUCT} $ar_ac_pos[$i]->{rel_start}."-".$ar_ac_pos[$i]->{rel_end};
452		}
453		for (my $i = 0; $i < scalar(@ar_ac_pos); $i++)
454		{
455			if ($i == 0)
456			{
457				print {$$SECSTRUCT} " (";
458			}
459			elsif ($i > 0)
460			{
461				print {$$SECSTRUCT} ",";
462			}
463
464			if ($tRNA->strand() eq "+")
465			{
466				print {$$SECSTRUCT} $ar_ac_pos[$i]->{rel_start} + $tRNA->start() - 1, "-",
467					$ar_ac_pos[$i]->{rel_start} + $tRNA->start() + 1;
468			}
469			else
470			{
471				print {$$SECSTRUCT} $tRNA->end() - $ar_ac_pos[$i]->{rel_start} + 1, "-",
472					$tRNA->end() - $ar_ac_pos[$i]->{rel_start} - 1;
473			}
474			if ($i == (scalar(@ar_ac_pos) - 1))
475			{
476				print {$$SECSTRUCT} ")\t";
477			}
478
479		}
480	}
481
482    print {$$SECSTRUCT} "Score: ".$tRNA->score()."\n";
483	my @ar_introns = ();
484	my $nci_count = 0;
485    if ($tRNA->get_intron_count() > 0)
486	{
487		@ar_introns = $tRNA->ar_introns();
488		foreach my $intron (@ar_introns)
489		{
490			if (defined $intron)
491			{
492				print {$$SECSTRUCT} "Possible intron: $intron->{rel_start}-$intron->{rel_end} ";
493				if ($tRNA->strand() eq "+")
494				{
495					print {$$SECSTRUCT} "(".$intron->{start}."-".$intron->{end}.")\n";
496				}
497				else
498				{
499					print {$$SECSTRUCT} "(".$intron->{end}."-".$intron->{start}.")\n";
500				}
501			}
502		}
503	}
504
505	my $line = "";
506	my $note = "";
507    if ($tRNA->is_pseudo() and $tRNA->is_trunc())
508	{
509		$note = "Possible truncation, pseudogene";
510    }
511	elsif ($tRNA->is_pseudo())
512	{
513		$note = "Possible pseudogene";
514	}
515	elsif ($tRNA->is_trunc())
516	{
517		$note = "Possible truncation";
518	}
519
520	if ($note ne "")
521	{
522		$line = sprintf("%s", $note);
523	}
524
525	if ($get_hmm_score)
526	{
527		if ($note ne "")
528		{
529			$line .= ": ";
530		}
531
532		$line .= sprintf("HMM Sc=%.2f\tSec struct Sc=%.2f", $tRNA->hmm_score(), $tRNA->ss_score());
533    }
534
535	if ($opts->infernal_score())
536	{
537		my $inf = $tRNA->get_domain_model("infernal");
538		if (defined $inf)
539		{
540			$line .= "\tInfernal Sc=".$inf->{score};
541		}
542	}
543	if ($opts->mito_mode())
544	{
545		$note = "";
546		if ($tRNA->category() ne "")
547		{
548			$note = $tRNA->category();
549			$note =~ s/_/ /g;
550			$note =~ s/mito //g;
551			$note =~ s/iso/type/g;
552			$note =~ s/ac/anticodon/g;
553			$note = uc(substr($note, 0, 1)).substr($note, 1);
554			if ($tRNA->note() ne "")
555			{
556				$note = $note . " ". $tRNA->note();
557			}
558			$line .= "Note: ".$note;
559		}
560	}
561
562	if ($line ne "")
563	{
564		print {$$SECSTRUCT} $line."\n";
565	}
566
567	if (!$opts->arch_mode())
568	{
569	    print {$$SECSTRUCT} "     ",substr($ruler, 0, $seqlen - 1),"\n";
570	    print {$$SECSTRUCT} "Seq: ".$tRNA->seq()."\nStr: ".$tRNA->ss()."\n\n";
571	}
572	else
573	{
574	    print {$$SECSTRUCT} "     ",substr($ruler, 0, length($tRNA->mat_seq()) - 1),"\n";
575	    print {$$SECSTRUCT} "Seq: ".$tRNA->mat_seq()."\nStr: ".$tRNA->mat_ss()."\n";
576		if (uc($tRNA->seq()) ne uc($tRNA->mat_seq()))
577		{
578			my $precursor_seq = uc($tRNA->seq());
579			foreach my $intron (@ar_introns)
580			{
581				if (defined $intron)
582				{
583					my $intron_seq = uc(substr($tRNA->seq(), $intron->{rel_start} - 1, $intron->{rel_end} - $intron->{rel_start} + 1));
584					if ($intron_seq ne "")
585					{
586						$precursor_seq =~ s/$intron_seq/\[$intron_seq\]/;
587					}
588				}
589			}
590			print {$$SECSTRUCT} "Pre: ". $precursor_seq ."\n\n";
591		}
592		else
593		{
594			print {$$SECSTRUCT} "\n";
595		}
596	}
597}
598
599sub write_tRNA_sequence
600{
601    my ($FASTA, $tRNA) = @_;
602
603	my $tRNAscan_id = $tRNA->seqname().".trna".$tRNA->id();
604	print {$FASTA} ">".$tRNAscan_id." ".
605		$tRNA->seqname().":".$tRNA->start()."-".$tRNA->end()." (".$tRNA->strand().") ".
606		$tRNA->isotype()." (".$tRNA->anticodon().") ".length($tRNA->seq())." bp Sc: ".$tRNA->score();
607	if ($tRNA->is_pseudo())
608	{
609		print {$FASTA} " Possible pseudogene\n";
610	}
611	else
612	{
613		print {$FASTA} "\n";
614	}
615	my $parts = int(length($tRNA->seq()) / 60);
616	my $remain = length($tRNA->seq()) % 60;
617	for (my $j = 0; $j < $parts; $j++)
618	{
619		print {$FASTA} uc(substr($tRNA->seq(), $j * 60, 60))."\n";
620	}
621	if ($remain > 0)
622	{
623		print {$FASTA} uc(substr($tRNA->seq(), $parts * 60))."\n";
624	}
625}
626
627# Save tRNA hits in Tabular output
628sub construct_tab_output
629{
630    my ($opts, $get_hmm_score, $tRNA, $max_seq_name_width, $max_seq_len_width) = @_;
631
632    my ($result_line, $tRNA_type);
633	my ($type, $model, $score, $ss) = $tRNA->get_highest_score_model();
634	$tRNA->sort_multi_models("model");
635	my ($iso_model, $iso_score, $iso_ss) = $tRNA->get_model_hit("cyto", $tRNA->isotype());
636
637    $result_line = sprintf "%-".$max_seq_name_width."s\t", $tRNA->seqname();
638    $result_line .= $tRNA->id()."\t";
639
640	if ($tRNA->strand() eq "+")
641	{
642		$result_line .= sprintf "%-".$max_seq_len_width."d\t", $tRNA->start();
643		$result_line .= sprintf "%-".$max_seq_len_width."d\t", $tRNA->end();
644	}
645	else
646	{
647		$result_line .= sprintf "%-".$max_seq_len_width."d\t", $tRNA->end();
648		$result_line .= sprintf "%-".$max_seq_len_width."d\t", $tRNA->start();
649	}
650
651    $result_line .= $tRNA->isotype()."\t";
652
653    if ($opts->output_codon())
654	{
655		$result_line .= (&rev_comp_seq($tRNA->anticodon()))."\t";
656    }
657    else
658	{
659		$result_line .= $tRNA->anticodon()."\t";
660    }
661
662	my @ar_introns = ();
663    if ($tRNA->get_intron_count() == 0)
664	{
665		$result_line .= "0\t0";
666    }
667    else
668	{
669		my $intron_ct = 0;
670		@ar_introns = $tRNA->ar_introns();
671		for (my $i = 0; $i < scalar(@ar_introns); $i++)
672		{
673			if (defined $ar_introns[$i])
674			{
675				if ($intron_ct > 0)
676				{
677					$result_line .= ",";
678				}
679				if ($tRNA->strand() eq "+")
680				{
681					$result_line .= $ar_introns[$i]->{start};
682				}
683				else
684				{
685					$result_line .= $ar_introns[$i]->{end};
686				}
687				$intron_ct++;
688			}
689		}
690		$result_line .= "\t";
691		$intron_ct = 0;
692		for (my $i = 0; $i < scalar(@ar_introns); $i++)
693		{
694			if (defined $ar_introns[$i])
695			{
696				if ($intron_ct > 0)
697				{
698					$result_line .= ",";
699				}
700				if ($tRNA->strand() eq "+")
701				{
702					$result_line .= $ar_introns[$i]->{end};
703				}
704				else
705				{
706					$result_line .= $ar_introns[$i]->{start};
707				}
708				$intron_ct++;
709			}
710		}
711	}
712    $result_line .= "\t".$tRNA->score();
713
714    if ($get_hmm_score)
715	{
716		$result_line .= sprintf "\t%.2f\t%.2f", $tRNA->hmm_score(), $tRNA->ss_score();
717    }
718	if ($opts->infernal_score())
719	{
720		my $inf = $tRNA->get_domain_model("infernal");
721		if (defined $inf)
722		{
723			$result_line .= "\t".$inf->{score};
724		}
725	}
726    if ($opts->save_source())
727	{
728		$result_line .= "\t".$tRNA->hit_source();
729    }
730	if ((($opts->euk_mode() or $opts->bact_mode() or $opts->arch_mode()) and !$opts->no_isotype() and $opts->detail()) or $opts->metagenome_mode())
731	{
732		$result_line .= "\t".$model;
733		$result_line .= "\t".$score;
734		if ($opts->euk_mode() and $opts->mito_model() ne "")
735		{
736			$tRNA->category($type);
737			if ($type eq "cyto")
738			{
739				$result_line .= "\tcytosolic";
740			}
741			else
742			{
743				$result_line .= "\t".$type;
744			}
745		}
746	}
747	if (!$opts->mito_mode() and !$opts->numt_mode())
748	{
749		my $note = "\t";
750		if ($tRNA->is_pseudo())
751		{
752			$note .= "pseudo";
753		}
754
755		if ($opts->detail())
756		{
757			if ((($opts->euk_mode() or $opts->bact_mode() or $opts->arch_mode()) and !$opts->no_isotype()) or $opts->metagenome_mode())
758			{
759				if ($model ne "" and $tRNA->isotype() ne "Undet")
760				{
761					if ($model ne $tRNA->isotype())
762					{
763						if (($model eq "iMet" or $model eq "fMet" or $model eq "Ile2") and $tRNA->isotype() eq "Met")
764						{}
765						elsif (($model eq "LeuTAA" or $model eq "LeuTAG") and ($tRNA->isotype() eq "Leu" or $tRNA->isotype() eq "Undet") and $type eq "mito")
766						{}
767						elsif (($model eq "SerGCT" or $model eq "SerTGA") and ($tRNA->isotype() eq "Ser" or $tRNA->isotype() eq "Undet") and $type eq "mito")
768						{}
769						else
770						{
771							if ($note ne "\t")
772							{
773								$note .= ",";
774							}
775							$note .= sprintf("IPD:%0.2f", ($iso_score - $score));
776						}
777					}
778				}
779			}
780
781			if ($tRNA->is_trunc())
782			{
783				if ($note ne "\t")
784				{
785					$note .= ",";
786				}
787				$note .= $tRNA->trunc();
788			}
789
790			if ($opts->search_mode() eq "archaea")
791			{
792				if ($tRNA->get_intron_count() > 0)
793				{
794					my $ci_count = 0;
795					my $nci_count = 0;
796					for (my $i = 0; $i < scalar(@ar_introns); $i++)
797					{
798						if ($ar_introns[$i]->{type} eq "CI")
799						{
800							$ci_count++;
801						}
802						elsif ($ar_introns[$i]->{type} eq "NCI")
803						{
804							$nci_count++;
805						}
806					}
807					if ($ci_count > 0)
808					{
809						if ($note ne "\t")
810						{
811							$note .= ",";
812						}
813						$note .= "CI";
814					}
815					if ($nci_count > 0)
816					{
817						if ($note ne "\t")
818						{
819							$note .= ",";
820						}
821						$note .= "NCI:".$nci_count;
822					}
823				}
824			}
825		}
826
827		$result_line .= $note;
828	}
829	if ($opts->mito_mode())
830	{
831		my $note = "";
832		if ($tRNA->category() ne "" and $opts->detail())
833		{
834			$note = $tRNA->category();
835			$note =~ s/mito //g;
836			$note =~ s/iso/type/g;
837			if ($tRNA->note() ne "")
838			{
839				$note = $note . $tRNA->note();
840			}
841		}
842		$result_line .= "\t".$note;
843	}
844    $result_line .= "\n";
845
846    return $result_line;
847}
848
849sub get_models
850{
851	my ($opts, $cm) = @_;
852	my %iso_models = ();
853	my %mito_models = ();
854
855	my $models = $cm->get_models_from_db($cm->isotype_cm_db_file_path());
856    foreach my $cur_iso_cm (sort @$models)
857    {
858		my $model = $cur_iso_cm;
859		if ($cur_iso_cm =~ /^arch-(\S+)/ || $cur_iso_cm =~ /^euk-(\S+)/ || $cur_iso_cm =~ /^bact-(\S+)/)
860		{
861			$model = $1;
862		}
863		$iso_models{$model} = 1;
864	}
865
866	if ($opts->euk_mode() and $opts->mito_model() ne "")
867	{
868		$models = $cm->get_models_from_db($cm->mito_isotype_cm_db_file_path());
869		foreach my $cur_iso_cm (sort @$models)
870		{
871			$mito_models{$cur_iso_cm} = 1;
872		}
873	}
874
875	return (\%iso_models, \%mito_models);
876}
877
878sub print_isotype_specific_header
879{
880    my ($ISOTYPE, $opts, $iso_models, $mito_models) = @_;
881
882	print {$$ISOTYPE} "tRNAscanID\tAnticodon_predicted_isotype";
883    foreach my $cur_iso_cm (sort keys %$iso_models)
884    {
885		print {$$ISOTYPE} "\t".$cur_iso_cm;
886	}
887
888	if ($opts->euk_mode() and $opts->mito_model() ne "")
889	{
890		foreach my $cur_iso_cm (sort keys %$mito_models)
891		{
892			print {$$ISOTYPE} "\tmito_".$cur_iso_cm;
893		}
894	}
895
896	print {$$ISOTYPE} "\n";
897}
898
899sub construct_isotype_specific_output
900{
901    my ($opts, $iso_models, $mito_models, $tRNA) = @_;
902
903	my $result_line = $tRNA->seqname().".trna".$tRNA->id();
904	$result_line .= "\t".$tRNA->isotype();
905	$tRNA->sort_multi_models("model");
906
907    foreach my $cur_iso_cm (sort keys %$iso_models)
908    {
909		my ($model, $score, $ss) = $tRNA->get_model_hit("cyto", $cur_iso_cm);
910		if ($model ne "")
911		{
912			$result_line .= "\t".$score;
913		}
914		else
915		{
916			$result_line .= "\t-999";
917		}
918	}
919
920	if ($opts->euk_mode() and $opts->mito_model() ne "")
921	{
922		foreach my $cur_iso_cm (sort keys %$mito_models)
923		{
924			my ($model, $score, $ss) = $tRNA->get_model_hit("mito", $cur_iso_cm);
925			if ($model ne "")
926			{
927				$result_line .= "\t".$score;
928			}
929			else
930			{
931				$result_line .= "\t-999";
932			}
933		}
934	}
935
936	$result_line .= "\n";
937
938	return $result_line;
939}
940
941sub save_Acedb_from_secpass
942{
943    my ($ACEOUT, $opts, $gc, $tRNA, $program_id) = @_;
944
945    print {$$ACEOUT} "Sequence\t".$tRNA->seqname()."\nSubsequence\t".$tRNA->tRNAscan_id()." ".$tRNA->start()." ".$tRNA->end()."\n\n";
946    print {$$ACEOUT} "Sequence\t".$tRNA->tRNAscan_id()."\nSource\t\t".$tRNA->seqname()."\n";
947    if ($tRNA->get_intron_count() > 0)
948	{
949		my @ar_introns = $tRNA->ar_introns();
950		print {$$ACEOUT} "Source_Exons\t1 ", $ar_introns[0]->{rel_start} - 1,"\n";
951		print {$$ACEOUT} "Source_Exons\t", $ar_introns[0]->{rel_end} + 1," ", abs($tRNA->end() - $tRNA->start()) + 1,"\n";
952    }
953    print {$$ACEOUT} "Brief_identification tRNA-".$tRNA->isotype()."\n",
954        "Transcript tRNA \"";
955
956    if ($opts->output_codon())
957	{
958		print {$$ACEOUT} &rev_comp_seq($tRNA->anticodon());
959    }
960    else
961	{
962		print {$$ACEOUT} $tRNA->anticodon();
963    }
964
965    print {$$ACEOUT} " ".$tRNA->isotype()." ", $gc->one_let_trans_map()->{$tRNA->isotype()},
966        "\"\nScore $program_id ".$tRNA->score()."\n";
967
968    if ($tRNA->is_pseudo())
969	{
970		printf {$$ACEOUT} "Remark \"Likely pseudogene (HMM Sc=%.2f / Sec struct Sc=%.2f)\"\n",
971            $tRNA->hmm_score(), $tRNA->ss_score();
972    }
973    print {$$ACEOUT} "\n";
974}
975
976sub write_bed
977{
978	my ($global_vars) = @_;
979	my $opts = $global_vars->{options};
980    my $sp_int_results = $global_vars->{sp_int_results};
981    my $iso_int_results = $global_vars->{iso_int_results};
982
983	$sp_int_results->sort_records("bed_output");
984	if (!$opts->no_isotype())
985	{
986		$iso_int_results->sort_records("tRNAscan_id");
987	}
988
989	my $tRNA = tRNAscanSE::tRNA->new;
990    &open_for_append(\*FILE_OUT, $opts->bed_file());
991    my @sp_indexes = $sp_int_results->get_indexes();
992    my @ iso_indexes = $iso_int_results->get_indexes();
993    if ($sp_int_results->open_file("read"))
994    {
995		if (!$opts->no_isotype())
996		{
997			$iso_int_results->open_file("read");
998		}
999
1000		for (my $i = 0; $i < scalar(@sp_indexes); $i++)
1001		{
1002			$sp_int_results->get_tRNA($sp_indexes[$i]->[0], $tRNA);
1003
1004			if (!$opts->no_isotype())
1005			{
1006				my $id = $tRNA->seqname().".t".&pad_num($tRNA->id(), 6);
1007				my $index = $iso_int_results->bsearch_tRNAscan_id($id);
1008				if ($index > -1)
1009				{
1010					$iso_int_results->get_tRNA($iso_indexes[$index]->[0], $tRNA);
1011					my ($type, $model, $score, $ss) = $tRNA->get_highest_score_model();
1012					if ($tRNA->isotype() eq "Met" and $type eq "cyto" and ($model eq "iMet" or $model eq "fMet" or $model eq "Ile2"))
1013					{
1014						$tRNA->isotype($model);
1015						$tRNA->tRNAscan_id($tRNA->seqname().".tRNA".$tRNA->id()."-".$tRNA->isotype().$tRNA->anticodon());
1016					}
1017					elsif ($tRNA->isotype() eq "Met" and $type eq "cyto" and $model ne "Met" and $model ne "iMet" and $model ne "fMet")
1018					{
1019						$tRNA->sort_multi_models("model");
1020						my ($met_iso_model, $met_iso_score, $met_iso_ss) = $tRNA->get_model_hit("cyto", $tRNA->isotype());
1021						my ($ile2_iso_model, $ile2_iso_score, $ile2_iso_ss) = $tRNA->get_model_hit("cyto", "Ile2");
1022						if ($ile2_iso_score > 0 and $met_iso_score > 0)
1023						{
1024							if (($score - $ile2_iso_score) <= 5 and ($ile2_iso_score - $met_iso_score) >= 5 and $tRNA->score() > 50)
1025							{
1026								$tRNA->isotype("Ile2");
1027								$tRNA->tRNAscan_id($tRNA->seqname().".tRNA".$tRNA->id()."-".$tRNA->isotype().$tRNA->anticodon());
1028							}
1029						}
1030					}
1031				}
1032			}
1033
1034			print FILE_OUT $tRNA->seqname()."\t".($tRNA->start() - 1)."\t".$tRNA->end()."\t".$tRNA->tRNAscan_id()."\t".&convert_bed_score($tRNA->score())."\t".
1035				$tRNA->strand()."\t".($tRNA->start() - 1)."\t".$tRNA->end()."\t0\t".($tRNA->get_intron_count() + 1)."\t";
1036			if ($tRNA->get_intron_count() == 0)
1037			{
1038				print FILE_OUT ($tRNA->end() - $tRNA->start() + 1).",\t0,\n";
1039			}
1040			else
1041			{
1042				my @ar_introns = $tRNA->ar_introns();
1043				my $block_sizes = "";
1044				my $block_starts = "0,";
1045				my $prev_start = 1;
1046				if ($tRNA->strand() eq "+")
1047				{
1048					for (my $i = 0; $i < scalar(@ar_introns); $i++)
1049					{
1050						$block_sizes .= ($ar_introns[$i]->{rel_start} - $prev_start).",";
1051						$block_starts .= $ar_introns[$i]->{rel_end}.",";
1052						$prev_start = $ar_introns[$i]->{rel_end} + 1;
1053					}
1054					$block_sizes .= ($tRNA->end() - $ar_introns[(scalar(@ar_introns)-1)]->{end}).",";
1055				}
1056				else
1057				{
1058					$prev_start = length($tRNA->seq());
1059					for (my $i = (scalar(@ar_introns)-1); $i >= 0; $i--)
1060					{
1061						$block_sizes .= ($prev_start - $ar_introns[$i]->{rel_end}).",";
1062						$block_starts .= ($prev_start - $ar_introns[$i]->{rel_start} + 1).",";
1063						$prev_start = $ar_introns[$i]->{rel_start};
1064					}
1065					$block_sizes .= ($ar_introns[0]->{rel_start} - 1).",";
1066				}
1067				print FILE_OUT $block_sizes."\t".$block_starts."\n";
1068			}
1069		}
1070
1071		if (!$opts->no_isotype())
1072		{
1073			$iso_int_results->close_file();
1074		}
1075		$sp_int_results->close_file();
1076	}
1077	close(FILE_OUT);
1078}
1079
1080sub convert_bed_score
1081{
1082	my ($cm_score) = @_;
1083
1084	my $bed_score = $cm_score * 10;
1085	if ($bed_score > 1000)
1086	{
1087		$bed_score = 1000;
1088	}
1089
1090	return $bed_score;
1091}
1092
1093sub output_split_fragments
1094{
1095    my ($opts, $r_pairs, $r_5half_hits, $r_3half_hits) = @_;
1096
1097	my ($r_5half, $r_3half);
1098
1099	&open_for_append(\*SPLITFILE, $opts->split_fragment_file());
1100	printf SPLITFILE "Fragment1\tFragment2\tSeqName1\tStartPos1\tEndPos1\tSeqName2\tStartPos2\tEndPos2\tScore1\tScore2\n";
1101
1102	foreach my $r_pair (@$r_pairs)
1103	{
1104		if (defined $r_pair->{"5h"} && defined $r_pair->{"3h"})
1105		{
1106			$r_5half = $r_5half_hits->[$r_pair->{"5h"}];
1107			$r_3half = $r_3half_hits->[$r_pair->{"3h"}];
1108			print SPLITFILE $r_5half->{seq}."\t".$r_3half->{seq}."\t",
1109				$r_5half->{hit_seqname}."\t".$r_5half->{tRNA_start}."\t".$r_5half->{tRNA_end}."\t",
1110				$r_3half->{hit_seqname}."\t".$r_3half->{tRNA_start}."\t".$r_3half->{tRNA_end}."\t",
1111				$r_5half->{score}."\t".$r_3half->{score}."\n";
1112			print SPLITFILE $r_5half->{ss}."\t".$r_3half->{ss}."\t\t\t\t\t\t\t\t\n";
1113		}
1114		elsif (defined $r_pair->{"5h"} && !defined $r_pair->{"3h"})
1115		{
1116			$r_5half = $r_5half_hits->[$r_pair->{"5h"}];
1117			print SPLITFILE $r_5half->{seq}."\t\t",
1118				$r_5half->{hit_seqname}."\t".$r_5half->{tRNA_start}."\t".$r_5half->{tRNA_end}."\t",
1119				"\t\t\t",
1120				$r_5half->{score}."\t\n";
1121			print SPLITFILE $r_5half->{ss}."\t\t\t\t\t\t\t\t\t\n";
1122		}
1123		elsif (!defined $r_pair->{"5h"} && defined $r_pair->{"3h"})
1124		{
1125			$r_3half = $r_3half_hits->[$r_pair->{"3h"}];
1126			print SPLITFILE "\t".$r_3half->{seq}."\t",
1127				"\t\t\t",
1128				$r_3half->{hit_seqname}."\t".$r_3half->{tRNA_start}."\t".$r_3half->{tRNA_end}."\t",
1129				"\t".$r_3half->{score}."\n";
1130			print SPLITFILE "\t".$r_3half->{ss}."\t\t\t\t\t\t\t\t\n";
1131		}
1132	}
1133	close(SPLITFILE);
1134}
1135
11361;
1137