1#!/usr/local/bin/perl
2
3BEGIN {
4    $builddir = shift;
5    $srcdir   = shift;
6    $tmppfx   = shift;
7}
8
9if (! -x "$builddir/miniapps/esl-translate") { die "FAIL: didn't find esl-translate program in $builddir/miniapps"; }
10if (! -x "$builddir/miniapps/esl-reformat")  { die "FAIL: didn't find esl-reformat program in $builddir/miniapps"; }
11if (! -x "$builddir/miniapps/esl-shuffle")   { die "FAIL: didn't find esl-shuffle program in $builddir/miniapps"; }
12if (! -x "$builddir/miniapps/esl-sfetch")    { die "FAIL: didn't find esl-shuffle program in $builddir/miniapps"; }
13
14# Test the genetic code is correct, including spot checks of some alternative codes
15#
16# The reformatting to pfam format is solely to get the seq all on one line, simplifying parsing
17#
18open(TESTFILE,">$tmppfx.fa") || die "FAIL: couldn't open $tmppfx.fa for writing translation_test seqfile";
19print TESTFILE << "EOF";
20>translation_test
21TTGATAATG
22AAAAACAAGAATACAACCACGACTAGAAGCAGGAGTATAATCATGATT
23CAACACCAGCATCCACCCCCGCCTCGACGCCGGCGTCTACTCCTGCTT
24GAAGACGAGGATGCAGCCGCGGCTGGAGGCGGGGGTGTAGTCGTGGTT
25TACTATTCATCCTCGTCTTGCTGGTGTTTATTCTTGTTTTGATAATAG
26EOF
27close TESTFILE;
28
29$output = `$builddir/miniapps/esl-translate -l 60 $tmppfx.fa | $builddir/miniapps/esl-reformat pfam -`;
30if ($output !~ / coords=1\.\.192\s+length=64\s+frame=1/)                                     { die "FAIL: standard genetic code mistranslated? unexpected desc line"; }
31if ($output !~ /orf1\s+LIMKNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVVYYSSSSCWCLFLF\s+/) { die "FAIL: standard genetic code mistranslated"; }
32
33$output = `$builddir/miniapps/esl-translate -c 3 -l 60 $tmppfx.fa | $builddir/miniapps/esl-reformat pfam -`;
34if ($output !~ / coords=1\.\.195\s+length=65\s+frame=1/)                                      { die "FAIL: yeast mito genetic code mistranslated? unexpected desc line"; }
35if ($output !~ /orf1\s+LMMKNKNTTTTRSRSMIMIQHQHPPPPRRRRTTTTEDEDAAAAGGGGVVVVYYSSSSCWCLFLFW\s+/) { die "FAIL: yeast mito genetic code mistranslated"; }
36
37$output = `$builddir/miniapps/esl-translate -c 14 -l 60 $tmppfx.fa | $builddir/miniapps/esl-reformat pfam -`;
38if ($output !~ / coords=1\.\.198\s+length=66\s+frame=1/)                                       { die "FAIL: alt flatworm genetic code mistranslated? unexpected desc line"; }
39if ($output !~ /orf1\s+LIMNNKNTTTTSSSSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVVYYSSSSCWCLFLFWY\s+/) { die "FAIL: alt flatworm genetic code mistranslated"; }
40
41# Use initiators, -M option.
42# Code 1 uses UUG (also CUG, AUG). Code 3 uses AUA (also AUG). Code 14, only AUG.
43# Initiator is always M.
44$output = `$builddir/miniapps/esl-translate -M -l 60 $tmppfx.fa | $builddir/miniapps/esl-reformat pfam -`;
45if ($output !~ / coords=1\.\.192\s+length=64\s+frame=1/)                                     { die "FAIL: standard genetic code initiators mishandled w/ -M? unexpected desc line"; }
46if ($output !~ /orf1\s+MIMKNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVVYYSSSSCWCLFLF\s+/) { die "FAIL: standard genetic code initiators mishandled w/ -M"; }
47
48$output = `$builddir/miniapps/esl-translate -M -c 3 -l 60 $tmppfx.fa | $builddir/miniapps/esl-reformat pfam -`;
49if ($output !~ / coords=4\.\.195\s+length=64\s+frame=1/)                                     { die "FAIL: yeast mito genetic code initiators mishandled w/ -M? unexpected desc line"; }
50if ($output !~ /orf1\s+MMKNKNTTTTRSRSMIMIQHQHPPPPRRRRTTTTEDEDAAAAGGGGVVVVYYSSSSCWCLFLFW\s+/) { die "FAIL: yeast mito genetic code initiators mishandled w/ -M"; }
51
52$output = `$builddir/miniapps/esl-translate -M -c 14 -l 60 $tmppfx.fa | $builddir/miniapps/esl-reformat pfam -`;
53if ($output !~ / coords=7\.\.198\s+length=64\s+frame=1/)                                     { die "FAIL: alt flatworm genetic code initiators mishandled w/ -M? unexpected desc line"; }
54if ($output !~ /orf1\s+MNNKNTTTTSSSSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVVYYSSSSCWCLFLFWY\s+/) { die "FAIL: alt flatworm genetic code initiators mishandled w/ -M"; }
55
56# Use only AUG initiator, -m option.
57$output = `$builddir/miniapps/esl-translate -m -l 60 $tmppfx.fa | $builddir/miniapps/esl-reformat pfam -`;
58if ($output !~ / coords=7\.\.192\s+length=62\s+frame=1/)                                   { die "FAIL: standard genetic code AUG starts mishandled w/ -m? unexpected desc line"; }
59if ($output !~ /orf1\s+MKNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVVYYSSSSCWCLFLF\s+/) { die "FAIL: standard genetic code AUG starts mishandled w/ -m"; }
60
61$output = `$builddir/miniapps/esl-translate -m -c 3 -l 60 $tmppfx.fa | $builddir/miniapps/esl-reformat pfam -`;
62if ($output !~ / coords=7\.\.195\s+length=63\s+frame=1/)                                    { die "FAIL: yeast mito genetic code AUG starts mishandled w/ -m? unexpected desc line"; }
63if ($output !~ /orf1\s+MKNKNTTTTRSRSMIMIQHQHPPPPRRRRTTTTEDEDAAAAGGGGVVVVYYSSSSCWCLFLFW\s+/) { die "FAIL: yeast mito genetic code AUG starts mishandled w/ -m"; }
64
65$output = `$builddir/miniapps/esl-translate -m -c 14 -l 60 $tmppfx.fa | $builddir/miniapps/esl-reformat pfam -`;
66if ($output !~ / coords=7\.\.198\s+length=64\s+frame=1/)                                     { die "FAIL: alt flatworm genetic code AUG starts mishandled w/ -m? unexpected desc line"; }
67if ($output !~ /orf1\s+MNNKNTTTTSSSSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVVYYSSSSCWCLFLFWY\s+/) { die "FAIL: alt flatworm genetic code AUG starts mishandled w/ -m"; }
68
69
70# Now test ambiguous translation, using a second contrived sequence.
71# The test includes an ambiguous stop (UAA|UAG) which should correctly decode to *.
72# Also includes an ambig initiator HUG=AUG|CUG|UUG, which should decode to M with -M;
73# NNN, though, can't initiate (because it's consistent with a stop too).
74#
75# The test seq is short enough that we don't need to reformat to pfam, which
76# is essential, because the test seq translates to other peptides in other frames,
77# whereas above we could make it translate to a single seq.
78#
79open(TESTFILE,">$tmppfx.fa") || die "FAIL: couldn't open $tmppfx.fa for writing ambig_test seqfile";
80print TESTFILE << "EOF";
81>ambig_test
82NNNHUGUUYUURUCNUAYUGYCUNCCNCAYCARCGNAUHACNAAYAARAGYAGRGUNGCNGAYGARGGN
83YUUYCUYAUYGUUAR
84EOF
85close TESTFILE;
86
87$output = `$builddir/miniapps/esl-translate -l 25 --watson $tmppfx.fa`;
88if ($output !~ / coords=4\.\.81\s+length=26\s+frame=1/)       { die "FAIL: ambiguity codes mishandled by translation - unexpected desc line"; }
89if ($output !~ /\s+XFLSYCLPHQRITNKSRVADEGXXXX\s+/)        { die "FAIL: ambiguity codes mishandled by translation"; }
90
91$output = `$builddir/miniapps/esl-translate -M -l 25 --watson $tmppfx.fa`;
92if ($output !~ / coords=4\.\.81\s+length=26\s+frame=1/)       { die "FAIL: ambiguity codes mishandled by translation - unexpected desc line"; }
93if ($output !~ /\s+MFLSYCLPHQRITNKSRVADEGXXXX\s+/)        { die "FAIL: ambiguity codes mishandled by translation"; }
94
95
96# Generate a couple of large-ish random sequences, larger than the window size.
97# Default ReadSeq vs. -W ReadWindow should give the same answer.
98# Do more than one seq, to test that we can do that.
99#
100system("$builddir/miniapps/esl-shuffle -G --dna -N 2 -L 10000 > $tmppfx.fa");
101$out1 = `$builddir/miniapps/esl-translate $tmppfx.fa`;
102$out2 = `$builddir/miniapps/esl-translate -W $tmppfx.fa`;;
103if ($out1 ne $out2) { die "FAIL: default vs. windowed (-W) give different results"; }
104
105
106
107# Using those same large sequences, test coords.
108#
109system("$builddir/miniapps/esl-sfetch --index $tmppfx.fa > /dev/null");
110@output = `$builddir/miniapps/esl-translate -l 100 $tmppfx.fa`;
111foreach $line (@output)
112{
113    if ($line =~ /^>(orf\d+)\s+source=(\S+)\s+coords=(\d+)\.\.(\d+)\s+length=(\d+)\s+frame=(\d+)/)
114    {
115	$orfname = $1;
116	$source  = $2;
117	$start   = $3;
118	$end     = $4;
119	$aalen   = $5;
120	$frame   = $6;
121
122	if ($end > $start)
123	{
124	    if ($end - $start + 1 != $aalen * 3) { die "FAIL: $start..$end is not a multiple of 3"; }
125	    if ($start >= 4)    { $start -= 3; $has_leading_stop  = 1; } else { $has_leading_stop  = 0;  }
126	    if ($end   <= 9997) { $end   += 3; $has_trailing_stop = 1; } else { $has_trailing_stop = 0;  }
127	    $ntlen = $end - $start + 1;
128	}
129	else
130	{
131	    if ($start - $end + 1 != $aalen * 3) { die "FAIL: $start..$end is not a multiple of 3"; }
132	    if ($end >= 4)      { $end -= 3;   $has_trailing_stop = 1; } else { $has_trailing_stop = 0;  }
133	    if ($start <= 9997) { $start += 3; $has_leading_stop  = 1; } else { $has_leading_stop  = 0;  }
134	    $ntlen = $start - $end + 1;
135	}
136
137	$out2 = `$builddir/miniapps/esl-sfetch -c $start..$end $tmppfx.fa $source | $builddir/miniapps/esl-translate -l $aalen -`;
138	if ($has_leading_stop)  { $expected_start = 4; }        else { $expected_start = 1;      }
139	if ($has_trailing_stop) { $expected_end = $ntlen - 3; } else { $expected_end   = $ntlen; }
140	if ( $out2 !~ /^>orf\d+ source=.+ coords=$expected_start..$expected_end length=$aalen frame=1/)
141	{ die "FAIL: fetched & translated seq not identical to original translation - coord problem?"; }
142    }
143
144}
145
146print "ok\n";
147unlink "$tmppfx.fa";
148unlink "$tmppfx.fa.ssi";
149exit 0;
150