1#!/usr/local/bin/perl 2 3BEGIN { 4 $builddir = shift; 5 $srcdir = shift; 6 $tmppfx = shift; 7} 8 9if (! -x "$builddir/miniapps/esl-translate") { die "FAIL: didn't find esl-translate program in $builddir/miniapps"; } 10if (! -x "$builddir/miniapps/esl-reformat") { die "FAIL: didn't find esl-reformat program in $builddir/miniapps"; } 11if (! -x "$builddir/miniapps/esl-shuffle") { die "FAIL: didn't find esl-shuffle program in $builddir/miniapps"; } 12if (! -x "$builddir/miniapps/esl-sfetch") { die "FAIL: didn't find esl-shuffle program in $builddir/miniapps"; } 13 14# Test the genetic code is correct, including spot checks of some alternative codes 15# 16# The reformatting to pfam format is solely to get the seq all on one line, simplifying parsing 17# 18open(TESTFILE,">$tmppfx.fa") || die "FAIL: couldn't open $tmppfx.fa for writing translation_test seqfile"; 19print TESTFILE << "EOF"; 20>translation_test 21TTGATAATG 22AAAAACAAGAATACAACCACGACTAGAAGCAGGAGTATAATCATGATT 23CAACACCAGCATCCACCCCCGCCTCGACGCCGGCGTCTACTCCTGCTT 24GAAGACGAGGATGCAGCCGCGGCTGGAGGCGGGGGTGTAGTCGTGGTT 25TACTATTCATCCTCGTCTTGCTGGTGTTTATTCTTGTTTTGATAATAG 26EOF 27close TESTFILE; 28 29$output = `$builddir/miniapps/esl-translate -l 60 $tmppfx.fa | $builddir/miniapps/esl-reformat pfam -`; 30if ($output !~ / coords=1\.\.192\s+length=64\s+frame=1/) { die "FAIL: standard genetic code mistranslated? unexpected desc line"; } 31if ($output !~ /orf1\s+LIMKNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVVYYSSSSCWCLFLF\s+/) { die "FAIL: standard genetic code mistranslated"; } 32 33$output = `$builddir/miniapps/esl-translate -c 3 -l 60 $tmppfx.fa | $builddir/miniapps/esl-reformat pfam -`; 34if ($output !~ / coords=1\.\.195\s+length=65\s+frame=1/) { die "FAIL: yeast mito genetic code mistranslated? unexpected desc line"; } 35if ($output !~ /orf1\s+LMMKNKNTTTTRSRSMIMIQHQHPPPPRRRRTTTTEDEDAAAAGGGGVVVVYYSSSSCWCLFLFW\s+/) { die "FAIL: yeast mito genetic code mistranslated"; } 36 37$output = `$builddir/miniapps/esl-translate -c 14 -l 60 $tmppfx.fa | $builddir/miniapps/esl-reformat pfam -`; 38if ($output !~ / coords=1\.\.198\s+length=66\s+frame=1/) { die "FAIL: alt flatworm genetic code mistranslated? unexpected desc line"; } 39if ($output !~ /orf1\s+LIMNNKNTTTTSSSSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVVYYSSSSCWCLFLFWY\s+/) { die "FAIL: alt flatworm genetic code mistranslated"; } 40 41# Use initiators, -M option. 42# Code 1 uses UUG (also CUG, AUG). Code 3 uses AUA (also AUG). Code 14, only AUG. 43# Initiator is always M. 44$output = `$builddir/miniapps/esl-translate -M -l 60 $tmppfx.fa | $builddir/miniapps/esl-reformat pfam -`; 45if ($output !~ / coords=1\.\.192\s+length=64\s+frame=1/) { die "FAIL: standard genetic code initiators mishandled w/ -M? unexpected desc line"; } 46if ($output !~ /orf1\s+MIMKNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVVYYSSSSCWCLFLF\s+/) { die "FAIL: standard genetic code initiators mishandled w/ -M"; } 47 48$output = `$builddir/miniapps/esl-translate -M -c 3 -l 60 $tmppfx.fa | $builddir/miniapps/esl-reformat pfam -`; 49if ($output !~ / coords=4\.\.195\s+length=64\s+frame=1/) { die "FAIL: yeast mito genetic code initiators mishandled w/ -M? unexpected desc line"; } 50if ($output !~ /orf1\s+MMKNKNTTTTRSRSMIMIQHQHPPPPRRRRTTTTEDEDAAAAGGGGVVVVYYSSSSCWCLFLFW\s+/) { die "FAIL: yeast mito genetic code initiators mishandled w/ -M"; } 51 52$output = `$builddir/miniapps/esl-translate -M -c 14 -l 60 $tmppfx.fa | $builddir/miniapps/esl-reformat pfam -`; 53if ($output !~ / coords=7\.\.198\s+length=64\s+frame=1/) { die "FAIL: alt flatworm genetic code initiators mishandled w/ -M? unexpected desc line"; } 54if ($output !~ /orf1\s+MNNKNTTTTSSSSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVVYYSSSSCWCLFLFWY\s+/) { die "FAIL: alt flatworm genetic code initiators mishandled w/ -M"; } 55 56# Use only AUG initiator, -m option. 57$output = `$builddir/miniapps/esl-translate -m -l 60 $tmppfx.fa | $builddir/miniapps/esl-reformat pfam -`; 58if ($output !~ / coords=7\.\.192\s+length=62\s+frame=1/) { die "FAIL: standard genetic code AUG starts mishandled w/ -m? unexpected desc line"; } 59if ($output !~ /orf1\s+MKNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVVYYSSSSCWCLFLF\s+/) { die "FAIL: standard genetic code AUG starts mishandled w/ -m"; } 60 61$output = `$builddir/miniapps/esl-translate -m -c 3 -l 60 $tmppfx.fa | $builddir/miniapps/esl-reformat pfam -`; 62if ($output !~ / coords=7\.\.195\s+length=63\s+frame=1/) { die "FAIL: yeast mito genetic code AUG starts mishandled w/ -m? unexpected desc line"; } 63if ($output !~ /orf1\s+MKNKNTTTTRSRSMIMIQHQHPPPPRRRRTTTTEDEDAAAAGGGGVVVVYYSSSSCWCLFLFW\s+/) { die "FAIL: yeast mito genetic code AUG starts mishandled w/ -m"; } 64 65$output = `$builddir/miniapps/esl-translate -m -c 14 -l 60 $tmppfx.fa | $builddir/miniapps/esl-reformat pfam -`; 66if ($output !~ / coords=7\.\.198\s+length=64\s+frame=1/) { die "FAIL: alt flatworm genetic code AUG starts mishandled w/ -m? unexpected desc line"; } 67if ($output !~ /orf1\s+MNNKNTTTTSSSSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVVYYSSSSCWCLFLFWY\s+/) { die "FAIL: alt flatworm genetic code AUG starts mishandled w/ -m"; } 68 69 70# Now test ambiguous translation, using a second contrived sequence. 71# The test includes an ambiguous stop (UAA|UAG) which should correctly decode to *. 72# Also includes an ambig initiator HUG=AUG|CUG|UUG, which should decode to M with -M; 73# NNN, though, can't initiate (because it's consistent with a stop too). 74# 75# The test seq is short enough that we don't need to reformat to pfam, which 76# is essential, because the test seq translates to other peptides in other frames, 77# whereas above we could make it translate to a single seq. 78# 79open(TESTFILE,">$tmppfx.fa") || die "FAIL: couldn't open $tmppfx.fa for writing ambig_test seqfile"; 80print TESTFILE << "EOF"; 81>ambig_test 82NNNHUGUUYUURUCNUAYUGYCUNCCNCAYCARCGNAUHACNAAYAARAGYAGRGUNGCNGAYGARGGN 83YUUYCUYAUYGUUAR 84EOF 85close TESTFILE; 86 87$output = `$builddir/miniapps/esl-translate -l 25 --watson $tmppfx.fa`; 88if ($output !~ / coords=4\.\.81\s+length=26\s+frame=1/) { die "FAIL: ambiguity codes mishandled by translation - unexpected desc line"; } 89if ($output !~ /\s+XFLSYCLPHQRITNKSRVADEGXXXX\s+/) { die "FAIL: ambiguity codes mishandled by translation"; } 90 91$output = `$builddir/miniapps/esl-translate -M -l 25 --watson $tmppfx.fa`; 92if ($output !~ / coords=4\.\.81\s+length=26\s+frame=1/) { die "FAIL: ambiguity codes mishandled by translation - unexpected desc line"; } 93if ($output !~ /\s+MFLSYCLPHQRITNKSRVADEGXXXX\s+/) { die "FAIL: ambiguity codes mishandled by translation"; } 94 95 96# Generate a couple of large-ish random sequences, larger than the window size. 97# Default ReadSeq vs. -W ReadWindow should give the same answer. 98# Do more than one seq, to test that we can do that. 99# 100system("$builddir/miniapps/esl-shuffle -G --dna -N 2 -L 10000 > $tmppfx.fa"); 101$out1 = `$builddir/miniapps/esl-translate $tmppfx.fa`; 102$out2 = `$builddir/miniapps/esl-translate -W $tmppfx.fa`;; 103if ($out1 ne $out2) { die "FAIL: default vs. windowed (-W) give different results"; } 104 105 106 107# Using those same large sequences, test coords. 108# 109system("$builddir/miniapps/esl-sfetch --index $tmppfx.fa > /dev/null"); 110@output = `$builddir/miniapps/esl-translate -l 100 $tmppfx.fa`; 111foreach $line (@output) 112{ 113 if ($line =~ /^>(orf\d+)\s+source=(\S+)\s+coords=(\d+)\.\.(\d+)\s+length=(\d+)\s+frame=(\d+)/) 114 { 115 $orfname = $1; 116 $source = $2; 117 $start = $3; 118 $end = $4; 119 $aalen = $5; 120 $frame = $6; 121 122 if ($end > $start) 123 { 124 if ($end - $start + 1 != $aalen * 3) { die "FAIL: $start..$end is not a multiple of 3"; } 125 if ($start >= 4) { $start -= 3; $has_leading_stop = 1; } else { $has_leading_stop = 0; } 126 if ($end <= 9997) { $end += 3; $has_trailing_stop = 1; } else { $has_trailing_stop = 0; } 127 $ntlen = $end - $start + 1; 128 } 129 else 130 { 131 if ($start - $end + 1 != $aalen * 3) { die "FAIL: $start..$end is not a multiple of 3"; } 132 if ($end >= 4) { $end -= 3; $has_trailing_stop = 1; } else { $has_trailing_stop = 0; } 133 if ($start <= 9997) { $start += 3; $has_leading_stop = 1; } else { $has_leading_stop = 0; } 134 $ntlen = $start - $end + 1; 135 } 136 137 $out2 = `$builddir/miniapps/esl-sfetch -c $start..$end $tmppfx.fa $source | $builddir/miniapps/esl-translate -l $aalen -`; 138 if ($has_leading_stop) { $expected_start = 4; } else { $expected_start = 1; } 139 if ($has_trailing_stop) { $expected_end = $ntlen - 3; } else { $expected_end = $ntlen; } 140 if ( $out2 !~ /^>orf\d+ source=.+ coords=$expected_start..$expected_end length=$aalen frame=1/) 141 { die "FAIL: fetched & translated seq not identical to original translation - coord problem?"; } 142 } 143 144} 145 146print "ok\n"; 147unlink "$tmppfx.fa"; 148unlink "$tmppfx.fa.ssi"; 149exit 0; 150