1# tRNAscanSE/GeneticCode.pm
2# This class describes the genetic codes used in tRNAscan-SE.
3#
4# --------------------------------------------------------------
5# This module is part of the tRNAscan-SE program.
6# Copyright (C) 2017 Patricia Chan and Todd Lowe
7# --------------------------------------------------------------
8#
9
10package tRNAscanSE::GeneticCode;
11
12use strict;
13use tRNAscanSE::Utils;
14
# Constructor: builds a GeneticCode object whose lookup tables have been
# filled in by initialize().  Arguments after the class name are ignored.
sub new
{
    my ($class) = @_;

    my $self = {};
    initialize($self);

    return bless($self, $class);
}
25
# Destructor: the object owns nothing that needs explicit release, so this
# is intentionally a no-op.
sub DESTROY
{
    my ($self) = @_;
    return;
}
30
# Fill the given hash reference with the static tables of this class:
#   undef_anticodon / undef_isotype  - placeholders for undetermined tRNAs
#   isotypes                         - ordered list of isotype names
#   ac_list                          - isotype   -> anticodon columns
#   aa_list                          - anticodon -> isotype
#   vert_mito_aa_list                - anticodon -> isotype (vertebrate mito)
#   trans_map / one_let_trans_map    - start empty; read_transl_table()
#                                      fills them in later
sub initialize
{
    my ($self) = @_;

    $self->{undef_anticodon} = "NNN";
    $self->{undef_isotype}   = "Undet";

    $self->{isotypes} = [
        qw/Ala Gly Pro Thr Val/,
        qw/Ser Arg Leu/,
        qw/Phe Asn Lys Asp Glu His Gln/,
        qw/Ile Met Tyr Supres Cys Trp SelCys/,
    ];

    # Amino acid -> anticodon list for printing out the global tRNA summary.
    # NOTE(review): '&nbsp' looks like a blank-cell placeholder so that the
    # columns line up in the printed summary -- confirm against the caller.
    $self->{ac_list} = {
        Ala    => [qw(AGC GGC CGC TGC)],
        Gly    => [qw(ACC GCC CCC TCC)],
        Pro    => [qw(AGG GGG CGG TGG)],
        Thr    => [qw(AGT GGT CGT TGT)],
        Val    => [qw(AAC GAC CAC TAC)],

        Ser    => [qw(AGA GGA CGA TGA ACT GCT)],
        Arg    => [qw(ACG GCG CCG TCG CCT TCT)],
        Leu    => [qw(AAG GAG CAG TAG CAA TAA)],

        Phe    => [qw(AAA GAA &nbsp &nbsp)],

        Asn    => [qw(ATT GTT &nbsp &nbsp)],
        Lys    => [qw(&nbsp &nbsp CTT TTT)],

        Asp    => [qw(ATC GTC &nbsp &nbsp)],
        Glu    => [qw(&nbsp &nbsp CTC TTC)],

        His    => [qw(ATG GTG &nbsp &nbsp)],
        Gln    => [qw(&nbsp &nbsp CTG TTG)],

        Tyr    => [qw(ATA GTA &nbsp &nbsp)],
        Supres => [qw(&nbsp CTA TTA TCA)],

        Ile    => [qw(AAT GAT CAT TAT)],
        Met    => [qw(&nbsp &nbsp CAT &nbsp)],

        Cys    => [qw(ACA GCA &nbsp &nbsp)],
        Trp    => [qw(&nbsp &nbsp CCA &nbsp)],
        SelCys => [qw(&nbsp &nbsp &nbsp TCA)],
    };

    # Anticodon -> isotype, written as one row per isotype and then
    # inverted.  Every anticodon appears in exactly one row.
    my @anticodons_by_isotype = (
        [Ala    => qw/AGC GGC CGC TGC/],
        [Gly    => qw/ACC GCC CCC TCC/],
        [Pro    => qw/AGG GGG CGG TGG/],
        [Thr    => qw/AGT GGT CGT TGT/],
        [Val    => qw/AAC GAC CAC TAC/],

        [Ser    => qw/AGA GGA CGA TGA ACT GCT/],
        [Arg    => qw/ACG GCG CCG TCG CCT TCT/],
        [Leu    => qw/AAG GAG CAG TAG CAA TAA/],

        [Phe    => qw/AAA GAA/],

        [Asn    => qw/ATT GTT/],
        [Lys    => qw/CTT TTT/],

        [Asp    => qw/ATC GTC/],
        [Glu    => qw/CTC TTC/],

        [His    => qw/ATG GTG/],
        [Gln    => qw/CTG TTG/],

        [Tyr    => qw/ATA GTA/],
        [Supres => qw/CTA TTA/],

        [Ile    => qw/AAT GAT TAT/],
        [Met    => qw/CAT/],

        [Cys    => qw/ACA GCA/],
        [Trp    => qw/CCA/],
        [SelCys => qw/TCA/],
        [Undet  => qw/??? NNN/],
    );

    my %isotype_of_anticodon = ();
    foreach my $row (@anticodons_by_isotype)
    {
        my ($isotype, @anticodons) = @{$row};
        foreach my $anticodon (@anticodons)
        {
            $isotype_of_anticodon{$anticodon} = $isotype;
        }
    }
    $self->{aa_list} = \%isotype_of_anticodon;

    # Anticodon -> isotype table used by get_vert_mito_type()
    $self->{vert_mito_aa_list} = {
        TGC => 'Ala',
        TCC => 'Gly',
        TGG => 'Pro',
        TGT => 'Thr',
        TAC => 'Val',
        TGA => 'Ser', GCT => 'Ser',
        TCG => 'Arg',
        TAG => 'Leu', TAA => 'Leu',
        GAA => 'Phe',
        GTT => 'Asn',
        TTT => 'Lys',
        GTC => 'Asp',
        TTC => 'Glu',
        GTG => 'His',
        TTG => 'Gln',
        GTA => 'Tyr',
        GAT => 'Ile',
        TAT => 'Met', CAT => 'Met',
        GCA => 'Cys',
        TCA => 'Trp',
    };

    $self->{trans_map} = {};
    $self->{one_let_trans_map} = {};
}
126
# Accessor: anticodon string that marks an undetermined anticodon.
sub undef_anticodon
{
    my ($self) = @_;
    return $self->{undef_anticodon};
}
132
# Accessor: isotype name reported when the isotype cannot be determined.
sub undef_isotype
{
    my ($self) = @_;
    return $self->{undef_isotype};
}
138
# Accessor: reference to the ordered array of isotype names.
sub isotypes
{
    my ($self) = @_;
    return $self->{isotypes};
}
144
# Accessor: reference to the isotype -> anticodon-list hash.
sub ac_list
{
    my ($self) = @_;
    return $self->{ac_list};
}
150
# Accessor: reference to the anticodon -> isotype hash.
sub aa_list
{
    my ($self) = @_;
    return $self->{aa_list};
}
156
# Look up the isotype for an anticodon in the aa_list table.
#   $ac     - anticodon string used as the hash key
# Returns the isotype name, or the empty string when the anticodon has no
# defined entry.
sub get_isotype
{
    my ($self, $ac) = @_;

    return defined($self->{aa_list}->{$ac})
         ? $self->{aa_list}->{$ac}
         : "";
}
169
# Accessor: reference to the isotype -> one-letter code hash.
sub one_let_trans_map
{
    my ($self) = @_;
    return $self->{one_let_trans_map};
}
175
# Lines of the built-in default genetic code table (the __DATA__ section of
# this source file).  The DATA handle reaches end-of-file after one pass, so
# the lines are kept here; otherwise any call after the first one would see
# an empty default table.
my @default_gcode_lines;

# Build the anticodon -> isotype map (trans_map) and the isotype ->
# one-letter map (one_let_trans_map) of this object.
#
#   $opts - options object; must provide alt_gcode() (true when an alternate
#           genetic code is requested) and gc_file() (name of that file)
#
# The built-in default table is always loaded first.  When alt_gcode() is
# true the alternate file is read afterwards and its entries override the
# defaults.  The alternate file is looked up as given and then under
# /usr/local/lib/tRNAscanSE/.
#
# Dies when the alternate file cannot be found or opened.
sub read_transl_table
{
    my $self = shift;
    my $opts = shift;
    my $alt_gcode = $opts->alt_gcode();
    my $gc_file = $opts->gc_file();

    # Read in default genetic code table (may contain ambiguous bases) at
    # end of this source file; only the first call touches the DATA handle
    if (!@default_gcode_lines)
    {
        @default_gcode_lines = <DATA>;
    }
    my $default_trans_map = _parse_gcode_lines($self, \@default_gcode_lines);

    $self->{one_let_trans_map}->{$self->{undef_isotype}} = "?";
    $self->{one_let_trans_map}->{"SeC(p)"} = "Z";
    $self->{one_let_trans_map}->{"SeC(e)"} = "Z";

    _store_expanded_anticodons($self, $default_trans_map);

    if ($alt_gcode)
    {
        my $gc_file_path;
        if (-r $gc_file)
        {
            $gc_file_path = $gc_file;
        }
        elsif (-r "/usr/local/lib/tRNAscanSE/".$gc_file)
        {
            $gc_file_path = "/usr/local/lib/tRNAscanSE/".$gc_file;
        }
        else
        {
            die "FATAL: Could not find $gc_file translation codon file\n\n";
        }

        # Three-argument open with a lexical handle: the file name can no
        # longer be interpreted as an open mode, and no global handle leaks
        open(my $gc_table, '<', $gc_file_path)
            or die "FATAL: Could not open $gc_file_path translation codon file: $!\n\n";
        my @alt_gcode_lines = <$gc_table>;
        close($gc_table);

        # Entries of the alternate table override the default ones
        my $alt_trans_map = _parse_gcode_lines($self, \@alt_gcode_lines);
        _store_expanded_anticodons($self, $alt_trans_map);
    }

    return;
}

# Parse genetic code table lines of the form
#     <3-letter codon>  <isotype>  <one-letter code>
# (codons may contain ambiguous bases; lines that do not start with a codon,
# e.g. '#' comments and blank lines, are skipped).  Records the one-letter
# code of every isotype in one_let_trans_map and returns a hash reference
# that maps rev_comp_seq(codon) to the isotype.
# NOTE(review): rev_comp_seq() is not defined in this file; it presumably
# comes from tRNAscanSE::Utils and returns the reverse complement -- confirm.
sub _parse_gcode_lines
{
    my ($self, $lines) = @_;

    my %isotype_of = ();
    foreach my $line (@{$lines})
    {
        if ($line =~ /^([ACGTUNRYSWMKBDHV]{3,3})\s+(\S+)\s+(\S)/i)
        {
            # Copy the captures before calling anything else
            my ($codon, $isotype, $one_letter) = (uc($1), $2, $3);
            $isotype_of{rev_comp_seq($codon)} = $isotype;
            $self->{one_let_trans_map}->{$isotype} = $one_letter;
        }
    }
    return \%isotype_of;
}

# Convert any ambiguous bases to make all non-ambiguous anticodons and save
# the translated isotype of each one in trans_map.  Keys are processed in
# sorted order so the result is deterministic.
sub _store_expanded_anticodons
{
    my ($self, $isotype_of) = @_;

    foreach my $acodon (sort keys(%{$isotype_of}))
    {
        foreach my $expanded_ac (expand_ambig($acodon))
        {
            $self->{trans_map}->{$expanded_ac} = $isotype_of->{$acodon};
        }
    }
    return;
}
266
# Decode an anticodon into an isotype name using trans_map.
#
#   $cm        - object providing Pselc_cm_file_path() and Eselc_cm_file_path()
#   $ac        - anticodon to be decoded (may contain ambiguous bases)
#   $cm_file   - covariance model file the hit was found with
#   $model     - model name; "SeC" keeps a SeC translation as is
#   $cove_mode - true value also keeps a SeC translation as is
#
# Returns
#   - the undetermined isotype when $ac is the undefined anticodon, when the
#     expanded anticodons do not all translate to the same isotype, or when
#     an expanded anticodon has no entry in trans_map (previously the last
#     case yielded undef)
#   - 'SeC' when $cm_file is one of the two selenocysteine model files
#   - otherwise the common isotype of all expanded anticodons, with "SeC"
#     replaced by "Sup" unless $model is "SeC" or $cove_mode is set
sub get_tRNA_type
{
    my $self = shift;
    my $cm = shift;
    my $ac = shift;                         # anticodon to be decoded
    my $cm_file = shift;
    my $model = shift;
    my $cove_mode = shift;

    my $Pselc_cm_file_path = $cm->Pselc_cm_file_path();
    my $Eselc_cm_file_path = $cm->Eselc_cm_file_path();

    if ($ac eq $self->{undef_anticodon})
    {
        return $self->{undef_isotype};
    }
    if (($cm_file eq $Pselc_cm_file_path) || ($cm_file eq $Eselc_cm_file_path))
    {
        return 'SeC';
    }

    my $common_type;
    foreach my $exp_codon (expand_ambig($ac))
    {
        my $type = $self->{trans_map}->{$exp_codon};

        # An anticodon that is missing from the translation table cannot be
        # decoded at all
        if (!defined $type)
        {
            return $self->{undef_isotype};
        }

        if ($type eq "SeC" and $model ne "SeC" and !$cove_mode)
        {
            $type = "Sup";
        }

        # All expansions of an ambiguous anticodon must agree
        if (defined $common_type && $type ne $common_type)
        {
            return $self->{undef_isotype};
        }
        $common_type = $type;
    }

    # No expansion at all (e.g. empty anticodon) is undetermined as well
    return defined $common_type ? $common_type : $self->{undef_isotype};
}
312
# Look up the isotype for an anticodon in the vert_mito_aa_list table.
#   $ac - anticodon string used as the hash key
# Returns the isotype name, or the empty string when the anticodon has no
# defined entry.
sub get_vert_mito_type
{
    my ($self, $ac) = @_;

    return defined($self->{vert_mito_aa_list}->{$ac})
         ? $self->{vert_mito_aa_list}->{$ac}
         : "";
}
324
# Expand a sequence containing ambiguity codes (N, Y, R, W, S, M, K, V, B,
# H, D) into the list of all sequences it stands for.
#   $ac - sequence to expand; only the upper-case codes above are expanded
# Returns the list of expanded sequences.
sub expand_ambig
{
    my ($ac) = @_;

    # Candidates are kept in one blank-separated string; the leading and
    # trailing blank let the substitution patterns find every candidate
    # between two whitespace characters
    my $pool = " $ac ";

    # N stands for any of the four bases
    while (index($pool, 'N') >= 0)
    {
        $pool =~ s/(.*)\s(\S*)N(\S*)\s(.*)/$1 $2A$3 $2C$3 $2G$3 $2T$3 $4/g;
    }

    # Codes standing for two bases
    my @two_base_codes = (
        ['Y', 'C', 'T'], ['R', 'A', 'G'],
        ['W', 'A', 'T'], ['S', 'C', 'G'],
        ['M', 'A', 'C'], ['K', 'G', 'T'],
    );
    foreach my $rule (@two_base_codes)
    {
        expand2(\$pool, @{$rule});
    }

    # Codes standing for three bases
    my @three_base_codes = (
        ['V', 'A', 'C', 'G'], ['B', 'C', 'G', 'T'],
        ['H', 'A', 'C', 'T'], ['D', 'A', 'G', 'T'],
    );
    foreach my $rule (@three_base_codes)
    {
        expand3(\$pool, @{$rule});
    }

    # Drop the leading blank, then split into the individual sequences
    return split(/ /, substr($pool, 1));
}
345
# Replace, in place, every occurrence of a two-base ambiguity code in a
# blank-delimited candidate string by its two alternatives.
#   $pool_ref - reference to the candidate string; each candidate must have
#               whitespace on both sides
#   $code     - ambiguity code to expand
#   $first, $second - the two bases the code stands for
sub expand2
{
    my ($pool_ref, $code, $first, $second) = @_;

    # Each pass rewrites one occurrence; repeat until none is left
    until (index(${$pool_ref}, $code) == -1)
    {
        ${$pool_ref} =~ s{(.*)\s(\S*)$code(\S*)\s(.*)}{$1 $2$first$3 $2$second$3 $4}g;
    }
}
355
# Replace, in place, every occurrence of a three-base ambiguity code in a
# blank-delimited candidate string by its three alternatives.
#   $pool_ref - reference to the candidate string; each candidate must have
#               whitespace on both sides
#   $code     - ambiguity code to expand
#   $first, $second, $third - the three bases the code stands for
sub expand3
{
    my ($pool_ref, $code, $first, $second, $third) = @_;

    # Each pass rewrites one occurrence; repeat until none is left
    until (index(${$pool_ref}, $code) == -1)
    {
        ${$pool_ref} =~ s{(.*)\s(\S*)$code(\S*)\s(.*)}{$1 $2$first$3 $2$second$3 $2$third$3 $4}g;
    }
}
365
3661;
367
368__DATA__
369GCN        Ala        A
370TGY        Cys        C
371GAY        Asp        D
372GAR        Glu        E
373TTY        Phe        F
374GGN        Gly        G
375CAY        His        H
376ATH        Ile        I
377AAR        Lys        K
378TTR        Leu        L
379CTN        Leu        L
380ATG        Met        M
381AAY        Asn        N
382CCN        Pro        P
383CAR        Gln        Q
384AGR        Arg        R
385CGN        Arg        R
386AGY        Ser        S
387TCN        Ser        S
388ACN        Thr        T
389GTN        Val        V
390TGG        Trp        W
391TAY        Tyr        Y
392TAR        Sup        ?
393TGA        SeC        Z
394
395