# tRNAscanSE/GeneticCode.pm
# This class describes the genetic codes used in tRNAscan-SE.
#
# --------------------------------------------------------------
# This module is part of the tRNAscan-SE program.
# Copyright (C) 2017 Patricia Chan and Todd Lowe
# --------------------------------------------------------------
#

package tRNAscanSE::GeneticCode;

use strict;
use tRNAscanSE::Utils;

# Parsed rows of the built-in genetic code table stored after __DATA__.
# The DATA handle can only be read through once per process, so the rows
# are parsed on first use and cached here; every later call to
# read_transl_table() (on any object) re-uses the cached rows.
my $default_code_rows;

# IUPAC ambiguity codes handled by expand_ambig(), in the order in which
# they are expanded.  Each entry is [ambiguous base, replacement bases...].
my @ambig_expansions = (
    [ 'N', 'A', 'C', 'G', 'T' ],
    [ 'Y', 'C', 'T' ], [ 'R', 'A', 'G' ],
    [ 'W', 'A', 'T' ], [ 'S', 'C', 'G' ],
    [ 'M', 'A', 'C' ], [ 'K', 'G', 'T' ],
    [ 'V', 'A', 'C', 'G' ], [ 'B', 'C', 'G', 'T' ],
    [ 'H', 'A', 'C', 'T' ], [ 'D', 'A', 'G', 'T' ],
);

# Constructor.  Builds the object and fills in the static lookup tables via
# initialize().  The translation maps stay empty until read_transl_table()
# is called.
sub new
{
    my $class = shift;
    my $self = {};

    initialize($self);

    bless ($self, $class);
    return $self;
}

sub DESTROY
{
    my $self = shift;
}

# Populate the static tables of the object:
#   undef_anticodon / undef_isotype - placeholders for undetermined values
#   isotypes           - ordered list of isotype names
#   ac_list            - isotype -> list of anticodons (used for printing
#                        the global tRNA summary)
#   aa_list            - anticodon -> isotype
#   vert_mito_aa_list  - anticodon -> isotype table used by
#                        get_vert_mito_type()
#   trans_map / one_let_trans_map - empty; filled by read_transl_table()
sub initialize
{
    my $self = shift;

    $self->{undef_anticodon} = "NNN";
    $self->{undef_isotype} = "Undet";

    my @isotypes = ('Ala', 'Gly', 'Pro', 'Thr', 'Val',
                    'Ser', 'Arg', 'Leu',
                    'Phe', 'Asn', 'Lys', 'Asp', 'Glu', 'His', 'Gln',
                    'Ile', 'Met', 'Tyr', 'Supres', 'Cys', 'Trp', 'SelCys');
    $self->{isotypes} = \@isotypes;

    # Amino acid -> Anti-codon list for printing out global tRNA summary

    my %ac_list = (
        'Ala' => [qw/AGC GGC CGC TGC/],
        'Gly' => [qw/ACC GCC CCC TCC/],
        'Pro' => [qw/AGG GGG CGG TGG/],
        'Thr' => [qw/AGT GGT CGT TGT/],
        'Val' => [qw/AAC GAC CAC TAC/],

        'Ser' => [qw/AGA GGA CGA TGA ACT GCT/],
        'Arg' => [qw/ACG GCG CCG TCG CCT TCT/],
        'Leu' => [qw/AAG GAG CAG TAG CAA TAA/],

        'Phe' => [qw/AAA GAA        /],

        'Asn' => [qw/ATT GTT        /],
        'Lys' => [qw/        CTT TTT/],

        'Asp' => [qw/ATC GTC        /],
        'Glu' => [qw/        CTC TTC/],

        'His' => [qw/ATG GTG        /],
        'Gln' => [qw/        CTG TTG/],

        'Tyr' => [qw/ATA GTA        /],
        'Supres' => [qw/     CTA TTA TCA/],

        'Ile' => [qw/AAT GAT CAT TAT/],
        'Met' => [qw/        CAT    /],

        'Cys' => [qw/ACA GCA        /],
        'Trp' => [qw/        CCA    /],
        'SelCys' => [qw/         TCA/]
    );
    $self->{ac_list} = \%ac_list;

    # Anti-codon -> amino acid (isotype)

    $self->{aa_list} = {
        'AGC'=>'Ala', 'GGC'=>'Ala', 'CGC'=>'Ala', 'TGC'=>'Ala',
        'ACC'=>'Gly', 'GCC'=>'Gly', 'CCC'=>'Gly', 'TCC'=>'Gly',
        'AGG'=>'Pro', 'GGG'=>'Pro', 'CGG'=>'Pro', 'TGG'=>'Pro',
        'AGT'=>'Thr', 'GGT'=>'Thr', 'CGT'=>'Thr', 'TGT'=>'Thr',
        'AAC'=>'Val', 'GAC'=>'Val', 'CAC'=>'Val', 'TAC'=>'Val',

        'AGA'=>'Ser', 'GGA'=>'Ser', 'CGA'=>'Ser', 'TGA'=>'Ser', 'ACT'=>'Ser', 'GCT'=>'Ser',
        'ACG'=>'Arg', 'GCG'=>'Arg', 'CCG'=>'Arg', 'TCG'=>'Arg', 'CCT'=>'Arg', 'TCT'=>'Arg',
        'AAG'=>'Leu', 'GAG'=>'Leu', 'CAG'=>'Leu', 'TAG'=>'Leu', 'CAA'=>'Leu', 'TAA'=>'Leu',

        'AAA'=>'Phe', 'GAA'=>'Phe',

        'ATT'=>'Asn', 'GTT'=>'Asn',
        'CTT'=>'Lys', 'TTT'=>'Lys',

        'ATC'=>'Asp', 'GTC'=>'Asp',
        'CTC'=>'Glu', 'TTC'=>'Glu',

        'ATG'=>'His', 'GTG'=>'His',
        'CTG'=>'Gln', 'TTG'=>'Gln',

        'ATA'=>'Tyr', 'GTA'=>'Tyr',
        'CTA'=>'Supres', 'TTA'=>'Supres',

        'AAT'=>'Ile', 'GAT'=>'Ile', 'TAT'=>'Ile',
        'CAT'=>'Met',

        'ACA'=>'Cys', 'GCA'=>'Cys',
        'CCA'=>'Trp',
        'TCA'=>'SelCys',
        '???'=>'Undet', 'NNN'=>'Undet'
    };

    # Anti-codon -> amino acid table used by get_vert_mito_type()
    # (presumably the vertebrate mitochondrial code, going by the name)

    $self->{vert_mito_aa_list} = {
        'TGC'=>'Ala', 'TCC'=>'Gly', 'TGG'=>'Pro', 'TGT'=>'Thr', 'TAC'=>'Val',
        'TGA'=>'Ser', 'GCT'=>'Ser', 'TCG'=>'Arg', 'TAG'=>'Leu', 'TAA'=>'Leu',
        'GAA'=>'Phe', 'GTT'=>'Asn', 'TTT'=>'Lys', 'GTC'=>'Asp', 'TTC'=>'Glu',
        'GTG'=>'His', 'TTG'=>'Gln', 'GTA'=>'Tyr',
        'GAT'=>'Ile', 'TAT'=>'Met', 'CAT'=>'Met',
        'GCA'=>'Cys', 'TCA'=>'Trp'
    };

    $self->{trans_map} = +{};
    $self->{one_let_trans_map} = +{};
}

# Returns the placeholder string used for an undetermined anticodon.
sub undef_anticodon
{
    my $self = shift;
    return $self->{undef_anticodon};
}

# Returns the placeholder string used for an undetermined isotype.
sub undef_isotype
{
    my $self = shift;
    return $self->{undef_isotype};
}

# Returns a reference to the ordered list of isotype names.
sub isotypes
{
    my $self = shift;
    return $self->{isotypes};
}

# Returns a reference to the isotype -> anticodon list hash.
sub ac_list
{
    my $self = shift;
    return $self->{ac_list};
}

# Returns a reference to the anticodon -> isotype hash.
sub aa_list
{
    my $self = shift;
    return $self->{aa_list};
}

# Look up the isotype for anticodon $ac in the static aa_list table.
# Returns the empty string when the anticodon is not in the table.
sub get_isotype
{
    my $self = shift;
    my $ac = shift;

    my $isotype = "";
    if (defined $self->{aa_list}->{$ac})
    {
        $isotype = $self->{aa_list}->{$ac};
    }
    return $isotype;
}

# Returns a reference to the isotype -> one-letter code hash
# (filled by read_transl_table()).
sub one_let_trans_map
{
    my $self = shift;
    return $self->{one_let_trans_map};
}

# Load the translation tables of this object.
#
# The built-in table (after __DATA__ at the end of this file) is always
# applied first.  If $opts->alt_gcode() is true, the file named by
# $opts->gc_file() is looked up (as given, then under
# /usr/local/lib/tRNAscanSE/) and applied on top, overriding matching
# entries of the built-in table.
#
# Dies if the alternate file cannot be found or opened.
sub read_transl_table
{
    my $self = shift;
    my $opts = shift;
    my $alt_gcode = $opts->alt_gcode();
    my $gc_file = $opts->gc_file();

    # Read in default genetic code table (may contain ambiguous bases) at
    # end of this source file.  DATA is exhausted after one pass, so the
    # parsed rows are cached and shared between calls and objects.

    if (!defined $default_code_rows)
    {
        $default_code_rows = _parse_code_table(\*DATA);
    }
    _apply_code_table($self, $default_code_rows);

    $self->{one_let_trans_map}->{$self->{undef_isotype}} = "?";
    $self->{one_let_trans_map}->{"SeC(p)"} = "Z";
    $self->{one_let_trans_map}->{"SeC(e)"} = "Z";

    return unless ($alt_gcode);

    my $gc_file_path;
    if (-r $gc_file)
    {
        $gc_file_path = $gc_file;
    }
    elsif (-r "/usr/local/lib/tRNAscanSE/".$gc_file)
    {
        $gc_file_path = "/usr/local/lib/tRNAscanSE/".$gc_file;
    }
    else
    {
        die "FATAL: Could not find $gc_file translation codon file\n\n";
    }

    # Three-argument open: the file name is never interpreted as a mode
    # or a command, whatever characters it contains.
    open (my $gc_table, '<', $gc_file_path) ||
        die "FATAL: Could not open $gc_file_path translation codon file: $!\n\n";

    # Read in genetic code table (may contain ambiguous bases)

    my $alt_code_rows = _parse_code_table($gc_table);
    close ($gc_table);

    _apply_code_table($self, $alt_code_rows);
    return;
}

# Parse a genetic code table from the open file handle $fh.
#
# A table line is "<codon> <isotype> <one-letter code>", whitespace
# separated, where the codon is three IUPAC nucleotide letters (any case).
# Every other line (comments starting with '#', blank lines, ...) is
# ignored.
#
# Returns a reference to a list of [anticodon, isotype, one-letter code]
# rows in file order; the anticodon is the reverse complement of the
# upper-cased codon as computed by rev_comp_seq().
sub _parse_code_table
{
    my ($fh) = @_;

    my @rows = ();
    while (my $line = <$fh>)
    {
        if ($line =~ /^([ACGTUNRYSWMKBDHV]{3,3})\s+(\S+)\s+(\S)/i)
        {
            my ($codon, $isotype, $one_letter) = (uc($1), $2, $3);
            push(@rows, [rev_comp_seq($codon), $isotype, $one_letter]);
        }
    }
    return \@rows;
}

# Merge parsed code table rows (see _parse_code_table()) into the object.
#
# Records the one-letter code of every isotype, then converts any
# ambiguous bases to make all non-ambiguous anticodons and saves the
# translated amino acid for each of them in trans_map.  When the same
# anticodon occurs more than once the last row wins; ambiguous anticodons
# are expanded in sorted order.
sub _apply_code_table
{
    my ($self, $rows) = @_;

    my %isotype_of = ();
    foreach my $row (@$rows)
    {
        my ($acodon, $isotype, $one_letter) = @$row;
        $isotype_of{$acodon} = $isotype;
        $self->{one_let_trans_map}->{$isotype} = $one_letter;
    }

    foreach my $acodon (sort keys(%isotype_of))
    {
        foreach my $expanded_ac (expand_ambig($acodon))
        {
            $self->{trans_map}->{$expanded_ac} = $isotype_of{$acodon};
        }
    }
}

# Determine the tRNA isotype for anticodon $ac.
#
# Arguments: $cm (object providing Pselc_cm_file_path() and
# Eselc_cm_file_path()), $ac (anticodon to be decoded), $cm_file,
# $model, $cove_mode.
#
# Returns:
#   - the undetermined isotype if $ac is the undetermined anticodon;
#   - 'SeC' if $cm_file is one of the two selenocysteine model files;
#   - otherwise the trans_map entry shared by all expansions of $ac, or
#     the undetermined isotype if the expansions disagree.  A 'SeC' entry
#     is reported as 'Sup' unless $model is 'SeC' or $cove_mode is set.
#     The result is undef when no expansion of $ac is in trans_map.
sub get_tRNA_type
{
    my $self = shift;
    my $cm = shift;
    my $ac = shift;                     # anticodon to be decoded
    my $cm_file = shift;
    my $model = shift;
    my $cove_mode = shift;

    my $Pselc_cm_file_path = $cm->Pselc_cm_file_path();
    my $Eselc_cm_file_path = $cm->Eselc_cm_file_path();

    if ($ac eq $self->{undef_anticodon})
    {
        return $self->{undef_isotype};
    }
    if (($cm_file eq $Pselc_cm_file_path) || ($cm_file eq $Eselc_cm_file_path))
    {
        return 'SeC';
    }

    my $type;
    my $prev_type = 'INIT';
    foreach my $exp_codon (expand_ambig($ac))
    {
        $type = $self->{trans_map}->{$exp_codon};
        if ($type eq "SeC" and $model ne "SeC" and !$cove_mode)
        {
            $type = "Sup";
        }
        # Two expansions decoding to different isotypes: ambiguous
        if (($type ne $prev_type) && ($prev_type ne 'INIT'))
        {
            return $self->{undef_isotype};
        }
        $prev_type = $type;
    }
    return $type;
}

# Look up the isotype for anticodon $ac in the vert_mito_aa_list table.
# Returns the empty string when the anticodon is not in the table.
sub get_vert_mito_type
{
    my $self = shift;
    my ($ac) = @_;
    my $type = "";
    if (defined $self->{vert_mito_aa_list}->{$ac})
    {
        $type = $self->{vert_mito_aa_list}->{$ac};
    }
    return $type;
}

# Expand an anticodon containing IUPAC ambiguity codes (upper case) into
# the list of all unambiguous anticodons it stands for.  An anticodon
# without ambiguity codes is returned unchanged as a one-element list.
sub expand_ambig
{
    my ($ac) = @_;

    # Work on a space-delimited list of candidates; the padding spaces let
    # the substitution in _expand_base() treat every candidate alike.
    $ac = " ".$ac." ";

    foreach my $expansion (@ambig_expansions)
    {
        _expand_base(\$ac, @$expansion);
    }

    $ac = substr($ac, 1);
    return (split(/ /, $ac));
}

# Replace every occurrence of a two-fold ambiguity code $ambig_base in the
# space-delimited candidate list $$acodon by $sub1 and $sub2.
sub expand2
{
    my ($acodon, $ambig_base, $sub1, $sub2) = @_;

    _expand_base($acodon, $ambig_base, $sub1, $sub2);
}

# Replace every occurrence of a three-fold ambiguity code $ambig_base in
# the space-delimited candidate list $$acodon by $sub1, $sub2 and $sub3.
sub expand3
{
    my($acodon, $ambig_base, $sub1, $sub2, $sub3) = @_;

    _expand_base($acodon, $ambig_base, $sub1, $sub2, $sub3);
}

# Shared worker for expand_ambig(), expand2() and expand3().
#
# $acodons is a reference to a string of space-delimited candidates with a
# leading and a trailing space.  While any candidate still contains
# $ambig_base, one occurrence is replaced by one copy of that candidate
# per base in @bases.
sub _expand_base
{
    my ($acodons, $ambig_base, @bases) = @_;

    while (index($$acodons, $ambig_base) != -1)
    {
        $$acodons =~ s/(.*)\s(\S*)$ambig_base(\S*)\s(.*)/join(' ', $1, (map { $2 . $_ . $3 } @bases), $4)/ge;
    }
}

1;

__DATA__
GCN Ala A
TGY Cys C
GAY Asp D
GAR Glu E
TTY Phe F
GGN Gly G
CAY His H
ATH Ile I
AAR Lys K
TTR Leu L
CTN Leu L
ATG Met M
AAY Asn N
CCN Pro P
CAR Gln Q
AGR Arg R
CGN Arg R
AGY Ser S
TCN Ser S
ACN Thr T
GTN Val V
TGG Trp W
TAY Tyr Y
TAR Sup ?
TGA SeC Z
