1 /*****************************************************************
2  * SQUID - a library of functions for biological sequence analysis
3  * Copyright (C) 1992-2002 Washington University School of Medicine
4  *
5  *     This source code is freely distributed under the terms of the
6  *     GNU General Public License. See the files COPYRIGHT and LICENSE
7  *     for details.
8  *****************************************************************/
9 
10 /*
11  * translate.c - functions for translating nucleic acid sequence
12  * created Tue Jan 12 11:27:29 1993, SRE
13  *
14  * RCS $Id: translate.c 217 2011-03-19 10:27:10Z andreas $ (Original squid RCS Id: translate.c,v 1.2 1998/10/09 18:07:16 eddy Exp)
15  */
16 
17 #include <stdio.h>
18 #include <string.h>
19 #include "squid.h"
20 
21 
22 #ifdef MEMDEBUG
23 #include "dbmalloc.h"
24 #endif
25 
26 
27 
28 /* Function: Translate(char *seq, char **code)
29  *
30  * Given a ptr to the start of a nucleic acid sequence,
31  * and a genetic code, translate the sequence into
32  * amino acid sequence.
33  *
34  * code is an array of 65 strings, representing
35  * the translations of the 64 codons, arranged
36  * in order AAA, AAC, AAG, AAU, ..., UUA, UUC, UUG, UUU.
37  * '*' or '***' is used to represent termination
38  * codons, usually. The final string, code[64],
39  * is the code for an ambiguous amino acid.
40  *
41  * Because of the way space is allocated for the amino
42  * acid sequence, the amino acid strings cannot be
43  * longer than 3 letters each. (I don't foresee using
44  * anything but the single- and triple- letter codes.)
45  *
46  * Returns a ptr to the translation string on success,
47  * or NULL on failure.
48  */
49 char *
Translate(char * seq,char ** code)50 Translate(char *seq, char **code)
51 {
52   int   codon;			/* index for codon         */
53   char *aaseq;                  /* RETURN: the translation */
54   char *aaptr;                  /* ptr into aaseq */
55   int   i;
56 
57   if (seq == NULL)
58     { squid_errno = SQERR_NODATA; return NULL; }
59   if ((aaseq = (char *) calloc (strlen(seq) + 1, sizeof(char))) == NULL)
60     Die("calloc failed");
61 
62   aaptr = aaseq;
63   for (; *seq != '\0' && *(seq+1) != '\0' && *(seq+2) != '\0'; seq += 3)
64     {
65 				/* calculate the lookup value for
66 				   this codon */
67       codon = 0;
68       for (i = 0; i < 3; i++)
69 	{
70 	  codon *= 4;
71 	  switch (*(seq + i)) {
72 	  case 'A': case 'a':             break;
73 	  case 'C': case 'c': codon += 1; break;
74 	  case 'G': case 'g': codon += 2; break;
75 	  case 'T': case 't': codon += 3; break;
76 	  case 'U': case 'u': codon += 3; break;
77 	  default: codon = 64; break;
78 	  }
79 	  if (codon == 64) break;
80 	}
81 
82       strcpy(aaptr, code[codon]);
83       aaptr += strlen(code[codon]);
84     }
85   return aaseq;
86 }
87