1 /*****************************************************************
2 * SQUID - a library of functions for biological sequence analysis
3 * Copyright (C) 1992-2002 Washington University School of Medicine
4 *
5 * This source code is freely distributed under the terms of the
6 * GNU General Public License. See the files COPYRIGHT and LICENSE
7 * for details.
8 *****************************************************************/
9
10 /*
11 * translate.c - functions for translating nucleic acid sequence
12 * created Tue Jan 12 11:27:29 1993, SRE
13 *
14 * RCS $Id: translate.c 217 2011-03-19 10:27:10Z andreas $ (Original squid RCS Id: translate.c,v 1.2 1998/10/09 18:07:16 eddy Exp)
15 */
16
17 #include <stdio.h>
18 #include <string.h>
19 #include "squid.h"
20
21
22 #ifdef MEMDEBUG
23 #include "dbmalloc.h"
24 #endif
25
26
27
28 /* Function: Translate(char *seq, char **code)
29 *
30 * Given a ptr to the start of a nucleic acid sequence,
31 * and a genetic code, translate the sequence into
32 * amino acid sequence.
33 *
34 * code is an array of 65 strings, representing
35 * the translations of the 64 codons, arranged
36 * in order AAA, AAC, AAG, AAU, ..., UUA, UUC, UUG, UUU.
37 * '*' or '***' is used to represent termination
38 * codons, usually. The final string, code[64],
39 * is the code for an ambiguous amino acid.
40 *
41 * Because of the way space is allocated for the amino
42 * acid sequence, the amino acid strings cannot be
43 * longer than 3 letters each. (I don't foresee using
44 * anything but the single- and triple- letter codes.)
45 *
46 * Returns a ptr to the translation string on success,
47 * or NULL on failure.
48 */
49 char *
Translate(char * seq,char ** code)50 Translate(char *seq, char **code)
51 {
52 int codon; /* index for codon */
53 char *aaseq; /* RETURN: the translation */
54 char *aaptr; /* ptr into aaseq */
55 int i;
56
57 if (seq == NULL)
58 { squid_errno = SQERR_NODATA; return NULL; }
59 if ((aaseq = (char *) calloc (strlen(seq) + 1, sizeof(char))) == NULL)
60 Die("calloc failed");
61
62 aaptr = aaseq;
63 for (; *seq != '\0' && *(seq+1) != '\0' && *(seq+2) != '\0'; seq += 3)
64 {
65 /* calculate the lookup value for
66 this codon */
67 codon = 0;
68 for (i = 0; i < 3; i++)
69 {
70 codon *= 4;
71 switch (*(seq + i)) {
72 case 'A': case 'a': break;
73 case 'C': case 'c': codon += 1; break;
74 case 'G': case 'g': codon += 2; break;
75 case 'T': case 't': codon += 3; break;
76 case 'U': case 'u': codon += 3; break;
77 default: codon = 64; break;
78 }
79 if (codon == 64) break;
80 }
81
82 strcpy(aaptr, code[codon]);
83 aaptr += strlen(code[codon]);
84 }
85 return aaseq;
86 }
87