1 /* @source transeq application
2 **
3 ** Translate nucleic acid sequences
4 **
5 ** @author Copyright (C) Gary Williams (gwilliam@hgmp.mrc.ac.uk)
6 ** Mar 4 17:18 1999 (ajb)
7 ** Jul 19 19:24 2000 (ajb)
8 ** Jun 29 16:50 2001 (gww) use new version of ajTrnSeqOrig
9 ** @@
10 **
11 ** This program is free software; you can redistribute it and/or
12 ** modify it under the terms of the GNU General Public License
13 ** as published by the Free Software Foundation; either version 2
14 ** of the License, or (at your option) any later version.
15 **
16 ** This program is distributed in the hope that it will be useful,
17 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ** GNU General Public License for more details.
20 **
21 ** You should have received a copy of the GNU General Public License
22 ** along with this program; if not, write to the Free Software
23 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
24 ******************************************************************************/
25
26 #include "emboss.h"
27
28
29
30
31 static void transeq_Trim(AjPSeq seq);
32 static void transeq_GetFrames(AjPStr const *framelist, AjBool *frames);
33 static void transeq_Clean(AjPSeq seq);
34
35
36
37
38 /* @prog transeq **************************************************************
39 **
40 ** Translate nucleic acid sequences
41 **
42 ******************************************************************************/
43
main(int argc,char ** argv)44 int main(int argc, char **argv)
45 {
46 AjPSeqall seqall;
47 AjPSeqout seqout;
48 AjPSeq seq;
49 AjPTrn trnTable;
50 AjPSeq pep = NULL;
51 AjPStr *framelist;
52 AjBool frames[6]; /* frames to be translated 1 to 3, -1 to -3 */
53 AjPStr tablename;
54 ajint table;
55 AjPRange regions;
56 AjPRange seqregions;
57 AjBool trim;
58 AjBool clean;
59 AjBool wholeseq = ajFalse; /* true if the range covers the whole sequence */
60 AjBool alternate;
61 AjBool methionine;
62
63 int i;
64
65 embInit("transeq", argc, argv);
66
67 seqout = ajAcdGetSeqoutall("outseq");
68 seqall = ajAcdGetSeqall("sequence");
69 framelist = ajAcdGetList("frame");
70 tablename = ajAcdGetListSingle("table");
71 regions = ajAcdGetRange("regions");
72 trim = ajAcdGetBoolean("trim");
73 clean = ajAcdGetBoolean("clean");
74 alternate = ajAcdGetBoolean("alternative");
75 methionine = ajAcdGetBoolean("methionine");
76
77 /* get the frames to be translated */
78 transeq_GetFrames(framelist, frames);
79
80 /* initialise the translation table */
81 ajStrToInt(tablename, &table);
82 trnTable = ajTrnNewI(table);
83
84 /* shift values of translate region to match -sbegin=n parameter */
85 /* ajRangeSetOffset(regions, ajSeqallGetseqBegin(seqall));*/
86
87
88 while(ajSeqallNext(seqall, &seq))
89 {
90 ajSeqTrim(seq);
91
92 seqregions = ajRangeNewRange(regions);
93 wholeseq = ajRangeIsWhole(seqregions, seq);
94
95 /* get regions to translate */
96 if(!wholeseq)
97 ajRangeSeqExtract(seqregions, seq);
98
99 for(i=0; i<6; i++)
100 {
101 ajDebug("try frame: %d\n", i);
102 if(frames[i])
103 {
104 if(methionine && !wholeseq)
105 {
106 if(i<3)
107 pep = ajTrnSeqInit(trnTable, seq, i+1);
108 else
109 if(alternate) /* frame -1 uses codons starting at end */
110 pep = ajTrnSeqInit(trnTable, seq, -i-1);
111 else /* frame -1 uses frame 1 codons */
112 pep = ajTrnSeqInit(trnTable, seq, 2-i);
113 }
114 else
115 {
116 if(i<3)
117 pep = ajTrnSeqOrig(trnTable, seq, i+1);
118 else
119 if(alternate) /* frame -1 uses codons starting at end */
120 pep = ajTrnSeqOrig(trnTable, seq, -i-1);
121 else /* frame -1 uses frame 1 codons */
122 pep = ajTrnSeqOrig(trnTable, seq, 2-i);
123 }
124
125 if(trim)
126 transeq_Trim(pep);
127
128 if(clean)
129 transeq_Clean(pep); /* clean after the trim */
130
131 ajSeqoutWriteSeq(seqout, pep);
132 ajSeqDel(&pep);
133 }
134 }
135 ajRangeDel(&seqregions);
136 }
137
138 ajSeqoutClose(seqout);
139
140 ajTrnDel(&trnTable);
141 ajSeqallDel(&seqall);
142 ajSeqDel(&seq);
143 ajSeqDel(&pep);
144 ajStrDelarray(&framelist);
145 ajStrDel(&tablename);
146 ajSeqoutDel(&seqout);
147 ajRangeDel(®ions);
148 ajRangeDel(&seqregions);
149
150 embExit();
151 return 0;
152 }
153
154
155
156
157
158 /* @funcstatic transeq_Trim ***************************************************
159 **
160 ** Removes X, and/or * characters from the end of the translation
161 **
162 ** @param [u] seq [AjPSeq] sequence to trim
163 ** @return [void]
164 ** @@
165 ******************************************************************************/
166
static void transeq_Trim(AjPSeq seq)
{
    AjPStr s;
    const char *p;
    ajint i;
    ajint len;

    /* work on a private copy of the residues; it is released below */
    s = ajSeqGetSeqCopyS(seq);
    p = ajStrGetuniquePtr(&s);

    /* index of the last residue; -1 for an empty translation */
    len = (ajint) ajStrGetLen(s) - 1;

    /* walk back from the end to the last residue that is neither X nor * */
    for(i=len; i>=0; i--)
    {
        if(p[i] != 'X' && p[i] != '*')
            break;
    }

    /* i is the last index to keep, or -1 when every residue was X or * */
    if(i < len)
        ajStrTruncateLen(&s, i+1);

    ajSeqAssignSeqS(seq, s);

    /*
    ** The working copy was never freed before and leaked once per
    ** translated frame. NOTE(review): assumes ajSeqAssignSeqS leaves the
    ** sequence holding its own copy or reference - confirm in AJAX docs.
    */
    ajStrDel(&s);

    return;
}
194
195
196
197
198 /* @funcstatic transeq_Clean *************************************************
199 **
200 ** Converts * characters to X's in the translation
201 **
202 **
203 ** @param [u] seq [AjPSeq] sequence to clean
204 ** @return [void]
205 ** @@
206 ******************************************************************************/
207
static void transeq_Clean(AjPSeq seq)
{
    AjPStr str;

    /* work on a private copy of the residues; it is released below */
    str = ajSeqGetSeqCopyS(seq);

    /* replace every '*' in the copy with 'X' and store the result back */
    ajStrExchangeSetCC(&str, "*", "X");
    ajSeqAssignSeqS(seq, str);

    /*
    ** The working copy was never freed before and leaked once per
    ** translated frame. NOTE(review): assumes ajSeqAssignSeqS leaves the
    ** sequence holding its own copy or reference - confirm in AJAX docs.
    */
    ajStrDel(&str);

    return;
}
219
220
221
222
223 /* @funcstatic transeq_GetFrames **********************************************
224 **
225 ** Converts the list of frame numbers into a boolean vector.
226 ** Frame numbers are ordered in the vector as:
**               1, 2, 3, -1, -2, -3
228 **
229 ** @param [r] framelist [AjPStr const *] list of frame numbers
230 ** @param [w] frames [AjBool*] Boolean vector
231 ** @return [void]
232 ** @@
233 ******************************************************************************/
234
static void transeq_GetFrames(AjPStr const *framelist, AjBool *frames)
{
    /*
    ** Recognised selectors and the vector slots each one switches on.
    ** Bit n of Slots stands for frames[n]; slots 0..2 hold frames 1..3
    ** and slots 3..5 hold frames -1..-3. Entries are tried in the order
    ** listed and the first match is used.
    */
    static const struct
    {
        const char *Text;
        unsigned int Slots;
    } choices[] =
    {
        {"1",  0x01},
        {"2",  0x02},
        {"3",  0x04},
        {"-1", 0x08},
        {"-2", 0x10},
        {"-3", 0x20},
        {"F",  0x07},	/* all three forward frames */
        {"R",  0x38},	/* all three reverse frames */
        {"6",  0x3f}	/* all six frames */
    };
    const int nchoices = (int) (sizeof(choices) / sizeof(choices[0]));
    const int nslots = 6;
    int entry;
    int pick;
    int slot;

    /* start with nothing selected */
    for(slot=0; slot<nslots; slot++)
        frames[slot] = ajFalse;

    /* framelist is terminated by a NULL entry */
    for(entry=0; framelist[entry]; entry++)
    {
        for(pick=0; pick<nchoices; pick++)
        {
            if(ajStrMatchC(framelist[entry], choices[pick].Text))
                break;
        }

        if(pick == nchoices)
        {
            /* report the bad selector and carry on with the next one */
            ajErr("Unknown frame: '%S'", framelist[entry]);
            continue;
        }

        /* selections accumulate; a slot is never switched back off */
        for(slot=0; slot<nslots; slot++)
        {
            if(choices[pick].Slots & (1U << slot))
                frames[slot] = ajTrue;
        }
    }

    return;
}
285