1 /* @source checktrans application
2 **
3 ** Check translations made with transeq (document these translations)
4 **
5 ** @author Copyright (C) Rodrigo Lopez & Alan Bleasby
6 ** @@
7 ** Adapted from work done by Alan Bleasy
8 ** Modified by Gary Williams 19 April 2000 to remove output to STDOUT and to
9 ** write ORF sequences to a single file instead of many individual ones.
10 **
11 ** This program is free software; you can redistribute it and/or
12 ** modify it under the terms of the GNU General Public License
13 ** as published by the Free Software Foundation; either version 2
14 ** of the License, or (at your option) any later version.
15 **
16 ** This program is distributed in the hope that it will be useful,
17 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ** GNU General Public License for more details.
20 **
21 ** You should have received a copy of the GNU General Public License
22 ** along with this program; if not, write to the Free Software
23 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
24 ******************************************************************************/
25
26
27 #include "emboss.h"
28 #include <math.h>
29 #include <stdlib.h>
30
31
32
33
34 static void checktrans_findorfs(AjPSeqout outseq, AjPFile outf, ajint s,
35 ajint len, const char *seq, const char *name,
36 ajint orfml, AjBool addedasterisk);
37
38 static void checktrans_ajbseq(AjPSeqout outseq, const char *seq,
39 ajint begin, int end,
40 const char *name, ajint count);
41
42 static void checktrans_dumptofeat(AjPFeattabOut featout, ajint from, ajint to,
43 const char *p, const char *seqname,
44 ajint min_orflength);
45
46
47
48
49 /* @prog checktrans ***********************************************************
50 **
51 ** Reports STOP codons and ORF statistics of a protein sequence
52 **
53 ******************************************************************************/
54
main(int argc,char ** argv)55 int main(int argc, char **argv)
56 {
57 AjPSeqall seqall;
58 AjPSeq seq = NULL;
59 AjPFile outf = NULL;
60 const AjPStr strand = NULL;
61 AjPStr substr = NULL;
62 AjPSeqout outseq = NULL;
63
64 AjPFeattabOut featout = NULL;
65 AjBool addlast;
66
67 ajint begin;
68 ajint end;
69 ajint len;
70 ajint orfml;
71 AjBool addedasterisk = ajFalse;
72
73 embInit("checktrans",argc,argv);
74
75 seqall = ajAcdGetSeqall("sequence");
76 outf = ajAcdGetOutfile("outfile");
77 orfml = ajAcdGetInt("orfml");
78 outseq = ajAcdGetSeqoutall("outseq");
79 featout = ajAcdGetFeatout("outfeat");
80 addlast = ajAcdGetBoolean("addlast");
81
82 substr = ajStrNew();
83
84
85 while(ajSeqallNext(seqall, &seq))
86 {
87 begin = ajSeqGetBegin(seq);
88 end = ajSeqGetEnd(seq);
89
90 strand = ajSeqGetSeqS(seq);
91
92 ajStrAssignSubS(&substr,strand,begin-1,end-1);
93
94 /* end with a '*' if needed and there is not one there already */
95 if(addlast && ajSeqGetSeqC(seq)[end-1] != '*')
96 {
97 ajStrAppendK(&substr,'*');
98 addedasterisk = ajTrue;
99 }
100 ajDebug("After appending, sequence=%S\n", substr);
101 ajStrFmtUpper(&substr);
102
103 len=ajStrGetLen(substr);
104
105 ajFmtPrintF(outf,"\n\nCHECKTRANS of %s from %d to %d\n\n",
106 ajSeqGetNameC(seq),begin,begin+len-1);
107
108 checktrans_findorfs(outseq, outf, 0, len, ajStrGetPtr(substr),
109 ajSeqGetNameC(seq), orfml, addedasterisk);
110
111 checktrans_dumptofeat(featout,0,len,ajStrGetPtr(substr),
112 ajSeqGetNameC(seq),
113 orfml);
114 }
115
116 ajSeqallDel(&seqall);
117 ajFileClose(&outf);
118 ajSeqoutClose(outseq);
119 ajSeqoutDel(&outseq);
120 ajFeattabOutDel(&featout);
121
122 ajSeqDel(&seq);
123 ajStrDel(&substr);
124
125 embExit();
126
127 return 0;
128 }
129
130
131
132
133 /* @funcstatic checktrans_findorfs ********************************************
134 **
135 ** Undocumented.
136 **
137 ** @param [u] outseq [AjPSeqout] Undocumented
138 ** @param [u] outf [AjPFile] Undocumented
139 ** @param [r] from [ajint] Undocumented
140 ** @param [r] to [ajint] Undocumented
141 ** @param [r] p [const char*] Undocumented
142 ** @param [r] name [const char*] Undocumented
143 ** @param [r] min_orflength [ajint] Undocumented
144 ** @param [r] addedasterisk [AjBool] True if an asterisk was added at the end
145 ** @@
146 ******************************************************************************/
147
checktrans_findorfs(AjPSeqout outseq,AjPFile outf,ajint from,ajint to,const char * p,const char * name,ajint min_orflength,AjBool addedasterisk)148 static void checktrans_findorfs (AjPSeqout outseq, AjPFile outf, ajint from,
149 ajint to, const char *p, const char *name,
150 ajint min_orflength, AjBool addedasterisk)
151
152 {
153 ajint i;
154 ajint count = 1;
155 ajint last_stop = 0;
156 ajint orflength = 0;
157
158 ajFmtPrintF(outf,"\tORF#\tPos\tLen\tORF Range\tSequence name\n\n");
159
160 for(i=from;i<to;++i)
161 {
162 if(p[i]=='*')
163 {
164 orflength=i-last_stop;
165 if(orflength >= min_orflength)
166 {
167 ajFmtPrintF(outf,"\t%d\t%d\t%d\t%d-%d\t%s_%d\n", count,
168 i+1, orflength, i-orflength+1, i, name,count);
169 checktrans_ajbseq(outseq, p,i-orflength,i-1,name,count);
170
171 }
172 last_stop = ++i;
173 ++count;
174 while(p[i] == '*')
175 {
176 /* check to see if we have consecutive ****s */
177 last_stop = ++i;
178 ++count;
179 }
180 }
181 }
182
183 /* don't count the last asterisk if it was added by the program */
184 if(addedasterisk)
185 --count;
186
187 ajFmtPrintF(outf,"\n\tTotal STOPS: %5d\n\n ",count-1);
188
189 return;
190 }
191
192
193
194
195 /* @funcstatic checktrans_ajbseq **********************************************
196 **
197 ** Undocumented.
198 **
199 ** @param [u] outseq [AjPSeqout] Undocumented
200 ** @param [r] seq [const char*] Undocumented
201 ** @param [r] begin [ajint] Undocumented
202 ** @param [r] end [int] Undocumented
203 ** @param [r] name [const char*] Undocumented
204 ** @param [r] count [ajint] Undocumented
205 ** @@
206 ******************************************************************************/
207
checktrans_ajbseq(AjPSeqout outseq,const char * seq,ajint begin,int end,const char * name,ajint count)208 static void checktrans_ajbseq(AjPSeqout outseq, const char *seq,
209 ajint begin, int end,
210 const char *name, ajint count)
211 {
212 AjPSeq sq;
213 AjPStr str;
214 AjPStr nm;
215
216 sq = ajSeqNew();
217 str = ajStrNew();
218 nm = ajStrNew();
219
220 ajStrAssignSubC(&str,seq,begin,end);
221 ajSeqAssignSeqS(sq,str);
222
223 ajFmtPrintS(&nm,"%s_%d",name,count);
224 ajSeqAssignNameS(sq,nm);
225
226 ajSeqoutWriteSeq(outseq, sq);
227
228 ajStrDel(&nm);
229 ajStrDel(&str);
230 ajSeqDel(&sq);
231
232 return;
233 }
234
235
236
237
238 /* @funcstatic checktrans_dumptofeat ******************************************
239 **
240 ** Undocumented.
241 **
242 ** @param [u] featout [AjPFeattabOut] Undocumented
243 ** @param [r] from [ajint] Undocumented
244 ** @param [r] to [ajint] Undocumented
245 ** @param [r] p [const char*] Undocumented
246 ** @param [r] seqname [const char*] Undocumented
247 ** @param [r] min_orflength [ajint] Undocumented
248 ** @@
249 ******************************************************************************/
250
checktrans_dumptofeat(AjPFeattabOut featout,ajint from,ajint to,const char * p,const char * seqname,ajint min_orflength)251 static void checktrans_dumptofeat(AjPFeattabOut featout, ajint from, ajint to,
252 const char *p, const char *seqname,
253 ajint min_orflength)
254 {
255 ajint i;
256 ajint count = 1;
257 ajint last_stop = 0;
258 ajint orflength = 0;
259 AjPFeattable feattable;
260 AjPStr name = NULL;
261 AjPStr source = NULL;
262 AjPStr type = NULL;
263 char strand = '+';
264 ajint frame = 0;
265 AjPFeature feature;
266 float score = 0.0;
267
268 name = ajStrNew();
269 source = ajStrNew();
270 type = ajStrNew();
271
272
273 ajStrAssignC(&name,seqname);
274
275 feattable = ajFeattableNewProt(name);
276
277 ajStrAssignC(&source,"checktrans");
278 ajStrAssignC(&type,"misc_feature");
279
280
281 for(i=from;i<to;++i)
282 {
283 if(p[i]=='*')
284 {
285 orflength=i-last_stop;
286 if(orflength >= min_orflength)
287 {
288 feature = ajFeatNew(feattable, source, type,
289 i-orflength+1,i, score, strand, frame) ;
290 if(!feature)
291 ajDebug("Error adding feature to table");
292 }
293 last_stop = ++i;
294 ++count;
295 while(p[i] == '*')
296 {
297 /* check to see if there are consecutive ****s */
298 last_stop = ++i;
299 ++count;
300 }
301 }
302 }
303
304 ajFeatSortByStart(feattable);
305 ajFeattableWrite(featout, feattable);
306 ajFeattableDel(&feattable);
307
308 ajStrDel(&name);
309 ajStrDel(&source);
310 ajStrDel(&type);
311
312 return;
313 }
314