1 /* @source checktrans application
2 **
3 ** Check translations made with transeq (document these translations)
4 **
5 ** @author Copyright (C) Rodrigo Lopez & Alan Bleasby
6 ** @@
7 ** Adapted from work done by Alan Bleasy
8 ** Modified by Gary Williams 19 April 2000 to remove output to STDOUT and to
9 **	write ORF sequences to a single file instead of many individual ones.
10 **
11 ** This program is free software; you can redistribute it and/or
12 ** modify it under the terms of the GNU General Public License
13 ** as published by the Free Software Foundation; either version 2
14 ** of the License, or (at your option) any later version.
15 **
16 ** This program is distributed in the hope that it will be useful,
17 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 ** GNU General Public License for more details.
20 **
21 ** You should have received a copy of the GNU General Public License
22 ** along with this program; if not, write to the Free Software
23 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
24 ******************************************************************************/
25 
26 
27 #include "emboss.h"
28 #include <math.h>
29 #include <stdlib.h>
30 
31 
32 
33 
34 static void checktrans_findorfs(AjPSeqout outseq, AjPFile outf, ajint s,
35 				ajint len, const char *seq, const char *name,
36 				ajint orfml, AjBool addedasterisk);
37 
38 static void checktrans_ajbseq(AjPSeqout outseq, const char *seq,
39 			      ajint begin, int end,
40 			      const char *name, ajint count);
41 
42 static void checktrans_dumptofeat(AjPFeattabOut featout, ajint from, ajint to,
43 				  const char *p, const char *seqname,
44 				  ajint min_orflength);
45 
46 
47 
48 
49 /* @prog checktrans ***********************************************************
50 **
51 ** Reports STOP codons and ORF statistics of a protein sequence
52 **
53 ******************************************************************************/
54 
main(int argc,char ** argv)55 int main(int argc, char **argv)
56 {
57     AjPSeqall seqall;
58     AjPSeq    seq    = NULL;
59     AjPFile   outf   = NULL;
60     const AjPStr    strand = NULL;
61     AjPStr    substr = NULL;
62     AjPSeqout outseq = NULL;
63 
64     AjPFeattabOut featout = NULL;
65     AjBool addlast;
66 
67     ajint begin;
68     ajint end;
69     ajint len;
70     ajint orfml;
71     AjBool addedasterisk = ajFalse;
72 
73     embInit("checktrans",argc,argv);
74 
75     seqall  = ajAcdGetSeqall("sequence");
76     outf    = ajAcdGetOutfile("outfile");
77     orfml   = ajAcdGetInt("orfml");
78     outseq  = ajAcdGetSeqoutall("outseq");
79     featout = ajAcdGetFeatout("outfeat");
80     addlast = ajAcdGetBoolean("addlast");
81 
82     substr    = ajStrNew();
83 
84 
85     while(ajSeqallNext(seqall, &seq))
86     {
87         begin = ajSeqGetBegin(seq);
88         end   = ajSeqGetEnd(seq);
89 
90         strand = ajSeqGetSeqS(seq);
91 
92 	ajStrAssignSubS(&substr,strand,begin-1,end-1);
93 
94         /* end with a '*' if needed and there is not one there already */
95         if(addlast && ajSeqGetSeqC(seq)[end-1] != '*')
96 	{
97             ajStrAppendK(&substr,'*');
98             addedasterisk = ajTrue;
99         }
100 	ajDebug("After appending, sequence=%S\n", substr);
101         ajStrFmtUpper(&substr);
102 
103         len=ajStrGetLen(substr);
104 
105 	ajFmtPrintF(outf,"\n\nCHECKTRANS of %s from %d to %d\n\n",
106 		    ajSeqGetNameC(seq),begin,begin+len-1);
107 
108         checktrans_findorfs(outseq, outf, 0, len, ajStrGetPtr(substr),
109 			    ajSeqGetNameC(seq), orfml, addedasterisk);
110 
111 	checktrans_dumptofeat(featout,0,len,ajStrGetPtr(substr),
112 			      ajSeqGetNameC(seq),
113 			      orfml);
114     }
115 
116     ajSeqallDel(&seqall);
117     ajFileClose(&outf);
118     ajSeqoutClose(outseq);
119     ajSeqoutDel(&outseq);
120     ajFeattabOutDel(&featout);
121 
122     ajSeqDel(&seq);
123     ajStrDel(&substr);
124 
125     embExit();
126 
127     return 0;
128 }
129 
130 
131 
132 
133 /* @funcstatic checktrans_findorfs ********************************************
134 **
135 ** Undocumented.
136 **
137 ** @param [u] outseq [AjPSeqout] Undocumented
138 ** @param [u] outf [AjPFile] Undocumented
139 ** @param [r] from [ajint] Undocumented
140 ** @param [r] to [ajint] Undocumented
141 ** @param [r] p [const char*] Undocumented
142 ** @param [r] name [const char*] Undocumented
143 ** @param [r] min_orflength [ajint] Undocumented
144 ** @param [r] addedasterisk [AjBool] True if an asterisk was added at the end
145 ** @@
146 ******************************************************************************/
147 
checktrans_findorfs(AjPSeqout outseq,AjPFile outf,ajint from,ajint to,const char * p,const char * name,ajint min_orflength,AjBool addedasterisk)148 static void checktrans_findorfs (AjPSeqout outseq, AjPFile outf, ajint from,
149 				 ajint to, const char *p, const char *name,
150 				 ajint min_orflength, AjBool addedasterisk)
151 
152 {
153     ajint i;
154     ajint count = 1;
155     ajint last_stop = 0;
156     ajint orflength = 0;
157 
158     ajFmtPrintF(outf,"\tORF#\tPos\tLen\tORF Range\tSequence name\n\n");
159 
160     for(i=from;i<to;++i)
161     {
162 	if(p[i]=='*')
163 	{
164 	    orflength=i-last_stop;
165 	    if(orflength >= min_orflength)
166 	    {
167 		ajFmtPrintF(outf,"\t%d\t%d\t%d\t%d-%d\t%s_%d\n", count,
168 			    i+1, orflength, i-orflength+1, i, name,count);
169 		checktrans_ajbseq(outseq, p,i-orflength,i-1,name,count);
170 
171 	    }
172 	    last_stop = ++i;
173 	    ++count;
174             while(p[i] == '*')
175 	    {
176 		/* check to see if we have consecutive ****s */
177 		last_stop = ++i;
178 		++count;
179             }
180 	}
181     }
182 
183     /* don't count the last asterisk if it was added by the program */
184     if(addedasterisk)
185     	--count;
186 
187     ajFmtPrintF(outf,"\n\tTotal STOPS: %5d\n\n ",count-1);
188 
189     return;
190 }
191 
192 
193 
194 
195 /* @funcstatic checktrans_ajbseq **********************************************
196 **
197 ** Undocumented.
198 **
199 ** @param [u] outseq [AjPSeqout] Undocumented
200 ** @param [r] seq [const char*] Undocumented
201 ** @param [r] begin [ajint] Undocumented
202 ** @param [r] end [int] Undocumented
203 ** @param [r] name [const char*] Undocumented
204 ** @param [r] count [ajint] Undocumented
205 ** @@
206 ******************************************************************************/
207 
checktrans_ajbseq(AjPSeqout outseq,const char * seq,ajint begin,int end,const char * name,ajint count)208 static void checktrans_ajbseq(AjPSeqout outseq, const char *seq,
209 			      ajint begin, int end,
210 			      const char *name, ajint count)
211 {
212     AjPSeq sq;
213     AjPStr str;
214     AjPStr nm;
215 
216     sq  = ajSeqNew();
217     str = ajStrNew();
218     nm  = ajStrNew();
219 
220     ajStrAssignSubC(&str,seq,begin,end);
221     ajSeqAssignSeqS(sq,str);
222 
223     ajFmtPrintS(&nm,"%s_%d",name,count);
224     ajSeqAssignNameS(sq,nm);
225 
226     ajSeqoutWriteSeq(outseq, sq);
227 
228     ajStrDel(&nm);
229     ajStrDel(&str);
230     ajSeqDel(&sq);
231 
232     return;
233 }
234 
235 
236 
237 
238 /* @funcstatic checktrans_dumptofeat ******************************************
239 **
240 ** Undocumented.
241 **
242 ** @param [u] featout [AjPFeattabOut] Undocumented
243 ** @param [r] from [ajint] Undocumented
244 ** @param [r] to [ajint] Undocumented
245 ** @param [r] p [const char*] Undocumented
246 ** @param [r] seqname [const char*] Undocumented
247 ** @param [r] min_orflength [ajint] Undocumented
248 ** @@
249 ******************************************************************************/
250 
checktrans_dumptofeat(AjPFeattabOut featout,ajint from,ajint to,const char * p,const char * seqname,ajint min_orflength)251 static void checktrans_dumptofeat(AjPFeattabOut featout, ajint from, ajint to,
252 				  const char *p, const char *seqname,
253 				  ajint min_orflength)
254 {
255     ajint i;
256     ajint count = 1;
257     ajint last_stop = 0;
258     ajint orflength = 0;
259     AjPFeattable feattable;
260     AjPStr name   = NULL;
261     AjPStr source = NULL;
262     AjPStr type   = NULL;
263     char strand = '+';
264     ajint frame = 0;
265     AjPFeature feature;
266     float score = 0.0;
267 
268     name   = ajStrNew();
269     source = ajStrNew();
270     type   = ajStrNew();
271 
272 
273     ajStrAssignC(&name,seqname);
274 
275     feattable = ajFeattableNewProt(name);
276 
277     ajStrAssignC(&source,"checktrans");
278     ajStrAssignC(&type,"misc_feature");
279 
280 
281     for(i=from;i<to;++i)
282     {
283 	if(p[i]=='*')
284 	{
285 	    orflength=i-last_stop;
286 	    if(orflength >= min_orflength)
287 	    {
288 		feature = ajFeatNew(feattable, source, type,
289 				    i-orflength+1,i, score, strand, frame) ;
290 		if(!feature)
291 		    ajDebug("Error adding feature to table");
292 	    }
293 	    last_stop = ++i;
294 	    ++count;
295 	    while(p[i] == '*')
296 	    {
297 		/* check to see if there are consecutive ****s */
298 		last_stop = ++i;
299 		++count;
300 	    }
301 	}
302     }
303 
304     ajFeatSortByStart(feattable);
305     ajFeattableWrite(featout, feattable);
306     ajFeattableDel(&feattable);
307 
308     ajStrDel(&name);
309     ajStrDel(&source);
310     ajStrDel(&type);
311 
312     return;
313 }
314