1 /* @source prettyseq application
2 **
3 ** Pretty translation of DNA sequences for publication
4 **
5 ** @author Copyright (C) Alan Bleasby (ableasby@hgmp.mrc.ac.uk)
6 ** @@
7 **
8 ** This program is free software; you can redistribute it and/or
9 ** modify it under the terms of the GNU General Public License
10 ** as published by the Free Software Foundation; either version 2
11 ** of the License, or (at your option) any later version.
12 **
13 ** This program is distributed in the hope that it will be useful,
14 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 ** GNU General Public License for more details.
17 **
18 ** You should have received a copy of the GNU General Public License
19 ** along with this program; if not, write to the Free Software
20 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
21 ******************************************************************************/
22 
23 #include "emboss.h"
24 #include <string.h>
25 #include <ctype.h>
26 
27 
28 #define POFF 1000000	/* Printing flag */
29 
30 
31 
32 
33 static void prettyseq_makeRuler(ajint len, ajint begin, char *ruler,
34 				ajint *npos);
35 static void prettyseq_calcProteinPos(ajint *ppos, const AjPStr pro,
36 				     ajint len);
37 static void prettyseq_showTrans(const ajint *ppos, const ajint *npos,
38 				const AjPStr pro, const AjPStr substr,
39 				ajint len, const char *ruler,
40 				ajint begin, AjPFile outf, AjBool isrule,
41 				AjBool isp, AjBool isn, ajint width,
42 				const char *name);
43 static void prettyseq_showTransb(const ajint *ppos, const ajint *npos,
44 				 const AjPStr pro, const AjPStr substr,
45 				 const char *ruler,
46 				 AjPFile outf, AjBool isrule,
47 				 AjBool isp, AjBool isn, ajint start,
48 				 ajint end);
49 static void prettyseq_Translate(ajint beg, ajint end,
50 				AjPStr *s, const AjPCod codon,
51 				const AjPRange range, AjPStr* pro);
52 
53 
54 
55 
56 /* @prog prettyseq ************************************************************
57 **
58 ** Output sequence with translated ranges
59 **
60 ******************************************************************************/
61 
main(int argc,char ** argv)62 int main(int argc, char **argv)
63 {
64     AjPSeq a;
65     AjPFile outf;
66     AjPStr codestr;
67     AjPCod codon;
68     AjPStr substr;
69     AjPRange range;
70     AjBool isrule;
71     AjBool isp;
72     AjBool isn;
73 
74     AjPStr pro;
75 
76     ajint beg;
77     ajint end;
78     ajint len;
79 
80     ajint width;
81 
82     char *ruler;
83     ajint *ppos = NULL;
84     ajint *npos = NULL;
85     ajint gcode;
86 
87 
88     embInit("prettyseq", argc, argv);
89 
90     a      = ajAcdGetSeq("sequence");
91     codestr = ajAcdGetListSingle("table");
92     width  = ajAcdGetInt("width");
93     range  = ajAcdGetRange("range");
94     outf   = ajAcdGetOutfile("outfile");
95     isrule = ajAcdGetBoolean("ruler");
96     isp    = ajAcdGetBoolean("plabel");
97     isn    = ajAcdGetBoolean("nlabel");
98 
99     ajStrToInt(codestr, &gcode);
100     codon  = ajCodNewCodenum(gcode);
101     beg = ajSeqGetBegin(a);
102     end = ajSeqGetEnd(a);
103 
104     substr = ajStrNew();
105     ajStrAssignSubC(&substr,ajSeqGetSeqC(a),beg-1,end-1);
106     ajStrFmtUpper(&substr);
107     pro=ajStrNewS(substr);
108     len = ajStrGetLen(substr);
109 
110     AJCNEW(ruler, len);
111     AJCNEW(npos, len);
112     AJCNEW(ppos, len);
113 
114     prettyseq_Translate(beg,end,&substr,codon,range,&pro);
115     prettyseq_makeRuler(len,beg,ruler,npos);
116     prettyseq_calcProteinPos(ppos,pro,len);
117     prettyseq_showTrans(ppos,npos,pro,substr,len,ruler,beg,
118 			outf,isrule,isp,isn,width,ajSeqGetNameC(a));
119 
120     AJFREE(npos);
121     AJFREE(ppos);
122     AJFREE(ruler);
123 
124     ajStrDel(&substr);
125     ajCodDel(&codon);
126     ajRangeDel(&range);
127 
128     ajSeqDel(&a);
129     ajFileClose(&outf);
130     ajStrDel(&codestr);
131     ajStrDel(&pro);
132 
133     embExit();
134 
135     return 0;
136 }
137 
138 
139 
140 
141 /* @funcstatic prettyseq_Translate ********************************************
142 **
143 ** Undocumented.
144 **
145 ** @param [r] beg [ajint] start position
146 ** @param [r] end [ajint] end position
147 ** @param [u] s [AjPStr*] nucleic acid sequence (regions to be case converted)
148 ** @param [r] codon [const AjPCod] translation CU table
149 ** @param [r] range [const AjPRange] region to translate
150 ** @param [w] pro [AjPStr*] protein
151 ** @@
152 ******************************************************************************/
153 
prettyseq_Translate(ajint beg,ajint end,AjPStr * s,const AjPCod codon,const AjPRange range,AjPStr * pro)154 static void prettyseq_Translate(ajint beg, ajint end,
155 				AjPStr *s,
156 				const AjPCod codon, const AjPRange range,
157 				AjPStr* pro)
158 {
159     ajuint limit;
160     ajuint i;
161     ajuint j;
162 
163     ajuint nr;
164     ajuint st;
165     ajuint en;
166 
167     char *p;
168     char *q;
169     char c;
170 
171     char tri[4];
172     ajint idx;
173 
174     tri[3]='\0';
175 
176     /* Convert ranges to subsequence values */
177     nr = ajRangeGetSize(range);
178     for(i=0;i<nr;++i)
179     {
180 	range->start[i] -= beg;
181 	range->end[i] -= beg;
182     }
183     limit = ajStrGetLen(*s);
184 
185     /* Test ranges for validity */
186     for(i=0;i<nr;++i)
187     {
188 	st = range->start[i];
189 	en = range->end[i];
190 	if(st>=limit || en>=limit)
191 	    ajFatal("Range outside length of sequence [%d-%d]",st+beg,
192 		    end+beg);
193     }
194 
195     /* Set areas of sequence to translate to lower case */
196     p = ajStrGetuniquePtr(s);
197     for(i=0;i<nr;++i)
198     {
199 	ajRangeElementGetValues(range,i,&st,&en);
200 	for(j=st;j<=en;++j)
201 	    p[j] = (char)tolower((ajint)p[j]);
202     }
203 
204 
205 /* GSK
206 * Need to ensure we can group-by-3 so ignore any trailing 1 or 2 nucls
207 * otherwise we may get overwrites in arrays(?)
208 */
209     limit = 3*((limit+1)/3) - 1;
210 
211     /* Do the translation */
212     q=ajStrGetuniquePtr(pro);
213     for(i=0;i<limit;++i)
214     {
215 	if(isupper((ajint)p[i]))
216 	{
217 	    q[i] = ' ';
218 	    continue;
219 	}
220 	tri[0]=toupper((int)p[i]);
221 	if(!p[i+1] || isupper((ajint)p[i+1]) || !p[i+2] ||
222 	   isupper((ajint)p[i+2]))
223 	{
224 	    q[i] = ' ';
225 	    if(q[i+1])
226 		q[i+1] = ' ';
227 	    if(q[i+2])
228 		q[i+2] = ' ';
229 	    i += 2;
230 	    continue;
231 	}
232 	tri[1] = toupper((int)p[i+1]);
233 	tri[2] = toupper((int)p[i+2]);
234 
235 	if(!strcmp(tri,"NNN"))
236 	    c = 'X';
237 	else
238 	{
239 	    idx = ajCodIndexC(tri);
240 	    if(codon->aa[idx]==27)
241 		c = '*';
242 	    else
243 		c = (char)(codon->aa[idx]+'A');
244 	}
245 
246 	q[i] = c;
247 	q[i+1] = q[i+2] = ' ';
248 	i += 2;
249     }
250     j = ajStrGetLen(*s)-limit;
251     while(j--)
252         q[i++] = ' ';
253 
254     return;
255 }
256 
257 
258 
259 
260 /* @funcstatic prettyseq_makeRuler ********************************************
261 **
262 ** Create a ruler
263 **
264 ** @param [r] len [ajint] length for ruler
265 ** @param [r] begin [ajint] numbering start
266 ** @param [w] ruler [char*] ruler
267 ** @param [w] npos [ajint*] numbering
268 ** @@
269 ******************************************************************************/
270 
prettyseq_makeRuler(ajint len,ajint begin,char * ruler,ajint * npos)271 static void prettyseq_makeRuler(ajint len, ajint begin, char *ruler,
272 				ajint *npos)
273 {
274     ajint i;
275 
276     for(i=0;i<len;++i)
277     {
278 	npos[i] = i+begin;
279 
280 	if(!((i+begin)%10))
281 	    ruler[i] = '|';
282 	else
283 	    ruler[i] = '-';
284     }
285 
286     return;
287 }
288 
289 
290 
291 
292 /* @funcstatic prettyseq_calcProteinPos ***************************************
293 **
294 ** Protein translation positions
295 **
296 ** @param [w] ppos [ajint*] positions
297 ** @param [r] pro [const AjPStr] protein
298 ** @param [r] len [ajint] length
299 ** @@
300 ******************************************************************************/
301 
302 
prettyseq_calcProteinPos(ajint * ppos,const AjPStr pro,ajint len)303 static void prettyseq_calcProteinPos(ajint *ppos, const AjPStr pro, ajint len)
304 {
305     ajint j;
306 
307     ajint pos;
308     ajint v;
309 
310     const char *p;
311 
312     pos = 0;
313     v   = 1;
314 
315     p = ajStrGetPtr(pro);
316     while(p[pos]==' ')
317 	ppos[pos++]=0;
318 
319     while(pos<len)
320     {
321 	if(p[pos]=='*')
322 	{
323 	    ppos[pos] = 0;
324 	    ++pos;
325 	    while(pos<len && p[pos]==' ')
326 	    {
327 		ppos[pos] = 0;
328 		++pos;
329 	    }
330 	    v = 1;
331 	    continue;
332 	}
333 
334 	if(p[pos]!=' ')
335 	{
336 	    ppos[pos] = v+POFF;
337 	    ++pos;
338 	    for(j=0;j<2 && p[pos];++j,++pos)
339 		ppos[pos] = v;
340 	    if(p[pos]==' ')
341 		v = 1;
342 	    else
343 		++v;
344 	}
345 	else
346 	    ppos[pos++]=0;
347     }
348 
349     return;
350 }
351 
352 
353 
354 
355 /* @funcstatic prettyseq_showTrans ********************************************
356 **
357 ** Show translations
358 **
359 ** @param [r] ppos [const ajint*] protein positions
360 ** @param [r] npos [const ajint*] nucleic positions
361 ** @param [r] pro [const AjPStr] protein
362 ** @param [r] substr [const AjPStr] dna
363 ** @param [r] len [ajint] length
364 ** @param [r] ruler [const char*] ruler
365 ** @param [r] begin [ajint] start in dna
366 ** @param [u] outf [AjPFile] outfile
367 ** @param [r] isrule [AjBool] show ruler
368 ** @param [r] isp [AjBool] show protein
369 ** @param [r] isn [AjBool] show dna
370 ** @param [r] width [ajint] display width
371 ** @param [r] name [const char*] name of dna
372 ** @@
373 ******************************************************************************/
374 
prettyseq_showTrans(const ajint * ppos,const ajint * npos,const AjPStr pro,const AjPStr substr,ajint len,const char * ruler,ajint begin,AjPFile outf,AjBool isrule,AjBool isp,AjBool isn,ajint width,const char * name)375 static void prettyseq_showTrans(const ajint *ppos, const ajint *npos,
376 				const AjPStr pro, const AjPStr substr,
377 				ajint len, const char *ruler,
378 				ajint begin, AjPFile outf, AjBool isrule,
379 				AjBool isp, AjBool isn, ajint width,
380 				const char *name)
381 {
382     ajint pos;
383 
384     ajFmtPrintF(outf,"PRETTYSEQ of %s from %d to %d\n\n",name,begin,
385 		begin+len-1);
386 
387 
388     pos = 0;
389     while(pos<len)
390     {
391 	if(pos+width<len)
392 	{
393 	    prettyseq_showTransb(ppos,npos,pro,substr,ruler,
394 				 outf,isrule,isp,isn,pos,pos+width-1);
395 	    pos += width;
396 	    continue;
397 	}
398 	prettyseq_showTransb(ppos,npos,pro,substr,ruler,
399 			     outf,isrule,isp,isn,pos,len-1);
400 	break;
401     }
402 
403     return;
404 }
405 
406 
407 
408 
409 /* @funcstatic prettyseq_showTransb *******************************************
410 **
411 ** Low level display
412 **
413 ** @param [r] ppos [const ajint*] protein positions
414 ** @param [r] npos [const ajint*] nucleic positions
415 ** @param [r] pro [const AjPStr] protein
416 ** @param [r] substr [const AjPStr] dna
417 ** @param [r] ruler [const char*] ruler
418 ** @param [u] outf [AjPFile] outfile
419 ** @param [r] isrule [AjBool] show ruler
420 ** @param [r] isp [AjBool] show protein
421 ** @param [r] isn [AjBool] show dna
422 ** @param [r] start [ajint] start pos
423 ** @param [r] end [ajint] end pos
424 ** @@
425 ******************************************************************************/
426 
prettyseq_showTransb(const ajint * ppos,const ajint * npos,const AjPStr pro,const AjPStr substr,const char * ruler,AjPFile outf,AjBool isrule,AjBool isp,AjBool isn,ajint start,ajint end)427 static void prettyseq_showTransb(const ajint *ppos, const ajint *npos,
428 				 const AjPStr pro, const AjPStr substr,
429 				 const char *ruler,
430 				 AjPFile outf, AjBool isrule,
431 				 AjBool isp, AjBool isn, ajint start,
432 				 ajint end)
433 {
434     AjPStr s;
435     ajint b;
436     ajint e = 0;
437     ajint v;
438     ajint pos;
439 
440     s = ajStrNew();
441 
442     if(isrule)
443     {
444 	ajStrAssignSubC(&s,ruler,start,end);
445 	ajFmtPrintF(outf,"           %s\n",ajStrGetPtr(s));
446     }
447 
448     if(isn)
449 	ajFmtPrintF(outf,"%10d ",npos[start]);
450     else
451 	ajFmtPrintF(outf,"           ");
452 
453     ajStrAssignSubS(&s,substr,start,end);
454     ajFmtPrintF(outf,"%s ",ajStrGetPtr(s));
455 
456     if(isn)
457 	ajFmtPrintF(outf,"%d",npos[end]);
458     ajFmtPrintF(outf,"\n");
459 
460     if(isp)
461     {
462 	pos = start;
463 	b = e = 0;
464 	while(pos<=end)
465 	{
466 	    if(!(v=ppos[pos]))
467 	    {
468 		++pos;
469 		continue;
470 	    }
471 
472 	    if(v<POFF)
473 	    {
474 		while(ppos[pos]==v && pos<=end)
475 		    ++pos;
476 		if(pos>end)
477 		    break;
478 		continue;
479 	    }
480 	    b = v-POFF;
481 	    break;
482 	}
483 
484 	pos = end;
485 	while(pos>=start)
486 	{
487 	    if(!(v=ppos[pos]))
488 	    {
489 		--pos;
490 		continue;
491 	    }
492 	    if(v>POFF)
493 		v -= POFF;
494 	    else
495 		while(ppos[pos]==v)
496 		{
497 		    --pos;
498 		    if(pos<start)
499 		    {
500 			v = 0;
501 			break;
502 		    }
503 		}
504 	    e = v;
505 	    break;
506 	}
507 
508 	if(!b)
509 	    ajFmtPrintF(outf,"           ");
510 	else
511 	    ajFmtPrintF(outf,"   %7d ",b);
512     }
513     else
514 	ajFmtPrintF(outf,"           ");
515 
516     ajStrAssignSubS(&s,pro,start,end);
517     ajFmtPrintF(outf,"%s ",ajStrGetPtr(s));
518     if(isp && e)
519 	ajFmtPrintF(outf,"%d",e);
520     ajFmtPrintF(outf,"\n");
521 
522 
523     ajFmtPrintF(outf,"\n");
524 
525     ajStrDel(&s);
526 
527     return;
528 }
529