1 /* @source prettyseq application
2 **
3 ** Pretty translation of DNA sequences for publication
4 **
5 ** @author Copyright (C) Alan Bleasby (ableasby@hgmp.mrc.ac.uk)
6 ** @@
7 **
8 ** This program is free software; you can redistribute it and/or
9 ** modify it under the terms of the GNU General Public License
10 ** as published by the Free Software Foundation; either version 2
11 ** of the License, or (at your option) any later version.
12 **
13 ** This program is distributed in the hope that it will be useful,
14 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ** GNU General Public License for more details.
17 **
18 ** You should have received a copy of the GNU General Public License
19 ** along with this program; if not, write to the Free Software
20 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 ******************************************************************************/
22
23 #include "emboss.h"
24 #include <string.h>
25 #include <ctype.h>
26
27
28 #define POFF 1000000 /* Printing flag */
29
30
31
32
33 static void prettyseq_makeRuler(ajint len, ajint begin, char *ruler,
34 ajint *npos);
35 static void prettyseq_calcProteinPos(ajint *ppos, const AjPStr pro,
36 ajint len);
37 static void prettyseq_showTrans(const ajint *ppos, const ajint *npos,
38 const AjPStr pro, const AjPStr substr,
39 ajint len, const char *ruler,
40 ajint begin, AjPFile outf, AjBool isrule,
41 AjBool isp, AjBool isn, ajint width,
42 const char *name);
43 static void prettyseq_showTransb(const ajint *ppos, const ajint *npos,
44 const AjPStr pro, const AjPStr substr,
45 const char *ruler,
46 AjPFile outf, AjBool isrule,
47 AjBool isp, AjBool isn, ajint start,
48 ajint end);
49 static void prettyseq_Translate(ajint beg, ajint end,
50 AjPStr *s, const AjPCod codon,
51 const AjPRange range, AjPStr* pro);
52
53
54
55
56 /* @prog prettyseq ************************************************************
57 **
58 ** Output sequence with translated ranges
59 **
60 ******************************************************************************/
61
main(int argc,char ** argv)62 int main(int argc, char **argv)
63 {
64 AjPSeq a;
65 AjPFile outf;
66 AjPStr codestr;
67 AjPCod codon;
68 AjPStr substr;
69 AjPRange range;
70 AjBool isrule;
71 AjBool isp;
72 AjBool isn;
73
74 AjPStr pro;
75
76 ajint beg;
77 ajint end;
78 ajint len;
79
80 ajint width;
81
82 char *ruler;
83 ajint *ppos = NULL;
84 ajint *npos = NULL;
85 ajint gcode;
86
87
88 embInit("prettyseq", argc, argv);
89
90 a = ajAcdGetSeq("sequence");
91 codestr = ajAcdGetListSingle("table");
92 width = ajAcdGetInt("width");
93 range = ajAcdGetRange("range");
94 outf = ajAcdGetOutfile("outfile");
95 isrule = ajAcdGetBoolean("ruler");
96 isp = ajAcdGetBoolean("plabel");
97 isn = ajAcdGetBoolean("nlabel");
98
99 ajStrToInt(codestr, &gcode);
100 codon = ajCodNewCodenum(gcode);
101 beg = ajSeqGetBegin(a);
102 end = ajSeqGetEnd(a);
103
104 substr = ajStrNew();
105 ajStrAssignSubC(&substr,ajSeqGetSeqC(a),beg-1,end-1);
106 ajStrFmtUpper(&substr);
107 pro=ajStrNewS(substr);
108 len = ajStrGetLen(substr);
109
110 AJCNEW(ruler, len);
111 AJCNEW(npos, len);
112 AJCNEW(ppos, len);
113
114 prettyseq_Translate(beg,end,&substr,codon,range,&pro);
115 prettyseq_makeRuler(len,beg,ruler,npos);
116 prettyseq_calcProteinPos(ppos,pro,len);
117 prettyseq_showTrans(ppos,npos,pro,substr,len,ruler,beg,
118 outf,isrule,isp,isn,width,ajSeqGetNameC(a));
119
120 AJFREE(npos);
121 AJFREE(ppos);
122 AJFREE(ruler);
123
124 ajStrDel(&substr);
125 ajCodDel(&codon);
126 ajRangeDel(&range);
127
128 ajSeqDel(&a);
129 ajFileClose(&outf);
130 ajStrDel(&codestr);
131 ajStrDel(&pro);
132
133 embExit();
134
135 return 0;
136 }
137
138
139
140
141 /* @funcstatic prettyseq_Translate ********************************************
142 **
143 ** Undocumented.
144 **
145 ** @param [r] beg [ajint] start position
146 ** @param [r] end [ajint] end position
147 ** @param [u] s [AjPStr*] nucleic acid sequence (regions to be case converted)
148 ** @param [r] codon [const AjPCod] translation CU table
149 ** @param [r] range [const AjPRange] region to translate
150 ** @param [w] pro [AjPStr*] protein
151 ** @@
152 ******************************************************************************/
153
prettyseq_Translate(ajint beg,ajint end,AjPStr * s,const AjPCod codon,const AjPRange range,AjPStr * pro)154 static void prettyseq_Translate(ajint beg, ajint end,
155 AjPStr *s,
156 const AjPCod codon, const AjPRange range,
157 AjPStr* pro)
158 {
159 ajuint limit;
160 ajuint i;
161 ajuint j;
162
163 ajuint nr;
164 ajuint st;
165 ajuint en;
166
167 char *p;
168 char *q;
169 char c;
170
171 char tri[4];
172 ajint idx;
173
174 tri[3]='\0';
175
176 /* Convert ranges to subsequence values */
177 nr = ajRangeGetSize(range);
178 for(i=0;i<nr;++i)
179 {
180 range->start[i] -= beg;
181 range->end[i] -= beg;
182 }
183 limit = ajStrGetLen(*s);
184
185 /* Test ranges for validity */
186 for(i=0;i<nr;++i)
187 {
188 st = range->start[i];
189 en = range->end[i];
190 if(st>=limit || en>=limit)
191 ajFatal("Range outside length of sequence [%d-%d]",st+beg,
192 end+beg);
193 }
194
195 /* Set areas of sequence to translate to lower case */
196 p = ajStrGetuniquePtr(s);
197 for(i=0;i<nr;++i)
198 {
199 ajRangeElementGetValues(range,i,&st,&en);
200 for(j=st;j<=en;++j)
201 p[j] = (char)tolower((ajint)p[j]);
202 }
203
204
205 /* GSK
206 * Need to ensure we can group-by-3 so ignore any trailing 1 or 2 nucls
207 * otherwise we may get overwrites in arrays(?)
208 */
209 limit = 3*((limit+1)/3) - 1;
210
211 /* Do the translation */
212 q=ajStrGetuniquePtr(pro);
213 for(i=0;i<limit;++i)
214 {
215 if(isupper((ajint)p[i]))
216 {
217 q[i] = ' ';
218 continue;
219 }
220 tri[0]=toupper((int)p[i]);
221 if(!p[i+1] || isupper((ajint)p[i+1]) || !p[i+2] ||
222 isupper((ajint)p[i+2]))
223 {
224 q[i] = ' ';
225 if(q[i+1])
226 q[i+1] = ' ';
227 if(q[i+2])
228 q[i+2] = ' ';
229 i += 2;
230 continue;
231 }
232 tri[1] = toupper((int)p[i+1]);
233 tri[2] = toupper((int)p[i+2]);
234
235 if(!strcmp(tri,"NNN"))
236 c = 'X';
237 else
238 {
239 idx = ajCodIndexC(tri);
240 if(codon->aa[idx]==27)
241 c = '*';
242 else
243 c = (char)(codon->aa[idx]+'A');
244 }
245
246 q[i] = c;
247 q[i+1] = q[i+2] = ' ';
248 i += 2;
249 }
250 j = ajStrGetLen(*s)-limit;
251 while(j--)
252 q[i++] = ' ';
253
254 return;
255 }
256
257
258
259
260 /* @funcstatic prettyseq_makeRuler ********************************************
261 **
262 ** Create a ruler
263 **
264 ** @param [r] len [ajint] length for ruler
265 ** @param [r] begin [ajint] numbering start
266 ** @param [w] ruler [char*] ruler
267 ** @param [w] npos [ajint*] numbering
268 ** @@
269 ******************************************************************************/
270
prettyseq_makeRuler(ajint len,ajint begin,char * ruler,ajint * npos)271 static void prettyseq_makeRuler(ajint len, ajint begin, char *ruler,
272 ajint *npos)
273 {
274 ajint i;
275
276 for(i=0;i<len;++i)
277 {
278 npos[i] = i+begin;
279
280 if(!((i+begin)%10))
281 ruler[i] = '|';
282 else
283 ruler[i] = '-';
284 }
285
286 return;
287 }
288
289
290
291
292 /* @funcstatic prettyseq_calcProteinPos ***************************************
293 **
294 ** Protein translation positions
295 **
296 ** @param [w] ppos [ajint*] positions
297 ** @param [r] pro [const AjPStr] protein
298 ** @param [r] len [ajint] length
299 ** @@
300 ******************************************************************************/
301
302
prettyseq_calcProteinPos(ajint * ppos,const AjPStr pro,ajint len)303 static void prettyseq_calcProteinPos(ajint *ppos, const AjPStr pro, ajint len)
304 {
305 ajint j;
306
307 ajint pos;
308 ajint v;
309
310 const char *p;
311
312 pos = 0;
313 v = 1;
314
315 p = ajStrGetPtr(pro);
316 while(p[pos]==' ')
317 ppos[pos++]=0;
318
319 while(pos<len)
320 {
321 if(p[pos]=='*')
322 {
323 ppos[pos] = 0;
324 ++pos;
325 while(pos<len && p[pos]==' ')
326 {
327 ppos[pos] = 0;
328 ++pos;
329 }
330 v = 1;
331 continue;
332 }
333
334 if(p[pos]!=' ')
335 {
336 ppos[pos] = v+POFF;
337 ++pos;
338 for(j=0;j<2 && p[pos];++j,++pos)
339 ppos[pos] = v;
340 if(p[pos]==' ')
341 v = 1;
342 else
343 ++v;
344 }
345 else
346 ppos[pos++]=0;
347 }
348
349 return;
350 }
351
352
353
354
355 /* @funcstatic prettyseq_showTrans ********************************************
356 **
357 ** Show translations
358 **
359 ** @param [r] ppos [const ajint*] protein positions
360 ** @param [r] npos [const ajint*] nucleic positions
361 ** @param [r] pro [const AjPStr] protein
362 ** @param [r] substr [const AjPStr] dna
363 ** @param [r] len [ajint] length
364 ** @param [r] ruler [const char*] ruler
365 ** @param [r] begin [ajint] start in dna
366 ** @param [u] outf [AjPFile] outfile
367 ** @param [r] isrule [AjBool] show ruler
368 ** @param [r] isp [AjBool] show protein
369 ** @param [r] isn [AjBool] show dna
370 ** @param [r] width [ajint] display width
371 ** @param [r] name [const char*] name of dna
372 ** @@
373 ******************************************************************************/
374
prettyseq_showTrans(const ajint * ppos,const ajint * npos,const AjPStr pro,const AjPStr substr,ajint len,const char * ruler,ajint begin,AjPFile outf,AjBool isrule,AjBool isp,AjBool isn,ajint width,const char * name)375 static void prettyseq_showTrans(const ajint *ppos, const ajint *npos,
376 const AjPStr pro, const AjPStr substr,
377 ajint len, const char *ruler,
378 ajint begin, AjPFile outf, AjBool isrule,
379 AjBool isp, AjBool isn, ajint width,
380 const char *name)
381 {
382 ajint pos;
383
384 ajFmtPrintF(outf,"PRETTYSEQ of %s from %d to %d\n\n",name,begin,
385 begin+len-1);
386
387
388 pos = 0;
389 while(pos<len)
390 {
391 if(pos+width<len)
392 {
393 prettyseq_showTransb(ppos,npos,pro,substr,ruler,
394 outf,isrule,isp,isn,pos,pos+width-1);
395 pos += width;
396 continue;
397 }
398 prettyseq_showTransb(ppos,npos,pro,substr,ruler,
399 outf,isrule,isp,isn,pos,len-1);
400 break;
401 }
402
403 return;
404 }
405
406
407
408
409 /* @funcstatic prettyseq_showTransb *******************************************
410 **
411 ** Low level display
412 **
413 ** @param [r] ppos [const ajint*] protein positions
414 ** @param [r] npos [const ajint*] nucleic positions
415 ** @param [r] pro [const AjPStr] protein
416 ** @param [r] substr [const AjPStr] dna
417 ** @param [r] ruler [const char*] ruler
418 ** @param [u] outf [AjPFile] outfile
419 ** @param [r] isrule [AjBool] show ruler
420 ** @param [r] isp [AjBool] show protein
421 ** @param [r] isn [AjBool] show dna
422 ** @param [r] start [ajint] start pos
423 ** @param [r] end [ajint] end pos
424 ** @@
425 ******************************************************************************/
426
prettyseq_showTransb(const ajint * ppos,const ajint * npos,const AjPStr pro,const AjPStr substr,const char * ruler,AjPFile outf,AjBool isrule,AjBool isp,AjBool isn,ajint start,ajint end)427 static void prettyseq_showTransb(const ajint *ppos, const ajint *npos,
428 const AjPStr pro, const AjPStr substr,
429 const char *ruler,
430 AjPFile outf, AjBool isrule,
431 AjBool isp, AjBool isn, ajint start,
432 ajint end)
433 {
434 AjPStr s;
435 ajint b;
436 ajint e = 0;
437 ajint v;
438 ajint pos;
439
440 s = ajStrNew();
441
442 if(isrule)
443 {
444 ajStrAssignSubC(&s,ruler,start,end);
445 ajFmtPrintF(outf," %s\n",ajStrGetPtr(s));
446 }
447
448 if(isn)
449 ajFmtPrintF(outf,"%10d ",npos[start]);
450 else
451 ajFmtPrintF(outf," ");
452
453 ajStrAssignSubS(&s,substr,start,end);
454 ajFmtPrintF(outf,"%s ",ajStrGetPtr(s));
455
456 if(isn)
457 ajFmtPrintF(outf,"%d",npos[end]);
458 ajFmtPrintF(outf,"\n");
459
460 if(isp)
461 {
462 pos = start;
463 b = e = 0;
464 while(pos<=end)
465 {
466 if(!(v=ppos[pos]))
467 {
468 ++pos;
469 continue;
470 }
471
472 if(v<POFF)
473 {
474 while(ppos[pos]==v && pos<=end)
475 ++pos;
476 if(pos>end)
477 break;
478 continue;
479 }
480 b = v-POFF;
481 break;
482 }
483
484 pos = end;
485 while(pos>=start)
486 {
487 if(!(v=ppos[pos]))
488 {
489 --pos;
490 continue;
491 }
492 if(v>POFF)
493 v -= POFF;
494 else
495 while(ppos[pos]==v)
496 {
497 --pos;
498 if(pos<start)
499 {
500 v = 0;
501 break;
502 }
503 }
504 e = v;
505 break;
506 }
507
508 if(!b)
509 ajFmtPrintF(outf," ");
510 else
511 ajFmtPrintF(outf," %7d ",b);
512 }
513 else
514 ajFmtPrintF(outf," ");
515
516 ajStrAssignSubS(&s,pro,start,end);
517 ajFmtPrintF(outf,"%s ",ajStrGetPtr(s));
518 if(isp && e)
519 ajFmtPrintF(outf,"%d",e);
520 ajFmtPrintF(outf,"\n");
521
522
523 ajFmtPrintF(outf,"\n");
524
525 ajStrDel(&s);
526
527 return;
528 }
529