1 /* @source getorf application
2 **
3 ** Finds and extracts open reading frames (ORFs)
4 **
5 ** @author Copyright (C) Gary Williams (gwilliam@hgmp.mrc.ac.uk)
6 ** @@
7 **
8 ** This program is free software; you can redistribute it and/or
9 ** modify it under the terms of the GNU General Public License
10 ** as published by the Free Software Foundation; either version 2
11 ** of the License, or (at your option) any later version.
12 **
13 ** This program is distributed in the hope that it will be useful,
14 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 ** GNU General Public License for more details.
17 **
18 ** You should have received a copy of the GNU General Public License
19 ** along with this program; if not, write to the Free Software
20 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
21 ******************************************************************************/
22 
23 #include "emboss.h"
24 
25 
26 
27 
28 static void getorf_WriteORF(const AjPSeq seq, ajint len, ajint seqlen,
29 			    AjBool sense, ajint find, ajint *orf_no,
30 			    ajint start, ajint pos, const AjPStr str,
31 			    AjPSeqout seqout, ajint around);
32 
33 static void getorf_AppORF(ajint find, AjPStr *str,
34 			  const char *chrseq, ajint pos,
35 			  char aa);
36 
37 static void getorf_FindORFs(const AjPSeq seq, ajint len, const AjPTrn trnTable,
38 			    ajuint minsize, ajuint maxsize, AjPSeqout seqout,
39 			    AjBool sense, AjBool circular, ajint find,
40 			    ajint *orf_no, AjBool methionine, ajint around);
41 
42 
43 
44 
45 /* types of control codon */
46 #define START 1
47 #define STOP -1
48 
49 /* types of ORF to find */
50 #define P_STOP2STOP      0
51 #define P_START2STOP     1
52 #define N_STOP2STOP      2
53 #define N_START2STOP     3
54 #define AROUND_START     4
55 #define AROUND_INIT_STOP 5
56 #define AROUND_END_STOP  6
57 
58 
59 
60 
61 /* @prog getorf ***************************************************************
62 **
63 ** Finds and extracts open reading frames (ORFs)
64 **
65 ******************************************************************************/
66 
main(int argc,char ** argv)67 int main(int argc, char **argv)
68 {
69 
70     AjPSeqall seqall;
71     AjPSeqout seqout;
72     AjPStr tablestr;
73     ajint table;
74     ajuint minsize;
75     ajuint maxsize;
76     AjPStr findstr;
77     ajint find;
78     AjBool methionine;
79     AjBool circular;
80     AjBool reverse;
81     ajint around;
82 
83     AjPSeq seq = NULL;
84     AjPTrn trnTable;
85     AjPStr sseq = NULL;	/* sequence string */
86 
87     /* ORF number to append to name of sequence to create unique name */
88     ajint orf_no;
89 
90 
91     AjBool sense;	/* ajTrue = forward sense */
92     ajint len;
93 
94     embInit("getorf", argc, argv);
95 
96     seqout     = ajAcdGetSeqoutall("outseq");
97     seqall     = ajAcdGetSeqall("sequence");
98     tablestr   = ajAcdGetListSingle("table");
99     minsize    = ajAcdGetInt("minsize");
100     maxsize    = ajAcdGetInt("maxsize");
101     findstr    = ajAcdGetListSingle("find");
102     methionine = ajAcdGetBoolean("methionine");
103     circular   = ajAcdGetBoolean("circular");
104     reverse    = ajAcdGetBoolean("reverse");
105     around     = ajAcdGetInt("flanking");
106 
107 
108     /* initialise the translation table */
109     ajStrToInt(tablestr, &table);
110     trnTable = ajTrnNewI(table);
111 
112     /* what sort of ORF are we looking for */
113     ajStrToInt(findstr, &find);
114 
115     /*
116     ** get the minimum size converted to protein length if storing
117     ** protein sequences
118     */
119     if(find == P_STOP2STOP || find == P_START2STOP || find == AROUND_START)
120     {
121 	minsize /= 3;
122 	maxsize /= 3;
123     }
124 
125     while(ajSeqallNext(seqall, &seq))
126     {
127         ajSeqTrim(seq);
128 
129         orf_no = 1;		   /* number of the next ORF */
130 	sense = ajTrue;		   /* forward sense initially */
131 
132 	/* get the length of the sequence */
133 	len = ajSeqGetLen(seq);
134 
135 	/*
136 	** if the sequence is circular, append it to itself to triple its
137 	**  length so can deal easily with wrapped ORFs, but don't update
138 	** len
139 	*/
140 	if(circular)
141 	{
142 	    ajStrAssignS(&sseq, ajSeqGetSeqS(seq));
143 	    ajStrAppendS(&sseq, ajSeqGetSeqS(seq));
144 	    ajStrAppendS(&sseq, ajSeqGetSeqS(seq));
145 	    ajSeqAssignSeqS(seq, sseq);
146 	}
147 
148 	/* find the ORFs */
149 	getorf_FindORFs(seq, len, trnTable, minsize, maxsize, seqout, sense,
150 			circular, find, &orf_no, methionine, around);
151 
152 	/* now reverse complement the sequence and do it again */
153 	if(reverse)
154 	{
155 	    sense = ajFalse;
156 	    ajSeqReverseForce(seq);
157 	    getorf_FindORFs(seq, len, trnTable, minsize, maxsize, seqout, sense,
158 			    circular, find, &orf_no, methionine,
159 			    around);
160 	}
161     }
162 
163     ajSeqoutClose(seqout);
164     ajTrnDel(&trnTable);
165 
166     ajSeqallDel(&seqall);
167     ajSeqDel(&seq);
168     ajStrDel(&sseq);
169     ajSeqoutDel(&seqout);
170     ajStrDel(&tablestr);
171     ajStrDel(&findstr);
172 
173     embExit();
174 
175     return 0;
176 }
177 
178 
179 
180 
181 /* @funcstatic getorf_FindORFs ************************************************
182 **
183 ** finds all orfs in the current sense and writes them out
184 **
185 ** @param [r] seq [const AjPSeq] Nucleotide sequence
186 ** @param [r] len [ajint] Sequence length
187 ** @param [r] trnTable [const AjPTrn] Translation table
188 ** @param [r] minsize [ajuint] Minimum size ORF to find
189 ** @param [r] maxsize [ajuint] Maximum size ORF to find
190 ** @param [u] seqout [AjPSeqout] Sequence output object
191 ** @param [r] sense [AjBool] Forward (sense) strand if true, else reverse strand
192 ** @param [r] circular [AjBool] True if sequence is circular
193 ** @param [r] find [ajint] Find code (see main program)
194 ** @param [w] orf_no [ajint*] ORF counter updated
195 ** @param [r] methionine [AjBool] If true report all start codond as 'M'
196 ** @param [r] around [ajint] Number of bases around start/stop codons
197 ** @@
198 ******************************************************************************/
199 
getorf_FindORFs(const AjPSeq seq,ajint len,const AjPTrn trnTable,ajuint minsize,ajuint maxsize,AjPSeqout seqout,AjBool sense,AjBool circular,ajint find,ajint * orf_no,AjBool methionine,ajint around)200 static void getorf_FindORFs(const AjPSeq seq, ajint len, const AjPTrn trnTable,
201 			    ajuint minsize, ajuint maxsize, AjPSeqout seqout,
202 			    AjBool sense, AjBool circular, ajint find,
203 			    ajint *orf_no, AjBool methionine, ajint around)
204 {
205     AjBool ORF[3];			/* true if found an ORF */
206     AjBool LASTORF[3];		 /* true if hit the end of an ORF past
207 				    the end on the genome in this
208 				    frame */
209     AjBool GOTSTOP[3];		 /* true if found a STOP in a circular
210 				    genome's frame when
211 				    find = P_STOP2STOP or
212 				    N_STOP2STOP */
213     ajint start[3];		  /* possible starting position of the
214 				     three frames */
215     ajint pos;
216     ajint codon;
217     char aa;
218     ajint frame;
219     AjPStr newstr[3];		 /* strings of the three frames of ORF
220 				    sequences that we are growing */
221     AjPSeq pep = NULL;
222     ajint i;
223 
224     ajint seqlen;
225     const char *chrseq;
226 
227     seqlen = ajSeqGetLen(seq);
228     chrseq = ajSeqGetSeqC(seq);
229 
230     /* initialise the ORF sequences */
231     newstr[0] = NULL;
232     newstr[1] = NULL;
233     newstr[2] = NULL;
234 
235     /*
236     ** initialise flags for found the last ORF past the end of a circular
237     ** genome
238     */
239     LASTORF[0] = ajFalse;
240     LASTORF[1] = ajFalse;
241     LASTORF[2] = ajFalse;
242 
243     /* initialise flags for found at least one STOP codon in a frame */
244     GOTSTOP[0] = ajFalse;
245     GOTSTOP[1] = ajFalse;
246     GOTSTOP[2] = ajFalse;
247 
248     if(circular || find == P_START2STOP || find == N_START2STOP ||
249        find == AROUND_START)
250     {
251 	ORF[0] = ajFalse;
252 	ORF[1] = ajFalse;
253 	ORF[2] = ajFalse;
254     }
255     else
256     {
257 	/*
258 	** assume already in a ORF so we get ORFs at the start of the
259 	** sequence
260 	*/
261 	ORF[0] = ajTrue;
262 	ORF[1] = ajTrue;
263 	ORF[2] = ajTrue;
264 	start[0] = 0;
265 	start[1] = 1;
266 	start[2] = 2;
267     }
268 
269     for(pos=0; pos<seqlen-2; pos++)
270     {
271 	codon = ajTrnCodonstrTypeC(trnTable, &chrseq[pos], &aa);
272 	frame = pos % 3;
273 	ajDebug("len=%d, Pos=%d, Frame=%d start/stop=%d, aa=%c '%c%c%c'\n",
274 		len, pos, frame, codon, aa,
275                 chrseq[pos], chrseq[pos+1], chrseq[pos+2]);
276 
277 	/* don't want to find extra ORFs when already been round circ */
278 	if(LASTORF[frame])
279 	    continue;
280 
281 	if(find == P_STOP2STOP || find == N_STOP2STOP ||
282 	   find == AROUND_INIT_STOP || find == AROUND_END_STOP)
283 	{   /* look for stop codon to begin reporting ORF */
284 	    /* note that there was at least one STOP in a circular genome */
285 	    if(codon == STOP)
286 	    {
287 		GOTSTOP[frame] = ajTrue;
288 	    }
289 
290 	    /* write details if a STOP is hit or the end of the sequence */
291 	    if(codon == STOP || pos >= seqlen-5)
292 	    {
293 
294 		/*
295 		** End of the sequence? If so, append any
296 		** last codon to the sequence - otherwise, ignore the STOP
297 		** codon
298 		*/
299 		if(codon != STOP)
300 		    getorf_AppORF(find, &newstr[frame], chrseq, pos,
301 				  aa);
302 
303 		/* Already have a sequence to write out? */
304 		if(ORF[frame])
305 		{
306 		    if(ajStrGetLen(newstr[frame]) >= minsize &&
307 		       ajStrGetLen(newstr[frame]) <= maxsize)
308 		    {
309 			/* create a new sequence */
310 			if(codon == STOP)
311 			    getorf_WriteORF(seq, len, seqlen, sense,
312 					    find, orf_no, start[frame],
313 					    pos-1, newstr[frame],
314 					    seqout, around);
315 			else
316 			    getorf_WriteORF(seq, len, seqlen, sense,
317 					    find, orf_no, start[frame],
318 					    pos+2, newstr[frame],
319 					    seqout, around);
320 		    }
321 
322 		    ajStrSetClear(&newstr[frame]);
323 		}
324 
325 		/*
326 		** if its a circular genome and the STOP codon hits past
327 		** the end of the genome in all frames, then break
328 		*/
329 		if(circular && pos >= len)
330 		{
331 		    ORF[frame] = ajFalse; /* past the end of the genome */
332 		    LASTORF[frame] = ajTrue; /* finished getting ORFs */
333 		    if(LASTORF[0] && LASTORF[1] && LASTORF[2])
334 			break;
335 		}
336 		else
337 		{
338 		    /*
339 		    ** hit a STOP, therefore a potential ORF to write
340 		    ** out next time, even if the genome is circular
341 		    */
342 		    ORF[frame]   = ajTrue;
343 		    start[frame] = pos+3; /* next start of the ORF */
344 		}
345 
346 	    }
347 	    else if(ORF[frame])
348 		/* append sequence to newstr if in an ORF */
349 		getorf_AppORF(find, &newstr[frame], chrseq, pos, aa);
350 	}
351 	else /* Look for start: P_START2STOP N_START2STOP AROUND_START */
352 	{
353 
354 	    if(codon == START && !ORF[frame])
355 	    {
356 		/* not in a ORF already and found a START */
357 		if(pos < len)
358 		{
359 		    /*
360 		    **  reset the newstr to zero length to enable
361 		    **  storing the ORF for this
362 		    */
363 		    ajStrSetClear(&newstr[frame]);
364 		    ORF[frame] = ajTrue; /* now in an ORF */
365 		    start[frame] = pos;	/* start of the ORF for this frame */
366 		    if(methionine)
367 			getorf_AppORF(find, &newstr[frame], chrseq,
368 				      pos, 'M');
369 		    else
370 			getorf_AppORF(find, &newstr[frame], chrseq,
371 				      pos, aa);
372 		}
373 	    }
374 	    else if(codon == STOP)
375             {
376 		/* hit a STOP */
377 
378 		/* Already have a sequence to write out? */
379 		if(ORF[frame])
380 		{
381 		    ORF[frame] = ajFalse; /* not in an ORF */
382 
383 		    if(ajStrGetLen(newstr[frame]) >= minsize &&
384 		       ajStrGetLen(newstr[frame]) <= maxsize)
385 		    {
386 			/* create a new sequence */
387 			getorf_WriteORF(seq, len, seqlen, sense,
388 					    find, orf_no, start[frame],
389 					    pos-1, newstr[frame],
390 					    seqout, around);
391 		    }
392 		}
393 
394 		/*
395 		** if a circular genome and hit the STOP past
396 		** the end of the genome in all frames, then break
397 		*/
398 		if(circular && pos >= len)
399 		{
400 		    LASTORF[frame] = ajTrue; /* finished getting ORFs */
401 		    if(LASTORF[0] && LASTORF[1] && LASTORF[2]) break;
402 		}
403 
404 		ajStrSetClear(&newstr[frame]);
405             }
406             else if(pos >= seqlen-5)
407             {
408 		/* hit the end of the sequence  without a stop */
409 
410 		/* Already have a sequence to write out? */
411 		if(ORF[frame])
412 		{
413 		    ORF[frame] = ajFalse; /* not in an ORF */
414 
415 		    /*
416 		    ** End of the sequence? If so, append any
417 		    ** last codon to the sequence - otherwise, ignore the
418 		    ** STOP codon
419 		    */
420 		    if(pos >= seqlen-5 && pos < seqlen-2)
421 			getorf_AppORF(find, &newstr[frame], chrseq,
422 				      pos, aa);
423 
424 		    if(ajStrGetLen(newstr[frame]) >= minsize &&
425 		       ajStrGetLen(newstr[frame]) <= maxsize)
426 		    {
427 			/* create a new sequence */
428 			getorf_WriteORF(seq, len, seqlen, sense,
429                                         find, orf_no, start[frame],
430                                         pos+2, newstr[frame],
431                                         seqout, around);
432 		    }
433 		}
434 
435 		/*
436 		** if a circular genome and hit the STOP past
437 		** the end of the genome in all frames, then break
438 		*/
439 		if(circular && pos >= len)
440 		{
441 		    LASTORF[frame] = ajTrue; /* finished getting ORFs */
442 		    if(LASTORF[0] && LASTORF[1] && LASTORF[2]) break;
443 		}
444 
445 		ajStrSetClear(&newstr[frame]);
446 	    }
447 	    else
448 		if(ORF[frame])
449 		    getorf_AppORF(find, &newstr[frame], chrseq, pos,
450 				  aa);
451 
452 	}
453     }
454 
455     /*
456     ** Currently miss reporting a STOP-to-STOP ORF that is
457     ** the full length of a circular genome when there are no STOP codons in
458     ** that frame
459     */
460     if((find == P_STOP2STOP || find == N_STOP2STOP) && circular)
461     {
462 	if(!GOTSTOP[0])
463 	{
464 	    /* translate frame 1 into pep */
465 	    pep = ajTrnSeqOrig(trnTable, seq, 1);
466 	    if(ajSeqGetLen(pep) >= minsize &&
467 	       ajSeqGetLen(pep) <= maxsize)
468 		getorf_WriteORF(seq, len, seqlen, sense, find, orf_no,
469 				0, seqlen-1, ajSeqGetSeqS(pep), seqout,
470 				around);
471 	    ajSeqDel(&pep);
472 	}
473 
474 	if(!GOTSTOP[1])
475 	{
476 	    /* translate frame 2 into pep */
477 	    pep = ajTrnSeqOrig(trnTable, seq, 2);
478 	    if(ajSeqGetLen(pep) >= minsize &&
479 	       ajSeqGetLen(pep) <= maxsize)
480 		getorf_WriteORF(seq, len, seqlen, sense, find, orf_no,
481 				1, seqlen-1, ajSeqGetSeqS(pep), seqout,
482 				around);
483 	    ajSeqDel(&pep);
484 	}
485 
486 	if(!GOTSTOP[2])
487 	{
488 	    /* translate frame 3 into pep */
489 	    pep = ajTrnSeqOrig(trnTable, seq, 3);
490 	    if(ajSeqGetLen(pep) >= minsize &&
491 	       ajSeqGetLen(pep) >= maxsize)
492 		getorf_WriteORF(seq, len, seqlen, sense, find, orf_no,
493 				2, seqlen-1, ajSeqGetSeqS(pep), seqout,
494 				around);
495 	    ajSeqDel(&pep);
496 	}
497     }
498 
499     for(i=0;i<3;++i)
500 	ajStrDel(&newstr[i]);
501 
502     return;
503 }
504 
505 
506 
507 
508 /* @funcstatic getorf_WriteORF ************************************************
509 **
510 ** Undocumented.
511 **
512 ** @param [r] seq [const AjPSeq] Undocumented
513 ** @param [r] len [ajint] Undocumented
514 ** @param [r] seqlen [ajint] Undocumented
515 ** @param [r] sense [AjBool] Undocumented
516 ** @param [r] find [ajint] Undocumented
517 ** @param [w] orf_no [ajint*] Undocumented
518 ** @param [r] start [ajint] Undocumented
519 ** @param [r] pos [ajint] Undocumented
520 ** @param [r] str [const AjPStr] Undocumented
521 ** @param [u] seqout [AjPSeqout] Undocumented
522 ** @param [r] around [ajint] Undocumented
523 ** @@
524 ******************************************************************************/
525 
getorf_WriteORF(const AjPSeq seq,ajint len,ajint seqlen,AjBool sense,ajint find,ajint * orf_no,ajint start,ajint pos,const AjPStr str,AjPSeqout seqout,ajint around)526 static void getorf_WriteORF(const AjPSeq seq,
527 			    ajint len, ajint seqlen, AjBool sense,
528 			    ajint find, ajint *orf_no, ajint start, ajint pos,
529 			    const AjPStr str, AjPSeqout seqout, ajint around)
530 {
531     AjPSeq new;
532     AjPStr name  = NULL;       		/* name of the ORF */
533     AjPStr value = NULL;       		/* string value of the ORF number */
534     ajint s;
535     ajint e;				/* start and end positions */
536     AjPStr aroundstr = NULL;		/* holds sequence string around the
537 					   codon of interest */
538     ajint codonpos = 0;			/* holds position of start of codon
539 					   of interest */
540 
541     s = start+1;
542     e = pos+1;
543 
544     /*
545     ** it is possible for an ORF in a circular genome to appear to start
546     ** past the end of the genome.
547     ** Move the reported positions back to start in the range 1..len
548     ** for readability.
549     */
550     while(s > len)
551     {
552 	s -= len;
553 	e -= len;
554     }
555 
556     new = ajSeqNew();
557     if(find == N_STOP2STOP || find == N_START2STOP ||
558 	find == AROUND_INIT_STOP || find == AROUND_END_STOP ||
559 	find == AROUND_START)
560 	ajSeqSetNuc(new);
561     else
562 	ajSeqSetProt(new);
563 
564 
565     /*
566     ** Set the start and end positions to report and get the sequence for
567     ** the AROUND* sequences
568     */
569     if(find == AROUND_INIT_STOP)
570     {
571 	codonpos = s-3;
572 	s = codonpos - around;		/* 50 before the initial STOP */
573 	e = codonpos + around+2;	/* 50 after the end of the STOP */
574 	if(s < 1)
575 	    return;
576 
577 	if(e > seqlen)
578 	    return;
579 	ajStrAssignSubS(&aroundstr, ajSeqGetSeqS(seq), s-1, e-1);
580 
581     }
582     else if(find == AROUND_START)
583     {
584 	codonpos = s;
585 	s = codonpos - around;		/* 50 before the initial STOP */
586 	e = codonpos + around+2;	/* 50 after the end of the STOP */
587 	if(s < 1)
588 	    return;
589 
590 	if(e > seqlen)
591 	    return;
592 	ajStrAssignSubS(&aroundstr, ajSeqGetSeqS(seq), s-1, e-1);
593 
594     }
595     else if(find == AROUND_END_STOP)
596     {
597 	codonpos = e+1;
598 	s = codonpos - around;		/* 50 before the initial STOP */
599 	e = codonpos + around+2;	/* 50 after the end of the STOP */
600 	if(s < 1)
601 	    return;
602 
603 	if(e > seqlen)
604 	    return;
605 	ajStrAssignSubS(&aroundstr, ajSeqGetSeqS(seq), s-1, e-1);
606     }
607 
608 
609 
610     /* set the name and description */
611     ajStrAssignS(&name, ajSeqGetNameS(seq));
612     ajStrAppendC(&name, "_");
613 
614     /* post-increment the ORF number for the next ORF */
615     ajStrFromInt(&value,(*orf_no)++);
616 
617     ajStrAppendS(&name, value);
618     ajSeqAssignNameS(new, name);
619 
620     /* set the description of the translation */
621     ajStrAssignC(&name, "[");
622 
623     /* Reverse the reported positions if this is the reverse sense */
624     if(!sense)
625     {
626 	s = len-s+1;
627 	e = len-e+1;
628 
629         /*
630 	** shift the positions back into the range 1..len as far as possible
631         ** without going into negative numbers
632 	*/
633         while(e > len)
634 	{
635             s -= len;
636             e -= len;
637         }
638 
639         while(e < 0 || s < 0)
640 	{
641             s += len;
642             e += len;
643         }
644     }
645 
646     /* the base before the stop codon (numbering bases from 1) */
647     ajStrFromInt(&value, s + ajSeqGetOffset(seq));
648 
649     ajStrAppendS(&name, value);
650     ajStrAppendC(&name, " - ");
651 
652     /* the base before the stop codon (numbering bases from 1) */
653     ajStrFromInt(&value, e + ajSeqGetOffset(seq));
654 
655 
656     ajStrAppendS(&name, value);
657     ajStrAppendC(&name, "] ");
658 
659     /* make it clear if this is the reverse sense */
660     if(!sense)
661         ajStrAppendC(&name, "(REVERSE SENSE) ");
662 
663 
664     /*
665     ** make it clear if this is a circular genome and the ORF crosses
666     ** the breakpoint
667     */
668     if(s> len || e > len)
669     	ajStrAppendC(&name, "(ORF crosses the breakpoint) ");
670 
671 
672     if(find == AROUND_INIT_STOP || find == AROUND_START ||
673 	find == AROUND_END_STOP)
674     {
675 	ajStrAppendC(&name, "Around codon at ");
676 	ajStrFromInt(&value, codonpos);
677 	ajStrAppendS(&name, value);
678 	ajStrAppendC(&name, ". ");
679     }
680 
681     ajStrAppendS(&name, ajSeqGetDescS(seq));
682     ajSeqAssignDescS(new, name);
683 
684 
685     /* replace newstr in new */
686     if(find == N_STOP2STOP || find == N_START2STOP || find == P_STOP2STOP ||
687 	find == P_START2STOP)
688 	ajSeqAssignSeqS(new, str);
689     else
690 	/* sequence to be 50 bases around the codon */
691 	ajSeqAssignSeqS(new, aroundstr);
692 
693     ajSeqoutWriteSeq(seqout, new);
694 
695     ajSeqDel(&new);
696     ajStrDel(&value);
697     ajStrDel(&name);
698 
699     return;
700 }
701 
702 
703 
704 
705 /* @funcstatic getorf_AppORF **************************************************
706 **
707 ** append aa to ORF sequence string
708 **
709 ** @param [r] find [ajint] Find code
710 ** @param [u] str [AjPStr*] Sequence string
711 ** @param [r] chrseq [const char*] Undocumented
712 ** @param [r] pos [ajint] Codon triplet position in chrseq
713 ** @param [r] aa [char] Undocumented
714 ** @@
715 ******************************************************************************/
716 
getorf_AppORF(ajint find,AjPStr * str,const char * chrseq,ajint pos,char aa)717 static void getorf_AppORF(ajint find, AjPStr *str,
718 			  const char *chrseq, ajint pos,
719 			  char aa)
720 {
721 
722     if(find == N_STOP2STOP || find == N_START2STOP ||
723 	find == AROUND_INIT_STOP || find == AROUND_END_STOP)
724     {
725 	ajStrAppendK(str, chrseq[pos]);
726 	ajStrAppendK(str, chrseq[pos+1]);
727 	ajStrAppendK(str, chrseq[pos+2]);
728     }
729     else if(find == P_STOP2STOP || find == P_START2STOP ||
730 	    find == AROUND_START)
731 	ajStrAppendK(str, aa);
732 
733     return;
734 }
735