1 /* @source ajcod **************************************************************
2 **
3 ** AJAX codon functions
4 **
5 ** @author Copyright (C) 1999 Alan Bleasby
6 ** @version $Revision: 1.60 $
7 ** @modified Aug 07 ajb First version
8 ** @modified $Date: 2012/12/07 10:12:59 $ by $Author: rice $
9 ** @@
10 **
11 ** This library is free software; you can redistribute it and/or
12 ** modify it under the terms of the GNU Lesser General Public
13 ** License as published by the Free Software Foundation; either
14 ** version 2.1 of the License, or (at your option) any later version.
15 **
16 ** This library is distributed in the hope that it will be useful,
17 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 ** Lesser General Public License for more details.
20 **
21 ** You should have received a copy of the GNU Lesser General Public
22 ** License along with this library; if not, write to the Free Software
23 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
24 ** MA 02110-1301, USA.
25 **
26 ******************************************************************************/
27
28
29 #include "ajlib.h"
30
31 #include "ajcod.h"
32 #include "ajtime.h"
33 #include "ajfileio.h"
34 #include "ajfiledata.h"
35 #include "ajbase.h"
36 #include "ajtranslate.h"
37
38 #include <math.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <ctype.h>
43
44 #include <limits.h>
45
46
47
48 static AjPStr codReadLine = NULL;
49 static AjPStr codTmpLine = NULL;
50
51
52 static void codCalcFraction(AjPCod thys);
53 static AjBool codCommentProcess(AjPCod thys, const AjPStr line);
54 static void codFix(AjPCod thys);
55 static AjBool codGcgProcess(AjPCod thys, const AjPStr line);
56 static AjBool codIsAa(AjPStr* token);
57 static AjBool codIsAa3(AjPStr* token, ajint* idx);
58 static AjBool codIsCodon(AjPStr* token);
59 static AjBool codIsFraction(const AjPStr token);
60 static AjBool codIsFreq(const AjPStr token);
61 static AjBool codIsNumber(const AjPStr token);
62 static AjBool codIsNumberF(AjPStr* token);
63 static double codRandom(ajint NA, ajint NC, ajint NG, ajint NT, ajint len,
64 const char *p);
65 static AjBool codReadCodehop(AjPCod thys, AjPFilebuff inbuff);
66 static AjBool codReadCutg(AjPCod thys, AjPFilebuff inbuff);
67 static AjBool codReadEmboss(AjPCod thys, AjPFilebuff inbuff);
68 static AjBool codReadGcg(AjPCod thys, AjPFilebuff inbuff);
69 static AjBool codReadSpsum(AjPCod thys, AjPFilebuff inbuff);
70 static AjBool codReadStaden(AjPCod thys, AjPFilebuff inbuff);
71 static AjBool codTripletAdd (const AjPCod thys, const char residue,
72 char triplet[4]);
73 static void codTripletBases(char* triplet);
74 static void codWriteCherry(const AjPCod thys, AjPFile outf);
75 static void codWriteCodehop(const AjPCod thys, AjPFile outf);
76 static void codWriteCutg(const AjPCod thys, AjPFile outf);
77 static void codWriteCutgaa(const AjPCod thys, AjPFile outf);
78 static void codWriteEmboss(const AjPCod thys, AjPFile outf);
79 static void codWriteGcg(const AjPCod thys, AjPFile outf);
80 static void codWriteStaden(const AjPCod thys, AjPFile outf);
81 static void codWriteSpsum(const AjPCod thys, AjPFile outf);
82 static void codWriteTransterm(const AjPCod thys, AjPFile outf);
83 static double* codGetWstat(const AjPCod thys);
84
85 #define AJCODSIZE 66
86 #define AJCODSTART 64
87 #define AJCODSTOP 65
88 #define AJCODEND 65
89 #define AJCODAMINOS 28
90
91
92
93 /* @conststatic spsumcodons ***************************************************
94 **
95 ** CUTG format species summary codons list
96 **
97 ******************************************************************************/
98
99 static const char *spsumcodons[]=
100 {
101 "CGA","CGC","CGG","CGT","AGA","AGG","CTA","CTC",
102 "CTG","CTT","TTA","TTG","TCA","TCC","TCG","TCT",
103 "AGC","AGT","ACA","ACC","ACG","ACT","CCA","CCC",
104 "CCG","CCT","GCA","GCC","GCG","GCT","GGA","GGC",
105 "GGG","GGT","GTA","GTC","GTG","GTT","AAA","AAG",
106 "AAC","AAT","CAA","CAG","CAC","CAT","GAA","GAG",
107 "GAC","GAT","TAC","TAT","TGC","TGT","TTC","TTT",
108 "ATA","ATC","ATT","ATG","TGG","TAA","TAG","TGA"
109 };
110
111
112
113
114 /* @conststatic spsumaas ***************************************************
115 **
116 ** CUTG format species summary amino acid codes list
117 **
118 ******************************************************************************/
119
120 static const char *spsumaa= "RRRRRRLLLLLLSSSSSSTTTTPPPPAAAAGG"
121 "GGVVVVKKNNQQHHEEDDYYCCFFIIIMW***";
122
123
124
125
126 /* @datastatic CodPInFormat ***************************************************
127 **
128 ** Codon usage input formats data structure
129 **
130 ** @alias CodSInFormat
131 ** @alias CodOInFormat
132 **
133 ** @attr Name [const char*] Format name
134 ** @attr Try [AjBool] If true, try for an unknown input. Duplicate names
135 ** and read-anything formats are set false
136 ** @attr Padding [ajint] Padding to alignment boundary
137 ** @attr Desc [const char*] Format description
138 ** @attr Read [AjBool function] Input function, returns ajTrue on success
139 ** @attr Comment [const char*] Format comments
140 ** @@
141 ******************************************************************************/
142
143 typedef struct CodSInFormat
144 {
145 const char *Name;
146 AjBool Try;
147 ajint Padding;
148 const char *Desc;
149 AjBool (*Read) (AjPCod thys, AjPFilebuff inbuff);
150 const char *Comment;
151 } CodOInFormat;
152
153 #define CodPInFormat CodOInFormat*
154
155
156 static CodOInFormat codInFormatDef[] =
157 {
158 /* "Name", Try "Description" */
159 /* ReadFunction "Comment" */
160 {"emboss", AJTRUE, 0, "EMBOSS codon usage file",
161 &codReadEmboss, "All numbers read, #comments for extras"},
162 {"cut", AJFALSE, 0, "EMBOSS codon usage file",
163 &codReadEmboss, "Same as EMBOSS, output default format is 'cut'"},
164 {"gcg", AJTRUE, 0, "GCG codon usage file",
165 &codReadGcg, "All numbers read, #comments for extras"},
166 {"cutg", AJTRUE, 0, "CUTG codon usage file",
167 &codReadCutg, "All numbers (cutgaa) read or fraction calculated, "
168 "extras in first line"},
169 {"cutgaa", AJFALSE, 0, "CUTG codon usage file with aminoacids",
170 &codReadCutg, "Cutg with all numbers"},
171 {"spsum", AJTRUE, 0, "CUTG species summary file",
172 &codReadSpsum, "Number only, species and CDSs in header"},
173 {"cherry", AJFALSE, 0, "Mike Cherry codonusage database file",
174 &codReadGcg, "GCG format with species and CDSs in header"},
175 {"transterm", AJFALSE, 0, "TransTerm database file",
176 &codReadGcg, "GCG format with no extras"},
177 {"codehop", AJTRUE, 0, "FHCRC codehop program codon usage file",
178 &codReadCodehop, "Freq only, extras at end"},
179 {"staden", AJTRUE, 0, "Staden package codon usage file with numbers",
180 &codReadStaden, "Number only, no extras."},
181 {"numstaden", AJFALSE, 0, "Staden package codon usage file with numbers",
182 &codReadStaden, "Number only, no extras. Obsolete name for 'staden'"},
183 {NULL, 0, 0, NULL, NULL, NULL}
184 };
185
186
187
188
189 /* @datastatic CodPOutFormat **************************************************
190 **
191 ** Codon usage output formats data structure
192 **
193 ** @alias CodSOutFormat
194 ** @alias CodOOutFormat
195 **
196 ** @attr Name [const char*] Format name
197 ** @attr Desc [const char*] Format description
198 ** @attr Write[void function] Output function
199 ** @@
200 ******************************************************************************/
201
202 typedef struct CodSOutFormat
203 {
204 const char *Name;
205 const char *Desc;
206 void (*Write) (const AjPCod thys, AjPFile outfile);
207 } CodOOutFormat;
208
209 #define CodPOutFormat CodOOutFormat*
210
211
212 static CodOOutFormat codoutFormatDef[] =
213 {
214 /* "Name", "Description" */
215 /* WriteFunction */
216 {"emboss", "EMBOSS codon usage file",
217 &codWriteEmboss},
218 {"cut", "EMBOSS codon usage file",
219 &codWriteEmboss},
220 {"gcg", "GCG codon usage file",
221 &codWriteGcg},
222 {"cutg", "CUTG codon usage file",
223 &codWriteCutg},
224 {"cutgaa", "CUTG codon usage file with aminoacids",
225 &codWriteCutgaa},
226 {"spsum", "CUTG species summary file",
227 &codWriteSpsum},
228 {"cherry", "Mike Cherry codonusage database file",
229 &codWriteCherry},
230 {"transterm", "TransTerm database file",
231 &codWriteTransterm},
232 {"codehop", "FHCRC codehop program codon usage file",
233 &codWriteCodehop},
234 {"staden", "Staden package codon usage file with numbers",
235 &codWriteStaden},
236 {"numstaden", "Staden package codon usage file with numbers",
237 &codWriteStaden},
238 {NULL, NULL, NULL}
239 };
240
241
242
243
244 /* @section Codon Constructors ************************************************
245 **
246 ** All constructors return a new object by pointer. It is the responsibility
247 ** of the user to first destroy any previous object. The target pointer
248 ** does not need to be initialised to NULL, but it is good programming practice
249 ** to do so anyway.
250 **
251 ******************************************************************************/
252
253
254
255
256 /* @func ajCodNew *************************************************************
257 **
258 ** Default constructor for empty AJAX codon objects.
259 **
260 ** @return [AjPCod] Pointer to an codon object
261 **
262 ** @release 1.0.0
263 ** @@
264 ******************************************************************************/
265
ajCodNew(void)266 AjPCod ajCodNew(void)
267 {
268 AjPCod thys;
269
270 AJNEW0(thys);
271
272 thys->Name = ajStrNew();
273 thys->Species = ajStrNew();
274 thys->Division = ajStrNew();
275 thys->Release = ajStrNew();
276 thys->Desc = ajStrNew();
277 AJCNEW0(thys->back, AJCODAMINOS);
278 AJCNEW0(thys->aa, AJCODSIZE);
279 AJCNEW0(thys->num, AJCODSIZE);
280 AJCNEW0(thys->tcount, AJCODSIZE);
281 AJCNEW0(thys->fraction, AJCODSIZE);
282
283 return thys;
284 }
285
286
287
288
289 /* @func ajCodNewCodenum ******************************************************
290 **
291 ** Default constructor for empty AJAX codon usage objects, with the
292 ** amino acid assignments taken from a standard genetic code.
293 **
294 ** @param [r] code [ajint] Genetic code number
295 ** @return [AjPCod] Pointer to an codon object
296 **
297 ** @release 6.2.0
298 ** @@
299 ******************************************************************************/
300
ajCodNewCodenum(ajint code)301 AjPCod ajCodNewCodenum(ajint code)
302 {
303 AjPCod thys;
304 AjPTrn trn = NULL;
305 AjPStr aa = NULL;
306 ajint i;
307 ajint idx;
308 ajint c;
309
310 trn = ajTrnNewI(code);
311
312 if(!trn)
313 return NULL;
314
315 AJNEW0(thys);
316
317 thys->Name = ajStrNew();
318 thys->Species = ajStrNew();
319 thys->Division = ajStrNew();
320 thys->Release = ajStrNew();
321 thys->Desc = ajStrNew();
322 AJCNEW0(thys->back, AJCODAMINOS);
323 AJCNEW0(thys->aa, AJCODSIZE);
324 AJCNEW0(thys->num, AJCODSIZE);
325 AJCNEW0(thys->tcount, AJCODSIZE);
326 AJCNEW0(thys->fraction, AJCODSIZE);
327
328 for(i=0;i<64;i++)
329 {
330 idx = ajCodIndexC(spsumcodons[i]);
331 ajStrAssignK(&aa, ajTrnCodonC(trn, spsumcodons[i]));
332 c = ajBasecodeToInt((ajint)ajStrGetCharFirst(aa));
333
334 if(c>25)
335 c=27;
336
337 thys->aa[idx] = c;
338 }
339
340 ajTrnDel(&trn);
341 ajStrDel(&aa);
342
343 return thys;
344 }
345
346
347
348
349 /* @func ajCodNewCod **********************************************************
350 **
351 ** Duplicate a codon object
352 **
353 ** @param [r] thys [const AjPCod] Codon to duplicate
354 **
355 ** @return [AjPCod] Pointer to an codon object
356 **
357 ** @release 6.2.0
358 ** @@
359 ******************************************************************************/
360
ajCodNewCod(const AjPCod thys)361 AjPCod ajCodNewCod(const AjPCod thys)
362 {
363 AjPCod dupcod;
364 ajint i;
365
366 dupcod = ajCodNew();
367
368 ajStrAssignS(&dupcod->Name,thys->Name);
369 ajStrAssignS(&dupcod->Species,thys->Species);
370 ajStrAssignS(&dupcod->Division,thys->Division);
371 ajStrAssignS(&dupcod->Release,thys->Release);
372 ajStrAssignS(&dupcod->Desc,thys->Desc);
373
374 dupcod->CdsCount = thys->CdsCount;
375 dupcod->CodonCount = thys->CodonCount;
376 dupcod->GeneticCode = thys->GeneticCode;
377
378 for(i=0;i<AJCODSIZE;++i)
379 {
380 dupcod->aa[i] = thys->aa[i];
381 dupcod->num[i] = thys->num[i];
382 dupcod->tcount[i] = thys->tcount[i];
383 dupcod->fraction[i] = thys->fraction[i];
384 }
385
386 for(i=0;i<AJCODAMINOS;++i)
387 dupcod->back[i] = thys->back[i];
388
389 return dupcod;
390 }
391
392
393
394
395 /* @section Codon Destructors ************************************************
396 **
397 ** Destructor(s) for AjPCod objects
398 **
399 ******************************************************************************/
400
401
402
403
404 /* @func ajCodDel *************************************************************
405 **
406 ** Default destructor for AJAX codon objects.
407 **
408 ** @param [w] pthys [AjPCod *] codon usage structure
409 **
410 ** @return [void]
411 **
412 ** @release 1.0.0
413 ** @@
414 ******************************************************************************/
415
ajCodDel(AjPCod * pthys)416 void ajCodDel(AjPCod *pthys)
417 {
418 AjPCod thys = *pthys;
419
420 if(!thys)
421 return;
422
423 AJFREE(thys->fraction);
424 AJFREE(thys->tcount);
425 AJFREE(thys->num);
426 AJFREE(thys->aa);
427 AJFREE(thys->back);
428
429 ajStrDel(&thys->Name);
430 ajStrDel(&thys->Species);
431 ajStrDel(&thys->Division);
432 ajStrDel(&thys->Release);
433 ajStrDel(&thys->Desc);
434
435 AJFREE(*pthys);
436
437 return;
438 }
439
440
441
442
443 /* @section Codon Functions ************************************************
444 **
445 ** Function(s) for AjPCod objects
446 **
447 ******************************************************************************/
448
449
450
451
452 /* @func ajCodBacktranslate ***************************************************
453 **
454 ** Back translate a string
455 **
456 ** @param [w] b [AjPStr *] back translated sequence
457 ** @param [r] a [const AjPStr] sequence
458 ** @param [r] thys [const AjPCod] codon usage object
459 **
460 ** @return [void]
461 **
462 ** @release 1.0.0
463 ** @@
464 ******************************************************************************/
ajCodBacktranslate(AjPStr * b,const AjPStr a,const AjPCod thys)465 void ajCodBacktranslate(AjPStr *b, const AjPStr a, const AjPCod thys)
466 {
467 const char *p;
468 char q;
469 ajint idx;
470
471 ajStrAssignClear(b);
472
473 p = ajStrGetPtr(a);
474
475 while(*p)
476 {
477 if(*p=='-')
478 {
479 ++p;
480 continue;
481 }
482
483 if(toupper((int)*p)==(int)'X')
484 {
485 ajStrAppendC(b,"NNN");
486 ++p;
487 continue;
488 }
489
490 q = *p;
491
492 if(toupper((int)q)==(int)'B')
493 q = 'D';
494
495 if(toupper((int)q)==(int)'Z')
496 q = 'E';
497
498 idx = thys->back[ajBasecodeToInt(q)];
499 /*
500 if(thys->aa[idx]==27)
501 {
502 ajStrAppendC(b,"End");
503 ++p;
504 continue;
505 }
506 */
507 ajStrAppendC(b,ajCodTriplet(idx));
508 ++p;
509 }
510
511 return;
512 }
513
514
515
516
517 /* @func ajCodBacktranslateAmbig **********************************************
518 **
519 ** Back translate a string to a fully ambiguous nucleotide sequence as a string
520 **
521 ** @param [w] b [AjPStr *] back translated sequence
522 ** @param [r] a [const AjPStr] sequence
523 ** @param [r] thys [const AjPCod] codon usage object
524 **
525 ** @return [void]
526 **
527 ** @release 4.0.0
528 ** @@
529 ******************************************************************************/
530
ajCodBacktranslateAmbig(AjPStr * b,const AjPStr a,const AjPCod thys)531 void ajCodBacktranslateAmbig(AjPStr *b, const AjPStr a, const AjPCod thys)
532 {
533 const char *p;
534 char q;
535 char codon[4] = "NNN";
536
537 ajStrAssignClear(b);
538
539 ajDebug("ajCodBacktranslateAmbig '%S'\n", a);
540 p = ajStrGetPtr(a);
541
542 while(*p)
543 {
544 codon[0] = codon[1] = codon[2] = '\0';
545
546 q = *p;
547
548 if(toupper((int)q)==(int)'-')
549 {
550 strcpy(codon, "---");
551 }
552 else if(toupper((int)q)==(int)'X')
553 {
554 strcpy(codon,"NNN");
555 }
556 else if(toupper((int)q)==(int)'B')
557 {
558 codTripletAdd(thys, 'D', codon);
559 codTripletAdd(thys, 'N', codon);
560 codTripletBases(codon);
561 }
562 else if(toupper((int)q)==(int)'Z')
563 {
564 codTripletAdd(thys, 'E', codon);
565 codTripletAdd(thys, 'Q', codon);
566 codTripletBases(codon);
567 }
568 else if(toupper((int)q)==(int)'J') /* Unlikely - NMR code */
569 {
570 codTripletAdd(thys, 'I', codon);
571 codTripletAdd(thys, 'L', codon);
572 codTripletBases(codon);
573 }
574 else if(toupper((int)q)==(int)'U') /* selenocysteine if found */
575 {
576 strcpy(codon,"TGA");
577 }
578 else
579 {
580 codTripletAdd(thys, q, codon);
581 codTripletBases(codon);
582 }
583
584 ajStrAppendC(b,codon);
585 ++p;
586 }
587
588 return;
589 }
590
591
592
593
594 /* @funcstatic codTripletAdd **************************************************
595 **
596 ** Add all possible bases at each position to a codon for one amino acid
597 **
598 ** @param [r] thys [const AjPCod] Codon usage table
599 ** @param [r] residue [const char] Amino acid code
600 ** @param [u] triplet [char[4]] Codon triplet
601 ** @return [AjBool] ajTrue on success, ajFalse if the amino acid is not found..
602 **
603 ** @release 4.0.0
604 ******************************************************************************/
605
codTripletAdd(const AjPCod thys,const char residue,char triplet[4])606 static AjBool codTripletAdd (const AjPCod thys,
607 const char residue, char triplet[4])
608 {
609 AjBool ret = ajFalse;
610 ajint i;
611 char aa;
612 char* codon;
613
614 aa = (char) toupper((ajint) residue);
615
616 for (i=0;i<AJCODSTART;++i)
617 {
618 ajDebug("testing '%c' %d '%c' %2x%2x%2x\n",
619 residue, i, ajBasecodeFromInt(thys->aa[i]),
620 triplet[0], triplet[1], triplet[2]);
621
622 if(ajBasecodeFromInt(thys->aa[i]) == aa)
623 {
624 codon = ajCodTriplet(i);
625 ajDebug("codTripletAdd '%c' %d '%c' %2x%2x%2x '%s'\n",
626 residue, i, ajBasecodeFromInt(thys->aa[i]),
627 triplet[0], triplet[1], triplet[2], codon);
628 triplet[0] |= ajBaseAlphaToBin(codon[0]);
629 triplet[1] |= ajBaseAlphaToBin(codon[1]);
630 triplet[2] |= ajBaseAlphaToBin(codon[2]);
631 ajDebug("codTripletAdd now %2x%2x%2x\n",
632 triplet[0], triplet[1], triplet[2]);
633 ret = ajTrue;
634 }
635 }
636
637 return ret;
638 }
639
640
641
642
643 /* @funcstatic codTripletBases ************************************************
644 **
645 ** Converts a triplet of binary codes to base codes
646 **
647 ** @param [u] triplet [char*] Triplet in binary a=1 C=2 G=4 T=8
648 ** return [void]
649 **
650 ** @release 4.0.0
651 ******************************************************************************/
652
codTripletBases(char * triplet)653 static void codTripletBases(char* triplet)
654 {
655 ajint j;
656
657 for(j=0;j<3;j++)
658 triplet[j] = ajBaseBinToAlpha(triplet[j]);
659
660 return;
661 }
662
663
664
665
666 /* @func ajCodBase ************************************************************
667 **
668 ** Return one codon value given a possibly ambiguous base
669 **
670 ** @param [r] c [ajint] base
671 **
672 ** @return [ajint] single base value
673 **
674 ** @release 1.0.0
675 ** @@
676 ******************************************************************************/
ajCodBase(ajint c)677 ajint ajCodBase(ajint c)
678 {
679 ajint v;
680
681 v = ajBaseAlphaToBin(c);
682
683 if(v & 1)
684 return 0;
685
686 if(v & 2)
687 return 1;
688
689 if(v & 4)
690 return 2;
691
692 if(v & 8)
693 return 3;
694
695 return 0;
696 }
697
698
699
700
701 /* @func ajCodClear ***********************************************************
702 **
703 ** Zero all entries
704 **
705 ** To retain the genetics code (the amino acids for each codon) use
706 ** ajCodClearData instead.
707 **
708 ** @param [w] thys [AjPCod] codon usage structure
709 **
710 ** @return [void]
711 **
712 ** @release 1.0.0
713 ** @@
714 ******************************************************************************/
ajCodClear(AjPCod thys)715 void ajCodClear(AjPCod thys)
716 {
717 ajint i;
718
719 ajStrAssignClear(&thys->Name);
720 ajStrAssignClear(&thys->Species);
721 ajStrAssignClear(&thys->Division);
722 ajStrAssignClear(&thys->Release);
723 ajStrAssignClear(&thys->Desc);
724
725 thys->CdsCount = 0;
726 thys->CodonCount = 0;
727 thys->GeneticCode = 0;
728
729 for(i=0;i<AJCODSIZE;++i)
730 {
731 thys->fraction[i] = 0.0;
732 thys->tcount[i] = 0.0;
733 thys->num[i] = 0;
734 thys->aa[i] = 0;
735 }
736
737 for(i=0;i<AJCODAMINOS;++i)
738 thys->back[i] = 0;
739
740 return;
741 }
742
743
744
745
746 /* @func ajCodClearData *******************************************************
747 **
748 ** Zero the name, number count and fraction codon entries
749 **
750 ** @param [w] thys [AjPCod] codon usage structure
751 **
752 ** @return [void]
753 **
754 ** @release 3.0.0
755 ** @@
756 ******************************************************************************/
ajCodClearData(AjPCod thys)757 void ajCodClearData(AjPCod thys)
758 {
759 ajint i;
760
761 ajStrAssignClear(&thys->Name);
762 ajStrAssignClear(&thys->Species);
763 ajStrAssignClear(&thys->Division);
764 ajStrAssignClear(&thys->Release);
765 ajStrAssignClear(&thys->Desc);
766
767 thys->CdsCount = 0;
768 thys->CodonCount = 0;
769 thys->GeneticCode = 0;
770
771 for(i=0;i<AJCODSIZE;++i)
772 {
773 thys->fraction[i] = 0.0;
774 thys->tcount[i] = 0.0;
775 thys->num[i] = 0;
776 }
777
778 for(i=0;i<AJCODAMINOS;++i)
779 thys->back[i] = 0;
780
781 return;
782 }
783
784
785
786
787 /* @func ajCodSetTripletsS ****************************************************
788 **
789 ** Load the num array of a codon structure
790 **
791 ** Used for creating a codon usage table
792 **
793 ** Skips triplets with ambiguity codes and any incomplete triplet at the end.
794 **
795 ** @param [w] thys [AjPCod] Codon object
796 ** @param [r] s [const AjPStr] dna sequence
797 ** @param [w] c [ajint *] triplet count
798 **
799 ** @return [void]
800 **
801 ** @release 6.2.0
802 ** @@
803 ******************************************************************************/
ajCodSetTripletsS(AjPCod thys,const AjPStr s,ajint * c)804 void ajCodSetTripletsS(AjPCod thys, const AjPStr s, ajint *c)
805 {
806 const char *p;
807 ajuint last;
808 ajuint i;
809 ajint idx;
810
811 p = ajStrGetPtr(s);
812 last = ajStrGetLen(s)-2;
813
814 for(i=0;i<last;i+=3,p+=3,++(*c))
815 {
816 if((idx=ajCodIndexC(p))!=-1)
817 ++thys->num[idx];
818 else
819 {
820 ajDebug("ajCodSetTripletsS skipping triplet %3.3s\n", p);
821 --(*c);
822 }
823 }
824
825 return;
826 }
827
828
829
830
831 /* @func ajCodIndex ***********************************************************
832 **
833 ** Return a codon index given a three character codon
834 **
835 ** Can be called with any coding sequence of 3 or more bases, and will
836 ** use only the first 3 bases of the input.
837 **
838 ** @param [r] s [const AjPStr] Codon
839 **
840 ** @return [ajint] Codon index AAA=0 TTT=3f
841 **
842 ** @release 1.0.0
843 ** @@
844 ******************************************************************************/
845
ajCodIndex(const AjPStr s)846 ajint ajCodIndex(const AjPStr s)
847 {
848 return ajCodIndexC(ajStrGetPtr(s));
849 }
850
851
852
853
854 /* @func ajCodIndexC **********************************************************
855 **
856 ** Return a codon index given a three character codon
857 **
858 ** Can be called with any coding sequence of 3 or more bases, and will
859 ** use only the first 3 bases of the input.
860 **
861 ** @param [r] codon [const char *] Codon pointer
862 **
863 ** @return [ajint] codon index AAA=0 TTT=3f
864 **
865 ** @release 1.0.0
866 ** @@
867 ******************************************************************************/
868
ajCodIndexC(const char * codon)869 ajint ajCodIndexC(const char *codon)
870 {
871 const char *p;
872 ajint sum;
873 ajint c;
874
875 p = codon;
876 sum = 0;
877
878 if(!(c=*(p++)))
879 return -1;
880
881 sum += (ajCodBase(toupper(c))<<4);
882
883 if(!(c=*(p++)))
884 return -1;
885
886 sum += (ajCodBase(toupper(c))<<2);
887
888 if(!(c=*p))
889 return -1;
890
891 sum += ajCodBase(toupper(c));
892
893 return sum;
894 }
895
896
897
898
899 /* @func ajCodRead ************************************************************
900 **
901 ** Read a codon index from a filename using a specified format.
902 **
903 ** The format can be in the format argument, as a prefix format:: to the
904 ** filename, or empty to allow all known formats to be tried.
905 **
906 ** @param [w] thys [AjPCod] Codon object
907 ** @param [r] fn [const AjPStr] filename
908 ** @param [r] format [const AjPStr] format
909 **
910 ** @return [AjBool] ajTrue on success
911 ** @category input [AjPCod] Read codon index from a file
912 **
913 ** @release 1.0.0
914 ** @@
915 ******************************************************************************/
ajCodRead(AjPCod thys,const AjPStr fn,const AjPStr format)916 AjBool ajCodRead(AjPCod thys, const AjPStr fn, const AjPStr format)
917 {
918 AjBool ret = ajFalse;
919 AjPFile inf = NULL;
920 AjPFilebuff inbuff = NULL;
921 AjPStr formatstr = NULL;
922 AjPStr fname = NULL;
923 AjPStr filename = NULL;
924 ajlong i;
925
926 i = ajStrFindC(fn, "::");
927
928 if(i == -1)
929 {
930 ajStrAssignS(&filename, fn);
931 ajStrAssignS(&formatstr, format);
932 }
933 else
934 {
935 ajStrAssignSubS(&formatstr, fn, 0, i-1);
936 ajStrAssignSubS(&fname, fn, i+1, -1);
937 }
938
939 fname = ajStrNewS(filename);
940
941 inf = ajDatafileNewInNameS(fname);
942
943 if(!inf)
944 {
945 ajStrAssignC(&fname,"CODONS/");
946 ajStrAppendS(&fname,filename);
947 inf = ajDatafileNewInNameS(fname);
948
949 if(!inf)
950 {
951 ajStrDel(&fname);
952 return ajFalse;
953 }
954 }
955 ajStrDel(&fname);
956 inbuff = ajFilebuffNewFromFile(inf);
957
958
959 for(i=0;codInFormatDef[i].Name; i++)
960 {
961 if(ajStrGetLen(formatstr))
962 {
963 if(!ajStrMatchC(formatstr, codInFormatDef[i].Name))
964 continue;
965 }
966 else
967 {
968 if(!codInFormatDef[i].Try)
969 continue;
970 }
971
972 ajDebug("ajCodRead Try format '%s'\n", codInFormatDef[i].Name);
973 ret = (*codInFormatDef[i].Read)(thys, inbuff);
974
975 if(ret)
976 {
977 ajStrAssignS(&thys->Name, filename);
978 codFix(thys);
979 ajDebug("ajCodRead Format '%s' success\n", codInFormatDef[i].Name);
980 ajFilebuffDel(&inbuff);
981 ajStrDel(&filename);
982 ajStrDel(&formatstr);
983 return ajTrue;
984 }
985
986 ajDebug("ajCodRead Format '%s' failed\n", codInFormatDef[i].Name);
987 ajCodClear(thys);
988 ajFilebuffReset(inbuff);
989 }
990
991 ajFilebuffDel(&inbuff);
992 ajStrDel(&filename);
993 ajStrDel(&formatstr);
994
995 return ret;
996 }
997
998
999
1000
1001 /* @funcstatic codReadEmboss **************************************************
1002 **
1003 ** Read a codon index from a filename in EMBOSS format
1004 **
1005 ** First readable line format is either the comment or the data
1006 **
1007 ** #Codon AA Fraction Frequency Number<br>
1008 ** GCA A 0.070 7.080 544<br>
1009 **
1010 ** @param [w] thys [AjPCod] Codon object
1011 ** @param [u] inbuff [AjPFilebuff] Input file buffer
1012 **
1013 ** @return [AjBool] ajTrue on success
1014 ** @category input [AjPCod] Read codon index from a file
1015 **
1016 ** @release 3.0.0
1017 ** @@
1018 ******************************************************************************/
codReadEmboss(AjPCod thys,AjPFilebuff inbuff)1019 static AjBool codReadEmboss(AjPCod thys, AjPFilebuff inbuff)
1020 {
1021 AjPStr t;
1022 const char *p;
1023 ajint idx;
1024 ajint c;
1025 ajint num;
1026 double tcount;
1027 double fraction;
1028 AjPStr tok = NULL;
1029 AjPStrTok handle = NULL;
1030
1031 t = ajStrNew();
1032
1033 while(ajBuffreadLine(inbuff,&codReadLine))
1034 {
1035 p=ajStrGetPtr(codReadLine);
1036
1037 if(*p=='\n')
1038 continue;
1039 else if(*p=='#')
1040 {
1041 codCommentProcess(thys, codReadLine);
1042 continue;
1043 }
1044 else if(*p=='!')
1045 continue;
1046
1047 /* check record format */
1048
1049 if(ajStrParseCountC(codReadLine, " \t\r\n") != 5)
1050 return ajFalse;
1051
1052 ajStrTokenAssignC(&handle, codReadLine, " \t\r\n");
1053 ajStrTokenNextParse(handle, &tok);
1054
1055 if(!codIsCodon(&tok))
1056 return ajFalse;
1057
1058 idx = ajCodIndex(tok);
1059
1060 ajStrTokenNextParse(handle, &tok);
1061
1062 if(!codIsAa(&tok))
1063 return ajFalse;
1064
1065 c = ajBasecodeToInt((ajint)ajStrGetCharFirst(tok));
1066
1067 if(c>25)
1068 c=27; /* stop */
1069
1070 ajStrTokenNextParse(handle, &tok);
1071
1072 if(!codIsFraction(tok))
1073 return ajFalse;
1074
1075 ajStrToDouble(tok,&fraction);
1076
1077 ajStrTokenNextParse(handle, &tok);
1078 if(!codIsFreq(tok))
1079 return ajFalse;
1080 ajStrToDouble(tok,&tcount);
1081
1082 ajStrTokenNextParse(handle, &tok);
1083
1084 if(!codIsNumber(tok))
1085 return ajFalse;
1086
1087 ajStrToInt(tok,&num);
1088
1089
1090 thys->aa[idx] = c;
1091 thys->num[idx] = num;
1092 thys->tcount[idx] = tcount;
1093 thys->fraction[idx] = fraction;
1094 }
1095
1096 ajStrDel(&t);
1097 ajStrTokenDel(&handle);
1098 ajStrDel(&tok);
1099
1100 return ajTrue;
1101 }
1102
1103
1104
1105
1106 /* @funcstatic codReadStaden **************************************************
1107 **
1108 ** Read a codon index from a filename in Staden format with counts.
1109 **
1110 ** The first lines have the format:
1111 **
1112 ** ===========================================<br>
1113 ** F TTT 170 S TCT 160 Y TAT 10 C TGT 20<br>
1114 **
1115 ** Note that in this format the Staden package allows two codon usage tables
1116 ** in the same file. The first is the one we want as it is for the protein
1117 ** coding regions. The second is for the regions between CDSs.
1118 **
1119 ** @param [w] thys [AjPCod] Codon object
1120 ** @param [u] inbuff [AjPFilebuff] Input file buffer
1121 **
1122 ** @return [AjBool] ajTrue on success
1123 ** @category input [AjPCod] Read codon index from a file
1124 **
1125 ** @release 3.0.0
1126 ** @@
1127 ******************************************************************************/
1128
codReadStaden(AjPCod thys,AjPFilebuff inbuff)1129 static AjBool codReadStaden(AjPCod thys, AjPFilebuff inbuff)
1130 {
1131 AjPStr tok = NULL;
1132 AjPStrTok handle = NULL;
1133 ajint i;
1134 ajint c;
1135 ajint idx;
1136 ajint num;
1137 double tcount;
1138 ajint cdscount = 0;
1139 ajint icod = 0;
1140
1141 while(ajBuffreadLine(inbuff,&codReadLine))
1142 {
1143 ajStrRemoveWhiteExcess(&codReadLine);
1144
1145 if(!ajStrGetLen(codReadLine))
1146 continue;
1147
1148 if(ajStrGetCharFirst(codReadLine) == '#')
1149 {
1150 codCommentProcess(thys, codReadLine);
1151 continue;
1152 }
1153
1154 if(ajStrGetCharFirst(codReadLine) == '!')
1155 continue;
1156
1157 if(ajStrFindRestC(codReadLine, "=") == -1)
1158 continue;
1159
1160 if(icod > 63) /* ignore second non-coding set */
1161 continue;
1162
1163 /* check record format */
1164
1165 if(ajStrParseCountC(codReadLine, " \t\r\n") != 12)
1166 return ajFalse;
1167
1168 ajStrTokenAssignC(&handle, codReadLine, " \t\r\n");
1169 for(i=0;i<4;i++)
1170 {
1171 ajStrTokenNextParse(handle, &tok);
1172
1173 if(!codIsAa(&tok))
1174 return ajFalse;
1175
1176 c = ajBasecodeToInt((ajint)ajStrGetCharFirst(tok));
1177
1178 if(c>25)
1179 c=27; /* stop */
1180
1181 ajStrTokenNextParse(handle, &tok);
1182
1183 if(!codIsCodon(&tok))
1184 return ajFalse;
1185
1186 idx = ajCodIndex(tok);
1187
1188 ajStrTokenNextParse(handle, &tok);
1189
1190 if(!codIsNumber(tok))
1191 return ajFalse;
1192
1193 ajStrToDouble(tok,&tcount);
1194 ajStrToInt(tok, &num);
1195
1196 thys->aa[idx] = c;
1197 cdscount += num;
1198 thys->num[idx] = num;
1199 thys->tcount[idx] = 0.0; /* unknown - can calculate */
1200 thys->fraction[idx] = 0.0; /* unknown fraction - can calculate */
1201
1202 icod++;
1203
1204 }
1205 }
1206
1207 codCalcFraction(thys);
1208
1209 ajStrTokenDel(&handle);
1210 ajStrDel(&tok);
1211
1212 return ajTrue;
1213 }
1214
1215
1216
1217
1218 /* @funcstatic codReadSpsum ***************************************************
1219 **
1220 ** Read a codon index from a filename in CUTG .spsum file format
1221 **
1222 ** @param [w] thys [AjPCod] Codon object
1223 ** @param [u] inbuff [AjPFilebuff] Input file buffer
1224 **
1225 ** @return [AjBool] ajTrue on success
1226 ** @category input [AjPCod] Read codon index from a file
1227 **
1228 ** @release 3.0.0
1229 ** @@
1230 ******************************************************************************/
1231
codReadSpsum(AjPCod thys,AjPFilebuff inbuff)1232 static AjBool codReadSpsum(AjPCod thys, AjPFilebuff inbuff)
1233 {
1234 AjPStrTok handle = NULL;
1235 AjPStr tok = NULL;
1236 ajint i;
1237 ajint j;
1238 ajint c;
1239 ajint num;
1240
1241 if(!ajBuffreadLine(inbuff,&codReadLine))
1242 return ajFalse;
1243
1244 if(ajStrParseCountC(codReadLine, ":") != 2)
1245 return ajFalse;
1246
1247 ajStrTokenAssignC(&handle, codReadLine, ":");
1248 ajStrTokenNextParse(handle, &tok);
1249 ajStrTrimWhite(&tok);
1250 ajStrAssignS(&thys->Species, tok);
1251
1252 ajStrTokenNextParse(handle, &tok);
1253 ajStrTrimWhite(&tok);
1254
1255 if(!ajStrToInt(tok, &thys->CdsCount))
1256 return ajFalse;
1257
1258 if(!ajBuffreadLine(inbuff,&codReadLine))
1259 return ajFalse;
1260
1261 if(ajStrParseCountC(codReadLine, " \t\n\r") != 64)
1262 return ajFalse;
1263
1264 ajStrTokenAssignC(&handle, codReadLine, " \t\r\n");
1265
1266 for(i=0;i<64;i++)
1267 {
1268 ajStrTokenNextParse(handle, &tok);
1269
1270 if(!ajStrToInt(tok, &num))
1271 return ajFalse;
1272
1273 j = ajCodIndexC(spsumcodons[i]);
1274 thys->num[j] = num;
1275 c = ajBasecodeToInt((ajint)spsumaa[i]);
1276
1277 if(c>25)
1278 c=27;
1279
1280 thys->aa[j] = c;
1281 }
1282
1283 ajStrTokenDel(&handle);
1284 ajStrDel(&tok);
1285
1286 return ajTrue;
1287 }
1288
1289
1290
1291
1292 /* @funcstatic codReadCutg ****************************************************
1293 **
1294 ** Read a codon index from a filename in CUTG web site format
1295 **
1296 ** @param [w] thys [AjPCod] Codon object
1297 ** @param [u] inbuff [AjPFilebuff] Input file buffer
1298 **
1299 ** @return [AjBool] ajTrue on success
1300 ** @category input [AjPCod] Read codon index from a file
1301 **
1302 ** @release 3.0.0
1303 ** @@
1304 ******************************************************************************/
codReadCutg(AjPCod thys,AjPFilebuff inbuff)1305 static AjBool codReadCutg(AjPCod thys, AjPFilebuff inbuff)
1306 {
1307 AjPStr tok = NULL;
1308 AjPStrTok handle = NULL;
1309 ajint i;
1310 ajint j;
1311 AjBool hasaa;
1312 ajint ilines = 0;
1313 ajint c;
1314 ajint idx;
1315 ajint num;
1316 double tcount;
1317 double fraction;
1318 AjPTrn trn = NULL;
1319
1320 /* species [division]: 000 CDS's (0000 codons) */
1321
1322 ajBuffreadLine(inbuff,&codReadLine);
1323
1324 if(-1 == ajStrFindC(codReadLine, "CDS's"))
1325 return ajFalse;
1326
1327 if(-1 == ajStrFindC(codReadLine, "codons)"))
1328 return ajFalse;
1329
1330 if(-1 == ajStrFindC(codReadLine, "]:"))
1331 return ajFalse;
1332
1333 ajStrTokenAssignC(&handle, codReadLine, "[");
1334
1335 ajStrTokenNextParse(handle, &tok);
1336
1337 if(!ajStrGetLen(tok))
1338 return ajFalse;
1339
1340 ajStrRemoveWhiteExcess(&tok);
1341 ajStrAssignS(&thys->Species, tok);
1342
1343 ajStrTokenNextParseC(handle, "]: ", &tok);
1344
1345 if(!ajStrGetLen(tok))
1346 return ajFalse;
1347
1348 ajStrAssignS(&thys->Division, tok);
1349
1350 ajStrTokenNextParseC(handle, "CDS's (", &tok);
1351
1352 if(!codIsNumber(tok))
1353 return ajFalse;
1354
1355 ajStrToInt(tok, &thys->CdsCount);
1356
1357 ajStrTokenNextParseC(handle, " codons)", &tok);
1358
1359 if(!codIsNumber(tok))
1360 return ajFalse;
1361
1362 ajStrToInt(tok, &thys->CodonCount);
1363
1364
1365 while(ajBuffreadLine(inbuff,&codReadLine))
1366 {
1367 ajStrTrimWhite(&codReadLine);
1368
1369 if(ajStrPrefixC(codReadLine, "Coding GC "))
1370 continue;
1371 else if(ajStrPrefixC(codReadLine, "Genetic code "))
1372 {
1373 ajStrCutStart(&codReadLine, 13);
1374 ajStrTokenAssignC(&handle, codReadLine, " :");
1375 ajStrTokenNextParse(handle, &tok);
1376
1377 if(codIsNumber(tok))
1378 ajStrToInt(tok, &thys->GeneticCode);
1379
1380 continue;
1381 }
1382 else
1383 {
1384 i = ajStrParseCountC(codReadLine, " ()");
1385
1386 if(i == 20)
1387 hasaa = ajTrue;
1388 else if(i == 12)
1389 hasaa = ajFalse;
1390 else
1391 continue;
1392
1393 ajStrTokenAssignC(&handle, codReadLine, " ");
1394
1395 for(j=0;j<4;j++)
1396 {
1397 ajStrTokenNextParse(handle, &tok);
1398
1399 if(!codIsCodon(&tok))
1400 return ajFalse;
1401
1402 idx = ajCodIndex(tok);
1403
1404 if(hasaa)
1405 {
1406 ajStrTokenNextParse(handle, &tok);
1407
1408 if(!codIsAa(&tok))
1409 return ajFalse;
1410
1411 c = ajBasecodeToInt((ajint)ajStrGetCharFirst(tok));
1412
1413 ajStrTokenNextParse(handle, &tok);
1414
1415 if(!codIsFraction(tok))
1416 return ajFalse;
1417
1418 ajStrToDouble(tok,&fraction);
1419 }
1420 else
1421 {
1422 if(!trn)
1423 trn = ajTrnNewI(0);
1424
1425 ajStrAssignK(&tok, ajTrnCodonS(trn, tok));
1426 c = ajBasecodeToInt((ajint)ajStrGetCharFirst(tok));
1427 }
1428
1429 if(c>25)
1430 c=27; /* stop */
1431
1432 ajStrTokenNextParseC(handle, " (", &tok);
1433
1434 if(!codIsFreq(tok))
1435 return ajFalse;
1436
1437 ajStrToDouble(tok,&tcount);
1438
1439 ajStrTokenNextParseC(handle, " )", &tok);
1440
1441 if(!codIsNumber(tok))
1442 return ajFalse;
1443
1444 ajStrToInt(tok,&num);
1445
1446
1447
1448 thys->aa[idx] = c;
1449 thys->num[idx] = num;
1450 thys->tcount[idx] = tcount;
1451
1452 if(hasaa)
1453 thys->fraction[idx] = fraction;
1454 }
1455 ilines++;
1456 }
1457 }
1458
1459 ajStrTokenDel(&handle);
1460 ajStrDel(&tok);
1461
1462 if(!ilines)
1463 return ajFalse;
1464
1465 ajTrnDel(&trn);
1466
1467 return ajTrue;
1468 }
1469
1470
1471
1472
1473 /* @funcstatic codReadCodehop *************************************************
1474 **
1475 ** Read a codon index from a filename in FHCRC codehop program format
1476 **
1477 ** @param [w] thys [AjPCod] Codon object
1478 ** @param [u] inbuff [AjPFilebuff] Input file buffer
1479 **
1480 ** @return [AjBool] ajTrue on success
1481 ** @category input [AjPCod] Read codon index from a file
1482 **
1483 ** @release 3.0.0
1484 ** @@
1485 ******************************************************************************/
1486
codReadCodehop(AjPCod thys,AjPFilebuff inbuff)1487 static AjBool codReadCodehop(AjPCod thys, AjPFilebuff inbuff)
1488 {
1489 AjPStr line = NULL;
1490 AjPStrTok handle = NULL;
1491 AjPStr tok1 = NULL;
1492 AjPStr tok2 = NULL;
1493 AjPStr tok3 = NULL;
1494 AjPStr tok4 = NULL;
1495 ajlong i;
1496 ajint c;
1497 ajint idx;
1498 double fraction;
1499 double tcount;
1500 double res;
1501
1502 AjPTrn trn = NULL;
1503
1504 while(ajBuffreadLine(inbuff,&line))
1505 {
1506 if(ajStrGetCharFirst(line) == '-')
1507 break;
1508
1509 if(ajStrParseCountC(line, " \t\r\n[]") != 4)
1510 return ajFalse;
1511
1512 ajStrTokenAssignC(&handle, line, " \t\r\n[]");
1513 ajStrTokenNextParse(handle, &tok1);
1514 ajStrTokenNextParse(handle, &tok2);
1515 ajStrTokenNextParse(handle, &tok3);
1516 ajStrTokenNextParse(handle, &tok4);
1517
1518 if(!codIsFreq(tok1))
1519 return ajFalse;
1520
1521 if(!codIsCodon(&tok4))
1522 return ajFalse;
1523
1524 ajStrToDouble(tok1, &fraction);
1525 tcount = fraction * 1000.0;
1526
1527 if(!trn)
1528 trn = ajTrnNewI(0);
1529
1530 ajStrAssignK(&tok2, ajTrnCodonS(trn, tok4));
1531 c = ajBasecodeToInt((ajint)ajStrGetCharFirst(tok2));
1532
1533 if(c>25)
1534 c=27; /* stop */
1535
1536 idx = ajCodIndex(tok4);
1537 thys->aa[idx] = c;
1538 thys->tcount[idx] = tcount;
1539 }
1540
1541 ajBuffreadLine(inbuff,&line);
1542 ajBuffreadLine(inbuff,&line);
1543 ajStrRemoveWhiteExcess(&line);
1544
1545 if(ajStrPrefixC(line, "Codon usage for "))
1546 {
1547 ajStrCutStart(&line, 16);
1548 i = ajStrFindlastC(line, ":");
1549
1550 if(i>0)
1551 {
1552 ajStrAssignSubS(&thys->Species, line, 0, i-1);
1553 ajStrAssignSubS(&tok1, line, i+1, -1);
1554 ajStrRemoveWhiteExcess(&tok1);
1555
1556 if(codIsNumber(tok1))
1557 ajStrToInt(tok1, &thys->CdsCount);
1558 }
1559 }
1560 ajBuffreadLine(inbuff,&line);
1561 ajStrRemoveWhiteExcess(&line);
1562
1563 if(ajStrPrefixC(line, "from "))
1564 {
1565 ajStrCutStart(&line, 5);
1566 ajStrRemoveWhiteExcess(&line);
1567 i = ajStrFindlastC(line, ":");
1568
1569 if(i>0)
1570 {
1571 ajStrAssignSubS(&tok1, line, 0, i-1);
1572 ajStrAssignSubS(&tok2, line, i+1, -1);
1573 ajStrRemoveWhiteExcess(&tok1);
1574 ajStrRemoveWhiteExcess(&tok2);
1575 i = ajStrFindC(tok1, "/");
1576
1577 if(i>0)
1578 {
1579 ajStrAssignSubS(&thys->Release, tok1, 0, i-1);
1580 ajStrAssignSubS(&thys->Division, tok1, i+1, -1);
1581 }
1582 else
1583 ajStrAssignS(&thys->Release, tok1);
1584
1585 i = ajStrFindC(tok2, " ");
1586
1587 if(i>0)
1588 {
1589 ajStrAssignSubS(&tok3, tok2, 0, i-1);
1590
1591 if(codIsNumber(tok3))
1592 ajStrToInt(tok3, &thys->CodonCount);
1593 }
1594 }
1595 }
1596
1597 if(thys->CodonCount)
1598 for(i=0;i<64;i++)
1599 {
1600 res = ((double)thys->CodonCount* thys->tcount[i] / 1000.0 + 0.1);
1601 thys->num[i] = (ajint) res;
1602 }
1603
1604 ajStrDel(&tok1);
1605 ajStrDel(&tok2);
1606 ajStrDel(&tok3);
1607 ajStrDel(&tok4);
1608 ajStrDel(&line);
1609 ajStrTokenDel(&handle);
1610 ajTrnDel(&trn);
1611
1612 return ajTrue;
1613 }
1614
1615
1616
1617
1618 /* @funcstatic codReadGcg *****************************************************
1619 **
1620 ** Read a codon index from a filename in GCG format
1621 **
1622 ** @param [w] thys [AjPCod] Codon object
1623 ** @param [u] inbuff [AjPFilebuff] Input file buffer
1624 **
1625 ** @return [AjBool] ajTrue on success
1626 ** @category input [AjPCod] Read codon index from a file
1627 **
1628 ** @release 3.0.0
1629 ** @@
1630 ******************************************************************************/
1631
codReadGcg(AjPCod thys,AjPFilebuff inbuff)1632 static AjBool codReadGcg(AjPCod thys, AjPFilebuff inbuff)
1633 {
1634 AjPStr line = NULL;
1635 AjBool header = ajTrue;
1636 AjPStr saveheader = NULL;
1637 AjPStr tok1 = NULL;
1638 ajlong i;
1639
1640 while(ajBuffreadLine(inbuff,&line))
1641 {
1642 if(ajStrSuffixC(line, "..\n"))
1643 {
1644 header = ajFalse;
1645 continue;
1646 }
1647
1648 if(header)
1649 {
1650 ajStrRemoveWhiteExcess(&line);
1651
1652 if(!ajStrGetLen(line))
1653 continue;
1654
1655 if(ajStrGetCharFirst(line) == '!')
1656 continue;
1657
1658 if(ajStrGetCharFirst(line) == '#')
1659 {
1660 codCommentProcess(thys, line);
1661 continue;
1662 }
1663
1664 i = ajStrFindC(line, " genes found in "); /* Lossy cast */
1665
1666 if(i == -1)
1667 ajStrAssignS(&saveheader, line);
1668 else
1669 {
1670 ajStrAssignSubS(&tok1, line, 0, i-1);
1671
1672 if(!codIsNumber(tok1))
1673 continue;
1674
1675 ajStrToInt(tok1, &thys->CdsCount);
1676 ajStrAssignSubS(&thys->Release, line, i+16, -1);
1677 ajStrTrimEndC(&thys->Release, ".");
1678 ajStrExchangeKK(&thys->Release, ' ', '_');
1679 ajStrAssignS(&thys->Species, saveheader);
1680 }
1681 continue;
1682 }
1683
1684 codGcgProcess(thys, line);
1685 }
1686
1687 ajStrDel(&line);
1688 ajStrDel(&saveheader);
1689 ajStrDel(&tok1);
1690
1691 if(header)
1692 return ajFalse;
1693
1694 return ajTrue;
1695 }
1696
1697
1698
1699
1700 /* @funcstatic codGcgProcess **************************************************
1701 **
1702 ** Read a codon index from a filename in GCG format
1703 **
1704 ** @param [u] thys [AjPCod] Codon object
1705 ** @param [r] line [const AjPStr] Input line
1706 **
1707 ** @return [AjBool] ajTrue on success
1708 **
1709 ** @release 3.0.0
1710 ** @@
1711 ******************************************************************************/
1712
codGcgProcess(AjPCod thys,const AjPStr line)1713 static AjBool codGcgProcess(AjPCod thys, const AjPStr line)
1714 {
1715 AjPStrTok handle = NULL;
1716 AjPStr tok = NULL;
1717 ajint idx;
1718 ajint c;
1719 ajint num;
1720 double tcount;
1721 double fraction;
1722
1723 if(ajStrParseCountC(line, " \t\n\r") != 5)
1724 return ajFalse;
1725
1726 ajStrTokenAssignC(&handle, line, " \t\r\n");
1727
1728 ajStrTokenNextParse(handle, &tok);
1729
1730 if(!codIsAa3(&tok, &c))
1731 return ajFalse;
1732
1733 ajStrTokenNextParse(handle, &tok);
1734
1735 if(!codIsCodon(&tok))
1736 return ajFalse;
1737
1738 idx = ajCodIndex(tok);
1739
1740 ajStrTokenNextParse(handle, &tok);
1741
1742 if(!codIsNumberF(&tok))
1743 return ajFalse;
1744
1745 ajStrToInt(tok, &num);
1746
1747 ajStrTokenNextParse(handle, &tok);
1748
1749 if(!codIsFreq(tok))
1750 return ajFalse;
1751
1752 ajStrToDouble(tok, &tcount);
1753
1754 ajStrTokenNextParse(handle, &tok);
1755
1756 if(!codIsFraction(tok))
1757 return ajFalse;
1758
1759 ajStrToDouble(tok, &fraction);
1760
1761 thys->aa[idx] = c;
1762 thys->tcount[idx] = tcount;
1763 thys->fraction[idx] = fraction;
1764 thys->num[idx] = num;
1765
1766 ajStrTokenDel(&handle);
1767 ajStrDel(&tok);
1768
1769 return ajTrue;
1770 }
1771
1772
1773
1774
1775 /* @funcstatic codCommentProcess **********************************************
1776 **
1777 ** Read codon usage extra information from an EMBOSS-style comment
1778 **
1779 ** @param [w] thys [AjPCod] Codon object
1780 ** @param [r] ccline [const AjPStr] Input line
1781 **
1782 ** @return [AjBool] ajTrue if something was read
1783 **
1784 ** @release 3.0.0
1785 ** @@
1786 ******************************************************************************/
1787
codCommentProcess(AjPCod thys,const AjPStr ccline)1788 static AjBool codCommentProcess(AjPCod thys, const AjPStr ccline)
1789 {
1790 ajStrAssignS(&codTmpLine, ccline);
1791
1792 if(ajStrPrefixC(codTmpLine, "#Species:"))
1793 {
1794 ajStrCutStart(&codTmpLine, 9);
1795 ajStrRemoveWhiteExcess(&codTmpLine);
1796 ajStrAssignS(&thys->Species, codTmpLine);
1797
1798 return ajTrue;
1799 }
1800 else if(ajStrPrefixC(codTmpLine, "#Division:"))
1801 {
1802 ajStrCutStart(&codTmpLine, 10);
1803 ajStrRemoveWhiteExcess(&codTmpLine);
1804 ajStrAssignS(&thys->Division, codTmpLine);
1805
1806 return ajTrue;
1807 }
1808 else if(ajStrPrefixC(codTmpLine, "#Release:"))
1809 {
1810 ajStrCutStart(&codTmpLine, 9);
1811 ajStrRemoveWhiteExcess(&codTmpLine);
1812 ajStrAssignS(&thys->Release, codTmpLine);
1813
1814 return ajTrue;
1815 }
1816 else if(ajStrPrefixC(codTmpLine, "#Description:"))
1817 {
1818 ajStrCutStart(&codTmpLine, 13);
1819 ajStrRemoveWhiteExcess(&codTmpLine);
1820 ajStrAssignS(&thys->Desc, codTmpLine);
1821
1822 return ajTrue;
1823 }
1824 else if(ajStrPrefixC(codTmpLine, "#CdsCount:"))
1825 {
1826 ajStrCutStart(&codTmpLine, 10);
1827 ajStrRemoveWhiteExcess(&codTmpLine);
1828 ajStrToInt(codTmpLine, &thys->CdsCount);
1829
1830 return ajTrue;
1831 }
1832 else if(ajStrPrefixC(codTmpLine, "#CodonCount:"))
1833 {
1834 ajStrCutStart(&codTmpLine, 12);
1835 ajStrRemoveWhiteExcess(&codTmpLine);
1836 ajStrToInt(codTmpLine, &thys->CodonCount);
1837
1838 return ajTrue;
1839 }
1840 else if(ajStrPrefixC(codTmpLine, "#GeneticCode:"))
1841 {
1842 ajStrCutStart(&codTmpLine, 13);
1843 ajStrRemoveWhiteExcess(&codTmpLine);
1844 ajStrToInt(codTmpLine, &thys->GeneticCode);
1845 }
1846
1847 return ajFalse;
1848 }
1849
1850
1851
1852
1853 /* @funcstatic codIsCodon *****************************************************
1854 **
1855 ** Checks a string is a codon, makes sure it is ACGT only
1856 **
1857 ** @param [u] token [AjPStr*] String
1858 **
1859 ** @return [AjBool] ajTrue if string is a valid codon sequence
1860 **
1861 ** @release 3.0.0
1862 ** @@
1863 ******************************************************************************/
1864
codIsCodon(AjPStr * token)1865 static AjBool codIsCodon (AjPStr* token)
1866 {
1867 char* cp;
1868
1869 if(ajStrGetLen(*token) != 3)
1870 return ajFalse;
1871
1872 for(cp = ajStrGetuniquePtr(token); *cp; cp++)
1873 {
1874 if(islower((ajint) *cp))
1875 *cp = toupper((ajint) *cp);
1876
1877 if(*cp == 'U')
1878 *cp = 'T';
1879
1880 if(!strchr("ACGT",*cp))
1881 return ajFalse;
1882 }
1883
1884 return ajTrue;
1885 }
1886
1887
1888
1889
1890 /* @funcstatic codIsAa ********************************************************
1891 **
1892 ** Checks a string is a codon, makes sure it is an amino acid or a stop (*).
1893 **
1894 ** @param [u] token [AjPStr*] String
1895 **
1896 ** @return [AjBool] ajTrue if string is a valid amino acid code
1897 **
1898 ** @release 3.0.0
1899 ** @@
1900 ******************************************************************************/
1901
codIsAa(AjPStr * token)1902 static AjBool codIsAa(AjPStr* token)
1903 {
1904 char* cp;
1905
1906 if(ajStrGetLen(*token) != 1)
1907 return ajFalse;
1908
1909 cp = ajStrGetuniquePtr(token);
1910
1911 if(islower((ajint) *cp))
1912 *cp = toupper((ajint) *cp);
1913
1914 if(!strchr("ACDEFGHIKLMNPQRSTUVWY*",*cp)) /* SelenoCysteine is U */
1915 return ajFalse;
1916
1917 return ajTrue;
1918 }
1919
1920
1921
1922
1923 /* @funcstatic codIsAa3 *******************************************************
1924 **
1925 ** Checks a string is a 3 letter amino acid code
1926 **
1927 ** @param [u] token [AjPStr*] String
1928 ** @param [w] idx [ajint*] Amino acid index number
1929 **
1930 ** @return [AjBool] ajTrue if string is a valid amino acid 3-letter code
1931 **
1932 ** @release 3.0.0
1933 ** @@
1934 ******************************************************************************/
1935
codIsAa3(AjPStr * token,ajint * idx)1936 static AjBool codIsAa3(AjPStr* token, ajint* idx)
1937 {
1938 const char* cp;
1939 ajint i;
1940
1941 static const char *tab[]=
1942 {
1943 "Ala","Asx","Cys","Asp","Glu","Phe","Gly","His",
1944 "Ile","" ,"Lys","Leu","Met","Asn","" ,"Pro",
1945 "Gln","Arg","Ser","Thr","Sel","Val","Trp","" ,
1946 "Tyr","" ,"" ,"End",NULL
1947 };
1948
1949 if(ajStrGetLen(*token) != 3)
1950 return ajFalse;
1951
1952 for(i=0;tab[i]; i++)
1953 {
1954 cp = tab[i];
1955
1956 if(!*cp)
1957 continue;
1958
1959 if(ajStrMatchCaseC(*token, cp))
1960 {
1961 if(!ajStrMatchC(*token, cp))
1962 ajStrAssignC(token, cp);
1963
1964 *idx = i;
1965 return ajTrue;
1966 }
1967 }
1968
1969 return ajFalse;
1970 }
1971
1972
1973
1974
1975 /* @funcstatic codIsNumber ****************************************************
1976 **
1977 ** Checks a string is numeric.
1978 **
1979 ** @param [r] token [const AjPStr] String
1980 **
1981 ** @return [AjBool] ajTrue if string is a valid integer
1982 **
1983 ** @release 3.0.0
1984 ** @@
1985 ******************************************************************************/
1986
codIsNumber(const AjPStr token)1987 static AjBool codIsNumber(const AjPStr token)
1988 {
1989 ajint ilen;
1990
1991 ilen = ajStrGetLen(token);
1992
1993 if(!ilen)
1994 return ajFalse;
1995
1996 if(ajStrFindRestC(token, "0123456789") != -1)
1997 return ajFalse;
1998
1999 return ajTrue;
2000 }
2001
2002
2003
2004
2005 /* @funcstatic codIsNumberF ***************************************************
2006 **
2007 ** Checks a string is a number, optionally ending in .000
2008 **
2009 ** @param [u] token [AjPStr*] String
2010 **
2011 ** @return [AjBool] ajTrue if string is a valid floating point number
2012 **
2013 ** @release 3.0.0
2014 ** @@
2015 ******************************************************************************/
2016
codIsNumberF(AjPStr * token)2017 static AjBool codIsNumberF(AjPStr * token)
2018 {
2019 ajuint ilen;
2020 AjPStr tmpstr = NULL;
2021
2022 ilen = ajStrGetLen(*token);
2023 if(!ilen)
2024 return ajFalse;
2025
2026 if(ajStrFindRestC(*token, "0123456789") == -1)
2027 return ajTrue;
2028
2029 ajStrAssignS(&tmpstr, *token);
2030
2031 ilen = ajStrGetLen(tmpstr);
2032 ajStrTrimEndC(&tmpstr, "0");
2033
2034 if(ajStrGetLen(tmpstr) < ilen)
2035 ilen = ajStrGetLen(tmpstr);
2036
2037 ajStrTrimEndC(&tmpstr,".");
2038
2039 if(ajStrGetLen(tmpstr) < ilen)
2040 {
2041 ajStrAssignS(token, tmpstr);
2042 ajStrDel(&tmpstr);
2043 return ajTrue;
2044 }
2045
2046 ajStrDel(&tmpstr);
2047
2048 return ajFalse;
2049 }
2050
2051
2052
2053
2054 /* @funcstatic codCalcFraction ************************************************
2055 **
2056 ** Calculates the fraction values for a codon usage object
2057 **
2058 ** @param [u] thys [AjPCod] Codon usage object
2059 **
2060 ** @return [void]
2061 **
2062 ** @release 3.0.0
2063 ** @@
2064 ******************************************************************************/
2065
codCalcFraction(AjPCod thys)2066 static void codCalcFraction(AjPCod thys)
2067 {
2068 ajint count[64];
2069 ajint i;
2070 ajint j;
2071 ajint idx;
2072 ajint icount;
2073 double fsum = 0.0;
2074 const char* cp;
2075 const char* aa = "ACDEFGHIKLMNPQRSTUVWY*";
2076
2077 cp = aa;
2078 while (*cp)
2079 {
2080 if(*cp == '*')
2081 idx = 27;
2082 else
2083 idx = *cp - 'A';
2084
2085 icount = 0;
2086 fsum = 0.0;
2087
2088 for(i=0;i<64;i++)
2089 {
2090 if(thys->aa[i] == idx)
2091 {
2092 count[icount++] = i;
2093 fsum += thys->tcount[i];
2094 }
2095 }
2096
2097 for(j=0;j<icount;j++)
2098 {
2099 /* so we know we have fsum > 0.0 */
2100 if(!E_FPZERO(thys->tcount[count[j]],U_DEPS))
2101 thys->fraction[count[j]] = thys->tcount[count[j]]/fsum;
2102 else
2103 thys->fraction[count[j]] = 0.0;
2104 }
2105 cp++;
2106 }
2107
2108 return;
2109 }
2110
2111
2112
2113
2114 /* @funcstatic codIsFraction **************************************************
2115 **
2116 ** Checks a string is a fraction between 0.0 and 1.0.
2117 **
2118 ** @param [r] token [const AjPStr] String
2119 **
2120 ** @return [AjBool] ajTrue if string is a valid fraction between 0.0 and 1.0
2121 **
2122 ** @release 3.0.0
2123 ** @@
2124 ******************************************************************************/
2125
codIsFraction(const AjPStr token)2126 static AjBool codIsFraction(const AjPStr token)
2127 {
2128 const char* cp;
2129 ajuint ilen;
2130 AjBool StartOne = ajFalse;;
2131
2132 ilen = ajStrGetLen(token);
2133 if(!ilen)
2134 return ajFalse;
2135
2136 cp = ajStrGetPtr(token);
2137
2138 if(*cp == '0')
2139 {
2140 cp++;
2141 ilen--;
2142 }
2143 else if(*cp == '1')
2144 {
2145 cp++;
2146 ilen--;
2147 StartOne = ajTrue;
2148 }
2149
2150 if(*cp == '.')
2151 {
2152 cp++;
2153 ilen--;
2154 }
2155
2156 if(!*cp)
2157 return ajTrue;
2158
2159 if(StartOne)
2160 {
2161 if(ilen != strspn(cp, "0"))
2162 return ajFalse;
2163 }
2164 else
2165 {
2166 if(ilen != strspn(cp, "0123456789"))
2167 return ajFalse;
2168 }
2169
2170 return ajTrue;
2171 }
2172
2173
2174
2175
2176 /* @funcstatic codIsFreq ******************************************************
2177 **
2178 ** Checks a string is a frequency between 0.0 and 1000.0.
2179 **
2180 ** @param [r] token [const AjPStr] String
2181 **
2182 ** @return [AjBool] ajTrue if string is a valid frequency between
2183 ** 0.0 and 1000.0
2184 **
2185 ** @release 3.0.0
2186 ** @@
2187 ******************************************************************************/
2188
codIsFreq(const AjPStr token)2189 static AjBool codIsFreq(const AjPStr token)
2190 {
2191 const char* cp;
2192 float f;
2193 ajuint ilen;
2194
2195 ilen = ajStrGetLen(token);
2196 if(!ilen)
2197 return ajFalse;
2198
2199 cp = ajStrGetPtr(token);
2200
2201 if(ilen != strspn(cp, "0123456789."))
2202 return ajFalse;
2203
2204 if(!ajStrToFloat(token, &f))
2205 return ajFalse;
2206
2207 if(f < 0.0)
2208 return ajFalse;
2209
2210 if(f > 1000.0)
2211 return ajFalse;
2212
2213 return ajTrue;
2214 }
2215
2216
2217
2218
2219 /* @func ajCodSetBacktranslate ************************************************
2220 **
2221 ** Fill the codon usage object "back" element with the most commonly
2222 ** used triplet index for the amino acids
2223 **
2224 ** @param [u] thys [AjPCod] codon usage structure
2225 **
2226 ** @return [void]
2227 **
2228 ** @release 1.0.0
2229 ** @@
2230 ******************************************************************************/
2231
ajCodSetBacktranslate(AjPCod thys)2232 void ajCodSetBacktranslate(AjPCod thys)
2233 {
2234 ajint i;
2235 ajint aa;
2236 double f;
2237 double f2;
2238
2239 if(!thys)
2240 ajFatal("Codon usage object uninitialised");
2241
2242 for(i=0;i<AJCODAMINOS;++i)
2243 thys->back[i] = -1;
2244
2245 for(i=0;i<AJCODSTART;++i)
2246 {
2247 aa = thys->aa[i];
2248
2249 if(thys->back[aa]<0)
2250 thys->back[aa] = i;
2251
2252 f = thys->fraction[i];
2253 f2 = thys->fraction[thys->back[aa]];
2254
2255 if(f>f2)
2256 thys->back[aa] = i;
2257 }
2258
2259 return;
2260 }
2261
2262
2263
2264
2265 /* @func ajCodTriplet *********************************************************
2266 **
2267 ** Convert triplet index to triple
2268 **
2269 ** @param [r] idx [ajint] triplet index
2270 **
2271 ** @return [char*] Triplet
2272 **
2273 ** @release 1.0.0
2274 ** @@
2275 ******************************************************************************/
2276
ajCodTriplet(ajint idx)2277 char* ajCodTriplet(ajint idx)
2278 {
2279 static char ret[4] = "AAA";
2280 const char *conv = "ACGT";
2281
2282 char *p;
2283
2284 p=ret;
2285
2286 *p++ = conv[idx>>4];
2287 *p++ = conv[(idx & 0xf)>>2];
2288 *p = conv[idx & 0x3];
2289
2290 return ret;
2291 }
2292
2293
2294
2295
2296 /* @func ajCodWriteOut ********************************************************
2297 **
2298 ** Write codon structure to output file
2299 **
2300 ** @param [r] thys [const AjPCod] codon usage
2301 ** @param [u] outf [AjPOutfile] output file
2302 **
2303 ** @return [void]
2304 ** @category output [AjPCod] Write codon structure to output file
2305 **
2306 ** @release 3.0.0
2307 ** @@
2308 ******************************************************************************/
ajCodWriteOut(const AjPCod thys,AjPOutfile outf)2309 void ajCodWriteOut(const AjPCod thys, AjPOutfile outf)
2310 {
2311 ajint i;
2312
2313 for(i=0;codInFormatDef[i].Name; i++)
2314 {
2315 if(ajStrMatchCaseC(ajOutfileGetFormat(outf), codoutFormatDef[i].Name))
2316 {
2317 (*codoutFormatDef[i].Write)(thys, ajOutfileGetFile(outf));
2318 return;
2319 }
2320 }
2321
2322 return;
2323 }
2324
2325
2326
2327
2328 /* @func ajCodWrite ***********************************************************
2329 **
2330 ** Write codon structure to output file
2331 **
2332 ** @param [u] thys [AjPCod] codon usage
2333 ** @param [u] outf [AjPFile] output file
2334 **
2335 ** @return [void]
2336 ** @category output [AjPCod] Write codon structure to output file
2337 **
2338 ** @release 1.0.0
2339 ** @@
2340 ******************************************************************************/
2341
ajCodWrite(AjPCod thys,AjPFile outf)2342 void ajCodWrite(AjPCod thys, AjPFile outf)
2343 {
2344 codFix(thys);
2345 codWriteEmboss(thys, outf);
2346 }
2347
2348
2349
2350
2351 /* @funcstatic codWriteEmboss *************************************************
2352 **
2353 ** Write codon structure to output file in EMBOSS format
2354 **
2355 ** @param [r] thys [const AjPCod] codon usage
2356 ** @param [u] outf [AjPFile] output file
2357 **
2358 ** @return [void]
2359 ** @category output [AjPCod] Write codon structure to output file
2360 **
2361 ** @release 3.0.0
2362 ** @@
2363 ******************************************************************************/
2364
codWriteEmboss(const AjPCod thys,AjPFile outf)2365 static void codWriteEmboss(const AjPCod thys, AjPFile outf)
2366 {
2367 ajint i;
2368 ajint j;
2369 const char* codon;
2370 ajint gc[3] = {0,0,0};
2371 float totgc = 0.0;
2372
2373 if(ajStrGetLen(thys->Species))
2374 ajFmtPrintF(outf, "#Species: %S\n", thys->Species);
2375
2376 if(ajStrGetLen(thys->Division))
2377 ajFmtPrintF(outf, "#Division: %S\n", thys->Division);
2378
2379 if(ajStrGetLen(thys->Release))
2380 ajFmtPrintF(outf, "#Release: %S\n", thys->Release);
2381
2382 if(thys->GeneticCode)
2383 ajFmtPrintF(outf, "#GeneticCode: %d\n", thys->GeneticCode);
2384
2385 if(thys->CdsCount)
2386 ajFmtPrintF(outf, "#CdsCount: %d\n", thys->CdsCount);
2387
2388 for(j=0;j<AJCODSTART;++j)
2389 {
2390 codon = ajCodTriplet(j);
2391
2392 for(i=0;i<3;i++)
2393 if(codon[i] == 'C' || codon[i] == 'G')
2394 gc[i] += thys->num[j];
2395 }
2396
2397 totgc = (float) (gc[0] + gc[1] + gc[2]);
2398 ajFmtPrintF(outf, "\n#Coding GC %5.2f%%\n",
2399 (100.0 * totgc/(float)thys->CodonCount/3.0));
2400 ajFmtPrintF(outf, "#1st letter GC %5.2f%%\n",
2401 (100.0 * (float)gc[0]/(float)thys->CodonCount));
2402 ajFmtPrintF(outf, "#2nd letter GC %5.2f%%\n",
2403 (100.0 * (float)gc[1]/(float)thys->CodonCount));
2404 ajFmtPrintF(outf, "#3rd letter GC %5.2f%%\n\n",
2405 (100.0 * (float)gc[2]/(float)thys->CodonCount));
2406
2407 ajFmtPrintF(outf, "#Codon AA Fraction Frequency Number\n");
2408
2409 for(i=0;i<AJCODAMINOS-2;++i)
2410 for(j=0;j<AJCODSTART;++j)
2411 if(thys->aa[j]==i)
2412 ajFmtPrintF(outf,"%s %c %8.3f %9.3f %6d\n",
2413 ajCodTriplet(j),
2414 i+'A',thys->fraction[j],thys->tcount[j],
2415 thys->num[j]);
2416
2417 for(j=0;j<AJCODSTART;++j)
2418 if(thys->aa[j]==27)
2419 ajFmtPrintF(outf,"%s * %8.3f %9.3f %6d\n",
2420 ajCodTriplet(j),
2421 thys->fraction[j],thys->tcount[j],thys->num[j]);
2422
2423 return;
2424 }
2425
2426
2427
2428
2429 /* @funcstatic codWriteSpsum **************************************************
2430 **
2431 ** Write codon structure to output file inSpsum format
2432 **
2433 ** @param [r] thys [const AjPCod] codon usage
2434 ** @param [u] outf [AjPFile] output file
2435 **
2436 ** @return [void]
2437 ** @category output [AjPCod] Write codon structure to output file
2438 **
2439 ** @release 3.0.0
2440 ** @@
2441 ******************************************************************************/
2442
codWriteSpsum(const AjPCod thys,AjPFile outf)2443 static void codWriteSpsum(const AjPCod thys, AjPFile outf)
2444 {
2445 AjPStr species = NULL;
2446 ajint i;
2447 ajint j;
2448
2449
2450 ajStrAssignS(&species, thys->Species);
2451 ajStrAssignEmptyC(&species, "unknown species");
2452
2453 ajFmtPrintF(outf, "%S: %d\n", species, thys->CdsCount);
2454
2455 for(i=0;i<64;i++)
2456 {
2457 j = ajCodIndexC(spsumcodons[i]);
2458
2459 if(i)
2460 ajFmtPrintF(outf, " %d", thys->num[j]);
2461 else
2462 ajFmtPrintF(outf, "%d", thys->num[j]);
2463 }
2464
2465 ajFmtPrintF(outf, "\n");
2466
2467 ajStrDel(&species);
2468
2469 return;
2470 }
2471
2472
2473
2474
2475 /* @funcstatic codWriteCutg ***************************************************
2476 **
2477 ** Write codon structure to output file in Cutg format
2478 **
2479 ** @param [r] thys [const AjPCod] codon usage
2480 ** @param [u] outf [AjPFile] output file
2481 **
2482 ** @return [void]
2483 ** @category output [AjPCod] Write codon structure to output file
2484 **
2485 ** @release 3.0.0
2486 ** @@
2487 ******************************************************************************/
2488
codWriteCutg(const AjPCod thys,AjPFile outf)2489 static void codWriteCutg(const AjPCod thys, AjPFile outf)
2490 {
2491 const char* c0;
2492 const char* c1;
2493 const char* c2;
2494 const char* bases = "UCAG";
2495 char codon[4]="GGG";
2496 ajint i;
2497 float totgc;
2498 ajint gc[3] = {0,0,0};
2499 AjPStr species = NULL;
2500 AjPStr division = NULL;
2501 AjPStr minusline = NULL;
2502
2503 ajStrAssignS(&species, thys->Species);
2504 ajStrAssignEmptyC(&species, "unknown species");
2505
2506 ajStrAssignS(&division, thys->Division);
2507 ajStrAssignEmptyC(&division, "unknown.division");
2508
2509 ajFmtPrintF(outf, "%S [%S]: %d CDS's (%d codons)\n",
2510 species, division, thys->CdsCount, thys->CodonCount);
2511
2512 ajStrAppendCountK(&minusline, '-', 80);
2513 ajFmtPrintF(outf, "%S\n", minusline);
2514 ajFmtPrintF(outf,
2515 "fields: [triplet] [frequency: per thousand] ([number])\n");
2516 ajFmtPrintF(outf, "%S\n\n", minusline);
2517
2518 c0 = bases;
2519
2520 while(*c0)
2521 {
2522 codon[0]=*c0;
2523 c2 = bases;
2524
2525 while(*c2)
2526 {
2527 codon[2]=*c2;
2528 c1 = bases;
2529
2530 while(*c1)
2531 {
2532 codon[1]=*c1;
2533 i = ajCodIndexC(codon);
2534
2535 if(*c0 == 'C' || *c0 == 'G')
2536 gc[0] += thys->num[i];
2537
2538 if(*c1 == 'C' || *c1 == 'G')
2539 gc[1] += thys->num[i];
2540
2541 if(*c2 == 'C' || *c2 == 'G')
2542 gc[2] += thys->num[i];
2543
2544 ajFmtPrintF(outf, "%s %4.1f(%6d)",
2545 codon, thys->tcount[i], thys->num[i]);
2546 c1++;
2547
2548 if(*c1)
2549 ajFmtPrintF(outf, " ");
2550 }
2551
2552 ajFmtPrintF(outf, "\n");
2553 c2++;
2554 }
2555
2556 c0++;
2557
2558 if(*c0)
2559 ajFmtPrintF(outf, "\n");
2560 }
2561
2562 ajFmtPrintF(outf, "\n\n");
2563 ajFmtPrintF(outf, "%S\n", minusline);
2564 totgc = (float) (gc[0] + gc[1] + gc[2]);
2565 ajFmtPrintF(outf, "Coding GC %5.2f%% 1st letter GC %5.2f%% "
2566 "2nd letter GC %5.2f%% 3rd letter GC %5.2f%%\n",
2567 (100.0 * totgc/(float)thys->CodonCount/3.0),
2568 (100.0 * (float)gc[0]/(float)thys->CodonCount),
2569 (100.0 * (float)gc[1]/(float)thys->CodonCount),
2570 (100.0 * (float)gc[2]/(float)thys->CodonCount));
2571
2572 ajStrDel(&species);
2573 ajStrDel(&division);
2574 ajStrDel(&minusline);
2575
2576 return;
2577 }
2578
2579
2580
2581
2582 /* @funcstatic codWriteCutgaa *************************************************
2583 **
2584 ** Write codon structure to output file in Cutg format with amino acids
2585 **
2586 ** @param [r] thys [const AjPCod] codon usage
2587 ** @param [u] outf [AjPFile] output file
2588 **
2589 ** @return [void]
2590 ** @category output [AjPCod] Write codon structure to output file
2591 **
2592 ** @release 3.0.0
2593 ** @@
2594 ******************************************************************************/
2595
codWriteCutgaa(const AjPCod thys,AjPFile outf)2596 static void codWriteCutgaa(const AjPCod thys, AjPFile outf)
2597 {
2598 const char* c0;
2599 const char* c1;
2600 const char* c2;
2601 const char* bases = "UCAG";
2602 char codon[4]="GGG";
2603 ajint i;
2604 float totgc;
2605 ajint gc[3] = {0,0,0};
2606 AjPStr species = NULL;
2607 AjPStr division = NULL;
2608 AjPStr minusline = NULL;
2609 char aa;
2610
2611 ajStrAssignS(&species, thys->Species);
2612 ajStrAssignEmptyC(&species, "unknown species");
2613
2614 ajStrAssignS(&division, thys->Division);
2615 ajStrAssignEmptyC(&division, "unknown.division");
2616
2617 ajFmtPrintF(outf, "%S [%S]: %d CDS's (%d codons)\n",
2618 species, division, thys->CdsCount, thys->CodonCount);
2619
2620 ajStrAppendCountK(&minusline, '-', 80);
2621 ajFmtPrintF(outf, "%S\n", minusline);
2622 ajFmtPrintF(outf,
2623 "fields: [triplet] [frequency: per thousand] ([number])\n");
2624 ajFmtPrintF(outf, "%S\n\n", minusline);
2625
2626 c0 = bases;
2627
2628 while(*c0)
2629 {
2630 codon[0]=*c0;
2631 c2 = bases;
2632
2633 while(*c2)
2634 {
2635 codon[2]=*c2;
2636 c1 = bases;
2637
2638 while(*c1)
2639 {
2640 codon[1]=*c1;
2641 i = ajCodIndexC(codon);
2642
2643 if(*c0 == 'C' || *c0 == 'G')
2644 gc[0] += thys->num[i];
2645 if(*c1 == 'C' || *c1 == 'G')
2646 gc[1] += thys->num[i];
2647 if(*c2 == 'C' || *c2 == 'G')
2648 gc[2] += thys->num[i];
2649 if(thys->aa[i] > 25)
2650 aa = '*';
2651 else
2652 aa = 'A' + thys->aa[i];
2653
2654 ajFmtPrintF(outf, "%s %c %4.2f %4.1f (%6d)",
2655 codon, aa,
2656 thys->fraction[i], thys->tcount[i], thys->num[i]);
2657 c1++;
2658
2659 if(*c1)
2660 ajFmtPrintF(outf, " ");
2661 }
2662
2663 ajFmtPrintF(outf, "\n");
2664 c2++;
2665 }
2666
2667 c0++;
2668
2669 if(*c0)
2670 ajFmtPrintF(outf, "\n");
2671 }
2672
2673 ajFmtPrintF(outf, "\n\n");
2674 ajFmtPrintF(outf, "%S\n", minusline);
2675 totgc = (float) (gc[0] + gc[1] + gc[2]);
2676 ajFmtPrintF(outf, "Coding GC %5.2f%% 1st letter GC %5.2f%% "
2677 "2nd letter GC %5.2f%% 3rd letter GC %5.2f%%\n",
2678 (100.0 * totgc/(float)thys->CodonCount/3.0),
2679 (100.0 * (float)gc[0]/(float)thys->CodonCount),
2680 (100.0 * (float)gc[1]/(float)thys->CodonCount),
2681 (100.0 * (float)gc[2]/(float)thys->CodonCount));
2682
2683 ajFmtPrintF(outf, "Genetic code %d: %S\n",
2684 thys->GeneticCode, ajTrnName(thys->GeneticCode));
2685 ajStrDel(&species);
2686 ajStrDel(&division);
2687 ajStrDel(&minusline);
2688
2689 return;
2690 }
2691
2692
2693
2694
2695 /* @funcstatic codWriteCherry *************************************************
2696 **
2697 ** Write codon structure to output file in Mike Cherry's codon usage database
2698 ** format.
2699 **
2700 ** Very similar to GCG format, has a structured header comment from
2701 ** which we can extract the species, CDS count, and release.
2702 **
2703 ** @param [r] thys [const AjPCod] codon usage
2704 ** @param [u] outf [AjPFile] output file
2705 **
2706 ** @return [void]
2707 ** @category output [AjPCod] Write codon structure to output file
2708 **
2709 ** @release 3.0.0
2710 ** @@
2711 ******************************************************************************/
2712
codWriteCherry(const AjPCod thys,AjPFile outf)2713 static void codWriteCherry(const AjPCod thys, AjPFile outf)
2714 {
2715 const char* c0;
2716 const char* c1;
2717 const char* c2;
2718 const char* bases = "GATC";
2719 char codon[4]="GGG";
2720 ajint i;
2721 AjPStr ThreeAa = NULL;
2722 AjPStr release = NULL;
2723 AjPStr species = NULL;
2724 AjPStr division = NULL;
2725
2726 ajStrAssignS(&release, thys->Release);
2727 ajStrAssignEmptyC(&release, "unknowndb0.0");
2728
2729 ajStrAssignS(&species, thys->Species);
2730 ajStrAssignEmptyC(&species, "unknown species");
2731
2732 ajStrAssignS(&division, thys->Division);
2733 ajStrAssignEmptyC(&division, "unknown.division");
2734
2735 ajFmtPrintF(outf, "\n%S\n\n", species);
2736 ajFmtPrintF(outf, "%d genes found in %S\n\n", thys->CdsCount, release);
2737 ajFmtPrintF(outf, "AmAcid Codon Number /1000 Fraction ..\n");
2738
2739 c0 = bases;
2740 while(*c0)
2741 {
2742 codon[0]=*c0;
2743 c1 = bases;
2744
2745 while(*c1)
2746 {
2747 codon[1]=*c1;
2748 ajFmtPrintF(outf, " \n");
2749 c2 = bases;
2750
2751 while(*c2)
2752 {
2753 codon[2]=*c2;
2754 i = ajCodIndexC(codon);
2755
2756 if(!ajResidueToTriplet(ajBasecodeFromInt(thys->aa[i]),
2757 &ThreeAa))
2758 ajStrAssignC(&ThreeAa, "End");
2759
2760 ajStrFmtLower(&ThreeAa);
2761 ajStrFmtTitle(&ThreeAa);
2762 ajFmtPrintF(outf, "%S %s %10.2f %6.2f %4.2f\n",
2763 ThreeAa, codon, (float) thys->num[i],
2764 thys->tcount[i], thys->fraction[i]);
2765 c2++;
2766 }
2767
2768 c1++;
2769 }
2770
2771 c0++;
2772 }
2773
2774 ajStrDel(&species);
2775 ajStrDel(&release);
2776 ajStrDel(&division);
2777 ajStrDel(&ThreeAa);
2778
2779 return;
2780 }
2781
2782
2783
2784
2785 /* @funcstatic codWriteTransterm **********************************************
2786 **
2787 ** Write codon structure to output file in Transterm database format
2788 **
2789 ** Very similar to GCG format. There is a fixed first line referring to the
2790 ** transterm program where we put today's date and time, and the line format
2791 ** has slightly different spacing to our "GCG" format.
2792 **
2793 ** @param [r] thys [const AjPCod] codon usage
2794 ** @param [u] outf [AjPFile] output file
2795 **
2796 ** @return [void]
2797 ** @category output [AjPCod] Write codon structure to output file
2798 **
2799 ** @release 3.0.0
2800 ** @@
2801 ******************************************************************************/
2802
codWriteTransterm(const AjPCod thys,AjPFile outf)2803 static void codWriteTransterm(const AjPCod thys, AjPFile outf)
2804 {
2805 const char* c0;
2806 const char* c1;
2807 const char* c2;
2808 const char* bases = "GATC";
2809 char codon[4]="GGG";
2810 ajint i;
2811 AjPStr ThreeAa = NULL;
2812 AjPStr datestr = NULL;
2813 AjPStr timestr = NULL;
2814
2815 ajFmtPrintS(&datestr, "%D", ajTimeRefTodayFmt("day"));
2816 ajFmtPrintS(×tr, "%D", ajTimeRefTodayFmt("time"));
2817
2818 ajFmtPrintF(outf, "FISH_TERM version 4.45 run at %S at %S\n\n",
2819 datestr, timestr);
2820
2821 ajStrDel(&datestr);
2822 ajStrDel(×tr);
2823
2824 ajFmtPrintF(outf, "AmAcid Codon Number /1000 Fraction ..\n");
2825
2826 c0 = bases;
2827
2828 while(*c0)
2829 {
2830 codon[0]=*c0;
2831 c1 = bases;
2832
2833 while(*c1)
2834 {
2835 codon[1]=*c1;
2836 ajFmtPrintF(outf, "\n");
2837 c2 = bases;
2838
2839 while(*c2)
2840 {
2841 codon[2]=*c2;
2842 i = ajCodIndexC(codon);
2843
2844 if(!ajResidueToTriplet(ajBasecodeFromInt(thys->aa[i]),
2845 &ThreeAa))
2846 ajStrAssignC(&ThreeAa, "End");
2847
2848 ajStrFmtLower(&ThreeAa);
2849 ajStrFmtTitle(&ThreeAa);
2850 ajFmtPrintF(outf, "%S %s %10.2f %6.2f %4.2f\n",
2851 ThreeAa, codon, (float) thys->num[i],
2852 thys->tcount[i], thys->fraction[i]);
2853 c2++;
2854 }
2855
2856 c1++;
2857 }
2858
2859 c0++;
2860 }
2861
2862 ajStrDel(&ThreeAa);
2863
2864 return;
2865 }
2866
2867
2868
2869
2870 /* @funcstatic codWriteGcg ****************************************************
2871 **
2872 ** Write codon structure to output file in Gcg format
2873 **
2874 ** @param [r] thys [const AjPCod] codon usage
2875 ** @param [u] outf [AjPFile] output file
2876 **
2877 ** @return [void]
2878 ** @category output [AjPCod] Write codon structure to output file
2879 **
2880 ** @release 3.0.0
2881 ** @@
2882 ******************************************************************************/
2883
codWriteGcg(const AjPCod thys,AjPFile outf)2884 static void codWriteGcg(const AjPCod thys, AjPFile outf)
2885 {
2886 const char* c0;
2887 const char* c1;
2888 const char* c2;
2889 const char* bases = "GATC";
2890 char codon[4]="GGG";
2891 ajint i;
2892 AjPStr ThreeAa = NULL;
2893
2894 ajFmtPrintF(outf, "!!CODON_FREQUENCY 1.0\n\n");
2895 ajFmtPrintF(outf, "AmAcid Codon Number /1000 Fraction ..\n");
2896
2897 c0 = bases;
2898
2899 while(*c0)
2900 {
2901 codon[0]=*c0;
2902 c1 = bases;
2903
2904 while(*c1)
2905 {
2906 codon[1]=*c1;
2907 ajFmtPrintF(outf, "\n");
2908 c2 = bases;
2909
2910 while(*c2)
2911 {
2912 codon[2]=*c2;
2913 i = ajCodIndexC(codon);
2914
2915 if(!ajResidueToTriplet(ajBasecodeFromInt(thys->aa[i]),
2916 &ThreeAa))
2917 ajStrAssignC(&ThreeAa, "End");
2918
2919 ajStrFmtLower(&ThreeAa);
2920 ajStrFmtTitle(&ThreeAa);
2921 ajFmtPrintF(outf, "%S %s %10.2f %8.2f %4.2f\n",
2922 ThreeAa, codon, (double) thys->num[i],
2923 thys->tcount[i], thys->fraction[i]);
2924 c2++;
2925 }
2926
2927 c1++;
2928 }
2929
2930 c0++;
2931 }
2932
2933 ajStrDel(&ThreeAa);
2934 return;
2935 }
2936
2937
2938
2939
2940 /* @funcstatic codWriteCodehop ************************************************
2941 **
2942 ** Write codon structure to output file in Codehop program format
2943 **
2944 ** @param [r] thys [const AjPCod] codon usage
2945 ** @param [u] outf [AjPFile] output file
2946 **
2947 ** @return [void]
2948 ** @category output [AjPCod] Write codon structure to output file
2949 **
2950 ** @release 3.0.0
2951 ** @@
2952 ******************************************************************************/
2953
codWriteCodehop(const AjPCod thys,AjPFile outf)2954 static void codWriteCodehop(const AjPCod thys, AjPFile outf)
2955 {
2956 const char* c0;
2957 const char* c1;
2958 const char* c2;
2959 const char* bases = "ACGU";
2960 char codon[4]="AAA";
2961 ajint i;
2962 ajint j = 0;
2963 AjPStr release = NULL;
2964 AjPStr species = NULL;
2965 AjPStr division = NULL;
2966
2967 ajStrAssignS(&release, thys->Release);
2968 ajStrAssignEmptyC(&release, "unknowndb0.0");
2969
2970 ajStrAssignS(&species, thys->Species);
2971 ajStrAssignEmptyC(&species, "unknown species");
2972
2973 ajStrAssignS(&division, thys->Division);
2974 ajStrAssignEmptyC(&division, "unknown.division");
2975
2976 c0 = bases;
2977
2978 while(*c0)
2979 {
2980 codon[0]=*c0;
2981 c1 = bases;
2982
2983 while(*c1)
2984 {
2985 codon[1]=*c1;
2986 c2 = bases;
2987
2988 while(*c2)
2989 {
2990 codon[2]=*c2;
2991 i = ajCodIndexC(codon);
2992 ajFmtPrintF(outf, "%8.6f\t-- [%2d] %s\n",
2993 0.001 * thys->tcount[i], j++, codon);
2994 c2++;
2995 }
2996 c1++;
2997 }
2998 c0++;
2999 }
3000
3001 ajFmtPrintF(outf, "---------------------------------------------\n");
3002 ajFmtPrintF(outf, "%S\n", thys->Name);
3003 ajFmtPrintF(outf, "Codon usage for %S: %d\n",
3004 species, thys->CdsCount);
3005 ajFmtPrintF(outf, " from %S/%S: %d codons\n",
3006 release, division, thys->CodonCount);
3007
3008 ajStrDel(&species);
3009 ajStrDel(&release);
3010 ajStrDel(&division);
3011
3012 return;
3013 }
3014
3015
3016
3017
3018 /* @funcstatic codWriteStaden *************************************************
3019 **
3020 ** Write codon structure to output file in Staden format with numbers
3021 ** (as generated by nip4) instead of percentages as used by the original
3022 ** nip program.
3023 **
3024 ** @param [r] thys [const AjPCod] codon usage
3025 ** @param [u] outf [AjPFile] output file
3026 **
3027 ** @return [void]
3028 ** @category output [AjPCod] Write codon structure to output file
3029 **
3030 ** @release 3.0.0
3031 ** @@
3032 ******************************************************************************/
3033
codWriteStaden(const AjPCod thys,AjPFile outf)3034 static void codWriteStaden(const AjPCod thys, AjPFile outf)
3035 {
3036 const char* c0;
3037 const char* c1;
3038 const char* c2;
3039 const char* bases = "TCAG";
3040 char codon[4]="TTT";
3041 const char* aa = "ABCDEFGHIJKLMNOPQRSTUVWXYZ.*";
3042 ajint i;
3043
3044 c0 = bases;
3045
3046 while(*c0)
3047 {
3048 codon[0]=*c0;
3049 ajFmtPrintF(outf,
3050 " ===========================================\n");
3051 c2 = bases;
3052
3053 while(*c2)
3054 {
3055 codon[2]=*c2;
3056 ajFmtPrintF(outf, " ");
3057 c1 = bases;
3058
3059 while(*c1)
3060 {
3061 codon[1]=*c1;
3062 i = ajCodIndexC(codon);
3063 ajFmtPrintF(outf, " %c %s %4d",
3064 aa[thys->aa[i]], codon, thys->num[i]);
3065 c1++;
3066 }
3067
3068 ajFmtPrintF(outf, "\n");
3069 c2++;
3070 }
3071
3072 c0++;
3073 }
3074
3075 ajFmtPrintF(outf,
3076 " ===========================================\n");
3077
3078 return;
3079 }
3080
3081
3082
3083
3084 /* @func ajCodComp ************************************************************
3085 **
3086 ** Calculate sequence composition
3087 **
3088 ** @param [w] NA [ajint *] number of A's
3089 ** @param [w] NC [ajint *] number of C's
3090 ** @param [w] NG [ajint *] number of G's
3091 ** @param [w] NT [ajint *] number of T'
3092 ** @param [r] str [const char *] sequence
3093 **
3094 ** @return [void]
3095 **
3096 ** @release 1.0.0
3097 ** @@
3098 ******************************************************************************/
3099
ajCodComp(ajint * NA,ajint * NC,ajint * NG,ajint * NT,const char * str)3100 void ajCodComp(ajint *NA, ajint *NC, ajint *NG, ajint *NT, const char *str)
3101 {
3102 const char *p;
3103 ajint c;
3104
3105 p = str;
3106
3107 while((c = *p))
3108 {
3109 if(c=='A')
3110 ++(*NA);
3111 else if(c=='C')
3112 ++(*NC);
3113 else if(c=='G')
3114 ++(*NG);
3115 else if(c=='T')
3116 ++(*NT);
3117 ++p;
3118 }
3119
3120 return;
3121 }
3122
3123
3124
3125
3126 /* @funcstatic codRandom ******************************************************
3127 **
3128 ** Calculate expected frequency of a codon
3129 **
3130 ** @param [r] NA [ajint] number of A's
3131 ** @param [r] NC [ajint] number of C's
3132 ** @param [r] NG [ajint] number of G's
3133 ** @param [r] NT [ajint] number of T'
3134 ** @param [r] len [ajint] sequence length
3135 ** @param [r] p [const char *] triplet
3136 **
3137 ** @return [double] triplet frequency
3138 **
3139 ** @release 2.9.0
3140 ** @@
3141 ******************************************************************************/
3142
codRandom(ajint NA,ajint NC,ajint NG,ajint NT,ajint len,const char * p)3143 static double codRandom(ajint NA, ajint NC, ajint NG, ajint NT,
3144 ajint len, const char *p)
3145 {
3146 ajint i;
3147 double prod;
3148 double tot;
3149 ajint c;
3150
3151
3152 prod = 1;
3153 tot = 1.0;
3154
3155 for(i=0;i<3;++i)
3156 {
3157 c = p[i];
3158
3159 if(c=='A')
3160 prod = (double)NA;
3161
3162 if(c=='C')
3163 prod = (double)NC;
3164
3165 if(c=='G')
3166 prod = (double)NG;
3167
3168 if(c=='T')
3169 prod = (double)NT;
3170
3171 tot *= prod/(double)len;
3172 }
3173
3174 return tot;
3175 }
3176
3177
3178
3179
3180 /* @funcstatic codGetWstat ****************************************************
3181 **
3182 ** Calculate codon adaptive index W values
3183 ** NAR 15:1281-1295
3184 **
3185 ** @param [r] thys [const AjPCod] codon usage
3186 **
3187 ** @return [double*] w value array
3188 **
3189 ** @release 6.2.0
3190 ** @@
3191 ******************************************************************************/
3192
codGetWstat(const AjPCod thys)3193 static double* codGetWstat(const AjPCod thys)
3194 {
3195 ajint i;
3196 ajint j;
3197 double *wk;
3198 ajint thisaa;
3199 double aamax;
3200
3201 AJCNEW0(wk,AJCODSTART);
3202
3203
3204 for(i=0;i<AJCODSTART;++i)
3205 {
3206 thisaa = thys->aa[i];
3207 aamax = (double)INT_MIN;
3208
3209 for(j=0;j<AJCODSTART;++j)
3210 if(thys->aa[j]==thisaa)
3211 aamax = aamax > thys->tcount[j] ? aamax : thys->tcount[j];
3212 if(!E_FPZERO(aamax,U_DEPS))
3213 wk[i] = thys->tcount[i] / aamax;
3214 }
3215
3216 return wk;
3217 }
3218
3219
3220
3221
3222 /* @func ajCodCalcCaiCod ******************************************************
3223 **
3224 ** Calculate codon adaptive index using overall codon usage data only.
3225 **
3226 ** See equation 8 in NAR 15:1281-1295
3227 **
3228 ** @param [r] thys [const AjPCod] codon usage
3229 **
3230 ** @return [double] CAI
3231 **
3232 ** @release 6.2.0
3233 ** @@
3234 ******************************************************************************/
3235
ajCodCalcCaiCod(const AjPCod thys)3236 double ajCodCalcCaiCod(const AjPCod thys)
3237 {
3238 double cai;
3239 double max;
3240 double sum;
3241 double res;
3242 double xij;
3243 double total;
3244 ajint i;
3245 ajint k;
3246
3247 total = (double)0.;
3248
3249 for(i=0;i<AJCODAMINOS-2;++i)
3250 {
3251 max = (double)0.;
3252 for(k=0;k<AJCODSTART;++k)
3253 {
3254 if(thys->aa[k]==27)
3255 continue;
3256
3257 if(thys->aa[k]==i)
3258 max = (max> thys->fraction[k]) ? max : thys->fraction[k];
3259 }
3260
3261 sum = (double)0.;
3262
3263 for(k=0;k<AJCODSTART && !E_FPZERO(max,U_DEPS);++k)
3264 {
3265 if(thys->aa[k]==27)
3266 continue;
3267
3268 if(thys->aa[k]==i)
3269 {
3270 xij = thys->fraction[k];
3271
3272 if(!E_FPZERO(xij,U_DEPS))
3273 {
3274 res = thys->tcount[k] * log(xij/max);
3275 sum += res;
3276 }
3277 }
3278 }
3279
3280 total += sum;
3281 }
3282
3283 total /= (double)1000.;
3284 cai = exp(total);
3285
3286 return cai;
3287 }
3288
3289
3290
3291
3292 /* @func ajCodCalcCaiSeq ******************************************************
3293 **
3294 ** Calculate codon adaptive index for a coding sequence
3295 **
3296 ** See equation 7 in NAR 15:1281-1295
3297 **
3298 ** @param [r] thys [const AjPCod] codon usage
3299 ** @param [r] str [const AjPStr] sequence
3300 **
3301 ** @return [double] CAI
3302 **
3303 ** @release 6.2.0
3304 ** @@
3305 ******************************************************************************/
3306
ajCodCalcCaiSeq(const AjPCod thys,const AjPStr str)3307 double ajCodCalcCaiSeq(const AjPCod thys, const AjPStr str)
3308 {
3309 double *wk;
3310 ajint i;
3311 ajint len;
3312 const char *p;
3313 ajint limit;
3314 ajint idx;
3315 double total;
3316
3317 wk = codGetWstat(thys);
3318
3319 len = ajStrGetLen(str);
3320 p = ajStrGetPtr(str);
3321
3322 limit = len / 3;
3323 total = (double)0.;
3324
3325 for(i=0;i<limit; ++i,p+=3)
3326 {
3327 idx = ajCodIndexC(p);
3328
3329 if(!E_FPZERO(wk[idx],U_DEPS))
3330 total += log(wk[idx]);
3331 }
3332
3333 total /= (double)limit;
3334
3335 AJFREE(wk);
3336
3337 return exp(total);
3338 }
3339
3340
3341
3342
3343 /* @func ajCodCalcGribskov ****************************************************
3344 **
3345 ** Calculate Gribskov statistic (count per thousand) in AjPCod internals
3346 **
3347 ** @param [u] thys [AjPCod] codon usage for sequence
3348 ** @param [r] s [const AjPStr] sequence
3349 **
3350 ** @return [void]
3351 **
3352 ** @release 1.0.0
3353 ** @@
3354 ******************************************************************************/
3355
ajCodCalcGribskov(AjPCod thys,const AjPStr s)3356 void ajCodCalcGribskov(AjPCod thys, const AjPStr s)
3357 {
3358 ajint i;
3359 ajint j;
3360
3361 ajint NA;
3362 ajint NC;
3363 ajint NG;
3364 ajint NT;
3365
3366 const char *p;
3367 ajint len;
3368 ajint aa;
3369 double fam[64];
3370 double frct[64];
3371
3372 double x;
3373 double z;
3374
3375 len = ajStrGetLen(s);
3376
3377 for(i=0;i<AJCODSTART;++i)
3378 frct[i] = thys->fraction[i];
3379
3380 NA = NC = NG = NT = 0;
3381 ajCodComp(&NA,&NC,&NG,&NT,ajStrGetPtr(s));
3382
3383 /* Get expected frequencies */
3384 for(i=0;i<AJCODSTART;++i)
3385 {
3386 p = ajCodTriplet(i);
3387 thys->tcount[i] = codRandom(NA,NC,NG,NT,len,p);
3388 }
3389
3390
3391 /* Calculate expected family */
3392 for(i=0;i<AJCODSTART;++i)
3393 {
3394 x=0.0;
3395 aa = thys->aa[i];
3396
3397 for(j=0;j<AJCODSTART;++j)
3398 if(thys->aa[j]==aa) x += thys->tcount[j];
3399
3400 fam[i] = x;
3401 }
3402
3403
3404 /* Calculate expected ratio etc */
3405
3406 for(i=0;i<AJCODSTART;++i)
3407 {
3408 z = thys->tcount[i] / fam[i];
3409
3410 /* Work out ln(real/expected) */
3411 thys->tcount[i]= log(frct[i] / z);
3412 }
3413
3414 return;
3415 }
3416
3417
3418
3419
3420 /* @func ajCodCalcNc **********************************************************
3421 **
3422 ** Calculate effective number of codons
3423 ** Wright, F. (1990) Gene 87:23-29
3424 **
3425 ** @param [r] thys [const AjPCod] codon usage
3426 **
3427 ** @return [double] Nc
3428 **
3429 ** @release 1.0.0
3430 ** @@
3431 ******************************************************************************/
3432
ajCodCalcNc(const AjPCod thys)3433 double ajCodCalcNc(const AjPCod thys)
3434 {
3435 ajint *df = NULL;
3436 ajint *n = NULL;
3437 ajint i;
3438 ajint j;
3439 ajint v;
3440
3441 ajint max;
3442 ajint ndx;
3443
3444 ajint *nt = NULL;
3445 double *Fbar = NULL;
3446 double *F = NULL;
3447 double sum;
3448 double num = 0.0;
3449
3450 AJCNEW0(df, AJCODAMINOS);
3451 AJCNEW0(n, AJCODAMINOS);
3452
3453 for(i=0;i<AJCODSTART;++i)
3454 {
3455 v = thys->aa[i];
3456
3457 if(v==27)
3458 continue;
3459
3460 ++df[v];
3461 n[thys->aa[i]] += thys->num[i];
3462 }
3463
3464
3465 for(i=0,max=INT_MIN;i<AJCODAMINOS;++i)
3466 max = (max > df[i]) ? max : df[i];
3467
3468 AJCNEW0(Fbar, max);
3469 AJCNEW0(nt, max);
3470 AJCNEW0(F, AJCODAMINOS);
3471
3472 for(i=0;i<AJCODAMINOS-2;++i)
3473 if(df[i])
3474 ++nt[df[i]-1];
3475
3476 for(i=0;i<AJCODAMINOS-2;++i)
3477 for(j=0;j<AJCODSTART;++j)
3478 {
3479 if(thys->aa[j]==27)
3480 continue;
3481
3482 if(thys->aa[j]==i)
3483 F[i] += pow(thys->fraction[j],2.0);
3484 }
3485
3486
3487 for(i=0;i<AJCODAMINOS-2;++i)
3488 {
3489 if(n[i]-1 <1 || (num=((double)n[i]*F[i]-1.0))<0.05)
3490 {
3491 F[i] = 0.0;
3492
3493 if(df[i])
3494 --nt[df[i]-1];
3495
3496 continue;
3497 }
3498
3499 F[i]= num / ((double)n[i]-1.0);
3500 }
3501
3502
3503 for(i=0;i<AJCODAMINOS-2;++i)
3504 {
3505 ndx=df[i];
3506
3507 if(!ndx)
3508 continue;
3509
3510 --ndx;
3511 Fbar[ndx] += F[i];
3512 }
3513
3514 for(i=0;i<max;++i)
3515 if(nt[i])
3516 Fbar[i] /= (double)nt[i];
3517
3518 if(E_FPZERO(Fbar[2],U_DEPS)) /* Ile fix */
3519 Fbar[2] = (Fbar[1]+Fbar[3]) / 2.0;
3520
3521
3522 for(i=1,sum=2.0;i<max;++i)
3523 {
3524 if(E_FPZERO(Fbar[i],U_DEPS))
3525 continue;
3526
3527 if(i==1)
3528 sum += 9.0/Fbar[i];
3529 else if(i==2)
3530 sum += 1.0/Fbar[i];
3531 else if(i==3)
3532 sum += 5.0/Fbar[i];
3533 else if(i==5)
3534 sum += 3.0/Fbar[i];
3535 }
3536
3537
3538 AJFREE(F);
3539 AJFREE(nt);
3540 AJFREE(Fbar);
3541 AJFREE(n);
3542 AJFREE(df);
3543
3544 if(sum>61.0)
3545 return 61.0;
3546
3547 return sum;
3548 }
3549
3550
3551
3552
3553 /* @func ajCodCalcUsage *******************************************************
3554 **
3555 ** Calculate fractional and thousand elements of a codon object
3556 ** Used for creating a codon usage table
3557 **
3558 ** If no counts are stored, warns of need to first run ajCodSetTriplets
3559 **
3560 ** @param [u] thys [AjPCod] Codon object
3561 ** @param [r] c [ajint] triplet count
3562 **
3563 ** @return [void]
3564 **
3565 ** @release 6.2.0
3566 ** @@
3567 ******************************************************************************/
3568
ajCodCalcUsage(AjPCod thys,ajint c)3569 void ajCodCalcUsage(AjPCod thys, ajint c)
3570 {
3571 ajint i;
3572 ajint *aasum;
3573 ajuint totcnt = 0;
3574
3575 for(i=0;i<AJCODSIZE;i++)
3576 totcnt += thys->num[i];
3577 if(!totcnt)
3578 ajWarn("empty codon usage table, first call ajCodSetTripletsS");
3579
3580 /* Calculate thousands */
3581 for(i=0;i<AJCODSTART;++i)
3582 if(!c)
3583 thys->tcount[i] = 0.0;
3584 else
3585 thys->tcount[i] = ((double)thys->num[i] / (double)c) * 1000.0;
3586
3587 /* Get number of codons per amino acid */
3588 AJCNEW0(aasum, AJCODAMINOS);
3589 for(i=0;i<AJCODSTART;++i)
3590 if(thys->aa[i]==27)
3591 aasum[27] += thys->num[i];
3592 else
3593 aasum[thys->aa[i]] += thys->num[i];
3594
3595
3596 /* Calculate fraction */
3597 for(i=0;i<AJCODSTART;++i)
3598 if(thys->aa[i]==27)
3599 {
3600 if(!aasum[27])
3601 thys->fraction[i] = 0.0;
3602 else
3603 thys->fraction[i] = (double)thys->num[i] /
3604 (double)aasum[27];
3605 }
3606 else
3607 {
3608 if(!aasum[thys->aa[i]])
3609 thys->fraction[i] = 0.0;
3610 else
3611 thys->fraction[i] = (double)thys->num[i] /
3612 (double)aasum[thys->aa[i]];
3613 }
3614
3615 AJFREE(aasum);
3616
3617 return;
3618 }
3619
3620
3621
3622
3623 /* @func ajCodGetName *********************************************************
3624 **
3625 ** Returns the name of a codon table
3626 **
3627 ** @param [r] thys [const AjPCod] Codon usage object
3628 ** @return [const AjPStr] Original filename
3629 **
3630 ** @release 2.9.0
3631 ******************************************************************************/
3632
ajCodGetName(const AjPCod thys)3633 const AjPStr ajCodGetName(const AjPCod thys)
3634 {
3635 return thys->Name;
3636 }
3637
3638
3639
3640
3641 /* @func ajCodGetNameC ********************************************************
3642 **
3643 ** Returns the name of a codon table
3644 **
3645 ** @param [r] thys [const AjPCod] Codon usage object
3646 ** @return [const char*] Original filename
3647 **
3648 ** @release 2.9.0
3649 ******************************************************************************/
3650
ajCodGetNameC(const AjPCod thys)3651 const char* ajCodGetNameC(const AjPCod thys)
3652 {
3653 return ajStrGetPtr(thys->Name);
3654 }
3655
3656
3657
3658
3659 /* @func ajCodGetDesc *********************************************************
3660 **
3661 ** Returns the description of a codon table
3662 **
3663 ** @param [r] thys [const AjPCod] Codon usage object
3664 ** @return [const AjPStr] Original filename
3665 **
3666 ** @release 2.9.0
3667 ******************************************************************************/
3668
ajCodGetDesc(const AjPCod thys)3669 const AjPStr ajCodGetDesc(const AjPCod thys)
3670 {
3671 return thys->Desc;
3672 }
3673
3674
3675
3676
3677 /* @func ajCodGetDescC ********************************************************
3678 **
3679 ** Returns the description of a codon table
3680 **
3681 ** @param [r] thys [const AjPCod] Codon usage object
3682 ** @return [const char*] Original filename
3683 **
3684 ** @release 2.9.0
3685 ******************************************************************************/
3686
ajCodGetDescC(const AjPCod thys)3687 const char* ajCodGetDescC(const AjPCod thys)
3688 {
3689 return ajStrGetPtr(thys->Desc);
3690 }
3691
3692
3693
3694
3695 /* @func ajCodGetSpecies ******************************************************
3696 **
3697 ** Returns the species of a codon table
3698 **
3699 ** @param [r] thys [const AjPCod] Codon usage object
3700 ** @return [const AjPStr] Species
3701 **
3702 ** @release 3.0.0
3703 ******************************************************************************/
3704
ajCodGetSpecies(const AjPCod thys)3705 const AjPStr ajCodGetSpecies(const AjPCod thys)
3706 {
3707 return thys->Species;
3708 }
3709
3710
3711
3712
3713 /* @func ajCodGetSpeciesC *****************************************************
3714 **
3715 ** Returns the species of a codon table
3716 **
3717 ** @param [r] thys [const AjPCod] Codon usage object
3718 ** @return [const char*] Species
3719 **
3720 ** @release 3.0.0
3721 ******************************************************************************/
3722
ajCodGetSpeciesC(const AjPCod thys)3723 const char* ajCodGetSpeciesC(const AjPCod thys)
3724 {
3725 return ajStrGetPtr(thys->Species);
3726 }
3727
3728
3729
3730
3731 /* @func ajCodGetDivision *****************************************************
3732 **
3733 ** Returns the division of a codon table
3734 **
3735 ** @param [r] thys [const AjPCod] Codon usage object
3736 ** @return [const AjPStr] Division
3737 **
3738 ** @release 3.0.0
3739 ******************************************************************************/
3740
ajCodGetDivision(const AjPCod thys)3741 const AjPStr ajCodGetDivision(const AjPCod thys)
3742 {
3743 return thys->Division;
3744 }
3745
3746
3747
3748
3749 /* @func ajCodGetDivisionC ****************************************************
3750 **
3751 ** Returns the division of a codon table
3752 **
3753 ** @param [r] thys [const AjPCod] Codon usage object
3754 ** @return [const char*] Division
3755 **
3756 ** @release 3.0.0
3757 ******************************************************************************/
3758
ajCodGetDivisionC(const AjPCod thys)3759 const char* ajCodGetDivisionC(const AjPCod thys)
3760 {
3761 return ajStrGetPtr(thys->Division);
3762 }
3763
3764
3765
3766
3767 /* @func ajCodGetRelease ******************************************************
3768 **
3769 ** Returns the release of a codon table
3770 **
3771 ** @param [r] thys [const AjPCod] Codon usage object
3772 ** @return [const AjPStr] Release
3773 **
3774 ** @release 3.0.0
3775 ******************************************************************************/
3776
ajCodGetRelease(const AjPCod thys)3777 const AjPStr ajCodGetRelease(const AjPCod thys)
3778 {
3779 return thys->Release;
3780 }
3781
3782
3783
3784
3785 /* @func ajCodGetReleaseC *****************************************************
3786 **
3787 ** Returns the release of a codon table
3788 **
3789 ** @param [r] thys [const AjPCod] Codon usage object
3790 ** @return [const char*] Release
3791 **
3792 ** @release 3.0.0
3793 ******************************************************************************/
3794
ajCodGetReleaseC(const AjPCod thys)3795 const char* ajCodGetReleaseC(const AjPCod thys)
3796 {
3797 return ajStrGetPtr(thys->Release);
3798 }
3799
3800
3801
3802
3803 /* @func ajCodGetNumcodon *****************************************************
3804 **
3805 ** Returns the number of codons in a codon table
3806 **
3807 ** @param [r] thys [const AjPCod] Codon usage object
3808 ** @return [ajint] Number of codons
3809 **
3810 ** @release 3.0.0
3811 ******************************************************************************/
3812
ajCodGetNumcodon(const AjPCod thys)3813 ajint ajCodGetNumcodon(const AjPCod thys)
3814 {
3815 return thys->CodonCount;
3816 }
3817
3818
3819
3820
3821 /* @func ajCodGetNumcds *******************************************************
3822 **
3823 ** Returns the numbers of CDSs in a codon table
3824 **
3825 ** @param [r] thys [const AjPCod] Codon usage object
3826 ** @return [ajint] Number of CDSs
3827 **
3828 ** @release 3.0.0
3829 ******************************************************************************/
3830
ajCodGetNumcds(const AjPCod thys)3831 ajint ajCodGetNumcds(const AjPCod thys)
3832 {
3833 return thys->CdsCount;
3834 }
3835
3836
3837
3838
3839 /* @func ajCodGetCode *********************************************************
3840 **
3841 ** Returns the genetic code of a codon table
3842 **
3843 ** @param [r] thys [const AjPCod] Codon usage object
3844 ** @return [ajint] Number of CDSs
3845 **
3846 ** @release 3.0.0
3847 ******************************************************************************/
3848
ajCodGetCode(const AjPCod thys)3849 ajint ajCodGetCode(const AjPCod thys)
3850 {
3851 return thys->GeneticCode;
3852 }
3853
3854
3855
3856
3857 /* @func ajCodSetCodenum ******************************************************
3858 **
3859 ** Assigns the genetic code in a codon table
3860 **
3861 ** @param [u] thys [AjPCod] Codon usage object
3862 ** @param [r] geneticcode [ajint] Genetic code
3863 ** @return [void]
3864 **
3865 ** @release 6.2.0
3866 ******************************************************************************/
3867
ajCodSetCodenum(AjPCod thys,ajint geneticcode)3868 void ajCodSetCodenum(AjPCod thys, ajint geneticcode)
3869 {
3870 thys->GeneticCode = geneticcode;
3871 return;
3872 }
3873
3874
3875
3876
3877 /* @func ajCodSetDescC ********************************************************
3878 **
3879 ** Assigns the description of a codon table
3880 **
3881 ** @param [u] thys [AjPCod] Codon usage object
3882 ** @param [r] desc [const char*] Description
3883 ** @return [void]
3884 **
3885 ** @release 6.2.0
3886 ******************************************************************************/
3887
ajCodSetDescC(AjPCod thys,const char * desc)3888 void ajCodSetDescC(AjPCod thys, const char* desc)
3889 {
3890 ajStrAssignC(&thys->Desc, desc);
3891 return;
3892 }
3893
3894
3895
3896
3897 /* @func ajCodSetDescS ********************************************************
3898 **
3899 ** Assigns the description of a codon table
3900 **
3901 ** @param [u] thys [AjPCod] Codon usage object
3902 ** @param [r] desc [const AjPStr] Description
3903 ** @return [void]
3904 **
3905 ** @release 6.2.0
3906 ******************************************************************************/
3907
ajCodSetDescS(AjPCod thys,const AjPStr desc)3908 void ajCodSetDescS(AjPCod thys, const AjPStr desc)
3909 {
3910 ajStrAssignS(&thys->Desc,desc );
3911 return;
3912 }
3913
3914
3915
3916
3917 /* @func ajCodSetDivisionC ****************************************************
3918 **
3919 ** Assigns the division of a codon table
3920 **
3921 ** @param [u] thys [AjPCod] Codon usage object
3922 ** @param [r] division [const char*] Division
3923 ** @return [void]
3924 **
3925 ** @release 6.2.0
3926 ******************************************************************************/
3927
ajCodSetDivisionC(AjPCod thys,const char * division)3928 void ajCodSetDivisionC(AjPCod thys, const char* division)
3929 {
3930 ajStrAssignC(&thys->Division, division);
3931 return;
3932 }
3933
3934
3935
3936
3937 /* @func ajCodSetDivisionS ****************************************************
3938 **
3939 ** Assigns the division of a codon table
3940 **
3941 ** @param [u] thys [AjPCod] Codon usage object
3942 ** @param [r] division [const AjPStr] Division
3943 ** @return [void]
3944 **
3945 ** @release 6.2.0
3946 ******************************************************************************/
3947
ajCodSetDivisionS(AjPCod thys,const AjPStr division)3948 void ajCodSetDivisionS(AjPCod thys, const AjPStr division)
3949 {
3950 ajStrAssignS(&thys->Division, division);
3951 return;
3952 }
3953
3954
3955
3956
3957 /* @func ajCodSetNameC ********************************************************
3958 **
3959 ** Assigns the name of a codon table
3960 **
3961 ** @param [u] thys [AjPCod] Codon usage object
3962 ** @param [r] name [const char*] Name
3963 ** @return [void]
3964 **
3965 ** @release 6.2.0
3966 ******************************************************************************/
3967
ajCodSetNameC(AjPCod thys,const char * name)3968 void ajCodSetNameC(AjPCod thys, const char* name)
3969 {
3970 ajStrAssignC(&thys->Name, name);
3971 return;
3972 }
3973
3974
3975
3976
3977 /* @func ajCodSetNameS ********************************************************
3978 **
3979 ** Assigns the name of a codon table
3980 **
3981 ** @param [u] thys [AjPCod] Codon usage object
3982 ** @param [r] name [const AjPStr] Name
3983 ** @return [void]
3984 **
3985 ** @release 6.2.0
3986 ******************************************************************************/
3987
ajCodSetNameS(AjPCod thys,const AjPStr name)3988 void ajCodSetNameS(AjPCod thys, const AjPStr name)
3989 {
3990 ajStrAssignS(&thys->Name, name);
3991 return;
3992 }
3993
3994
3995
3996
3997 /* @func ajCodSetNumcds *******************************************************
3998 **
3999 ** Assigns the number of CDSs in a codon table
4000 **
4001 ** @param [u] thys [AjPCod] Codon usage object
4002 ** @param [r] numcds [ajint] Number of codons
4003 ** @return [void]
4004 **
4005 ** @release 6.2.0
4006 ******************************************************************************/
4007
ajCodSetNumcds(AjPCod thys,ajint numcds)4008 void ajCodSetNumcds(AjPCod thys, ajint numcds)
4009 {
4010 thys->CdsCount = numcds;
4011 return;
4012 }
4013
4014
4015
4016
4017 /* @func ajCodSetNumcodons ****************************************************
4018 **
4019 ** Assigns the number of codons in a codon table
4020 **
4021 ** @param [u] thys [AjPCod] Codon usage object
4022 ** @param [r] numcodon [ajint] Number of codons
4023 ** @return [void]
4024 **
4025 ** @release 6.2.0
4026 ******************************************************************************/
4027
ajCodSetNumcodons(AjPCod thys,ajint numcodon)4028 void ajCodSetNumcodons(AjPCod thys, ajint numcodon)
4029 {
4030 thys->CodonCount = numcodon;
4031 return;
4032 }
4033
4034
4035
4036
4037 /* @func ajCodSetReleaseC *****************************************************
4038 **
4039 ** Assigns the of a codon table
4040 **
4041 ** @param [u] thys [AjPCod] Codon usage object
4042 ** @param [r] release [const char*] Release
4043 ** @return [void]
4044 **
4045 ** @release 6.2.0
4046 ******************************************************************************/
4047
ajCodSetReleaseC(AjPCod thys,const char * release)4048 void ajCodSetReleaseC(AjPCod thys, const char* release)
4049 {
4050 ajStrAssignC(&thys->Release, release);
4051 return;
4052 }
4053
4054
4055
4056
4057 /* @func ajCodSetReleaseS *****************************************************
4058 **
4059 ** Assigns the release of a codon table
4060 **
4061 ** @param [u] thys [AjPCod] Codon usage object
4062 ** @param [r] release [const AjPStr] Release
4063 ** @return [void]
4064 **
4065 ** @release 6.2.0
4066 ******************************************************************************/
4067
ajCodSetReleaseS(AjPCod thys,const AjPStr release)4068 void ajCodSetReleaseS(AjPCod thys, const AjPStr release)
4069 {
4070 ajStrAssignS(&thys->Release, release);
4071 return;
4072 }
4073
4074
4075
4076
4077 /* @func ajCodSetSpeciesC *****************************************************
4078 **
4079 ** Assigns the species of a codon table
4080 **
4081 ** @param [u] thys [AjPCod] Codon usage object
4082 ** @param [r] species [const char*] Species
4083 ** @return [void]
4084 **
4085 ** @release 6.2.0
4086 ******************************************************************************/
4087
ajCodSetSpeciesC(AjPCod thys,const char * species)4088 void ajCodSetSpeciesC(AjPCod thys, const char* species)
4089 {
4090 ajStrAssignC(&thys->Species, species);
4091 return;
4092 }
4093
4094
4095
4096
4097 /* @func ajCodSetSpeciesS *****************************************************
4098 **
4099 ** Assigns the species of a codon table
4100 **
4101 ** @param [u] thys [AjPCod] Codon usage object
4102 ** @param [r] species [const AjPStr] Species
4103 ** @return [void]
4104 **
4105 ** @release 6.2.0
4106 ******************************************************************************/
4107
ajCodSetSpeciesS(AjPCod thys,const AjPStr species)4108 void ajCodSetSpeciesS(AjPCod thys, const AjPStr species)
4109 {
4110 ajStrAssignS(&thys->Species, species);
4111 return;
4112 }
4113
4114
4115
4116
4117 /* @func ajCodoutformatFind ***************************************************
4118 **
4119 ** Tests the output format for an outcodon ACD type
4120 **
4121 ** @param [r] name [const AjPStr] Format name
4122 ** @param [w] iformat [ajint*] Internal format index
4123 ** @return [AjBool] True on success
4124 **
4125 ** @release 6.4.0
4126 ** @@
4127 ******************************************************************************/
4128
ajCodoutformatFind(const AjPStr name,ajint * iformat)4129 AjBool ajCodoutformatFind(const AjPStr name, ajint *iformat)
4130 {
4131 ajuint i;
4132
4133 for(i=0;codoutFormatDef[i].Name; i++)
4134 {
4135 if(ajStrMatchCaseC(name, codoutFormatDef[i].Name))
4136 {
4137 *iformat = i;
4138 return ajTrue;
4139 }
4140 }
4141
4142 *iformat = 0;
4143
4144 return ajFalse;
4145 }
4146
4147
4148
4149
4150 /* @funcstatic codFix *********************************************************
4151 **
4152 ** Fill in missing values in a codon usage object
4153 **
4154 ** @param [u] thys [AjPCod] Codon usage object
4155 ** @return [void]
4156 **
4157 ** @release 3.0.0
4158 ******************************************************************************/
codFix(AjPCod thys)4159 static void codFix(AjPCod thys)
4160 {
4161 ajint i;
4162 ajint totnum = 0;
4163 ajint totcds = 0;
4164 double totfreq = 0.0;
4165 double totfrac = 0.0;
4166 ajint totaa = 0;
4167 ajint aa;
4168 ajint aacount[28] = {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
4169 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1};
4170
4171 for(i=0;i<64;i++)
4172 {
4173 totnum += thys->num[i];
4174 totfreq += thys->tcount[i];
4175 totfrac += thys->fraction[i];
4176 aa = thys->aa[i];
4177
4178 if(aa == 27)
4179 totcds += thys->num[i];
4180
4181 if(aa > 27)
4182 aa = 26;
4183
4184 if(aa < 0)
4185 aa = 26;
4186
4187 if(aacount[aa] == -1)
4188 {
4189 totaa++;
4190 aacount[aa] = 0;
4191 }
4192
4193 aacount[aa] += thys->num[i];
4194 }
4195
4196 if(thys->CdsCount)
4197 {
4198 if(totcds > thys->CdsCount) /* can be less if stops ignored */
4199 ajDebug("Codon usage file '%S' has %d stop codons, CDS count %d\n",
4200 thys->Name, totcds, thys->CdsCount);
4201 }
4202 else
4203 thys->CdsCount = totcds;
4204
4205 if(thys->CodonCount)
4206 {
4207 if(totnum != thys->CodonCount)
4208 ajDebug("Codon usage file '%S' has %d codons, Codon count %d\n",
4209 thys->Name, totnum, thys->CodonCount);
4210 }
4211 else
4212 thys->CodonCount = totnum;
4213
4214 if(E_FPZERO(totfreq,U_DEPS))
4215 {
4216 for(i=0;i<64;i++)
4217 {
4218 thys->tcount[i] = 1000.0 * (double)thys->num[i] / (double) totnum;
4219 totfreq += thys->tcount[i];
4220 }
4221 }
4222
4223 if(fabs(totfreq - 1000.0) > 0.1)
4224 ajDebug("Codon usage file '%S' has total frequency/1000 of %.2f\n",
4225 thys->Name, totfreq);
4226
4227 if(E_FPZERO(totfrac,U_DEPS))
4228 {
4229 for(i=0;i<64;i++)
4230 {
4231 aa = thys->aa[i];
4232
4233 if(aa > 27)
4234 aa = 26;
4235
4236 if(aa < 0)
4237 aa = 26;
4238
4239 if(aacount[aa])
4240 thys->fraction[i] = (double)thys->num[i]/(double)(aacount[aa]);
4241
4242 totfrac += thys->fraction[i];
4243 }
4244 }
4245
4246 if(fabs(totfrac - (double)totaa) > 0.1)
4247 ajDebug("Codon usage file '%S' has total fraction of %.2f for %d "
4248 "amino acids\n",
4249 thys->Name, totfrac, totaa);
4250
4251 return;
4252 }
4253
4254
4255
4256
4257 /* @func ajCodPrintFormat *****************************************************
4258 **
4259 ** Reports the internal data structures
4260 **
4261 ** @param [u] outf [AjPFile] Output file
4262 ** @param [r] full [AjBool] Full report, currently no extra details printed
4263 ** @return [void]
4264 **
4265 ** @release 3.0.0
4266 ** @@
4267 ******************************************************************************/
4268
ajCodPrintFormat(AjPFile outf,AjBool full)4269 void ajCodPrintFormat(AjPFile outf, AjBool full)
4270 {
4271 ajint i = 0;
4272
4273 if(full)
4274 i=0;
4275
4276 ajFmtPrintF(outf, "\n");
4277 ajFmtPrintF(outf, "# Codon usage input formats\n");
4278 ajFmtPrintF(outf, "# Name Format name (or alias)\n");
4279 ajFmtPrintF(outf, "# Try Test for unknown input files\n");
4280 ajFmtPrintF(outf, "# Desc Format description\n");
4281 ajFmtPrintF(outf, "# Name Try Description\n");
4282 ajFmtPrintF(outf, "\n");
4283 ajFmtPrintF(outf, "Format {\n");
4284
4285 for(i=0; codInFormatDef[i].Name; i++)
4286 {
4287 ajFmtPrintF(outf, " %-12s %3B \"%s\"\n",
4288 codInFormatDef[i].Name,
4289 codInFormatDef[i].Try,
4290 codInFormatDef[i].Desc);
4291 }
4292
4293 ajFmtPrintF(outf, "}\n\n");
4294
4295 ajFmtPrintF(outf, "\n");
4296 ajFmtPrintF(outf, "# Codon usage output formats\n");
4297 ajFmtPrintF(outf, "# Name Format name (or alias)\n");
4298 ajFmtPrintF(outf, "# Desc Format description\n");
4299 ajFmtPrintF(outf, "# Name Description\n");
4300 ajFmtPrintF(outf, "\n");
4301 ajFmtPrintF(outf, "OFormat {\n");
4302
4303 for(i=0; codoutFormatDef[i].Name; i++)
4304 {
4305 ajFmtPrintF(outf, " %-12s \"%s\"\n",
4306 codoutFormatDef[i].Name,
4307 codoutFormatDef[i].Desc);
4308 }
4309 ajFmtPrintF(outf, "}\n\n");
4310
4311 return;
4312 }
4313
4314
4315
4316
4317 /* @func ajCodGetCodonlist ****************************************************
4318 **
4319 ** Writes codon triplets to a string list
4320 **
4321 ** @param [r] cod [const AjPCod] Cusp file
4322 ** @param [w] list [AjPList] List with character distributions
4323 ** @return [void]
4324 **
4325 ** @release 4.0.0
4326 ** @@
4327 ******************************************************************************/
4328
ajCodGetCodonlist(const AjPCod cod,AjPList list)4329 void ajCodGetCodonlist(const AjPCod cod, AjPList list)
4330 {
4331 ajint i;
4332 ajint j;
4333 AjPStr codon = NULL;
4334
4335 for(i=0;i<AJCODSTART;++i)
4336 for(j=0;j<cod->tcount[i];j++)
4337 {
4338 codon = ajStrNewC(ajCodTriplet(i));
4339 ajListstrPushAppend(list, codon);
4340 codon = NULL;
4341 }
4342
4343 return;
4344 }
4345
4346
4347
4348
4349 /* @func ajCodExit ************************************************************
4350 **
4351 ** Cleans up codon usage processing internal memory
4352 **
4353 ** @return [void]
4354 **
4355 ** @release 4.0.0
4356 ** @@
4357 ******************************************************************************/
4358
ajCodExit(void)4359 void ajCodExit(void)
4360 {
4361 ajStrDel(&codReadLine);
4362 ajStrDel(&codTmpLine);
4363
4364 return;
4365 }
4366
4367
4368
4369
4370 #ifdef AJ_COMPILE_DEPRECATED_BOOK
4371 #endif
4372
4373
4374
4375
4376 #ifdef AJ_COMPILE_DEPRECATED
4377 /* @obsolete ajCodNewCode
4378 ** @rename ajCodNewCodenum
4379 */
4380
ajCodNewCode(ajint code)4381 __deprecated AjPCod ajCodNewCode(ajint code)
4382 {
4383 return ajCodNewCodenum(code);
4384 }
4385
4386
4387
4388
4389 /* @obsolete ajCodDup
4390 ** @rename ajCodNewCod
4391 */
4392
ajCodDup(const AjPCod thys)4393 __deprecated AjPCod ajCodDup(const AjPCod thys)
4394 {
4395 return ajCodNewCod(thys);
4396 }
4397
4398
4399
4400
4401
4402 /* @obsolete ajCodCountTriplets
4403 ** @rename ajCodSetTripletsS
4404 */
4405
ajCodCountTriplets(AjPCod thys,const AjPStr s,ajint * c)4406 __deprecated void ajCodCountTriplets(AjPCod thys, const AjPStr s, ajint *c)
4407 {
4408 ajCodSetTripletsS(thys, s, c);
4409 return;
4410 }
4411
4412
4413
4414
4415 /* @obsolete ajCodCalcCai
4416 ** @rename ajCodCalcCaiSeq
4417 */
4418
ajCodCalcCai(const AjPCod thys,const AjPStr str)4419 __deprecated double ajCodCalcCai(const AjPCod thys, const AjPStr str)
4420 {
4421 return ajCodCalcCaiSeq(thys, str);
4422 }
4423
4424
4425
4426
4427 /* @obsolete ajCodCalculateUsage
4428 ** @rename ajCodCalcUsage
4429 */
4430
ajCodCalculateUsage(AjPCod thys,ajint c)4431 __deprecated void ajCodCalculateUsage(AjPCod thys, ajint c)
4432 {
4433 ajCodCalcUsage(thys, c);
4434 return;
4435 }
4436
4437
4438
4439
4440 /* @obsolete ajCodAssCode
4441 ** @rename ajCodSetCodenum
4442 */
4443
ajCodAssCode(AjPCod thys,ajint geneticcode)4444 __deprecated void ajCodAssCode(AjPCod thys, ajint geneticcode)
4445 {
4446 thys->GeneticCode = geneticcode;
4447 return;
4448 }
4449
4450
4451
4452
4453 /* @obsolete ajCodAssDescC
4454 ** @rename ajCodSetDescC
4455 */
4456
ajCodAssDescC(AjPCod thys,const char * desc)4457 __deprecated void ajCodAssDescC(AjPCod thys, const char* desc)
4458 {
4459 ajCodSetDescC(thys, desc);
4460 return;
4461 }
4462
4463
4464
4465
4466 /* @obsolete ajCodAssDesc
4467 ** @rename ajCodSetDescS
4468 */
4469
ajCodAssDesc(AjPCod thys,const AjPStr desc)4470 __deprecated void ajCodAssDesc(AjPCod thys, const AjPStr desc)
4471 {
4472 ajCodSetDescS(thys, desc);
4473 return;
4474 }
4475
4476
4477
4478
4479 /* @obsolete ajCodAssDivisionC
4480 ** @rename ajCodSetDivisionC
4481 */
4482
ajCodAssDivisionC(AjPCod thys,const char * division)4483 __deprecated void ajCodAssDivisionC(AjPCod thys, const char* division)
4484 {
4485 ajCodSetDivisionC(thys, division);
4486 return;
4487 }
4488
4489
4490
4491
4492 /* @obsolete ajCodAssDivision
4493 ** @rename ajCodSetDivisionS
4494 */
4495
ajCodAssDivision(AjPCod thys,const AjPStr division)4496 __deprecated void ajCodAssDivision(AjPCod thys, const AjPStr division)
4497 {
4498 ajCodSetDivisionS(thys, division);
4499 return;
4500 }
4501
4502
4503
4504
4505 /* @obsolete ajCodAssNameC
4506 ** @rename ajCodSetNameC
4507 */
4508
ajCodAssNameC(AjPCod thys,const char * name)4509 __deprecated void ajCodAssNameC(AjPCod thys, const char* name)
4510 {
4511 ajCodSetNameC(thys, name);
4512 return;
4513 }
4514
4515
4516
4517
4518 /* @obsolete ajCodAssName
4519 ** @rename ajCodSetNameS
4520 */
4521
ajCodAssName(AjPCod thys,const AjPStr name)4522 __deprecated void ajCodAssName(AjPCod thys, const AjPStr name)
4523 {
4524 ajCodSetNameS(thys, name);
4525 return;
4526 }
4527
4528
4529
4530
4531 /* @obsolete ajCodAssNumcds
4532 ** @rename ajCodSetNumcds
4533 */
4534
ajCodAssNumcds(AjPCod thys,ajint numcds)4535 __deprecated void ajCodAssNumcds(AjPCod thys, ajint numcds)
4536 {
4537 ajCodSetNumcds(thys, numcds);
4538 return;
4539 }
4540
4541
4542
4543
4544 /* @obsolete ajCodAssNumcodon
4545 ** @rename ajCodSetNumcodons
4546 */
4547
ajCodAssNumcodon(AjPCod thys,ajint numcodon)4548 __deprecated void ajCodAssNumcodon(AjPCod thys, ajint numcodon)
4549 {
4550 ajCodSetNumcodons(thys, numcodon);
4551 return;
4552 }
4553
4554
4555
4556
4557 /* @obsolete ajCodAssReleaseC
4558 ** @rename ajCodSetReleaseC
4559 */
4560
ajCodAssReleaseC(AjPCod thys,const char * release)4561 __deprecated void ajCodAssReleaseC(AjPCod thys, const char* release)
4562 {
4563 ajStrAssignC(&thys->Release, release);
4564 return;
4565 }
4566
4567
4568
4569
4570 /* @obsolete ajCodAssRelease
4571 ** @rename ajCodSetReleaseS
4572 */
4573
ajCodAssRelease(AjPCod thys,const AjPStr release)4574 __deprecated void ajCodAssRelease(AjPCod thys, const AjPStr release)
4575 {
4576 ajCodSetReleaseS(thys, release);
4577 return;
4578 }
4579
4580
4581
4582
4583 /* @obsolete ajCodAssSpeciesC
4584 ** @rename ajCodSetSpeciesC
4585 */
4586
ajCodAssSpeciesC(AjPCod thys,const char * species)4587 __deprecated void ajCodAssSpeciesC(AjPCod thys, const char* species)
4588 {
4589 ajCodSetSpeciesC(thys, species);
4590 return;
4591 }
4592
4593
4594
4595
4596 /* @obsolete ajCodAssSpecies
4597 ** @rename ajCodSetSpeciesS
4598 */
4599
ajCodAssSpecies(AjPCod thys,const AjPStr species)4600 __deprecated void ajCodAssSpecies(AjPCod thys, const AjPStr species)
4601 {
4602 ajCodSetSpeciesS(thys, species);
4603 return;
4604 }
4605
4606
4607
4608
4609 /* @obsolete ajCodOutFormat
4610 ** @remove use ajCodoutformatFind
4611 */
ajCodOutFormat(const AjPStr name)4612 __deprecated ajint ajCodOutFormat(const AjPStr name)
4613 {
4614 ajint iformat = 0;
4615 if(ajCodoutformatFind(name, &iformat))
4616 return iformat;
4617 else
4618 return -1;
4619 }
4620 #endif
4621