1 /* @source ajbase *************************************************************
2 **
3 ** AJAX IUB base nucleic acid functions
4 **
5 ** @author Copyright (C) 1999 Alan Bleasby
6 ** @version $Revision: 1.31 $
7 ** @modified Feb 28 ajb First version
8 ** @modified $Date: 2011/11/08 15:07:45 $ by $Author: rice $
9 ** @@
10 **
11 ** This library is free software; you can redistribute it and/or
12 ** modify it under the terms of the GNU Lesser General Public
13 ** License as published by the Free Software Foundation; either
14 ** version 2.1 of the License, or (at your option) any later version.
15 **
16 ** This library is distributed in the hope that it will be useful,
17 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 ** Lesser General Public License for more details.
20 **
21 ** You should have received a copy of the GNU Lesser General Public
22 ** License along with this library; if not, write to the Free Software
23 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
24 ** MA  02110-1301,  USA.
25 **
26 ******************************************************************************/
27 
28 
29 #include "ajlib.h"
30 
31 #include "ajbase.h"
32 #include "ajreg.h"
33 #include "ajsys.h"
34 #include "ajfiledata.h"
35 #include "ajfileio.h"
36 
37 #include <string.h>
38 
39 
40 #define IUBFILE "Ebases.iub"
41 #define IUBPFILE "Eresidues.iub"
42 
43 
44 
45 
46 /* @datastatic BasePIub *******************************************************
47 **
48 ** Base codes
49 **
50 ** @alias BaseSIub
51 ** @alias BaseOIub
52 **
53 ** @attr code [AjPStr] Code
54 ** @attr list [AjPStr] List
55 ** @attr mnemonic [AjPStr] Mnemonic
56 ** @@
57 ******************************************************************************/
58 
59 typedef struct BaseSIub
60 {
61     AjPStr code;
62     AjPStr list;
63     AjPStr mnemonic;
64 } BaseOIub;
65 
66 #define BasePIub BaseOIub*
67 
68 BaseOIub aj_base_iubS[256];	  /* Base letters and their alternatives */
69 ajint    aj_base_table[256];	  /* Base letter numerical codes         */
70 float    aj_base_prob[32][32];    /* Asym base probability matches       */
71 
72 BaseOIub aj_residue_iubS[256];	  /* Residues and their alternatives     */
73 ajint    aj_residue_table[256];	  /* Residue numerical codes             */
74 float    aj_residue_prob[32][32]; /* Asym base probability matches       */
75 
76 
77 
78 
79 /* @conststatic iubbases ******************************************************
80 **
81 ** Valid IUB nucleotide base characters
82 **
83 ******************************************************************************/
84 
85 static const char* iubbases = "XACMGRSVTWYHKDBN";
86 
87 
88 
89 
90 /* @conststatic BaseAaTable ***************************************************
91 **
92 ** Valid 3-letter amino acid names from A to Z
93 **
94 ******************************************************************************/
95 
96 static const char *BaseAaTable[]=
97 {
98     "ALA","ASX","CYS","ASP","GLU","PHE","GLY","HIS",
99     "ILE","---","LYS","LEU","MET","ASN","---","PRO",
100     "GLN","ARG","SER","THR","---","VAL","TRP","XAA",
101     "TYR","GLX"
102 };
103 
104 
105 
106 
107 /* @conststatic BaseNucTable **************************************************
108 **
109 ** Valid 2-letter nucleotide names from A to Z for PDB format
110 **
111 ******************************************************************************/
112 
113 static const char *BaseNucTable[]=
114 {
115     "DA","DB","DC","DD","--","--","DG","DH",
116     "--","--","DK","--","DM","DN","--","--",
117     "--","DR","DS","DT","DU","DV","DW","DX",
118     "DY","--"
119 };
120 
121 
122 static AjBool aj_base_I = AJFALSE;
123 static AjBool aj_residue_I = AJFALSE;
124 
125 static AjBool baseInit(void);
126 static AjBool residueInit(void);
127 
128 
129 
130 
131 /* @filesection ajbase ********************************************************
132 **
133 ** @nam1rule aj Function belongs to the AJAX library.
134 **
135 */
136 
137 
138 
139 
140 /* @datasection [none] Base *******************************************
141 **
142 ** Function is for manipulating nucleotide base codes
143 **
144 ** @nam2rule Base
145 **
146 */
147 
148 
149 
150 
151 /* @section character conversion
152 **
153 ** Functions converting binary forms of base codes
154 **
155 ** @fdata      [none]
156 **
157 ** @nam3rule Alpha Converts a character value
158 ** @nam4rule AlphaCompare Compares two base codes
159 ** @nam4rule AlphaTo Converts to a specific type
160 ** @nam5rule AlphaToBin Converts to binary code type
161 **
162 ** @argrule Alpha base [ajint] Alphabetic character as an integer
163 ** @argrule Compare base2 [ajint] Comparison alphabetic character as a
164 **                                character
165 **
166 ** @valrule Compare [float] Probability bases are the same
167 ** @valrule ToBin [ajint] Binary code in range 0 to 31
168 **
169 ** @fcategory cast
170 */
171 
172 
173 
174 
175 /* @func ajBaseAlphaCompare ***************************************************
176 **
177 ** Returns an element of the base match probability array
178 **
179 ** @param [r] base [ajint] First base offset
180 ** @param [r] base2 [ajint] Second base offset
181 **
182 ** @return [float] Base probability value
183 **
184 ** @release 6.0.0
185 ** @@
186 ******************************************************************************/
187 
ajBaseAlphaCompare(ajint base,ajint base2)188 float  ajBaseAlphaCompare(ajint base, ajint base2)
189 {
190     ajint b1;
191     ajint b2;
192 
193     if(!aj_base_I)
194         baseInit();
195 
196     b1 = base;
197     b2 = base2;
198 
199     if(b1<0)
200         b1=0;
201 
202     if(b1>31)
203         b1=31;
204 
205     if(b2<0)
206         b2=0;
207 
208     if(b2>31)
209         b2=31;
210 
211     return aj_base_prob[b1][b2];
212 }
213 
214 
215 
216 
217 /* @func ajBaseAlphaToBin *****************************************************
218 **
219 ** Returns a binary OR'd representation of an IUB base where A=1, C=2,
220 ** G=4 and T=8
221 ** Uses the base table set up by daseInit
222 **
223 ** @param  [r] base [ajint] character to convert
224 **
225 ** @return [ajint] Binary OR'd representation
226 **
227 ** @release 6.0.0
228 ******************************************************************************/
229 
ajBaseAlphaToBin(ajint base)230 ajint ajBaseAlphaToBin(ajint base)
231 {
232     if(!aj_base_I)
233 	baseInit();
234 
235     return (aj_base_table[toupper(base)]);
236 }
237 
238 
239 
240 
241 /* @section character conversion
242 **
243 ** Functions converting binary forms of base codes
244 **
245 ** @fdata      [none]
246 **
247 ** @nam3rule Alphachar Converts a character value
248 ** @nam4rule AlphacharCompare Compares two base codes
249 ** @nam4rule AlphacharTo Converts to a specific type
250 ** @nam5rule AlphacharToBin Converts to binary code type
251 **
252 ** @argrule Alphachar c [char] Alphabetic character as a character
253 ** @argrule Compare c2 [char] Comparison alphabetic character as a character
254 **
255 ** @valrule Compare [float] Probability of a match allowing for ambiguity codes
256 ** @valrule ToBin [char] Binary code in range 0 to 31
257 **
258 ** @fcategory cast
259 */
260 
261 
262 
263 
264 /* @func ajBaseAlphacharCompare ***********************************************
265 **
266 ** Check ambiguity codes (IUB) to estimate the distance score.
267 **
268 ** @param [r] c [char] First base to compare
269 ** @param [r] c2 [char] Second base to compare
270 ** @return [float] estimated match
271 **
272 **
273 ** @release 6.0.0
274 ******************************************************************************/
275 
ajBaseAlphacharCompare(char c,char c2)276 float ajBaseAlphacharCompare(char c, char c2)
277 {
278     const AjPStr b1 = NULL;
279     const AjPStr b2 = NULL;
280     AjPStr b = NULL;
281     AjPRegexp rexp = NULL;
282     AjBool pmatch = ajFalse;
283 
284     ajuint i;
285     float n;
286     ajint len1;
287     ajint len2;
288 
289     if(!aj_base_I)
290         baseInit();
291 
292     len1 = ajStrGetLen(aj_base_iubS[(int)c].list)-1;
293     len2 = ajStrGetLen(aj_base_iubS[(int)c2].list)-1;
294 
295     b1 = aj_base_iubS[(int)c].list;
296 
297     b2 = aj_base_iubS[(int)c2].list;
298 
299     /*
300     ** for each base code in 1 cf. base code
301     ** for seq 2 to see if there is a match
302     */
303     for(i = 0;i < (ajuint) len1;i++)
304     {
305 	b = ajStrNew();
306 	ajStrAssignSubS(&b,b1,i,i);
307 	rexp = ajRegComp(b);
308 
309 	if(ajRegExec(rexp,b2))
310 	    pmatch = ajTrue;
311 
312 	ajRegFree(&rexp);
313 	ajStrDel(&b);
314     }
315 
316 
317     if(pmatch)
318 	n = ((float)1./len1)*((float)1./len2);
319     else
320 	n = 0.;
321 
322     return n;
323 }
324 
325 
326 
327 
328 /* @func ajBaseAlphacharToBin *************************************************
329 **
330 ** Returns a binary OR'd representation of an IUB base where A=1, C=2,
331 ** G=4 and T=8
332 ** Uses the base table set up by baseInit
333 **
334 ** @param  [r] c [char] character to convert
335 **
336 ** @return [char] Binary OR'd representation
337 **
338 ** @release 6.0.0
339 ******************************************************************************/
340 
ajBaseAlphacharToBin(char c)341 char ajBaseAlphacharToBin(char c)
342 {
343     if(!aj_base_I)
344 	baseInit();
345 
346     return ajSysCastItoc(aj_base_table[toupper((ajint) c)]);
347 }
348 
349 
350 
351 
352 /* @section Complement
353 **
354 ** Functions complementing nucleotide base codes
355 **
356 ** @fdata      [none]
357 **
358 ** @nam3rule Alphachar Converts a character value
359 ** @nam4rule Comp Complement a nucleotide base code
360 **
361 ** @argrule * c [char]
362 **
363 ** @valrule * [char]
364 **
365 ** @fcategory use
366 */
367 
368 
369 
370 
371 /* @func ajBaseAlphacharComp **************************************************
372 **
373 ** Complements a nucleotide base.
374 **
375 ** @param [r] c [char] Base character.
376 ** @return [char] Complementary base.
377 **
378 ** @release 6.0.0
379 ** @@
380 ******************************************************************************/
381 
ajBaseAlphacharComp(char c)382 char ajBaseAlphacharComp(char c)
383 {
384     static char fwd[]="ACGTURYWSMKBDHVNXacgturywsmkbdhvnx";
385     static char rev[]="TGCAAYRWSKMVHDBNXtgcaayrwskmvhdbnx";
386     char *cp;
387     char *cq;
388 
389     cp = strchr(fwd,c);
390 
391     if(cp)
392     {
393 	cq = cp - fwd + rev;
394 
395 	return *cq;
396     }
397 
398     return c;
399 }
400 
401 
402 
403 
404 /* @section binary conversion
405 **
406 ** Functions converting binary forms of base codes
407 **
408 ** @fdata      [none]
409 **
410 ** @nam3rule Bin Converts a binary code value
411 ** @nam4rule BinTo Converts to a specific type
412 ** @nam5rule BinToAlpha Converts to alphabetic type
413 **
414 ** @argrule Bin c [ajint] Binary code in range 0 to 31
415 **
416 ** @valrule ToAlpha [char]
417 **
418 ** @fcategory cast
419 */
420 
421 
422 
423 
424 /* @func ajBaseBinToAlpha *****************************************************
425 **
426 ** Converts a binary OR'd representation of an IUB base where A=1, C=2,
427 ** G=4 and T=8 into an ambiguous DNA base code (uses T rather than U).
428 **
429 ** Uses the base table set up by baseInit
430 **
431 ** @param  [r] c [ajint] character to convert
432 **
433 ** @return [char] Ambiguous DNA base code
434 **
435 ** @release 6.0.0
436 ******************************************************************************/
437 
ajBaseBinToAlpha(ajint c)438 char ajBaseBinToAlpha(ajint c)
439 {
440     if(c<0)
441 	return 'N';
442 
443     if(c>15)
444 	return 'N';
445 
446     return (iubbases[c]);
447 }
448 
449 
450 
451 
452 /* @section query *************************************************************
453 **
454 ** Functions
455 **
456 ** @fdata      [none]
457 **
458 ** @nam3rule Is test a base code
459 ** @nam3rule Exists Tests code is valid
460 ** @suffix Char Character code
461 ** @suffix Bin Numeric base code
462 **
463 ** @argrule Bin base [ajint] Binary base code in range 0 to 31
464 ** @argrule Char c [char] Character base code
465 **
466 ** @valrule Exists [AjBool] True is the code exists
467 **
468 ** @fcategory use
469 **
470 ******************************************************************************/
471 
472 
473 
474 
475 /* @func ajBaseExistsBin ******************************************************
476 **
477 ** Tests whether a base code exists
478 **
479 ** @param [r] base [ajint] Base code in range 0 to 31
480 ** @return [AjBool] True if base code is known
481 **
482 **
483 ** @release 6.0.0
484 ******************************************************************************/
485 
ajBaseExistsBin(ajint base)486 AjBool ajBaseExistsBin(ajint base)
487 {
488     if(!aj_base_I)
489 	baseInit();
490 
491     if(ajStrGetLen(aj_base_iubS[base].code))
492         return ajTrue;
493 
494     return ajFalse;
495 }
496 
497 
498 
499 
500 /* @func ajBaseExistsChar *****************************************************
501 **
502 ** Tests whether a base code exists
503 **
504 ** @param [r] c [char] Base character
505 ** @return [AjBool] True if base code is known
506 **
507 **
508 ** @release 6.0.0
509 ******************************************************************************/
510 
ajBaseExistsChar(char c)511 AjBool ajBaseExistsChar(char c)
512 {
513     int itest;
514 
515     if(!aj_base_I)
516 	baseInit();
517 
518     itest = toupper((int)c);
519 
520     if(ajStrGetLen(aj_base_iubS[itest].code))
521         return ajTrue;
522 
523     itest = tolower((int)c);
524 
525     if(ajStrGetLen(aj_base_iubS[itest].code))
526         return ajTrue;
527 
528     return ajFalse;
529 }
530 
531 
532 
533 
534 /* @section retrieval
535 **
536 ** Functions
537 **
538 ** @fdata      [none]
539 **
540 ** @nam3rule Get Return a value
541 ** @nam4rule GetCodes Returns a string of matching base codes
542 ** @nam4rule GetMnemonic Returns mnemonic string
543 **
544 ** @argrule Get base [ajint] Binary base code in range 0 to 31
545 **
546 ** @valrule GetCodes [const AjPStr] Matching base codes
547 ** @valrule GetMnemonic [const AjPStr] Mnemonic for base code
548 **
549 ** @fcategory use
550 */
551 
552 
553 
554 
555 /* @func ajBaseGetCodes *******************************************************
556 **
557 ** Returns a string of matching base codes
558 **
559 ** @param [r] base [ajint] Original base code
560 **
561 ** @return [const AjPStr] Base codes
562 **
563 ** @release 6.0.0
564 ******************************************************************************/
565 
ajBaseGetCodes(ajint base)566 const AjPStr ajBaseGetCodes(ajint base)
567 {
568     if(!aj_base_I)
569 	baseInit();
570 
571     return  aj_base_iubS[base].list;
572 }
573 
574 
575 
576 
577 /* @func ajBaseGetMnemonic ****************************************************
578 **
579 ** Returns a string of matching base codes
580 **
581 ** @param [r] base [ajint] Original base code
582 **
583 ** @return [const AjPStr] Base codes
584 **
585 ** @release 6.0.0
586 ******************************************************************************/
587 
ajBaseGetMnemonic(ajint base)588 const AjPStr ajBaseGetMnemonic(ajint base)
589 {
590     if(!aj_base_I)
591 	baseInit();
592 
593     return  aj_base_iubS[base].mnemonic;
594 }
595 
596 
597 
598 
599 /* @funcstatic baseInit *******************************************************
600 **
601 ** Sets up binary OR'd representation of an IUB bases in a table
602 ** aj_base_table where A=1, C=2, G=4 and T=8
603 ** Also sets up a match probability array aj_base_prob holding the
604 ** probability of one IUB base matching any other.
605 ** Uses the Ebases.iub file
606 ** Is initialised if necessary from other AJAX functions.
607 **
608 ** @return [AjBool] True on success
609 **
610 ** @release 6.0.0
611 ******************************************************************************/
612 
baseInit(void)613 static AjBool baseInit(void)
614 {
615     AjPFile bfptr    = NULL;
616     AjPStr  bfname   = NULL;
617     AjPStr  line     = NULL;
618     AjPStr  code     = NULL;
619     AjPStr  list     = NULL;
620     AjPStr  mnemonic = NULL;
621 
622     ajint i;
623     ajint j;
624     ajint k;
625 
626     ajint c;
627     ajint qc;
628 
629     ajint l1;
630     ajint l2;
631 
632     ajint x;
633     ajint y;
634 
635     ajint n;
636     const char *p;
637     const char *q;
638 
639     if(aj_base_I)
640 	return ajTrue;
641 
642 
643     for(i=0;i<256;++i)
644     {
645 	aj_base_iubS[i].code = ajStrNewC("");
646 	aj_base_iubS[i].list = ajStrNewC("");
647 	aj_base_iubS[i].mnemonic = ajStrNewC("");
648 	aj_base_table[i] = 0;
649     }
650 
651     code = ajStrNew();
652     list = ajStrNew();
653     ajStrAssignClear(&code);
654     ajStrAssignC(&list,"ACGT");
655 
656 
657     bfname = ajStrNewC(IUBFILE);
658     bfptr = ajDatafileNewInNameS(bfname);
659 
660     if(!bfptr)
661         ajFatal("%S file not found\n", bfname);
662 
663 
664     line = ajStrNew();
665 
666 
667     while(ajReadline(bfptr, &line))
668     {
669 	p = ajStrGetPtr(line);
670 
671 	if(*p=='#' || *p=='!' || *p=='\n')
672 	    continue;
673 
674 	p = ajSysFuncStrtok(p," \t\r\n");
675 	ajStrAssignC(&code,p);
676 	p=ajSysFuncStrtok(NULL," \t\r\n");
677 
678 	if(sscanf(p,"%d",&n)!=1)
679 	    ajFatal("Bad format IUB file");
680 
681 	p = ajSysFuncStrtok(NULL," \t\r\n");
682 	ajStrAssignC(&list,p);
683 	p = ajSysFuncStrtok(NULL," \t\r\n");
684 	ajStrAssignC(&mnemonic,p);
685 	qc = (ajint) ajStrGetCharFirst(code);
686 	ajStrAssignS(&aj_base_iubS[toupper(qc)].code,code);
687 	ajStrAssignS(&aj_base_iubS[toupper(qc)].list,list);
688 	ajStrAssignS(&aj_base_iubS[toupper(qc)].mnemonic,mnemonic);
689 	ajStrAssignS(&aj_base_iubS[tolower(qc)].code,code);
690 	ajStrAssignS(&aj_base_iubS[tolower(qc)].list,list);
691 	ajStrAssignS(&aj_base_iubS[tolower(qc)].mnemonic,mnemonic);
692 	aj_base_table[toupper(qc)] = n;
693 	aj_base_table[tolower(qc)] = n;
694     }
695 
696     ajStrDel(&code);
697     ajStrDel(&list);
698     ajStrDel(&line);
699     ajStrDel(&mnemonic);
700     ajStrDel(&bfname);
701 
702     ajFileClose(&bfptr);
703 
704 
705     for(i=0;i<32;++i)
706     {
707 	x = ajBasecodeFromInt(i);
708 
709 	for(j=0;j<32;++j)
710 	{
711 	    y = ajBasecodeFromInt(j);
712 
713 	    if(!(l1=ajStrGetLen(aj_base_iubS[x].code)))
714 	    {
715 		aj_base_prob[i][j]=0.0;
716 		continue;
717 	    }
718 
719 	    if(l1!=1)
720 		ajFatal("Bad IUB letter");
721 
722 
723 	    p = ajStrGetPtr(aj_base_iubS[x].list);
724 	    q = ajStrGetPtr(aj_base_iubS[y].list);
725 	    l1 = strlen(p);
726 	    l2 = strlen(q);
727 
728 	    for(k=0,c=0;k<l1;++k)
729 		if(strchr(q,(ajint)*(p+k))) ++c;
730 
731 	    if(l2)
732 		aj_base_prob[i][j] = (float)c / (float)l2;
733 	    else
734 		aj_base_prob[i][j]=0.0;
735 	}
736     }
737 
738     aj_base_I = ajTrue;
739 
740     return aj_base_I;
741 }
742 
743 
744 
745 
746 /* @section Doublet names
747 **
748 ** Functions exchanging residue codes with PDB two letter codes
749 **
750 ** @fdata      [none]
751 **
752 ** @nam3rule From Convert some other form to a residue
753 ** @nam3rule To   Convert residue to another form
754 ** @nam4rule Doublet Convert a doublet 2-letter base name
755 **
756 ** @argrule FromDoublet nuc2 [const AjPStr] Doublet base name
757 ** @argrule From Pc [char*] Doublet base name
758 ** @argrule To c [char] Base character code
759 ** @argrule ToDoublet Pnuc2 [AjPStr*] Doublet base name
760 **
761 ** @valrule * [AjBool] True if code was recognised.
762 **
763 ** @fcategory cast
764 */
765 
766 
767 
768 
769 /* @func ajBaseFromDoublet ****************************************************
770 **
771 ** Takes a 2 character PDB base code and writes a char with the
772 ** corresponding single letter code.
773 **
774 ** @param [r] nuc2 [const AjPStr]   AjPStr object (2 letter code)
775 ** @param [w] Pc [char *] Resulting residue code
776 **
777 ** @return [AjBool] True on success, false if doublet is not recognised
778 **
779 ** @release 6.1.0
780 ** @@
781 ******************************************************************************/
782 
ajBaseFromDoublet(const AjPStr nuc2,char * Pc)783 AjBool ajBaseFromDoublet(const AjPStr nuc2, char* Pc)
784 {
785     ajint i;
786 
787     for(i=0; i<26; i++)
788 	if(!ajStrCmpC(nuc2, BaseNucTable[i]))
789 	{
790 	    *Pc = (char) (i + (int) 'A');
791 	    return ajTrue;
792 	}
793 
794     if(!ajStrCmpC(nuc2, "UNK"))
795     {
796 	*Pc = 'N';
797 
798 	return ajTrue;
799     }
800 
801     *Pc='N';
802 
803     return ajFalse;
804 }
805 
806 
807 
808 
809 /* @section exit
810 **
811 ** Functions called on exit from the program by ajExit to do
812 ** any necessary cleanup and to report internal statistics to the debug file
813 **
814 ** @fdata      [none]
815 **
816 ** @nam3rule Exit Cleanup and report on exit
817 **
818 ** @valrule * [void]
819 **
820 ** @fcategory misc
821 */
822 
823 
824 
825 
826 /* @func ajBaseExit ***********************************************************
827 **
828 ** Cleans up sequence base and residue processing internal memory
829 **
830 ** @return [void]
831 **
832 ** @release 4.0.0
833 ** @@
834 ******************************************************************************/
835 
ajBaseExit(void)836 void ajBaseExit(void)
837 {
838     ajint i;
839 
840     if(aj_base_I)
841         for(i=0;i<256;++i)
842         {
843             ajStrDel(&aj_base_iubS[i].code);
844             ajStrDel(&aj_base_iubS[i].list);
845             ajStrDel(&aj_base_iubS[i].mnemonic);
846         }
847 
848     if(aj_residue_I)
849         for(i=0;i<256;++i)
850         {
851             ajStrDel(&aj_residue_iubS[i].code);
852             ajStrDel(&aj_residue_iubS[i].list);
853             ajStrDel(&aj_residue_iubS[i].mnemonic);
854         }
855 
856     return;
857 }
858 
859 
860 
861 
862 /* @datasection [none] Residue *******************************************
863 **
864 ** Function is for manipulating amino acid residue codes
865 **
866 ** @nam2rule Residue
867 **
868 */
869 
870 
871 
872 
873 /* @section character conversion
874 **
875 ** Functions converting binary forms of amino acid residue codes
876 **
877 ** @fdata      [none]
878 **
879 ** @nam3rule Alpha Converts a character value
880 ** @nam4rule AlphaTo Converts to a specific type
881 ** @nam5rule AlphaToBin Converts to binary code type
882 **
883 ** @argrule Alpha base [ajint] Alphabetic character as an integer
884 **
885 ** @valrule ToBin [ajint] Binary code in range 0 to 31
886 **
887 ** @fcategory cast
888 */
889 
890 
891 
892 
893 /* @func ajResidueAlphaToBin **************************************************
894 **
895 ** Returns a binary OR'd representation of an IUB residue where A=1, C=2,
896 ** Uses the base table set up by residueInit
897 **
898 ** @param  [r] base [ajint] character to convert
899 **
900 ** @return [ajint] Binary OR'd representation
901 **
902 ** @release 6.0.0
903 ******************************************************************************/
904 
ajResidueAlphaToBin(ajint base)905 ajint ajResidueAlphaToBin(ajint base)
906 {
907     if(!aj_residue_I)
908 	residueInit();
909 
910     return (aj_residue_table[toupper(base)]);
911 }
912 
913 
914 
915 
916 /* @section binary conversion
917 **
918 ** Functions converting binary forms of base codes
919 **
920 ** @fdata      [none]
921 **
922 ** @nam3rule Bin Converts a binary code value
923 ** @nam4rule BinTo Converts to a specific type
924 ** @nam5rule BinToAlpha Converts to alphabetic type
925 **
926 ** @argrule Bin c [ajint] Binary code in range 0 to 31
927 **
928 ** @valrule ToAlpha [char]
929 **
930 ** @fcategory cast
931 */
932 
933 
934 
935 
936 /* @func ajResidueBinToAlpha **************************************************
937 **
938 ** Converts a binary OR'd representation of an IUB residue where A=1, C=2,
939 ** etc into an ambiguous protein code
940 **
941 ** Uses the base table set up by residueInit
942 **
943 ** @param  [r] c [ajint] character to convert
944 **
945 ** @return [char] Ambiguous residue code
946 **
947 ** @release 6.0.0
948 ******************************************************************************/
949 
ajResidueBinToAlpha(ajint c)950 char ajResidueBinToAlpha(ajint c)
951 {
952     ajuint i;
953 
954     if(!aj_residue_I)
955 	residueInit();
956 
957     for(i = 0;i<256;i++)
958         if(aj_residue_table[i] == c)
959             return ajStrGetCharFirst(aj_residue_iubS[i].code);
960 
961     return 'X';
962 }
963 
964 
965 
966 
967 /* @section query *************************************************************
968 **
969 ** Functions
970 **
971 ** @fdata      [none]
972 **
973 ** @nam3rule Is test a base code
974 ** @nam3rule Exists Tests code is valid
975 ** @suffix Char Character code
976 ** @suffix Bin Numeric base code
977 **
978 ** @argrule Bin base [ajint] Binary base code in range 0 to 31
979 ** @argrule Char c [char] Character base code
980 **
981 ** @valrule Exists [AjBool] True is the code exists
982 **
983 ** @fcategory use
984 **
985 ******************************************************************************/
986 
987 
988 
989 
990 /* @func ajResidueExistsBin ***************************************************
991 **
992 ** Tests whether a residue code exists
993 **
994 ** @param [r] base [ajint] Base code in range 0 to 31
995 ** @return [AjBool] True if base code is known
996 **
997 **
998 ** @release 6.0.0
999 ******************************************************************************/
1000 
ajResidueExistsBin(ajint base)1001 AjBool ajResidueExistsBin(ajint base)
1002 {
1003     if(!aj_residue_I)
1004 	residueInit();
1005 
1006     if(ajStrGetLen(aj_residue_iubS[base].code))
1007         return ajTrue;
1008 
1009     return ajFalse;
1010 }
1011 
1012 
1013 
1014 
1015 /* @func ajResidueExistsChar **************************************************
1016 **
1017 ** Tests whether a residue code exists
1018 **
1019 ** @param [r] c [char] Base character
1020 ** @return [AjBool] True if base code is known
1021 **
1022 **
1023 ** @release 6.0.0
1024 ******************************************************************************/
1025 
ajResidueExistsChar(char c)1026 AjBool ajResidueExistsChar(char c)
1027 {
1028     int itest;
1029 
1030     if(!aj_residue_I)
1031 	residueInit();
1032 
1033     itest = toupper((int)c);
1034 
1035     if(ajStrGetLen(aj_residue_iubS[itest].code))
1036         return ajTrue;
1037 
1038     itest = tolower((int)c);
1039 
1040     if(ajStrGetLen(aj_residue_iubS[itest].code))
1041         return ajTrue;
1042 
1043     return ajFalse;
1044 }
1045 
1046 
1047 
1048 
1049 /* @section retrieval
1050 **
1051 ** Functions
1052 **
1053 ** @fdata      [none]
1054 **
1055 ** @nam3rule Get Return a value
1056 ** @nam4rule GetCodes Returns a string of matching base codes
1057 ** @nam4rule GetMnemonic Returns mnemonic string
1058 **
1059 ** @argrule Get base [ajint] Binary base code in range 0 to 31
1060 **
1061 ** @valrule GetCodes [const AjPStr] Matching base codes
1062 ** @valrule GetMnemonic [const AjPStr] Mnemonic for base code
1063 **
1064 ** @fcategory use
1065 */
1066 
1067 
1068 
1069 
1070 /* @func ajResidueGetCodes ****************************************************
1071 **
1072 ** Returns a string of matching amino acid residue codes
1073 **
1074 ** @param [r] base [ajint] Original base code
1075 **
1076 ** @return [const AjPStr] Base codes
1077 **
1078 ** @release 6.0.0
1079 ******************************************************************************/
1080 
ajResidueGetCodes(ajint base)1081 const AjPStr ajResidueGetCodes(ajint base)
1082 {
1083     if(!aj_residue_I)
1084 	residueInit();
1085 
1086     return  aj_residue_iubS[base].list;
1087 }
1088 
1089 
1090 
1091 
1092 /* @func ajResidueGetMnemonic *************************************************
1093 **
1094 ** Returns a string of matching amino acid residue codes
1095 **
1096 ** @param [r] base [ajint] Original base code
1097 **
1098 ** @return [const AjPStr] Base codes
1099 **
1100 ** @release 6.0.0
1101 ******************************************************************************/
1102 
ajResidueGetMnemonic(ajint base)1103 const AjPStr ajResidueGetMnemonic(ajint base)
1104 {
1105     if(!aj_residue_I)
1106 	residueInit();
1107 
1108     return  aj_residue_iubS[base].mnemonic;
1109 }
1110 
1111 
1112 
1113 
1114 /* @funcstatic residueInit ****************************************************
1115 **
1116 ** Sets up binary OR'd representation of an IUB residues in a table
1117 ** aj_residue_table where A=1, C=2, etc
1118 ** Also sets up a match probability array aj_residue_prob holding the
1119 ** probability of one IUB residue matching any other.
1120 ** Uses the Eresidues.iub file
1121 ** Is initialised if necessary from other AJAX functions.
1122 **
1123 ** @return [AjBool] True on success
1124 **
1125 ** @release 6.0.0
1126 ******************************************************************************/
1127 
residueInit(void)1128 static AjBool residueInit(void)
1129 {
1130     AjPFile bfptr    = NULL;
1131     AjPStr  bfname   = NULL;
1132     AjPStr  line     = NULL;
1133     AjPStr  code     = NULL;
1134     AjPStr  list     = NULL;
1135     AjPStr  mnemonic = NULL;
1136 
1137     ajint i;
1138     ajint j;
1139     ajint k;
1140 
1141     ajint c;
1142     ajint qc;
1143 
1144     ajint l1;
1145     ajint l2;
1146 
1147     ajint x;
1148     ajint y;
1149 
1150     ajint n;
1151     const char *p;
1152     const char *q;
1153 
1154     if(aj_residue_I)
1155 	return ajTrue;
1156 
1157 
1158     for(i=0;i<256;++i)
1159     {
1160 	aj_residue_iubS[i].code = ajStrNewC("");
1161 	aj_residue_iubS[i].list = ajStrNewC("");
1162 	aj_residue_table[i] = 0;
1163     }
1164 
1165     code = ajStrNew();
1166     list = ajStrNew();
1167     ajStrAssignClear(&code);
1168     ajStrAssignC(&list,"ABCDEFGHIJKLMNOPQRSTUVWXYZ");
1169 
1170 
1171     ajStrAssignC(&bfname,IUBPFILE);
1172     bfptr = ajDatafileNewInNameS(bfname);
1173 
1174     if(!bfptr)
1175         ajFatal("%S file not found\n", bfname);
1176 
1177 
1178     while(ajReadline(bfptr, &line))
1179     {
1180 	p = ajStrGetPtr(line);
1181 
1182 	if(*p=='#' || *p=='!' || *p=='\n')
1183 	    continue;
1184 
1185 	p = ajSysFuncStrtok(p," \t\r\n");
1186 	ajStrAssignC(&code,p);
1187 	p=ajSysFuncStrtok(NULL," \t\r\n");
1188 
1189 	if(sscanf(p,"%d",&n)!=1)
1190 	    ajFatal("Bad format IUB file");
1191 
1192 	p = ajSysFuncStrtok(NULL," \t\r\n");
1193 	ajStrAssignC(&list,p);
1194 	p = ajSysFuncStrtok(NULL," \t\r\n");
1195 	ajStrAssignC(&mnemonic,p);
1196 	qc = (ajint) ajStrGetCharFirst(code);
1197 	ajStrAssignS(&aj_residue_iubS[toupper(qc)].code,code);
1198 	ajStrAssignS(&aj_residue_iubS[toupper(qc)].list,list);
1199 	ajStrAssignS(&aj_residue_iubS[toupper(qc)].mnemonic,mnemonic);
1200 	ajStrAssignS(&aj_residue_iubS[tolower(qc)].code,code);
1201 	ajStrAssignS(&aj_residue_iubS[tolower(qc)].list,list);
1202 	ajStrAssignS(&aj_residue_iubS[tolower(qc)].mnemonic,mnemonic);
1203 	aj_residue_table[toupper(qc)] = n;
1204 	aj_residue_table[tolower(qc)] = n;
1205     }
1206 
1207     ajStrDel(&code);
1208     ajStrDel(&list);
1209     ajStrDel(&line);
1210     ajStrDel(&mnemonic);
1211     ajStrDel(&bfname);
1212 
1213     ajFileClose(&bfptr);
1214 
1215 
1216     for(i=0;i<32;++i)
1217     {
1218 	x = ajBasecodeFromInt(i);
1219 
1220 	for(j=0;j<32;++j)
1221 	{
1222 	    y = ajBasecodeFromInt(j);
1223 
1224 	    if(!(l1=ajStrGetLen(aj_residue_iubS[x].code)))
1225 	    {
1226 		aj_residue_prob[i][j]=0.0;
1227 		continue;
1228 	    }
1229 
1230 	    if(l1!=1)
1231 		ajFatal("Bad IUB letter");
1232 
1233 	    p = ajStrGetPtr(aj_residue_iubS[x].list);
1234 	    q = ajStrGetPtr(aj_residue_iubS[y].list);
1235 	    l1 = strlen(p);
1236 	    l2 = strlen(q);
1237 
1238 	    for(k=0,c=0;k<l1;++k)
1239 		if(strchr(q,(ajint)*(p+k))) ++c;
1240 
1241 	    if(l2)
1242 		aj_residue_prob[i][j] = (float)c / (float)l2;
1243 	    else
1244 		aj_residue_prob[i][j]=0.0;
1245 	}
1246     }
1247 
1248     aj_residue_I = ajTrue;
1249 
1250     return aj_residue_I;
1251 }
1252 
1253 
1254 
1255 
1256 /* @section Triplet names
1257 **
1258 ** Functions exchanging residue codes with three letter codes
1259 **
1260 ** @fdata      [none]
1261 **
1262 ** @nam3rule From Convert some other form to a residue
1263 ** @nam3rule To   Convert residue to another form
1264 ** @nam4rule Triplet Convert a triplet 3-letter residue name
1265 **
1266 ** @argrule FromTriplet aa3 [const AjPStr] Triplet residue name
1267 ** @argrule From Pc [char*] Triplet residue name
1268 ** @argrule To c [char] Residue character code
1269 ** @argrule ToTriplet Paa3 [AjPStr*] Triplet residue name
1270 **
1271 ** @valrule * [AjBool] True if code was recognised.
1272 **
1273 ** @fcategory cast
1274 */
1275 
1276 
1277 
1278 
1279 /* @func ajResidueFromTriplet *************************************************
1280 **
1281 ** Takes a 3 character amino acid code and writes a char with the
1282 ** corresponding single letter code.
1283 **
1284 ** @param [r] aa3 [const AjPStr]   AjPStr object (3 letter code)
1285 ** @param [w] Pc [char *] Resulting residue code
1286 **
1287 ** @return [AjBool] True on success, false if triplet is not recognised
1288 **
1289 ** @release 6.0.0
1290 ** @@
1291 ****************************************************************************/
1292 
ajResidueFromTriplet(const AjPStr aa3,char * Pc)1293 AjBool  ajResidueFromTriplet(const AjPStr aa3, char *Pc)
1294 {
1295     ajint i;
1296 
1297     for(i=0; i<26; i++)
1298 	if(!ajStrCmpC(aa3, BaseAaTable[i]))
1299 	{
1300 	    *Pc = (char) (i + (int) 'A');
1301 
1302 	    return ajTrue;
1303 	}
1304 
1305     if(!ajStrCmpC(aa3, "UNK"))
1306     {
1307 	*Pc = 'X';
1308 
1309 	return ajTrue;
1310     }
1311 
1312     *Pc='X';
1313 
1314     return ajFalse;
1315 }
1316 
1317 
1318 
1319 
1320 /* @func ajResidueToTriplet ***************************************************
1321 **
1322 ** Writes an AjPStr with an amino acid 3 letter code
1323 **
1324 ** @param [r] c [char] Single letter identifier of amino acid
1325 ** @param [w] Paa3  [AjPStr *] AjPStr object
1326 **
1327 ** @return [AjBool] True on success
1328 **
1329 ** @release 6.0.0
1330 ** @@
1331 ******************************************************************************/
1332 
ajResidueToTriplet(char c,AjPStr * Paa3)1333 AjBool  ajResidueToTriplet(char c, AjPStr *Paa3)
1334 {
1335     ajint idx;
1336 
1337      if((idx=ajBasecodeToInt(c))>25)
1338 	return ajFalse;
1339 
1340     ajStrAssignC(Paa3, BaseAaTable[idx]);
1341 
1342     return ajTrue;
1343 }
1344 
1345 
1346 
1347 
1348 /* @datasection [none] Base *************************************************
1349 **
1350 ** Function is for manipulating nucleotide base codes
1351 **
1352 ** @nam2rule Base
1353 **
1354 */
1355 
1356 
1357 
1358 
1359 /* @datasection [none] Basecode *******************************************
1360 **
1361 ** Function is for coding the letters used for nucleotide bases and
1362 ** protein residues as integers from zero for use as array indexes.
1363 **
1364 ** @nam2rule Basecode
1365 **
1366 */
1367 
1368 
1369 
1370 
1371 /* @section Conversion
1372 **
1373 ** @fdata [none]
1374 **
1375 ** @nam3rule From Convert from another type
1376 ** @nam3rule To Convert To another type
1377 ** @nam4rule Int Convert integer code type
1378 **
1379 ** @argrule To c [ajint] Base code character to convert from
1380 ** @argrule FromInt n [ajint] Integer base code to convert from
1381 **
1382 ** @valrule From [ajint] Base code character
1383 ** @valrule ToInt [ajint] Integer base code
1384 **
1385 ** @fcategory cast
1386 */
1387 
1388 
1389 
1390 
1391 /* @func ajBasecodeFromInt ****************************************************
1392 **
1393 ** Returns 'A' for 0 to  'Z' for 25
1394 **
1395 ** @param  [r] n [ajint] character to convert
1396 **
1397 ** @return [ajint] 0 as 'A' up to  25 as 'Z'
1398 **
1399 ** @release 6.0.0
1400 ** @@
1401 ******************************************************************************/
1402 
ajBasecodeFromInt(ajint n)1403 ajint ajBasecodeFromInt(ajint n)
1404 {
1405     if(n>25)
1406 	return (ajint) '*';
1407 
1408     if(n<0)
1409 	return (ajint) '*';
1410 
1411     return(n+(ajint)'A');
1412 }
1413 
1414 
1415 
1416 
1417 /* @func ajBasecodeToInt ******************************************************
1418 **
1419 ** Returns A=0 to Z=25  or 27 otherwise
1420 **
1421 ** @param  [r] c [ajint] character to convert
1422 **
1423 ** @return [ajint] A=0 to Z=25 or 27 if unknown
1424 **
1425 ** @release 6.0.0
1426 ** @@
1427 ******************************************************************************/
1428 
ajBasecodeToInt(ajint c)1429 ajint ajBasecodeToInt(ajint c)
1430 {
1431     ajint ic = toupper(c);
1432 
1433     if(ic >= (ajint) 'A' && ic <= (ajint) 'Z')
1434 	return(ic - (ajint)'A');
1435 
1436     return 27;
1437 }
1438 
1439 
1440 
1441 
1442 #ifdef AJ_COMPILE_DEPRECATED_BOOK
1443 #endif
1444 
1445 
1446 
1447 
1448 #ifdef AJ_COMPILE_DEPRECATED
1449 /* @obsolete ajBaseProb
1450 ** @rename ajBaseAlphaCompare
1451 */
1452 
ajBaseProb(ajint base1,ajint base2)1453 __deprecated float  ajBaseProb(ajint base1, ajint base2)
1454 {
1455     return ajBaseAlphaCompare(base1, base2);
1456 }
1457 
1458 
1459 
1460 
1461 /* @obsolete ajAZToBin
1462 ** @rename ajBaseAlphaToBin
1463 */
1464 
ajAZToBin(ajint c)1465 __deprecated ajint ajAZToBin(ajint c)
1466 {
1467     return ajBaseAlphaToBin(c);
1468 }
1469 
1470 
1471 
1472 
1473 /* @obsolete ajAZToBinC
1474 ** @rename ajBaseAlphacharToBin
1475 */
1476 
ajAZToBinC(char c)1477 __deprecated char ajAZToBinC(char c)
1478 {
1479     return (char)ajBaseAlphacharToBin((int)c);
1480 }
1481 
1482 
1483 
1484 
1485 /* @obsolete ajBaseComp
1486 ** @rename ajBaseAlphacharComp
1487 */
ajBaseComp(char base)1488 __deprecated char ajBaseComp(char base)
1489 {
1490 
1491     return ajBaseAlphacharComp(base);
1492 }
1493 
1494 
1495 
1496 
1497 /* @obsolete ajSeqBaseComp
1498 ** @rename ajBaseComp
1499 */
ajSeqBaseComp(char base)1500 __deprecated char ajSeqBaseComp(char base)
1501 {
1502     return ajBaseAlphacharComp(base);
1503 }
1504 
1505 
1506 
1507 
1508 /* @obsolete ajBinToAZ
1509 ** @rename ajBaseBinToAlpha
1510 */
1511 
ajBinToAZ(ajint c)1512 __deprecated char ajBinToAZ(ajint c)
1513 {
1514     return ajBaseBinToAlpha(c);
1515 }
1516 
1517 
1518 
1519 
1520 /* @obsolete ajBaseAa1ToAa3
1521 ** @rename ajResidueToTriplet
1522 */
1523 
ajBaseAa1ToAa3(char aa1,AjPStr * Paa3)1524 __deprecated AjBool  ajBaseAa1ToAa3(char aa1, AjPStr *Paa3)
1525 {
1526     return ajResidueToTriplet(aa1, Paa3);
1527 }
1528 
1529 
1530 
1531 
1532 /* @obsolete ajIntToAZ
1533 ** @rename ajBasecodeFromInt
1534 */
1535 
ajIntToAZ(ajint n)1536 __deprecated ajint ajIntToAZ(ajint n)
1537 {
1538     return ajBasecodeFromInt(n);
1539 }
1540 
1541 
1542 
1543 
1544 /* @obsolete ajAZToInt
1545 ** @rename ajBasecodeToInt
1546 */
1547 
ajAZToInt(ajint c)1548 __deprecated ajint ajAZToInt(ajint c)
1549 {
1550 
1551     return ajBasecodeToInt(c);
1552 }
1553 #endif
1554