1 /* @source ajdan **************************************************************
2 **
3 ** AJAX nucleic acid functions
4 **
5 ** @author Copyright (C) 1999 Alan Bleasby
6 ** @version $Revision: 1.26 $
7 ** @modified Feb 25 ajb First version
8 ** @modified $Date: 2011/11/08 15:07:45 $ by $Author: rice $
9 ** @@
10 **
11 ** This library is free software; you can redistribute it and/or
12 ** modify it under the terms of the GNU Lesser General Public
13 ** License as published by the Free Software Foundation; either
14 ** version 2.1 of the License, or (at your option) any later version.
15 **
16 ** This library is distributed in the hope that it will be useful,
17 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 ** Lesser General Public License for more details.
20 **
21 ** You should have received a copy of the GNU Lesser General Public
22 ** License along with this library; if not, write to the Free Software
23 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
24 ** MA 02110-1301, USA.
25 **
26 ******************************************************************************/
27
28
29 #include "ajlib.h"
30
31 #include "ajdan.h"
32 #include "ajsys.h"
33 #include "ajfileio.h"
34 #include "ajfiledata.h"
35 #include "ajbase.h"
36
37 #include <math.h>
38 #include <string.h>
39 #include <ctype.h>
40
41 #define DNAMELTFILE "Edna.melt"
42 #define RNAMELTFILE "Erna.melt"
43 #define MAXMELTSAVE 10000
44
45 static AjOMelt meltTable[256];
46 static AjBool meltInitDone = AJFALSE;
47
48 static ajint aj_melt_savesize = 0;
49 static AjBool aj_melt_saveinit = 0;
50 static AjBool aj_melt_saveshift = 1;
51
52 static float meltProbScore(const AjPStr seq1, const AjPStr seq2, ajint len);
53
54
55
56
57 /* @func ajMeltInit ***********************************************************
58 **
59 ** Initialises melt entropies, enthalpies and energies. Different data
60 ** files are read for DNA or RNA heteroduplex. Also sets optional flag
61 ** for array saving of the above.
62 **
63 ** @param [r] isdna [AjBool] true for DNA, false for RNA
64 ** @param [r] savesize [ajint] Size of array to save, or zero if none
65 ** @return [void] Number of energies to save
66 **
67 ** @release 1.0.0
68 ******************************************************************************/
69
ajMeltInit(AjBool isdna,ajint savesize)70 void ajMeltInit(AjBool isdna, ajint savesize)
71 {
72 AjPFile mfptr;
73 AjPStr mfname = NULL;
74 AjPStr pair = NULL;
75 AjPStr pair1 = NULL;
76 AjPStr pair2 = NULL;
77 AjPStr acgt = NULL;
78 AjPStr line = NULL;
79
80 ajint i;
81 ajint j;
82 ajint k;
83
84 char *p;
85 const char *q;
86 float enthalpy;
87 float entropy;
88 float energy;
89 ajuint iline = 0;
90
91 AjBool got1;
92 AjBool got2;
93
94
95 aj_melt_savesize = savesize;
96 aj_melt_saveinit = ajFalse;
97
98 if(meltInitDone)
99 return;
100
101 mfname = ajStrNew();
102
103 if(isdna)
104 ajStrAssignEmptyC(&mfname,DNAMELTFILE);
105 else
106 ajStrAssignEmptyC(&mfname,RNAMELTFILE);
107
108 mfptr = ajDatafileNewInNameS(mfname);
109
110 if(!mfptr)
111 ajFatal("Entropy/enthalpy/energy file '%S' not found\n",
112 mfname);
113
114 pair1 = ajStrNew();
115 pair2 = ajStrNew();
116 line = ajStrNew();
117
118 ajStrAssignC(&pair,"AA");
119 ajStrAssignC(&acgt,"ACGT");
120
121 p = ajStrGetuniquePtr(&pair);
122 q = ajStrGetPtr(acgt);
123
124 for(i=0,k=0;i<4;++i)
125 {
126 *p = *(q+i);
127
128 for(j=0;j<4;++j)
129 {
130 *(p+1) = *(q+j);
131 meltTable[k++].pair = ajStrNewC(p);
132 }
133 }
134
135 iline = 0;
136 while(ajReadline(mfptr, &line))
137 {
138 ajStrRemoveWhiteExcess(&line);
139 iline++;
140 p = ajStrGetuniquePtr(&line);
141
142 if(*p=='#' || *p=='!' || !*p)
143 continue;
144
145 p = ajSysFuncStrtok(p," ");
146
147 if(!p)
148 ajDie("Bad melt data file '%F' line %d '%S'",
149 mfptr, iline, line);
150 ajStrAssignC(&pair1,p);
151 p = ajSysFuncStrtok(NULL," ");
152
153 if(!p)
154 ajDie("Bad melt data file '%F' line %d '%S'",
155 mfptr, iline, line);
156 ajStrAssignC(&pair2,p);
157 p = ajSysFuncStrtok(NULL," ");
158
159 if(!p)
160 ajDie("Bad melt data file '%F' line %d '%S'",
161 mfptr, iline, line);
162
163 if(sscanf(p,"%f",&enthalpy)!=1)
164 ajDie("Bad melt data file '%F' line %d '%S'",
165 mfptr, iline, line);
166
167 p = ajSysFuncStrtok(NULL," ");
168
169 if(sscanf(p,"%f",&entropy)!=1)
170 ajDie("Bad melt data file '%F' line %d '%S'",
171 mfptr, iline, line);
172 p = ajSysFuncStrtok(NULL," ");
173
174 if(sscanf(p,"%f",&energy)!=1)
175 ajDie("Bad melt data file '%F' line %d '%S'",
176 mfptr, iline, line);
177
178 got1 = got2 = ajFalse;
179
180 for(k=0;k<16;++k)
181 if(!ajStrCmpS(meltTable[k].pair,pair1))
182 {
183 meltTable[k].enthalpy = enthalpy;
184 meltTable[k].entropy = entropy;
185 meltTable[k].energy = energy;
186 got1 = ajTrue;
187 }
188
189 for(k=0;k<16;++k)
190 if(!ajStrCmpS(meltTable[k].pair,pair2))
191 {
192 meltTable[k].enthalpy = enthalpy;
193 meltTable[k].entropy = entropy;
194 meltTable[k].energy = energy;
195 got2 = ajTrue;
196 }
197
198 if(!got1 || !got2)
199 ajDie("Bad melt data file '%F' line %d '%S' duplicate pair",
200 mfptr, iline, line);
201 }
202
203 ajStrDel(&mfname);
204 ajStrDel(&pair);
205 ajStrDel(&pair1);
206 ajStrDel(&pair2);
207 ajStrDel(&acgt);
208 ajStrDel(&line);
209
210 ajFileClose(&mfptr);
211
212 meltInitDone = ajTrue;
213
214 return;
215 }
216
217
218
219
220 /* @funcstatic meltProbScore **************************************************
221 **
222 ** Gives a score for the probability of two sequences being the same.
223 ** The sequences are the same length.
224 **
225 ** Uses IUB ambiguity codes. The result is the sum of the probabilities
226 ** at each position.
227 **
228 ** @param [r] seq1 [const AjPStr] Pointer to a sequence string
229 ** @param [r] seq2 [const AjPStr] Pointer to a another sequence
230 ** @param [r] len [ajint] Length of sequences
231 ** @return [float] Match probability
232 **
233 ** @release 6.2.0
234 ******************************************************************************/
235
meltProbScore(const AjPStr seq1,const AjPStr seq2,ajint len)236 static float meltProbScore(const AjPStr seq1, const AjPStr seq2, ajint len)
237 {
238 ajint mlen;
239 float score;
240 ajint i;
241 ajint x;
242 ajint y;
243 const char *p;
244 const char *q;
245
246
247 mlen = (ajStrGetLen(seq1) < ajStrGetLen(seq2)) ? ajStrGetLen(seq1) :
248 ajStrGetLen(seq2);
249
250 if(len > 0)
251 mlen = (mlen < len) ? mlen : len;
252
253 score = 0.0;
254
255 if(!mlen)
256 return score;
257
258 score = 1.0;
259 p = ajStrGetPtr(seq1);
260 q = ajStrGetPtr(seq2);
261
262 for(i=0; i<mlen; ++i)
263 {
264 x = ajBasecodeToInt(*(p+i));
265 y = ajBasecodeToInt(*(q+i));
266 score *= ajBaseAlphaCompare(x,y);
267 }
268
269 return score;
270 }
271
272
273
274
275 /* @func ajMeltEnergy *********************************************************
276 **
277 ** Calculates melt energy and enthalpy/entropy for a sequence string.
278 ** An optional shift is given for stepping along the sequence and loading
279 ** up energy arrays.
280 **
281 ** @param [r] strand [const AjPStr] Pointer to a sequence string
282 ** @param [r] len [ajint] Length of sequence
283 ** @param [r] shift [ajint] Stepping value
284 ** @param [r] isDNA [AjBool] DNA or RNA
285 ** @param [r] maySave [AjBool] May use the save arrays for speedup
286 ** @param [w] enthalpy [float*] enthalpy
287 ** @param [w] entropy [float*] entropy
288 **
289 ** @return [float] Melt energy
290 **
291 ** @release 1.0.0
292 ******************************************************************************/
293
ajMeltEnergy(const AjPStr strand,ajint len,ajint shift,AjBool isDNA,AjBool maySave,float * enthalpy,float * entropy)294 float ajMeltEnergy(const AjPStr strand, ajint len, ajint shift, AjBool isDNA,
295 AjBool maySave, float *enthalpy, float *entropy)
296 {
297 AjPStr line;
298 ajint i;
299 ajint j;
300 ajint k;
301 ajint ipos;
302 const char *p;
303 static float energy;
304 float ident;
305 AjBool doShift;
306
307 static float saveEnthalpy[MAXMELTSAVE];
308 static float saveEntropy[MAXMELTSAVE];
309 static float saveEnergy[MAXMELTSAVE];
310
311
312 ajMeltInit(isDNA,len);
313
314 if (maySave == ajFalse)
315 aj_melt_saveshift = ajFalse;
316
317 doShift = (aj_melt_saveshift && aj_melt_savesize > 0) ? ajTrue : ajFalse;
318
319
320 ipos = 0;
321
322 if(doShift)
323 {
324 if(!aj_melt_saveinit)
325 {
326 ipos = 0;
327
328 for(i=0;i<aj_melt_savesize;++i)
329 saveEnergy[i] = saveEntropy[i] = saveEnthalpy[i] = 0.0;
330 energy = *entropy = *enthalpy = 0.0;
331 aj_melt_saveinit = ajTrue;
332 }
333 else
334 {
335 ipos = (len - shift) - 1;
336
337 for(i=0;i<shift;++i)
338 {
339 energy -= saveEnergy[i];
340 *entropy -= saveEntropy[i];
341 *enthalpy -= saveEnthalpy[i];
342 }
343
344 for(i=0,k=shift; k < aj_melt_savesize; ++i, ++k)
345 {
346 saveEnergy[i] = saveEnergy[k];
347 saveEntropy[i] = saveEntropy[k];
348 saveEnthalpy[i] = saveEnthalpy[k];
349 }
350 }
351 }
352 else
353 {
354 ipos=0;
355 energy = *entropy = *enthalpy = 0.0;
356 }
357
358 line = ajStrNew();
359 p = ajStrGetPtr(strand);
360
361 while(ipos < len-1)
362 {
363 if(doShift)
364 {
365 saveEnthalpy[ipos] = 0.0;
366 saveEntropy[ipos] = 0.0;
367 saveEnergy[ipos] = 0.0;
368 }
369
370 for(j=0;j<16;++j)
371 {
372 ajStrAssignSubC(&line,p+ipos,0,1);
373 ident = meltProbScore(meltTable[j].pair, line, 2);
374
375 if(ident>0.9)
376 {
377 if(doShift)
378 {
379 saveEnergy[ipos] += (ident * meltTable[j].energy);
380 saveEntropy[ipos] += (ident * meltTable[j].entropy);
381 saveEnthalpy[ipos] += (ident * meltTable[j].enthalpy);
382 }
383 else
384 {
385 energy += (ident * meltTable[j].energy);
386 *entropy += (ident * meltTable[j].entropy);
387 *enthalpy += (ident * meltTable[j].enthalpy);
388 }
389 }
390 }
391
392
393 if(doShift)
394 {
395 energy += saveEnergy[ipos];
396 *entropy += saveEntropy[ipos];
397 *enthalpy += saveEnthalpy[ipos];
398 }
399 ++ipos;
400 }
401
402 ajStrDel(&line);
403
404 return energy;
405 }
406
407
408
409
410 /* @func ajMeltTemp ***********************************************************
411 **
412 ** Calculates melt temperature of DNA or RNA
413 ** An optional shift is given for stepping along the sequence and loading
414 ** up energy arrays.
415 **
416 ** @param [r] strand [const AjPStr] Pointer to a sequence string
417 ** @param [r] len [ajint] Length of sequence
418 ** @param [r] shift [ajint] Stepping value
419 ** @param [r] saltconc [float] mM salt concentration
420 ** @param [r] DNAconc [float] nM DNA concentration
421 ** @param [r] isDNA [AjBool] DNA or RNA
422 **
423 ** @return [float] Melt temperature
424 **
425 ** @release 6.2.0
426 ******************************************************************************/
427
ajMeltTemp(const AjPStr strand,ajint len,ajint shift,float saltconc,float DNAconc,AjBool isDNA)428 float ajMeltTemp(const AjPStr strand, ajint len, ajint shift, float saltconc,
429 float DNAconc, AjBool isDNA)
430 {
431 double entropy;
432 double enthalpy;
433 double dTm;
434 float Tm;
435 static float sumEntropy;
436 static float sumEnthalpy;
437 float To;
438 float R;
439 double LogDNA;
440
441
442 R = (float) 1.987; /* molar gas constant (cal/c * mol) */
443 LogDNA = R * (float)log((double)(DNAconc/4000000000.0)); /* nM */
444 To = (float) 273.15;
445
446 ajMeltEnergy(strand, len, shift, isDNA, ajFalse, &sumEnthalpy,
447 &sumEntropy);
448
449 entropy = -10.8 - sumEntropy;
450 entropy += (len-1) * (log10((double) (saltconc/1000.0))) *
451 (float) 0.368;
452
453 enthalpy = -sumEnthalpy;
454
455 dTm = ((enthalpy*1000.0) / (entropy+LogDNA)) - To;
456 Tm = (float) dTm;
457
458 return Tm;
459 }
460
461
462
463
464 /* @func ajMeltGC *************************************************************
465 **
466 ** Calculates GC fraction of a sequence allowing for ambiguity
467 **
468 ** @param [r] strand [const AjPStr] Pointer to a sequence string
469 ** @param [r] len [ajint] Length of sequence
470 **
471 ** @return [float] GC fraction
472 **
473 ** @release 1.0.0
474 ******************************************************************************/
475
ajMeltGC(const AjPStr strand,ajint len)476 float ajMeltGC(const AjPStr strand, ajint len)
477 {
478 ajint t;
479 ajint i;
480 const char *p;
481 double count;
482
483 p=ajStrGetPtr(strand);
484 count = 0.0;
485
486 for(i=0;i<len;++i)
487 {
488 t = toupper((ajint) *(p+i));
489
490 if(strchr("GCS",t))
491 ++count;
492 else if(strchr("ATUW",t))
493 count += 0.0;
494 else if(strchr("RYMK",t))
495 count += 0.5;
496 else if(strchr("NX",t))
497 count += 0.5;
498 else if(strchr("BV",t))
499 count += 0.6666667;
500 else if(strchr("DH",t))
501 count += 0.3333333;
502 }
503
504 return ((float)(count/(double)len));
505 }
506
507
508
509
510 /* @func ajMeltEnergy2 ********************************************************
511 **
512 ** Calculates melt energy for use with programs like prima
513 **
514 ** Giving this routine the complete sequence on the first call and
515 ** setting meltInitDone to false will initialise the energy, entropy
516 ** and enthalpy arrays. Subsequent calls will not look at the
517 ** sequence directly.
518 **
519 ** @param [r] strand [const char *] Pointer to a sequence string
520 ** @param [r] pos [ajint] Position within sequence
521 ** @param [r] len [ajint] Length of sequence segment
522 ** @param [r] isDNA [AjBool] true if dna
523 ** @param [w] enthalpy [float *] calculated enthalpy
524 ** @param [w] entropy [float *] calculated entropy
525 ** @param [w] saveentr [float **] entropy save array
526 ** @param [w] saveenth [float **] enthalpy save array
527 ** @param [w] saveener [float **] energy save array
528 **
529 ** @return [float] melt energy
530 **
531 ** @release 1.0.0
532 ******************************************************************************/
533
ajMeltEnergy2(const char * strand,ajint pos,ajint len,AjBool isDNA,float * enthalpy,float * entropy,float ** saveentr,float ** saveenth,float ** saveener)534 float ajMeltEnergy2(const char *strand, ajint pos, ajint len, AjBool isDNA,
535 float *enthalpy, float *entropy,
536 float **saveentr, float **saveenth, float **saveener)
537 {
538 ajint i;
539 ajint j;
540
541 ajint limit = 0;
542 AjPStr line = NULL;
543 float ident = 0.0;
544 float energy;
545
546 limit = len-1;
547
548 ajMeltInit(isDNA,len);
549
550 line = ajStrNewC("AA");
551
552 for(i=0;i<limit;++i)
553 {
554 ajStrAssignSubC(&line,strand,i,i+1);
555
556 for(j=0;j<16;++j)
557 {
558 ident = meltProbScore(meltTable[j].pair,line,2);
559
560 if(ident>.9)
561 {
562 (*saveentr)[i] += (ident * meltTable[j].entropy);
563 (*saveenth)[i] += (ident * meltTable[j].enthalpy);
564 (*saveener)[i] += (ident * meltTable[j].energy);
565 }
566 }
567 }
568
569 ajStrDel(&line);
570
571 energy = *enthalpy = *entropy = 0.0;
572
573 for(i=0;i<limit;++i)
574 {
575 energy += (*saveener)[pos+i];
576 *entropy += (*saveentr)[pos+i];
577 *enthalpy += (*saveenth)[pos+i];
578 }
579
580 return energy;
581 }
582
583
584
585
586 /* @func ajMeltTempSave *******************************************************
587 **
588 ** Calculates melt temperature of DNA or RNA
589 **
590 ** @param [r] strand [const char*] Pointer to a sequence string
591 ** @param [r] pos [ajint] position within sequence
592 ** @param [r] len [ajint] Length of sequence (segment)
593 ** @param [r] saltconc [float] mM salt concentration
594 ** @param [r] DNAconc [float] nM DNA concentration
595 ** @param [r] isDNA [AjBool] DNA or RNA
596 ** @param [w] saveentr [float **] entropy save array
597 ** @param [w] saveenth [float **] enthalpy save array
598 ** @param [w] saveener [float **] energy save array
599 **
600 ** @return [float] Melt temperature
601 **
602 ** @release 6.2.0
603 ******************************************************************************/
604
ajMeltTempSave(const char * strand,ajint pos,ajint len,float saltconc,float DNAconc,AjBool isDNA,float ** saveentr,float ** saveenth,float ** saveener)605 float ajMeltTempSave(const char *strand, ajint pos, ajint len, float saltconc,
606 float DNAconc, AjBool isDNA,
607 float **saveentr, float **saveenth, float **saveener)
608 {
609 double entropy;
610 double enthalpy;
611 double dTm;
612 float Tm;
613 float sumEntropy;
614 float sumEnthalpy;
615 float To;
616 float R;
617 double LogDNA;
618 /* double LogSalt;*/
619
620
621 /* LogSalt = 16.6 * (float) (log10((double) (saltconc/1000.0))); */ /* mM */
622 R = (float) 1.987; /* molar gas constant (cal/c * mol) */
623 LogDNA = R * (float)log((double)(DNAconc/4000000000.0)); /* nM */
624 To = (float) 273.15;
625
626 ajMeltEnergy2(strand, pos, len, isDNA, &sumEnthalpy,
627 &sumEntropy, saveentr, saveenth, saveener);
628
629 entropy = -10.8 - sumEntropy;
630
631 /* Added for santa lucia */
632 entropy += (len-1) * (log10((double) (saltconc/1000.0))) *
633 (float) 0.368;
634
635
636 enthalpy = -sumEnthalpy;
637
638 /* logsalt removed for santa lucia */
639 dTm = ((enthalpy*1000.0) / (entropy+LogDNA)) + /*LogSalt*/ - To;
640 Tm = (float) dTm; /* slight loss of precision here but no matter */
641
642 return Tm;
643 }
644
645
646
647
648 /* @func ajMeltTempProd *******************************************************
649 **
650 ** Calculates product melt temperature of DNA
651 **
652 ** @param [r] gc [float] GC percentage
653 ** @param [r] saltconc [float] mM salt concentration
654 ** @param [r] len [ajint] Length of sequence (segment)
655 **
656 ** @return [float] Melt temperature
657 **
658 ** @release 6.2.0
659 ******************************************************************************/
660
ajMeltTempProd(float gc,float saltconc,ajint len)661 float ajMeltTempProd(float gc, float saltconc, ajint len)
662 {
663 float ptm;
664 float LogSalt;
665
666
667 LogSalt = (float)16.6 * (float) (log10((double) (saltconc/1000.0)));
668
669 ptm = (float)81.5 - (float)(675/len) + LogSalt + ((float)0.41 * gc);
670
671 return ptm;
672 }
673
674
675
676
677 /* @func ajAnneal *************************************************************
678 **
679 ** Calculates annealing temperature of product and primer
680 **
681 ** @param [r] tmprimer [float] primer Tm
682 ** @param [r] tmproduct [float] product Tm
683 **
684 ** @return [float] Annealing temperature
685 **
686 ** @release 1.0.0
687 ******************************************************************************/
688
ajAnneal(float tmprimer,float tmproduct)689 float ajAnneal(float tmprimer, float tmproduct)
690 {
691 return ((float).7*tmproduct)-(float)14.9+((float).3*tmprimer);
692 }
693
694
695
696
697 /* @func ajMeltExit ***********************************************************
698 **
699 ** Cleans up DNA melting processing internal memory
700 **
701 ** @return [void]
702 **
703 ** @release 4.0.0
704 ** @@
705 ******************************************************************************/
706
ajMeltExit(void)707 void ajMeltExit(void)
708 {
709 ajint i;
710
711 for(i=0;i<256;i++)
712 ajStrDel(&meltTable[i].pair);
713
714 return;
715 }
716
717
718
719
720 #ifdef AJ_COMPILE_DEPRECATED_BOOK
721 #endif
722
723
724
725
726 #ifdef AJ_COMPILE_DEPRECATED
727 /* @obsolete ajTm
728 ** @rename ajMeltTemp
729 */
730
ajTm(const AjPStr strand,ajint len,ajint shift,float saltconc,float DNAconc,AjBool isDNA)731 __deprecated float ajTm(const AjPStr strand, ajint len,
732 ajint shift, float saltconc,
733 float DNAconc, AjBool isDNA)
734 {
735
736 return ajMeltTemp(strand, len, shift, saltconc, DNAconc, isDNA);
737 }
738
739
740
741
742 /* @obsolete ajTm2
743 ** @rename ajMeltTempSave
744 */
745
ajTm2(const char * strand,ajint pos,ajint len,float saltconc,float DNAconc,AjBool isDNA,float ** saveentr,float ** saveenth,float ** saveener)746 __deprecated float ajTm2(const char *strand, ajint pos,
747 ajint len, float saltconc,
748 float DNAconc, AjBool isDNA,
749 float **saveentr, float **saveenth, float **saveener)
750 {
751
752 return ajMeltTempSave(strand, pos, len, saltconc, DNAconc, isDNA,
753 saveentr, saveenth, saveener);
754 }
755
756
757
758
759 /* @obsolete ajProdTm
760 ** @rename ajMeltTempProd
761 */
762
ajProdTm(float gc,float saltconc,ajint len)763 __deprecated float ajProdTm(float gc, float saltconc, ajint len)
764 {
765 return ajMeltTempProd(gc, saltconc, len);
766 }
767 #endif
768