1 // utility functions
2 //
3 #ifndef FREEBAYES_UTILITY_H
4 #define FREEBAYES_UTILITY_H
5 
6 #include <cmath>
7 #include <vector>
8 #include <list>
9 #include <string>
10 #include <algorithm>
11 #include <string>
12 #include <float.h>
13 #include <iostream>
14 #include <fstream>
15 #include <map>
16 #include <time.h>
17 #include "convert.h"
18 #include "ttmath.h"
19 
20 using namespace std;
21 
22 typedef ttmath::Big<TTMATH_BITS(256), TTMATH_BITS(64)> BigFloat;
23 
24 long double factorial(int);
25 short qualityChar2ShortInt(char c);
26 long double qualityChar2LongDouble(char c);
27 long double lnqualityChar2ShortInt(char c);
28 char qualityInt2Char(short i);
29 //long double phred2float(int qual);
30 long double phred2ln(int qual);
31 long double ln2phred(long double prob);
32 long double ln2log10(long double prob);
33 long double log102ln(long double prob);
34 long double phred2float(int qual);
35 long double float2phred(long double prob);
36 long double big2phred(const BigFloat& prob);
37 long double nan2zero(long double x);
38 long double powln(long double m, int n);
39 // here 'joint' means 'probability that we have a vector entirely composed of true bases'
40 long double jointQuality(const std::vector<short>& quals);
41 long double jointQuality(const std::string& qualstr);
42 std::vector<short> qualities(const std::string& qualstr);
43 //
44 long double sumQuality(const std::string& qualstr);
45 long double minQuality(const std::string& qualstr);
46 short minQuality(const std::vector<short>& qualities);
47 long double averageQuality(const std::string& qualstr);
48 long double averageQuality(const std::vector<short>& qualities);
49 //unsigned int factorial(int n);
50 bool stringInVector(string item, vector<string> items);
51 int upper(int c); // helper to below, wraps toupper
52 string uppercase(string s);
53 bool allATGC(string& s);
54 string strip(string const& str, char const* separators = " \t");
55 
56 int binomialCoefficient(int n, int k);
57 long double binomialCoefficientLn(int k, int n);
58 long double binomialProb(int k, int n, long double p);
59 long double impl_binomialProbln(int k, int n, long double p);
60 long double binomialProbln(int k, int n, long double p);
61 
62 long double poissonpln(int observed, int expected);
63 long double poissonp(int observed, int expected);
64 long double poissonPvalLn(int a, int b);
65 
66 long double gammaln( long double x);
67 long double factorial( int n);
68 double factorialln( int n);
69 long double impl_factorialln( int n);
70 
71 #define MAX_FACTORIAL_CACHE_SIZE 100000
72 
73 class FactorialCache : public map<int, long double> {
74 public:
factorialln(int n)75     long double factorialln(int n) {
76         map<int, long double>::iterator f = find(n);
77         if (f == end()) {
78             if (size() > MAX_FACTORIAL_CACHE_SIZE) {
79                 clear();
80             }
81             long double fln = impl_factorialln(n);
82             insert(make_pair(n, fln));
83             return fln;
84         } else {
85             return f->second;
86         }
87     }
88 };
89 
90 #define MAX_BINOMIAL_CACHE_SIZE 100000
91 
92 class BinomialCache : public map<long double, map<pair<int, int>, long double> > {
93 public:
binomialProbln(int k,int n,long double p)94     long double binomialProbln(int k, int n, long double p) {
95         map<pair<int, int>, long double>& t = (*this)[p];
96         pair<int, int> kn = make_pair(k, n);
97         map<pair<int, int>, long double>::iterator f = t.find(kn);
98         if (f == t.end()) {
99             if (t.size() > MAX_BINOMIAL_CACHE_SIZE) {
100                 t.clear();
101             }
102             long double bln = impl_binomialProbln(k, n, p);
103             t.insert(make_pair(kn, bln));
104             return bln;
105         } else {
106             return f->second;
107         }
108     }
109 };
110 
111 long double cofactor( int n, int i);
112 long double cofactorln( int n, int i);
113 
114 long double harmonicSum(int n);
115 
116 long double safedivide(long double a, long double b);
117 
118 long double safe_exp(long double ln);
119 
120 BigFloat big_exp(long double ln);
121 
122 long double logsumexp_probs(const vector<long double>& lnv);
123 long double logsumexp(const vector<long double>& lnv);
124 
125 long double betaln(const vector<long double>& alphas);
126 long double beta(const vector<long double>& alphas);
127 
128 long double hoeffding(double successes, double trials, double prob);
129 long double hoeffdingln(double successes, double trials, double prob);
130 
131 int levenshteinDistance(const std::string source, const std::string target);
132 bool isTransition(string& ref, string& alt);
133 
134 string dateStr(void);
135 
136 long double string2float(const string& s);
137 long double log10string2ln(const string& s);
138 
139 string mergeCigar(const string& c1, const string& c2);
140 vector<pair<int, string> > splitCigar(const string& cigarStr);
141 list<pair<int, string> > splitCigarList(const string& cigarStr);
142 string joinCigar(const vector<pair<int, string> >& cigar);
143 string joinCigarList(const list<pair<int, string> >& cigar);
144 bool isEmptyCigarElement(const pair<int, string>& elem);
145 
146 std::string operator*(std::string const &s, size_t n);
147 
148 void normalizeSumToOne(vector<long double>&);
149 
150 void addLinesFromFile(vector<string>& v, const string& f);
151 
152 double entropy(const string& st);
153 
154 #endif
155