1 // utility functions 2 // 3 #ifndef FREEBAYES_UTILITY_H 4 #define FREEBAYES_UTILITY_H 5 6 #include <cmath> 7 #include <vector> 8 #include <list> 9 #include <string> 10 #include <algorithm> 11 #include <string> 12 #include <float.h> 13 #include <iostream> 14 #include <fstream> 15 #include <map> 16 #include <time.h> 17 #include "convert.h" 18 #include "ttmath.h" 19 20 using namespace std; 21 22 typedef ttmath::Big<TTMATH_BITS(256), TTMATH_BITS(64)> BigFloat; 23 24 long double factorial(int); 25 short qualityChar2ShortInt(char c); 26 long double qualityChar2LongDouble(char c); 27 long double lnqualityChar2ShortInt(char c); 28 char qualityInt2Char(short i); 29 //long double phred2float(int qual); 30 long double phred2ln(int qual); 31 long double ln2phred(long double prob); 32 long double ln2log10(long double prob); 33 long double log102ln(long double prob); 34 long double phred2float(int qual); 35 long double float2phred(long double prob); 36 long double big2phred(const BigFloat& prob); 37 long double nan2zero(long double x); 38 long double powln(long double m, int n); 39 // here 'joint' means 'probability that we have a vector entirely composed of true bases' 40 long double jointQuality(const std::vector<short>& quals); 41 long double jointQuality(const std::string& qualstr); 42 std::vector<short> qualities(const std::string& qualstr); 43 // 44 long double sumQuality(const std::string& qualstr); 45 long double minQuality(const std::string& qualstr); 46 short minQuality(const std::vector<short>& qualities); 47 long double averageQuality(const std::string& qualstr); 48 long double averageQuality(const std::vector<short>& qualities); 49 //unsigned int factorial(int n); 50 bool stringInVector(string item, vector<string> items); 51 int upper(int c); // helper to below, wraps toupper 52 string uppercase(string s); 53 bool allATGC(string& s); 54 string strip(string const& str, char const* separators = " \t"); 55 56 int binomialCoefficient(int n, int k); 57 long double binomialCoefficientLn(int k, int n); 58 long double binomialProb(int k, int n, long double p); 59 long double impl_binomialProbln(int k, int n, long double p); 60 long double binomialProbln(int k, int n, long double p); 61 62 long double poissonpln(int observed, int expected); 63 long double poissonp(int observed, int expected); 64 long double poissonPvalLn(int a, int b); 65 66 long double gammaln( long double x); 67 long double factorial( int n); 68 double factorialln( int n); 69 long double impl_factorialln( int n); 70 71 #define MAX_FACTORIAL_CACHE_SIZE 100000 72 73 class FactorialCache : public map<int, long double> { 74 public: factorialln(int n)75 long double factorialln(int n) { 76 map<int, long double>::iterator f = find(n); 77 if (f == end()) { 78 if (size() > MAX_FACTORIAL_CACHE_SIZE) { 79 clear(); 80 } 81 long double fln = impl_factorialln(n); 82 insert(make_pair(n, fln)); 83 return fln; 84 } else { 85 return f->second; 86 } 87 } 88 }; 89 90 #define MAX_BINOMIAL_CACHE_SIZE 100000 91 92 class BinomialCache : public map<long double, map<pair<int, int>, long double> > { 93 public: binomialProbln(int k,int n,long double p)94 long double binomialProbln(int k, int n, long double p) { 95 map<pair<int, int>, long double>& t = (*this)[p]; 96 pair<int, int> kn = make_pair(k, n); 97 map<pair<int, int>, long double>::iterator f = t.find(kn); 98 if (f == t.end()) { 99 if (t.size() > MAX_BINOMIAL_CACHE_SIZE) { 100 t.clear(); 101 } 102 long double bln = impl_binomialProbln(k, n, p); 103 t.insert(make_pair(kn, bln)); 104 return bln; 105 } else { 106 return f->second; 107 } 108 } 109 }; 110 111 long double cofactor( int n, int i); 112 long double cofactorln( int n, int i); 113 114 long double harmonicSum(int n); 115 116 long double safedivide(long double a, long double b); 117 118 long double safe_exp(long double ln); 119 120 BigFloat big_exp(long double ln); 121 122 long double logsumexp_probs(const vector<long double>& lnv); 123 long double logsumexp(const vector<long double>& lnv); 124 125 long double betaln(const vector<long double>& alphas); 126 long double beta(const vector<long double>& alphas); 127 128 long double hoeffding(double successes, double trials, double prob); 129 long double hoeffdingln(double successes, double trials, double prob); 130 131 int levenshteinDistance(const std::string source, const std::string target); 132 bool isTransition(string& ref, string& alt); 133 134 string dateStr(void); 135 136 long double string2float(const string& s); 137 long double log10string2ln(const string& s); 138 139 string mergeCigar(const string& c1, const string& c2); 140 vector<pair<int, string> > splitCigar(const string& cigarStr); 141 list<pair<int, string> > splitCigarList(const string& cigarStr); 142 string joinCigar(const vector<pair<int, string> >& cigar); 143 string joinCigarList(const list<pair<int, string> >& cigar); 144 bool isEmptyCigarElement(const pair<int, string>& elem); 145 146 std::string operator*(std::string const &s, size_t n); 147 148 void normalizeSumToOne(vector<long double>&); 149 150 void addLinesFromFile(vector<string>& v, const string& f); 151 152 double entropy(const string& st); 153 154 #endif 155