1 /***************************************************************************
2 * Copyright (C) 2009 by BUI Quang Minh *
3 * minh.bui@univie.ac.at *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19 ***************************************************************************/
20 #include "maalignment.h"
21
readLogLL(char * fileName)22 void MaAlignment::readLogLL(char *fileName)
23 {
24 //First read the values from inFile to a DoubleVector
25 DoubleVector _logllVec;
26 int siteNum = -1;
27 string currentString;
28 cout << "\nReading file containing site's loglikelihood: " << fileName << "...." << endl;
29 ifstream inFile;
30 try{
31 inFile.exceptions (ios::failbit | ios::badbit);
32 inFile.open(fileName);
33 /**really start reading*/
34 //read number of sites
35 inFile >> currentString;
36 siteNum = convert_int(currentString.c_str());
37 //ignore "Site_Lh"
38 inFile >> currentString;
39 while (!inFile.eof())
40 {
41 //reading each line of the file
42 //remove the badbit
43 inFile.exceptions (ios::badbit);
44 if ( !(inFile >> currentString) ) break;
45 //set the failbit again
46 inFile.exceptions (ios::failbit | ios::badbit);
47 _logllVec.push_back(convert_double(currentString.c_str()));
48 }/**finish reading*/
49 inFile.clear();
50 inFile.exceptions (ios::failbit | ios::badbit);
51 inFile.close();
52 } catch(bad_alloc){
53 outError(ERR_NO_MEMORY);
54 } catch (const char *str){
55 outError(str);
56 } catch (string str){
57 outError(str);
58 } catch (ios::failure){
59 outError(ERR_READ_INPUT);
60 } catch (...){
61 outError(ERR_READ_ANY);
62 }
63 if (siteNum != _logllVec.size())
64 outError("Actual number of site's likelihoods is not consistent with the announced number in the first line.");
65 cout << "Finish reading, now assign the logLL to the pattern:" << endl;
66
67 logLL.resize(getNPattern(),0.0);
68 for (int i = 0; i < siteNum; i++)
69 {
70 int patIndex = getPatternID(i);
71 if ( logLL[patIndex] == 0 )
72 logLL[patIndex] = _logllVec[i];
73 else
74 if ( logLL[patIndex] != _logllVec[i] )
75 // outError("Conflicting between the likelihoods reported for pattern", (*this)[i]);
76 outError("Conflicting between the likelihoods reported for pattern");
77 }
78 // int npat = getNPattern();
79 // cout << "Number of patterns: " << npat << endl;
80 // for ( int j = 0; j < npat; j++ )
81 // cout << j << "\t" << at(j) << "\t" << logLL[j] << endl;
82 cout << "Finish assigning logLL to the patterns!" << endl;
83 }
84
computeExpectedNorFre()85 IntVector MaAlignment::computeExpectedNorFre()
86 {
87 IntVector expectedNorFre;
88 if ( logLL.empty())
89 outError("Error: log likelihood of patterns are not given!");
90
91 int patNum = getNPattern();
92 int alignLen = getNSite();
93 //resize the expectedNorFre vector
94 expectedNorFre.resize(patNum,-1);
95
96 //Vector containing the likelihood of the pattern p_i
97 DoubleVector LL(patNum,-1.0);
98 double sumLL = 0; //sum of the likelihood of the patterns in the alignment
99
100 //Compute the likelihood from the logLL
101 for ( int i = 0; i < patNum; i++ )
102 {
103 LL[i] = exp(logLL[i]);
104 sumLL += LL[i];
105 }
106
107 //Vector containing l_i = p_i*ell/sum_i(p_i)
108 DoubleVector ell(patNum, -1.0);
109 //Compute l_i
110 for ( int i = 0; i < patNum; i++ )
111 {
112 ell[i] = (double)alignLen * LL[i] / sumLL;
113 }
114
115
116 //Vector containing r_i where r_0 = ell_0; r_{i+1} = ell_{i+1} + r_i - ordinaryRounding(r_i)
117 DoubleVector r(patNum, -1.0);
118 //Compute r_i and the expected normalized frequencies
119 r[0] = ell[0];
120 expectedNorFre[0] = (int)floor(ell[0]+0.5); //note that floor(_number+0.5) returns the ordinary rounding of _number
121 int sum = expectedNorFre[0];
122 for (int j = 1; j < patNum; j++ )
123 {
124 r[j] = ell[j] + r[j-1] - floor(r[j-1]+0.5);
125 expectedNorFre[j] = (int)floor(r[j]+0.5);
126 sum += expectedNorFre[j];
127 }
128
129 //cout << "Number of patterns: " << patNum << ", sum of expected sites: " << sum << endl;
130 return expectedNorFre;
131 }
132
printPatObsExpFre(const char * fileName)133 void MaAlignment::printPatObsExpFre(const char *fileName)
134 {
135 IntVector expectedNorFre = computeExpectedNorFre();
136 printPatObsExpFre(fileName, expectedNorFre);
137 }
138
printPatObsExpFre(const char * fileName,const IntVector expectedNorFre)139 void MaAlignment::printPatObsExpFre(const char *fileName, const IntVector expectedNorFre)
140 {
141 try {
142 ofstream out;
143 out.exceptions(ios::failbit | ios::badbit);
144 out.open(fileName);
145 out << "Pattern\tLogLL\tObservedFre\tExpectedFre" << endl;
146
147 int patNum = getNPattern();
148 int seqNum = getNSeq();
149 int seqID;
150
151 for ( int i = 0; i < patNum; i++ )
152 {
153 for ( seqID = 0; seqID < seqNum; seqID++ ){
154 out << convertStateBackStr(at(i)[seqID]);
155 }
156 out << "\t" << logLL[i] << "\t" << (*this)[i].frequency << "\t" << expectedNorFre[i] << endl;
157 }
158 out.close();
159 } catch (ios::failure) {
160 outError(ERR_WRITE_OUTPUT, fileName);
161 }
162 }
163
generateExpectedAlignment(MaAlignment * aln,double & prob)164 void MaAlignment::generateExpectedAlignment(MaAlignment *aln, double &prob)
165 {
166 //cout << "In function: generating expected alignment!" << endl;
167 IntVector expectedNorFre = aln->computeExpectedNorFre();
168
169 int nsite = aln->getNSite();
170 seq_names.insert(seq_names.begin(), aln->seq_names.begin(), aln->seq_names.end());
171 num_states = aln->num_states;
172 site_pattern.resize(nsite, -1);
173 clear();
174 pattern_index.clear();
175 VerboseMode save_mode = verbose_mode;
176 verbose_mode = min(verbose_mode, VB_MIN); // to avoid printing gappy sites in addPattern
177
178 int patID;
179 int site = 0;
180 int npat = aln->getNPattern();
181
182 double sumFac = 0;
183 double sumProb = 0;
184 double fac = logFac(nsite);
185
186 double sumFacMax = 0;
187 double sumProbMax = 0;
188
189 for (patID = 0; patID < npat; patID++) {
190 int patFre = expectedNorFre[patID];
191 for ( int patSite = 0; patSite < patFre; patSite++)
192 {
193 Pattern pat = aln->at(patID);
194 addPattern(pat,site);
195 site++;
196 }
197
198 //to compute the probability of the new alignment given the multinomial distribution
199 sumFac += logFac(patFre);
200 sumProb += (double)patFre*log((double)aln->at(patID).frequency/(double)nsite);
201
202 //for the unconstraint maximum log likelihood
203 sumFacMax += logFac(aln->at(patID).frequency);
204 sumProbMax += (double)aln->at(patID).frequency*log((double)aln->at(patID).frequency/(double)nsite);
205 }
206 prob = fac - sumFac + sumProb;
207
208 double probMax = fac - sumFacMax + sumProbMax;
209 // cout << "total number of sites: " << site << endl;
210 verbose_mode = save_mode;
211 countConstSite();
212 //cout << "Finish generating expected alignment!" << endl;
213 cout << "Logarithm of the probability of the new alignment given the multinomial distribution of the input alignment is: " << prob << endl;
214 cout << "Maximum unconstraint (log) likelihood of the input alignment: " << probMax << endl;
215 // cout << "Maximum unconstraint likelihood: " << exp(probMax) << endl;
216 }
217
218 /*void MaAlignment::multinomialProb(Alignment objectAlign, double &prob)
219 {
220 cout << "Computing the multinomial probability of an object alignment given a reference alignment ..." << endl;
221 //should we check for compatibility of sequence's names and sequence's order in THIS alignment and in the objectAlign??
222 //check alignment length
223 int nsite = getNSite();
224 assert(nsite == objectAlign.getNSite());
225 double sumFac = 0;
226 double sumProb = 0;
227 double fac = logFac(nsite);
228 int index;
229 for ( Alignment::iterator objectIt = objectAlign.begin(); objectIt != objectAlign.end() ; objectIt++)
230 {
231 PatternIntMap::iterator pat_it = pattern_index.find((*objectIt));
232 if ( pat_it == pattern_index.end() ) //not found ==> error
233 outError("Pattern in the object alignment is not found in the reference alignment!");
234 sumFac += logFac((*objectIt).frequency);
235 index = pat_it->second;
236 sumProb += (double)(*objectIt).frequency*log((double)at(index).frequency/(double)nsite);
237 }
238 prob = fac - sumFac + sumProb;
239 }*/
240
241 /*void MaAlignment::multinomialProb(AlignmentVector objectAligns, DoubleVector &probs)
242 {
243 int num = objectAligns.size();
244 double curProb;
245 if (num > 0)
246 {
247 probs.resize(num,0);
248 for ( int i = 0; i < num; i++ )
249 {
250 (*this).multinomialProb(objectAligns[i], curProb);
251 probs[i] = curProb;
252 }
253 }
254 }*/
255