1 /***************************************************************************
2 * Copyright (C) 2009 by BUI Quang Minh *
3 * minh.bui@univie.ac.at *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19 ***************************************************************************/
20 #include "phylotree.h"
21 #include "vectorclass/instrset.h"
22
23 #if INSTRSET < 2
24 #include "phylokernelnew.h"
25 #define KERNEL_FIX_STATES
26 #include "phylokernelnew.h"
27 #include "vectorclass/vectorf64.h"
28 #endif
29
30 //#include "phylokernel.h"
31 //#include "phylokernelmixture.h"
32 //#include "phylokernelmixrate.h"
33 //#include "phylokernelsitemodel.h"
34
35 #include "model/modelmarkov.h"
36 #include "model/modelset.h"
37
38 /* BQM: to ignore all-gapp subtree at an alignment site */
39 //#define IGNORE_GAP_LH
40
41 //#define USING_SSE
42
setNumThreads(int num_threads)43 void PhyloTree::setNumThreads(int num_threads) {
44 if (!isSuperTree() && aln && num_threads > 1 && num_threads > aln->getNPattern()/8) {
45 outWarning(convertIntToString(num_threads) + " threads for alignment length " +
46 convertIntToString(aln->getNPattern()) + " will slow down analysis");
47 num_threads = max(aln->getNPattern()/8,1);
48 }
49 this->num_threads = num_threads;
50 }
51
setParsimonyKernel(LikelihoodKernel lk)52 void PhyloTree::setParsimonyKernel(LikelihoodKernel lk) {
53
54 if (cost_matrix) {
55 // Sankoff parsimony kernel
56 if (lk < LK_SSE2) {
57 computeParsimonyBranchPointer = &PhyloTree::computeParsimonyBranchSankoff;
58 computePartialParsimonyPointer = &PhyloTree::computePartialParsimonySankoff;
59 return;
60 }
61 if (lk >= LK_AVX) {
62 setParsimonyKernelAVX();
63 return;
64 }
65 if (lk >= LK_SSE2) {
66 setParsimonyKernelSSE();
67 return;
68 }
69 ASSERT(0);
70 return;
71 }
72 // set Fitch parsimony kernel
73 if (lk < LK_SSE2) {
74 computeParsimonyBranchPointer = &PhyloTree::computeParsimonyBranchFast;
75 computePartialParsimonyPointer = &PhyloTree::computePartialParsimonyFast;
76 return;
77 }
78 if (lk >= LK_AVX) {
79 setParsimonyKernelAVX();
80 return;
81 }
82 if (lk >= LK_SSE2) {
83 setParsimonyKernelSSE();
84 return;
85 }
86 ASSERT(0);
87 }
88
setLikelihoodKernel(LikelihoodKernel lk)89 void PhyloTree::setLikelihoodKernel(LikelihoodKernel lk) {
90
91 sse = lk;
92 vector_size = 1;
93 safe_numeric = (params && (params->lk_safe_scaling || leafNum >= params->numseq_safe_scaling)) ||
94 (aln && aln->num_states != 4 && aln->num_states != 20);
95
96 //--- parsimony kernel ---
97 setParsimonyKernel(lk);
98
99 //--- dot-product kernel ---
100 #ifdef __AVX512KNL
101 if (lk >= LK_AVX512) {
102 setDotProductAVX512();
103 } else
104 #endif
105 if (lk >= LK_AVX_FMA) {
106 setDotProductFMA();
107 } else if (lk >= LK_AVX) {
108 setDotProductAVX();
109 } else if (lk >= LK_SSE2) {
110 setDotProductSSE();
111 } else {
112
113 #if INSTRSET < 2
114 #ifdef BOOT_VAL_FLOAT
115 // TODO naive dot-product for float
116 ASSERT(0 && "Not supported, contact developer");
117 // dotProduct = &PhyloTree::dotProductSIMD<float, Vec1f>;
118 #else
119 dotProduct = &PhyloTree::dotProductSIMD<double, Vec1d>;
120 #endif
121 dotProductDouble = &PhyloTree::dotProductSIMD<double, Vec1d>;
122 #endif
123 }
124
125 //--- naive likelihood kernel, no alignment specified yet ---
126 if (!aln) {
127 #if INSTRSET < 2
128 computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchGenericSIMD<Vec1d, SAFE_LH>;
129 computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervGenericSIMD<Vec1d, SAFE_LH>;
130 computeLikelihoodDervMixlenPointer = NULL;
131 computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodGenericSIMD<Vec1d, SAFE_LH>;
132 computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferGenericSIMD<Vec1d, SAFE_LH>;
133 sse = LK_386;
134 #else
135 computeLikelihoodBranchPointer = NULL;
136 computeLikelihoodDervPointer = NULL;
137 computeLikelihoodDervMixlenPointer = NULL;
138 computePartialLikelihoodPointer = NULL;
139 computeLikelihoodFromBufferPointer = NULL;
140 sse = LK_386;
141 #endif
142 return;
143 }
144 // if (model_factory && !model_factory->model->isReversible()) {
145 // // if nonreversible model
146 // computeLikelihoodBranchPointer = &PhyloTree::computeNonrevLikelihoodBranch;
147 // computeLikelihoodDervPointer = &PhyloTree::computeNonrevLikelihoodDerv;
148 // computePartialLikelihoodPointer = &PhyloTree::computeNonrevPartialLikelihood;
149 // computeLikelihoodFromBufferPointer = NULL;
150 // return;
151 // }
152
153 //--- SIMD kernel ---
154 if (lk >= LK_SSE2) {
155 #ifdef __AVX512KNL
156 if (lk >= LK_AVX512) {
157 setLikelihoodKernelAVX512();
158 return;
159 }
160 #endif
161 if (lk >= LK_AVX_FMA) {
162 // CPU supports AVX and FMA
163 setLikelihoodKernelFMA();
164 } else if (lk >= LK_AVX) {
165 // CPU supports AVX
166 setLikelihoodKernelAVX();
167 } else {
168 // SSE kernel
169 setLikelihoodKernelSSE();
170 }
171 return;
172 }
173
174 #if INSTRSET < 2
175 //--- naive kernel for site-specific model ---
176 if (model_factory && model_factory->model->isSiteSpecificModel()) {
177 computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchGenericSIMD<Vec1d, SAFE_LH, false, true>;
178 computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervGenericSIMD<Vec1d, SAFE_LH, false, true>;
179 computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodGenericSIMD<Vec1d, SAFE_LH, false, true>;
180 computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferGenericSIMD<Vec1d, SAFE_LH, false, true>;
181 return;
182 }
183
184 //--- naive (no SIMD) kernel ---
185 computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchGenericSIMD<Vec1d, SAFE_LH>;
186 computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervGenericSIMD<Vec1d, SAFE_LH>;
187 computeLikelihoodDervMixlenPointer = NULL;
188 computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodGenericSIMD<Vec1d, SAFE_LH>;
189 computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferGenericSIMD<Vec1d, SAFE_LH>;
190 #else
191 computeLikelihoodBranchPointer = NULL;
192 computeLikelihoodDervPointer = NULL;
193 computeLikelihoodDervMixlenPointer = NULL;
194 computePartialLikelihoodPointer = NULL;
195 computeLikelihoodFromBufferPointer = NULL;
196 #endif
197 }
198
changeLikelihoodKernel(LikelihoodKernel lk)199 void PhyloTree::changeLikelihoodKernel(LikelihoodKernel lk) {
200 if (sse == lk) return;
201 setLikelihoodKernel(lk);
202 }
203
204 /*******************************************************
205 *
206 * master function: wrapper for other optimized functions
207 *
208 ******************************************************/
209
computePartialLikelihood(TraversalInfo & info,size_t ptn_left,size_t ptn_right,int thread_id)210 void PhyloTree::computePartialLikelihood(TraversalInfo &info, size_t ptn_left, size_t ptn_right, int thread_id) {
211 (this->*computePartialLikelihoodPointer)(info, ptn_left, ptn_right, thread_id);
212 }
213
computeLikelihoodBranch(PhyloNeighbor * dad_branch,PhyloNode * dad)214 double PhyloTree::computeLikelihoodBranch(PhyloNeighbor *dad_branch, PhyloNode *dad) {
215 return (this->*computeLikelihoodBranchPointer)(dad_branch, dad);
216
217 }
218
computeLikelihoodDerv(PhyloNeighbor * dad_branch,PhyloNode * dad,double * df,double * ddf)219 void PhyloTree::computeLikelihoodDerv(PhyloNeighbor *dad_branch, PhyloNode *dad, double *df, double *ddf) {
220 (this->*computeLikelihoodDervPointer)(dad_branch, dad, df, ddf);
221 }
222
223
computeLikelihoodFromBuffer()224 double PhyloTree::computeLikelihoodFromBuffer() {
225 ASSERT(current_it && current_it_back);
226
227 // TODO: buffer stuff for mixlen model
228 if (computeLikelihoodFromBufferPointer && optimize_by_newton)
229 return (this->*computeLikelihoodFromBufferPointer)();
230 else {
231 return (this->*computeLikelihoodBranchPointer)(current_it, (PhyloNode*)current_it_back->node);
232 }
233
234 }
235
dotProductDoubleCall(double * x,double * y,int size)236 double PhyloTree::dotProductDoubleCall(double *x, double *y, int size) {
237 return (this->*dotProductDouble)(x, y, size);
238 }
239
computeTipPartialLikelihoodPoMo(int state,double * lh,bool hyper)240 void PhyloTree::computeTipPartialLikelihoodPoMo(int state, double *lh, bool hyper) {
241 int nstates = aln->num_states;
242 int N = aln->virtual_pop_size;
243
244 memset(lh, 0, sizeof(double)*nstates);
245
246 // decode the id and value
247 int id1 = aln->pomo_sampled_states[state] & 3;
248 int id2 = (aln->pomo_sampled_states[state] >> 16) & 3;
249 int j = (aln->pomo_sampled_states[state] >> 2) & 16383;
250 int M = j + (aln->pomo_sampled_states[state] >> 18);
251
252 // Number of alleles is hard coded here, change if generalization is needed.
253 int nnuc = 4;
254
255 // TODO DS: Implement down sampling or a better approach.
256 if (hyper && M > N)
257 outError("Down sampling not yet supported.");
258
259 // Check if observed state is a fixed one. If so, many
260 // PoMo states can lead to this data. E.g., even (2A,8T)
261 // can lead to a sampled data of 7A.
262 if (j == M) {
263 lh[id1] = 1.0;
264 // Second: Polymorphic states.
265 for (int s_id1 = 0; s_id1 < nnuc-1; s_id1++) {
266 for (int s_id2 = s_id1+1; s_id2 < nnuc; s_id2++) {
267 if (s_id1 == id1) {
268 // States are in the order {FIXED,
269 // 1A(N-1)C, ..., (N-1)A1C, ...}.
270 int k;
271 if (s_id1 == 0) k = s_id2 - 1;
272 else k = s_id1 + s_id2;
273 // Start one earlier because increment
274 // happens after execution of for loop
275 // body.
276 int real_state = nnuc - 1 + k*(N-1) + 1;
277 for (int i = 1; i < N; i++, real_state++) {
278 ASSERT(real_state < nstates);
279 if (!hyper)
280 lh[real_state] = std::pow((double)i/(double)N,j);
281 else {
282 lh[real_state] = 1.0;
283 for (int l = 0; l<j; l++)
284 lh[real_state] *= (double) (i-l) / (double) (N-l);
285 }
286 }
287 }
288 // Same but fixed allele is the second one
289 // in polymorphic states.
290 else if (s_id2 == id1) {
291 int k;
292 if (s_id1 == 0) k = s_id2 - 1;
293 else k = s_id1 + s_id2;
294 int real_state = nnuc - 1 + k*(N-1) + 1;
295 for (int i = 1; i < N; i++, real_state++) {
296 ASSERT(real_state < nstates);
297 if (!hyper)
298 lh[real_state] = std::pow((double)(N-i)/(double)N,j);
299 else {
300 lh[real_state] = 1.0;
301 for (int l = 0; l<j; l++)
302 lh[real_state] *= (double) (N-i-l) / (double) (N-l);
303 }
304 }
305 }
306 }
307 }
308 }
309 // Observed state is polymorphic. We only need to set the
310 // partial likelihoods for states that are also
311 // polymorphic for the same alleles. E.g., states of type
312 // (ix,(N-i)y) can lead to the observed state (jx,(M-j)y).
313 else {
314 int k;
315 if (id1 == 0) k = id2 - 1;
316 else k = id1 + id2;
317 int real_state = nnuc + k*(N-1);
318 for (int i = 1; i < N; i++, real_state++) {
319 ASSERT(real_state < nstates);
320 if (!hyper)
321 lh[real_state] = binomial_dist(j, M, (double) i / (double) N);
322 if (hyper)
323 lh[real_state] = hypergeometric_dist(j, M, i, N);
324 }
325 }
326
327 // cout << "Sample M,j,id1,id2: " << M << ", " << j << ", " << id1 << ", " << id2 << endl;
328 // cout << "Real partial likelihood vector: ";
329 // for (i=0; i<4; i++)
330 // cout << real_partial_lh[i] << " ";
331 // cout << endl;
332 // for (i=4; i<nstates; i++) {
333 // cout << real_partial_lh[i] << " ";
334 // if ((i-3)%(N-1) == 0)
335 // cout << endl;
336 // }
337 // double sum = 0.0;
338 // for (i=0; i<nstates; i++)
339 // sum += real_partial_lh[i];
340 // cout << sum << endl;
341 }
342
343
computeTipPartialLikelihood()344 void PhyloTree::computeTipPartialLikelihood() {
345 if ((tip_partial_lh_computed & 1) != 0)
346 return;
347 tip_partial_lh_computed |= 1;
348
349
350 //-------------------------------------------------------
351 // initialize ptn_freq and ptn_invar
352 //-------------------------------------------------------
353
354 computePtnFreq();
355 // for +I model
356 computePtnInvar();
357
358 if (getModel()->isSiteSpecificModel()) {
359 // ModelSet *models = (ModelSet*)model;
360 size_t nptn = aln->getNPattern(), max_nptn = ((nptn+vector_size-1)/vector_size)*vector_size, tip_block_size = max_nptn * aln->num_states;
361 int nstates = aln->num_states;
362 int nseq = aln->getNSeq();
363 ASSERT(vector_size > 0);
364 #ifdef _OPENMP
365 #pragma omp parallel for schedule(static)
366 #endif
367 for (int nodeid = 0; nodeid < nseq; nodeid++) {
368 int i, x, v;
369 double *partial_lh = tip_partial_lh + tip_block_size*nodeid;
370 size_t ptn;
371 for (ptn = 0; ptn < nptn; ptn+=vector_size, partial_lh += nstates*vector_size) {
372 // int state[vector_size];
373 // for (v = 0; v < vector_size; v++) {
374 // if (ptn+v < nptn)
375 // state[v] = aln->at(ptn+v)[nodeid];
376 // else
377 // state[v] = aln->STATE_UNKNOWN;
378 // }
379
380 double *inv_evec = &model->getInverseEigenvectors()[ptn*nstates*nstates];
381 for (v = 0; v < vector_size; v++) {
382 int state = 0;
383 if (ptn+v < nptn)
384 state = aln->at(ptn+v)[nodeid];
385 // double *partial_lh = node_partial_lh + ptn*nstates;
386 // double *inv_evec = models->at(ptn)->getInverseEigenvectors();
387
388 if (state < nstates) {
389 for (i = 0; i < nstates; i++)
390 partial_lh[i*vector_size+v] = inv_evec[(i*nstates+state)*vector_size+v];
391 } else if (state == aln->STATE_UNKNOWN) {
392 // special treatment for unknown char
393 for (i = 0; i < nstates; i++) {
394 double lh_unknown = 0.0;
395 // double *this_inv_evec = inv_evec + i*nstates;
396 for (x = 0; x < nstates; x++)
397 lh_unknown += inv_evec[(i*nstates+x)*vector_size+v];
398 partial_lh[i*vector_size+v] = lh_unknown;
399 }
400 } else {
401 double lh_ambiguous;
402 // ambiguous characters
403 int ambi_aa[] = {
404 4+8, // B = N or D
405 32+64, // Z = Q or E
406 512+1024 // U = I or L
407 };
408 switch (aln->seq_type) {
409 case SEQ_DNA:
410 {
411 int cstate = state-nstates+1;
412 for (i = 0; i < nstates; i++) {
413 lh_ambiguous = 0.0;
414 for (x = 0; x < nstates; x++)
415 if ((cstate) & (1 << x))
416 lh_ambiguous += inv_evec[(i*nstates+x)*vector_size+v];
417 partial_lh[i*vector_size+v] = lh_ambiguous;
418 }
419 }
420 break;
421 case SEQ_PROTEIN:
422 //map[(unsigned char)'B'] = 4+8+19; // N or D
423 //map[(unsigned char)'Z'] = 32+64+19; // Q or E
424 {
425 int cstate = state-nstates;
426 for (i = 0; i < nstates; i++) {
427 lh_ambiguous = 0.0;
428 for (x = 0; x < 11; x++)
429 if (ambi_aa[cstate] & (1 << x))
430 lh_ambiguous += inv_evec[(i*nstates+x)*vector_size+v];
431 partial_lh[i*vector_size+v] = lh_ambiguous;
432 }
433 }
434 break;
435 default:
436 ASSERT(0);
437 break;
438 }
439 }
440 // sanity check
441 // bool all_zero = true;
442 // for (i = 0; i < nstates; i++)
443 // if (partial_lh[i] != 0) {
444 // all_zero = false;
445 // break;
446 // }
447 // assert(!all_zero && "some tip_partial_lh are all zeros");
448
449 } // FOR v
450 } // FOR ptn
451 // NO Need to copy dummy anymore
452 // dummy values
453 // for (ptn = nptn; ptn < max_nptn; ptn++, partial_lh += nstates)
454 // memcpy(partial_lh, partial_lh-nstates, nstates*sizeof(double));
455 } // FOR nodeid
456 return;
457 }
458
459 int i, state, nstates = aln->num_states;
460 ASSERT(tip_partial_lh);
461 // ambiguous characters
462 int ambi_aa[] = {
463 4+8, // B = N or D
464 32+64, // Z = Q or E
465 512+1024 // U = I or L
466 };
467
468 if (!getModel()->isReversible() || params->kernel_nonrev) {
469 // nonreversible model
470 memset(tip_partial_lh, 0, (aln->STATE_UNKNOWN)*nstates*sizeof(double));
471 for (state = 0; state < nstates; state++) {
472 tip_partial_lh[state*nstates+state] = 1.0;
473 }
474 double *this_tip_partial_lh = &tip_partial_lh[aln->STATE_UNKNOWN*nstates];
475 // special treatment for unknown char
476 for (i = 0; i < nstates; i++) {
477 this_tip_partial_lh[i] = 1.0;
478 }
479 // special treatment for ambiguous characters
480 switch (aln->seq_type) {
481 case SEQ_DNA:
482 for (state = 4; state < 18; state++) {
483 int cstate = state-nstates+1;
484 this_tip_partial_lh = &tip_partial_lh[state*nstates];
485 for (i = 0; i < nstates; i++) {
486 if ((cstate) & (1 << i))
487 this_tip_partial_lh[i] = 1.0;
488 }
489 }
490 break;
491 case SEQ_PROTEIN:
492 for (state = 0; state < sizeof(ambi_aa)/sizeof(int); state++) {
493 this_tip_partial_lh = &tip_partial_lh[(state+20)*nstates];
494 for (i = 0; i < nstates; i++) {
495 if (ambi_aa[state] & (1 << i))
496 this_tip_partial_lh[i] = 1.0;
497 }
498 }
499 break;
500 case SEQ_POMO: {
501 if (aln->pomo_sampling_method != SAMPLING_WEIGHTED_BINOM &&
502 aln->pomo_sampling_method != SAMPLING_WEIGHTED_HYPER)
503 outError("Sampling method not supported by PoMo.");
504 bool hyper = false;
505 if (aln->pomo_sampling_method == SAMPLING_WEIGHTED_HYPER)
506 hyper = true;
507 double *real_partial_lh = aligned_alloc<double>(nstates);
508 for (state = 0; state < aln->pomo_sampled_states.size(); state++) {
509 computeTipPartialLikelihoodPoMo(state, real_partial_lh, hyper);
510 // The vector tip_partial_lh stores inner product of real_partial_lh
511 // and inverse eigenvector for each state
512 double *this_tip_partial_lh = &tip_partial_lh[(state+nstates)*nstates];
513 memset(this_tip_partial_lh, 0, nstates*sizeof(double));
514 for (i = 0; i < nstates; i++)
515 this_tip_partial_lh[i] = real_partial_lh[i];
516 }
517 aligned_free(real_partial_lh);
518 break;
519 }
520 default:
521 break;
522 }
523 return;
524 }
525
526 // THIS IS FOR REVERSIBLE MODEL
527 int m, x, nmixtures = model->getNMixtures();
528 double *all_inv_evec = model->getInverseEigenvectors();
529 ASSERT(all_inv_evec);
530
531
532 for (state = 0; state < nstates; state++) {
533 double *this_tip_partial_lh = &tip_partial_lh[state*nstates*nmixtures];
534 for (m = 0; m < nmixtures; m++) {
535 double *inv_evec = &all_inv_evec[m*nstates*nstates];
536 for (i = 0; i < nstates; i++)
537 this_tip_partial_lh[m*nstates + i] = inv_evec[i*nstates+state];
538 }
539 }
540 // special treatment for unknown char
541 for (i = 0; i < nstates; i++) {
542 double *this_tip_partial_lh = &tip_partial_lh[aln->STATE_UNKNOWN*nstates*nmixtures];
543 for (m = 0; m < nmixtures; m++) {
544 double *inv_evec = &all_inv_evec[m*nstates*nstates];
545 double lh_unknown = 0.0;
546 for (x = 0; x < nstates; x++)
547 lh_unknown += inv_evec[i*nstates+x];
548 this_tip_partial_lh[m*nstates + i] = lh_unknown;
549 }
550 }
551
552 // special treatment for ambiguous characters
553 double lh_ambiguous;
554 switch (aln->seq_type) {
555 case SEQ_DNA:
556 for (state = 4; state < 18; state++) {
557 int cstate = state-nstates+1;
558 double *this_tip_partial_lh = &tip_partial_lh[state*nstates*nmixtures];
559 for (m = 0; m < nmixtures; m++) {
560 double *inv_evec = &all_inv_evec[m*nstates*nstates];
561 for (i = 0; i < nstates; i++) {
562 lh_ambiguous = 0.0;
563 for (x = 0; x < nstates; x++)
564 if ((cstate) & (1 << x))
565 lh_ambiguous += inv_evec[i*nstates+x];
566 this_tip_partial_lh[m*nstates+i] = lh_ambiguous;
567 }
568 }
569 }
570 break;
571 case SEQ_PROTEIN:
572 //map[(unsigned char)'B'] = 4+8+19; // N or D
573 //map[(unsigned char)'Z'] = 32+64+19; // Q or E
574 for (state = 0; state < sizeof(ambi_aa)/sizeof(int); state++) {
575 double *this_tip_partial_lh = &tip_partial_lh[(state+20)*nstates*nmixtures];
576 for (m = 0; m < nmixtures; m++) {
577 double *inv_evec = &all_inv_evec[m*nstates*nstates];
578 for (i = 0; i < nstates; i++) {
579 lh_ambiguous = 0.0;
580 for (x = 0; x < 11; x++)
581 if (ambi_aa[state] & (1 << x))
582 lh_ambiguous += inv_evec[i*nstates+x];
583 this_tip_partial_lh[m*nstates+i] = lh_ambiguous;
584 }
585 }
586 }
587 break;
588 case SEQ_POMO: {
589 if (aln->pomo_sampling_method != SAMPLING_WEIGHTED_BINOM &&
590 aln->pomo_sampling_method != SAMPLING_WEIGHTED_HYPER)
591 outError("Sampling method not supported by PoMo.");
592 bool hyper = false;
593 if (aln->pomo_sampling_method == SAMPLING_WEIGHTED_HYPER)
594 hyper = true;
595 double *real_partial_lh = aligned_alloc<double>(nstates);
596 for (state = 0; state < aln->pomo_sampled_states.size(); state++) {
597 computeTipPartialLikelihoodPoMo(state, real_partial_lh, hyper);
598 // The vector tip_partial_lh stores inner product of real_partial_lh
599 // and inverse eigenvector for each state
600 double *this_tip_partial_lh = &tip_partial_lh[(state+nstates)*nstates*nmixtures];
601 memset(this_tip_partial_lh, 0, nmixtures*nstates*sizeof(double));
602 for (m = 0; m < nmixtures; m++) {
603 double *inv_evec = &all_inv_evec[m*nstates*nstates];
604 for (int i = 0; i < nstates; i++)
605 for (int j = 0; j < nstates; j++)
606 this_tip_partial_lh[m*nstates + i] +=
607 inv_evec[i*nstates+j] * real_partial_lh[j];
608 }
609 }
610 aligned_free(real_partial_lh);
611 break;
612 }
613 default:
614 break;
615 }
616 }
617
computePtnFreq()618 void PhyloTree::computePtnFreq() {
619 if (ptn_freq_computed) return;
620 ptn_freq_computed = true;
621 size_t nptn = aln->getNPattern();
622 size_t maxptn = get_safe_upper_limit(nptn)+get_safe_upper_limit(model_factory->unobserved_ptns.size());
623 int ptn;
624 for (ptn = 0; ptn < nptn; ptn++)
625 ptn_freq[ptn] = (*aln)[ptn].frequency;
626 for (ptn = nptn; ptn < maxptn; ptn++)
627 ptn_freq[ptn] = 0.0;
628 }
629
computePtnInvar()630 void PhyloTree::computePtnInvar() {
631 size_t nptn = aln->getNPattern(), ptn;
632 size_t maxptn = get_safe_upper_limit(nptn)+get_safe_upper_limit(model_factory->unobserved_ptns.size());
633 // For PoMo, only consider monomorphic states and set nstates to the number of
634 // states of the underlying mutation model.
635 int nstates = model->getMutationModel()->num_states;
636 int x;
637 // ambiguous characters
638 int ambi_aa[] = {
639 4+8, // B = N or D
640 32+64, // Z = Q or E
641 512+1024 // U = I or L
642 };
643
644 double state_freq[nstates];
645
646 // -1 for mixture model
647
648 // Again for PoMo, the stationary frequencies are set to the stationary
649 // frequencies of the boundary states.
650 model->getMutationModel()->getStateFrequency(state_freq, -1);
651
652 memset(ptn_invar, 0, maxptn*sizeof(double));
653 double p_invar = site_rate->getPInvar();
654 if (p_invar != 0.0) {
655 for (ptn = 0; ptn < nptn; ptn++) {
656 if ((*aln)[ptn].const_char > aln->STATE_UNKNOWN)
657 continue;
658
659 if ((*aln)[ptn].const_char == aln->STATE_UNKNOWN) {
660 ptn_invar[ptn] = p_invar;
661 // For PoMo, if a polymorphic state is considered, the likelihood is
662 // left unchanged and zero because ptn_invar has been initialized to 0.
663 } else if ((*aln)[ptn].const_char < nstates) {
664 ptn_invar[ptn] = p_invar * state_freq[(int) (*aln)[ptn].const_char];
665 } else if (aln->seq_type == SEQ_DNA) {
666 // 2016-12-21: handling ambiguous state
667 ptn_invar[ptn] = 0.0;
668 int cstate = (*aln)[ptn].const_char-nstates+1;
669 for (x = 0; x < nstates; x++) {
670 if ((cstate) & (1 << x))
671 ptn_invar[ptn] += state_freq[x];
672 }
673 ptn_invar[ptn] *= p_invar;
674 } else if (aln->seq_type == SEQ_PROTEIN) {
675 ptn_invar[ptn] = 0.0;
676 int cstate = (*aln)[ptn].const_char-nstates;
677 ASSERT(cstate <= 2);
678 for (x = 0; x < 11; x++)
679 if (ambi_aa[cstate] & (1 << x))
680 ptn_invar[ptn] += state_freq[x];
681 ptn_invar[ptn] *= p_invar;
682 } else ASSERT(0);
683 }
684 // // ascertmain bias correction
685 // for (ptn = 0; ptn < model_factory->unobserved_ptns.size(); ptn++)
686 // ptn_invar[nptn+ptn] = p_invar * state_freq[(int)model_factory->unobserved_ptns[ptn]];
687 //
688 // dummy values
689 for (ptn = nptn; ptn < maxptn; ptn++)
690 ptn_invar[ptn] = p_invar;
691 }
692 // aligned_free(state_freq);
693 }
694
695 /*******************************************************
696 *
697 * non-vectorized likelihood functions.
698 * this version uses Alexis' technique that stores the
699 * dot product of partial likelihoods and eigenvectors at node
700 * for faster branch length optimization
701 *
702 ******************************************************/
703
704 /*
705 void PhyloTree::computePartialLikelihoodEigen(PhyloNeighbor *dad_branch, PhyloNode *dad) {
706
707 // don't recompute the likelihood
708 assert(dad);
709 if (dad_branch->partial_lh_computed & 1)
710 return;
711 dad_branch->partial_lh_computed |= 1;
712 PhyloNode *node = (PhyloNode*)(dad_branch->node);
713
714
715 size_t nstates = aln->num_states;
716 size_t nptn = aln->size()+model_factory->unobserved_ptns.size();
717
718 if (!tip_partial_lh_computed)
719 computeTipPartialLikelihood();
720
721 if (node->isLeaf()) {
722 dad_branch->lh_scale_factor = 0.0;
723 return;
724 }
725
726 size_t ptn, c;
727 size_t orig_ntn = aln->size();
728 size_t ncat = site_rate->getNRate();
729 size_t ncat_mix = (model_factory->fused_mix_rate) ? ncat : ncat*model->getNMixtures();
730 size_t mix_addr_nstates[ncat_mix], mix_addr[ncat_mix];
731 size_t denom = (model_factory->fused_mix_rate) ? 1 : ncat;
732 for (c = 0; c < ncat_mix; c++) {
733 size_t m = c/denom;
734 mix_addr_nstates[c] = m*nstates;
735 mix_addr[c] = mix_addr_nstates[c]*nstates;
736 }
737 size_t i, x;
738 size_t block = nstates * ncat_mix;
739 size_t tip_block = nstates * model->getNMixtures();
740 size_t scale_size = nptn * ncat_mix;
741
742 double *evec = model->getEigenvectors();
743 double *inv_evec = model->getInverseEigenvectors();
744 assert(inv_evec && evec);
745 double *eval = model->getEigenvalues();
746
747 dad_branch->lh_scale_factor = 0.0;
748
749 // internal node
750 PhyloNeighbor *left = NULL, *right = NULL; // left & right are two neighbors leading to 2 subtrees
751 int num_leaves = 0;
752 FOR_NEIGHBOR_IT(node, dad, it) {
753 PhyloNeighbor *nei = (PhyloNeighbor*)*it;
754 if (!left) left = (PhyloNeighbor*)(*it); else right = (PhyloNeighbor*)(*it);
755 if ((nei->partial_lh_computed & 1) == 0)
756 computePartialLikelihood(nei, node);
757 dad_branch->lh_scale_factor += nei->lh_scale_factor;
758 if (nei->node->isLeaf())
759 num_leaves ++;
760 }
761
762 if (params->lh_mem_save == LM_PER_NODE && !dad_branch->partial_lh) {
763 // re-orient partial_lh
764 bool done = false;
765 FOR_NEIGHBOR_IT(node, dad, it2) {
766 PhyloNeighbor *backnei = ((PhyloNeighbor*)(*it2)->node->findNeighbor(node));
767 if (backnei->partial_lh) {
768 dad_branch->partial_lh = backnei->partial_lh;
769 dad_branch->scale_num = backnei->scale_num;
770 backnei->partial_lh = NULL;
771 backnei->scale_num = NULL;
772 backnei->partial_lh_computed &= ~1; // clear bit
773 done = true;
774 break;
775 }
776 }
777 if (!done) {
778 printTree(cout, WT_BR_LEN + WT_NEWLINE);
779 }
780 assert(done && "partial_lh is not re-oriented");
781 }
782
783 // precompute buffer to save times
784 double *echildren = new double[block*nstates*(node->degree()-1)];
785 double *partial_lh_leaves = NULL;
786 if (num_leaves > 0)
787 partial_lh_leaves = new double[(aln->STATE_UNKNOWN+1)*block*num_leaves];
788 double *echild = echildren;
789 double *partial_lh_leaf = partial_lh_leaves;
790
791 FOR_NEIGHBOR_IT(node, dad, it) {
792 double expchild[nstates];
793 PhyloNeighbor *child = (PhyloNeighbor*)*it;
794 // precompute information buffer
795 double *echild_ptr = echild;
796 for (c = 0; c < ncat_mix; c++) {
797 double len_child = site_rate->getRate(c%ncat) * child->length;
798 double *eval_ptr = eval + mix_addr_nstates[c];
799 double *evec_ptr = evec + mix_addr[c];
800 for (i = 0; i < nstates; i++) {
801 expchild[i] = exp(eval_ptr[i]*len_child);
802 }
803 for (x = 0; x < nstates; x++) {
804 for (i = 0; i < nstates; i++) {
805 echild_ptr[i] = evec_ptr[x*nstates+i] * expchild[i];
806 }
807 echild_ptr += nstates;
808 }
809 }
810
811 // pre compute information for tip
812 if (child->node->isLeaf()) {
813 vector<int>::iterator it;
814 for (it = aln->seq_states[child->node->id].begin(); it != aln->seq_states[child->node->id].end(); it++) {
815 int state = (*it);
816 double *this_partial_lh_leaf = partial_lh_leaf + state*block;
817 double *echild_ptr = echild;
818 for (c = 0; c < ncat_mix; c++) {
819 double *this_tip_partial_lh = tip_partial_lh + state*tip_block + mix_addr_nstates[c];
820 for (x = 0; x < nstates; x++) {
821 double vchild = 0.0;
822 for (i = 0; i < nstates; i++) {
823 vchild += echild_ptr[i] * this_tip_partial_lh[i];
824 }
825 this_partial_lh_leaf[x] = vchild;
826 echild_ptr += nstates;
827 }
828 this_partial_lh_leaf += nstates;
829 }
830 }
831 size_t addr = aln->STATE_UNKNOWN * block;
832 for (x = 0; x < block; x++) {
833 partial_lh_leaf[addr+x] = 1.0;
834 }
835 partial_lh_leaf += (aln->STATE_UNKNOWN+1)*block;
836 }
837 echild += block*nstates;
838 }
839
840
841 double *eleft = echildren, *eright = echildren + block*nstates;
842
843 if (!left->node->isLeaf() && right->node->isLeaf()) {
844 PhyloNeighbor *tmp = left;
845 left = right;
846 right = tmp;
847 double *etmp = eleft;
848 eleft = eright;
849 eright = etmp;
850 }
851
852 if (node->degree() > 3) {
853
854 //--------------------- multifurcating node ------------------//
855
856 // now for-loop computing partial_lh over all site-patterns
857 #ifdef _OPENMP
858 #pragma omp parallel for private(ptn, c, x, i) schedule(static)
859 #endif
860 for (ptn = 0; ptn < nptn; ptn++) {
861 double partial_lh_all[block];
862 for (i = 0; i < block; i++)
863 partial_lh_all[i] = 1.0;
864 UBYTE *scale_dad = dad_branch->scale_num + ptn*ncat_mix;
865 memset(scale_dad, 0, sizeof(UBYTE)*ncat_mix);
866
867 double *partial_lh_leaf = partial_lh_leaves;
868 double *echild = echildren;
869
870 FOR_NEIGHBOR_IT(node, dad, it) {
871 PhyloNeighbor *child = (PhyloNeighbor*)*it;
872 UBYTE *scale_child = child->scale_num + ptn*ncat_mix;
873 if (child->node->isLeaf()) {
874 // external node
875 int state_child = (ptn < orig_ntn) ? (aln->at(ptn))[child->node->id] : model_factory->unobserved_ptns[ptn-orig_ntn];
876 double *child_lh = partial_lh_leaf + state_child*block;
877 for (c = 0; c < block; c++) {
878 // compute real partial likelihood vector
879 partial_lh_all[c] *= child_lh[c];
880 }
881 partial_lh_leaf += (aln->STATE_UNKNOWN+1)*block;
882 } else {
883 // internal node
884 double *partial_lh = partial_lh_all;
885 double *partial_lh_child = child->partial_lh + ptn*block;
886
887 double *echild_ptr = echild;
888 for (c = 0; c < ncat_mix; c++) {
889 scale_dad[c] += scale_child[c];
890 // compute real partial likelihood vector
891 for (x = 0; x < nstates; x++) {
892 double vchild = 0.0;
893 // double *echild_ptr = echild + (c*nstatesqr+x*nstates);
894 for (i = 0; i < nstates; i++) {
895 vchild += echild_ptr[i] * partial_lh_child[i];
896 }
897 echild_ptr += nstates;
898 partial_lh[x] *= vchild;
899 }
900 partial_lh += nstates;
901 partial_lh_child += nstates;
902 }
903 } // if
904 echild += block*nstates;
905 } // FOR_NEIGHBOR
906
907
908 // compute dot-product with inv_eigenvector
909 double *partial_lh_tmp = partial_lh_all;
910 double *partial_lh = dad_branch->partial_lh + ptn*block;
911 for (c = 0; c < ncat_mix; c++) {
912 double lh_max = 0.0;
913 double *inv_evec_ptr = inv_evec + mix_addr[c];
914 for (i = 0; i < nstates; i++) {
915 double res = 0.0;
916 for (x = 0; x < nstates; x++) {
917 res += partial_lh_tmp[x]*inv_evec_ptr[x];
918 }
919 inv_evec_ptr += nstates;
920 partial_lh[i] = res;
921 lh_max = max(lh_max, fabs(res));
922 }
923 // check if one should scale partial likelihoods
924 if (lh_max < SCALING_THRESHOLD && lh_max != 0.0) {
925 //assert(lh_max != 0.0 && "Numerical underflow for multifurcation node");
926 if (ptn_invar[ptn] == 0.0) {
927 // now do the likelihood scaling
928 for (i = 0; i < nstates; i++)
929 partial_lh[i] *= SCALING_THRESHOLD_INVER;
930 scale_dad[c] += 1;
931 }
932 }
933 partial_lh += nstates;
934 partial_lh_tmp += nstates;
935 }
936
937 } // for ptn
938 // dad_branch->lh_scale_factor += sum_scale;
939
940 // end multifurcating treatment
941 } else if (left->node->isLeaf() && right->node->isLeaf()) {
942
943 //--------------------- TIP-TIP (cherry) case ------------------//
944
945 double *partial_lh_left = partial_lh_leaves;
946 double *partial_lh_right = partial_lh_leaves + (aln->STATE_UNKNOWN+1)*block;
947
948 // scale number must be ZERO
949 memset(dad_branch->scale_num, 0, scale_size * sizeof(UBYTE));
950 #ifdef _OPENMP
951 #pragma omp parallel for private(ptn, c, x, i) schedule(static)
952 #endif
953 for (ptn = 0; ptn < nptn; ptn++) {
954 double partial_lh_tmp[nstates];
955 double *partial_lh = dad_branch->partial_lh + ptn*block;
956 double *vleft = partial_lh_left + block*((ptn < orig_ntn) ? (aln->at(ptn))[left->node->id] : model_factory->unobserved_ptns[ptn-orig_ntn]);
957 double *vright = partial_lh_right + block*((ptn < orig_ntn) ? (aln->at(ptn))[right->node->id] : model_factory->unobserved_ptns[ptn-orig_ntn]);
958 for (c = 0; c < ncat_mix; c++) {
959 double *inv_evec_ptr = inv_evec + mix_addr[c];
960 // compute real partial likelihood vector
961 for (x = 0; x < nstates; x++) {
962 partial_lh_tmp[x] = vleft[x] * vright[x];
963 }
964
965 // compute dot-product with inv_eigenvector
966 for (i = 0; i < nstates; i++) {
967 double res = 0.0;
968 for (x = 0; x < nstates; x++) {
969 res += partial_lh_tmp[x]*inv_evec_ptr[x];
970 }
971 inv_evec_ptr += nstates;
972 partial_lh[i] = res;
973 }
974 vleft += nstates;
975 vright += nstates;
976 partial_lh += nstates;
977 }
978 }
979 } else if (left->node->isLeaf() && !right->node->isLeaf()) {
980
981 //--------------------- TIP-INTERNAL NODE case ------------------//
982
983 // only take scale_num from the right subtree
984 memcpy(dad_branch->scale_num, right->scale_num, scale_size * sizeof(UBYTE));
985
986
987 double *partial_lh_left = partial_lh_leaves;
988
989 #ifdef _OPENMP
990 #pragma omp parallel for private(ptn, c, x, i) schedule(static)
991 #endif
992 for (ptn = 0; ptn < nptn; ptn++) {
993 double partial_lh_tmp[nstates];
994 double *partial_lh = dad_branch->partial_lh + ptn*block;
995 double *partial_lh_right = right->partial_lh + ptn*block;
996 double *vleft = partial_lh_left + block*((ptn < orig_ntn) ? (aln->at(ptn))[left->node->id] : model_factory->unobserved_ptns[ptn-orig_ntn]);
997
998 double *eright_ptr = eright;
999 for (c = 0; c < ncat_mix; c++) {
1000 double lh_max = 0.0;
1001 double *inv_evec_ptr = inv_evec + mix_addr[c];
1002 // compute real partial likelihood vector
1003 for (x = 0; x < nstates; x++) {
1004 double vright = 0.0;
1005 // size_t addr = c*nstatesqr+x*nstates;
1006 // vleft = partial_lh_left[state_left*block+c*nstates+x];
1007 for (i = 0; i < nstates; i++) {
1008 vright += eright_ptr[i] * partial_lh_right[i];
1009 }
1010 eright_ptr += nstates;
1011 partial_lh_tmp[x] = vleft[x] * (vright);
1012 }
1013
1014 // compute dot-product with inv_eigenvector
1015 for (i = 0; i < nstates; i++) {
1016 double res = 0.0;
1017 for (x = 0; x < nstates; x++) {
1018 res += partial_lh_tmp[x]*inv_evec_ptr[x];
1019 }
1020 inv_evec_ptr += nstates;
1021 partial_lh[i] = res;
1022 lh_max = max(fabs(res), lh_max);
1023 }
1024 // check if one should scale partial likelihoods
1025 if (lh_max < SCALING_THRESHOLD && lh_max != 0.0) {
1026 //assert(lh_max != 0.0 && "Numerical underflow for tip-inner node");
1027 if (ptn_invar[ptn] == 0.0) {
1028 // now do the likelihood scaling
1029 for (i = 0; i < nstates; i++)
1030 partial_lh[i] *= SCALING_THRESHOLD_INVER;
1031 dad_branch->scale_num[ptn*ncat_mix+c] += 1;
1032 }
1033 }
1034 vleft += nstates;
1035 partial_lh_right += nstates;
1036 partial_lh += nstates;
1037 }
1038
1039 }
1040 // dad_branch->lh_scale_factor += sum_scale;
1041 // delete [] partial_lh_left;
1042
1043 } else {
1044
1045 //--------------------- INTERNAL-INTERNAL NODE case ------------------//
1046
1047 #ifdef _OPENMP
1048 #pragma omp parallel for private(ptn, c, x, i) schedule(static)
1049 #endif
1050 for (ptn = 0; ptn < nptn; ptn++) {
1051 double partial_lh_tmp[nstates];
1052 double *partial_lh = dad_branch->partial_lh + ptn*block;
1053 double *partial_lh_left = left->partial_lh + ptn*block;
1054 double *partial_lh_right = right->partial_lh + ptn*block;
1055 UBYTE *scale_dad = dad_branch->scale_num + ptn*ncat_mix;
1056 UBYTE *scale_left = left->scale_num + ptn*ncat_mix;
1057 UBYTE *scale_right = right->scale_num + ptn*ncat_mix;
1058
1059 double *eleft_ptr = eleft;
1060 double *eright_ptr = eright;
1061
1062 for (c = 0; c < ncat_mix; c++) {
1063 scale_dad[c] = scale_left[c] + scale_right[c];
1064 double lh_max = 0.0;
1065 double *inv_evec_ptr = inv_evec + mix_addr[c];
1066 // compute real partial likelihood vector
1067 for (x = 0; x < nstates; x++) {
1068 double vleft = 0.0, vright = 0.0;
1069 // size_t addr = c*nstatesqr+x*nstates;
1070 for (i = 0; i < nstates; i++) {
1071 vleft += eleft_ptr[i] * partial_lh_left[i];
1072 vright += eright_ptr[i] * partial_lh_right[i];
1073 }
1074 eleft_ptr += nstates;
1075 eright_ptr += nstates;
1076 partial_lh_tmp[x] = vleft*vright;
1077 // assert(partial_lh_tmp[x] != 0.0);
1078 }
1079
1080 // compute dot-product with inv_eigenvector
1081 for (i = 0; i < nstates; i++) {
1082 double res = 0.0;
1083 for (x = 0; x < nstates; x++) {
1084 res += partial_lh_tmp[x]*inv_evec_ptr[x];
1085 }
1086 inv_evec_ptr += nstates;
1087 partial_lh[i] = res;
1088 lh_max = max(lh_max, fabs(res));
1089 }
1090 // check if one should scale partial likelihoods
1091 if (lh_max < SCALING_THRESHOLD && lh_max != 0.0) {
1092 //assert(lh_max != 0.0 && "Numerical underflow for inner-inner node");
1093 if (ptn_invar[ptn] == 0.0) {
1094 // BQM 2016-05-03: only scale for non-constant sites
1095 // now do the likelihood scaling
1096 for (i = 0; i < nstates; i++)
1097 partial_lh[i] *= SCALING_THRESHOLD_INVER;
1098 scale_dad[c] += 1;
1099 }
1100 }
1101 partial_lh_left += nstates;
1102 partial_lh_right += nstates;
1103 partial_lh += nstates;
1104 }
1105
1106 }
1107 // dad_branch->lh_scale_factor += sum_scale;
1108
1109 }
1110
1111 if (partial_lh_leaves)
1112 delete [] partial_lh_leaves;
1113 delete [] echildren;
1114 }
1115
1116 void PhyloTree::computeLikelihoodDervEigen(PhyloNeighbor *dad_branch, PhyloNode *dad, double &df, double &ddf) {
1117 PhyloNode *node = (PhyloNode*) dad_branch->node;
1118 PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad);
1119 if (!central_partial_lh)
1120 initializeAllPartialLh();
1121 if (node->isLeaf()) {
1122 PhyloNode *tmp_node = dad;
1123 dad = node;
1124 node = tmp_node;
1125 PhyloNeighbor *tmp_nei = dad_branch;
1126 dad_branch = node_branch;
1127 node_branch = tmp_nei;
1128 }
1129 if ((dad_branch->partial_lh_computed & 1) == 0)
1130 computePartialLikelihoodEigen(dad_branch, dad);
1131 if ((node_branch->partial_lh_computed & 1) == 0)
1132 computePartialLikelihoodEigen(node_branch, node);
1133
1134 size_t nstates = aln->num_states;
1135 size_t ncat = site_rate->getNRate();
1136 size_t ncat_mix = (model_factory->fused_mix_rate) ? ncat : ncat*model->getNMixtures();
1137
1138 size_t block = ncat_mix * nstates;
1139 size_t tip_block = nstates * model->getNMixtures();
1140 size_t ptn; // for big data size > 4GB memory required
1141 size_t c, i;
1142 size_t orig_nptn = aln->size();
1143 size_t nptn = aln->size()+model_factory->unobserved_ptns.size();
1144
1145 size_t mix_addr_nstates[ncat_mix], mix_addr[ncat_mix];
1146 size_t denom = (model_factory->fused_mix_rate) ? 1 : ncat;
1147 for (c = 0; c < ncat_mix; c++) {
1148 size_t m = c/denom;
1149 mix_addr_nstates[c] = m*nstates;
1150 mix_addr[c] = mix_addr_nstates[c]*nstates;
1151 }
1152
1153 double *eval = model->getEigenvalues();
1154 assert(eval);
1155
1156 assert(theta_all);
1157 if (!theta_computed) {
1158 // precompute theta for fast branch length optimization
1159
1160 if (dad->isLeaf()) {
1161 // special treatment for TIP-INTERNAL NODE case
1162 #ifdef _OPENMP
1163 #pragma omp parallel for private(ptn, i, c) schedule(static)
1164 #endif
1165 for (ptn = 0; ptn < nptn; ptn++) {
1166 double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
1167 UBYTE *scale_dad = dad_branch->scale_num+ptn*ncat_mix;
1168 double *theta = theta_all + ptn*block;
1169 double *this_tip_partial_lh = tip_partial_lh + tip_block*((ptn < orig_nptn) ? (aln->at(ptn))[dad->id] : model_factory->unobserved_ptns[ptn-orig_nptn]);
1170 UBYTE min_scale = scale_dad[0];
1171 for (c = 1; c < ncat_mix; c++)
1172 min_scale = min(min_scale, scale_dad[c]);
1173 for (c = 0; c < ncat_mix; c++) {
1174 double *lh_tip = this_tip_partial_lh + mix_addr_nstates[c];
1175 if (scale_dad[c] == min_scale) {
1176 for (i = 0; i < nstates; i++) {
1177 theta[i] = lh_tip[i] * partial_lh_dad[i];
1178 }
1179 } else if (scale_dad[c] == min_scale+1) {
1180 for (i = 0; i < nstates; i++) {
1181 theta[i] = lh_tip[i] * partial_lh_dad[i] * SCALING_THRESHOLD;
1182 }
1183 } else {
1184 memset(theta, 0, sizeof(double)*nstates);
1185 }
1186 partial_lh_dad += nstates;
1187 theta += nstates;
1188 }
1189
1190 }
1191 // ascertainment bias correction
1192 } else {
1193 // both dad and node are internal nodes
1194
1195 // size_t all_entries = nptn*block;
1196 #ifdef _OPENMP
1197 #pragma omp parallel for private(ptn, i, c) schedule(static)
1198 #endif
1199 for (ptn = 0; ptn < nptn; ptn++) {
1200 double *theta = theta_all + ptn*block;
1201 double *partial_lh_node = node_branch->partial_lh + ptn*block;
1202 double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
1203
1204 size_t ptn_ncat = ptn*ncat_mix;
1205 UBYTE *scale_dad = dad_branch->scale_num + ptn_ncat;
1206 UBYTE *scale_node = node_branch->scale_num + ptn_ncat;
1207 UBYTE sum_scale[ncat_mix];
1208 UBYTE min_scale = sum_scale[0] = scale_dad[0] + scale_node[0];
1209 for (c = 1; c < ncat_mix; c++) {
1210 sum_scale[c] = scale_dad[c] + scale_node[c];
1211 min_scale = min(min_scale, sum_scale[c]);
1212 }
1213 for (c = 0; c < ncat_mix; c++) {
1214 if (sum_scale[c] == min_scale) {
1215 for (i = 0; i < nstates; i++) {
1216 theta[i] = partial_lh_node[i] * partial_lh_dad[i];
1217 }
1218 } else if (sum_scale[c] == min_scale+1) {
1219 for (i = 0; i < nstates; i++) {
1220 theta[i] = partial_lh_node[i] * partial_lh_dad[i] * SCALING_THRESHOLD;
1221 }
1222 } else {
1223 memset(theta, 0, sizeof(double)*nstates);
1224 }
1225 theta += nstates;
1226 partial_lh_dad += nstates;
1227 partial_lh_node += nstates;
1228 }
1229 }
1230 }
1231 theta_computed = true;
1232 }
1233
1234 double *val0 = new double[block];
1235 double *val1 = new double[block];
1236 double *val2 = new double[block];
1237 for (c = 0; c < ncat_mix; c++) {
1238 size_t m = c/denom;
1239 double *eval_ptr = eval + mix_addr_nstates[c];
1240 size_t mycat = c%ncat;
1241 double prop = site_rate->getProp(mycat) * model->getMixtureWeight(m);
1242 size_t addr = c*nstates;
1243 for (i = 0; i < nstates; i++) {
1244 double cof = eval_ptr[i]*site_rate->getRate(mycat);
1245 double val = exp(cof*dad_branch->length) * prop;
1246 double val1_ = cof*val;
1247 val0[addr+i] = val;
1248 val1[addr+i] = val1_;
1249 val2[addr+i] = cof*val1_;
1250 }
1251 }
1252
1253
1254 double my_df = 0.0, my_ddf = 0.0, prob_const = 0.0, df_const = 0.0, ddf_const = 0.0;
1255 // double tree_lh = node_branch->lh_scale_factor + dad_branch->lh_scale_factor;
1256
1257 #ifdef _OPENMP
1258 #pragma omp parallel for reduction(+: my_df, my_ddf, prob_const, df_const, ddf_const) private(ptn, i) schedule(static)
1259 #endif
1260 for (ptn = 0; ptn < nptn; ptn++) {
1261 double lh_ptn = ptn_invar[ptn], df_ptn = 0.0, ddf_ptn = 0.0;
1262 double *theta = theta_all + ptn*block;
1263 for (i = 0; i < block; i++) {
1264 lh_ptn += val0[i] * theta[i];
1265 df_ptn += val1[i] * theta[i];
1266 ddf_ptn += val2[i] * theta[i];
1267 }
1268
1269 // assert(lh_ptn > 0.0);
1270 lh_ptn = fabs(lh_ptn);
1271
1272 if (ptn < orig_nptn) {
1273 double df_frac = df_ptn / lh_ptn;
1274 double ddf_frac = ddf_ptn / lh_ptn;
1275 double freq = ptn_freq[ptn];
1276 double tmp1 = df_frac * freq;
1277 double tmp2 = ddf_frac * freq;
1278 my_df += tmp1;
1279 my_ddf += tmp2 - tmp1 * df_frac;
1280 } else {
1281 // ascertainment bias correction
1282 prob_const += lh_ptn;
1283 df_const += df_ptn;
1284 ddf_const += ddf_ptn;
1285 }
1286 }
1287 df = my_df;
1288 ddf = my_ddf;
1289
1290 assert(!isnan(df) && !isinf(df) && "Numerical underflow for lh-derivative");
1291
1292 if (isnan(df) || isinf(df)) {
1293 df = 0.0;
1294 ddf = 0.0;
1295 // outWarning("Numerical instability (some site-likelihood = 0)");
1296 }
1297
1298 if (orig_nptn < nptn) {
1299 // ascertainment bias correction
1300 prob_const = 1.0 - prob_const;
1301 double df_frac = df_const / prob_const;
1302 double ddf_frac = ddf_const / prob_const;
1303 int nsites = aln->getNSite();
1304 df += nsites * df_frac;
1305 ddf += nsites *(ddf_frac + df_frac*df_frac);
1306 }
1307
1308
1309 delete [] val2;
1310 delete [] val1;
1311 delete [] val0;
1312 }
1313
1314 //template <const int nstates>
1315 double PhyloTree::computeLikelihoodBranchEigen(PhyloNeighbor *dad_branch, PhyloNode *dad) {
1316 PhyloNode *node = (PhyloNode*) dad_branch->node;
1317 PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad);
1318 if (!central_partial_lh)
1319 initializeAllPartialLh();
1320 if (node->isLeaf()) {
1321 PhyloNode *tmp_node = dad;
1322 dad = node;
1323 node = tmp_node;
1324 PhyloNeighbor *tmp_nei = dad_branch;
1325 dad_branch = node_branch;
1326 node_branch = tmp_nei;
1327 }
1328 if ((dad_branch->partial_lh_computed & 1) == 0)
1329 // computePartialLikelihoodEigen(dad_branch, dad);
1330 computePartialLikelihood(dad_branch, dad);
1331 if ((node_branch->partial_lh_computed & 1) == 0)
1332 // computePartialLikelihoodEigen(node_branch, node);
1333 computePartialLikelihood(node_branch, node);
1334 double tree_lh = node_branch->lh_scale_factor + dad_branch->lh_scale_factor;
1335 size_t nstates = aln->num_states;
1336 size_t ncat = site_rate->getNRate();
1337 size_t ncat_mix = (model_factory->fused_mix_rate) ? ncat : ncat*model->getNMixtures();
1338
1339 size_t block = ncat_mix * nstates;
1340 size_t tip_block = nstates * model->getNMixtures();
1341 size_t ptn; // for big data size > 4GB memory required
1342 size_t c, i;
1343 size_t orig_nptn = aln->size();
1344 size_t nptn = aln->size()+model_factory->unobserved_ptns.size();
1345
1346 size_t mix_addr_nstates[ncat_mix], mix_addr[ncat_mix];
1347 size_t denom = (model_factory->fused_mix_rate) ? 1 : ncat;
1348
1349 double *eval = model->getEigenvalues();
1350 assert(eval);
1351
1352 double *val = new double[block];
1353 for (c = 0; c < ncat_mix; c++) {
1354 size_t mycat = c%ncat;
1355 size_t m = c/denom;
1356 mix_addr_nstates[c] = m*nstates;
1357 mix_addr[c] = mix_addr_nstates[c]*nstates;
1358 double *eval_ptr = eval + mix_addr_nstates[c];
1359 double len = site_rate->getRate(mycat)*dad_branch->length;
1360 double prop = site_rate->getProp(mycat) * model->getMixtureWeight(m);
1361 double *this_val = val + c*nstates;
1362 for (i = 0; i < nstates; i++)
1363 this_val[i] = exp(eval_ptr[i]*len) * prop;
1364 }
1365
1366 double prob_const = 0.0;
1367 memset(_pattern_lh_cat, 0, sizeof(double)*nptn*ncat_mix);
1368
1369 if (dad->isLeaf()) {
1370 // special treatment for TIP-INTERNAL NODE case
1371 double *partial_lh_node = new double[(aln->STATE_UNKNOWN+1)*block];
1372 IntVector states_dad;
1373 if (dad->id < aln->getNSeq())
1374 states_dad = aln->seq_states[dad->id];
1375 states_dad.push_back(aln->STATE_UNKNOWN);
1376 // precompute information from one tip
1377 for (IntVector::iterator it = states_dad.begin(); it != states_dad.end(); it++) {
1378 double *lh_node = partial_lh_node +(*it)*block;
1379 double *val_tmp = val;
1380 double *this_tip_partial_lh = tip_partial_lh + (*it)*tip_block;
1381 for (c = 0; c < ncat_mix; c++) {
1382 double *lh_tip = this_tip_partial_lh + mix_addr_nstates[c];
1383 for (i = 0; i < nstates; i++) {
1384 lh_node[i] = val_tmp[i] * lh_tip[i];
1385 }
1386 lh_node += nstates;
1387 val_tmp += nstates;
1388 }
1389 }
1390
1391 // now do the real computation
1392 #ifdef _OPENMP
1393 #pragma omp parallel for reduction(+: tree_lh, prob_const) private(ptn, i, c) schedule(static)
1394 #endif
1395 for (ptn = 0; ptn < nptn; ptn++) {
1396 double lh_ptn = ptn_invar[ptn];
1397 double *lh_cat = _pattern_lh_cat + ptn*ncat_mix;
1398 double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
1399 UBYTE *scale_dad = dad_branch->scale_num + ptn*ncat_mix;
1400 double *lh_node = partial_lh_node + block*((ptn < orig_nptn) ? (aln->at(ptn))[dad->id] : model_factory->unobserved_ptns[ptn-orig_nptn]);
1401 // determine the min scaling
1402 UBYTE min_scale = scale_dad[0];
1403 for (c = 1; c < ncat_mix; c++)
1404 min_scale = min(min_scale, scale_dad[c]);
1405
1406 for (c = 0; c < ncat_mix; c++) {
1407 if (scale_dad[c] <= min_scale+1) {
1408 // only compute for least scale category
1409 for (i = 0; i < nstates; i++) {
1410 *lh_cat += (lh_node[i] * partial_lh_dad[i]);
1411 }
1412 if (scale_dad[c] != min_scale)
1413 *lh_cat *= SCALING_THRESHOLD;
1414 lh_ptn += *lh_cat;
1415 }
1416 lh_node += nstates;
1417 partial_lh_dad += nstates;
1418 lh_cat++;
1419 }
1420 // assert(lh_ptn > -1e-10);
1421 if (ptn < orig_nptn) {
1422 lh_ptn = log(fabs(lh_ptn)) + LOG_SCALING_THRESHOLD*min_scale;
1423 _pattern_lh[ptn] = lh_ptn;
1424 tree_lh += lh_ptn * ptn_freq[ptn];
1425 } else {
1426 // bugfix 2016-01-21, prob_const can be rescaled
1427 if (min_scale >= 1)
1428 lh_ptn *= SCALING_THRESHOLD;
1429 // _pattern_lh[ptn] = lh_ptn;
1430 prob_const += lh_ptn;
1431 }
1432 }
1433 delete [] partial_lh_node;
1434 } else {
1435 // both dad and node are internal nodes
1436 #ifdef _OPENMP
1437 #pragma omp parallel for reduction(+: tree_lh, prob_const) private(ptn, i, c) schedule(static)
1438 #endif
1439 for (ptn = 0; ptn < nptn; ptn++) {
1440 double lh_ptn = ptn_invar[ptn];
1441 double *lh_cat = _pattern_lh_cat + ptn*ncat_mix;
1442 double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
1443 double *partial_lh_node = node_branch->partial_lh + ptn*block;
1444 double *val_tmp = val;
1445 UBYTE *scale_dad = dad_branch->scale_num + ptn*ncat_mix;
1446 UBYTE *scale_node = node_branch->scale_num + ptn*ncat_mix;
1447 UBYTE sum_scale[ncat_mix];
1448 UBYTE min_scale = sum_scale[0] = scale_dad[0]+scale_node[0];
1449 for (c = 1; c < ncat_mix; c++) {
1450 sum_scale[c] = scale_dad[c] + scale_node[c];
1451 min_scale = min(min_scale, sum_scale[c]);
1452 }
1453 for (c = 0; c < ncat_mix; c++) {
1454 if (sum_scale[c] <= min_scale+1) {
1455 // only compute for least scale category
1456 for (i = 0; i < nstates; i++) {
1457 *lh_cat += (val_tmp[i] * partial_lh_node[i] * partial_lh_dad[i]);
1458 }
1459 if (sum_scale[c] != min_scale)
1460 *lh_cat *= SCALING_THRESHOLD;
1461 lh_ptn += *lh_cat;
1462 }
1463 partial_lh_node += nstates;
1464 partial_lh_dad += nstates;
1465 val_tmp += nstates;
1466 lh_cat++;
1467 }
1468
1469 // assert(lh_ptn > 0.0);
1470 if (ptn < orig_nptn) {
1471 lh_ptn = log(fabs(lh_ptn)) + LOG_SCALING_THRESHOLD*min_scale;
1472 _pattern_lh[ptn] = lh_ptn;
1473 tree_lh += lh_ptn * ptn_freq[ptn];
1474 } else {
1475 // bugfix 2016-01-21, prob_const can be rescaled
1476 if (min_scale >= 1)
1477 lh_ptn *= SCALING_THRESHOLD;
1478 // _pattern_lh[ptn] = lh_ptn;
1479 prob_const += lh_ptn;
1480 }
1481 }
1482 }
1483
1484 assert(!isnan(tree_lh) && !isinf(tree_lh) && "Numerical underflow for lh-branch");
1485
1486 if (orig_nptn < nptn) {
1487 // ascertainment bias correction
1488 if (prob_const >= 1.0 || prob_const < 0.0) {
1489 printTree(cout, WT_TAXON_ID + WT_BR_LEN + WT_NEWLINE);
1490 model->writeInfo(cout);
1491 }
1492 assert(prob_const < 1.0 && prob_const >= 0.0);
1493
1494 // BQM 2015-10-11: fix this those functions using _pattern_lh_cat
1495 // double inv_const = 1.0 / (1.0-prob_const);
1496 // size_t nptn_cat = orig_nptn*ncat;
1497 // for (ptn = 0; ptn < nptn_cat; ptn++)
1498 // _pattern_lh_cat[ptn] *= inv_const;
1499
1500 prob_const = log(1.0 - prob_const);
1501 for (ptn = 0; ptn < orig_nptn; ptn++)
1502 _pattern_lh[ptn] -= prob_const;
1503 tree_lh -= aln->getNSite()*prob_const;
1504 assert(!isnan(tree_lh) && !isinf(tree_lh));
1505 }
1506
1507 assert(!isnan(tree_lh) && !isinf(tree_lh));
1508
1509 delete [] val;
1510 return tree_lh;
1511 }
1512 */
1513
1514
1515 /*******************************************************
1516 *
1517 * ancestral sequence reconstruction
1518 *
1519 ******************************************************/
1520
1521
initMarginalAncestralState(ostream & out,bool & orig_kernel_nonrev,double * & ptn_ancestral_prob,int * & ptn_ancestral_seq)1522 void PhyloTree::initMarginalAncestralState(ostream &out, bool &orig_kernel_nonrev, double* &ptn_ancestral_prob, int* &ptn_ancestral_seq) {
1523 orig_kernel_nonrev = params->kernel_nonrev;
1524 if (!orig_kernel_nonrev) {
1525 // switch to nonrev kernel to compute _pattern_lh_cat_state
1526 params->kernel_nonrev = true;
1527 setLikelihoodKernel(sse);
1528 clearAllPartialLH();
1529 }
1530 _pattern_lh_cat_state = newPartialLh();
1531
1532 size_t nptn = getAlnNPattern();
1533 size_t nstates = model->num_states;
1534
1535 ptn_ancestral_prob = aligned_alloc<double>(nptn*nstates);
1536 ptn_ancestral_seq = aligned_alloc<int>(nptn);
1537 }
1538
computeMarginalAncestralState(PhyloNeighbor * dad_branch,PhyloNode * dad,double * ptn_ancestral_prob,int * ptn_ancestral_seq)1539 void PhyloTree::computeMarginalAncestralState(PhyloNeighbor *dad_branch, PhyloNode *dad,
1540 double *ptn_ancestral_prob, int *ptn_ancestral_seq) {
1541 size_t nptn = getAlnNPattern();
1542 size_t nstates = model->num_states;
1543 size_t nstates_vector = nstates * vector_size;
1544 size_t ncat_mix = (model_factory->fused_mix_rate) ? site_rate->getNRate() : site_rate->getNRate()*model->getNMixtures();
1545 double state_freq[nstates];
1546 model->getStateFrequency(state_freq);
1547
1548 // compute _pattern_lh_cat_state using NONREV kernel
1549 computeLikelihoodBranch(dad_branch, dad);
1550
1551 size_t ptn, i, c, v;
1552
1553 double *lh_state = _pattern_lh_cat_state;
1554 memset(ptn_ancestral_prob, 0, sizeof(double)*nptn*nstates);
1555
1556 // convert vector_size into continuous pattern
1557 for (ptn = 0; ptn < nptn; ptn += vector_size) {
1558 double *state_prob = ptn_ancestral_prob + ptn*nstates;
1559 for (c = 0; c < ncat_mix; c++) {
1560 for (i = 0; i < nstates; i++) {
1561 for (v = 0; v < vector_size; v++) if (ptn+v < nptn)
1562 {
1563 state_prob[v*nstates+i] += lh_state[i*vector_size + v];
1564 }
1565 }
1566 lh_state += nstates_vector;
1567 }
1568 }
1569
1570 // now normalize to probability
1571 for (ptn = 0; ptn < nptn; ptn++) {
1572 double *state_prob = ptn_ancestral_prob + ptn*nstates;
1573 double sum = 0.0;
1574 int state_best = 0;
1575 for (i = 0; i < nstates; i++) {
1576 sum += state_prob[i];
1577 if (state_prob[i] > state_prob[state_best])
1578 state_best = i;
1579 }
1580 sum = 1.0/sum;
1581 for (i = 0; i < nstates; i++) {
1582 state_prob[i] *= sum;
1583 }
1584
1585 // best state must exceed its equilibrium frequency!
1586 if (state_prob[state_best] < params->min_ancestral_prob ||
1587 state_prob[state_best] <= state_freq[state_best]+MIN_FREQUENCY_DIFF)
1588 state_best = aln->STATE_UNKNOWN;
1589 ptn_ancestral_seq[ptn] = state_best;
1590 }
1591
1592 }
1593
writeMarginalAncestralState(ostream & out,PhyloNode * node,double * ptn_ancestral_prob,int * ptn_ancestral_seq)1594 void PhyloTree::writeMarginalAncestralState(ostream &out, PhyloNode *node, double *ptn_ancestral_prob, int *ptn_ancestral_seq) {
1595 size_t site, nsites = aln->getNSite(), nstates = model->num_states;
1596 for (site = 0; site < nsites; site++) {
1597 int ptn = aln->getPatternID(site);
1598 out << node->name << "\t" << site+1 << "\t";
1599 // if (params->print_ancestral_sequence == AST_JOINT)
1600 // out << aln->convertStateBackStr(joint_ancestral_node[ptn]) << "\t";
1601 out << aln->convertStateBackStr(ptn_ancestral_seq[ptn]);
1602 double *state_prob = ptn_ancestral_prob + ptn*nstates;
1603 for (size_t j = 0; j < nstates; j++) {
1604 out << "\t" << state_prob[j];
1605 }
1606 out << endl;
1607 }
1608
1609 }
1610
endMarginalAncestralState(bool orig_kernel_nonrev,double * & ptn_ancestral_prob,int * & ptn_ancestral_seq)1611 void PhyloTree::endMarginalAncestralState(bool orig_kernel_nonrev, double* &ptn_ancestral_prob, int* &ptn_ancestral_seq) {
1612 if (!orig_kernel_nonrev) {
1613 // switch back to REV kernel
1614 params->kernel_nonrev = orig_kernel_nonrev;
1615 setLikelihoodKernel(sse);
1616 clearAllPartialLH();
1617 }
1618 aligned_free(ptn_ancestral_seq);
1619 aligned_free(ptn_ancestral_prob);
1620
1621 aligned_free(_pattern_lh_cat_state);
1622 _pattern_lh_cat_state = NULL;
1623 }
1624
1625 /*
1626 void PhyloTree::computeMarginalAncestralProbability(PhyloNeighbor *dad_branch, PhyloNode *dad, double *ptn_ancestral_prob) {
1627 PhyloNode *node = (PhyloNode*) dad_branch->node;
1628 PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad);
1629 if (!central_partial_lh)
1630 initializeAllPartialLh();
1631 ASSERT(!node->isLeaf());
1632
1633 // TODO: not working yet
1634
1635 // if ((dad_branch->partial_lh_computed & 1) == 0)
1636 // computePartialLikelihood(dad_branch, dad);
1637 // if ((node_branch->partial_lh_computed & 1) == 0)
1638 // computePartialLikelihood(node_branch, node);
1639 size_t nstates = aln->num_states;
1640 const size_t nstatesqr=nstates*nstates;
1641 size_t ncat = site_rate->getNRate();
1642 size_t statecat = nstates * ncat;
1643 size_t nmixture = model->getNMixtures();
1644
1645 size_t block = ncat * nstates * nmixture;
1646 size_t ptn; // for big data size > 4GB memory required
1647 size_t c, i, m, x;
1648 size_t nptn = aln->size();
1649 double *eval = model->getEigenvalues();
1650 double *evec = model->getEigenvectors();
1651 double *inv_evec = model->getInverseEigenvectors();
1652 ASSERT(eval);
1653
1654 double echild[block*nstates];
1655
1656 for (c = 0; c < ncat; c++) {
1657 double expchild[nstates];
1658 double len_child = site_rate->getRate(c) * dad_branch->length;
1659 for (m = 0; m < nmixture; m++) {
1660 for (i = 0; i < nstates; i++) {
1661 expchild[i] = exp(eval[m*nstates+i]*len_child);
1662 }
1663 for (x = 0; x < nstates; x++)
1664 for (i = 0; i < nstates; i++) {
1665 echild[(m*ncat+c)*nstatesqr+x*nstates+i] = evec[m*nstatesqr+x*nstates+i] * expchild[i];
1666 }
1667 }
1668 }
1669
1670
1671 memset(ptn_ancestral_prob, 0, sizeof(double)*nptn*nstates);
1672
1673 if (dad->isLeaf()) {
1674 // special treatment for TIP-INTERNAL NODE case
1675 double partial_lh_leaf[(aln->STATE_UNKNOWN+1)*block];
1676
1677 for (IntVector::iterator it = aln->seq_states[dad->id].begin(); it != aln->seq_states[dad->id].end(); it++) {
1678 int state = (*it);
1679 for (m = 0; m < nmixture; m++) {
1680 double *this_echild = &echild[m*nstatesqr*ncat];
1681 double *this_tip_partial_lh = &tip_partial_lh[state*nstates*nmixture + m*nstates];
1682 double *this_partial_lh_leaf = &partial_lh_leaf[state*block+m*statecat];
1683 for (x = 0; x < statecat; x++) {
1684 double vchild = 0.0;
1685 for (i = 0; i < nstates; i++) {
1686 vchild += this_echild[x*nstates+i] * this_tip_partial_lh[i];
1687 }
1688 this_partial_lh_leaf[x] = vchild;
1689 }
1690 }
1691 }
1692 size_t addr = aln->STATE_UNKNOWN * block;
1693 for (x = 0; x < block; x++) {
1694 partial_lh_leaf[addr+x] = 1.0;
1695 }
1696
1697
1698 // now do the real computation
1699 #ifdef _OPENMP
1700 #pragma omp parallel for private(ptn, i, c, m, x)
1701 #endif
1702 for (ptn = 0; ptn < nptn; ptn++) {
1703 double *lh_state = ptn_ancestral_prob + ptn*nstates;
1704 double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
1705 int state_dad = (aln->at(ptn))[dad->id];
1706 double *lh_leaf = partial_lh_leaf + state_dad*block;
1707 for (m = 0; m < nmixture; m++) {
1708 double *this_inv_evec = inv_evec + (m*nstatesqr);
1709 for (c = 0; c < ncat; c++) {
1710 // compute real partial likelihood vector
1711 for (x = 0; x < nstates; x++) {
1712 double vnode = 0.0;
1713 for (i = 0; i < nstates; i++) {
1714 vnode += this_inv_evec[i*nstates+x] * partial_lh_dad[i];
1715 }
1716 lh_state[x] += lh_leaf[x] * vnode;
1717 }
1718 lh_leaf += nstates;
1719 partial_lh_dad += nstates;
1720 }
1721 }
1722
1723 double lh_sum = lh_state[0];
1724 for (x = 1; x < nstates; x++)
1725 lh_sum += lh_state[x];
1726 lh_sum = 1.0/lh_sum;
1727 for (x = 0; x < nstates; x++)
1728 lh_state[x] *= lh_sum;
1729 }
1730 } else {
1731 // both dad and node are internal nodes
1732 #ifdef _OPENMP
1733 #pragma omp parallel for private(ptn, i, c, m, x)
1734 #endif
1735 for (ptn = 0; ptn < nptn; ptn++) {
1736 double *lh_state = ptn_ancestral_prob + ptn*nstates;
1737 double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
1738 double *partial_lh_node = node_branch->partial_lh + ptn*block;
1739
1740 for (m = 0; m < nmixture; m++) {
1741 double *this_inv_evec = inv_evec + (m*nstatesqr);
1742 for (c = 0; c < ncat; c++) {
1743 // compute real partial likelihood vector
1744 for (x = 0; x < nstates; x++) {
1745 double vdad = 0.0, vnode = 0.0;
1746 size_t addr = (m*ncat+c)*nstatesqr+x*nstates;
1747 for (i = 0; i < nstates; i++) {
1748 vdad += echild[addr+i] * partial_lh_node[m*statecat+c*nstates+i];
1749 vnode += this_inv_evec[i*nstates+x] * partial_lh_dad[m*statecat+c*nstates+i];
1750 }
1751 lh_state[x] += vnode*vdad;
1752 }
1753 }
1754 }
1755
1756 double lh_sum = lh_state[0];
1757 for (x = 1; x < nstates; x++)
1758 lh_sum += lh_state[x];
1759 lh_sum = 1.0/lh_sum;
1760 for (x = 0; x < nstates; x++)
1761 lh_state[x] *= lh_sum;
1762
1763 }
1764 }
1765 }
1766 */
1767
computeJointAncestralSequences(int * ancestral_seqs)1768 void PhyloTree::computeJointAncestralSequences(int *ancestral_seqs) {
1769
1770 // step 1-3 of the dynamic programming algorithm of Pupko et al. 2000, MBE 17:890-896
1771 ASSERT(root->isLeaf());
1772 int *C = new int[(size_t)getAlnNPattern()*model->num_states*leafNum];
1773 computeAncestralLikelihood((PhyloNeighbor*)root->neighbors[0], NULL, C);
1774
1775 // step 4-5 of the dynamic programming algorithm of Pupko et al. 2000, MBE 17:890-896
1776 computeAncestralState((PhyloNeighbor*)root->neighbors[0], NULL, C, ancestral_seqs);
1777
1778 clearAllPartialLH();
1779
1780 delete[] C;
1781 }
1782
computeAncestralLikelihood(PhyloNeighbor * dad_branch,PhyloNode * dad,int * C)1783 void PhyloTree::computeAncestralLikelihood(PhyloNeighbor *dad_branch, PhyloNode *dad, int *C) {
1784 PhyloNode *node = (PhyloNode*)dad_branch->node;
1785 if (node->isLeaf())
1786 return;
1787
1788 int num_leaves = 0;
1789
1790 // recursive into subtree
1791 FOR_NEIGHBOR_DECLARE(node, dad, it) {
1792 if ((*it)->node->isLeaf()) {
1793 num_leaves++;
1794 } else {
1795 computeAncestralLikelihood((PhyloNeighbor*)(*it), node, C);
1796 }
1797 }
1798
1799 // TODO mem save
1800 if (params->lh_mem_save == LM_PER_NODE && !dad_branch->partial_lh) {
1801 // re-orient partial_lh
1802 bool done = false;
1803 FOR_NEIGHBOR_IT(node, dad, it2) {
1804 PhyloNeighbor *backnei = ((PhyloNeighbor*)(*it2)->node->findNeighbor(node));
1805 if (backnei->partial_lh) {
1806 dad_branch->partial_lh = backnei->partial_lh;
1807 dad_branch->scale_num = backnei->scale_num;
1808 backnei->partial_lh = NULL;
1809 backnei->scale_num = NULL;
1810 backnei->partial_lh_computed &= ~1; // clear bit
1811 done = true;
1812 break;
1813 }
1814 }
1815 ASSERT(done && "partial_lh is not re-oriented");
1816 }
1817
1818 size_t nptn = aln->getNPattern();
1819 size_t ptn;
1820 size_t nstates = model->num_states;
1821 size_t nstatesqr = nstates*nstates;
1822 size_t parent, child;
1823 double *trans_mat = new double[nstatesqr];
1824 double *lh_leaves = NULL;
1825 if (num_leaves > 0) {
1826 lh_leaves = new double[(aln->STATE_UNKNOWN+1)*nstates*num_leaves];
1827 }
1828 if (dad) {
1829 model->computeTransMatrix(dad_branch->length, trans_mat);
1830 for (parent = 0; parent < nstatesqr; parent++)
1831 trans_mat[parent] = log(trans_mat[parent]);
1832 } else {
1833 model->getStateFrequency(trans_mat);
1834 for (parent = 0; parent < nstates; parent++)
1835 trans_mat[parent] = log(trans_mat[parent]);
1836 for (parent = 1; parent < nstates; parent++)
1837 memcpy(trans_mat+parent*nstates, trans_mat, sizeof(double)*nstates);
1838 }
1839
1840 // compute information buffer for leaves
1841 int ambi_aa[] = {
1842 4+8, // B = N or D
1843 32+64, // Z = Q or E
1844 512+1024 // U = I or L
1845 };
1846 int leafid = 0;
1847 FOR_NEIGHBOR(node, dad, it) {
1848 if ((*it)->node->isLeaf()) {
1849 double trans_leaf[nstatesqr];
1850 model->computeTransMatrix((*it)->length, trans_leaf);
1851 double *lh_leaf = lh_leaves+leafid*nstates*(aln->STATE_UNKNOWN+1);
1852
1853 // assign lh_leaf for normal states
1854 for (parent = 0; parent < nstates; parent++)
1855 for (child = 0; child < nstates; child++)
1856 lh_leaf[child*nstates+parent] = log(trans_leaf[parent*nstates+child]);
1857
1858 // for unknown state
1859 double *this_lh_leaf = lh_leaf + (aln->STATE_UNKNOWN*nstates);
1860 for (parent = 0; parent < nstates; parent++)
1861 this_lh_leaf[parent] = 0.0;
1862
1863 // special treatment for ambiguous characters
1864 switch (aln->seq_type) {
1865 case SEQ_DNA:
1866 for (int state = 4; state < 18; state++) {
1867 this_lh_leaf = lh_leaf + (state*nstates);
1868 int cstate = state-nstates+1;
1869 for (parent = 0; parent < nstates; parent++) {
1870 double sumlh = 0.0;
1871 for (child = 0; child < nstates; child++) {
1872 if ((cstate) & (1 << child))
1873 sumlh += trans_leaf[parent*nstates+child];
1874 }
1875 this_lh_leaf[parent] = log(sumlh);
1876 }
1877 }
1878 break;
1879 case SEQ_PROTEIN:
1880 for (int state = 0; state < sizeof(ambi_aa)/sizeof(int); state++) {
1881 this_lh_leaf = lh_leaf + ((state+20)*nstates);
1882 for (parent = 0; parent < nstates; parent++) {
1883 double sumlh = 0.0;
1884 for (child = 0; child < nstates; child++) {
1885 if (ambi_aa[state] & (1 << child))
1886 sumlh += trans_leaf[parent*nstates+child];
1887 }
1888 this_lh_leaf[parent] = log(sumlh);
1889 }
1890 }
1891 break;
1892 default:
1893 break;
1894 }
1895 leafid++;
1896 }
1897 }
1898
1899 // initialize L_y(i) and C_y(i)
1900 // memset(dad_branch->partial_lh, 0, nptn*nstates*sizeof(double));
1901
1902 int *C_node = C + (node->id-leafNum)*nptn*nstates;
1903
1904 for (ptn = 0; ptn < nptn; ptn++) {
1905 double *lh_dad = dad_branch->partial_lh+ptn*nstates;
1906 int *this_C_node = C_node + (ptn*nstates);
1907 leafid = 0;
1908 double sumlh[nstates];
1909 memset(sumlh, 0, sizeof(double)*nstates);
1910 FOR_NEIGHBOR(node, dad, it) {
1911 PhyloNeighbor *childnei = (PhyloNeighbor*)(*it);
1912 if ((*it)->node->isLeaf()) {
1913 double *lh_leaf = lh_leaves+leafid*nstates*(aln->STATE_UNKNOWN+1);
1914 // external node
1915 int state_child;
1916 state_child = (aln->at(ptn))[(*it)->node->id];
1917 double *child_lh = lh_leaf + state_child*nstates;
1918 for (child = 0; child < nstates; child++)
1919 sumlh[child] += child_lh[child];
1920 leafid++;
1921 } else {
1922 double *child_lh = childnei->partial_lh + ptn*nstates;
1923 for (child = 0; child < nstates; child++)
1924 sumlh[child] += child_lh[child];
1925 }
1926 }
1927
1928
1929 if (dad) {
1930 // internal node
1931 for (parent = 0; parent < nstates; parent++) {
1932 lh_dad[parent] = trans_mat[parent*nstates] + sumlh[0];
1933 this_C_node[parent] = 0;
1934 for (child = 1; child < nstates; child++) {
1935 double lh = trans_mat[parent*nstates+child] + sumlh[child];
1936 if (lh > lh_dad[parent]) {
1937 lh_dad[parent] = lh;
1938 this_C_node[parent] = child;
1939 }
1940 }
1941 }
1942 } else {
1943 // at the root
1944 lh_dad[0] = trans_mat[0] + sumlh[0];
1945 this_C_node[0] = 0;
1946 for (parent = 1; parent < nstates; parent++) {
1947 double lh = trans_mat[parent] + sumlh[parent];
1948 if (lh > lh_dad[0]) {
1949 lh_dad[0] = lh;
1950 this_C_node[0] = parent;
1951 }
1952 }
1953 }
1954 }
1955
1956
1957 if (lh_leaves)
1958 delete[] lh_leaves;
1959 delete[] trans_mat;
1960 }
1961
1962
computeAncestralState(PhyloNeighbor * dad_branch,PhyloNode * dad,int * C,int * ancestral_seqs)1963 void PhyloTree::computeAncestralState(PhyloNeighbor *dad_branch, PhyloNode *dad, int *C, int *ancestral_seqs) {
1964 PhyloNode *node = (PhyloNode*)dad_branch->node;
1965 if (node->isLeaf())
1966 return;
1967
1968 size_t nptn = aln->getNPattern();
1969 size_t ptn;
1970 size_t nstates = model->num_states;
1971
1972 int *C_node = C + (node->id-leafNum)*nptn*nstates;
1973 int *ancestral_seqs_node = ancestral_seqs + (node->id-leafNum)*nptn;
1974 if (dad) {
1975 // at an internal node
1976 int *ancestral_seqs_dad = ancestral_seqs + (dad->id-leafNum)*nptn;
1977 for (ptn = 0; ptn < nptn; ptn++)
1978 ancestral_seqs_node[ptn] = C_node[ptn*nstates+ancestral_seqs_dad[ptn]];
1979
1980 } else {
1981 // at the root
1982 for (ptn = 0; ptn < nptn; ptn++)
1983 ancestral_seqs_node[ptn] = C_node[ptn*nstates];
1984 }
1985 FOR_NEIGHBOR_IT(node, dad, it)
1986 computeAncestralState((PhyloNeighbor*)(*it), node, C, ancestral_seqs);
1987 }
1988
1989
1990
1991