1 /*
2  * phylokernelsitemodel.cpp
3  * likelihood kernel site-specific frequency model
4  *
5  *  Created on: Jan 9, 2016
6  *      Author: minh
7  */
8 
9 
10 
11 #include "tree/phylotree.h"
12 #include "model/modelset.h"
13 
computeSitemodelPartialLikelihoodEigen(PhyloNeighbor * dad_branch,PhyloNode * dad)14 void PhyloTree::computeSitemodelPartialLikelihoodEigen(PhyloNeighbor *dad_branch, PhyloNode *dad) {
15 
16     // don't recompute the likelihood
17 	ASSERT(dad);
18     if (dad_branch->partial_lh_computed & 1)
19         return;
20     dad_branch->partial_lh_computed |= 1;
21     PhyloNode *node = (PhyloNode*)(dad_branch->node);
22 
23 
24     size_t nstates = aln->num_states;
25     size_t nptn = aln->size(), tip_block_size = get_safe_upper_limit(nptn)*nstates;
26     size_t ptn, c;
27     size_t ncat = site_rate->getNRate();
28     size_t i, x;
29     size_t block = nstates * ncat;
30     ModelSet *models = (ModelSet*) model;
31     ASSERT(models->size() == nptn);
32 
33 
34 	if (node->isLeaf()) {
35 	    dad_branch->lh_scale_factor = 0.0;
36 		// scale number must be ZERO
37 //	    memset(dad_branch->scale_num, 0, nptn * sizeof(UBYTE));
38 
39 		if (!tip_partial_lh_computed)
40 			computeTipPartialLikelihood();
41 
42 		return;
43 	}
44 
45     dad_branch->lh_scale_factor = 0.0;
46 
47 	// internal node
48 	PhyloNeighbor *left = NULL, *right = NULL; // left & right are two neighbors leading to 2 subtrees
49 	FOR_NEIGHBOR_IT(node, dad, it) {
50         PhyloNeighbor *nei = (PhyloNeighbor*)*it;
51 		if (!left) left = (PhyloNeighbor*)(*it); else right = (PhyloNeighbor*)(*it);
52         if ((nei->partial_lh_computed & 1) == 0)
53             computePartialLikelihood(nei, node);
54         dad_branch->lh_scale_factor += nei->lh_scale_factor;
55 	}
56 
57     if (params->lh_mem_save == LM_PER_NODE && !dad_branch->partial_lh) {
58         // re-orient partial_lh
59         bool done = false;
60         FOR_NEIGHBOR_IT(node, dad, it2) {
61             PhyloNeighbor *backnei = ((PhyloNeighbor*)(*it2)->node->findNeighbor(node));
62             if (backnei->partial_lh) {
63                 dad_branch->partial_lh = backnei->partial_lh;
64                 dad_branch->scale_num = backnei->scale_num;
65                 backnei->partial_lh = NULL;
66                 backnei->scale_num = NULL;
67                 backnei->partial_lh_computed &= ~1; // clear bit
68                 done = true;
69                 break;
70             }
71         }
72         ASSERT(done && "partial_lh is not re-oriented");
73     }
74 
75 
76     double sum_scale = 0.0;
77 
78 	if (!left->node->isLeaf() && right->node->isLeaf()) {
79 		PhyloNeighbor *tmp = left;
80 		left = right;
81 		right = tmp;
82 	}
83 
84     if (node->degree() > 3) {
85 
86         /*--------------------- multifurcating node ------------------*/
87 
88         // now for-loop computing partial_lh over all site-patterns
89 #ifdef _OPENMP
90 #pragma omp parallel for reduction(+: sum_scale) private(ptn, c, x, i) schedule(static)
91 #endif
92         for (ptn = 0; ptn < nptn; ptn++) {
93             double partial_lh_all[block];
94             for (i = 0; i < block; i++)
95                 partial_lh_all[i] = 1.0;
96             dad_branch->scale_num[ptn] = 0;
97 
98             double expchild[nstates];
99             double *eval = models->at(ptn)->getEigenvalues();
100             double *evec = models->at(ptn)->getEigenvectors();
101             double *inv_evec = models->at(ptn)->getInverseEigenvectors();
102 
103             FOR_NEIGHBOR_IT(node, dad, it) {
104                 PhyloNeighbor *child = (PhyloNeighbor*)*it;
105                 if (child->node->isLeaf()) {
106                     // external node
107                     double *tip_partial_lh_child = tip_partial_lh + (child->node->id*tip_block_size)+ptn*nstates;
108                     double *partial_lh = partial_lh_all;
109                     for (c = 0; c < ncat; c++) {
110                         double len_child = site_rate->getRate(c) * child->length;
111                         for (i = 0; i < nstates; i++) {
112                             expchild[i] = exp(eval[i]*len_child) * tip_partial_lh_child[i];
113                         }
114                         // compute real partial likelihood vector
115                         for (x = 0; x < nstates; x++) {
116                             double vchild = 0.0;
117                             double *this_evec = evec + x*nstates;
118                             for (i = 0; i < nstates; i++) {
119                                 vchild += this_evec[i] * expchild[i];
120                             }
121                             partial_lh[x] *= vchild;
122                         }
123                         partial_lh += nstates;
124                     }
125                 } else {
126                     // internal node
127                     dad_branch->scale_num[ptn] += child->scale_num[ptn];
128 
129                     double *partial_lh_child = child->partial_lh + ptn*block;
130                     double *partial_lh = partial_lh_all;
131                     for (c = 0; c < ncat; c++) {
132                         double len_child = site_rate->getRate(c) * child->length;
133                         for (i = 0; i < nstates; i++) {
134                             expchild[i] = exp(eval[i]*len_child) * partial_lh_child[i];
135                         }
136                         // compute real partial likelihood vector
137                         for (x = 0; x < nstates; x++) {
138                             double vchild = 0.0;
139                             double *this_evec = evec + x*nstates;
140                             for (i = 0; i < nstates; i++) {
141                                 vchild += this_evec[i] * expchild[i];
142                             }
143                             partial_lh[x] *= vchild;
144                         }
145                         partial_lh_child += nstates;
146                         partial_lh += nstates;
147                     }
148                 } // if
149 
150 
151             } // FOR_NEIGHBOR
152 
153 
154             // compute dot-product with inv_eigenvector
155             double lh_max = 0.0;
156             double *partial_lh_tmp = partial_lh_all;
157             double *partial_lh = dad_branch->partial_lh + ptn*block;
158             for (c = 0; c < ncat; c++) {
159                 double *inv_evec_ptr = inv_evec;
160                 for (i = 0; i < nstates; i++) {
161                     double res = 0.0;
162                     for (x = 0; x < nstates; x++) {
163                         res += partial_lh_tmp[x]*inv_evec_ptr[x];
164                     }
165                     inv_evec_ptr += nstates;
166                     partial_lh[i] = res;
167                     lh_max = max(lh_max, fabs(res));
168                 }
169                 partial_lh += nstates;
170                 partial_lh_tmp += nstates;
171             }
172             // check if one should scale partial likelihoods
173             if (lh_max < SCALING_THRESHOLD) {
174                 partial_lh = dad_branch->partial_lh + ptn*block;
175                 if (lh_max == 0.0) {
176                     // for very shitty data
177                     for (c = 0; c < ncat; c++)
178                         memcpy(&partial_lh[c*nstates], &tip_partial_lh[ptn*nstates], nstates*sizeof(double));
179                     sum_scale += LOG_SCALING_THRESHOLD* 4 * ptn_freq[ptn];
180                     //sum_scale += log(lh_max) * ptn_freq[ptn];
181                     dad_branch->scale_num[ptn] += 4;
182                     int nsite = aln->getNSite();
183                     for (i = 0, x = 0; i < nsite && x < ptn_freq[ptn]; i++)
184                         if (aln->getPatternID(i) == ptn) {
185                             outWarning((string)"Numerical underflow for site " + convertIntToString(i+1));
186                             x++;
187                         }
188                 } else {
189                     // now do the likelihood scaling
190                     for (i = 0; i < block; i++) {
191                         partial_lh[i] *= SCALING_THRESHOLD_INVER;
192                         //partial_lh[i] /= lh_max;
193                     }
194                     // unobserved const pattern will never have underflow
195                     sum_scale += LOG_SCALING_THRESHOLD * ptn_freq[ptn];
196                     //sum_scale += log(lh_max) * ptn_freq[ptn];
197                     dad_branch->scale_num[ptn] += 1;
198                 }
199             }
200 
201         } // for ptn
202         dad_branch->lh_scale_factor += sum_scale;
203 
204         // end multifurcating treatment
205     } else
206     if (left->node->isLeaf() && right->node->isLeaf()) {
207 
208         /*--------------------- TIP-TIP (cherry) case ------------------*/
209 
210         double *tip_partial_lh_left = tip_partial_lh + (left->node->id * tip_block_size);
211         double *tip_partial_lh_right = tip_partial_lh + (right->node->id * tip_block_size);
212 
213 		// scale number must be ZERO
214 	    memset(dad_branch->scale_num, 0, nptn * sizeof(UBYTE));
215 #ifdef _OPENMP
216 #pragma omp parallel for private(ptn, c, x, i) schedule(static)
217 #endif
218 		for (ptn = 0; ptn < nptn; ptn++) {
219 			double partial_lh_tmp[nstates];
220 			double *partial_lh = dad_branch->partial_lh + ptn*block;
221 			double *partial_lh_left = tip_partial_lh_left + ptn*nstates;
222 			double *partial_lh_right = tip_partial_lh_right + ptn*nstates;
223 
224             double expleft[nstates];
225             double expright[nstates];
226             double *eval = models->at(ptn)->getEigenvalues();
227             double *evec = models->at(ptn)->getEigenvectors();
228             double *inv_evec = models->at(ptn)->getInverseEigenvectors();
229 
230 			for (c = 0; c < ncat; c++) {
231                 double len_left = site_rate->getRate(c) * left->length;
232                 double len_right = site_rate->getRate(c) * right->length;
233                 for (i = 0; i < nstates; i++) {
234                     expleft[i] = exp(eval[i]*len_left) * partial_lh_left[i];
235                     expright[i] = exp(eval[i]*len_right) * partial_lh_right[i];
236                 }
237 
238 				// compute real partial likelihood vector
239 				for (x = 0; x < nstates; x++) {
240 					double vleft = 0.0, vright = 0.0;
241                     double *this_evec = evec + x*nstates;
242 					for (i = 0; i < nstates; i++) {
243 						vleft += this_evec[i] * expleft[i];
244 						vright += this_evec[i] * expright[i];
245 					}
246 					partial_lh_tmp[x] = vleft*vright;
247 				}
248 
249                 // do not increase partial_lh_left and right for tips
250 
251 				// compute dot-product with inv_eigenvector
252                 double *inv_evec_ptr = inv_evec;
253 				for (i = 0; i < nstates; i++) {
254 					double res = 0.0;
255 					for (x = 0; x < nstates; x++) {
256 						res += partial_lh_tmp[x]*inv_evec_ptr[x];
257 					}
258                     inv_evec_ptr += nstates;
259 					partial_lh[c*nstates+i] = res;
260 				}
261 			}
262 
263 		}
264 
265 	} else if (left->node->isLeaf() && !right->node->isLeaf()) {
266 
267         /*--------------------- TIP-INTERNAL NODE case ------------------*/
268 
269         double *tip_partial_lh_left = tip_partial_lh + (left->node->id * tip_block_size);
270 
271 		// only take scale_num from the right subtree
272 		memcpy(dad_branch->scale_num, right->scale_num, nptn * sizeof(UBYTE));
273 #ifdef _OPENMP
274 #pragma omp parallel for reduction(+: sum_scale) private(ptn, c, x, i) schedule(static)
275 #endif
276 		for (ptn = 0; ptn < nptn; ptn++) {
277 			double partial_lh_tmp[nstates];
278 			double *partial_lh = dad_branch->partial_lh + ptn*block;
279 			double *partial_lh_left = tip_partial_lh_left + ptn*nstates;
280 			double *partial_lh_right = right->partial_lh + ptn*block;
281             double lh_max = 0.0;
282 
283             double expleft[nstates];
284             double expright[nstates];
285             double *eval = models->at(ptn)->getEigenvalues();
286             double *evec = models->at(ptn)->getEigenvectors();
287             double *inv_evec = models->at(ptn)->getInverseEigenvectors();
288 
289 			for (c = 0; c < ncat; c++) {
290                 double len_left = site_rate->getRate(c) * left->length;
291                 double len_right = site_rate->getRate(c) * right->length;
292                 for (i = 0; i < nstates; i++) {
293                     expleft[i] = exp(eval[i]*len_left) * partial_lh_left[i];
294                     expright[i] = exp(eval[i]*len_right) * partial_lh_right[i];
295                 }
296 				// compute real partial likelihood vector
297 				for (x = 0; x < nstates; x++) {
298 					double vleft = 0.0, vright = 0.0;
299                     double *this_evec = evec + x*nstates;
300 					for (i = 0; i < nstates; i++) {
301 						vleft += this_evec[i] * expleft[i];
302 						vright += this_evec[i] * expright[i];
303 					}
304 					partial_lh_tmp[x] = vleft*vright;
305 				}
306                 // do not increase partial_lh_left for left tip
307                 partial_lh_right += nstates;
308 
309 				// compute dot-product with inv_eigenvector
310                 double *inv_evec_ptr = inv_evec;
311 				for (i = 0; i < nstates; i++) {
312 					double res = 0.0;
313 					for (x = 0; x < nstates; x++) {
314 						res += partial_lh_tmp[x]*inv_evec_ptr[x];
315 					}
316                     inv_evec_ptr += nstates;
317 					partial_lh[c*nstates+i] = res;
318                     lh_max = max(lh_max, fabs(res));
319 				}
320 			}
321 
322             // check if one should scale partial likelihoods
323             if (lh_max < SCALING_THRESHOLD) {
324             	if (lh_max == 0.0) {
325             		// for very shitty data
326             		for (c = 0; c < ncat; c++)
327             			memcpy(&partial_lh[c*nstates], &tip_partial_lh[ptn*nstates], nstates*sizeof(double));
328 					sum_scale += LOG_SCALING_THRESHOLD* 4 * ptn_freq[ptn];
329 					//sum_scale += log(lh_max) * ptn_freq[ptn];
330 					dad_branch->scale_num[ptn] += 4;
331 					int nsite = aln->getNSite();
332 					for (i = 0, x = 0; i < nsite && x < ptn_freq[ptn]; i++)
333 						if (aln->getPatternID(i) == ptn) {
334 							outWarning((string)"Numerical underflow for site " + convertIntToString(i+1));
335 							x++;
336 						}
337             	} else {
338 					// now do the likelihood scaling
339 					for (i = 0; i < block; i++) {
340 						partial_lh[i] *= SCALING_THRESHOLD_INVER;
341 	                    //partial_lh[i] /= lh_max;
342 					}
343 					// unobserved const pattern will never have underflow
344 					sum_scale += LOG_SCALING_THRESHOLD * ptn_freq[ptn];
345 					//sum_scale += log(lh_max) * ptn_freq[ptn];
346 					dad_branch->scale_num[ptn] += 1;
347             	}
348             }
349 
350 		}
351 		dad_branch->lh_scale_factor += sum_scale;
352 
353 	} else {
354         /*--------------------- INTERNAL-INTERNAL NODE case ------------------*/
355 
356 #ifdef _OPENMP
357 #pragma omp parallel for reduction(+: sum_scale) private(ptn, c, x, i) schedule(static)
358 #endif
359 		for (ptn = 0; ptn < nptn; ptn++) {
360 			double partial_lh_tmp[nstates];
361 			double *partial_lh = dad_branch->partial_lh + ptn*block;
362 			double *partial_lh_left = left->partial_lh + ptn*block;
363 			double *partial_lh_right = right->partial_lh + ptn*block;
364             double lh_max = 0.0;
365 
366             double expleft[nstates];
367             double expright[nstates];
368             double *eval = models->at(ptn)->getEigenvalues();
369             double *evec = models->at(ptn)->getEigenvectors();
370             double *inv_evec = models->at(ptn)->getInverseEigenvectors();
371 
372 			dad_branch->scale_num[ptn] = left->scale_num[ptn] + right->scale_num[ptn];
373 
374 			for (c = 0; c < ncat; c++) {
375                 double len_left = site_rate->getRate(c) * left->length;
376                 double len_right = site_rate->getRate(c) * right->length;
377                 for (i = 0; i < nstates; i++) {
378                     expleft[i] = exp(eval[i]*len_left) * partial_lh_left[i];
379                     expright[i] = exp(eval[i]*len_right) * partial_lh_right[i];
380                 }
381 				// compute real partial likelihood vector
382 				for (x = 0; x < nstates; x++) {
383 					double vleft = 0.0, vright = 0.0;
384                     double *this_evec = evec + x*nstates;
385 					for (i = 0; i < nstates; i++) {
386 						vleft += this_evec[i] * expleft[i];
387 						vright += this_evec[i] * expright[i];
388 					}
389 					partial_lh_tmp[x] = vleft*vright;
390 				}
391                 partial_lh_left += nstates;
392                 partial_lh_right += nstates;
393 
394 				// compute dot-product with inv_eigenvector
395                 double *inv_evec_ptr = inv_evec;
396 				for (i = 0; i < nstates; i++) {
397 					double res = 0.0;
398 					for (x = 0; x < nstates; x++) {
399 						res += partial_lh_tmp[x]*inv_evec_ptr[x];
400 					}
401                     inv_evec_ptr += nstates;
402 					partial_lh[c*nstates+i] = res;
403                     lh_max = max(lh_max, fabs(res));
404 				}
405 			}
406 
407             // check if one should scale partial likelihoods
408             if (lh_max < SCALING_THRESHOLD) {
409             	if (lh_max == 0.0) {
410             		// for very shitty data
411             		for (c = 0; c < ncat; c++)
412             			memcpy(&partial_lh[c*nstates], &tip_partial_lh[ptn*nstates], nstates*sizeof(double));
413 					sum_scale += LOG_SCALING_THRESHOLD* 4 * ptn_freq[ptn];
414 					//sum_scale += log(lh_max) * ptn_freq[ptn];
415 					dad_branch->scale_num[ptn] += 4;
416 					int nsite = aln->getNSite();
417 					for (i = 0, x = 0; i < nsite && x < ptn_freq[ptn]; i++)
418 						if (aln->getPatternID(i) == ptn) {
419 							outWarning((string)"Numerical underflow for site " + convertIntToString(i+1));
420 							x++;
421 						}
422             	} else {
423 					// now do the likelihood scaling
424 					for (i = 0; i < block; i++) {
425 						partial_lh[i] *= SCALING_THRESHOLD_INVER;
426 	                    //partial_lh[i] /= lh_max;
427 					}
428 					// unobserved const pattern will never have underflow
429 					sum_scale += LOG_SCALING_THRESHOLD * ptn_freq[ptn];
430 					//sum_scale += log(lh_max) * ptn_freq[ptn];
431 					dad_branch->scale_num[ptn] += 1;
432             	}
433             }
434 
435 		}
436 		dad_branch->lh_scale_factor += sum_scale;
437 
438 	}
439 
440 }
441 
442 //template <const int nstates>
computeSitemodelLikelihoodDervEigen(PhyloNeighbor * dad_branch,PhyloNode * dad,double & df,double & ddf)443 void PhyloTree::computeSitemodelLikelihoodDervEigen(PhyloNeighbor *dad_branch, PhyloNode *dad, double &df, double &ddf) {
444     PhyloNode *node = (PhyloNode*) dad_branch->node;
445     PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad);
446     if (!central_partial_lh)
447         initializeAllPartialLh();
448     if (node->isLeaf()) {
449     	PhyloNode *tmp_node = dad;
450     	dad = node;
451     	node = tmp_node;
452     	PhyloNeighbor *tmp_nei = dad_branch;
453     	dad_branch = node_branch;
454     	node_branch = tmp_nei;
455     }
456     if ((dad_branch->partial_lh_computed & 1) == 0)
457         computePartialLikelihood(dad_branch, dad);
458     if ((node_branch->partial_lh_computed & 1) == 0)
459         computePartialLikelihood(node_branch, node);
460 
461     size_t nstates = aln->num_states;
462     size_t ncat = site_rate->getNRate();
463 
464     size_t block = ncat * nstates;
465     size_t ptn; // for big data size > 4GB memory required
466     size_t c, i;
467     size_t nptn = aln->size();
468 
469 	ASSERT(theta_all);
470 	if (!theta_computed) {
471 		// precompute theta for fast branch length optimization
472 
473 	    if (dad->isLeaf()) {
474 	    	// special treatment for TIP-INTERNAL NODE case
475 
476             double *tip_partial_lh_node = tip_partial_lh + (dad->id * get_safe_upper_limit(nptn)*nstates);
477 
478 #ifdef _OPENMP
479 #pragma omp parallel for private(ptn, i, c) schedule(static)
480 #endif
481 	    	for (ptn = 0; ptn < nptn; ptn++) {
482 				double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
483 				double *theta = theta_all + ptn*block;
484 				double *lh_tip = tip_partial_lh_node + ptn*nstates;
485                 for (c = 0; c < ncat; c++) {
486                     for (i = 0; i < nstates; i++) {
487                         theta[i] = lh_tip[i] * partial_lh_dad[i];
488                     }
489                     partial_lh_dad += nstates;
490                     theta += nstates;
491                 }
492 
493 			}
494 	    } else
495         {
496 	    	// both dad and node are internal nodes
497 
498 //	    	size_t all_entries = nptn*block;
499 #ifdef _OPENMP
500 #pragma omp parallel for private(ptn, i) schedule(static)
501 #endif
502 	    	for (ptn = 0; ptn < nptn; ptn++) {
503 				double *theta = theta_all + ptn*block;
504 			    double *partial_lh_node = node_branch->partial_lh + ptn*block;
505 			    double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
506 	    		for (i = 0; i < block; i++) {
507 	    			theta[i] = partial_lh_node[i] * partial_lh_dad[i];
508 	    		}
509 			}
510 	    }
511 		theta_computed = true;
512 	}
513 
514     ModelSet *models = (ModelSet*)model;
515     double my_df = 0.0, my_ddf = 0.0;
516 
517 #ifdef _OPENMP
518 #pragma omp parallel for reduction(+: my_df, my_ddf) private(ptn, i, c) schedule(static)
519 #endif
520     for (ptn = 0; ptn < nptn; ptn++) {
521 		double lh_ptn = ptn_invar[ptn], df_ptn = 0.0, ddf_ptn = 0.0;
522 		double *theta = theta_all + ptn*block;
523 
524         double *eval = models->at(ptn)->getEigenvalues();
525 
526         for (c = 0; c < ncat; c++) {
527             double lh_cat = 0.0, df_cat = 0.0, ddf_cat = 0.0;
528             for (i = 0; i < nstates; i++) {
529                 double cof = eval[i]*site_rate->getRate(c);
530                 double val = exp(cof*dad_branch->length) * theta[i];
531                 double val1 = cof*val;
532                 lh_cat += val;
533                 df_cat += val1;
534                 ddf_cat += cof*val1;
535             }
536             double prop = site_rate->getProp(c);
537             lh_ptn += prop * lh_cat;
538             df_ptn += prop * df_cat;
539             ddf_ptn += prop * ddf_cat;
540             theta += nstates;
541         }
542 
543         lh_ptn = 1.0/fabs(lh_ptn);
544 
545         double df_frac = df_ptn * lh_ptn;
546         double ddf_frac = ddf_ptn * lh_ptn;
547         double freq = ptn_freq[ptn];
548         double tmp1 = df_frac * freq;
549         double tmp2 = ddf_frac * freq;
550         my_df += tmp1;
551         my_ddf += tmp2 - tmp1 * df_frac;
552     }
553 	df = my_df;
554 	ddf = my_ddf;
555     if (isnan(df) || isinf(df)) {
556         df = 0.0;
557         ddf = 0.0;
558 //        outWarning("Numerical instability (some site-likelihood = 0)");
559     }
560 
561 }
562 
563 //template <const int nstates>
computeSitemodelLikelihoodBranchEigen(PhyloNeighbor * dad_branch,PhyloNode * dad)564 double PhyloTree::computeSitemodelLikelihoodBranchEigen(PhyloNeighbor *dad_branch, PhyloNode *dad) {
565     PhyloNode *node = (PhyloNode*) dad_branch->node;
566     PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad);
567     if (!central_partial_lh)
568         initializeAllPartialLh();
569     if (node->isLeaf()) {
570     	PhyloNode *tmp_node = dad;
571     	dad = node;
572     	node = tmp_node;
573     	PhyloNeighbor *tmp_nei = dad_branch;
574     	dad_branch = node_branch;
575     	node_branch = tmp_nei;
576     }
577     if ((dad_branch->partial_lh_computed & 1) == 0)
578 //        computePartialLikelihoodEigen(dad_branch, dad);
579         computePartialLikelihood(dad_branch, dad);
580     if ((node_branch->partial_lh_computed & 1) == 0)
581 //        computePartialLikelihoodEigen(node_branch, node);
582         computePartialLikelihood(node_branch, node);
583     double tree_lh = node_branch->lh_scale_factor + dad_branch->lh_scale_factor;
584     size_t nstates = aln->num_states;
585     size_t ncat = site_rate->getNRate();
586 
587     size_t block = ncat * nstates;
588     size_t ptn; // for big data size > 4GB memory required
589     size_t c, i;
590     size_t nptn = aln->size();
591 
592 
593 	memset(_pattern_lh_cat, 0, sizeof(double)*nptn*ncat);
594     ModelSet *models = (ModelSet*)model;
595 
596     if (dad->isLeaf()) {
597     	// special treatment for TIP-INTERNAL NODE case
598         double *tip_partial_lh_node = tip_partial_lh + (dad->id * get_safe_upper_limit(nptn)*nstates);
599 #ifdef _OPENMP
600 #pragma omp parallel for reduction(+: tree_lh) private(ptn, i, c) schedule(static)
601 #endif
602     	for (ptn = 0; ptn < nptn; ptn++) {
603 			double lh_ptn = ptn_invar[ptn];
604 			double *lh_cat = _pattern_lh_cat + ptn*ncat;
605 			double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
606 			double *partial_lh_node = tip_partial_lh_node + ptn*nstates;
607             double *eval = models->at(ptn)->getEigenvalues();
608 
609 			for (c = 0; c < ncat; c++) {
610                 double len = site_rate->getRate(c)*dad_branch->length;
611                 double prop = site_rate->getProp(c);
612 				for (i = 0; i < nstates; i++) {
613 					*lh_cat +=  (exp(eval[i]*len) * partial_lh_node[i] * partial_lh_dad[i]);
614 				}
615                 *lh_cat *= prop;
616 				lh_ptn += *lh_cat;
617                 // don't increase partial_lh_node pointer
618 				partial_lh_dad += nstates;
619 				lh_cat++;
620 			}
621 
622             lh_ptn = log(fabs(lh_ptn));
623             _pattern_lh[ptn] = lh_ptn;
624             tree_lh += lh_ptn * ptn_freq[ptn];
625 		}
626     } else
627     {
628     	// both dad and node are internal nodes
629 #ifdef _OPENMP
630 #pragma omp parallel for reduction(+: tree_lh) private(ptn, i, c) schedule(static)
631 #endif
632     	for (ptn = 0; ptn < nptn; ptn++) {
633 			double lh_ptn = ptn_invar[ptn];
634 			double *lh_cat = _pattern_lh_cat + ptn*ncat;
635 			double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
636 			double *partial_lh_node = node_branch->partial_lh + ptn*block;
637             double *eval = models->at(ptn)->getEigenvalues();
638 
639 			for (c = 0; c < ncat; c++) {
640                 double len = site_rate->getRate(c)*dad_branch->length;
641                 double prop = site_rate->getProp(c);
642 				for (i = 0; i < nstates; i++) {
643 					*lh_cat +=  (exp(eval[i]*len) * partial_lh_node[i] * partial_lh_dad[i]);
644 				}
645                 *lh_cat *= prop;
646 				lh_ptn += *lh_cat;
647 				partial_lh_node += nstates;
648 				partial_lh_dad += nstates;
649 				lh_cat++;
650 			}
651 
652             lh_ptn = log(fabs(lh_ptn));
653             _pattern_lh[ptn] = lh_ptn;
654             tree_lh += lh_ptn * ptn_freq[ptn];
655 		}
656     }
657 
658     if (isnan(tree_lh) || isinf(tree_lh)) {
659         cout << "WARNING: Numerical underflow caused by alignment sites";
660         i = aln->getNSite();
661         int j;
662         for (j = 0, c = 0; j < i; j++) {
663             ptn = aln->getPatternID(j);
664             if (isnan(_pattern_lh[ptn]) || isinf(_pattern_lh[ptn])) {
665                 cout << " " << j+1;
666                 c++;
667                 if (c >= 10) {
668                     cout << " ...";
669                     break;
670                 }
671             }
672         }
673         cout << endl;
674         tree_lh = current_it->lh_scale_factor + current_it_back->lh_scale_factor;
675         for (ptn = 0; ptn < nptn; ptn++) {
676             if (isnan(_pattern_lh[ptn]) || isinf(_pattern_lh[ptn])) {
677                 _pattern_lh[ptn] = LOG_SCALING_THRESHOLD*4; // log(2^(-1024))
678             }
679             tree_lh += _pattern_lh[ptn] * ptn_freq[ptn];
680         }
681     }
682 
683 	ASSERT(!isnan(tree_lh) && !isinf(tree_lh));
684 
685     return tree_lh;
686 }
687 
688 
computeSitemodelLikelihoodFromBufferEigen()689 double PhyloTree::computeSitemodelLikelihoodFromBufferEigen() {
690 	ASSERT(theta_all && theta_computed);
691 
692     size_t nstates = aln->num_states;
693     size_t ncat = site_rate->getNRate();
694 
695     size_t block = ncat * nstates;
696     size_t ptn; // for big data size > 4GB memory required
697     size_t c, i;
698     size_t nptn = aln->size();
699 
700     ModelSet *models = (ModelSet*)model;
701 
702     double tree_lh = current_it->lh_scale_factor + current_it_back->lh_scale_factor;
703 
704 #ifdef _OPENMP
705 #pragma omp parallel for reduction(+: tree_lh) private(ptn, i, c) schedule(static)
706 #endif
707     for (ptn = 0; ptn < nptn; ptn++) {
708 		double lh_ptn = ptn_invar[ptn];
709 		double *theta = theta_all + ptn*block;
710 
711         double *eval = models->at(ptn)->getEigenvalues();
712 
713         for (c = 0; c < ncat; c++) {
714             double lh_cat = 0.0;
715             double len = site_rate->getRate(c)*current_it->length;
716             for (i = 0; i < nstates; i++) {
717                 lh_cat += exp(eval[i]*len) * theta[i];
718             }
719             lh_ptn += lh_cat * site_rate->getProp(c);
720             theta += nstates;
721         }
722 
723         lh_ptn = log(fabs(lh_ptn));
724         _pattern_lh[ptn] = lh_ptn;
725         tree_lh += lh_ptn * ptn_freq[ptn];
726     }
727     return tree_lh;
728 }
729 
730