1 /***************************************************************************
2  *   Copyright (C) 2009 by BUI Quang Minh   *
3  *   minh.bui@univie.ac.at   *
4  *                                                                         *
5  *   This program is free software; you can redistribute it and/or modify  *
6  *   it under the terms of the GNU General Public License as published by  *
7  *   the Free Software Foundation; either version 2 of the License, or     *
8  *   (at your option) any later version.                                   *
9  *                                                                         *
10  *   This program is distributed in the hope that it will be useful,       *
11  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
13  *   GNU General Public License for more details.                          *
14  *                                                                         *
15  *   You should have received a copy of the GNU General Public License     *
16  *   along with this program; if not, write to the                         *
17  *   Free Software Foundation, Inc.,                                       *
18  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
19  ***************************************************************************/
20 #include "phylotree.h"
21 #include "vectorclass/instrset.h"
22 
23 #if INSTRSET < 2
24 #include "phylokernelnew.h"
25 #define KERNEL_FIX_STATES
26 #include "phylokernelnew.h"
27 #include "vectorclass/vectorf64.h"
28 #endif
29 
30 //#include "phylokernel.h"
31 //#include "phylokernelmixture.h"
32 //#include "phylokernelmixrate.h"
33 //#include "phylokernelsitemodel.h"
34 
35 #include "model/modelmarkov.h"
36 #include "model/modelset.h"
37 
38 /* BQM: to ignore all-gapp subtree at an alignment site */
39 //#define IGNORE_GAP_LH
40 
41 //#define USING_SSE
42 
setNumThreads(int num_threads)43 void PhyloTree::setNumThreads(int num_threads) {
44     if (!isSuperTree() && aln && num_threads > 1 && num_threads > aln->getNPattern()/8) {
45         outWarning(convertIntToString(num_threads) + " threads for alignment length " +
46                    convertIntToString(aln->getNPattern()) + " will slow down analysis");
47         num_threads = max(aln->getNPattern()/8,1);
48     }
49     this->num_threads = num_threads;
50 }
51 
setParsimonyKernel(LikelihoodKernel lk)52 void PhyloTree::setParsimonyKernel(LikelihoodKernel lk) {
53 
54     if (cost_matrix) {
55         // Sankoff parsimony kernel
56         if (lk < LK_SSE2) {
57             computeParsimonyBranchPointer = &PhyloTree::computeParsimonyBranchSankoff;
58             computePartialParsimonyPointer = &PhyloTree::computePartialParsimonySankoff;
59             return;
60         }
61         if (lk >= LK_AVX) {
62             setParsimonyKernelAVX();
63             return;
64         }
65         if (lk >= LK_SSE2) {
66             setParsimonyKernelSSE();
67             return;
68         }
69         ASSERT(0);
70         return;
71     }
72     // set Fitch parsimony kernel
73     if (lk < LK_SSE2) {
74         computeParsimonyBranchPointer = &PhyloTree::computeParsimonyBranchFast;
75         computePartialParsimonyPointer = &PhyloTree::computePartialParsimonyFast;
76     	return;
77     }
78     if (lk >= LK_AVX) {
79         setParsimonyKernelAVX();
80         return;
81     }
82     if (lk >= LK_SSE2) {
83         setParsimonyKernelSSE();
84         return;
85     }
86     ASSERT(0);
87 }
88 
setLikelihoodKernel(LikelihoodKernel lk)89 void PhyloTree::setLikelihoodKernel(LikelihoodKernel lk) {
90 
91 	sse = lk;
92     vector_size = 1;
93     safe_numeric = (params && (params->lk_safe_scaling || leafNum >= params->numseq_safe_scaling)) ||
94         (aln && aln->num_states != 4 && aln->num_states != 20);
95 
96     //--- parsimony kernel ---
97     setParsimonyKernel(lk);
98 
99     //--- dot-product kernel ---
100 #ifdef __AVX512KNL
101     if (lk >= LK_AVX512) {
102 		setDotProductAVX512();
103     } else
104 #endif
105     if (lk >= LK_AVX_FMA) {
106 		setDotProductFMA();
107 	} else if (lk >= LK_AVX) {
108 		setDotProductAVX();
109     } else if (lk >= LK_SSE2) {
110         setDotProductSSE();
111 	} else {
112 
113 #if INSTRSET < 2
114 #ifdef BOOT_VAL_FLOAT
115         // TODO naive dot-product for float
116         ASSERT(0 && "Not supported, contact developer");
117 //		dotProduct = &PhyloTree::dotProductSIMD<float, Vec1f>;
118 #else
119 		dotProduct = &PhyloTree::dotProductSIMD<double, Vec1d>;
120 #endif
121         dotProductDouble = &PhyloTree::dotProductSIMD<double, Vec1d>;
122 #endif
123 	}
124 
125     //--- naive likelihood kernel, no alignment specified yet ---
126     if (!aln) {
127 #if INSTRSET < 2
128         computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchGenericSIMD<Vec1d, SAFE_LH>;
129         computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervGenericSIMD<Vec1d, SAFE_LH>;
130         computeLikelihoodDervMixlenPointer = NULL;
131         computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodGenericSIMD<Vec1d, SAFE_LH>;
132         computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferGenericSIMD<Vec1d, SAFE_LH>;
133         sse = LK_386;
134 #else
135         computeLikelihoodBranchPointer = NULL;
136         computeLikelihoodDervPointer = NULL;
137         computeLikelihoodDervMixlenPointer = NULL;
138         computePartialLikelihoodPointer = NULL;
139         computeLikelihoodFromBufferPointer = NULL;
140         sse = LK_386;
141 #endif
142         return;
143     }
144 //    if (model_factory && !model_factory->model->isReversible()) {
145 //        // if nonreversible model
146 //        computeLikelihoodBranchPointer = &PhyloTree::computeNonrevLikelihoodBranch;
147 //        computeLikelihoodDervPointer = &PhyloTree::computeNonrevLikelihoodDerv;
148 //        computePartialLikelihoodPointer = &PhyloTree::computeNonrevPartialLikelihood;
149 //        computeLikelihoodFromBufferPointer = NULL;
150 //        return;
151 //    }
152 
153     //--- SIMD kernel ---
154     if (lk >= LK_SSE2) {
155 #ifdef __AVX512KNL
156     	if (lk >= LK_AVX512) {
157     		setLikelihoodKernelAVX512();
158     		return;
159     	}
160 #endif
161     	if (lk >= LK_AVX_FMA) {
162             // CPU supports AVX and FMA
163             setLikelihoodKernelFMA();
164         } else if (lk >= LK_AVX) {
165             // CPU supports AVX
166             setLikelihoodKernelAVX();
167         } else {
168             // SSE kernel
169             setLikelihoodKernelSSE();
170         }
171         return;
172     }
173 
174 #if INSTRSET < 2
175     //--- naive kernel for site-specific model ---
176     if (model_factory && model_factory->model->isSiteSpecificModel()) {
177         computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchGenericSIMD<Vec1d, SAFE_LH, false, true>;
178         computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervGenericSIMD<Vec1d, SAFE_LH, false, true>;
179         computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodGenericSIMD<Vec1d, SAFE_LH, false, true>;
180         computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferGenericSIMD<Vec1d, SAFE_LH, false, true>;
181         return;
182     }
183 
184     //--- naive (no SIMD) kernel ---
185     computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchGenericSIMD<Vec1d, SAFE_LH>;
186     computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervGenericSIMD<Vec1d, SAFE_LH>;
187     computeLikelihoodDervMixlenPointer = NULL;
188     computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodGenericSIMD<Vec1d, SAFE_LH>;
189     computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferGenericSIMD<Vec1d, SAFE_LH>;
190 #else
191     computeLikelihoodBranchPointer = NULL;
192     computeLikelihoodDervPointer = NULL;
193     computeLikelihoodDervMixlenPointer = NULL;
194     computePartialLikelihoodPointer = NULL;
195     computeLikelihoodFromBufferPointer = NULL;
196 #endif
197 }
198 
changeLikelihoodKernel(LikelihoodKernel lk)199 void PhyloTree::changeLikelihoodKernel(LikelihoodKernel lk) {
200 	if (sse == lk) return;
201     setLikelihoodKernel(lk);
202 }
203 
204 /*******************************************************
205  *
206  * master function: wrapper for other optimized functions
207  *
208  ******************************************************/
209 
computePartialLikelihood(TraversalInfo & info,size_t ptn_left,size_t ptn_right,int thread_id)210 void PhyloTree::computePartialLikelihood(TraversalInfo &info, size_t ptn_left, size_t ptn_right, int thread_id) {
211 	(this->*computePartialLikelihoodPointer)(info, ptn_left, ptn_right, thread_id);
212 }
213 
computeLikelihoodBranch(PhyloNeighbor * dad_branch,PhyloNode * dad)214 double PhyloTree::computeLikelihoodBranch(PhyloNeighbor *dad_branch, PhyloNode *dad) {
215 	return (this->*computeLikelihoodBranchPointer)(dad_branch, dad);
216 
217 }
218 
computeLikelihoodDerv(PhyloNeighbor * dad_branch,PhyloNode * dad,double * df,double * ddf)219 void PhyloTree::computeLikelihoodDerv(PhyloNeighbor *dad_branch, PhyloNode *dad, double *df, double *ddf) {
220 	(this->*computeLikelihoodDervPointer)(dad_branch, dad, df, ddf);
221 }
222 
223 
computeLikelihoodFromBuffer()224 double PhyloTree::computeLikelihoodFromBuffer() {
225 	ASSERT(current_it && current_it_back);
226 
227     // TODO: buffer stuff for mixlen model
228 	if (computeLikelihoodFromBufferPointer && optimize_by_newton)
229 		return (this->*computeLikelihoodFromBufferPointer)();
230 	else {
231 		return (this->*computeLikelihoodBranchPointer)(current_it, (PhyloNode*)current_it_back->node);
232     }
233 
234 }
235 
dotProductDoubleCall(double * x,double * y,int size)236 double PhyloTree::dotProductDoubleCall(double *x, double *y, int size) {
237     return (this->*dotProductDouble)(x, y, size);
238 }
239 
computeTipPartialLikelihoodPoMo(int state,double * lh,bool hyper)240 void PhyloTree::computeTipPartialLikelihoodPoMo(int state, double *lh, bool hyper) {
241   int nstates = aln->num_states;
242   int N = aln->virtual_pop_size;
243 
244   memset(lh, 0, sizeof(double)*nstates);
245 
246   // decode the id and value
247   int id1 = aln->pomo_sampled_states[state] & 3;
248   int id2 = (aln->pomo_sampled_states[state] >> 16) & 3;
249   int j = (aln->pomo_sampled_states[state] >> 2) & 16383;
250   int M = j + (aln->pomo_sampled_states[state] >> 18);
251 
252   // Number of alleles is hard coded here, change if generalization is needed.
253   int nnuc = 4;
254 
255   // TODO DS: Implement down sampling or a better approach.
256   if (hyper && M > N)
257     outError("Down sampling not yet supported.");
258 
259   // Check if observed state is a fixed one.  If so, many
260   // PoMo states can lead to this data.  E.g., even (2A,8T)
261   // can lead to a sampled data of 7A.
262   if (j == M) {
263     lh[id1] = 1.0;
264     // Second: Polymorphic states.
265     for (int s_id1 = 0; s_id1 < nnuc-1; s_id1++) {
266       for (int s_id2 = s_id1+1; s_id2 < nnuc; s_id2++) {
267         if (s_id1 == id1) {
268           // States are in the order {FIXED,
269           // 1A(N-1)C, ..., (N-1)A1C, ...}.
270           int k;
271           if (s_id1 == 0) k = s_id2 - 1;
272           else k = s_id1 + s_id2;
273           // Start one earlier because increment
274           // happens after execution of for loop
275           // body.
276           int real_state = nnuc - 1 + k*(N-1) + 1;
277           for (int i = 1; i < N; i++, real_state++) {
278             ASSERT(real_state < nstates);
279             if (!hyper)
280               lh[real_state] = std::pow((double)i/(double)N,j);
281             else {
282               lh[real_state] = 1.0;
283               for (int l = 0; l<j; l++)
284                 lh[real_state] *= (double) (i-l) / (double) (N-l);
285             }
286           }
287         }
288         // Same but fixed allele is the second one
289         // in polymorphic states.
290         else if (s_id2 == id1) {
291           int k;
292           if (s_id1 == 0) k = s_id2 - 1;
293           else k = s_id1 + s_id2;
294           int real_state = nnuc - 1 + k*(N-1) + 1;
295           for (int i = 1; i < N; i++, real_state++) {
296             ASSERT(real_state < nstates);
297             if (!hyper)
298               lh[real_state] = std::pow((double)(N-i)/(double)N,j);
299             else {
300               lh[real_state] = 1.0;
301               for (int l = 0; l<j; l++)
302                 lh[real_state] *= (double) (N-i-l) / (double) (N-l);
303             }
304           }
305         }
306       }
307     }
308   }
309   // Observed state is polymorphic.  We only need to set the
310   // partial likelihoods for states that are also
311   // polymorphic for the same alleles.  E.g., states of type
312   // (ix,(N-i)y) can lead to the observed state (jx,(M-j)y).
313   else {
314     int k;
315     if (id1 == 0) k = id2 - 1;
316     else k = id1 + id2;
317     int real_state = nnuc + k*(N-1);
318     for (int i = 1; i < N; i++, real_state++) {
319       ASSERT(real_state < nstates);
320       if (!hyper)
321         lh[real_state] = binomial_dist(j, M, (double) i / (double) N);
322       if (hyper)
323         lh[real_state] = hypergeometric_dist(j, M, i, N);
324     }
325   }
326 
327   // cout << "Sample M,j,id1,id2: " << M << ", " << j << ", " << id1 << ", " << id2 << endl;
328   // cout << "Real partial likelihood vector: ";
329   // for (i=0; i<4; i++)
330   //   cout << real_partial_lh[i] << " ";
331   // cout << endl;
332   // for (i=4; i<nstates; i++) {
333   //   cout << real_partial_lh[i] << " ";
334   //   if ((i-3)%(N-1) == 0)
335   //     cout << endl;
336   // }
337   // double sum = 0.0;
338   // for (i=0; i<nstates; i++)
339   //   sum += real_partial_lh[i];
340   // cout << sum << endl;
341 }
342 
343 
computeTipPartialLikelihood()344 void PhyloTree::computeTipPartialLikelihood() {
345 	if ((tip_partial_lh_computed & 1) != 0)
346 		return;
347 	tip_partial_lh_computed |= 1;
348 
349 
350 	//-------------------------------------------------------
351 	// initialize ptn_freq and ptn_invar
352 	//-------------------------------------------------------
353 
354 	computePtnFreq();
355 	// for +I model
356 	computePtnInvar();
357 
358     if (getModel()->isSiteSpecificModel()) {
359 //        ModelSet *models = (ModelSet*)model;
360         size_t nptn = aln->getNPattern(), max_nptn = ((nptn+vector_size-1)/vector_size)*vector_size, tip_block_size = max_nptn * aln->num_states;
361         int nstates = aln->num_states;
362         int nseq = aln->getNSeq();
363         ASSERT(vector_size > 0);
364 #ifdef _OPENMP
365         #pragma omp parallel for schedule(static)
366 #endif
367         for (int nodeid = 0; nodeid < nseq; nodeid++) {
368             int i, x, v;
369             double *partial_lh = tip_partial_lh + tip_block_size*nodeid;
370             size_t ptn;
371             for (ptn = 0; ptn < nptn; ptn+=vector_size, partial_lh += nstates*vector_size) {
372 //                int state[vector_size];
373 //                for (v = 0; v < vector_size; v++) {
374 //                    if (ptn+v < nptn)
375 //                        state[v] = aln->at(ptn+v)[nodeid];
376 //                    else
377 //                        state[v] = aln->STATE_UNKNOWN;
378 //                }
379 
380                 double *inv_evec = &model->getInverseEigenvectors()[ptn*nstates*nstates];
381                 for (v = 0; v < vector_size; v++) {
382                     int state = 0;
383                     if (ptn+v < nptn)
384                         state = aln->at(ptn+v)[nodeid];
385     //                double *partial_lh = node_partial_lh + ptn*nstates;
386 //                    double *inv_evec = models->at(ptn)->getInverseEigenvectors();
387 
388                     if (state < nstates) {
389                         for (i = 0; i < nstates; i++)
390                             partial_lh[i*vector_size+v] = inv_evec[(i*nstates+state)*vector_size+v];
391                     } else if (state == aln->STATE_UNKNOWN) {
392                         // special treatment for unknown char
393                         for (i = 0; i < nstates; i++) {
394                             double lh_unknown = 0.0;
395 //                            double *this_inv_evec = inv_evec + i*nstates;
396                             for (x = 0; x < nstates; x++)
397                                 lh_unknown += inv_evec[(i*nstates+x)*vector_size+v];
398                             partial_lh[i*vector_size+v] = lh_unknown;
399                         }
400                     } else {
401                         double lh_ambiguous;
402                         // ambiguous characters
403                         int ambi_aa[] = {
404                             4+8, // B = N or D
405                             32+64, // Z = Q or E
406                             512+1024 // U = I or L
407                             };
408                         switch (aln->seq_type) {
409                         case SEQ_DNA:
410                             {
411                                 int cstate = state-nstates+1;
412                                 for (i = 0; i < nstates; i++) {
413                                     lh_ambiguous = 0.0;
414                                     for (x = 0; x < nstates; x++)
415                                         if ((cstate) & (1 << x))
416                                             lh_ambiguous += inv_evec[(i*nstates+x)*vector_size+v];
417                                     partial_lh[i*vector_size+v] = lh_ambiguous;
418                                 }
419                             }
420                             break;
421                         case SEQ_PROTEIN:
422                             //map[(unsigned char)'B'] = 4+8+19; // N or D
423                             //map[(unsigned char)'Z'] = 32+64+19; // Q or E
424                             {
425                                 int cstate = state-nstates;
426                                 for (i = 0; i < nstates; i++) {
427                                     lh_ambiguous = 0.0;
428                                     for (x = 0; x < 11; x++)
429                                         if (ambi_aa[cstate] & (1 << x))
430                                             lh_ambiguous += inv_evec[(i*nstates+x)*vector_size+v];
431                                     partial_lh[i*vector_size+v] = lh_ambiguous;
432                                 }
433                             }
434                             break;
435                         default:
436                             ASSERT(0);
437                             break;
438                         }
439                     }
440                     // sanity check
441     //                bool all_zero = true;
442     //                for (i = 0; i < nstates; i++)
443     //                    if (partial_lh[i] != 0) {
444     //                        all_zero = false;
445     //                        break;
446     //                    }
447     //                assert(!all_zero && "some tip_partial_lh are all zeros");
448 
449                 } // FOR v
450             } // FOR ptn
451             // NO Need to copy dummy anymore
452             // dummy values
453 //            for (ptn = nptn; ptn < max_nptn; ptn++, partial_lh += nstates)
454 //                memcpy(partial_lh, partial_lh-nstates, nstates*sizeof(double));
455         } // FOR nodeid
456         return;
457     }
458 
459 	int i, state, nstates = aln->num_states;
460 	ASSERT(tip_partial_lh);
461 	// ambiguous characters
462 	int ambi_aa[] = {
463         4+8, // B = N or D
464         32+64, // Z = Q or E
465         512+1024 // U = I or L
466     };
467 
468     if (!getModel()->isReversible() || params->kernel_nonrev) {
469         // nonreversible model
470         memset(tip_partial_lh, 0, (aln->STATE_UNKNOWN)*nstates*sizeof(double));
471         for (state = 0; state < nstates; state++) {
472             tip_partial_lh[state*nstates+state] = 1.0;
473         }
474         double *this_tip_partial_lh = &tip_partial_lh[aln->STATE_UNKNOWN*nstates];
475         // special treatment for unknown char
476         for (i = 0; i < nstates; i++) {
477             this_tip_partial_lh[i] = 1.0;
478         }
479         // special treatment for ambiguous characters
480         switch (aln->seq_type) {
481         case SEQ_DNA:
482             for (state = 4; state < 18; state++) {
483                 int cstate = state-nstates+1;
484                 this_tip_partial_lh = &tip_partial_lh[state*nstates];
485                 for (i = 0; i < nstates; i++) {
486                     if ((cstate) & (1 << i))
487                         this_tip_partial_lh[i] = 1.0;
488                 }
489             }
490             break;
491         case SEQ_PROTEIN:
492             for (state = 0; state < sizeof(ambi_aa)/sizeof(int); state++) {
493                 this_tip_partial_lh = &tip_partial_lh[(state+20)*nstates];
494                 for (i = 0; i < nstates; i++) {
495                     if (ambi_aa[state] & (1 << i))
496                         this_tip_partial_lh[i] = 1.0;
497                 }
498             }
499             break;
500         case SEQ_POMO: {
501           if (aln->pomo_sampling_method != SAMPLING_WEIGHTED_BINOM &&
502               aln->pomo_sampling_method != SAMPLING_WEIGHTED_HYPER)
503             outError("Sampling method not supported by PoMo.");
504           bool hyper = false;
505           if (aln->pomo_sampling_method == SAMPLING_WEIGHTED_HYPER)
506             hyper = true;
507           double *real_partial_lh = aligned_alloc<double>(nstates);
508           for (state = 0; state < aln->pomo_sampled_states.size(); state++) {
509             computeTipPartialLikelihoodPoMo(state, real_partial_lh, hyper);
510             // The vector tip_partial_lh stores inner product of real_partial_lh
511             // and inverse eigenvector for each state
512             double *this_tip_partial_lh = &tip_partial_lh[(state+nstates)*nstates];
513             memset(this_tip_partial_lh, 0, nstates*sizeof(double));
514             for (i = 0; i < nstates; i++)
515               this_tip_partial_lh[i] = real_partial_lh[i];
516           }
517           aligned_free(real_partial_lh);
518           break;
519         }
520         default:
521             break;
522         }
523         return;
524     }
525 
526     // THIS IS FOR REVERSIBLE MODEL
527     int m, x, nmixtures = model->getNMixtures();
528 	double *all_inv_evec = model->getInverseEigenvectors();
529 	ASSERT(all_inv_evec);
530 
531 
532 	for (state = 0; state < nstates; state++) {
533 		double *this_tip_partial_lh = &tip_partial_lh[state*nstates*nmixtures];
534 		for (m = 0; m < nmixtures; m++) {
535 			double *inv_evec = &all_inv_evec[m*nstates*nstates];
536 			for (i = 0; i < nstates; i++)
537 				this_tip_partial_lh[m*nstates + i] = inv_evec[i*nstates+state];
538 		}
539 	}
540 	// special treatment for unknown char
541 	for (i = 0; i < nstates; i++) {
542 		double *this_tip_partial_lh = &tip_partial_lh[aln->STATE_UNKNOWN*nstates*nmixtures];
543 		for (m = 0; m < nmixtures; m++) {
544 			double *inv_evec = &all_inv_evec[m*nstates*nstates];
545 			double lh_unknown = 0.0;
546 			for (x = 0; x < nstates; x++)
547 				lh_unknown += inv_evec[i*nstates+x];
548 			this_tip_partial_lh[m*nstates + i] = lh_unknown;
549 		}
550 	}
551 
552     // special treatment for ambiguous characters
553 	double lh_ambiguous;
554 	switch (aln->seq_type) {
555 	case SEQ_DNA:
556 		for (state = 4; state < 18; state++) {
557 			int cstate = state-nstates+1;
558 			double *this_tip_partial_lh = &tip_partial_lh[state*nstates*nmixtures];
559 			for (m = 0; m < nmixtures; m++) {
560 				double *inv_evec = &all_inv_evec[m*nstates*nstates];
561 				for (i = 0; i < nstates; i++) {
562 					lh_ambiguous = 0.0;
563 					for (x = 0; x < nstates; x++)
564 						if ((cstate) & (1 << x))
565 							lh_ambiguous += inv_evec[i*nstates+x];
566 					this_tip_partial_lh[m*nstates+i] = lh_ambiguous;
567 				}
568 			}
569 		}
570 		break;
571 	case SEQ_PROTEIN:
572 		//map[(unsigned char)'B'] = 4+8+19; // N or D
573 		//map[(unsigned char)'Z'] = 32+64+19; // Q or E
574 		for (state = 0; state < sizeof(ambi_aa)/sizeof(int); state++) {
575 			double *this_tip_partial_lh = &tip_partial_lh[(state+20)*nstates*nmixtures];
576 			for (m = 0; m < nmixtures; m++) {
577 				double *inv_evec = &all_inv_evec[m*nstates*nstates];
578 				for (i = 0; i < nstates; i++) {
579 					lh_ambiguous = 0.0;
580 					for (x = 0; x < 11; x++)
581 						if (ambi_aa[state] & (1 << x))
582 							lh_ambiguous += inv_evec[i*nstates+x];
583 					this_tip_partial_lh[m*nstates+i] = lh_ambiguous;
584 				}
585 			}
586 		}
587 		break;
588   case SEQ_POMO: {
589     if (aln->pomo_sampling_method != SAMPLING_WEIGHTED_BINOM &&
590         aln->pomo_sampling_method != SAMPLING_WEIGHTED_HYPER)
591       outError("Sampling method not supported by PoMo.");
592     bool hyper = false;
593     if (aln->pomo_sampling_method == SAMPLING_WEIGHTED_HYPER)
594       hyper = true;
595     double *real_partial_lh = aligned_alloc<double>(nstates);
596     for (state = 0; state < aln->pomo_sampled_states.size(); state++) {
597       computeTipPartialLikelihoodPoMo(state, real_partial_lh, hyper);
598       // The vector tip_partial_lh stores inner product of real_partial_lh
599       // and inverse eigenvector for each state
600       double *this_tip_partial_lh = &tip_partial_lh[(state+nstates)*nstates*nmixtures];
601       memset(this_tip_partial_lh, 0, nmixtures*nstates*sizeof(double));
602       for (m = 0; m < nmixtures; m++) {
603         double *inv_evec = &all_inv_evec[m*nstates*nstates];
604         for (int i = 0; i < nstates; i++)
605           for (int j = 0; j < nstates; j++)
606             this_tip_partial_lh[m*nstates + i] +=
607               inv_evec[i*nstates+j] * real_partial_lh[j];
608       }
609     }
610     aligned_free(real_partial_lh);
611     break;
612   }
613 	default:
614 		break;
615 	}
616 }
617 
computePtnFreq()618 void PhyloTree::computePtnFreq() {
619 	if (ptn_freq_computed) return;
620 	ptn_freq_computed = true;
621 	size_t nptn = aln->getNPattern();
622 	size_t maxptn = get_safe_upper_limit(nptn)+get_safe_upper_limit(model_factory->unobserved_ptns.size());
623 	int ptn;
624 	for (ptn = 0; ptn < nptn; ptn++)
625 		ptn_freq[ptn] = (*aln)[ptn].frequency;
626 	for (ptn = nptn; ptn < maxptn; ptn++)
627 		ptn_freq[ptn] = 0.0;
628 }
629 
computePtnInvar()630 void PhyloTree::computePtnInvar() {
631 	size_t nptn = aln->getNPattern(), ptn;
632 	size_t maxptn = get_safe_upper_limit(nptn)+get_safe_upper_limit(model_factory->unobserved_ptns.size());
633   // For PoMo, only consider monomorphic states and set nstates to the number of
634   // states of the underlying mutation model.
635 	int nstates = model->getMutationModel()->num_states;
636     int x;
637     // ambiguous characters
638     int ambi_aa[] = {
639         4+8, // B = N or D
640         32+64, // Z = Q or E
641         512+1024 // U = I or L
642     };
643 
644     double state_freq[nstates];
645 
646     // -1 for mixture model
647 
648     // Again for PoMo, the stationary frequencies are set to the stationary
649     // frequencies of the boundary states.
650     model->getMutationModel()->getStateFrequency(state_freq, -1);
651 
652 	memset(ptn_invar, 0, maxptn*sizeof(double));
653 	double p_invar = site_rate->getPInvar();
654 	if (p_invar != 0.0) {
655 		for (ptn = 0; ptn < nptn; ptn++) {
656             if ((*aln)[ptn].const_char > aln->STATE_UNKNOWN)
657                 continue;
658 
659 			if ((*aln)[ptn].const_char == aln->STATE_UNKNOWN) {
660 				ptn_invar[ptn] = p_invar;
661         // For PoMo, if a polymorphic state is considered, the likelihood is
662         // left unchanged and zero because ptn_invar has been initialized to 0.
663 			} else if ((*aln)[ptn].const_char < nstates) {
664 				ptn_invar[ptn] = p_invar * state_freq[(int) (*aln)[ptn].const_char];
665 			} else if (aln->seq_type == SEQ_DNA) {
666                 // 2016-12-21: handling ambiguous state
667                 ptn_invar[ptn] = 0.0;
668                 int cstate = (*aln)[ptn].const_char-nstates+1;
669                 for (x = 0; x < nstates; x++) {
670                     if ((cstate) & (1 << x))
671                         ptn_invar[ptn] += state_freq[x];
672                 }
673                 ptn_invar[ptn] *= p_invar;
674             } else if (aln->seq_type == SEQ_PROTEIN) {
675                 ptn_invar[ptn] = 0.0;
676                 int cstate = (*aln)[ptn].const_char-nstates;
677                 ASSERT(cstate <= 2);
678                 for (x = 0; x < 11; x++)
679                     if (ambi_aa[cstate] & (1 << x))
680                         ptn_invar[ptn] += state_freq[x];
681                 ptn_invar[ptn] *= p_invar;
682             } else ASSERT(0);
683 		}
684 //		// ascertmain bias correction
685 //		for (ptn = 0; ptn < model_factory->unobserved_ptns.size(); ptn++)
686 //			ptn_invar[nptn+ptn] = p_invar * state_freq[(int)model_factory->unobserved_ptns[ptn]];
687 //
688 		// dummy values
689 		for (ptn = nptn; ptn < maxptn; ptn++)
690 			ptn_invar[ptn] = p_invar;
691 	}
692 //	aligned_free(state_freq);
693 }
694 
695 /*******************************************************
696  *
697  * non-vectorized likelihood functions.
698  * this version uses Alexis' technique that stores the
699  * dot product of partial likelihoods and eigenvectors at node
700  * for faster branch length optimization
701  *
702  ******************************************************/
703 
704 /*
705 void PhyloTree::computePartialLikelihoodEigen(PhyloNeighbor *dad_branch, PhyloNode *dad) {
706 
707     // don't recompute the likelihood
708 	assert(dad);
709     if (dad_branch->partial_lh_computed & 1)
710         return;
711     dad_branch->partial_lh_computed |= 1;
712     PhyloNode *node = (PhyloNode*)(dad_branch->node);
713 
714 
715     size_t nstates = aln->num_states;
716     size_t nptn = aln->size()+model_factory->unobserved_ptns.size();
717 
718     if (!tip_partial_lh_computed)
719         computeTipPartialLikelihood();
720 
721 	if (node->isLeaf()) {
722 	    dad_branch->lh_scale_factor = 0.0;
723 		return;
724 	}
725 
726     size_t ptn, c;
727     size_t orig_ntn = aln->size();
728     size_t ncat = site_rate->getNRate();
729     size_t ncat_mix = (model_factory->fused_mix_rate) ? ncat : ncat*model->getNMixtures();
730     size_t mix_addr_nstates[ncat_mix], mix_addr[ncat_mix];
731     size_t denom = (model_factory->fused_mix_rate) ? 1 : ncat;
732     for (c = 0; c < ncat_mix; c++) {
733         size_t m = c/denom;
734         mix_addr_nstates[c] = m*nstates;
735         mix_addr[c] = mix_addr_nstates[c]*nstates;
736     }
737     size_t i, x;
738     size_t block = nstates * ncat_mix;
739     size_t tip_block = nstates * model->getNMixtures();
740     size_t scale_size = nptn * ncat_mix;
741 
742 	double *evec = model->getEigenvectors();
743 	double *inv_evec = model->getInverseEigenvectors();
744 	assert(inv_evec && evec);
745 	double *eval = model->getEigenvalues();
746 
747     dad_branch->lh_scale_factor = 0.0;
748 
749 	// internal node
750 	PhyloNeighbor *left = NULL, *right = NULL; // left & right are two neighbors leading to 2 subtrees
751     int num_leaves = 0;
752 	FOR_NEIGHBOR_IT(node, dad, it) {
753         PhyloNeighbor *nei = (PhyloNeighbor*)*it;
754 		if (!left) left = (PhyloNeighbor*)(*it); else right = (PhyloNeighbor*)(*it);
755         if ((nei->partial_lh_computed & 1) == 0)
756             computePartialLikelihood(nei, node);
757         dad_branch->lh_scale_factor += nei->lh_scale_factor;
758         if (nei->node->isLeaf())
759             num_leaves ++;
760 	}
761 
762     if (params->lh_mem_save == LM_PER_NODE && !dad_branch->partial_lh) {
763         // re-orient partial_lh
764         bool done = false;
765         FOR_NEIGHBOR_IT(node, dad, it2) {
766             PhyloNeighbor *backnei = ((PhyloNeighbor*)(*it2)->node->findNeighbor(node));
767             if (backnei->partial_lh) {
768                 dad_branch->partial_lh = backnei->partial_lh;
769                 dad_branch->scale_num = backnei->scale_num;
770                 backnei->partial_lh = NULL;
771                 backnei->scale_num = NULL;
772                 backnei->partial_lh_computed &= ~1; // clear bit
773                 done = true;
774                 break;
775             }
776         }
777         if (!done) {
778             printTree(cout, WT_BR_LEN + WT_NEWLINE);
779         }
780         assert(done && "partial_lh is not re-oriented");
781     }
782 
783     // precompute buffer to save times
784     double *echildren = new double[block*nstates*(node->degree()-1)];
785     double *partial_lh_leaves = NULL;
786     if (num_leaves > 0)
787         partial_lh_leaves = new double[(aln->STATE_UNKNOWN+1)*block*num_leaves];
788     double *echild = echildren;
789     double *partial_lh_leaf = partial_lh_leaves;
790 
791     FOR_NEIGHBOR_IT(node, dad, it) {
792         double expchild[nstates];
793         PhyloNeighbor *child = (PhyloNeighbor*)*it;
794         // precompute information buffer
795         double *echild_ptr = echild;
796         for (c = 0; c < ncat_mix; c++) {
797             double len_child = site_rate->getRate(c%ncat) * child->length;
798             double *eval_ptr = eval + mix_addr_nstates[c];
799             double *evec_ptr = evec + mix_addr[c];
800             for (i = 0; i < nstates; i++) {
801                 expchild[i] = exp(eval_ptr[i]*len_child);
802             }
803             for (x = 0; x < nstates; x++) {
804                 for (i = 0; i < nstates; i++) {
805                     echild_ptr[i] = evec_ptr[x*nstates+i] * expchild[i];
806                 }
807                 echild_ptr += nstates;
808             }
809         }
810 
811         // pre compute information for tip
812         if (child->node->isLeaf()) {
813             vector<int>::iterator it;
814             for (it = aln->seq_states[child->node->id].begin(); it != aln->seq_states[child->node->id].end(); it++) {
815                 int state = (*it);
816                 double *this_partial_lh_leaf = partial_lh_leaf + state*block;
817                 double *echild_ptr = echild;
818                 for (c = 0; c < ncat_mix; c++) {
819                     double *this_tip_partial_lh = tip_partial_lh + state*tip_block + mix_addr_nstates[c];
820                     for (x = 0; x < nstates; x++) {
821                         double vchild = 0.0;
822                         for (i = 0; i < nstates; i++) {
823                             vchild += echild_ptr[i] * this_tip_partial_lh[i];
824                         }
825                         this_partial_lh_leaf[x] = vchild;
826                         echild_ptr += nstates;
827                     }
828                     this_partial_lh_leaf += nstates;
829                 }
830             }
831             size_t addr = aln->STATE_UNKNOWN * block;
832             for (x = 0; x < block; x++) {
833                 partial_lh_leaf[addr+x] = 1.0;
834             }
835             partial_lh_leaf += (aln->STATE_UNKNOWN+1)*block;
836         }
837         echild += block*nstates;
838     }
839 
840 
841     double *eleft = echildren, *eright = echildren + block*nstates;
842 
843 	if (!left->node->isLeaf() && right->node->isLeaf()) {
844 		PhyloNeighbor *tmp = left;
845 		left = right;
846 		right = tmp;
847         double *etmp = eleft;
848         eleft = eright;
849         eright = etmp;
850 	}
851 
852     if (node->degree() > 3) {
853 
854         //--------------------- multifurcating node ------------------//
855 
856         // now for-loop computing partial_lh over all site-patterns
857 #ifdef _OPENMP
858 #pragma omp parallel for private(ptn, c, x, i) schedule(static)
859 #endif
860         for (ptn = 0; ptn < nptn; ptn++) {
861             double partial_lh_all[block];
862             for (i = 0; i < block; i++)
863                 partial_lh_all[i] = 1.0;
864             UBYTE *scale_dad = dad_branch->scale_num + ptn*ncat_mix;
865             memset(scale_dad, 0, sizeof(UBYTE)*ncat_mix);
866 
867             double *partial_lh_leaf = partial_lh_leaves;
868             double *echild = echildren;
869 
870             FOR_NEIGHBOR_IT(node, dad, it) {
871                 PhyloNeighbor *child = (PhyloNeighbor*)*it;
872                 UBYTE *scale_child = child->scale_num + ptn*ncat_mix;
873                 if (child->node->isLeaf()) {
874                     // external node
875                     int state_child = (ptn < orig_ntn) ? (aln->at(ptn))[child->node->id] : model_factory->unobserved_ptns[ptn-orig_ntn];
876                     double *child_lh = partial_lh_leaf + state_child*block;
877                     for (c = 0; c < block; c++) {
878                         // compute real partial likelihood vector
879                         partial_lh_all[c] *= child_lh[c];
880                     }
881                     partial_lh_leaf += (aln->STATE_UNKNOWN+1)*block;
882                 } else {
883                     // internal node
884                     double *partial_lh = partial_lh_all;
885                     double *partial_lh_child = child->partial_lh + ptn*block;
886 
887                     double *echild_ptr = echild;
888                     for (c = 0; c < ncat_mix; c++) {
889                         scale_dad[c] += scale_child[c];
890                         // compute real partial likelihood vector
891                         for (x = 0; x < nstates; x++) {
892                             double vchild = 0.0;
893 //                            double *echild_ptr = echild + (c*nstatesqr+x*nstates);
894                             for (i = 0; i < nstates; i++) {
895                                 vchild += echild_ptr[i] * partial_lh_child[i];
896                             }
897                             echild_ptr += nstates;
898                             partial_lh[x] *= vchild;
899                         }
900                         partial_lh += nstates;
901                         partial_lh_child += nstates;
902                     }
903                 } // if
904                 echild += block*nstates;
905             } // FOR_NEIGHBOR
906 
907 
908             // compute dot-product with inv_eigenvector
909             double *partial_lh_tmp = partial_lh_all;
910             double *partial_lh = dad_branch->partial_lh + ptn*block;
911             for (c = 0; c < ncat_mix; c++) {
912                 double lh_max = 0.0;
913                 double *inv_evec_ptr = inv_evec + mix_addr[c];
914                 for (i = 0; i < nstates; i++) {
915                     double res = 0.0;
916                     for (x = 0; x < nstates; x++) {
917                         res += partial_lh_tmp[x]*inv_evec_ptr[x];
918                     }
919                     inv_evec_ptr += nstates;
920                     partial_lh[i] = res;
921                     lh_max = max(lh_max, fabs(res));
922                 }
923                 // check if one should scale partial likelihoods
924                 if (lh_max < SCALING_THRESHOLD && lh_max != 0.0) {
925                     //assert(lh_max != 0.0 && "Numerical underflow for multifurcation node");
926                     if (ptn_invar[ptn] == 0.0) {
927                         // now do the likelihood scaling
928                         for (i = 0; i < nstates; i++)
929                             partial_lh[i] *= SCALING_THRESHOLD_INVER;
930                         scale_dad[c] += 1;
931                     }
932                 }
933                 partial_lh += nstates;
934                 partial_lh_tmp += nstates;
935             }
936 
937         } // for ptn
938 //        dad_branch->lh_scale_factor += sum_scale;
939 
940         // end multifurcating treatment
941     } else if (left->node->isLeaf() && right->node->isLeaf()) {
942 
943         //--------------------- TIP-TIP (cherry) case ------------------//
944 
945         double *partial_lh_left = partial_lh_leaves;
946         double *partial_lh_right = partial_lh_leaves + (aln->STATE_UNKNOWN+1)*block;
947 
948 		// scale number must be ZERO
949 	    memset(dad_branch->scale_num, 0, scale_size * sizeof(UBYTE));
950 #ifdef _OPENMP
951 #pragma omp parallel for private(ptn, c, x, i) schedule(static)
952 #endif
953 		for (ptn = 0; ptn < nptn; ptn++) {
954 			double partial_lh_tmp[nstates];
955 			double *partial_lh = dad_branch->partial_lh + ptn*block;
956 			double *vleft = partial_lh_left + block*((ptn < orig_ntn) ? (aln->at(ptn))[left->node->id] : model_factory->unobserved_ptns[ptn-orig_ntn]);
957 			double *vright = partial_lh_right + block*((ptn < orig_ntn) ? (aln->at(ptn))[right->node->id] : model_factory->unobserved_ptns[ptn-orig_ntn]);
958 			for (c = 0; c < ncat_mix; c++) {
959                 double *inv_evec_ptr = inv_evec + mix_addr[c];
960 				// compute real partial likelihood vector
961 				for (x = 0; x < nstates; x++) {
962 					partial_lh_tmp[x] = vleft[x] * vright[x];
963 				}
964 
965 				// compute dot-product with inv_eigenvector
966 				for (i = 0; i < nstates; i++) {
967 					double res = 0.0;
968 					for (x = 0; x < nstates; x++) {
969 						res += partial_lh_tmp[x]*inv_evec_ptr[x];
970 					}
971                     inv_evec_ptr += nstates;
972 					partial_lh[i] = res;
973 				}
974                 vleft += nstates;
975                 vright += nstates;
976                 partial_lh += nstates;
977 			}
978 		}
979 	} else if (left->node->isLeaf() && !right->node->isLeaf()) {
980 
981         //--------------------- TIP-INTERNAL NODE case ------------------//
982 
983 		// only take scale_num from the right subtree
984 		memcpy(dad_branch->scale_num, right->scale_num, scale_size * sizeof(UBYTE));
985 
986 
987         double *partial_lh_left = partial_lh_leaves;
988 
989 #ifdef _OPENMP
990 #pragma omp parallel for private(ptn, c, x, i) schedule(static)
991 #endif
992 		for (ptn = 0; ptn < nptn; ptn++) {
993 			double partial_lh_tmp[nstates];
994 			double *partial_lh = dad_branch->partial_lh + ptn*block;
995 			double *partial_lh_right = right->partial_lh + ptn*block;
996 			double *vleft = partial_lh_left + block*((ptn < orig_ntn) ? (aln->at(ptn))[left->node->id] : model_factory->unobserved_ptns[ptn-orig_ntn]);
997 
998             double *eright_ptr = eright;
999 			for (c = 0; c < ncat_mix; c++) {
1000                 double lh_max = 0.0;
1001                 double *inv_evec_ptr = inv_evec + mix_addr[c];
1002 				// compute real partial likelihood vector
1003 				for (x = 0; x < nstates; x++) {
1004 					double vright = 0.0;
1005 //					size_t addr = c*nstatesqr+x*nstates;
1006 //					vleft = partial_lh_left[state_left*block+c*nstates+x];
1007 					for (i = 0; i < nstates; i++) {
1008 						vright += eright_ptr[i] * partial_lh_right[i];
1009 					}
1010                     eright_ptr += nstates;
1011 					partial_lh_tmp[x] = vleft[x] * (vright);
1012 				}
1013 
1014 				// compute dot-product with inv_eigenvector
1015 				for (i = 0; i < nstates; i++) {
1016 					double res = 0.0;
1017 					for (x = 0; x < nstates; x++) {
1018 						res += partial_lh_tmp[x]*inv_evec_ptr[x];
1019 					}
1020                     inv_evec_ptr += nstates;
1021 					partial_lh[i] = res;
1022                     lh_max = max(fabs(res), lh_max);
1023 				}
1024                 // check if one should scale partial likelihoods
1025                 if (lh_max < SCALING_THRESHOLD && lh_max != 0.0) {
1026                     //assert(lh_max != 0.0 && "Numerical underflow for tip-inner node");
1027                     if (ptn_invar[ptn] == 0.0) {
1028                         // now do the likelihood scaling
1029                         for (i = 0; i < nstates; i++)
1030                             partial_lh[i] *= SCALING_THRESHOLD_INVER;
1031                         dad_branch->scale_num[ptn*ncat_mix+c] += 1;
1032                     }
1033                 }
1034                 vleft += nstates;
1035                 partial_lh_right += nstates;
1036                 partial_lh += nstates;
1037 			}
1038 
1039 		}
1040 //		dad_branch->lh_scale_factor += sum_scale;
1041 //		delete [] partial_lh_left;
1042 
1043 	} else {
1044 
1045         //--------------------- INTERNAL-INTERNAL NODE case ------------------//
1046 
1047 #ifdef _OPENMP
1048 #pragma omp parallel for private(ptn, c, x, i) schedule(static)
1049 #endif
1050 		for (ptn = 0; ptn < nptn; ptn++) {
1051 			double partial_lh_tmp[nstates];
1052 			double *partial_lh = dad_branch->partial_lh + ptn*block;
1053 			double *partial_lh_left = left->partial_lh + ptn*block;
1054 			double *partial_lh_right = right->partial_lh + ptn*block;
1055             UBYTE *scale_dad = dad_branch->scale_num + ptn*ncat_mix;
1056             UBYTE *scale_left = left->scale_num + ptn*ncat_mix;
1057             UBYTE *scale_right = right->scale_num + ptn*ncat_mix;
1058 
1059             double *eleft_ptr = eleft;
1060             double *eright_ptr = eright;
1061 
1062 			for (c = 0; c < ncat_mix; c++) {
1063                 scale_dad[c] = scale_left[c] + scale_right[c];
1064                 double lh_max = 0.0;
1065                 double *inv_evec_ptr = inv_evec + mix_addr[c];
1066 				// compute real partial likelihood vector
1067 				for (x = 0; x < nstates; x++) {
1068 					double vleft = 0.0, vright = 0.0;
1069 //					size_t addr = c*nstatesqr+x*nstates;
1070 					for (i = 0; i < nstates; i++) {
1071 						vleft += eleft_ptr[i] * partial_lh_left[i];
1072 						vright += eright_ptr[i] * partial_lh_right[i];
1073 					}
1074                     eleft_ptr += nstates;
1075                     eright_ptr += nstates;
1076 					partial_lh_tmp[x] = vleft*vright;
1077 //                    assert(partial_lh_tmp[x] != 0.0);
1078 				}
1079 
1080 				// compute dot-product with inv_eigenvector
1081 				for (i = 0; i < nstates; i++) {
1082 					double res = 0.0;
1083 					for (x = 0; x < nstates; x++) {
1084 						res += partial_lh_tmp[x]*inv_evec_ptr[x];
1085 					}
1086                     inv_evec_ptr += nstates;
1087 					partial_lh[i] = res;
1088                     lh_max = max(lh_max, fabs(res));
1089 				}
1090                 // check if one should scale partial likelihoods
1091                 if (lh_max < SCALING_THRESHOLD && lh_max != 0.0) {
1092                     //assert(lh_max != 0.0 && "Numerical underflow for inner-inner node");
1093                     if (ptn_invar[ptn] == 0.0) {
1094                         // BQM 2016-05-03: only scale for non-constant sites
1095                         // now do the likelihood scaling
1096                         for (i = 0; i < nstates; i++)
1097                             partial_lh[i] *= SCALING_THRESHOLD_INVER;
1098                         scale_dad[c] += 1;
1099                     }
1100                 }
1101                 partial_lh_left += nstates;
1102                 partial_lh_right += nstates;
1103                 partial_lh += nstates;
1104 			}
1105 
1106 		}
1107 //		dad_branch->lh_scale_factor += sum_scale;
1108 
1109 	}
1110 
1111     if (partial_lh_leaves)
1112         delete [] partial_lh_leaves;
1113     delete [] echildren;
1114 }
1115 
1116 void PhyloTree::computeLikelihoodDervEigen(PhyloNeighbor *dad_branch, PhyloNode *dad, double &df, double &ddf) {
1117     PhyloNode *node = (PhyloNode*) dad_branch->node;
1118     PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad);
1119     if (!central_partial_lh)
1120         initializeAllPartialLh();
1121     if (node->isLeaf()) {
1122     	PhyloNode *tmp_node = dad;
1123     	dad = node;
1124     	node = tmp_node;
1125     	PhyloNeighbor *tmp_nei = dad_branch;
1126     	dad_branch = node_branch;
1127     	node_branch = tmp_nei;
1128     }
1129     if ((dad_branch->partial_lh_computed & 1) == 0)
1130         computePartialLikelihoodEigen(dad_branch, dad);
1131     if ((node_branch->partial_lh_computed & 1) == 0)
1132         computePartialLikelihoodEigen(node_branch, node);
1133 
1134     size_t nstates = aln->num_states;
1135     size_t ncat = site_rate->getNRate();
1136     size_t ncat_mix = (model_factory->fused_mix_rate) ? ncat : ncat*model->getNMixtures();
1137 
1138     size_t block = ncat_mix * nstates;
1139     size_t tip_block = nstates * model->getNMixtures();
1140     size_t ptn; // for big data size > 4GB memory required
1141     size_t c, i;
1142     size_t orig_nptn = aln->size();
1143     size_t nptn = aln->size()+model_factory->unobserved_ptns.size();
1144 
1145     size_t mix_addr_nstates[ncat_mix], mix_addr[ncat_mix];
1146     size_t denom = (model_factory->fused_mix_rate) ? 1 : ncat;
1147     for (c = 0; c < ncat_mix; c++) {
1148         size_t m = c/denom;
1149         mix_addr_nstates[c] = m*nstates;
1150         mix_addr[c] = mix_addr_nstates[c]*nstates;
1151     }
1152 
1153     double *eval = model->getEigenvalues();
1154     assert(eval);
1155 
1156 	assert(theta_all);
1157 	if (!theta_computed) {
1158 		// precompute theta for fast branch length optimization
1159 
1160 	    if (dad->isLeaf()) {
1161 	    	// special treatment for TIP-INTERNAL NODE case
1162 #ifdef _OPENMP
1163 #pragma omp parallel for private(ptn, i, c) schedule(static)
1164 #endif
1165 	    	for (ptn = 0; ptn < nptn; ptn++) {
1166 				double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
1167                 UBYTE *scale_dad = dad_branch->scale_num+ptn*ncat_mix;
1168 				double *theta = theta_all + ptn*block;
1169                 double *this_tip_partial_lh = tip_partial_lh + tip_block*((ptn < orig_nptn) ? (aln->at(ptn))[dad->id] :  model_factory->unobserved_ptns[ptn-orig_nptn]);
1170                 UBYTE min_scale = scale_dad[0];
1171                 for (c = 1; c < ncat_mix; c++)
1172                     min_scale = min(min_scale, scale_dad[c]);
1173                 for (c = 0; c < ncat_mix; c++) {
1174                     double *lh_tip = this_tip_partial_lh + mix_addr_nstates[c];
1175                     if (scale_dad[c] == min_scale) {
1176                         for (i = 0; i < nstates; i++) {
1177                             theta[i] = lh_tip[i] * partial_lh_dad[i];
1178                         }
1179                     } else if (scale_dad[c] == min_scale+1) {
1180                         for (i = 0; i < nstates; i++) {
1181                             theta[i] = lh_tip[i] * partial_lh_dad[i] * SCALING_THRESHOLD;
1182                         }
1183                     } else {
1184                         memset(theta, 0, sizeof(double)*nstates);
1185                     }
1186                     partial_lh_dad += nstates;
1187                     theta += nstates;
1188                 }
1189 
1190 			}
1191 			// ascertainment bias correction
1192 	    } else {
1193 	    	// both dad and node are internal nodes
1194 
1195 //	    	size_t all_entries = nptn*block;
1196 #ifdef _OPENMP
1197 #pragma omp parallel for private(ptn, i, c) schedule(static)
1198 #endif
1199 	    	for (ptn = 0; ptn < nptn; ptn++) {
1200 				double *theta = theta_all + ptn*block;
1201 			    double *partial_lh_node = node_branch->partial_lh + ptn*block;
1202 			    double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
1203 
1204                 size_t ptn_ncat = ptn*ncat_mix;
1205                 UBYTE *scale_dad = dad_branch->scale_num + ptn_ncat;
1206                 UBYTE *scale_node = node_branch->scale_num + ptn_ncat;
1207                 UBYTE sum_scale[ncat_mix];
1208                 UBYTE min_scale = sum_scale[0] = scale_dad[0] + scale_node[0];
1209                 for (c = 1; c < ncat_mix; c++) {
1210                     sum_scale[c] = scale_dad[c] + scale_node[c];
1211                     min_scale = min(min_scale, sum_scale[c]);
1212                 }
1213                 for (c = 0; c < ncat_mix; c++) {
1214                     if (sum_scale[c] == min_scale) {
1215                         for (i = 0; i < nstates; i++) {
1216                             theta[i] = partial_lh_node[i] * partial_lh_dad[i];
1217                         }
1218                     } else if (sum_scale[c] == min_scale+1) {
1219                         for (i = 0; i < nstates; i++) {
1220                             theta[i] = partial_lh_node[i] * partial_lh_dad[i] * SCALING_THRESHOLD;
1221                         }
1222                     } else {
1223                         memset(theta, 0, sizeof(double)*nstates);
1224                     }
1225                     theta += nstates;
1226                     partial_lh_dad += nstates;
1227                     partial_lh_node += nstates;
1228                 }
1229 			}
1230 	    }
1231 		theta_computed = true;
1232 	}
1233 
1234     double *val0 = new double[block];
1235     double *val1 = new double[block];
1236     double *val2 = new double[block];
1237 	for (c = 0; c < ncat_mix; c++) {
1238         size_t m = c/denom;
1239         double *eval_ptr = eval + mix_addr_nstates[c];
1240         size_t mycat = c%ncat;
1241         double prop = site_rate->getProp(mycat) * model->getMixtureWeight(m);
1242         size_t addr = c*nstates;
1243 		for (i = 0; i < nstates; i++) {
1244 			double cof = eval_ptr[i]*site_rate->getRate(mycat);
1245 			double val = exp(cof*dad_branch->length) * prop;
1246 			double val1_ = cof*val;
1247 			val0[addr+i] = val;
1248 			val1[addr+i] = val1_;
1249 			val2[addr+i] = cof*val1_;
1250 		}
1251 	}
1252 
1253 
1254     double my_df = 0.0, my_ddf = 0.0, prob_const = 0.0, df_const = 0.0, ddf_const = 0.0;
1255 //    double tree_lh = node_branch->lh_scale_factor + dad_branch->lh_scale_factor;
1256 
1257 #ifdef _OPENMP
1258 #pragma omp parallel for reduction(+: my_df, my_ddf, prob_const, df_const, ddf_const) private(ptn, i) schedule(static)
1259 #endif
1260     for (ptn = 0; ptn < nptn; ptn++) {
1261 		double lh_ptn = ptn_invar[ptn], df_ptn = 0.0, ddf_ptn = 0.0;
1262 		double *theta = theta_all + ptn*block;
1263 		for (i = 0; i < block; i++) {
1264 			lh_ptn += val0[i] * theta[i];
1265 			df_ptn += val1[i] * theta[i];
1266 			ddf_ptn += val2[i] * theta[i];
1267 		}
1268 
1269 //        assert(lh_ptn > 0.0);
1270         lh_ptn = fabs(lh_ptn);
1271 
1272         if (ptn < orig_nptn) {
1273 			double df_frac = df_ptn / lh_ptn;
1274 			double ddf_frac = ddf_ptn / lh_ptn;
1275 			double freq = ptn_freq[ptn];
1276 			double tmp1 = df_frac * freq;
1277 			double tmp2 = ddf_frac * freq;
1278 			my_df += tmp1;
1279 			my_ddf += tmp2 - tmp1 * df_frac;
1280 		} else {
1281 			// ascertainment bias correction
1282 			prob_const += lh_ptn;
1283 			df_const += df_ptn;
1284 			ddf_const += ddf_ptn;
1285 		}
1286     }
1287 	df = my_df;
1288 	ddf = my_ddf;
1289 
1290     assert(!isnan(df) && !isinf(df) && "Numerical underflow for lh-derivative");
1291 
1292     if (isnan(df) || isinf(df)) {
1293         df = 0.0;
1294         ddf = 0.0;
1295 //        outWarning("Numerical instability (some site-likelihood = 0)");
1296     }
1297 
1298 	if (orig_nptn < nptn) {
1299     	// ascertainment bias correction
1300     	prob_const = 1.0 - prob_const;
1301     	double df_frac = df_const / prob_const;
1302     	double ddf_frac = ddf_const / prob_const;
1303     	int nsites = aln->getNSite();
1304     	df += nsites * df_frac;
1305     	ddf += nsites *(ddf_frac + df_frac*df_frac);
1306     }
1307 
1308 
1309     delete [] val2;
1310     delete [] val1;
1311     delete [] val0;
1312 }
1313 
1314 //template <const int nstates>
1315 double PhyloTree::computeLikelihoodBranchEigen(PhyloNeighbor *dad_branch, PhyloNode *dad) {
1316     PhyloNode *node = (PhyloNode*) dad_branch->node;
1317     PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad);
1318     if (!central_partial_lh)
1319         initializeAllPartialLh();
1320     if (node->isLeaf()) {
1321     	PhyloNode *tmp_node = dad;
1322     	dad = node;
1323     	node = tmp_node;
1324     	PhyloNeighbor *tmp_nei = dad_branch;
1325     	dad_branch = node_branch;
1326     	node_branch = tmp_nei;
1327     }
1328     if ((dad_branch->partial_lh_computed & 1) == 0)
1329 //        computePartialLikelihoodEigen(dad_branch, dad);
1330         computePartialLikelihood(dad_branch, dad);
1331     if ((node_branch->partial_lh_computed & 1) == 0)
1332 //        computePartialLikelihoodEigen(node_branch, node);
1333         computePartialLikelihood(node_branch, node);
1334     double tree_lh = node_branch->lh_scale_factor + dad_branch->lh_scale_factor;
1335     size_t nstates = aln->num_states;
1336     size_t ncat = site_rate->getNRate();
1337     size_t ncat_mix = (model_factory->fused_mix_rate) ? ncat : ncat*model->getNMixtures();
1338 
1339     size_t block = ncat_mix * nstates;
1340     size_t tip_block = nstates * model->getNMixtures();
1341     size_t ptn; // for big data size > 4GB memory required
1342     size_t c, i;
1343     size_t orig_nptn = aln->size();
1344     size_t nptn = aln->size()+model_factory->unobserved_ptns.size();
1345 
1346     size_t mix_addr_nstates[ncat_mix], mix_addr[ncat_mix];
1347     size_t denom = (model_factory->fused_mix_rate) ? 1 : ncat;
1348 
1349     double *eval = model->getEigenvalues();
1350     assert(eval);
1351 
1352     double *val = new double[block];
1353 	for (c = 0; c < ncat_mix; c++) {
1354         size_t mycat = c%ncat;
1355         size_t m = c/denom;
1356         mix_addr_nstates[c] = m*nstates;
1357         mix_addr[c] = mix_addr_nstates[c]*nstates;
1358         double *eval_ptr = eval + mix_addr_nstates[c];
1359 		double len = site_rate->getRate(mycat)*dad_branch->length;
1360 		double prop = site_rate->getProp(mycat) * model->getMixtureWeight(m);
1361         double *this_val = val + c*nstates;
1362 		for (i = 0; i < nstates; i++)
1363 			this_val[i] = exp(eval_ptr[i]*len) * prop;
1364 	}
1365 
1366 	double prob_const = 0.0;
1367 	memset(_pattern_lh_cat, 0, sizeof(double)*nptn*ncat_mix);
1368 
1369     if (dad->isLeaf()) {
1370     	// special treatment for TIP-INTERNAL NODE case
1371     	double *partial_lh_node = new double[(aln->STATE_UNKNOWN+1)*block];
1372     	IntVector states_dad;
1373         if (dad->id < aln->getNSeq())
1374             states_dad = aln->seq_states[dad->id];
1375     	states_dad.push_back(aln->STATE_UNKNOWN);
1376     	// precompute information from one tip
1377     	for (IntVector::iterator it = states_dad.begin(); it != states_dad.end(); it++) {
1378     		double *lh_node = partial_lh_node +(*it)*block;
1379     		double *val_tmp = val;
1380             double *this_tip_partial_lh = tip_partial_lh + (*it)*tip_block;
1381 			for (c = 0; c < ncat_mix; c++) {
1382                 double *lh_tip = this_tip_partial_lh + mix_addr_nstates[c];
1383 				for (i = 0; i < nstates; i++) {
1384 					  lh_node[i] = val_tmp[i] * lh_tip[i];
1385 				}
1386 				lh_node += nstates;
1387 				val_tmp += nstates;
1388 			}
1389     	}
1390 
1391     	// now do the real computation
1392 #ifdef _OPENMP
1393 #pragma omp parallel for reduction(+: tree_lh, prob_const) private(ptn, i, c) schedule(static)
1394 #endif
1395     	for (ptn = 0; ptn < nptn; ptn++) {
1396 			double lh_ptn = ptn_invar[ptn];
1397             double *lh_cat = _pattern_lh_cat + ptn*ncat_mix;
1398             double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
1399             UBYTE *scale_dad = dad_branch->scale_num + ptn*ncat_mix;
1400             double *lh_node = partial_lh_node + block*((ptn < orig_nptn) ? (aln->at(ptn))[dad->id] : model_factory->unobserved_ptns[ptn-orig_nptn]);
1401             // determine the min scaling
1402             UBYTE min_scale = scale_dad[0];
1403             for (c = 1; c < ncat_mix; c++)
1404                 min_scale = min(min_scale, scale_dad[c]);
1405 
1406             for (c = 0; c < ncat_mix; c++) {
1407                 if (scale_dad[c] <= min_scale+1) {
1408                     // only compute for least scale category
1409                     for (i = 0; i < nstates; i++) {
1410                         *lh_cat += (lh_node[i] * partial_lh_dad[i]);
1411                     }
1412                     if (scale_dad[c] != min_scale)
1413                         *lh_cat *= SCALING_THRESHOLD;
1414                     lh_ptn += *lh_cat;
1415                 }
1416                 lh_node += nstates;
1417                 partial_lh_dad += nstates;
1418                 lh_cat++;
1419             }
1420 //			assert(lh_ptn > -1e-10);
1421 			if (ptn < orig_nptn) {
1422 				lh_ptn = log(fabs(lh_ptn)) + LOG_SCALING_THRESHOLD*min_scale;
1423 				_pattern_lh[ptn] = lh_ptn;
1424 				tree_lh += lh_ptn * ptn_freq[ptn];
1425 			} else {
1426                 // bugfix 2016-01-21, prob_const can be rescaled
1427                 if (min_scale >= 1)
1428                     lh_ptn *= SCALING_THRESHOLD;
1429 //				_pattern_lh[ptn] = lh_ptn;
1430 				prob_const += lh_ptn;
1431 			}
1432 		}
1433 		delete [] partial_lh_node;
1434     } else {
1435     	// both dad and node are internal nodes
1436 #ifdef _OPENMP
1437 #pragma omp parallel for reduction(+: tree_lh, prob_const) private(ptn, i, c) schedule(static)
1438 #endif
1439     	for (ptn = 0; ptn < nptn; ptn++) {
1440 			double lh_ptn = ptn_invar[ptn];
1441             double *lh_cat = _pattern_lh_cat + ptn*ncat_mix;
1442             double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
1443             double *partial_lh_node = node_branch->partial_lh + ptn*block;
1444             double *val_tmp = val;
1445             UBYTE *scale_dad = dad_branch->scale_num + ptn*ncat_mix;
1446             UBYTE *scale_node = node_branch->scale_num + ptn*ncat_mix;
1447             UBYTE sum_scale[ncat_mix];
1448             UBYTE min_scale = sum_scale[0] = scale_dad[0]+scale_node[0];
1449             for (c = 1; c < ncat_mix; c++) {
1450                 sum_scale[c] = scale_dad[c] + scale_node[c];
1451                 min_scale = min(min_scale, sum_scale[c]);
1452             }
1453             for (c = 0; c < ncat_mix; c++) {
1454                 if (sum_scale[c] <= min_scale+1) {
1455                     // only compute for least scale category
1456                     for (i = 0; i < nstates; i++) {
1457                         *lh_cat +=  (val_tmp[i] * partial_lh_node[i] * partial_lh_dad[i]);
1458                     }
1459                     if (sum_scale[c] != min_scale)
1460                         *lh_cat *= SCALING_THRESHOLD;
1461                     lh_ptn += *lh_cat;
1462                 }
1463                 partial_lh_node += nstates;
1464                 partial_lh_dad += nstates;
1465                 val_tmp += nstates;
1466                 lh_cat++;
1467             }
1468 
1469 //			assert(lh_ptn > 0.0);
1470             if (ptn < orig_nptn) {
1471 				lh_ptn = log(fabs(lh_ptn)) + LOG_SCALING_THRESHOLD*min_scale;
1472 				_pattern_lh[ptn] = lh_ptn;
1473 				tree_lh += lh_ptn * ptn_freq[ptn];
1474 			} else {
1475                 // bugfix 2016-01-21, prob_const can be rescaled
1476                 if (min_scale >= 1)
1477                     lh_ptn *= SCALING_THRESHOLD;
1478 //				_pattern_lh[ptn] = lh_ptn;
1479 				prob_const += lh_ptn;
1480 			}
1481 		}
1482     }
1483 
1484     assert(!isnan(tree_lh) && !isinf(tree_lh) && "Numerical underflow for lh-branch");
1485 
1486     if (orig_nptn < nptn) {
1487     	// ascertainment bias correction
1488         if (prob_const >= 1.0 || prob_const < 0.0) {
1489             printTree(cout, WT_TAXON_ID + WT_BR_LEN + WT_NEWLINE);
1490             model->writeInfo(cout);
1491         }
1492         assert(prob_const < 1.0 && prob_const >= 0.0);
1493 
1494         // BQM 2015-10-11: TODO: fix those functions that use _pattern_lh_cat
1495 //        double inv_const = 1.0 / (1.0-prob_const);
1496 //        size_t nptn_cat = orig_nptn*ncat;
1497 //    	for (ptn = 0; ptn < nptn_cat; ptn++)
1498 //            _pattern_lh_cat[ptn] *= inv_const;
1499 
1500     	prob_const = log(1.0 - prob_const);
1501     	for (ptn = 0; ptn < orig_nptn; ptn++)
1502     		_pattern_lh[ptn] -= prob_const;
1503     	tree_lh -= aln->getNSite()*prob_const;
1504 		assert(!isnan(tree_lh) && !isinf(tree_lh));
1505     }
1506 
1507 	assert(!isnan(tree_lh) && !isinf(tree_lh));
1508 
1509     delete [] val;
1510     return tree_lh;
1511 }
1512 */
1513 
1514 
1515 /*******************************************************
1516  *
1517  * ancestral sequence reconstruction
1518  *
1519  ******************************************************/
1520 
1521 
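/*
 * Marginal (empirical Bayes) ancestral state reconstruction proceeds in four steps,
 * implemented by the functions below:
 *   1. initMarginalAncestralState()    - switch to the non-reversible kernel and
 *                                        allocate the per-pattern probability/state buffers
 *   2. computeMarginalAncestralState() - fill the buffers for one internal branch
 *   3. writeMarginalAncestralState()   - print the reconstruction for that node
 *   4. endMarginalAncestralState()     - restore the original kernel and free the buffers
 *
 * Minimal caller sketch (illustrative only, not part of this file; the loop over
 * internal branches is assumed to be driven elsewhere):
 *
 *   bool orig_kernel_nonrev;
 *   double *ptn_ancestral_prob;
 *   int *ptn_ancestral_seq;
 *   tree.initMarginalAncestralState(out, orig_kernel_nonrev, ptn_ancestral_prob, ptn_ancestral_seq);
 *   // for every internal branch (dad_branch, dad) of interest:
 *   //     tree.computeMarginalAncestralState(dad_branch, dad, ptn_ancestral_prob, ptn_ancestral_seq);
 *   //     tree.writeMarginalAncestralState(out, (PhyloNode*)dad_branch->node, ptn_ancestral_prob, ptn_ancestral_seq);
 *   tree.endMarginalAncestralState(orig_kernel_nonrev, ptn_ancestral_prob, ptn_ancestral_seq);
 */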
1522 void PhyloTree::initMarginalAncestralState(ostream &out, bool &orig_kernel_nonrev, double* &ptn_ancestral_prob, int* &ptn_ancestral_seq) {
1523     orig_kernel_nonrev = params->kernel_nonrev;
1524     if (!orig_kernel_nonrev) {
1525         // switch to nonrev kernel to compute _pattern_lh_cat_state
1526         params->kernel_nonrev = true;
1527         setLikelihoodKernel(sse);
1528         clearAllPartialLH();
1529     }
1530     _pattern_lh_cat_state = newPartialLh();
1531 
1532     size_t nptn = getAlnNPattern();
1533     size_t nstates = model->num_states;
1534 
1535     ptn_ancestral_prob = aligned_alloc<double>(nptn*nstates);
1536     ptn_ancestral_seq = aligned_alloc<int>(nptn);
1537 }
1538 
1539 void PhyloTree::computeMarginalAncestralState(PhyloNeighbor *dad_branch, PhyloNode *dad,
1540     double *ptn_ancestral_prob, int *ptn_ancestral_seq) {
1541     size_t nptn = getAlnNPattern();
1542     size_t nstates = model->num_states;
1543     size_t nstates_vector = nstates * vector_size;
1544     size_t ncat_mix = (model_factory->fused_mix_rate) ? site_rate->getNRate() : site_rate->getNRate()*model->getNMixtures();
1545     double state_freq[nstates];
1546     model->getStateFrequency(state_freq);
1547 
1548     // compute _pattern_lh_cat_state using NONREV kernel
1549     computeLikelihoodBranch(dad_branch, dad);
1550 
1551     size_t ptn, i, c, v;
1552 
1553     double *lh_state = _pattern_lh_cat_state;
1554     memset(ptn_ancestral_prob, 0, sizeof(double)*nptn*nstates);
1555 
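    // _pattern_lh_cat_state was filled by the vectorized non-reversible kernel:
    // for each block of vector_size patterns it stores, per mixture/rate category,
    // nstates values interleaved across the vector lanes (index i*vector_size + v),
    // which is the layout the conversion loop below assumes.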
1556     // sum the per-category values over all mixture/rate categories into one contiguous state vector per pattern
1557     for (ptn = 0; ptn < nptn; ptn += vector_size) {
1558         double *state_prob = ptn_ancestral_prob + ptn*nstates;
1559         for (c = 0; c < ncat_mix; c++) {
1560             for (i = 0; i < nstates; i++) {
1561                 for (v = 0; v < vector_size; v++) {
1562                     if (ptn+v < nptn)   // skip padding lanes past the last pattern
1563                         state_prob[v*nstates+i] += lh_state[i*vector_size + v];
1564                 }
1565             }
1566             lh_state += nstates_vector;
1567         }
1568     }
1569 
1570     // normalize each pattern's values to posterior probabilities and record the most probable state
1571     for (ptn = 0; ptn < nptn; ptn++) {
1572         double *state_prob = ptn_ancestral_prob + ptn*nstates;
1573         double sum = 0.0;
1574         int state_best = 0;
1575         for (i = 0; i < nstates; i++) {
1576             sum += state_prob[i];
1577             if (state_prob[i] > state_prob[state_best])
1578                 state_best = i;
1579         }
1580         sum = 1.0/sum;
1581         for (i = 0; i < nstates; i++) {
1582             state_prob[i] *= sum;
1583         }
1584 
1585         // report the best state only if its posterior reaches min_ancestral_prob and exceeds its equilibrium frequency (by at least MIN_FREQUENCY_DIFF); otherwise fall back to the unknown state
1586         if (state_prob[state_best] < params->min_ancestral_prob ||
1587             state_prob[state_best] <= state_freq[state_best]+MIN_FREQUENCY_DIFF)
1588             state_best = aln->STATE_UNKNOWN;
1589         ptn_ancestral_seq[ptn] = state_best;
1590     }
1591 
1592 }
1593 
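// Output format (one tab-separated line per alignment site):
// node name, 1-based site index, most probable state (or the unknown state if the
// posterior did not pass the thresholds above), followed by the posterior
// probability of every state.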
1594 void PhyloTree::writeMarginalAncestralState(ostream &out, PhyloNode *node, double *ptn_ancestral_prob, int *ptn_ancestral_seq) {
1595     size_t site, nsites = aln->getNSite(), nstates = model->num_states;
1596     for (site = 0; site < nsites; site++) {
1597         int ptn = aln->getPatternID(site);
1598         out << node->name << "\t" << site+1 << "\t";
1599 //        if (params->print_ancestral_sequence == AST_JOINT)
1600 //            out << aln->convertStateBackStr(joint_ancestral_node[ptn]) << "\t";
1601         out << aln->convertStateBackStr(ptn_ancestral_seq[ptn]);
1602         double *state_prob = ptn_ancestral_prob + ptn*nstates;
1603         for (size_t j = 0; j < nstates; j++) {
1604             out << "\t" << state_prob[j];
1605         }
1606         out << endl;
1607     }
1608 
1609 }
1610 
1611 void PhyloTree::endMarginalAncestralState(bool orig_kernel_nonrev, double* &ptn_ancestral_prob, int* &ptn_ancestral_seq) {
1612     if (!orig_kernel_nonrev) {
1613         // switch back to REV kernel
1614         params->kernel_nonrev = orig_kernel_nonrev;
1615         setLikelihoodKernel(sse);
1616         clearAllPartialLH();
1617     }
1618     aligned_free(ptn_ancestral_seq);
1619     aligned_free(ptn_ancestral_prob);
1620 
1621     aligned_free(_pattern_lh_cat_state);
1622     _pattern_lh_cat_state = NULL;
1623 }
1624 
1625 /*
1626 void PhyloTree::computeMarginalAncestralProbability(PhyloNeighbor *dad_branch, PhyloNode *dad, double *ptn_ancestral_prob) {
1627     PhyloNode *node = (PhyloNode*) dad_branch->node;
1628     PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad);
1629     if (!central_partial_lh)
1630         initializeAllPartialLh();
1631     ASSERT(!node->isLeaf());
1632 
1633     // TODO: not working yet
1634 
1635 //    if ((dad_branch->partial_lh_computed & 1) == 0)
1636 //        computePartialLikelihood(dad_branch, dad);
1637 //    if ((node_branch->partial_lh_computed & 1) == 0)
1638 //        computePartialLikelihood(node_branch, node);
1639     size_t nstates = aln->num_states;
1640     const size_t nstatesqr=nstates*nstates;
1641     size_t ncat = site_rate->getNRate();
1642     size_t statecat = nstates * ncat;
1643     size_t nmixture = model->getNMixtures();
1644 
1645     size_t block = ncat * nstates * nmixture;
1646     size_t ptn; // for big data size > 4GB memory required
1647     size_t c, i, m, x;
1648     size_t nptn = aln->size();
1649     double *eval = model->getEigenvalues();
1650     double *evec = model->getEigenvectors();
1651     double *inv_evec = model->getInverseEigenvectors();
1652     ASSERT(eval);
1653 
1654     double echild[block*nstates];
1655 
1656     for (c = 0; c < ncat; c++) {
1657         double expchild[nstates];
1658         double len_child = site_rate->getRate(c) * dad_branch->length;
1659         for (m = 0; m < nmixture; m++) {
1660             for (i = 0; i < nstates; i++) {
1661                 expchild[i] = exp(eval[m*nstates+i]*len_child);
1662             }
1663             for (x = 0; x < nstates; x++)
1664                 for (i = 0; i < nstates; i++) {
1665                     echild[(m*ncat+c)*nstatesqr+x*nstates+i] = evec[m*nstatesqr+x*nstates+i] * expchild[i];
1666                 }
1667         }
1668     }
1669 
1670 
1671 	memset(ptn_ancestral_prob, 0, sizeof(double)*nptn*nstates);
1672 
1673     if (dad->isLeaf()) {
1674     	// special treatment for TIP-INTERNAL NODE case
1675         double partial_lh_leaf[(aln->STATE_UNKNOWN+1)*block];
1676 
1677         for (IntVector::iterator it = aln->seq_states[dad->id].begin(); it != aln->seq_states[dad->id].end(); it++) {
1678             int state = (*it);
1679             for (m = 0; m < nmixture; m++) {
1680                 double *this_echild = &echild[m*nstatesqr*ncat];
1681                 double *this_tip_partial_lh = &tip_partial_lh[state*nstates*nmixture + m*nstates];
1682                 double *this_partial_lh_leaf = &partial_lh_leaf[state*block+m*statecat];
1683                 for (x = 0; x < statecat; x++) {
1684                     double vchild = 0.0;
1685                     for (i = 0; i < nstates; i++) {
1686                         vchild += this_echild[x*nstates+i] * this_tip_partial_lh[i];
1687                     }
1688                     this_partial_lh_leaf[x] = vchild;
1689                 }
1690             }
1691         }
1692         size_t addr = aln->STATE_UNKNOWN * block;
1693         for (x = 0; x < block; x++) {
1694             partial_lh_leaf[addr+x] = 1.0;
1695         }
1696 
1697 
1698     	// now do the real computation
1699 #ifdef _OPENMP
1700 #pragma omp parallel for private(ptn, i, c, m, x)
1701 #endif
1702     	for (ptn = 0; ptn < nptn; ptn++) {
1703             double *lh_state = ptn_ancestral_prob + ptn*nstates;
1704 			double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
1705 			int state_dad = (aln->at(ptn))[dad->id];
1706 			double *lh_leaf = partial_lh_leaf + state_dad*block;
1707             for (m = 0; m < nmixture; m++) {
1708                 double *this_inv_evec = inv_evec + (m*nstatesqr);
1709 				for (c = 0; c < ncat; c++) {
1710 					// compute real partial likelihood vector
1711 					for (x = 0; x < nstates; x++) {
1712 						double vnode = 0.0;
1713 						for (i = 0; i < nstates; i++) {
1714 							vnode += this_inv_evec[i*nstates+x] * partial_lh_dad[i];
1715 						}
1716 						lh_state[x] += lh_leaf[x] * vnode;
1717 					}
1718                     lh_leaf += nstates;
1719                     partial_lh_dad += nstates;
1720                 }
1721             }
1722 
1723             double lh_sum = lh_state[0];
1724             for (x = 1; x < nstates; x++)
1725                 lh_sum += lh_state[x];
1726             lh_sum = 1.0/lh_sum;
1727             for (x = 0; x < nstates; x++)
1728                 lh_state[x] *= lh_sum;
1729 		}
1730     } else {
1731     	// both dad and node are internal nodes
1732 #ifdef _OPENMP
1733 #pragma omp parallel for private(ptn, i, c, m, x)
1734 #endif
1735     	for (ptn = 0; ptn < nptn; ptn++) {
1736             double *lh_state = ptn_ancestral_prob + ptn*nstates;
1737 			double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
1738 			double *partial_lh_node = node_branch->partial_lh + ptn*block;
1739 
1740 			for (m = 0; m < nmixture; m++) {
1741                 double *this_inv_evec = inv_evec + (m*nstatesqr);
1742 				for (c = 0; c < ncat; c++) {
1743 					// compute real partial likelihood vector
1744 					for (x = 0; x < nstates; x++) {
1745 						double vdad = 0.0, vnode = 0.0;
1746 						size_t addr = (m*ncat+c)*nstatesqr+x*nstates;
1747 						for (i = 0; i < nstates; i++) {
1748 							vdad += echild[addr+i] * partial_lh_node[m*statecat+c*nstates+i];
1749                             vnode += this_inv_evec[i*nstates+x] * partial_lh_dad[m*statecat+c*nstates+i];
1750 						}
1751 						lh_state[x] += vnode*vdad;
1752 					}
1753 				}
1754 			}
1755 
1756             double lh_sum = lh_state[0];
1757             for (x = 1; x < nstates; x++)
1758                 lh_sum += lh_state[x];
1759             lh_sum = 1.0/lh_sum;
1760             for (x = 0; x < nstates; x++)
1761                 lh_state[x] *= lh_sum;
1762 
1763 		}
1764     }
1765 }
1766 */
1767 
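/*
 * Joint ancestral sequence reconstruction (Pupko et al. 2000, MBE 17:890-896).
 * For every internal node y, every alignment pattern and every possible state i of
 * y's parent, the dynamic programming pass stores
 *   L_y(i) = best log-likelihood of the subtree rooted at y given the parent is in state i
 *   C_y(i) = the state of y achieving that maximum
 * (C is the integer array passed around below). A second, pre-order pass then reads
 * the reconstructed states back from C, starting at the root.
 */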
1768 void PhyloTree::computeJointAncestralSequences(int *ancestral_seqs) {
1769 
1770     // steps 1-3 of the dynamic programming algorithm of Pupko et al. 2000, MBE 17:890-896
1771     ASSERT(root->isLeaf());
1772     int *C = new int[(size_t)getAlnNPattern()*model->num_states*leafNum];
1773     computeAncestralLikelihood((PhyloNeighbor*)root->neighbors[0], NULL, C);
1774 
1775     // steps 4-5 of the dynamic programming algorithm of Pupko et al. 2000, MBE 17:890-896
1776     computeAncestralState((PhyloNeighbor*)root->neighbors[0], NULL, C, ancestral_seqs);
1777 
1778     clearAllPartialLH();
1779 
1780     delete[] C;
1781 }
1782 
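/*
 * Hypothetical caller sketch (not part of this file): computeAncestralState() below
 * indexes the result buffer as (node->id - leafNum)*nptn + ptn, so the caller is
 * expected to supply one int per pattern for every internal node, e.g.:
 *
 *   int *ancestral_seqs = aligned_alloc<int>(getAlnNPattern() * (nodeNum - leafNum));
 *   computeJointAncestralSequences(ancestral_seqs);
 *   // ... map pattern states back to site states and print ...
 *   aligned_free(ancestral_seqs);
 */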
1783 void PhyloTree::computeAncestralLikelihood(PhyloNeighbor *dad_branch, PhyloNode *dad, int *C) {
1784     PhyloNode *node = (PhyloNode*)dad_branch->node;
1785     if (node->isLeaf())
1786         return;
1787 
1788     int num_leaves = 0;
1789 
1790     // recurse into internal subtrees and count the leaf children
1791     FOR_NEIGHBOR_DECLARE(node, dad, it) {
1792         if ((*it)->node->isLeaf()) {
1793             num_leaves++;
1794         } else {
1795             computeAncestralLikelihood((PhyloNeighbor*)(*it), node, C);
1796         }
1797     }
1798 
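    // In per-node memory-saving mode this neighbor may not own a partial_lh buffer yet;
    // if so, re-use (re-orient) the buffer of a child's back-neighbor, which is no
    // longer needed once that subtree has been processed.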
1799     // TODO mem save
1800     if (params->lh_mem_save == LM_PER_NODE && !dad_branch->partial_lh) {
1801         // re-orient partial_lh
1802         bool done = false;
1803         FOR_NEIGHBOR_IT(node, dad, it2) {
1804             PhyloNeighbor *backnei = ((PhyloNeighbor*)(*it2)->node->findNeighbor(node));
1805             if (backnei->partial_lh) {
1806                 dad_branch->partial_lh = backnei->partial_lh;
1807                 dad_branch->scale_num = backnei->scale_num;
1808                 backnei->partial_lh = NULL;
1809                 backnei->scale_num = NULL;
1810                 backnei->partial_lh_computed &= ~1; // clear bit
1811                 done = true;
1812                 break;
1813             }
1814         }
1815         ASSERT(done && "partial_lh is not re-oriented");
1816     }
1817 
1818     size_t nptn = aln->getNPattern();
1819     size_t ptn;
1820     size_t nstates = model->num_states;
1821     size_t nstatesqr = nstates*nstates;
1822     size_t parent, child;
1823     double *trans_mat = new double[nstatesqr];
1824     double *lh_leaves = NULL;
1825     if (num_leaves > 0) {
1826         lh_leaves = new double[(aln->STATE_UNKNOWN+1)*nstates*num_leaves];
1827     }
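    // trans_mat holds log-probabilities: for an internal branch, entry [i*nstates+j] is
    // log P(this node is in state j | its parent is in state i) over dad_branch;
    // at the root (dad == NULL) every row is the vector of log equilibrium state frequencies.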
1828     if (dad) {
1829         model->computeTransMatrix(dad_branch->length, trans_mat);
1830         for (parent = 0; parent < nstatesqr; parent++)
1831             trans_mat[parent] = log(trans_mat[parent]);
1832     } else {
1833         model->getStateFrequency(trans_mat);
1834         for (parent = 0; parent < nstates; parent++)
1835             trans_mat[parent] = log(trans_mat[parent]);
1836         for (parent = 1; parent < nstates; parent++)
1837             memcpy(trans_mat+parent*nstates, trans_mat, sizeof(double)*nstates);
1838     }
1839 
1840     // compute information buffer for leaves
1841 	int ambi_aa[] = {
1842         4+8, // B = N or D
1843         32+64, // Z = Q or E
1844         512+1024 // J = I or L
1845     };
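    // Pre-compute, for every leaf child of this node, the log-likelihood of observing
    // each possible character (including ambiguity codes and the unknown state) given
    // each state of this node; the dynamic programming loop below only looks these values up.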
1846     int leafid = 0;
1847     FOR_NEIGHBOR(node, dad, it) {
1848         if ((*it)->node->isLeaf()) {
1849             double trans_leaf[nstatesqr];
1850             model->computeTransMatrix((*it)->length, trans_leaf);
1851             double *lh_leaf = lh_leaves+leafid*nstates*(aln->STATE_UNKNOWN+1);
1852 
1853             // assign lh_leaf for normal states
1854             for (parent = 0; parent < nstates; parent++)
1855                 for (child = 0; child < nstates; child++)
1856                     lh_leaf[child*nstates+parent] = log(trans_leaf[parent*nstates+child]);
1857 
1858             // for unknown state
1859             double *this_lh_leaf = lh_leaf + (aln->STATE_UNKNOWN*nstates);
1860             for (parent = 0; parent < nstates; parent++)
1861                 this_lh_leaf[parent] = 0.0;
1862 
1863             // special treatment for ambiguous characters
1864             switch (aln->seq_type) {
1865             case SEQ_DNA:
1866                 for (int state = 4; state < 18; state++) {
1867                     this_lh_leaf = lh_leaf + (state*nstates);
1868                     int cstate = state-nstates+1;
1869                     for (parent = 0; parent < nstates; parent++) {
1870                         double sumlh = 0.0;
1871                         for (child = 0; child < nstates; child++) {
1872                             if ((cstate) & (1 << child))
1873                                 sumlh += trans_leaf[parent*nstates+child];
1874                         }
1875                         this_lh_leaf[parent] = log(sumlh);
1876                     }
1877                 }
1878                 break;
1879             case SEQ_PROTEIN:
1880                 for (int state = 0; state < sizeof(ambi_aa)/sizeof(int); state++) {
1881                     this_lh_leaf = lh_leaf + ((state+20)*nstates);
1882                     for (parent = 0; parent < nstates; parent++) {
1883                         double sumlh = 0.0;
1884                         for (child = 0; child < nstates; child++) {
1885                             if (ambi_aa[state] & (1 << child))
1886                                 sumlh += trans_leaf[parent*nstates+child];
1887                         }
1888                         this_lh_leaf[parent] = log(sumlh);
1889                     }
1890                 }
1891                 break;
1892             default:
1893                 break;
1894             }
1895             leafid++;
1896         }
1897     }
1898 
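    // Dynamic programming over patterns: sumlh[j] accumulates, over all children z of
    // this node, the best contribution L_z(j) (or the pre-computed leaf term) when this
    // node is in state j. Then, for every possible parent state i,
    //   L_y(i) = max_j ( log P(j | i) + sumlh[j] ),   C_y(i) = argmax_j ( ... )
    // At the root only a single entry is filled, with the log equilibrium frequencies
    // taking the place of the transition term.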
1899     // initialize L_y(i) and C_y(i)
1900 //    memset(dad_branch->partial_lh, 0, nptn*nstates*sizeof(double));
1901 
1902     int *C_node = C + (node->id-leafNum)*nptn*nstates;
1903 
1904     for (ptn = 0; ptn < nptn; ptn++) {
1905         double *lh_dad = dad_branch->partial_lh+ptn*nstates;
1906         int *this_C_node = C_node + (ptn*nstates);
1907         leafid = 0;
1908         double sumlh[nstates];
1909         memset(sumlh, 0, sizeof(double)*nstates);
1910         FOR_NEIGHBOR(node, dad, it) {
1911             PhyloNeighbor *childnei = (PhyloNeighbor*)(*it);
1912             if ((*it)->node->isLeaf()) {
1913                 double *lh_leaf = lh_leaves+leafid*nstates*(aln->STATE_UNKNOWN+1);
1914                 // external node
1915                 int state_child;
1916                 state_child = (aln->at(ptn))[(*it)->node->id];
1917                 double *child_lh = lh_leaf + state_child*nstates;
1918                 for (child = 0; child < nstates; child++)
1919                     sumlh[child] += child_lh[child];
1920                 leafid++;
1921             } else {
1922                 double *child_lh = childnei->partial_lh + ptn*nstates;
1923                 for (child = 0; child < nstates; child++)
1924                     sumlh[child] += child_lh[child];
1925             }
1926         }
1927 
1928 
1929         if (dad) {
1930             // internal node
1931             for (parent = 0; parent < nstates; parent++) {
1932                 lh_dad[parent] = trans_mat[parent*nstates] + sumlh[0];
1933                 this_C_node[parent] = 0;
1934                 for (child = 1; child < nstates; child++) {
1935                     double lh = trans_mat[parent*nstates+child] + sumlh[child];
1936                     if (lh > lh_dad[parent]) {
1937                         lh_dad[parent] = lh;
1938                         this_C_node[parent] = child;
1939                     }
1940                 }
1941             }
1942         } else {
1943             // at the root
1944             lh_dad[0] = trans_mat[0] + sumlh[0];
1945             this_C_node[0] = 0;
1946             for (parent = 1; parent < nstates; parent++) {
1947                 double lh = trans_mat[parent] + sumlh[parent];
1948                 if (lh > lh_dad[0]) {
1949                     lh_dad[0] = lh;
1950                     this_C_node[0] = parent;
1951                 }
1952             }
1953         }
1954     }
1955 
1956 
1957     if (lh_leaves)
1958         delete[] lh_leaves;
1959     delete[] trans_mat;
1960 }
1961 
1962 
1963 void PhyloTree::computeAncestralState(PhyloNeighbor *dad_branch, PhyloNode *dad, int *C, int *ancestral_seqs) {
1964     PhyloNode *node = (PhyloNode*)dad_branch->node;
1965     if (node->isLeaf())
1966         return;
1967 
1968     size_t nptn = aln->getNPattern();
1969     size_t ptn;
1970     size_t nstates = model->num_states;
1971 
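    // Backtracking (steps 4-5 of Pupko et al. 2000): states are assigned pre-order from
    // the root towards the tips; the root-adjacent node takes the state stored in slot 0
    // of its C entry, and every other internal node looks up the state that was optimal
    // given the state just assigned to its parent.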
1972     int *C_node = C + (node->id-leafNum)*nptn*nstates;
1973     int *ancestral_seqs_node = ancestral_seqs + (node->id-leafNum)*nptn;
1974     if (dad) {
1975         // at an internal node
1976         int *ancestral_seqs_dad = ancestral_seqs + (dad->id-leafNum)*nptn;
1977         for (ptn = 0; ptn < nptn; ptn++)
1978             ancestral_seqs_node[ptn] = C_node[ptn*nstates+ancestral_seqs_dad[ptn]];
1979 
1980     } else {
1981         // at the root
1982         for (ptn = 0; ptn < nptn; ptn++)
1983             ancestral_seqs_node[ptn] = C_node[ptn*nstates];
1984     }
1985     FOR_NEIGHBOR_IT(node, dad, it)
1986         computeAncestralState((PhyloNeighbor*)(*it), node, C, ancestral_seqs);
1987 }
1988 
1989 
1990 
1991