1 /*
2 
3 PhyML:  a program that  computes maximum likelihood phylogenies from
4 DNA or AA homologous sequences.
5 
6 Copyright (C) Stephane Guindon. Oct 2003 onward.
7 
8 All parts of the source except where indicated are distributed under
9 the GNU public licence. See http://www.opensource.org for details.
10 
11 */
12 
13 #include "ancestral.h"
14 
Sample_Ancestral_Seq(int fullmutmap,int fromprior,t_tree * tree)15 void Sample_Ancestral_Seq(int fullmutmap, int fromprior, t_tree *tree)
16 {
17   int rate_cat;
18   int i,j,k,l;
19   phydbl *probs;
20   phydbl sum;
21   int n_mut;
22   FILE *fp;
23   phydbl *muttime;
24   int *muttype,*muttax;
25   char *s,*mut_string;
26   int *ordering;
27 
28 
29   if(tree->is_mixt_tree == YES)
30     {
31       MIXT_Sample_Ancestral_Seq(fullmutmap,fromprior,tree);
32       return;
33     }
34 
35   probs = (phydbl *)mCalloc(tree->mod->ras->n_catg,sizeof(phydbl));
36 
37   muttime = (phydbl *)mCalloc((2*tree->n_otu-3)*5, // At most 5 mutations per branch on average
38                sizeof(phydbl));
39 
40   muttype = (int *)mCalloc((2*tree->n_otu-3)*5, // At most 5 mutations per branch on average
41                sizeof(int));
42 
43   muttax = (int *)mCalloc((2*tree->n_otu-3)*5, // At most 5 mutations per branch on average
44                           sizeof(int));
45 
46   ordering = (int *)mCalloc((2*tree->n_otu-3)*5, // At most 5 mutations per branch on average
47                 sizeof(int));
48 
49   s = (char *)mCalloc(T_MAX_NAME,sizeof(char));
50 
51   for(i=0;i<2*tree->n_otu-1;++i)
52     if(tree->a_nodes[i]->tax == NO)
53       {
54         tree->a_nodes[i]->c_seq_anc = (align *)mCalloc(1,sizeof(align));;
55         tree->a_nodes[i]->c_seq_anc->state = (char *)mCalloc(tree->n_pattern,sizeof(char));
56       }
57 
58 
59   /* Update P(D_x|X=i) for each state i and node X */
60   Set_Both_Sides(YES,tree);
61   Lk(NULL,tree);
62 
63 
64   for(i=0;i<tree->n_pattern;++i)
65     {
66       /* Sample the rate class from its posterior density */
67       for(j=0;j<tree->mod->ras->n_catg;j++)
68         {
69           if(fromprior == NO)
70             probs[j] =
71               tree->unscaled_site_lk_cat[i*tree->mod->ras->n_catg+j]*
72               tree->mod->ras->gamma_r_proba->v[j];
73           else
74             probs[j] = tree->mod->ras->gamma_r_proba->v[j];
75         }
76 
77       /* Scale probas. */
78       sum = .0;
79       for(j=0;j<tree->mod->ras->n_catg;j++) sum += probs[j];
80       for(j=0;j<tree->mod->ras->n_catg;j++) probs[j]/=sum;
81 
82       rate_cat = Sample_i_With_Proba_pi(probs,tree->mod->ras->n_catg);
83 
84       n_mut = 0;
85       if(tree->n_root != NULL)
86         {
87           Sample_Ancestral_Seq_Pre(tree->n_root,tree->n_root->v[1],tree->n_root->b[1],
88                                    i,rate_cat,
89                                    muttype,muttime,muttax,&n_mut,
90                                    fullmutmap,fromprior,tree);
91 
92 
93           Sample_Ancestral_Seq_Pre(tree->n_root,tree->n_root->v[2],tree->n_root->b[2],
94                                    i,rate_cat,
95                                    muttype,muttime,muttax,&n_mut,
96                                    fullmutmap,fromprior,tree);
97 
98 
99         }
100       else
101         {
102           Sample_Ancestral_Seq_Pre(tree->a_nodes[0],tree->a_nodes[0]->v[0],tree->a_nodes[0]->b[0],
103                                    i,rate_cat,
104                                    muttype,muttime,muttax,&n_mut,
105                                    fullmutmap,fromprior,tree);
106         }
107 
108       for(j=0;j<n_mut;j++) ordering[j] = 0;
109 
110       for(j=0;j<n_mut-1;j++)
111         {
112           for(k=j+1;k<n_mut;k++)
113             {
114               if(muttime[k] > muttime[j]) ordering[k]++;
115               else ordering[j]++;
116             }
117         }
118 
119       strcpy(s,"mutmap.");
120       sprintf(s+strlen(s),"%d.",tree->io->r_seed);
121       sprintf(s+strlen(s),"%d",i);
122       fp = fopen(s,"a");
123       PhyML_Fprintf(fp,"\n-1 -1 -1.0 -1 %d",tree->mixt_tree ? tree->mixt_tree->mcmc->run : tree->mcmc->run);
124 
125       Reverse_Muttime(muttime,n_mut,tree);
126 
127       for(j=0;j<n_mut;j++)
128         {
129           for(k=0;k<n_mut;k++)
130             {
131               if(ordering[k] == j)
132                 {
133                   for(l=0;l<tree->data->init_len;l++) if(tree->data->sitepatt[l] == i) break;
134                   mut_string = Mutation_Id(muttype[k],tree);
135                   PhyML_Fprintf(fp,"\n%4d %s %g %4d %s",j,mut_string,muttime[k],l,tree->a_nodes[muttax[k]]->name);
136                   Free(mut_string);
137                   break;
138                 }
139             }
140         }
141 
142 
143       for(j=0;j<n_mut;j++)
144         {
145           muttype[j] = -2;
146           muttime[j] = +1.;
147         }
148 
149       fclose(fp);
150     }
151 
152   Free(s);
153   Free(muttype);
154   Free(muttime);
155   Free(muttax);
156   Free(ordering);
157   Free(probs);
158 }
159 
160 //////////////////////////////////////////////////////////////
161 //////////////////////////////////////////////////////////////
162 
Sample_Ancestral_Seq_Pre(t_node * a,t_node * d,t_edge * b,int site,int r_cat,int * muttype,phydbl * muttime,int * muttax,int * n_mut,int fullmutmap,int fromprior,t_tree * tree)163 void Sample_Ancestral_Seq_Pre(t_node *a, t_node *d, t_edge *b,
164                               int site, int r_cat,
165                               int *muttype, phydbl *muttime, int *muttax, int *n_mut,
166                               int fullmutmap, int fromprior, t_tree *tree)
167 {
168 
169   int i;
170   int sa, sd;
171   int ns;
172   phydbl *probs,sum;
173 
174   /* PhyML_Printf("\n>> a: %d d: %d b->left: %d b->rght: %d",a?a->num:-1,d?d->num:-1,b?b->left->num:-1,b?b->rght->num:-1); */
175 
176   ns = tree->mod->ns;
177 
178   probs = (phydbl *)mCalloc(ns,sizeof(phydbl));
179 
180   if(a->tax == TRUE) // Sample state at tip if observed state is ambiguous
181     {
182       assert(b);
183       if(a == b->left) for(i=0;i<ns;++i) probs[i] = b->p_lk_tip_l[site*ns+i];
184       else             for(i=0;i<ns;++i) probs[i] = b->p_lk_tip_r[site*ns+i];
185       for(i=0;i<ns;++i) probs[i] *= tree->mod->e_frq->pi->v[i];
186       sum = 0.0;
187       for(i=0;i<ns;++i) sum += probs[i];
188       for(i=0;i<ns;++i) probs[i] /= sum;
189       sa = Sample_i_With_Proba_pi(probs,ns);
190     }
191   else if(a == tree->n_root)
192     {
193       sa = Sample_Ancestral_Seq_Core(NULL,tree->n_root,NULL,r_cat,site,tree);
194     }
195   else
196     {
197       sa = Assign_State(a->c_seq_anc->state + site,
198                         tree->mod->io->datatype,
199                         tree->mod->io->state_len);
200     }
201 
202   sd = Sample_Ancestral_Seq_Core(a,d,b,r_cat,site,tree);
203 
204   if(fullmutmap == YES) Map_Mutations(a,d,sa,sd,b,site,r_cat,muttype,muttime,muttax,n_mut,tree);
205 
206   if(d->tax) return;
207   else
208     {
209       for(i=0;i<3;++i)
210         {
211           if(d->v[i] != a && d->b[i] != tree->e_root)
212             {
213               Sample_Ancestral_Seq_Pre(d,d->v[i],d->b[i],site,r_cat,muttype,muttime,muttax,n_mut,fullmutmap,fromprior,tree);
214             }
215         }
216     }
217 }
218 
219 //////////////////////////////////////////////////////////////
220 //////////////////////////////////////////////////////////////
221 
Sample_Ancestral_Seq_Core(t_node * a,t_node * d,t_edge * b,int r_cat,int site,t_tree * tree)222 int Sample_Ancestral_Seq_Core(t_node *a, t_node *d, t_edge *b, int r_cat, int site, t_tree *tree)
223 {
224   int i,j;
225   int ns;
226 
227   ns = tree->mod->ns;
228 
229   int dim1 = tree->mod->ras->n_catg * ns;
230   int dim2 = ns;
231   int dim3 = ns * ns;
232 
233   phydbl *probs,*Pij,sum;
234 
235   int state;
236 
237 
238   probs = (phydbl *)mCalloc(ns,sizeof(phydbl));
239   state = -1;
240 
241   if(d->tax == YES)
242     {
243       assert(b);
244       if(d == b->left)      for(i=0;i<ns;++i) probs[i] = b->p_lk_tip_l[site*ns+i];
245       else if(d == b->rght) for(i=0;i<ns;++i) probs[i] = b->p_lk_tip_r[site*ns+i];
246       else assert(FALSE);
247       for(i=0;i<ns;++i) probs[i] *= tree->mod->e_frq->pi->v[i];
248       sum = 0.0;
249       for(i=0;i<ns;++i) sum += probs[i];
250       for(i=0;i<ns;++i) probs[i] /= sum;
251       state = Sample_i_With_Proba_pi(probs,ns);
252     }
253   else
254     {
255       if(a == NULL) // d is root node
256         {
257           assert(d == tree->n_root);
258 
259           phydbl r,l;
260 
261           Update_PMat_At_Given_Edge(tree->n_root->b[1],tree);
262           Update_PMat_At_Given_Edge(tree->n_root->b[2],tree);
263 
264           for(i=0;i<ns;++i)
265             {
266               if(tree->e_root->left == tree->n_root->v[1])
267                 Pij = tree->n_root->b[1]->Pij_rr;
268               else
269                 Pij = tree->n_root->b[2]->Pij_rr;
270 
271               l = 0.0;
272               for(j=0;j<ns;++j)
273                 {
274                   if(tree->e_root->left->tax == NO)
275                     l += tree->e_root->p_lk_left[site*dim1+r_cat*dim2+j] * Pij[r_cat*dim3+i*dim2+j];
276                   else
277                     l += tree->e_root->p_lk_tip_l[site*dim2+j] * Pij[r_cat*dim3+i*dim2+j];
278                 }
279 
280 
281               if(tree->e_root->rght == tree->n_root->v[1])
282                 Pij = tree->n_root->b[1]->Pij_rr;
283               else
284                 Pij = tree->n_root->b[2]->Pij_rr;
285 
286               r = 0.0;
287               for(j=0;j<ns;++j)
288                 {
289                   if(tree->e_root->rght->tax == NO)
290                     r += tree->e_root->p_lk_rght[site*dim1+r_cat*dim2+j] * Pij[r_cat*dim3+i*dim2+j];
291                   else
292                     r += tree->e_root->p_lk_tip_r[site*dim2+j] * Pij[r_cat*dim3+i*dim2+j];
293                 }
294 
295               probs[i] = r*l*tree->mod->e_frq->pi->v[i];
296             }
297 
298           sum = 0.0;
299           for(i=0;i<ns;++i) sum += probs[i];
300           for(i=0;i<ns;++i) probs[i] /= sum;
301 
302           state = Sample_i_With_Proba_pi(probs,ns);
303 
304           d->c_seq_anc->state[site] = Reciproc_Assign_State(state,tree->io->datatype);
305         }
306       else
307         {
308           assert(b);
309 
310           // State (already) sampled at node a
311           state = Assign_State(a->c_seq_anc->state + site,
312                                tree->mod->io->datatype,
313                                tree->mod->io->state_len);
314 
315           Pij = b->Pij_rr;
316 
317           for(i=0;i<ns;++i)
318             {
319               if(d == b->left)
320                 probs[i] = b->p_lk_left[site*dim1+r_cat*dim2+i] * Pij[r_cat*dim3+state*dim2+i];
321               else if(d == b->rght)
322                 probs[i] = b->p_lk_rght[site*dim1+r_cat*dim2+i] * Pij[r_cat*dim3+state*dim2+i];
323               else assert(FALSE);
324             }
325 
326           sum = 0.0;
327           for(i=0;i<ns;++i) sum += probs[i];
328           for(i=0;i<ns;++i) probs[i] /= sum;
329 
330           state = Sample_i_With_Proba_pi(probs,ns);
331 
332           d->c_seq_anc->state[site] = Reciproc_Assign_State(state,tree->io->datatype);
333         }
334     }
335 
336   Free(probs);
337   return state;
338 
339 }
340 
341 
342 //////////////////////////////////////////////////////////////
343 //////////////////////////////////////////////////////////////
344 
Map_Mutations(t_node * a,t_node * d,int sa,int sd,t_edge * b,int site,int rcat,int * muttype,phydbl * muttime,int * muttax,int * n_mut,t_tree * tree)345 void Map_Mutations(t_node *a, t_node *d, int sa, int sd, t_edge *b, int site, int rcat, int *muttype, phydbl *muttime, int *muttax, int *n_mut, t_tree *tree)
346 {
347   int i,j;
348   phydbl *probs,*all_probs;
349   int slast,snew; // Last state visited
350   phydbl tlast;
351   phydbl *Q;
352   phydbl u,sum;
353   int *mut; // Array of mutations
354   int thismut;
355   int n_mut_branch;
356   int n_iter;
357   int first_mut;
358   phydbl T;
359   int ns,tax_idx;
360   phydbl rr;
361 
362   ns = tree->mod->ns;
363 
364   all_probs = (phydbl *)mCalloc(ns*ns,sizeof(phydbl));
365   mut = (int *)mCalloc(ns*ns,sizeof(int));
366 
367   // Site relative rate
368   rr = tree->mod->ras->gamma_rr->v[rcat];
369 
370   // Rate matrix
371   Q = tree->mod->r_mat->qmat->v;
372 
373   // Length of the 'time' interval considered.
374   T = 0.0;
375 #ifdef PHYTIME
376   T = tree->rates->cur_l[d->num]*rr;
377 #else
378   T = b->l->v*rr;
379 #endif
380 
381 
382   /* PhyML_Printf("\n. Mutmap: a:%d d:%d ta:%G td:%G cr:%G rr:%G l:%G", */
383   /*              a?a->num:-1,d?d->num:-1, */
384   /*              tree->times->nd_t[a->num], */
385   /*              tree->times->nd_t[d->num], */
386   /*              cr,rr, */
387   /*              fabs(tree->times->nd_t[a->num]-tree->times->nd_t[d->num])*cr*rr); */
388 
389   // Matrix of change probabilities
390   for(i=0;i<ns;i++)
391     {
392       // We only care about the non-diagonal elements here
393       for(j=0;j<ns;j++) all_probs[i*ns+j] = Q[i*ns+j];
394 
395       // Set the diagonal to 0 so that p(i->i)=0.0;
396       all_probs[i*ns+i] = 0.0;
397 
398       // Normalise so that \sum_j p(i->j) = 1.0;
399       sum = 0;
400       for(j=0;j<ns;j++) sum += all_probs[i*ns+j];
401       for(j=0;j<ns;j++) all_probs[i*ns+j] /= sum;
402     }
403 
404   for(i=0;i<ns*ns;++i) mut[i] = 0;
405   tlast = .0;
406   slast = sa;
407   snew  = sa;
408   probs = NULL;
409   n_mut_branch = 0;
410   n_iter = 0;
411   first_mut = YES;
412 
413   do
414     {
415       if((sa != sd) && (first_mut == YES)) // ancestral and descendant states are distinct
416         {
417           // Sample a time for the first mutation conditional on at least one mutation
418           // occurring (see formula 2.1 in Hobolth and Stone, 2009).
419           u = Uni();
420           tlast = -log(1. - u*(1.-exp(Q[sa*ns+sa]*T)))/-Q[sa*ns+sa];
421         }
422       else
423         {
424           // Sample a time for the next mutation
425           tlast = tlast + Rexp(-Q[slast*ns+slast]);
426         }
427 
428       // Select the appropriate vector of change probabilities
429       probs = all_probs+slast*ns;
430 
431 
432       /* printf("\n. sa=%2d sd=%2d slast=%2d tlast=%12G T=%12G  rcat=%2d site=%4d",sa,sd,slast,tlast,T,rcat,site); */
433 
434       // The time for the next mutation does not exceed the length
435       // of the time interval -> sample a new mutation event
436       if(tlast < T)
437         {
438           first_mut = NO;
439 
440           n_mut_branch++;
441 
442           if(n_mut_branch > 5 && n_mut_branch%10 == 0)
443             PhyML_Printf("\n. # of mutations on edge %d (length: %g) exceeds 5 (%d) ! The program will probably crash soon...:(",
444                          b->num,
445                          tree->rates ? tree->rates->cur_l[d->num] : b->l->v,
446                          n_mut_branch);
447 
448           snew = Sample_i_With_Proba_pi(probs,ns);
449 
450           // Record mutation type
451           mut[slast*ns+snew]++;
452 
453           // Record mutation type in the site mutation array
454           thismut = slast*ns+snew;
455 
456           muttype[(*n_mut)+n_mut_branch-1] = thismut;
457 
458           // Record time of mutation
459           muttime[(*n_mut)+n_mut_branch-1] = tlast;
460 
461 #ifdef PHYTIME
462           // Transform into time in calendar units
463           muttime[(*n_mut)+n_mut_branch-1] /= tree->rates->cur_l[d->num];
464           muttime[(*n_mut)+n_mut_branch-1] *= fabs(tree->times->nd_t[a->num]-tree->times->nd_t[d->num]);
465 #endif
466 
467           tax_idx = -1;
468           Random_Tax_Idx(a,d,&tax_idx,tree);
469           muttax[(*n_mut)+n_mut_branch-1] = tax_idx;
470 
471           // Update the last state
472           slast = snew;
473         }
474       else
475         {
476           if(slast == sd) break;
477           else
478             {
479               // Restart from the beginning
480               for(i=0;i<ns*ns;++i) mut[i] = 0;
481               for(i=0;i<n_mut_branch;i++) muttype[(*n_mut)+n_mut_branch-1] = -2;
482               for(i=0;i<n_mut_branch;i++) muttime[(*n_mut)+n_mut_branch-1] = +1.;
483               tlast = 0.0;
484               slast = sa;
485               snew = sa;
486               n_mut_branch = 0;
487               first_mut = YES;
488               n_iter++;
489             }
490         }
491     }
492   while(++n_iter < 10000);
493 
494 
495   if(n_iter == 10000)
496     {
497       PhyML_Printf("\n. sa=%2d sd=%2d slast=%2d tlast=%12G T=%12G  rcat=%12f site=%4d",sa,sd,slast,tlast,T,rcat,site);
498       assert(FALSE);
499     }
500 
501   (*n_mut) += n_mut_branch;
502 
503 
504   /* for(i=0;i<ns;i++) */
505   /*   { */
506   /*     for(j=i+1;j<ns;j++) */
507   /*       { */
508   /*         if(mut[i*ns+j] + mut[j*ns+i] > 0) */
509   /*           { */
510   /*             thismut = MIN(i,j) * ns + MAX(i,j) - (MIN(i,j)+1+(int)POW(MIN(i,j)+1,2))/2; */
511 
512   /*             if(tree->mixt_tree != NULL) */
513   /*               tree->mixt_tree->mutmap[thismut*(tree->n_pattern)*(2*tree->n_otu-3) + b->num*(tree->n_pattern) + site]++; */
514   /*             else */
515   /*               tree->mutmap[thismut*(tree->n_pattern)*(2*tree->n_otu-3) + b->num*(tree->n_pattern) + site]++; */
516   /*           } */
517   /*       } */
518   /*   } */
519 
520   Free(all_probs);
521   Free(mut);
522 }
523 
524 /*////////////////////////////////////////////////////////////
525 ////////////////////////////////////////////////////////////*/
526 
Ancestral_Sequences(t_tree * tree,int print)527 void Ancestral_Sequences(t_tree *tree, int print)
528 {
529   int i;
530 
531   if(print == YES)
532     {
533 
534       PhyML_Printf("\n\n. Estimating ancestral sequences...");
535 
536       strcpy(tree->io->out_ancestral_seq_file,tree->io->out_file);
537       strcat(tree->io->out_ancestral_seq_file,"_phyml_ancestral_");
538       if(tree->io->append_run_ID)
539         {
540           strcat(tree->io->out_ancestral_seq_file,tree->io->run_id_string);
541           strcat(tree->io->out_ancestral_seq_file,"_");
542         }
543       strcat(tree->io->out_ancestral_seq_file,"seq.txt");
544       tree->io->fp_out_ancestral_seq = Openfile(tree->io->out_ancestral_seq_file,1);
545 
546 
547       strcpy(tree->io->out_ancestral_tree_file,tree->io->out_file);
548       strcat(tree->io->out_ancestral_tree_file,"_phyml_ancestral_");
549       if(tree->io->append_run_ID)
550         {
551           strcat(tree->io->out_ancestral_tree_file,tree->io->run_id_string);
552           strcat(tree->io->out_ancestral_tree_file,"_");
553         }
554       strcat(tree->io->out_ancestral_tree_file,"tree.txt");
555       tree->io->fp_out_ancestral_tree = Openfile(tree->io->out_ancestral_tree_file,1);
556 
557 
558       PhyML_Fprintf(tree->io->fp_out_ancestral_seq,"\n\n\n");
559       PhyML_Fprintf(tree->io->fp_out_ancestral_seq,"\n. Printing marginal probabilities of ancestral sequences at each site");
560       PhyML_Fprintf(tree->io->fp_out_ancestral_seq,"\n. of the alignment and each node of the tree. The tree in Newick format");
561       PhyML_Fprintf(tree->io->fp_out_ancestral_seq,"\n. with internal nodes labels corresponding to those given below can be");
562       PhyML_Fprintf(tree->io->fp_out_ancestral_seq,"\n. found in the file '%s'.",tree->io->out_ancestral_tree_file);
563       PhyML_Fprintf(tree->io->fp_out_ancestral_seq,"\n");
564       PhyML_Fprintf(tree->io->fp_out_ancestral_seq,"\n");
565       PhyML_Fprintf(tree->io->fp_out_ancestral_seq,"\n. Ancestral reconstruction is conducted based on the \"Minimum Posterior Expected");
566       PhyML_Fprintf(tree->io->fp_out_ancestral_seq,"\n. Error\" (MPEE) criterion, which accomodates for uncertainty in the selection of ");
567       PhyML_Fprintf(tree->io->fp_out_ancestral_seq,"\n. ancestral character states.");
568       PhyML_Fprintf(tree->io->fp_out_ancestral_seq,"\n");
569       PhyML_Fprintf(tree->io->fp_out_ancestral_seq,"\n");
570       PhyML_Fprintf(tree->io->fp_out_ancestral_seq,"\n. Recommended citation:");
571       PhyML_Fprintf(tree->io->fp_out_ancestral_seq,"\n. Oliva A., Pulicani S., Lefort V., Brehelin L., Gascuel O. and S. Guindon,");
572       PhyML_Fprintf(tree->io->fp_out_ancestral_seq,"\n. \"Accounting for ambiguity in ancestral sequence reconstruction\",");
573       PhyML_Fprintf(tree->io->fp_out_ancestral_seq,"\n. Bioinformatics, Volume 35, Issue 21, 1 November 2019.");
574       PhyML_Fprintf(tree->io->fp_out_ancestral_seq,"\n");
575 
576       PhyML_Fprintf(tree->io->fp_out_ancestral_seq,"\n\n");
577       PhyML_Fprintf(tree->io->fp_out_ancestral_seq,"Site\tNodeLabel\t");
578       for(i=0;i<tree->mod->ns;i++) PhyML_Fprintf(tree->io->fp_out_ancestral_seq,"%10c\t",Reciproc_Assign_State(i,tree->io->datatype));
579       PhyML_Fprintf(tree->io->fp_out_ancestral_seq,"MPEE\t");
580       PhyML_Fprintf(tree->io->fp_out_ancestral_seq,"\n");
581 
582 
583       short int bck_support = tree->io->print_support_val;
584 
585       tree->io->print_node_num = YES;
586       tree->io->print_support_val = NO;
587       char *s = Write_Tree(tree);
588       PhyML_Fprintf(tree->io->fp_out_ancestral_tree,"%s",s);
589       tree->io->print_node_num = NO;
590       tree->io->print_support_val = bck_support;
591 
592       Free(s);
593       fclose(tree->io->fp_out_ancestral_tree);
594     }
595 
596   for(i=0;i<2*tree->n_otu-2;i++)
597     if(tree->a_nodes[i]->tax == NO)
598       Ancestral_Sequences_One_Node(tree->a_nodes[i],tree,print);
599 
600   if(tree->n_root) Ancestral_Sequences_One_Node(tree->n_root,tree,print);
601 
602 
603   fclose(tree->io->fp_out_ancestral_seq);
604 }
605 
606 //////////////////////////////////////////////////////////////
607 //////////////////////////////////////////////////////////////
608 
Ancestral_Sequences_One_Node(t_node * d,t_tree * tree,int print)609 void Ancestral_Sequences_One_Node(t_node *d, t_tree *tree, int print)
610 {
611   if(d->tax) return;
612   else
613     {
614       if(tree->is_mixt_tree)
615         {
616           MIXT_Ancestral_Sequences_One_Node(d,tree,print);
617         }
618       else
619         {
620           t_node *v0,*v1,*v2; // three neighbours of d
621           t_edge *b0,*b1,*b2;
622           int i,j;
623           int catg;
624           phydbl p0, p1, p2;
625           phydbl *p;
626           int site,csite;
627           phydbl *p_lk0, *p_lk1, *p_lk2;
628           int *sum_scale0, *sum_scale1, *sum_scale2;
629           phydbl sum_probas;
630           phydbl *Pij0, *Pij1, *Pij2;
631           phydbl inc,sum_scale;
632           FILE *fp;
633 
634           unsigned const int ncatg = tree->mod->ras->n_catg;
635           unsigned const int ns = tree->mod->ns;
636           unsigned const int nsns = ns*ns;
637           unsigned const int ncatgns = ns*ncatg;
638 
639 
640           if(tree->scaling_method == SCALE_RATE_SPECIFIC)
641             {
642               PhyML_Fprintf(stderr,"\n. Likelihood rescaling method not compatible with the calculation of ancestral state probabilities.");
643               Exit("\n");
644             }
645 
646 
647           if(!d) return;
648 
649           fp = tree->io->fp_out_ancestral_seq;
650           assert(fp != NULL);
651 
652 
653           p = (phydbl *)mCalloc(ns,sizeof(phydbl));
654 
655           for(site=0;site<tree->data->init_len;site++) // For each site in the current partition element
656             {
657               csite = tree->data->sitepatt[site];
658 
659               for(i=0;i<ns;i++) p[i] = .0;
660 
661               v0 = d->v[0];
662               v1 = d->v[1];
663               v2 = d->v[2];
664 
665               b0 = d->b[0];
666               b1 = d->b[1];
667               b2 = d->b[2];
668 
669               Pij0 = b0->Pij_rr;
670               Pij1 = b1->Pij_rr;
671               Pij2 = b2->Pij_rr;
672 
673               sum_scale = 0.0;
674 
675               if(v0 == b0->left)
676                 {
677                   p_lk0 = b0->p_lk_left;
678                   sum_scale0 = b0->sum_scale_left;
679                 }
680               else
681                 {
682                   p_lk0 = b0->p_lk_rght;
683                   sum_scale0 = b0->sum_scale_rght;
684                 }
685 
686               if(v1 == b1->left)
687                 {
688                   p_lk1 = b1->p_lk_left;
689                   sum_scale1 = b1->sum_scale_left;
690                 }
691               else
692                 {
693                   p_lk1 = b1->p_lk_rght;
694                   sum_scale1 = b1->sum_scale_rght;
695                 }
696 
697               if(v2 == b2->left)
698                 {
699                   p_lk2 = b2->p_lk_left;
700                   sum_scale2 = b2->sum_scale_left;
701                 }
702               else
703                 {
704                   p_lk2 = b2->p_lk_rght;
705                   sum_scale2 = b2->sum_scale_rght;
706                 }
707 
708 
709               for(catg=0;catg<ncatg;++catg)
710                 {
711                   for(i=0;i<ns;++i)
712                     {
713                       p0 = .0;
714                       if(v0->tax)
715                         {
716                           for(j=0;j<ns;++j)
717                             {
718                               p0 += v0->b[0]->p_lk_tip_r[csite*ns+j] * Pij0[catg*nsns+i*ns+j];
719 
720                               if(isinf(p0) || isnan(p0))
721                                {
722                                   PhyML_Fprintf(stderr,"\n. p0: %G v0->b[0]->p_lk_tip_r[csite*ns+j]: %G Pij0[catg*nsns+i*ns+j]: %G\n",
723                                                 p0,
724                                                 v0->b[0]->p_lk_tip_r[csite*ns+j],
725                                                 Pij0[catg*nsns+i*ns+j]);
726                                   Exit("\n");
727                                 }
728                             }
729                         }
730                       else
731                         {
732                           for(j=0;j<ns;j++)
733                             {
734                               p0 += p_lk0[csite*ncatgns+catg*ns+j] * Pij0[catg*nsns+i*ns+j];
735 
736                               if(isinf(p0) || isnan(p0))
737                                 {
738                                   PhyML_Fprintf(stderr,"\n. p0: %G p_lk0[csite*ncatgns+catg*ns+j]: %G Pij0[catg*nsns+i*ns+j]: %G (phydbl)POW(2,sum_scale0[csite*ncatg+catg]): %G sum_scale0: %d\n",
739                                                 p0,
740                                                 p_lk0[csite*ncatgns+catg*ns+j],
741                                                 Pij0[catg*nsns+i*ns+j],
742                                                 (phydbl)POW(2,sum_scale0[csite]),
743                                                 sum_scale0[csite]);
744                                   Exit("\n");
745                                 }
746                             }
747                           if(catg == 0 && i == 0) sum_scale += sum_scale0[csite];
748                         }
749 
750                       p1 = .0;
751                       if(v1->tax)
752                         {
753                           for(j=0;j<ns;j++)
754                             {
755                               p1 += v1->b[0]->p_lk_tip_r[csite*ns+j] * Pij1[catg*nsns+i*ns+j];
756 
757                               if(isinf(p1) || isnan(p1))
758                                 {
759                                   PhyML_Fprintf(stderr,"\n. p1: %G v1->b[0]->p_lk_tip_r[csite*ns+j]: %G Pij1[catg*nsns+i*ns+j]: %G\n",
760                                                 p1,
761                                                 v1->b[0]->p_lk_tip_r[csite*ns+j],
762                                                 Pij1[catg*nsns+i*ns+j]);
763                                   Exit("\n");
764                                 }
765                             }
766                         }
767                       else
768                         {
769                           for(j=0;j<ns;j++)
770                             {
771                               p1 += p_lk1[csite*ncatgns+catg*ns+j] * Pij1[catg*nsns+i*ns+j];
772 
773                               if(isinf(p1) || isnan(p1))
774                                 {
775                                   PhyML_Fprintf(stderr,"\n. p1: %G p_lk1[csite*ncatgns+catg*ns+j]: %G Pij1[catg*nsns+i*ns+j]: %G (phydbl)POW(2,sum_scale1[csite*ncatg+catg]): %G\n",
776                                                 p1,
777                                                 p_lk1[csite*ncatgns+catg*ns+j],
778                                                 Pij1[catg*nsns+i*ns+j],
779                                                 (phydbl)POW(2,sum_scale1[csite]));
780                                   Exit("\n");
781                                 }
782                             }
783                           if(catg == 0 && i == 0) sum_scale += sum_scale1[csite];
784                         }
785 
786                       p2 = .0;
787                       if(v2->tax)
788                         {
789                           for(j=0;j<ns;j++)
790                             {
791                               p2 += v2->b[0]->p_lk_tip_r[csite*ns+j] * Pij2[catg*nsns+i*ns+j];
792                             }
793                         }
794                       else
795                         {
796                           for(j=0;j<ns;j++)
797                             {
798 
799                               p2 += p_lk2[csite*ncatgns+catg*ns+j] * Pij2[catg*nsns+i*ns+j];
800 
801                               if(isinf(p2) || isnan(p2))
802                                 {
803                                   PhyML_Fprintf(stderr,"\n. p2: %G p_lk2[csite*ncatgns+catg*ns+j]: %G Pij2[catg*nsns+i*ns+j]: %G (phydbl)POW(2,sum_scale2[csite]): %G\n",
804                                                 p2,
805                                                 p_lk2[csite*ncatgns+catg*ns+j],
806                                                 Pij2[catg*nsns+i*ns+j],
807                                                 (phydbl)POW(2,sum_scale2[csite]));
808                                   Exit("\n");
809                                 }
810                             }
811                           if(catg == 0 && i == 0) sum_scale += sum_scale2[csite];
812                         }
813 
814                       inc =
815                         p0*p1*p2*
816                         tree->mod->e_frq->pi->v[i] *
817                         tree->mod->ras->gamma_r_proba->v[catg];
818 
819                       p[i] += inc;
820 
821 
822                       if(isinf(p[i]) || isnan(p[i]))
823                         {
824                           PhyML_Fprintf(stderr,"\n. site: %4d p0: %G p1: %G p2: %G tree->mod->e_frq->pi->v[i]: %G tree->cur_site_lk[csite]: %G  tree->mod->ras->gamma_r_proba->v[catg]: %G tree->c_lnL_sorted[csite]: %G",
825                                        csite,
826                                        p0,p1,p2,
827                                        tree->mod->e_frq->pi->v[i] ,
828                                        tree->cur_site_lk[csite] ,
829                                        tree->mod->ras->gamma_r_proba->v[catg],
830                                        tree->c_lnL_sorted[csite]);
831                           Exit("\n");
832                         }
833                     }
834 
835                   /* printf("\n. site: %d || %d %d %d", */
836                   /*        csite, */
837                   /*        v0->tax ? -1 : sum_scale0[csite*ncatg+catg], */
838                   /*        v1->tax ? -1 : sum_scale1[csite*ncatg+catg], */
839                   /*        v2->tax ? -1 : sum_scale2[csite*ncatg+catg]); */
840 
841                 }
842 
843               if(tree->mod->ras->invar == YES)
844                 {
845                   int num_prec_issue = NO;
846                   phydbl inv_site_lk = Invariant_Lk(sum_scale,csite,&num_prec_issue,tree);
847 
848 
849                   switch(num_prec_issue)
850                     {
851                     case YES :
852                       {
853                         assert(isinf(inv_site_lk));
854                         inv_site_lk = Invariant_Lk(0,csite,&num_prec_issue,tree);
855                         for(i=0;i<ns;i++) p[i] = inv_site_lk * tree->mod->ras->pinvar->v * tree->mod->e_frq->pi->v[i];
856                         break;
857                       }
858                     case NO :
859                       {
860                         for(i=0;i<ns;i++) p[i] = p[i] * (1.-tree->mod->ras->pinvar->v) + inv_site_lk * tree->mod->ras->pinvar->v * tree->mod->e_frq->pi->v[i];
861                         break;
862                       }
863                     }
864 
865                 }
866 
867 
868               for(i=0;i<ns;i++) p[i] = log(p[i]) - (phydbl)LOG2 * sum_scale;
869               for(i=0;i<ns;i++) p[i] -= tree->c_lnL_sorted[csite];
870               for(i=0;i<ns;i++) p[i] = exp(p[i]);
871 
872 
873 
874               /* sum_probas = 0.0; */
875               /* for(i=0;i<ns;i++) sum_probas += p[i]; */
876               /* for(i=0;i<ns;i++) p[i]/=sum_probas; */
877 
878               sum_probas = 0.0;
879               for(i=0;i<ns;i++) sum_probas += p[i];
880               if(Are_Equal(sum_probas,1.0,0.01) == NO)
881                 {
882                   PhyML_Fprintf(stderr,"\n. Probabilities do not sum to 1.0! Aborting.");
883                   for(i=0;i<ns;++i) PhyML_Fprintf(stderr,"\n. p[%2d]=%G",i,p[i]);
884                   Exit("\n");
885                 }
886 
887               if(print == YES)
888                 {
889                   PhyML_Fprintf(fp,"%4d\t%9d\t",site+1,d->num);
890                   for(i=0;i<ns;i++) PhyML_Fprintf(fp,"%10g\t",p[i]);
891                   PhyML_Fprintf(fp,"%s",Bit_To_Character_String(Integer_To_Bit(MPEE_Infer(p,ns),ns),ns));
892                   PhyML_Fprintf(fp,"\n");
893                 }
894 
895 
896 
897             }
898           Free(p);
899         }
900     }
901 }
902 
903 /*////////////////////////////////////////////////////////////
904 ////////////////////////////////////////////////////////////*/
905 
MPEE_Infer(const phydbl * p,const int ns)906 int MPEE_Infer(const phydbl *p, const int ns)
907 {
908   unsigned int i,j,sorted;
909   phydbl *alpha;
910   const unsigned mesh = 50;
911   int *best_state,*idx,*count,highest_count,highest_count_idx,most_frequent_state;
912 
913   idx = (int *)mCalloc(ns,sizeof(int));
914 
915   alpha = (phydbl *)mCalloc(mesh,sizeof(phydbl));
916   best_state = (int *)mCalloc(mesh+1,sizeof(int));
917   count = (int *)mCalloc(mesh+1,sizeof(int));
918 
919 
920   // idx[0] gives the index of the largest value in p.
921   // idx[1] gives the index of the second largest value in p
922   // etc.
923   for(i=0;i<ns;++i) idx[i] = i;
924   do
925     {
926       sorted = YES;
927       for(i=0;i<ns-1;++i)
928         {
929           if(p[idx[i]] < p[idx[i+1]])
930             {
931               sorted = NO;
932 
933               j        = idx[i];
934               idx[i]   = idx[i+1];
935               idx[i+1] = j;
936             }
937         }
938     }
939   while(sorted == NO);
940 
941   /* for(i=0;i<ns;++i) */
942   /*   { */
943   /*     PhyML_Printf("\n. p[%d]=%g",i,p[i]); */
944   /*   } */
945 
946   /* for(i=0;i<ns;++i) */
947   /*   { */
948   /*     PhyML_Printf("\n. idx[%d]=%d",i,idx[i]); */
949   /*   } */
950 
951   for(i=0;i<mesh+1;++i)
952     {
953       for(j=0;j<ns;++j) alpha[j] = i*((phydbl)j/(j+1))/mesh;
954       best_state[i] = MPEE_Score(alpha,idx,p,ns);
955     }
956 
957 
958   /* for(i=0;i<mesh;++i) */
959   /*   { */
960   /*     PhyML_Printf("\n. best_state[%d]=%d",i,best_state[i]); */
961   /*   } */
962 
963   for(i=0;i<mesh+1;++i)
964     {
965       count[i] = 0;
966       for(j=0;j<mesh+1;++j)
967         {
968           if(best_state[i] == best_state[j]) count[i]++;
969         }
970     }
971 
972   highest_count = 0;
973   highest_count_idx = -1;
974   for(i=0;i<mesh+1;++i)
975     {
976       if(count[i] > highest_count)
977         {
978           highest_count = count[i];
979           highest_count_idx = i;
980         }
981     }
982 
983   most_frequent_state = best_state[highest_count_idx];
984 
985   Free(alpha);
986   Free(best_state);
987   Free(idx);
988   Free(count);
989 
990   return(most_frequent_state);
991 }
992 
993 /*////////////////////////////////////////////////////////////
994 ////////////////////////////////////////////////////////////*/
995 
MPEE_Score(const phydbl * alpha,int * idx,const phydbl * p,const int ns)996 int MPEE_Score(const phydbl *alpha, int *idx, const phydbl *p, const int ns)
997 {
998   unsigned int i;
999   phydbl *mpee_score,*cdf_sorted;
1000   int min_idx,res;
1001   phydbl a,b;
1002   phydbl min;
1003 
1004   mpee_score = (phydbl *)mCalloc(ns,sizeof(phydbl));
1005   cdf_sorted = (phydbl *)mCalloc(ns,sizeof(phydbl));
1006 
1007 
1008   cdf_sorted[0] = p[idx[0]];
1009   for(i=1;i<ns;++i) cdf_sorted[i] = cdf_sorted[i-1] + p[idx[i]];
1010 
1011   for(i=0;i<ns-1;++i) // for the different levels of ambiguity
1012     {
1013       a = alpha[i];
1014       b = (phydbl)(ns-1.-a*(i+1))/(ns-i-1);
1015 
1016       mpee_score[i] = a + (b-a)*(1-cdf_sorted[i]);
1017     }
1018   mpee_score[ns-1] = (phydbl)(ns-1.)/ns;
1019 
1020   min = mpee_score[0];
1021   min_idx = 0;
1022   for(i=1;i<ns;++i)
1023     if(mpee_score[i] < min)
1024       {
1025         min = mpee_score[i];
1026         min_idx = i;
1027       }
1028 
1029   res = 0;
1030   for(i=0;i<min_idx+1;++i)
1031     {
1032       res += (int)pow(2,ns-1-idx[i]);
1033     }
1034   /* If best score ambiguity level is say 2 and A and G have highest
1035      posterior probs, then res = 2^(4-1-0) + 2^(4-1-2) = 10, which is
1036      bit representation is [1,0,1,0]
1037   */
1038 
1039   Free(mpee_score);
1040   Free(cdf_sorted);
1041 
1042   return(res);
1043 }
1044 
1045 /*////////////////////////////////////////////////////////////
1046 ////////////////////////////////////////////////////////////*/
1047 
Reverse_Muttime(phydbl * muttime,int n_mut,t_tree * tree)1048 void Reverse_Muttime(phydbl *muttime, int n_mut, t_tree *tree)
1049 {
1050   phydbl h;
1051   int i;
1052   h = Tree_Height(tree);
1053   for(i=0;i<n_mut;++i) muttime[i] = h - muttime[i];
1054 
1055 }
1056 
1057 
1058 /*////////////////////////////////////////////////////////////
1059 ////////////////////////////////////////////////////////////*/
1060 /*////////////////////////////////////////////////////////////
1061 ////////////////////////////////////////////////////////////*/
1062 /*////////////////////////////////////////////////////////////
1063 ////////////////////////////////////////////////////////////*/
1064 /*////////////////////////////////////////////////////////////
1065 ////////////////////////////////////////////////////////////*/
1066 /*////////////////////////////////////////////////////////////
1067 ////////////////////////////////////////////////////////////*/
1068 /*////////////////////////////////////////////////////////////
1069 ////////////////////////////////////////////////////////////*/
1070 /*////////////////////////////////////////////////////////////
1071 ////////////////////////////////////////////////////////////*/
1072