1 /*
2
3 PhyML: a program that computes maximum likelihood phylogenies from
4 DNA or AA homologous sequences.
5
6 Copyright (C) Stephane Guindon. Oct 2003 onward.
7
8 All parts of the source except where indicated are distributed under
9 the GNU public licence. See http://www.opensource.org for details.
10
11 */
12
13
14 /* Routines for molecular dating */
15
16
17 #include "date.h"
18
19 //////////////////////////////////////////////////////////////
20 //////////////////////////////////////////////////////////////
21
DATE_Main(int argc,char ** argv)22 int DATE_Main(int argc, char **argv)
23 {
24 option *io;
25
26 io = Get_Input(argc,argv);
27 Free(io);
28 return(0);
29 }
30
31 //////////////////////////////////////////////////////////////
32 //////////////////////////////////////////////////////////////
33
DATE_XML(char * xml_filename)34 void DATE_XML(char *xml_filename)
35 {
36 FILE *fp_xml_in;
37 xml_node *xnd,*xroot;
38 t_tree *mixt_tree,*tree;
39 phydbl *res;
40 int seed;
41 char *dum_string;
42
43 mixt_tree = XML_Process_Base(xml_filename);
44 assert(mixt_tree);
45
46 mixt_tree->rates = RATES_Make_Rate_Struct(mixt_tree->n_otu);
47 RATES_Init_Rate_Struct(mixt_tree->rates,NULL,mixt_tree->n_otu);
48
49 mixt_tree->times = TIMES_Make_Time_Struct(mixt_tree->n_otu);
50 TIMES_Init_Time_Struct(mixt_tree->times,NULL,mixt_tree->n_otu);
51
52 tree = mixt_tree;
53 do
54 {
55 // All rate stuctures point to the same object
56 tree->rates = mixt_tree->rates;
57 tree = tree->next;
58 }
59 while(tree);
60
61
62 fp_xml_in = fopen(xml_filename,"r");
63 if(!fp_xml_in)
64 {
65 PhyML_Fprintf(stderr,"\n. Could not find the XML file '%s'.\n",xml_filename);
66 Exit("\n");
67 }
68
69 /* xroot = XML_Load_File(fp_xml_in); */
70 xroot = mixt_tree->xml_root;
71
72 if(xroot == NULL)
73 {
74 PhyML_Fprintf(stderr,"\n. Encountered an issue while loading the XML file.\n");
75 Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
76 }
77
78 xnd = XML_Search_Node_Name("phytime",NO,xroot);
79
80 if(xnd == NULL)
81 {
82 PhyML_Fprintf(stderr,"\n. Cound not find the \"root\" of the XML file (it should have \'phytime\' as tag name).\n");
83 Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
84 }
85
86 dum_string = XML_Get_Attribute_Value(xnd,"mcmc.chain.len");
87 if(dum_string != NULL) mixt_tree->io->mcmc->chain_len = (int)String_To_Dbl(dum_string);
88
89 dum_string = XML_Get_Attribute_Value(xnd,"mcmc.sample.every");
90 if(dum_string != NULL) mixt_tree->io->mcmc->sample_interval = (int)String_To_Dbl(dum_string);
91
92 dum_string = XML_Get_Attribute_Value(xnd,"mcmc.print.every");
93 if(dum_string != NULL) mixt_tree->io->mcmc->print_every = (int)String_To_Dbl(dum_string);
94
95 dum_string = XML_Get_Attribute_Value(xnd,"mcmc.burnin");
96 if(dum_string != NULL) mixt_tree->io->mcmc->chain_len_burnin = (int)String_To_Dbl(dum_string);
97
98
99 dum_string = XML_Get_Attribute_Value(xnd,"ignore.sequences");
100 if(dum_string != NULL) mixt_tree->eval_alnL = NO;
101
102 dum_string = XML_Get_Attribute_Value(xnd,"ignore.seq");
103 if(dum_string != NULL) mixt_tree->eval_alnL = NO;
104
105 dum_string = XML_Get_Attribute_Value(xnd,"ignore.data");
106 if(dum_string != NULL) mixt_tree->eval_alnL = NO;
107
108
109 dum_string = XML_Get_Attribute_Value(xnd,"mutmap");
110 if(dum_string != NULL)
111 {
112 int select = XML_Validate_Attr_Int(dum_string,6,
113 "true","yes","y",
114 "false","no","n");
115 if(select < 3) mixt_tree->io->mutmap = YES;
116 else mixt_tree->io->mutmap = NO;
117 }
118
119
120
121
122 // Looking for XML node with rate-across-lineage info
123 xnd = XML_Search_Node_Name("lineagerates",YES,xroot);
124
125
126 if(xnd == NULL)
127 {
128 PhyML_Fprintf(stdout,"\n. The model of rate variation across lineages is not specified.");
129 PhyML_Fprintf(stdout,"\n. Using the geometric Brownian model (see Guindon, 2012, Syst. Biol.).\n");
130 mixt_tree->rates->model = GUINDON;
131 mixt_tree->mod->gamma_mgf_bl = YES;
132 strcpy(mixt_tree->rates->model_name,"geometric Brownian");
133 }
134 else
135 {
136 char *model_name;
137 model_name = XML_Get_Attribute_Value(xnd,"model");
138
139 if(model_name == NULL)
140 {
141 PhyML_Fprintf(stderr,"\n. Please specify a model of rate variation across lineages,");
142 PhyML_Fprintf(stderr,"\n. e.g., <lineagerates model=\"geometricbrownian\"/>.");
143 PhyML_Fprintf(stderr,"\n. See the manual for more options.");
144 assert(FALSE);
145 }
146 else
147 {
148 if(!strcmp(model_name,"geometricbrownian"))
149 {
150 mixt_tree->rates->model = GUINDON;
151 mixt_tree->mod->gamma_mgf_bl = YES;
152 strcpy(mixt_tree->rates->model_name,"geometric Brownian");
153 }
154 else if(!strcmp(model_name,"geometric"))
155 {
156 mixt_tree->rates->model = GUINDON;
157 mixt_tree->mod->gamma_mgf_bl = YES;
158 strcpy(mixt_tree->rates->model_name,"geometric Brownian");
159 }
160 else if(!strcmp(model_name,"brownian"))
161 {
162 mixt_tree->rates->model = GUINDON;
163 mixt_tree->mod->gamma_mgf_bl = YES;
164 strcpy(mixt_tree->rates->model_name,"geometric Brownian");
165 }
166 else if(!strcmp(model_name,"geo"))
167 {
168 mixt_tree->rates->model = GUINDON;
169 mixt_tree->mod->gamma_mgf_bl = YES;
170 strcpy(mixt_tree->rates->model_name,"geometric Brownian");
171 }
172 else if(!strcmp(model_name,"lognormal"))
173 {
174 mixt_tree->rates->model = LOGNORMAL;
175 mixt_tree->mod->gamma_mgf_bl = NO;
176 strcpy(mixt_tree->rates->model_name,"lognormal (uncorrelated)");
177 }
178 else if(!strcmp(model_name,"normal"))
179 {
180 mixt_tree->rates->model = LOGNORMAL;
181 mixt_tree->mod->gamma_mgf_bl = NO;
182 strcpy(mixt_tree->rates->model_name,"lognormal (uncorrelated)");
183 }
184 else if(!strcmp(model_name,"strictclock"))
185 {
186 mixt_tree->rates->model = STRICTCLOCK;
187 mixt_tree->mod->gamma_mgf_bl = NO;
188 strcpy(mixt_tree->rates->model_name,"strict clock");
189 }
190 else if(!strcmp(model_name,"clock"))
191 {
192 mixt_tree->rates->model = STRICTCLOCK;
193 mixt_tree->mod->gamma_mgf_bl = NO;
194 strcpy(mixt_tree->rates->model_name,"strict clock");
195 }
196 else
197 {
198 assert(FALSE);
199 }
200 }
201 }
202
203
204 // Looking for calibration info
205 xnd = XML_Search_Node_Name("calibration",YES,xroot);
206
207 if(xnd == NULL)
208 {
209 PhyML_Fprintf(stderr,"\n. No calibration information seems to be provided.");
210 PhyML_Fprintf(stderr,"\n. Please amend your XML file. \n");
211 assert(FALSE);
212 }
213 else
214 {
215 assert(xnd->child);
216 if(XML_Search_Node_Name("upper",NO,xnd->child) == NULL && XML_Search_Node_Name("lower",NO,xnd->child) == NULL)
217 {
218 PhyML_Fprintf(stderr,"\n. There is no calibration information provided. \n");
219 PhyML_Fprintf(stderr,"\n. Please check your data. \n");
220 assert(FALSE);
221 }
222 }
223
224
225
226 /* MIXT_Check_Model_Validity(mixt_tree); */
227 /* MIXT_Init_Model(mixt_tree); */
228 /* Print_Data_Structure(NO,stdout,mixt_tree); */
229 /* tree = MIXT_Starting_Tree(mixt_tree); */
230 /* Copy_Tree(tree,mixt_tree); */
231 /* Free_Tree(tree); */
232 /* MIXT_Connect_Cseqs_To_Nodes(mixt_tree); */
233 /* MIXT_Init_T_Beg(mixt_tree); */
234 /* MIXT_Chain_Edges(mixt_tree); */
235 /* MIXT_Chain_Nodes(mixt_tree); */
236 /* MIXT_Make_Tree_For_Pars(mixt_tree); */
237 /* MIXT_Make_Tree_For_Lk(mixt_tree); */
238 /* MIXT_Make_Spr(mixt_tree); */
239 /* MIXT_Chain_All(mixt_tree); */
240 /* Add_Root(mixt_tree->a_edges[0],mixt_tree); */
241 /* MIXT_Check_Edge_Lens_In_All_Elem(mixt_tree); */
242 /* MIXT_Turn_Branches_OnOff_In_All_Elem(ON,mixt_tree); */
243 /* MIXT_Check_Invar_Struct_In_Each_Partition_Elem(mixt_tree); */
244 /* MIXT_Check_RAS_Struct_In_Each_Partition_Elem(mixt_tree); */
245
246 /* XML_Read_Calibration(xroot,mixt_tree); */
247
248 /* MIXT_Chain_Cal(mixt_tree); */
249
250 seed = (mixt_tree->io->r_seed < 0)?(time(NULL)):(mixt_tree->io->r_seed);
251 srand(seed);
252 mixt_tree->io->r_seed = seed;
253
254
255 MIXT_Check_Model_Validity(mixt_tree);
256 MIXT_Init_Model(mixt_tree);
257 Print_Data_Structure(NO,stdout,mixt_tree);
258 tree = MIXT_Starting_Tree(mixt_tree);
259 Add_Root(tree->a_edges[0],tree);
260 Copy_Tree(tree,mixt_tree);
261 Free_Tree(tree);
262 MIXT_Connect_Cseqs_To_Nodes(mixt_tree);
263 MIXT_Init_T_Beg(mixt_tree);
264 MIXT_Make_Tree_For_Lk(mixt_tree);
265 MIXT_Make_Tree_For_Pars(mixt_tree);
266 MIXT_Make_Spr(mixt_tree);
267 MIXT_Chain_All(mixt_tree);
268 MIXT_Check_Edge_Lens_In_All_Elem(mixt_tree);
269 MIXT_Turn_Branches_OnOff_In_All_Elem(ON,mixt_tree);
270 MIXT_Check_Invar_Struct_In_Each_Partition_Elem(mixt_tree);
271 MIXT_Check_RAS_Struct_In_Each_Partition_Elem(mixt_tree);
272
273 XML_Read_Calibration(xroot,mixt_tree);
274 MIXT_Chain_Cal(mixt_tree);
275
276 res = DATE_MCMC(mixt_tree);
277
278
279 // Cleaning up...
280 RATES_Free_Rates(mixt_tree->rates);
281 RATES_Free_Rates(mixt_tree->extra_tree->rates);
282 TIMES_Free_Times(mixt_tree->times);
283 TIMES_Free_Times(mixt_tree->extra_tree->times);
284 MCMC_Free_MCMC(mixt_tree->mcmc);
285 MCMC_Free_MCMC(mixt_tree->extra_tree->mcmc);
286 Free_Mmod(mixt_tree->mmod);
287 Free_Spr_List_One_Edge(mixt_tree);
288 Free_Tree_Pars(mixt_tree);
289 Free_Tree_Lk(mixt_tree);
290
291 if(mixt_tree->io->fp_out_trees) fclose(mixt_tree->io->fp_out_trees);
292 if(mixt_tree->io->fp_out_tree) fclose(mixt_tree->io->fp_out_tree);
293 if(mixt_tree->io->fp_out_stats) fclose(mixt_tree->io->fp_out_stats);
294 if(mixt_tree->io->fp_out_json_trace) fclose(mixt_tree->io->fp_out_json_trace);
295 Free_Input(mixt_tree->io);
296
297
298 tree = mixt_tree;
299 do
300 {
301 Free_Calign(tree->data);
302 tree = tree->next_mixt;
303 }
304 while(tree);
305
306 tree = mixt_tree;
307 do
308 {
309 Free_Optimiz(tree->mod->s_opt);
310 tree = tree->next;
311 }
312 while(tree);
313
314
315 Free_Model_Complete(mixt_tree->mod);
316 Free_Model_Basic(mixt_tree->mod);
317 Free_Tree(mixt_tree->extra_tree);
318 Free_Tree(mixt_tree);
319 Free(res);
320 XML_Free_XML_Tree(xroot);
321 fclose(fp_xml_in);
322 }
323
324
325 //////////////////////////////////////////////////////////////
326 //////////////////////////////////////////////////////////////
327 // Update t_prior_min and t_prior_max on a given ranked tree
328 // given (primary and secondary) calibration information.
329 // Make sure secondary and primary calibration are up-to-date
DATE_Update_T_Prior_MinMax(t_tree * tree)330 void DATE_Update_T_Prior_MinMax(t_tree *tree)
331 {
332 int i,j;
333
334
335 for(i=0;i<2*tree->n_otu-1;++i) // All nodes
336 {
337 tree->times->t_prior_max[i] = +INFINITY;
338 tree->times->t_prior_min[i] = -INFINITY;
339
340 if(tree->a_nodes[i]->n_cal > 0) // Primary calibration found on that node
341 {
342 for(j=0;j<tree->a_nodes[i]->n_cal;++j)
343 {
344 tree->times->t_prior_max[i] = MIN(tree->times->t_prior_max[i],tree->a_nodes[i]->cal[j]->upper);
345 tree->times->t_prior_min[i] = MAX(tree->times->t_prior_min[i],tree->a_nodes[i]->cal[j]->lower);
346 }
347 }
348 }
349 }
350
351 //////////////////////////////////////////////////////////////
352 //////////////////////////////////////////////////////////////
353
DATE_Assign_Primary_Calibration(t_tree * tree)354 void DATE_Assign_Primary_Calibration(t_tree *tree)
355 {
356 int i,j,idx,node_num;
357 t_clad *clade;
358 t_cal *cal;
359
360 clade = NULL;
361 cal = NULL;
362
363 for(i=0;i<tree->times->n_cal;++i)
364 {
365 cal = tree->times->a_cal[i];
366 if(cal->clade_list != NULL)
367 {
368 clade = cal->clade_list[cal->current_clade_idx];
369 clade->target_nd = NULL;
370 }
371 }
372
373 for(i=0;i<2*tree->n_otu-1;++i)
374 for(j=0;j<MAX_N_CAL;++j)
375 {
376 tree->a_nodes[i]->cal[j] = NULL;
377 tree->a_nodes[i]->n_cal = 0;
378 }
379
380 for(i=0;i<tree->times->n_cal;++i)
381 {
382 cal = tree->times->a_cal[i];
383
384 if(cal->clade_list != NULL)
385 {
386 clade = cal->clade_list[cal->current_clade_idx];
387
388 node_num = Find_Clade(clade->tax_list,
389 clade->n_tax,
390 tree);
391
392 clade->target_nd = tree->a_nodes[node_num];
393
394 idx = tree->a_nodes[node_num]->n_cal;
395 tree->a_nodes[node_num]->cal[idx] = tree->times->a_cal[i];
396 tree->a_nodes[node_num]->n_cal++;
397
398
399 if(tree->a_nodes[node_num]->n_cal == MAX_N_CAL)
400 {
401 PhyML_Fprintf(stderr,"\n. A node cannot have more than %d calibration",MAX_N_CAL);
402 PhyML_Fprintf(stderr,"\n. constraints attached to it. Feel free to increase the");
403 PhyML_Fprintf(stderr,"\n. value of the variable MAX_N_CAL in utilities.h if");
404 PhyML_Fprintf(stderr,"\n. necessary.");
405 Exit("\n");
406 }
407 }
408 }
409 }
410
411 //////////////////////////////////////////////////////////////
412 //////////////////////////////////////////////////////////////
413 // Return splitted calibration intervals. Make sure all primary
414 // and secondary calibration intervals are up-to-date.
DATE_Splitted_Calibration(t_tree * tree)415 phydbl *DATE_Splitted_Calibration(t_tree *tree)
416 {
417 phydbl *minmax,*splitted_cal,buff;
418 int i,len,done;
419
420 // One t_prior_min and one t_prior_max per internal nodes except root, so
421 // 2 x # of internal nodes boundaries in total at most.
422 minmax = (phydbl *)mCalloc(2*(tree->n_otu-2),sizeof(phydbl));
423 For(i,2*(tree->n_otu-2)) minmax[i] = +INFINITY;
424 splitted_cal = (phydbl *)mCalloc((int)(4*tree->n_otu-10),sizeof(phydbl));
425
426
427 len = 0;
428 for(i = tree->n_otu; i < 2*tree->n_otu-1; i++)
429 {
430 if(tree->a_nodes[i] != tree->n_root)
431 {
432 minmax[len] = MAX(tree->times->t_prior_min[i],tree->times->nd_t[tree->n_root->num]);
433 minmax[len+1] = tree->times->t_prior_max[i];
434 len+=2;
435 }
436 }
437
438
439 // Bubble sort of all these times in increasing order
440 do
441 {
442 done = YES;
443 for(i=0;i<len-1;i++)
444 {
445 if(minmax[i] > minmax[i+1])
446 {
447 buff = minmax[i];
448 minmax[i] = minmax[i+1];
449 minmax[i+1] = buff;
450 done = NO;
451 }
452 }
453 }
454 while(done == NO);
455
456 for(i=0;i<len-1;i++) assert(!(minmax[i] > minmax[i+1]));
457
458 // Remove ties
459 for(i=0;i<len-1;i++)
460 if(Are_Equal(minmax[i],minmax[i+1],1.E-6) == YES)
461 minmax[i] = 0.0;
462
463 // Sort again to effectively remove ties
464 do
465 {
466 done = YES;
467 for(i=0;i<len-1;i++)
468 {
469 if(minmax[i] > minmax[i+1])
470 {
471 buff = minmax[i];
472 minmax[i] = minmax[i+1];
473 minmax[i+1] = buff;
474 done = NO;
475 }
476 }
477 }
478 while(done == NO);
479
480 splitted_cal[0] = minmax[0];
481 len = 1;
482 for(i = 1; i < 2*(tree->n_otu-2); i++)
483 {
484 splitted_cal[len] = minmax[i];
485 if(len+1 < 4*tree->n_otu-10) splitted_cal[len+1] = minmax[i];
486 len+=2;
487 }
488
489 /* For(i,4*tree->n_otu-10) PhyML_Printf("\n. split -- %3d %12f",i,splitted_cal[i]); */
490
491 Free(minmax);
492
493 return splitted_cal;
494 }
495
496 //////////////////////////////////////////////////////////////
497 //////////////////////////////////////////////////////////////
498
DATE_J_Sum_Product(t_tree * tree)499 phydbl DATE_J_Sum_Product(t_tree *tree)
500 {
501 phydbl prod, total,*splitted_cal;
502 int fact,idx,ans,rk;
503
504 DATE_Assign_Primary_Calibration(tree);
505 DATE_Update_T_Prior_MinMax(tree);
506
507 splitted_cal = DATE_Splitted_Calibration(tree);
508
509 ans = 0;
510 total = 0.0;
511 idx = 0;
512 rk = 1;
513 do
514 {
515 prod = 1.0;
516 fact = 1;
517 ans = DATE_J_Sum_Product_Pre(tree->a_nodes[tree->times->t_rank[rk]], // Oldest node after root (as rk=1)
518 idx,
519 -1,
520 prod,fact,&total,splitted_cal,rk,tree);
521 idx+=2;
522 }
523 while(ans != 1);
524
525 Free(splitted_cal);
526
527 return(total);
528 }
529
530 //////////////////////////////////////////////////////////////
531 //////////////////////////////////////////////////////////////
532
DATE_J_Sum_Product_Pre(t_node * d,int split_idx_d,int split_idx_a,phydbl prod,int fact,phydbl * total,phydbl * splitted_cal,int rk,t_tree * tree)533 int DATE_J_Sum_Product_Pre(t_node *d, int split_idx_d, int split_idx_a, phydbl prod, int fact, phydbl *total, phydbl *splitted_cal, int rk, t_tree *tree)
534 {
535 int ans,idx;
536
537 ans = DATE_Is_Split_Accessible(d,split_idx_d,splitted_cal,tree);
538
539 switch(ans)
540 {
541 case 1 : // split interval is younger than t_prior_max. No need to go further.
542 {
543 return ans;
544 break;
545 }
546 case 0 : // split interval is within [t_prior_min,t_prior_max]
547 {
548 int local_ans;
549
550 // Calculate J for this time interval
551 prod *= DATE_J(tree->times->birth_rate,
552 tree->times->death_rate,
553 FABS(splitted_cal[split_idx_d+1]),
554 FABS(splitted_cal[split_idx_d]));
555
556 // Remove factorial term from current product
557 prod *= fact;
558
559 if(split_idx_d == split_idx_a) fact++;
560 else fact = 1;
561
562 prod /= fact;
563
564 if(tree->times->t_rank[tree->n_otu-2] == d->num) // Youngest internal node
565 {
566 (*total) += prod;
567 return 0;
568 }
569
570 idx = split_idx_d;
571 do
572 {
573 local_ans = DATE_J_Sum_Product_Pre(tree->a_nodes[tree->times->t_rank[rk+1]],
574 idx,
575 split_idx_d,
576 prod,fact,total,splitted_cal,rk+1,tree);
577 idx+=2;
578 }
579 while(local_ans == 0);
580
581 break;
582 }
583 case -1 : // split interval is older than t_prior_min. Move forward.
584 {
585 int local_ans;
586 // Advance to younger split intervals and stop once you're in
587 idx = split_idx_d+2;
588 do
589 {
590 local_ans = DATE_J_Sum_Product_Pre(d,
591 idx,
592 idx+1,
593 prod,fact,total,splitted_cal,rk,tree);
594 idx+=2;
595 }
596 while(local_ans == -1);
597 break;
598 }
599 }
600
601 return ans;
602 }
603 //////////////////////////////////////////////////////////////
604 //////////////////////////////////////////////////////////////
605
DATE_Is_Split_Accessible(t_node * d,int which,phydbl * splitted_cal,t_tree * tree)606 int DATE_Is_Split_Accessible(t_node *d, int which, phydbl *splitted_cal, t_tree *tree)
607 {
608 phydbl eps;
609
610 assert(d->tax == NO);
611
612 eps = FABS(tree->times->t_prior_min[d->num]) / 1.E+6;
613
614 assert(eps > MDBL_MIN);
615
616 // Upper and lower bound of splitted calibration interval are equal to zero
617 if(Are_Equal(splitted_cal[which],0.0,eps) && Are_Equal(splitted_cal[which+1],0.0,eps)) return +1;
618
619
620 if(Are_Equal(tree->times->t_prior_min[d->num],splitted_cal[which],eps) ||
621 Are_Equal(tree->times->t_prior_max[d->num],splitted_cal[which+1],eps) ||
622 (tree->times->t_prior_min[d->num] < splitted_cal[which] &&
623 tree->times->t_prior_max[d->num] > splitted_cal[which+1])) return 0; // splitted interval is within [t_prior_min,t_prior_max]
624 else if(Are_Equal(tree->times->t_prior_max[d->num],splitted_cal[which],eps) ||
625 splitted_cal[which] > tree->times->t_prior_max[d->num]) return +1; // splitted interval is younger than [t_prior_min,t_prior_max]
626 else if(Are_Equal(tree->times->t_prior_min[d->num],splitted_cal[which+1],eps) ||
627 splitted_cal[which+1] < tree->times->t_prior_min[d->num]) return -1; // splitted interval is older than [t_prior_min,t_prior_max]
628 else
629 {
630 PhyML_Printf("\n. d->num: %d d->tax: %d",d->num,d->tax);
631 PhyML_Printf("\n. t_prior_min: %f t_prior_max: %f",
632 tree->times->t_prior_min[d->num],
633 tree->times->t_prior_max[d->num]);
634 PhyML_Printf("\n. splitted_cal_min: %f splitted_cal_max: %f",
635 splitted_cal[which],
636 splitted_cal[which+1]);
637 PhyML_Printf("\n");
638 assert(FALSE); // splitted interval cannot be partially overlapping [t_prior_min,t_prior_max]
639 }
640 return(0);
641 }
642
643 //////////////////////////////////////////////////////////////
644 //////////////////////////////////////////////////////////////
645
DATE_J(phydbl birth_r,phydbl death_r,phydbl t_min,phydbl t_pls)646 phydbl DATE_J(phydbl birth_r, phydbl death_r, phydbl t_min, phydbl t_pls)
647 {
648 phydbl d,b,J;
649 assert(t_pls > t_min);
650 d = death_r;
651 b = birth_r;
652 J = (b-d)*(exp(t_min*d+t_pls*b) - exp(t_min*b+t_pls*d));
653 J /= ((b*exp(t_min*b)-d*exp(t_min*d)) * (b*exp(t_pls*b)-d*exp(t_pls*d)));
654 /* printf(" J : %f",J); */
655 return(J);
656 }
657
658 //////////////////////////////////////////////////////////////
659 //////////////////////////////////////////////////////////////
660
DATE_Check_Calibration_Constraints(t_tree * tree)661 int DATE_Check_Calibration_Constraints(t_tree *tree)
662 {
663 int i,j;
664 phydbl lower,upper;
665
666 lower = upper = -1.;
667
668 For(i,2*tree->n_otu-1)
669 {
670 if(tree->a_nodes[i]->n_cal > 1)
671 {
672 lower = tree->a_nodes[i]->cal[0]->lower;
673 upper = tree->a_nodes[i]->cal[0]->upper;
674 for(j=1; j < tree->a_nodes[i]->n_cal; j++)
675 {
676 lower = MAX(lower,tree->a_nodes[i]->cal[j]->lower);
677 upper = MIN(upper,tree->a_nodes[i]->cal[j]->upper);
678 if(upper < lower)
679 {
680 /* PhyML_Printf("\n. Inconsistency detected on node %d",i); */
681 return 0;
682 }
683 }
684 }
685 }
686 return 1;
687 }
688
689 //////////////////////////////////////////////////////////////
690 //////////////////////////////////////////////////////////////
691 // Check that time constraints are satisfied. Note: it is a
692 // requirement to verify that the age of the root node is also
693 // within correct boundaries
DATE_Check_Time_Constraints(t_tree * tree)694 int DATE_Check_Time_Constraints(t_tree *tree)
695 {
696 for(int i=0;i<2*tree->n_otu-1;++i)
697 {
698 if(tree->a_nodes[i]->tax == NO)
699 {
700 if(tree->times->nd_t[i] > tree->times->t_prior_max[i] ||
701 tree->times->nd_t[i] < tree->times->t_prior_min[i])
702 {
703 /* PhyML_Printf("\n!!! Node %d t: %f min:%f max:%f", */
704 /* i, */
705 /* tree->times->nd_t[i], */
706 /* tree->times->t_prior_min[i], */
707 /* tree->times->t_prior_max[i]); */
708 return 0;
709 }
710 }
711 }
712 return 1;
713 }
714
715 //////////////////////////////////////////////////////////////
716 //////////////////////////////////////////////////////////////
717
DATE_MCMC(t_tree * tree)718 phydbl *DATE_MCMC(t_tree *tree)
719 {
720 t_mcmc *mcmc;
721 char *s_tree;
722 int move, n_vars, i, j, adjust_len;
723 phydbl u;
724 phydbl *res;
725 FILE *fp_stats,*fp_tree;
726 int t_beg;
727
728 t_beg = (int)time(NULL);
729
730 fp_stats = tree->io->fp_out_stats;
731 fp_tree = tree->io->fp_out_tree;
732
733 if(tree->io->mutmap == YES) Make_MutMap(tree);
734
735 TIMES_Randomize_Tree_With_Time_Constraints(tree->times->a_cal[0],tree);
736 MIXT_Propagate_Tree_Update(tree);
737
738
739 mcmc = MCMC_Make_MCMC_Struct();
740 tree->mcmc = mcmc;
741 MCMC_Init_MCMC_Struct(NULL,NULL,mcmc);
742 MCMC_Complete_MCMC(mcmc,tree);
743
744 MCMC_Randomize_Birth(tree);
745 MCMC_Randomize_Death(tree);
746 MCMC_Randomize_Clock_Rate(tree);
747 MCMC_Randomize_Rate_Across_Sites(tree);
748 MCMC_Randomize_Rates(tree);
749
750
751 n_vars = 10;
752 adjust_len = tree->io->mcmc->chain_len_burnin;
753 mcmc->sample_interval = tree->io->mcmc->sample_interval;
754 mcmc->print_every = tree->io->mcmc->print_every;
755 mcmc->chain_len = tree->io->mcmc->chain_len;
756 mcmc->chain_len_burnin = tree->io->mcmc->chain_len_burnin;
757 tree->rates->bl_from_rt = YES;
758
759 res = (phydbl *)mCalloc(tree->mcmc->chain_len / tree->mcmc->sample_interval * n_vars,sizeof(phydbl));
760
761 Set_Both_Sides(YES,tree);
762 Set_Update_Eigen(YES,tree->mod);
763 Lk(NULL,tree);
764 Set_Update_Eigen(NO,tree->mod);
765
766
767 RATES_Lk_Rates(tree);
768 DATE_Assign_Primary_Calibration(tree);
769 TIMES_Lk_Times(NO,tree);
770
771 /* Time_To_Branch(tree); */
772 /* tree->bl_ndigits = 1; */
773 /* printf("\n. Random init tree: %s",Write_Tree(tree)); */
774 /* tree->bl_ndigits = 7; */
775 RATES_Update_Cur_Bl(tree);
776
777
778 PhyML_Printf("\n. AVX enabled: %s",
779 #if defined(__AVX__)
780 "yes"
781 #else
782 "no"
783 #endif
784 );
785 PhyML_Printf("\n. SSE enabled: %s",
786 #if defined(__SSE3__)
787 "yes"
788 #else
789 "no"
790 #endif
791 );
792
793 PhyML_Printf("\n\n. Seed: %d",tree->io->r_seed);
794 PhyML_Printf("\n. Ignore sequences: %s",tree->eval_alnL == YES ? "no" : "yes");
795 PhyML_Printf("\n. Model of variation of rates across lineages: %s",tree->rates->model_name);
796 PhyML_Printf("\n. log(Pr(Seq|Tree)) = %f",tree->c_lnL);
797 PhyML_Printf("\n. log(Pr(Tree)) = %f",tree->times->c_lnL_times);
798
799
800 tree->extra_tree = Make_Tree_From_Scratch(tree->n_otu,tree->data);
801 tree->extra_tree->mod = tree->mod;
802 Copy_Tree(tree,tree->extra_tree);
803
804 tree->extra_tree->rates = RATES_Make_Rate_Struct(tree->n_otu);
805 RATES_Init_Rate_Struct(tree->extra_tree->rates,NULL,tree->n_otu);
806 RATES_Copy_Rate_Struct(tree->rates,tree->extra_tree->rates,tree->n_otu);
807 tree->extra_tree->rates->model = LOGNORMAL;
808
809 tree->extra_tree->times = TIMES_Make_Time_Struct(tree->n_otu);
810 TIMES_Init_Time_Struct(tree->extra_tree->times,NULL,tree->n_otu);
811 TIMES_Copy_Time_Struct(tree->times,tree->extra_tree->times,tree->n_otu);
812
813 RATES_Duplicate_Calib_Struct(tree,tree->extra_tree);
814 MIXT_Chain_Cal(tree->extra_tree);
815 DATE_Assign_Primary_Calibration(tree->extra_tree);
816 TIMES_Randomize_Tree_With_Time_Constraints(tree->extra_tree->times->a_cal[0],tree->extra_tree);
817
818
819 TIMES_Lk_Times(NO,tree->extra_tree);
820 PhyML_Printf("\n. log(Pr(extra tree)) = %f",tree->extra_tree->times->c_lnL_times);
821 mcmc = MCMC_Make_MCMC_Struct();
822 tree->extra_tree->mcmc = mcmc;
823 MCMC_Init_MCMC_Struct(NULL,NULL,mcmc);
824 MCMC_Complete_MCMC(mcmc,tree->extra_tree);
825
826 PhyML_Fprintf(fp_stats,"\n%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t",
827 "sample",
828 "lnL(posterior)",
829 "lnL(seq)",
830 "lnL(times)",
831 "lnL(rates)",
832 "birth",
833 "death",
834 "clock",
835 "root",
836 "tstv",
837 "nu");
838 for(i=0;i<tree->mod->ras->n_catg;i++) PhyML_Fprintf(fp_stats,"rr%d\t",i);
839 for(i=0;i<tree->mod->ras->n_catg;i++) PhyML_Fprintf(fp_stats,"pr%d\t",i);
840
841
842 for(i=0;i<tree->times->n_cal;++i)
843 {
844 t_cal *cal = tree->times->a_cal[i];
845 for(j=0;j<cal->clade_list_size;++j)
846 {
847 t_clad *clade = cal->clade_list[j];
848 PhyML_Fprintf(fp_stats,"t(calib:%s_clade:%s)\t",cal->id,clade->id);
849 }
850 }
851
852 for(i=0;i<tree->times->n_cal;++i)
853 {
854 t_cal *cal = tree->times->a_cal[i];
855 PhyML_Fprintf(fp_stats,"clade(calib:%s)\t",cal->id);
856 }
857
858
859 if(tree->rates->model == THORNE ||
860 tree->rates->model == LOGNORMAL ||
861 tree->rates->model == STRICTCLOCK)
862 for(i=0;i<2*tree->n_otu-2;++i) PhyML_Fprintf(fp_stats,"br%d\t",i);
863 else
864 for(i=0;i<2*tree->n_otu-1;++i) PhyML_Fprintf(fp_stats,"nr%d\t",i);
865
866 for(i=0;i<2*tree->n_otu-1;++i) if(tree->a_nodes[i]->tax == NO) PhyML_Fprintf(fp_stats,"t%d\t",i);
867
868 PhyML_Fprintf(fp_stats,"accRT\t");
869 PhyML_Fprintf(fp_stats,"tuneRT\t");
870
871 PhyML_Fprintf(fp_stats,"accT\t");
872 PhyML_Fprintf(fp_stats,"tuneT\t");
873
874 PhyML_Fprintf(fp_stats,"accClock\t");
875 PhyML_Fprintf(fp_stats,"tuneClock\t");
876
877 PhyML_Fprintf(fp_stats,"accTCr\t");
878 PhyML_Fprintf(fp_stats,"tuneTCr\t");
879
880 PhyML_Fprintf(fp_stats,"accTreeRates\t");
881 PhyML_Fprintf(fp_stats,"tuneTreeRates\t");
882
883 PhyML_Fprintf(fp_stats,"accSprW\t");
884 PhyML_Fprintf(fp_stats,"tuneSprW\t");
885
886 PhyML_Fprintf(fp_stats,"accSpr\t");
887 PhyML_Fprintf(fp_stats,"tuneSpr\t");
888
889 PhyML_Fprintf(fp_stats,"accSprLoc\t");
890 PhyML_Fprintf(fp_stats,"tuneSprLoc\t");
891
892 fflush(NULL);
893
894 PhyML_Printf("\n\n");
895 PhyML_Printf("\n. MCMC settings. Chain length: %g steps.",(phydbl)tree->mcmc->chain_len);
896 PhyML_Printf("\n. MCMC settings. Burnin: %g steps.",(phydbl)tree->mcmc->chain_len_burnin);
897 PhyML_Printf("\n. MCMC settings. Sample every %g steps.",(phydbl)tree->mcmc->sample_interval);
898 PhyML_Printf("\n. MCMC settings. Print out every %g steps.",(phydbl)tree->mcmc->print_every);
899 PhyML_Printf("\n\n");
900
901
902 for(i=0;i<tree->mcmc->n_moves;i++) tree->mcmc->start_ess[i] = YES;
903 Set_Both_Sides(NO,tree);
904 tree->mcmc->always_yes = NO;
905 move = -1;
906
907 // Get in the range of sensible values for clock_r
908 i = 0;
909 do { MCMC_Clock_R(tree); } while(i++ < 100);
910
911 do
912 {
913 if(tree->mcmc->run > adjust_len) for(i=0;i<tree->mcmc->n_moves;i++) tree->mcmc->adjust_tuning[i] = NO;
914
915 if(tree->c_lnL < UNLIKELY + 0.1 && tree->eval_alnL == YES)
916 {
917 PhyML_Printf("\n. Move '%s' failed\n",tree->mcmc->move_name[move]);
918 assert(FALSE);
919 }
920
921 u = Uni();
922 for(move=0;move<tree->mcmc->n_moves;move++) if(tree->mcmc->move_weight[move] > u-1.E-10) break;
923
924 assert(!(move == tree->mcmc->n_moves));
925
926 if(!strcmp(tree->mcmc->move_name[move],"clock")) MCMC_Clock_R(tree);
927 else if(!strcmp(tree->mcmc->move_name[move],"birth_rate")) MCMC_Birth_Rate(tree);
928 else if(!strcmp(tree->mcmc->move_name[move],"death_rate")) MCMC_Death_Rate(tree);
929 else if(!strcmp(tree->mcmc->move_name[move],"birth_death_updown")) MCMC_Birth_Death_Updown(tree);
930 else if(!strcmp(tree->mcmc->move_name[move],"tree_height")) MCMC_Tree_Height(tree);
931 else if(!strcmp(tree->mcmc->move_name[move],"times")) MCMC_Times_All(tree);
932 else if(!strcmp(tree->mcmc->move_name[move],"times_and_rates")) MCMC_Times_And_Rates_All(tree);
933
934 else if(!strcmp(tree->mcmc->move_name[move],"spr")) MCMC_Prune_Regraft(tree);
935 else if(!strcmp(tree->mcmc->move_name[move],"spr_local")) MCMC_Prune_Regraft_Local(tree);
936 else if(!strcmp(tree->mcmc->move_name[move],"spr_weighted")) MCMC_Prune_Regraft_Weighted(tree);
937
938 else if(!strcmp(tree->mcmc->move_name[move],"updown_t_cr")) MCMC_Updown_T_Cr(tree);
939 else if(!strcmp(tree->mcmc->move_name[move],"kappa")) MCMC_Kappa(tree);
940 else if(!strcmp(tree->mcmc->move_name[move],"ras")) MCMC_Rate_Across_Sites(tree);
941 else if(!strcmp(tree->mcmc->move_name[move],"nu")) MCMC_Nu(tree);
942 else if(!strcmp(tree->mcmc->move_name[move],"subtree_height")) MCMC_Subtree_Height(tree);
943 else if(!strcmp(tree->mcmc->move_name[move],"time_slice")) MCMC_Time_Slice(tree);
944 else if(!strcmp(tree->mcmc->move_name[move],"br_rate")) MCMC_Rates_All(tree);
945 else if(!strcmp(tree->mcmc->move_name[move],"tree_rates")) MCMC_Tree_Rates(tree);
946 else if(!strcmp(tree->mcmc->move_name[move],"clade_change")) MCMC_Clade_Change(tree);
947 else continue;
948
949
950 phydbl cur_lk = tree->c_lnL;
951 phydbl new_lk = Lk(NULL,tree);
952 if(Are_Equal(cur_lk,new_lk,1.E-5) == NO)
953 {
954 PhyML_Printf("\n. move: %s",tree->mcmc->move_name[move]);
955 PhyML_Printf("\n. new: %f cur: %f",new_lk,cur_lk);
956 assert(FALSE);
957 }
958
959
960 if(!RATES_Check_Edge_Length_Consistency(tree))
961 {
962 PhyML_Fprintf(stderr,"\n. Issue detected by RATES_Check_Edge_Length_Consistency(tree).");
963 PhyML_Fprintf(stderr,"\n. Move: %s",tree->mcmc->move_name[move]);
964 if(tree->eval_alnL == YES) assert(FALSE);
965 }
966
967 if(!TIMES_Check_Node_Height_Ordering(tree))
968 {
969 PhyML_Fprintf(stderr,"\n. Issue detected by TIMES_Check_Node_Height_Ordering(tree).");
970 PhyML_Fprintf(stderr,"\n. Move: %s",tree->mcmc->move_name[move]);
971 assert(FALSE);
972 }
973
974
975 if(!(tree->times->c_lnL_times > UNLIKELY))
976 {
977 PhyML_Fprintf(stderr,"\n. move: %s",tree->mcmc->move_name[move]);
978 PhyML_Fprintf(stderr,"\n. glnL=%f",tree->times->c_lnL_times);
979 assert(FALSE);
980 }
981
982 (void)signal(SIGINT,MCMC_Terminate);
983
984 if(!(tree->mcmc->run%tree->mcmc->print_every))
985 {
986 phydbl mean_r,post;
987
988 mean_r = RATES_Average_Substitution_Rate(tree);
989 post = Get_Lk(tree) + tree->times->c_lnL_times + tree->rates->c_lnL_rates;
990
991 if(tree->mcmc->run < adjust_len) PhyML_Printf("\nx");
992 else PhyML_Printf("\n.");
993 PhyML_Printf(" %10d lnL: [%12.2f -- %12.2f -- %12.2f -- %12.2f] root age: %12f [time: %7d sec] clock: %15f %20s",
994 tree->mcmc->run,
995 post,
996 Get_Lk(tree),
997 tree->times->c_lnL_times,
998 tree->rates->c_lnL_rates,
999 fabs(tree->times->nd_t[tree->n_root->num]),
1000 (int)time(NULL) - t_beg,
1001 mean_r,
1002 tree->mcmc->move_name[move]);
1003 }
1004
1005 if(!(tree->mcmc->run%tree->mcmc->sample_interval))
1006 {
1007 phydbl mean_r,post;
1008
1009 mean_r = RATES_Average_Substitution_Rate(tree);
1010 post = Get_Lk(tree) + tree->times->c_lnL_times + tree->rates->c_lnL_rates;
1011
1012 PhyML_Fprintf(fp_stats,"\n%6d\t%9.1f\t%9.1f\t%9.1f\t%9.1f\t%12G\t%12G\t%12G\t%12G\t%12G\t%12G\t",
1013 tree->mcmc->run,
1014 post,
1015 Get_Lk(tree),
1016 tree->times->c_lnL_times,
1017 tree->rates->c_lnL_rates,
1018 tree->times->birth_rate,
1019 tree->times->death_rate,
1020 mean_r,
1021 fabs(tree->times->nd_t[tree->n_root->num]),
1022 tree->next->mod->kappa->v,
1023 tree->rates->nu);
1024
1025 for(i=0;i<tree->mod->ras->n_catg;i++) PhyML_Fprintf(fp_stats,"%G\t",tree->mod->ras->gamma_rr->v[i]);
1026 for(i=0;i<tree->mod->ras->n_catg;i++) PhyML_Fprintf(fp_stats,"%G\t",tree->mod->ras->gamma_r_proba->v[i]);
1027
1028
1029 for(i=0;i<tree->times->n_cal;i++)
1030 {
1031 t_cal *cal = tree->times->a_cal[i];
1032 for(j=0;j<cal->clade_list_size;++j)
1033 {
1034 t_clad *clade = cal->clade_list[j];
1035 PhyML_Fprintf(fp_stats,"%G\t",fabs(tree->times->nd_t[clade->target_nd->num]));
1036 }
1037 }
1038
1039 for(i=0;i<tree->times->n_cal;++i)
1040 {
1041 t_cal *cal = tree->times->a_cal[i];
1042 PhyML_Fprintf(fp_stats,"%d\t",cal->current_clade_idx);
1043 /* PhyML_Fprintf(fp_stats,"%s\t",cal->clade_list[cal->current_clade_idx]->id); */
1044 }
1045
1046 if(tree->rates->model == THORNE ||
1047 tree->rates->model == LOGNORMAL ||
1048 tree->rates->model == STRICTCLOCK)
1049 for(i=0;i<2*tree->n_otu-2;++i) PhyML_Fprintf(fp_stats,"%G\t",tree->rates->br_r[i]);
1050 else
1051 for(i=0;i<2*tree->n_otu-1;++i) PhyML_Fprintf(fp_stats,"%G\t",tree->rates->nd_r[i]);
1052
1053 for(i=0;i<2*tree->n_otu-1;++i) if(tree->a_nodes[i]->tax == NO) PhyML_Fprintf(fp_stats,"%G\t",fabs(tree->times->nd_t[i]));
1054
1055 PhyML_Fprintf(fp_stats,"%G\t",tree->mcmc->acc_rate[tree->mcmc->num_move_times_and_rates_root]);
1056 PhyML_Fprintf(fp_stats,"%G\t",tree->mcmc->tune_move[tree->mcmc->num_move_times_and_rates_root]);
1057
1058 PhyML_Fprintf(fp_stats,"%G\t",tree->mcmc->acc_rate[tree->mcmc->num_move_root_time]);
1059 PhyML_Fprintf(fp_stats,"%G\t",tree->mcmc->tune_move[tree->mcmc->num_move_root_time]);
1060
1061 PhyML_Fprintf(fp_stats,"%G\t",tree->mcmc->acc_rate[tree->mcmc->num_move_clock_r]);
1062 PhyML_Fprintf(fp_stats,"%G\t",tree->mcmc->tune_move[tree->mcmc->num_move_clock_r]);
1063
1064 PhyML_Fprintf(fp_stats,"%G\t",tree->mcmc->acc_rate[tree->mcmc->num_move_updown_t_cr]);
1065 PhyML_Fprintf(fp_stats,"%G\t",tree->mcmc->tune_move[tree->mcmc->num_move_updown_t_cr]);
1066
1067 PhyML_Fprintf(fp_stats,"%G\t",tree->mcmc->acc_rate[tree->mcmc->num_move_tree_rates]);
1068 PhyML_Fprintf(fp_stats,"%G\t",tree->mcmc->tune_move[tree->mcmc->num_move_tree_rates]);
1069
1070 PhyML_Fprintf(fp_stats,"%G\t",tree->mcmc->acc_rate[tree->mcmc->num_move_spr_weighted]);
1071 PhyML_Fprintf(fp_stats,"%G\t",tree->mcmc->tune_move[tree->mcmc->num_move_spr_weighted]);
1072
1073 PhyML_Fprintf(fp_stats,"%G\t",tree->mcmc->acc_rate[tree->mcmc->num_move_spr]);
1074 PhyML_Fprintf(fp_stats,"%G\t",tree->mcmc->tune_move[tree->mcmc->num_move_spr]);
1075
1076 PhyML_Fprintf(fp_stats,"%G\t",tree->mcmc->acc_rate[tree->mcmc->num_move_spr_local]);
1077 PhyML_Fprintf(fp_stats,"%G\t",tree->mcmc->tune_move[tree->mcmc->num_move_spr_local]);
1078
1079 if(tree->mcmc->sample_num == 0)
1080 {
1081 PhyML_Fprintf(fp_tree,"\n#NEXUS");
1082 PhyML_Fprintf(fp_tree,"\nBEGIN TREES;");
1083 }
1084 else
1085 {
1086 fseek(fp_tree,-5,SEEK_CUR);
1087 }
1088
1089
1090 TIMES_Time_To_Bl(tree);
1091 tree->bl_ndigits = 3;
1092 /* RATES_Update_Cur_Bl(tree); */
1093 s_tree = Write_Tree(tree);
1094 tree->bl_ndigits = 7;
1095 PhyML_Fprintf(fp_tree,"\ntree %d [&lnP=%f] = [&R] %s",tree->mcmc->sample_num,tree->c_lnL,s_tree);
1096 Free(s_tree);
1097 PhyML_Fprintf(fp_tree,"\nEND;");
1098 fflush(NULL);
1099 RATES_Update_Cur_Bl(tree);
1100
1101
1102 if(tree->mcmc->run > tree->mcmc->chain_len_burnin &&
1103 tree->io->mutmap == YES) Sample_Ancestral_Seq(YES,NO,tree);
1104
1105 tree->mcmc->sample_num++;
1106 }
1107
1108 tree->mcmc->run++;
1109 MCMC_Get_Acc_Rates(tree->mcmc);
1110 }
1111 while(tree->mcmc->run < tree->mcmc->chain_len);
1112
1113
1114 return(res);
1115 }
1116
1117 //////////////////////////////////////////////////////////////
1118 //////////////////////////////////////////////////////////////
1119 // Update the list of nodes that are younger than lim
DATE_List_Of_Nodes_Younger_Than(t_node * a,t_node * d,phydbl lim,t_ll ** list,t_tree * tree)1120 void DATE_List_Of_Nodes_Younger_Than(t_node *a, t_node *d, phydbl lim, t_ll **list, t_tree *tree)
1121 {
1122 if(tree->times->nd_t[d->num] > lim) Push_Bottom_Linked_List(d,list,YES);
1123
1124 if(d->tax == YES) return;
1125 else
1126 {
1127 int i;
1128
1129 if(d == tree->n_root)
1130 {
1131 DATE_List_Of_Nodes_Younger_Than(d,d->v[1],lim,list,tree);
1132 DATE_List_Of_Nodes_Younger_Than(d,d->v[2],lim,list,tree);
1133 }
1134 else
1135 {
1136 for(i=0;i<3;i++)
1137 if(d->v[i] != a && d->b[i] != tree->e_root)
1138 DATE_List_Of_Nodes_Younger_Than(d,d->v[i],lim,list,tree);
1139 }
1140 }
1141 }
1142
1143 //////////////////////////////////////////////////////////////
1144 //////////////////////////////////////////////////////////////
1145 // Update the list of nodes that are younger than lim with direct ancestors
1146 // not younger than lim
DATE_List_Of_Nodes_And_Ancestors_Younger_Than(t_node * a,t_node * d,phydbl lim,t_ll ** list,t_tree * tree)1147 void DATE_List_Of_Nodes_And_Ancestors_Younger_Than(t_node *a, t_node *d, phydbl lim, t_ll **list, t_tree *tree)
1148 {
1149 if(tree->times->nd_t[d->num] > lim && a != NULL && tree->times->nd_t[a->num] > lim) Push_Bottom_Linked_List(d,list,YES);
1150
1151 if(d->tax == YES) return;
1152 else
1153 {
1154 int i;
1155
1156 if(d == tree->n_root)
1157 {
1158 DATE_List_Of_Nodes_And_Ancestors_Younger_Than(d,d->v[1],lim,list,tree);
1159 DATE_List_Of_Nodes_And_Ancestors_Younger_Than(d,d->v[2],lim,list,tree);
1160 }
1161 else
1162 {
1163 for(i=0;i<3;i++)
1164 if(d->v[i] != a && d->b[i] != tree->e_root)
1165 DATE_List_Of_Nodes_And_Ancestors_Younger_Than(d,d->v[i],lim,list,tree);
1166 }
1167 }
1168 }
1169
1170
1171 //////////////////////////////////////////////////////////////
1172 //////////////////////////////////////////////////////////////
1173 // List of valid regraft nodes, taking into account calibration
1174 // constraints. The subtree (defined by prune and prune_daughter
1175 // will be re-attached *on top of* one of the nodes in this list
1176 // (as opposed to *on one of the sister edges below*).
DATE_List_Of_Regraft_Nodes(t_node * prune,t_node * prune_daughter,phydbl * t_min,phydbl * t_max,int verbose,t_tree * tree)1177 t_ll *DATE_List_Of_Regraft_Nodes(t_node *prune, t_node *prune_daughter, phydbl *t_min, phydbl *t_max, int verbose, t_tree *tree)
1178 {
1179 t_node *n,*m;
1180 int i,j;
1181 t_ll *out,*in,*ll;
1182 int is_clade_affected;
1183 t_clad *clade;
1184 t_cal *cal;
1185
1186 cal = NULL;
1187 clade = NULL;
1188 n = NULL;
1189 m = NULL;
1190 *t_min = -INFINITY;
1191 in = NULL;
1192 out = NULL;
1193 is_clade_affected = NO;
1194
1195
1196 // Find the oldest LCA of calibrated sets among the nodes between
1197 // prune and root. These clades might see the position of their
1198 // LCA change.
1199
1200 if(prune != tree->n_root)
1201 {
1202 n = prune;
1203 while(n)
1204 {
1205 for(i=0;i<tree->times->n_cal;++i)
1206 {
1207 // That node is the LCA of calibration a_cal[i]
1208 cal = tree->times->a_cal[i];
1209 clade = cal->clade_list[cal->current_clade_idx];
1210
1211 if(n == clade->target_nd)
1212 {
1213 is_clade_affected = NO;
1214
1215 for(j=0;j<clade->n_tax;++j)
1216 {
1217 m = clade->tip_list[j];
1218 do
1219 {
1220 if(m == prune_daughter)
1221 {
1222 is_clade_affected = YES;
1223 break;
1224 }
1225 m = m->anc;
1226 }
1227 while(m);
1228
1229 if(is_clade_affected == YES) break;
1230 }
1231
1232
1233 // Maximum of the lower bounds for calibration intervals
1234 /* if(is_clade_affected == YES) *t_min = MAX(*t_min,tree->times->a_cal[i]->lower); */
1235 if(is_clade_affected == YES) *t_min = MAX(*t_min,tree->times->t_prior_min[n->num]);
1236 }
1237 }
1238 n = n->anc;
1239 }
1240 }
1241
1242 // Find the oldest internal node within intervals defined by
1243 // calibrations affected by the pruning.
1244 n = prune_daughter;
1245 while(n->anc && !(tree->times->nd_t[n->anc->num] < *t_min))
1246 {
1247 n = n->anc;
1248 assert(n);
1249 }
1250
1251
1252 if(verbose)
1253 {
1254 PhyML_Printf("\n. Apical: %d @ time %f min: %f",n->num,tree->times->nd_t[n->num],*t_min);
1255 fflush(NULL);
1256 }
1257
1258 // List all nodes younger than this apical node
1259 DATE_List_Of_Nodes_Younger_Than(n->anc,n,-INFINITY,&in,tree);
1260
1261 assert(in != NULL);
1262
1263 if(verbose)
1264 {
1265 ll = in->head;
1266 t_node *x;
1267 do
1268 {
1269 x = (t_node *)ll->v;
1270 PhyML_Printf("\nx Inlist %d @ %f",x->num,tree->times->nd_t[x->num]);
1271 ll = ll->next;
1272 }
1273 while(ll != NULL);
1274 }
1275
1276 // Remove from that list the nodes that are too young to be suitable regraft points.
1277
1278
1279 n = prune_daughter;
1280 out = NULL;
1281 while(n)
1282 {
1283 for(i=0;i<tree->times->n_cal;i++)
1284 {
1285 cal = tree->times->a_cal[i];
1286 clade = cal->clade_list[cal->current_clade_idx];
1287
1288 if(n->anc && n->anc == clade->target_nd)
1289 {
1290 for(j=0;j<clade->n_tax;++j)
1291 {
1292 m = clade->tip_list[j];
1293 do
1294 {
1295 if(m == prune_daughter) break;
1296 if(m == prune) break;
1297 m = m->anc;
1298 }
1299 while(m);
1300 if(m == prune) break; // Prune-regraft anywhere below calibrated node will not change that node.
1301 }
1302
1303 if(m != prune)
1304 {
1305 for(j=0;j<3;++j)
1306 {
1307 if(n->anc->v[j] != n->anc->anc && n->anc->b[j] != tree->e_root && n->anc->v[j] != n)
1308 {
1309 DATE_List_Of_Nodes_And_Ancestors_Younger_Than(n->anc,
1310 n->anc->v[j],
1311 tree->times->a_cal[i]->upper,
1312 &out,
1313 tree);
1314 break;
1315 }
1316 }
1317 }
1318 }
1319 }
1320 n = n->anc;
1321 }
1322
1323 // Remove nodes that are `strictly' younger than prune_daughter
1324 DATE_List_Of_Nodes_And_Ancestors_Younger_Than(tree->n_root,tree->n_root->v[1],tree->times->nd_t[prune_daughter->num],&out,tree);
1325 DATE_List_Of_Nodes_And_Ancestors_Younger_Than(tree->n_root,tree->n_root->v[2],tree->times->nd_t[prune_daughter->num],&out,tree);
1326
1327 // Remove nodes that are below prune_daughter (prune_daughter included)
1328 DATE_List_Of_Nodes_Younger_Than(prune,prune_daughter,-INFINITY,&out,tree);
1329
1330 // Add prune node to the list of node that can't be targeted for regraft
1331 Push_Bottom_Linked_List(prune,&out,YES);
1332
1333 // Add root node as one cannot regraft above it
1334 /* Push_Bottom_Linked_List(tree->n_root,&out); */
1335
1336 if(verbose)
1337 {
1338 printf("\nx outlist: %p",(void *)out); fflush(NULL);
1339 ll = out->head;
1340 do
1341 {
1342 t_node *x = (t_node *)ll->v;
1343 PhyML_Printf("\nx Outlist %d @ %f",x->num,tree->times->nd_t[x->num]);
1344 ll = ll->next;
1345 }
1346 while(ll != NULL);
1347 }
1348
1349
1350 /* Print_List(in); */
1351 ll = out->head;
1352 do
1353 {
1354 if(verbose)
1355 {
1356 t_node *x = (t_node *)ll->v;
1357 printf("\nx Remove %d",x->num);
1358 }
1359
1360 Remove_From_Linked_List(NULL,ll->v,&in);
1361
1362 if(verbose) PhyML_Printf("\n. List in (in->head:%p in->tail:%p):",in?in->head:NULL,in?in->tail:NULL);
1363
1364 /* Print_List(in); */
1365 ll = ll->next;
1366 }
1367 while(ll != NULL);
1368
1369 Free_Linked_List(out);
1370
1371 if(verbose)
1372 {
1373 ll = in->head;
1374 do
1375 {
1376 t_node *x;
1377 x = (t_node *)ll->v;
1378 printf("\n. In1: %d",x->num); fflush(NULL);
1379 ll = ll->next;
1380 }
1381 while(ll != NULL);
1382 }
1383
1384 return(in);
1385 }
1386
1387 //////////////////////////////////////////////////////////////
1388 //////////////////////////////////////////////////////////////
1389
DATE_Lk_Calib(t_tree * tree)1390 phydbl DATE_Lk_Calib(t_tree *tree)
1391 {
1392 phydbl lnL;
1393 int i;
1394 t_cal *cal;
1395
1396 lnL = 0.0;
1397 for(i=0;i<tree->times->n_cal;++i)
1398 {
1399 cal = tree->times->a_cal[i];
1400 lnL += LOG(cal->alpha_proba_list[cal->current_clade_idx]);
1401 }
1402
1403 return lnL;
1404 }
1405 //////////////////////////////////////////////////////////////
1406 //////////////////////////////////////////////////////////////
1407
1408 //////////////////////////////////////////////////////////////
1409 //////////////////////////////////////////////////////////////
1410
1411 //////////////////////////////////////////////////////////////
1412 //////////////////////////////////////////////////////////////
1413
1414 //////////////////////////////////////////////////////////////
1415 //////////////////////////////////////////////////////////////
1416
1417 //////////////////////////////////////////////////////////////
1418 //////////////////////////////////////////////////////////////
1419
1420