1 #include "spr.h"
2 #include "utilities.h"
3 #include "lk.h"
4 #include "optimiz.h"
5 #include "bionj.h"
6 #include "models.h"
7 #include "free.h"
8 #include "help.h"
9 #include "simu.h"
10 #include "eigen.h"
11 #include "pars.h"
12 #include "alrt.h"
13 #include "mixt.h"
14 #include "invitee.h"
15 #ifdef MPI
16 #include "mpi_boot.h"
17 #endif
18 #include <stdlib.h>
19
20
21
22 //////////////////////////////////////////////////////////////
23 //////////////////////////////////////////////////////////////
24
PhyTime_XML(char * xml_file)25 void PhyTime_XML(char *xml_file)
26 {
27
28 FILE *f;
29 char **clade, *clade_name, **mon_list;
30 phydbl low, up;
31 int i, j, n_taxa, clade_size, node_num, n_mon; //rnd_num
32 xml_node *n_r, *n_t, *n_m, *n_cur;
33 t_cal *last_calib;
34 align **data, **c_seq;
35 option *io;
36 calign *cdata;
37 t_opt *s_opt;
38 t_mod *mod;
39 time_t t_beg,t_end;
40 int r_seed;
41 char *most_likely_tree;
42 int user_lk_approx;
43 t_tree *tree;
44 t_node **a_nodes; //*node;
45 m4 *m4mod;
46
47 srand(time(NULL));
48
49 i = 0;
50 j = 0;
51 last_calib = NULL;
52 mod = NULL;
53 most_likely_tree = NULL;
54 n_taxa = 0;
55 node_num = -1;
56
57 //file can/cannot be open:
58 if ((f =(FILE *)fopen(xml_file, "r")) == NULL)
59 {
60 PhyML_Printf("\n== File '%s' can not be opened...\n",xml_file);
61 Exit("\n");
62 }
63
64 n_r = XML_Load_File(f);
65
66
67 //memory allocation for model parameters:
68 io = (option *)Make_Input();
69 mod = (t_mod *)Make_Model_Basic();
70 s_opt = (t_opt *)Make_Optimiz();
71 m4mod = (m4 *)M4_Make_Light();
72 Set_Defaults_Input(io);
73 Set_Defaults_Model(mod);
74 Set_Defaults_Optimiz(s_opt);
75 io -> mod = mod;
76 mod = io -> mod;
77 mod -> s_opt = s_opt;
78 clade_size = -1;
79
80 ////////////////////////////////////////////////////////////////////////////
81 //////////////////////reading tree topology:////////////////////////////////
82
83 //looking for a node <topology>
84 n_t = XML_Search_Node_Name("topology", YES, n_r);
85
86 //setting tree:
87 /* tree = (t_tree *)mCalloc(1,sizeof(t_tree)); */
88 n_cur = XML_Search_Node_Name("instance", YES, n_t);
89 if(n_cur != NULL)
90 {
91 if(XML_Search_Attribute(n_cur, "user.tree") != NULL)
92 {
93 strcpy(io -> out_tree_file, XML_Search_Attribute(n_cur, "user.tree") -> value);
94 io -> fp_out_tree = Openfile(io -> out_tree_file, 1);
95 io -> tree = Read_Tree_File_Phylip(io -> fp_in_tree);
96 }
97 else
98 {
99 PhyML_Printf("\n== No input tree was not found. \n");
100 PhyML_Printf("\n== Please specify either a tree file name or enter the whole tree directly in the XML file (in Newick format). \n");
101 Exit("\n");
102 }
103 }
104 else io -> tree = Read_Tree(&n_t -> value);
105 io -> n_otu = io -> tree -> n_otu;
106 tree = io -> tree;
107
108 //setting initial values to n_calib:
109 For(i, 2 * tree -> n_otu - 2)
110 {
111 tree -> a_nodes[i] -> n_calib = 0;
112 //PhyML_Printf("\n. '%d' \n", tree -> a_nodes[i] -> n_calib);
113 }
114
115
116 ////////////////////////////////////////////////////////////////////////////
117 ////////////////////////////////////////////////////////////////////////////
118
119 //memory for nodes:
120 a_nodes = (t_node **)mCalloc(2 * io -> n_otu - 1,sizeof(t_node *));
121 For(i, 2 * io -> n_otu - 2) a_nodes[i] = (t_node *)mCalloc(1,sizeof(t_node));
122
123 //setting a model:
124 tree -> rates = RATES_Make_Rate_Struct(io -> n_otu);
125 RATES_Init_Rate_Struct(tree -> rates, io -> rates, tree -> n_otu);
126
127 tree -> times = TIMES_Make_Time_Struct(io -> n_otu);
128 TIMES_Init_Time_Struct(tree -> times, io -> times, tree -> n_otu);
129
130 //reading seed:
131 if(XML_Search_Attribute(n_r, "seed")) io -> r_seed = String_To_Dbl(XML_Search_Attribute(n_r, "seed") -> value);
132
133 //TO DO: check that the tree has a root
134 Update_Ancestors(io -> tree -> n_root, io -> tree -> n_root -> v[2], io -> tree);
135 Update_Ancestors(io -> tree -> n_root, io -> tree -> n_root -> v[1], io -> tree);
136
137
138 ////////////////////////////////////////////////////////////////////////////
139 //////////////////////memory allocation for temp parameters/////////////////
140
141 //memory for monitor flag:
142 io -> mcmc -> monitor = (int *)mCalloc(2 * io -> n_otu - 1,sizeof(int));
143
144
145 //memory for sequences:
146 n_cur = XML_Search_Node_Name("alignment", YES, n_r);
147
148 data = (align **)mCalloc(io -> n_otu,sizeof(align *));
149 For(i, io -> n_otu)
150 {
151 data[i] = (align *)mCalloc(1,sizeof(align));
152 data[i] -> name = (char *)mCalloc(T_MAX_NAME,sizeof(char));
153 if(n_cur -> child -> value != NULL) data[i] -> state = (char *)mCalloc(strlen(n_cur -> child -> value) + 1,sizeof(char));
154 else data[i] -> state = (char *)mCalloc(T_MAX_SEQ,sizeof(char));
155 }
156 io -> data = data;
157 //tree -> data = data;
158
159
160 //memory for clade:
161 clade_name = (char *)mCalloc(T_MAX_NAME,sizeof(char));
162
163 //memory for list of clades to be monitored
164 mon_list = (char **)mCalloc(T_MAX_FILE,sizeof(char *));
165 For(i, T_MAX_FILE)
166 {
167 mon_list[i] = (char *)mCalloc(T_MAX_NAME,sizeof(char));
168 }
169
170 ////////////////////////////////////////////////////////////////////////////
171 ////////////////////////////////////////////////////////////////////////////
172
173 //reading monitor node:
174 i = 0;
175 n_m = XML_Search_Node_Name("monitor", YES, n_r);
176 if(n_m != NULL)
177 {
178 do
179 {
180 strcpy(mon_list[i], n_m -> child -> attr -> value);
181 i++;
182 if(n_m -> child) n_m -> child = n_m -> child -> next;
183 else break;
184 }
185 while(n_m -> child);
186 n_mon = i;
187 }
188 else
189 {
190 n_mon = 0;
191 PhyML_Printf("\n. There is no clade to be monitored. \n");
192 }
193
194 ////////////////////////////////////////////////////////////////////////////
195 ////////////////////////////////////////////////////////////////////////////
196
197 //chekcing for calibration node (upper or lower bound) to exist:
198 n_cur = XML_Search_Node_Name("calibration", YES, n_r);
199
200 if(n_cur == NULL)
201 {
202 PhyML_Printf("\n== There is no calibration information provided. \n");
203 PhyML_Printf("\n== Please check your data. \n");
204 Exit("\n");
205 }
206 else
207 {
208 if(XML_Search_Node_Name("upper", NO, n_cur -> child) == NULL && XML_Search_Node_Name("lower", NO, n_cur -> child) == NULL)
209 {
210 PhyML_Printf("\n== There is no calibration information provided. \n");
211 PhyML_Printf("\n== Please check your data. \n");
212 Exit("\n");
213 }
214 }
215
216 ////////////////////////////////////////////////////////////////////////////
217 ////////////////////////////////////////////////////////////////////////////
218
219
220 n_r = n_r -> child;
221 tree -> rates -> tot_num_cal = 0;
222 do
223 {
224 if(!strcmp(n_r -> name, "alignment"))//looking for a node <alignment>.
225 {
226 if(!n_r -> attr -> value)
227 {
228 PhyML_Printf("\n== Sequence type (nt / aa) must be provided. \n");
229 PhyML_Printf("\n== Please, include this information as an attribute of component <%s>. \n", n_r -> name);
230 Exit("\n");
231 }
232 char *s;
233 s = To_Upper_String(n_r -> attr -> value);
234 /* if(!strcmp(To_Upper_String(n_r -> attr -> value), "NT")) */
235 if(!strcmp(s, "NT"))
236 {
237 io -> datatype = 0;
238 io -> mod -> ns = 4;
239 }
240 /* if(!strcmp(To_Upper_String(n_r -> attr -> value), "AA")) */
241 if(!strcmp(s, "AA"))
242 {
243 io -> datatype = 1;
244 io -> mod -> ns = 20;
245 }
246 free(s);
247 n_cur = XML_Search_Node_Name("instance", YES, n_r);
248 if(n_cur != NULL)
249 {
250 if(XML_Search_Attribute(n_cur, "user.alignment") != NULL)
251 {
252 strcpy(io -> in_align_file, XML_Search_Attribute(n_cur, "user.alignment") -> value);
253 io -> fp_in_align = Openfile(io -> in_align_file, 1);
254 Detect_Align_File_Format(io);
255 io -> data = Get_Seq(io);
256 }
257 }
258 else
259 {
260 char *s;
261 i = 0;
262 s = To_Upper_String(n_r -> child -> value);
263 do
264 {
265 strcpy(io -> in_align_file, "invitee");
266 strcpy(io -> data[i] -> name, n_r -> child -> attr -> value);
267 /* strcpy(io -> data[i] -> state, To_Upper_String(n_r -> child -> value)); */
268 strcpy(io -> data[i] -> state, s);
269 i++;
270 if(n_r -> child -> next) n_r -> child = n_r -> child -> next;
271 else break;
272 }
273 while(n_r -> child);
274 n_taxa = i;
275 free(s);
276 }
277
278 //checking if a sequences of the same lengths:
279
280 i = 1;
281 For(i, n_taxa) if(strlen(io -> data[0] -> state) != strlen(io -> data[i] -> state))
282 {
283 PhyML_Printf("\n== All the sequences should be of the same length. Please check your data...\n");
284 Exit("\n");
285 break;
286 }
287
288 //checking sequence names:
289 i = 0;
290 For(i, n_taxa) Check_Sequence_Name(io -> data[i] -> name);
291
292 //check if a number of tips is equal to a number of taxa:
293 if(n_taxa != io -> n_otu)
294 {
295 PhyML_Printf("\n== The number of taxa is not the same as a number of tips. Check your data...\n");
296 Exit("\n");
297 }
298
299 //deleting '-', etc. from sequences:
300 io -> data[0] -> len = strlen(io -> data[0] -> state);
301 Post_Process_Data(io);
302 n_r = n_r -> next;
303 }
304 else if(!strcmp(n_r -> name, "calibration"))//looking for a node <calibration>.
305 {
306 tree -> rates -> tot_num_cal++;
307 if (tree -> rates -> calib == NULL) tree -> rates -> calib = Make_Calib(tree -> n_otu);
308 if(last_calib)
309 {
310 last_calib -> next = tree -> rates -> calib;
311 tree -> rates -> calib -> prev = last_calib;
312 }
313 last_calib = tree -> rates -> calib;
314
315 low = -BIG;
316 up = BIG;
317 n_cur = XML_Search_Node_Name("lower", YES, n_r);
318 if(n_cur != NULL) low = String_To_Dbl(n_cur -> value);
319 n_cur = XML_Search_Node_Name("upper", YES, n_r);
320 if(n_cur != NULL) up = String_To_Dbl(n_cur -> value);
321 do
322 {
323 if(!strcmp("appliesto", n_r -> child -> name))
324 {
325 //case of internal node:
326 strcpy(clade_name, n_r -> child -> attr -> value);//reached clade names n_r -> child -> attr -> value
327 if(!strcmp("root", clade_name))
328 {
329 node_num = io -> tree -> n_root -> num;
330 }
331 else if(strcmp("NO_CLADE", clade_name) && strcmp("root", clade_name))
332 {
333 xml_node *n_clade, *nd2;
334 nd2 = n_r -> parent;
335 n_clade = XML_Search_Node_Generic("clade", "id", clade_name, YES, nd2);
336 if(n_clade) //found clade with a given name
337 {
338 i = 0;
339 xml_node *nd;
340 nd = n_clade -> child;
341 clade = XML_Read_Clade(nd, tree);
342 clade_size = XML_Number_Of_Taxa_In_Clade(nd);
343 node_num = Find_Clade(clade, clade_size, io -> tree);
344 }
345 else
346 {
347 PhyML_Printf("\n== The calibration information for clade [%s] was not found.", clade_name);
348 PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
349 Exit("\n");
350 }
351 }
352 if(strcmp("NO_CLADE", clade_name))
353 {
354 For(j, n_mon)
355 {
356 if(!strcmp(clade_name, mon_list[j])) io -> mcmc -> monitor[node_num] = YES;
357 }
358 }
359
360 if(strcmp("NO_CLADE", clade_name))
361 {
362 tree -> rates -> calib -> proba[node_num] = String_To_Dbl(n_r -> child -> attr -> next -> value);
363
364 if(!n_r -> child -> attr -> next && n_r -> child -> next == NULL) tree -> rates -> calib -> proba[node_num] = 1.;
365 if(!n_r -> child -> attr -> next && n_r -> child -> next)
366 {
367 PhyML_Printf("\n== You either need to provide information about the probability with which calibration ");
368 PhyML_Printf("\n== applies to a node or you need to apply calibration only to one node.");
369 PhyML_Printf("\n. Err. in file %s at line %d\n",__FILE__,__LINE__);
370 Exit("\n");
371 }
372
373 tree -> rates -> calib -> all_applies_to[tree -> rates -> calib -> n_all_applies_to] -> num = node_num;
374 }
375 else tree -> rates -> calib -> proba[2 * tree -> n_otu - 1] = String_To_Dbl(n_r -> child -> attr -> next -> value);
376 tree -> rates -> calib -> n_all_applies_to++;
377 tree -> rates -> calib -> lower = low;
378 tree -> rates -> calib -> upper = up;
379 PhyML_Printf("\n. Lower bound: %f.", low);
380 PhyML_Printf("\n. Upper bound: %f.", up);
381
382 /////////////////////////////////////////////////////////////////////////////////////////////////////
383 PhyML_Printf("\n. .......................................................................");
384 PhyML_Printf("\n");
385 if(strcmp(clade_name, "NO_CLADE")) PhyML_Printf("\n. Clade name: [%s]", clade_name);
386 else
387 {
388 PhyML_Printf("\n. Calibration with:");
389 PhyML_Printf("\n. Lower bound set to: %15f time units.", low);
390 PhyML_Printf("\n. Upper bound set to: %15f time units.", up);
391 PhyML_Printf("\n. does not apply to any node with probability [%f]", String_To_Dbl(n_r -> child -> attr -> next -> value));
392 PhyML_Printf("\n. .......................................................................");
393 }
394
395 if(strcmp(clade_name, "root") && strcmp(clade_name, "NO_CLADE"))
396 {
397 For(i, clade_size) PhyML_Printf("\n. Taxon name: [%s]", clade[i]);
398 }
399 if(strcmp(clade_name, "NO_CLADE"))
400 {
401 PhyML_Printf("\n. Node number to which calibration applies to is [%d] with probability [%f]", node_num, String_To_Dbl(n_r -> child -> attr -> next -> value));
402 PhyML_Printf("\n. Lower bound set to: %15f time units.", low);
403 PhyML_Printf("\n. Upper bound set to: %15f time units.", up);
404 PhyML_Printf("\n. .......................................................................");
405 }
406 /////////////////////////////////////////////////////////////////////////////////////////////////////
407 if(n_r -> child -> next) n_r -> child = n_r -> child -> next;
408 else break;
409 }
410 else if(n_r -> child -> next) n_r -> child = n_r -> child -> next;
411 else break;
412 }
413 while(n_r -> child);
414
415 tree -> rates -> calib = tree -> rates -> calib -> next;
416 n_r = n_r -> next;
417 }
418 else if(!strcmp(n_r -> name, "ratematrices"))//initializing rate matrix:
419 {
420 if(n_r -> child)
421 {
422 Make_Ratematrice_From_XML_Node(n_r -> child, io, mod);
423 n_r = n_r -> next;
424 }
425 else n_r = n_r -> next;
426 }
427 else if(!strcmp(n_r -> name, "equfreqs"))//initializing frequencies:
428 {
429 if(n_r -> child)
430 {
431 Make_Efrq_From_XML_Node(n_r -> child , io, mod);
432 n_r = n_r -> next;
433 }
434 else n_r = n_r -> next;
435 }
436 else if(!strcmp(n_r -> name, "siterates"))//initializing site rates:
437 {
438 if(n_r -> child)
439 {
440 Make_RAS_From_XML_Node(n_r, io -> mod);
441 n_r = n_r -> next;
442 }
443 else n_r = n_r -> next;
444 }
445
446 else if (n_r -> next) n_r = n_r -> next;
447 else break;
448 }
449 while(1);
450
451 tree -> rates -> calib = last_calib;
452 while(tree -> rates -> calib -> prev) tree -> rates -> calib = tree -> rates -> calib -> prev;
453 ////////////////////////////////////////////////////////////////////////////////////////////////
454 //Check for the sum of probabilities for one calibration add up to one
455 do
456 {
457 phydbl p = 0.0;
458 for(i = tree -> n_otu; i < 2 * tree -> n_otu; i++)
459 {
460 p = p + tree -> rates -> calib -> proba[i];
461 /* PhyML_Printf("\n. # applies to %d \n", tree -> rates -> calib -> n_all_applies_to); */
462 /* PhyML_Printf("\n. %f \n", tree -> rates -> calib -> proba[i]); */
463 }
464 if(!Are_Equal(p, 1.0, 1.E-10))
465 {
466 PhyML_Printf("\n. .......................................................................");
467 PhyML_Printf("\n. WARNING! The sum of the probabilities for the calibration with:");
468 PhyML_Printf("\n. Lower bound set to: %15f time units.", tree -> rates -> calib -> lower);
469 PhyML_Printf("\n. Upper bound set to: %15f time units.", tree -> rates -> calib -> upper);
470 PhyML_Printf("\n. is not equal to 1.");
471 PhyML_Printf("\n. The probability that this calibration does not apply to any node is set to [%f].", 1.0 - p);
472 PhyML_Printf("\n. .......................................................................");
473 tree -> rates -> calib -> proba[2 * tree -> n_otu - 1] = 1.0 - p;
474 tree -> rates -> calib -> n_all_applies_to++;
475 /* PhyML_Printf("\n==You need to provide proper probabilities for the calibration. \n"); */
476 /* PhyML_Printf("\n. Err. in file %s at line %d\n",__FILE__,__LINE__); */
477 /* Exit("\n"); */
478 }
479 if(tree -> rates -> calib -> next) tree -> rates -> calib = tree -> rates -> calib -> next;
480 else break;
481
482 }
483 while(tree -> rates -> calib);
484 while(tree -> rates -> calib -> prev) tree -> rates -> calib = tree -> rates -> calib -> prev;
485
486 ////////////////////////////////////////////////////////////////////////////
487 ////////////////////////////////////////////////////////////////////////////
488 //clear memory:
489 free(clade_name);
490 For(i, tree -> n_otu)
491 {
492 free(clade[i]);
493 }
494 free(clade);
495 For(i, T_MAX_FILE)
496 {
497 free(mon_list[i]);
498 }
499 free(mon_list);
500 /* do */
501 /* { */
502 /* printf("\n %f %f \n", tree -> rates -> calib -> lower, tree -> rates -> calib -> upper); */
503 /* printf("\n numb applies to: %d \n", tree -> rates -> calib -> n_all_applies_to); */
504 /* For(i, tree -> rates -> calib -> n_all_applies_to) printf("\n Node number %d \n", tree -> rates -> calib -> all_applies_to[i] -> num); */
505 /* if(tree -> rates -> calib -> next) tree -> rates -> calib = tree -> rates -> calib -> next; */
506 /* else break; */
507
508 /* } */
509 /* while(tree -> rates -> calib); */
510 /* while(tree -> rates -> calib -> prev) tree -> rates -> calib = tree -> rates -> calib -> prev; */
511
512 /* Exit("\n"); */
513 ////////////////////////////////////////////////////////////////////////////
514 ////////////////////////////////////////////////////////////////////////////
515 //START analysis:
516 r_seed = (io -> r_seed < 0)?(time(NULL)):(io -> r_seed);
517 srand(r_seed);
518 rand();
519 PhyML_Printf("\n. Seed: %d\n", r_seed);
520 PhyML_Printf("\n. Pid: %d\n",getpid());
521 PhyML_Printf("\n. Compressing sequences...\n");
522 data = io -> data;
523 data[0] -> len = strlen(data[0] -> state);
524 ////////////////////////////////////////////////////////////////////////////
525 //memory for compressed sequences:
526 cdata = (calign *)mCalloc(1,sizeof(calign));
527 c_seq = (align **)mCalloc(io -> n_otu,sizeof(align *));
528 For(i, io -> n_otu)
529 {
530 c_seq[i] = (align *)mCalloc(1,sizeof(align));
531 c_seq[i] -> name = (char *)mCalloc(T_MAX_NAME,sizeof(char));
532 c_seq[i] -> state = (char *)mCalloc(data[0] -> len + 1,sizeof(char));
533 }
534 cdata -> c_seq = c_seq;
535 ////////////////////////////////////////////////////////////////////////////
536 cdata = Compact_Data(data, io);
537 Free_Seq(io -> data, cdata -> n_otu);
538 io -> mod -> io = io;
539 Check_Ambiguities(cdata, io -> mod -> io -> datatype, io -> state_len);
540 Make_Model_Complete(mod);
541 Init_Model(cdata, mod, io);
542 if(io -> mod -> use_m4mod) M4_Init_Model(mod -> m4mod, cdata, mod);
543 time(&(t_beg));
544
545 RATES_Fill_Lca_Table(tree);
546
547 tree -> mod = mod;
548 tree -> io = io;
549 tree -> data = cdata;
550 tree -> n_pattern = tree -> data -> crunch_len / tree -> io -> state_len;
551
552 Set_Both_Sides(YES, tree);
553 Make_Tree_For_Pars(tree);
554 Make_Tree_For_Lk(tree);
555 Make_Spr(tree);
556
557
558
559 //calculate the probabilities of each combination of calibrations:
560 TIMES_Calib_Partial_Proba(tree);
561 int cal_numb = 0;
562 do
563 {
564 if(!Are_Equal(tree -> rates -> times_partial_proba[cal_numb], 0.0, 1.E-10)) break;
565 else cal_numb += 1;
566 }
567 while(1);
568 PhyML_Printf("\n. Calibration number chosen %d.\n", cal_numb);
569
570
571 Set_Current_Calibration(cal_numb, tree);
572 tree -> rates -> numb_calib_chosen[cal_numb]++;
573
574
575 int tot_num_comb;
576 tot_num_comb = Number_Of_Comb(tree -> rates -> calib);
577 PhyML_Printf("\n. The number of calibration combinations that is going to be considered is %d.\n", tot_num_comb);
578 /* For(i, tot_num_comb) Set_Current_Calibration(i, tree); */
579 /* Exit("\n"); */
580
581 //set initial value for Hastings ratio for conditional jump:
582 /* tree -> rates -> c_lnL_Hastings_ratio = 0.0; */
583
584 TIMES_Set_All_Node_Priors(tree);
585
586 tree -> rates -> cur_comb_numb = cal_numb;
587 tree -> rates -> log_K_cur = K_Constant_Prior_Times_Log(tree);
588
589 TIMES_Get_Number_Of_Time_Slices(tree);
590 TIMES_Label_Edges_With_Calibration_Intervals(tree);
591
592
593 tree -> write_br_lens = NO;
594
595 PhyML_Printf("\n. Input tree with calibration information ('almost' compatible with MCMCtree).\n");
596
597 char *t;
598 t = Write_Tree(tree, YES);
599 PhyML_Printf("\n. %s \n", t);
600 free(t);
601
602 tree -> write_br_lens = YES;
603
604
605 // Work with log of branch lengths?
606 if(tree -> mod -> log_l == YES) Log_Br_Len(tree);
607
608 if(io -> mcmc -> use_data == YES)
609 {
610 // Force the exact likelihood score
611 user_lk_approx = tree -> io -> lk_approx;
612 tree -> io -> lk_approx = EXACT;
613
614 /* MLE for branch lengths */
615 /* printf("\n. %s",Write_Tree(tree,NO)); */
616 /* printf("\n. alpha %f",tree->mod->ras->alpha->v); */
617 /* printf("\n. %f %f %f %f",tree->mod->e_frq->pi->v[0],tree->mod->e_frq->pi->v[1],tree->mod->e_frq->pi->v[2],tree->mod->e_frq->pi->v[3]); */
618 /* Lk(NULL,tree); */
619 /* printf("\n. %f",tree->c_lnL); */
620 /* Exit("\n"); */
621
622 Round_Optimize(tree, tree -> data, ROUND_MAX);
623 // Set vector of mean branch lengths for the Normal approximation of the likelihood
624 RATES_Set_Mean_L(tree);
625
626 // Estimate the matrix of covariance for the Normal approximation of the likelihood
627 PhyML_Printf("\n");
628 PhyML_Printf("\n. Computing Hessian...");
629 tree -> rates -> bl_from_rt = 0;
630 Free(tree -> rates -> cov_l);
631 tree -> rates -> cov_l = Hessian_Seo(tree);
632
633 // tree->rates->cov_l = Hessian_Log(tree);
634 For(i, (2 * tree -> n_otu - 3) * (2 * tree -> n_otu - 3)) tree -> rates -> cov_l[i] *= -1.0;
635 if(!Iter_Matinv(tree -> rates -> cov_l, 2 * tree -> n_otu - 3, 2 * tree -> n_otu - 3, YES)) Exit("\n");
636 tree -> rates -> covdet = Matrix_Det(tree -> rates -> cov_l, 2 * tree -> n_otu - 3, YES);
637 For(i,(2 * tree -> n_otu - 3) * (2 * tree -> n_otu - 3)) tree -> rates -> invcov[i] = tree -> rates -> cov_l[i];
638 if(!Iter_Matinv(tree -> rates -> invcov, 2 * tree -> n_otu - 3, 2 * tree -> n_otu - 3, YES)) Exit("\n");
639 tree -> rates -> grad_l = Gradient(tree);
640
641 // Pre-calculation of conditional variances to speed up calculations
642 RATES_Bl_To_Ml(tree);
643 RATES_Get_Conditional_Variances(tree);
644 RATES_Get_All_Reg_Coeff(tree);
645 RATES_Get_Trip_Conditional_Variances(tree);
646 RATES_Get_All_Trip_Reg_Coeff(tree);
647
648 Lk(NULL, tree);
649 PhyML_Printf("\n");
650 PhyML_Printf("\n. p(data|model) [exact ] ~ %.2f",tree -> c_lnL);
651 tree -> io -> lk_approx = NORMAL;
652 For(i,2 * tree -> n_otu - 3) tree -> rates -> u_cur_l[i] = tree -> rates -> mean_l[i] ;
653 tree -> c_lnL = Lk(NULL,tree);
654
655 PhyML_Printf("\n. p(data|model) [approx] ~ %.2f",tree -> c_lnL);
656
657 tree -> io -> lk_approx = user_lk_approx;
658
659 }
660
661
662 tree -> rates -> model = io -> rates -> model;
663
664 PhyML_Printf("\n. Selected model '%s' \n", RATES_Get_Model_Name(io -> rates -> model));
665
666 if(tree -> rates -> model == GUINDON) tree -> mod -> gamma_mgf_bl = YES;
667
668 tree -> rates -> bl_from_rt = YES;
669
670 if(tree -> io -> cstr_tree) Find_Surviving_Edges_In_Small_Tree(tree, tree -> io -> cstr_tree);
671 time(&t_beg);
672
673 tree -> mcmc = MCMC_Make_MCMC_Struct();
674
675 MCMC_Copy_MCMC_Struct(tree -> io -> mcmc, tree -> mcmc, "phytime");
676
677 tree -> mod -> m4mod = m4mod;
678
679 MCMC_Complete_MCMC(tree -> mcmc, tree);
680
681 tree -> mcmc -> is_burnin = NO;
682
683 MCMC(tree);
684 PhyML_Printf("\n");
685 for(i=0;i<6;i++) printf(" %d ", tree -> rates -> numb_calib_chosen[i]);
686 PhyML_Printf("\n");
687
688 MCMC_Close_MCMC(tree -> mcmc);
689 MCMC_Free_MCMC(tree -> mcmc);
690 PhyML_Printf("\n");
691
692 Free_Calib(tree -> rates -> calib);
693 Add_Root(tree->a_edges[0],tree);
694 Free_Tree_Pars(tree);
695 Free_Tree_Lk(tree);
696 Free_Tree(tree);
697 Free_Calign(cdata);
698 Free_Model(mod);
699 if(io -> fp_in_align) fclose(io -> fp_in_align);
700 if(io -> fp_in_tree) fclose(io -> fp_in_tree);
701 if(io -> fp_out_lk) fclose(io -> fp_out_lk);
702 if(io -> fp_out_tree) fclose(io -> fp_out_tree);
703 if(io -> fp_out_trees) fclose(io -> fp_out_trees);
704 if(io -> fp_out_stats) fclose(io -> fp_out_stats);
705 fclose(f);
706 Free(most_likely_tree);
707 Free_Input(io);
708 time(&t_end);
709 Print_Time_Info(t_beg,t_end);
710
711 }
712
713
714 //////////////////////////////////////////////////////////////
715 //////////////////////////////////////////////////////////////
716 //Calculate the prior probability for node times taking into account the
717 //probailitis with which each calibration applies to the particular node.
TIMES_Calib_Cond_Prob(t_tree * tree)718 phydbl TIMES_Calib_Cond_Prob(t_tree *tree)
719 {
720
721 phydbl times_lk, *Yule_val, *times_partial_proba, times_tot_proba, c, constant, ln_t;
722 int i, tot_num_comb;
723 t_cal *calib;
724
725
726 times_tot_proba = 0.0;
727 calib = tree -> rates -> calib;
728 times_partial_proba = tree -> rates -> times_partial_proba;
729
730 tot_num_comb = Number_Of_Comb(calib);
731
732
733 Yule_val = (phydbl *)mCalloc(tot_num_comb, sizeof(phydbl));
734 For(i, tot_num_comb)
735 {
736
737 Set_Current_Calibration(i, tree);
738
739 int result;
740 result = TRUE;
741 TIMES_Set_All_Node_Priors_S(&result, tree);
742
743 times_lk = TIMES_Lk_Yule_Order_Root_Cond(tree);
744
745 constant = 0.0;
746 if(tot_num_comb > 1 && times_lk > -INFINITY && result != FALSE && tree->rates->update_time_norm_const == YES)
747
748 Yule_val[i] = constant + times_lk;
749
750 while(calib -> prev) calib = calib -> prev;
751 }
752
753 /* Exit("\n"); */
754 c = .0;
755 times_tot_proba = 0.0;
756 For(i, tot_num_comb)
757 {
758 times_tot_proba += times_partial_proba[i] * exp(Yule_val[i] + c);
759 }
760
761 ln_t = -c + log(times_tot_proba);
762
763 free(Yule_val);
764
765 return(ln_t);
766 }
767
768
769
770 ////////////////////////////////////////////////////////////////////////////
771 ////////////////////////////////////////////////////////////////////////////
772 //Function calculates the normalizing constant K of the joint distribution Yule_Order.
773 //Use the fact that density Yule_Order can be used streight forward only in case of the complete
774 //overlap of the calibration intervals for all of the nodes or in case of no overlap.
K_Constant_Prior_Times_Log(t_tree * tree)775 phydbl K_Constant_Prior_Times_Log(t_tree *tree)
776 {
777 int i, j, k, f, n_otu, *indic, *n_slice, *slice_numbers;
778 phydbl buf, chop_bound, *t_prior_min, *t_prior_max, *t_slice, *t_slice_min, *t_slice_max, *g_i_node;
779
780
781 t_prior_min = tree -> rates -> t_prior_min;
782 t_prior_max = tree -> rates -> t_prior_max;
783 n_otu = tree -> n_otu;
784
785 t_slice = (phydbl *)mCalloc(2 * (n_otu - 1), sizeof(phydbl)); //the vector of the union of lower and upper bounds, lined up in incresing order.
786 t_slice_min = (phydbl *)mCalloc(2 * n_otu - 3, sizeof(phydbl)); //vector of the lower bounds of the sliced intervals.
787 t_slice_max = (phydbl *)mCalloc(2 * n_otu - 3, sizeof(phydbl)); //vector of the upper bounds of the sliced intervals.
788 indic = (int *)mCalloc((n_otu - 1) * (2 * n_otu - 3), sizeof(int)); //vector of the indicators, columns - node numbers (i + n_otu), rows - the number of the sliced interval.
789 slice_numbers = (int *)mCalloc((n_otu - 1) * (2 * n_otu - 3), sizeof(int )); //vecor of the slice intervals numbers, columns node numbers (i + n_otu), rows - the number of the sliced interval.
790 n_slice = (int *)mCalloc(n_otu - 1, sizeof(int)); //vector of the numbers of sliced intervals that apply to one node with number (i + n_otu).
791 g_i_node = (phydbl *)mCalloc((n_otu - 1) * (2 * n_otu - 3), sizeof(phydbl)); //vector of the values of the function g evaluated for each node.
792
793 i = 0;
794 /* K = 0; */
795 j = n_otu;
796 ////////////////////////////////////////////////////////////////////////////
797 //Put prior bounds in one vector t_slice. Excluding tips.
798 For(i, n_otu - 1)
799 {
800 t_slice[i] = t_prior_min[j];
801 j++;
802 }
803
804 j = n_otu;
805 for(i = n_otu - 1; i < 2 * n_otu - 3; i++)
806 {
807 t_slice[i] = t_prior_max[j];
808 j++;
809 }
810 if(tree -> rates -> nd_t[tree -> n_root -> num] > t_prior_min[tree -> n_root -> num]) chop_bound = MIN(tree -> rates -> nd_t[tree -> n_root -> num], t_prior_max[tree -> n_root -> num]);
811 else chop_bound = t_prior_min[tree -> n_root -> num];
812 t_slice[2 * n_otu - 3] = chop_bound;
813
814 ////////////////////////////////////////////////////////////////////////////
815 //Get slices in increasing order. Excluding tips.
816 do
817 {
818 f = NO;
819 For(j, 2 * n_otu - 3)
820 {
821 if(t_slice[j] > t_slice[j + 1])
822 {
823 buf = t_slice[j];
824 t_slice[j] = t_slice[j + 1];
825 t_slice[j + 1] = buf;
826 f = YES;
827 }
828 }
829 }
830 while(f);
831
832 for(j = 1; j < 2 * n_otu - 2; j++) t_slice[j] = MAX(chop_bound, t_slice[j]);
833 ////////////////////////////////////////////////////////////////////////////
834 //Get the intervals with respect to slices. Total number of t_slice_min(max) - 2 * n_otu - 3. Excluding tips.
835 i = 0;
836 For(j, 2 * n_otu - 3)
837 {
838 t_slice_min[j] = t_slice[i];
839 t_slice_max[j] = t_slice[i + 1];
840 i++;
841 }
842
843 ////////////////////////////////////////////////////////////////////////////
844 //Getting indicators for the node number [i + n_otu] to have slice. i = i + n_otu is the node number on the tree and j is the slice number, total
845 //number of intervals is 2 * n_otu - 3. Excluding tips.
846 For(i, n_otu - 1)
847 {
848 For(j, 2 * n_otu - 3)
849 {
850
851 if(Are_Equal(t_prior_min[i + n_otu], t_slice_min[j], 1.E-10) && t_prior_max[i + n_otu] > t_slice_max[j] && t_prior_min[i + n_otu] < t_slice_max[j] && !Are_Equal(t_slice_max[j], t_slice_min[j], 1.E-10)) indic[i * (2 * n_otu - 3) + j] = 1;
852 else if(Are_Equal(t_prior_max[i + n_otu], t_slice_max[j], 1.E-10) && t_prior_min[i + n_otu] < t_slice_min[j] && t_prior_max[i + n_otu] > t_slice_min[j] && !Are_Equal(t_slice_max[j], t_slice_min[j], 1.E-10)) indic[i * (2 * n_otu - 3) + j] = 1;
853 else if(t_prior_min[i + n_otu] < t_slice_min[j] && t_prior_max[i + n_otu] > t_slice_max[j] && !Are_Equal(t_slice_max[j], t_slice_min[j], 1.E-10)) indic[i * (2 * n_otu - 3) + j] = 1;
854 else if(Are_Equal(t_prior_min[i + n_otu], t_slice_min[j], 1.E-10) && Are_Equal(t_prior_max[i + n_otu], t_slice_max[j], 1.E-10)) indic[i * (2 * n_otu - 3) + j] = 1;
855 }
856 }
857
858
859 For(i, n_otu - 2)
860 {
861 indic[i * (2 * n_otu - 3)] = 0;
862 }
863
864 for(j = 1; j < 2 * n_otu - 3; j++)
865 {
866 indic[(n_otu - 2) * (2 * n_otu - 3) + j] = 0;
867 }
868
869 /* printf("\n"); */
870 /* printf(" ", j); */
871 /* For(j, 2 * n_otu - 3) printf(". '%d' ", j); */
872 /* printf("\n"); */
873 /* For(i, n_otu - 1) */
874 /* { */
875 /* printf(" ['%d'] ", i + n_otu); */
876 /* For(j, 2 * n_otu - 3) */
877 /* { */
878 /* printf(". '%d' ", indic[i * (2 * n_otu - 3) + j]); */
879 /* } */
880 /* printf("\n"); */
881 /* } */
882
883
884
885 ////////////////////////////////////////////////////////////////////////////
886 //Running through all of the combinations of slices
887 int *cur_slices, *cur_slices_shr, *cur_slices_cpy, *slices_start_node, shr_num_slices;
888 phydbl *t_cur_slice_min, *t_cur_slice_max;
889 phydbl tot_num_comb;
890 phydbl log_g_i, lmbd;
891
892 lmbd = tree -> rates -> birth_rate;
893
894 tot_num_comb = 1;
895
896 t_cur_slice_min = (phydbl *)mCalloc(n_otu - 1, sizeof(phydbl));
897 t_cur_slice_max = (phydbl *)mCalloc(n_otu - 1, sizeof(phydbl));
898 cur_slices = (int *)mCalloc(n_otu - 1, sizeof(int)); //the vector of the current slices with repetition.
899 cur_slices_cpy = (int *)mCalloc(n_otu - 1, sizeof(int));
900 slices_start_node = (int *)mCalloc(n_otu - 1, sizeof(int));
901 cur_slices_shr = (int *)mCalloc(n_otu - 1, sizeof(int)); //the vector of the current slices without repetition.
902
903
904
905 ////////////////////////////////////////////////////////////////////////////
906 //Get the number of slices that can be applied for each node and the vectors of slice numbers for each node.
907 For(i, n_otu - 1)
908 {
909 k = 0;
910 For(j, 2 * n_otu - 3)
911 {
912 if(indic[i * (2 * n_otu - 3) + j] == 1)
913 {
914 /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
915 g_i_node[i * (2 * n_otu - 3) + j] = (exp(lmbd * t_slice_max[j]) - exp(lmbd * t_slice_min[j])) /
916 (exp(lmbd * t_prior_max[i + n_otu]) - exp(lmbd * t_prior_min[i + n_otu]));
917 /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
918 slice_numbers[i * (2 * n_otu - 3) + k] = j;
919 n_slice[i]++;
920 k++;
921 }
922 }
923 }
924 /* printf("\n"); */
925 /* For(i, n_otu - 1) */
926 /* { */
927 /* printf(" ['%d'] ", i + n_otu); */
928 /* For(j, 2 * n_otu - 3) */
929 /* { */
930 /* printf(". '%f' ", g_i_node[i * (2 * n_otu - 3) + j]); */
931 /* } */
932 /* printf("\n"); */
933 /* } */
934
935 /* printf("\n"); */
936 /* For(i, n_otu - 1) */
937 /* { */
938 /* printf(" ['%d'] ", i + n_otu); */
939 /* For(j, n_slice[i]) */
940 /* { */
941 /* printf(". '%d' ", slice_numbers[i * (2 * n_otu - 3) + j]); */
942 /* } */
943 /* printf("\n"); */
944 /* } */
945 For(i, n_otu - 1)
946 {
947 tot_num_comb = tot_num_comb * n_slice[i];
948 }
949
950
951 int r, numb_unsuc, max_size, comb_numb, *combinations, *max_combination, numb_approx; /* **combinations; */
952 phydbl K_total;
953 phydbl *t_slice_min_f, *t_slice_max_f, *K_val_approx;
954 int *root_nodes, *dif;
955 phydbl K_total_cur, max_K_val, scl_const;
956
957 max_size = 1000000;
958 combinations = (int *)mCalloc(max_size*(n_otu-1), sizeof(int));
959 max_combination = (int *)mCalloc((n_otu-1), sizeof(int));
960 t_slice_min_f = (phydbl *)mCalloc(n_otu - 1, sizeof(phydbl));
961 t_slice_max_f = (phydbl *)mCalloc(n_otu - 1, sizeof(phydbl));
962 K_val_approx = (phydbl *)mCalloc(max_size, sizeof(phydbl));
963 root_nodes = (int *)mCalloc(n_otu - 1, sizeof(int));
964 dif = (int *)mCalloc(n_otu - 1, sizeof(int));
965 numb_approx = 0;
966 max_K_val = 0;
967 scl_const = 0.0;
968 /* lmbd = 4.0; */
969 numb_unsuc = 0;
970
971 /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
972 /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
973 ////////////////////////////////CALCULATING THE MAXIMUM VALUE OF m_i * g_i FOR ONE COMBINATION OF SLICES:////////////////////////////////////////////////////////////////
974
975 comb_numb = 0;
976 K_total = 0.0;
977
978 do
979 {
980 /* printf("\n ____________________________________________________________________________________________________ \n"); */
981
982 int x = 0, f = FALSE;
983
984 For(i, n_otu - 1)
985 {
986 r = rand()%(n_slice[i]);
987 t_slice_min_f[i] = t_slice_min[slice_numbers[i * (2 * n_otu - 3) + r]];
988 t_slice_max_f[i] = t_slice_max[slice_numbers[i * (2 * n_otu - 3) + r]];
989 cur_slices[i] = slice_numbers[i * (2 * n_otu - 3) + r];
990 }
991
992 For(i, n_otu - 1)
993 {
994 cur_slices_cpy[i] = cur_slices[i];
995 slices_start_node[i] = cur_slices[i];
996 }
997
998 For(i, n_otu - 1)
999 {
1000 for(j = i + 1; j < n_otu - 1; j++)
1001 {
1002 if(cur_slices[i] == cur_slices[j]) cur_slices[j] = -1;
1003 }
1004 }
1005 //For(i, n_otu - 1) printf(" Slice number'%d' \n", cur_slices[i]);
1006
1007 shr_num_slices = 0;
1008 For(i, n_otu -1)
1009 {
1010 if(cur_slices[i] >= 0)
1011 {
1012 cur_slices_shr[shr_num_slices] = cur_slices[i];
1013 shr_num_slices++;
1014 }
1015 }
1016
1017 int result_1;
1018 int c = 0;
1019 int num_elem;
1020
1021
1022 result_1 = TRUE;
1023
1024 Check_Time_Slices(tree -> n_root, tree -> n_root -> v[1], &result_1, t_slice_min_f, t_slice_max_f, tree);
1025 Check_Time_Slices(tree -> n_root, tree -> n_root -> v[2], &result_1, t_slice_min_f, t_slice_max_f, tree);
1026
1027 /* printf("\n. [START] Result '%d' \n", result_1); */
1028
1029
1030 K_total_cur = 0.0;
1031
1032 if(result_1 != TRUE) K_total_cur = 0.0;
1033 else
1034 {
1035 int n_1, n_2;
1036
1037 num_elem = 0;
1038
1039 x = 0;
1040 f = FALSE;
1041
1042 if(comb_numb > 0)
1043 {
1044 For(i, comb_numb)
1045 {
1046 x = 0;
1047 For(j, n_otu - 1)
1048 {
1049
1050 dif[j] = combinations[i*(n_otu-1) + j] - cur_slices_cpy[j];
1051 if(dif[j] == 0) x++;
1052 }
1053 if(x == n_otu - 1) f = TRUE;
1054 }
1055 }
1056 if(!f)
1057 {
1058 For(i, n_otu - 1) combinations[comb_numb*(n_otu - 1) + i] = cur_slices_cpy[i];
1059
1060 /* printf("\n"); */
1061 /* printf(" [1][CUR SLICES PROPOSED] "); */
1062 /* printf(" [COMB NUMBER] = [%d] --- ", comb_numb); */
1063 /* For(i, n_otu - 1) printf(". [%d] .", combinations[comb_numb*(n_otu - 1) + i]); */
1064 /* printf("\n"); */
1065
1066
1067
1068 For(i, shr_num_slices)
1069 {
1070 Search_Root_Node_In_Slice(tree -> n_root, tree -> n_root -> v[1], root_nodes, &num_elem, t_slice_min[cur_slices_shr[i]], t_slice_max[cur_slices_shr[i]], t_slice_min_f, t_slice_max_f, tree);
1071 Search_Root_Node_In_Slice(tree -> n_root, tree -> n_root -> v[2], root_nodes, &num_elem, t_slice_min[cur_slices_shr[i]], t_slice_max[cur_slices_shr[i]], t_slice_min_f, t_slice_max_f, tree);
1072 }
1073 /* for(i = 0; i < n_otu - 2; i++) printf("\n. [START] Node [%d] Slice_min [%f] Slice_max [%f] \n", i + n_otu, t_slice_min_f[i], t_slice_max_f[i]); */
1074 For(j, num_elem)
1075 {
1076 n_1 = 0;
1077 n_2 = 0;
1078 Number_Of_Nodes_In_Slice(tree -> a_nodes[root_nodes[j] + n_otu], tree -> a_nodes[root_nodes[j] + n_otu] -> v[1], &n_1, t_slice_min_f, t_slice_max_f, tree);
1079 Number_Of_Nodes_In_Slice(tree -> a_nodes[root_nodes[j] + n_otu], tree -> a_nodes[root_nodes[j] + n_otu] -> v[2], &n_2, t_slice_min_f, t_slice_max_f, tree);
1080 /* printf("\n. n_1 [%d] n_2 [%d]\n", n_1, n_2); */
1081 K_total_cur = K_total_cur + log(1) - log(n_1 + n_2 + 1) - LnGamma(n_1 + 1) - LnGamma(n_2 + 1);
1082 /* printf("\n. K_total_cur [%f] \n", K_total_cur); */
1083 /* printf("\n. [START] log(m_i) [%f] \n", log(1) - log(n_1 + n_2 + 1) - LnGamma(n_1 + 1) - LnGamma(n_2 + 1)); */
1084 }
1085 /* printf("\n. [START] log(m_i) [%f] \n", K_total_cur); */
1086 /* printf("\n. K_total_cur [%f] \n", K_total_cur); */
1087
1088 log_g_i = 0.0;
1089 For(i, n_otu - 2)
1090 log_g_i += log_g_i(lmbd,
1091 t_slice_max_f[i],
1092 t_slice_min_f[i],
1093 t_prior_max[i + n_otu],
1094 MAX(t_prior_min[i + n_otu],tree->times->nd_t[tree->n_root->num]));
1095
1096 /* printf("\n. [START] log(g_i) [%f] \n", log_g_i); */
1097
1098 K_total_cur = exp(K_total_cur + log_g_i + scl_const);
1099
1100 if(K_total_cur > max_K_val)
1101 {
1102 For(i, n_otu - 1) max_combination[i] = cur_slices_cpy[i];
1103 max_K_val = K_total_cur;
1104 }
1105
1106 K_val_approx[numb_approx] = K_total_cur;
1107 numb_approx++;
1108 /* printf("\n. [APPROX] Approximated constant after start (K_total_cur) = [%f] \n", (K_total_cur)); */
1109
1110 /* printf("\n. [START] m_i * g_i [%f] \n", K_total_cur); */
1111 /* printf("\n. [START] sum(m_i * g_i) = m_i * g_i [%f] \n", K_total_cur); */
1112 /* printf("\n. K of the tree for starting combination of slices [%f] \n", K_total_cur); */
1113 comb_numb++;
1114 if(comb_numb > max_size)
1115 {
1116 combinations = (int *)mRealloc(combinations, max_size*(n_otu - 1) + (n_otu - 1),sizeof(char));
1117 max_size = max_size + 1;
1118 }
1119 c++;
1120 }
1121 }
1122
1123 /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
1124 /* printf("\n. ____________________________________________________________________________________________ \n"); */
1125 phydbl K_part, K_max;
1126 int m, n, count;
1127
1128 K_max = K_total_cur;
1129
1130
1131 /* printf("\n. K_total [%f] \n", K_total); */
1132 do
1133 {
1134 r = rand()%(n_otu - 1);
1135
1136 count = 0;
1137 For(m, r)
1138 {
1139 /* printf("\n. NODE NUMBER ['%d'] \n", m + n_otu); */
1140 For(n, 2 * n_otu - 3)
1141 {
1142 K_part = 0.0;
1143 if(g_i_node[m * (2 * n_otu - 3) + n] > 0)
1144 {
1145 t_slice_min_f[m] = t_slice_min[n];
1146 t_slice_max_f[m] = t_slice_max[n];
1147 cur_slices_cpy[m] = n;
1148
1149 For(i, n_otu - 1) cur_slices[i] = cur_slices_cpy[i];
1150 For(i, n_otu - 1)
1151 {
1152 for(j = i + 1; j < n_otu - 1; j++)
1153 {
1154 if(cur_slices[i] == cur_slices[j]) cur_slices[j] = -1;
1155 }
1156 }
1157
1158 shr_num_slices = 0;
1159 For(i, n_otu -1)
1160 {
1161 if(cur_slices[i] >= 0)
1162 {
1163 cur_slices_shr[shr_num_slices] = cur_slices[i];
1164 shr_num_slices++;
1165 }
1166 }
1167
1168 result_1 = TRUE;
1169
1170 Check_Time_Slices(tree -> n_root, tree -> n_root -> v[1], &result_1, t_slice_min_f, t_slice_max_f, tree);
1171 Check_Time_Slices(tree -> n_root, tree -> n_root -> v[2], &result_1, t_slice_min_f, t_slice_max_f, tree);
1172
1173 if(result_1 != TRUE) K_part = 0.0;
1174 else
1175 {
1176 int n_1, n_2;
1177
1178 x = 0;
1179 f = FALSE;
1180
1181 num_elem = 0;
1182
1183 if(comb_numb > 0)
1184 {
1185 For(i, comb_numb)
1186 {
1187 x = 0;
1188 For(j, n_otu - 1)
1189 {
1190
1191 dif[j] = combinations[i*(n_otu-1) + j] - cur_slices_cpy[j];
1192 if(dif[j] == 0) x++;
1193 }
1194 if(x == n_otu - 1) f = TRUE;
1195 }
1196 }
1197
1198
1199 if(!f)
1200 {
1201 For(i, n_otu - 1) combinations[comb_numb*(n_otu-1) + i] = cur_slices_cpy[i];
1202 /* printf("\n"); */
1203 /* printf(" [2][CUR SLICES PROPOSED] "); */
1204 /* printf(" [COMB NUMBER] = [%d] --- ", comb_numb); */
1205 /* For(i, n_otu - 1) printf(". [%d] .", combinations[comb_numb*(n_otu-1) + i]); */
1206 /* printf("\n"); */
1207
1208
1209 For(i, shr_num_slices)
1210 {
1211 Search_Root_Node_In_Slice(tree -> n_root, tree -> n_root -> v[1], root_nodes, &num_elem, t_slice_min[cur_slices_shr[i]], t_slice_max[cur_slices_shr[i]], t_slice_min_f, t_slice_max_f, tree);
1212 Search_Root_Node_In_Slice(tree -> n_root, tree -> n_root -> v[2], root_nodes, &num_elem, t_slice_min[cur_slices_shr[i]], t_slice_max[cur_slices_shr[i]], t_slice_min_f, t_slice_max_f, tree);
1213 }
1214 For(j, num_elem)
1215 {
1216 n_1 = 0;
1217 n_2 = 0;
1218
1219 Number_Of_Nodes_In_Slice(tree -> a_nodes[root_nodes[j] + n_otu], tree -> a_nodes[root_nodes[j] + n_otu] -> v[1], &n_1, t_slice_min_f, t_slice_max_f, tree);
1220 Number_Of_Nodes_In_Slice(tree -> a_nodes[root_nodes[j] + n_otu], tree -> a_nodes[root_nodes[j] + n_otu] -> v[2], &n_2, t_slice_min_f, t_slice_max_f, tree);
1221 /* printf("\n. n_1 [%d] n_2 [%d]\n", n_1, n_2); */
1222
1223 K_part = K_part + log(1) - log(n_1 + n_2 + 1) - LnGamma(n_1 + 1) - LnGamma(n_2 + 1);
1224 /* printf("\n. [CONT] log(m_i) [%f] \n", log(1) - log(n_1 + n_2 + 1) - LnGamma(n_1 + 1) - LnGamma(n_2 + 1)); */
1225 }
1226 /* printf("\n. [CONT] log(m_i) [%f] \n", K_part); */
1227 /* printf("\n. K_part [%f] \n", K_part); */
1228
1229
1230
1231 if(K_part > max_K_val)
1232 {
1233 For(i, n_otu - 1) max_combination[i] = cur_slices_cpy[i];
1234 max_K_val = K_part;
1235 }
1236
1237 log_g_i = 0.0;
1238 For(i, n_otu - 2)
1239 log_g_i += log_g_i(lmbd,
1240 t_slice_max_f[i],
1241 t_slice_min_f[i],
1242 t_prior_max[i + n_otu],
1243 MAX(t_prior_min[i + n_otu],tree->times->nd_t[tree->n_root->num]));
1244
1245 /* printf("\n. [START] log(g_i) [%f] \n", log_g_i); */
1246
1247 K_part = exp(K_part + log_g_i + scl_const);
1248
1249 if(K_part > max_K_val)
1250 {
1251 For(i, n_otu - 1) max_combination[i] = cur_slices_cpy[i];
1252 max_K_val = K_part;
1253 }
1254
1255 /* printf("\n. [START] m_i * g_i [%f] \n", K_part); */
1256
1257 K_val_approx[numb_approx] = K_part;
1258 numb_approx++;
1259
1260 K_total_cur = K_total_cur + K_part;
1261 /* printf("\n. [APPROX] Approximated constant after one run (K_part) = [%f] \n", (K_part)); */
1262 /* printf("\n. K_max [%f] K_part [%f] \n", K_max, K_part); */
1263 /* printf("\n. [CONT] sum(m_i * g_i) [%f] \n", K_total_cur); */
1264 if(K_max < K_part)
1265 {
1266 K_max = K_part;
1267 /* For(i, n_otu - 1) slices_start_node[i] = cur_slices_cpy[i]; */
1268 count++;
1269 }
1270
1271 comb_numb++;
1272 if(comb_numb > max_size)
1273 {
1274 combinations = (int *)mRealloc(combinations, max_size*(n_otu - 1) + (n_otu - 1),sizeof(char));
1275 max_size = max_size + 1;
1276 }
1277 c++;
1278
1279 }
1280 }
1281 }
1282 }
1283 }
1284
1285
1286 for(m = r; m < n_otu - 1; m++)
1287 {
1288 For(n, 2 * n_otu - 3)
1289 {
1290
1291 K_part = 0.0;
1292 if(g_i_node[m * (2 * n_otu - 3) + n] > 0)
1293 {
1294 t_slice_min_f[m] = t_slice_min[n];
1295 t_slice_max_f[m] = t_slice_max[n];
1296 cur_slices_cpy[m] = n;
1297
1298 For(i, n_otu - 1) cur_slices[i] = cur_slices_cpy[i];
1299 For(i, n_otu - 1)
1300 {
1301 for(j = i + 1; j < n_otu - 1; j++)
1302 {
1303 if(cur_slices[i] == cur_slices[j]) cur_slices[j] = -1;
1304 }
1305 }
1306
1307 shr_num_slices = 0;
1308 For(i, n_otu -1)
1309 {
1310 if(cur_slices[i] >= 0)
1311 {
1312 cur_slices_shr[shr_num_slices] = cur_slices[i];
1313 shr_num_slices++;
1314 }
1315 }
1316
1317 result_1 = TRUE;
1318
1319 Check_Time_Slices(tree -> n_root, tree -> n_root -> v[1], &result_1, t_slice_min_f, t_slice_max_f, tree);
1320 Check_Time_Slices(tree -> n_root, tree -> n_root -> v[2], &result_1, t_slice_min_f, t_slice_max_f, tree);
1321
1322 if(result_1 != TRUE) K_part = 0.0;
1323 else
1324 {
1325 int n_1, n_2;
1326
1327 x = 0;
1328 f = FALSE;
1329
1330 num_elem = 0;
1331
1332 if(comb_numb > 0)
1333 {
1334 For(i, comb_numb)
1335 {
1336 x = 0;
1337 For(j, n_otu - 1)
1338 {
1339
1340 dif[j] = combinations[i*(n_otu-1) + j] - cur_slices_cpy[j];
1341 /* dif[j] = combinations[i][j] - cur_slices_cpy[j]; */
1342 if(dif[j] == 0) x++;
1343 /* printf(". [%d] ", dif[j]); */
1344 }
1345 if(x == n_otu - 1) f = TRUE;
1346 }
1347 }
1348
1349
1350 if(!f)
1351 {
1352 For(i, n_otu - 1) combinations[comb_numb*(n_otu-1) + i] = cur_slices_cpy[i];
1353 /* printf("\n"); */
1354 /* printf(" [2][CUR SLICES PROPOSED] "); */
1355 /* printf(" [COMB NUMBER] = [%d] --- ", comb_numb); */
1356 /* For(i, n_otu - 1) printf(". [%d] .", combinations[comb_numb*(n_otu-1) + i]); */
1357 /* printf("\n"); */
1358
1359
1360 For(i, shr_num_slices)
1361 {
1362 Search_Root_Node_In_Slice(tree -> n_root, tree -> n_root -> v[1], root_nodes, &num_elem, t_slice_min[cur_slices_shr[i]], t_slice_max[cur_slices_shr[i]], t_slice_min_f, t_slice_max_f, tree);
1363 Search_Root_Node_In_Slice(tree -> n_root, tree -> n_root -> v[2], root_nodes, &num_elem, t_slice_min[cur_slices_shr[i]], t_slice_max[cur_slices_shr[i]], t_slice_min_f, t_slice_max_f, tree);
1364 }
1365 For(j, num_elem)
1366 {
1367 n_1 = 0;
1368 n_2 = 0;
1369
1370 Number_Of_Nodes_In_Slice(tree -> a_nodes[root_nodes[j] + n_otu], tree -> a_nodes[root_nodes[j] + n_otu] -> v[1], &n_1, t_slice_min_f, t_slice_max_f, tree);
1371 Number_Of_Nodes_In_Slice(tree -> a_nodes[root_nodes[j] + n_otu], tree -> a_nodes[root_nodes[j] + n_otu] -> v[2], &n_2, t_slice_min_f, t_slice_max_f, tree);
1372 /* printf("\n. n_1 [%d] n_2 [%d]\n", n_1, n_2); */
1373
1374 K_part = K_part + log(1) - log(n_1 + n_2 + 1) - LnGamma(n_1 + 1) - LnGamma(n_2 + 1);
1375 /* printf("\n. [CONT] log(m_i) [%f] \n", log(1) - log(n_1 + n_2 + 1) - LnGamma(n_1 + 1) - LnGamma(n_2 + 1)); */
1376 }
1377 /* printf("\n. [CONT] log(m_i) [%f] \n", K_part); */
1378 /* printf("\n. K_part [%f] \n", K_part); */
1379
1380 log_g_i = 0.0;
1381 For(i, n_otu - 2)
1382 log_g_i += log_g_i(lmbd,
1383 t_slice_max_f[i],
1384 t_slice_min_f[i],
1385 t_prior_max[i + n_otu],
1386 MAX(t_prior_min[i + n_otu],tree->times->nd_t[tree->n_root->num]));
1387 /* printf("\n. [START] log(g_i) [%f] \n", log_g_i); */
1388
1389 K_part = exp(K_part + log_g_i + scl_const);
1390
1391 if(K_part > max_K_val)
1392 {
1393 For(i, n_otu - 1) max_combination[i] = cur_slices_cpy[i];
1394 max_K_val = K_part;
1395 }
1396
1397 /* printf("\n. [START] m_i * g_i [%f] \n", K_part); */
1398
1399 K_val_approx[numb_approx] = K_part;
1400 numb_approx++;
1401
1402 K_total_cur = K_total_cur + K_part;
1403
1404 /* printf("\n. [APPROX] Approximated constant after one run (K_part) = [%f] \n", (K_part)); */
1405 /* printf("\n. K_max [%f] K_part [%f] \n", K_max, K_part); */
1406 /* printf("\n. [CONT] sum(m_i * g_i) [%f] \n", K_total_cur); */
1407 if(K_max < K_part)
1408 {
1409 K_max = K_part;
1410 /* For(i, n_otu - 1) slices_start_node[i] = cur_slices_cpy[i]; */
1411 count++;
1412 }
1413
1414 comb_numb++;
1415 if(comb_numb > max_size)
1416 {
1417 combinations = (int *)mRealloc(combinations, max_size*(n_otu - 1) + (n_otu - 1),sizeof(char));
1418 max_size = max_size + 1;
1419 }
1420 c++;
1421 }
1422 }
1423 }
1424 }
1425 }
1426 if(Are_Equal(count, 0, 1.E-10)) break;
1427 }
1428 while(1);
1429 /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
1430
1431 For(m, n_otu - 1)
1432 {
1433 For(i, n_otu - 1) cur_slices_cpy[i] = max_combination[i];
1434 For(i, n_otu - 1)
1435 {
1436 t_slice_min_f[i] = t_slice_min[cur_slices_cpy[i]];
1437 t_slice_max_f[i] = t_slice_max[cur_slices_cpy[i]];
1438 }
1439 For(n, 2 * n_otu - 3)
1440 {
1441
1442 K_part = 0.0;
1443 if(g_i_node[m * (2 * n_otu - 3) + n] > 0)
1444 {
1445 t_slice_min_f[m] = t_slice_min[n];
1446 t_slice_max_f[m] = t_slice_max[n];
1447 cur_slices_cpy[m] = n;
1448
1449 For(i, n_otu - 1) cur_slices[i] = cur_slices_cpy[i];
1450 For(i, n_otu - 1)
1451 {
1452 for(j = i + 1; j < n_otu - 1; j++)
1453 {
1454 if(cur_slices[i] == cur_slices[j]) cur_slices[j] = -1;
1455 }
1456 }
1457
1458 shr_num_slices = 0;
1459 For(i, n_otu -1)
1460 {
1461 if(cur_slices[i] >= 0)
1462 {
1463 cur_slices_shr[shr_num_slices] = cur_slices[i];
1464 shr_num_slices++;
1465 }
1466 }
1467
1468 result_1 = TRUE;
1469
1470 Check_Time_Slices(tree -> n_root, tree -> n_root -> v[1], &result_1, t_slice_min_f, t_slice_max_f, tree);
1471 Check_Time_Slices(tree -> n_root, tree -> n_root -> v[2], &result_1, t_slice_min_f, t_slice_max_f, tree);
1472
1473 if(result_1 != TRUE) K_part = 0.0;
1474 else
1475 {
1476 int n_1, n_2;
1477
1478 x = 0;
1479 f = FALSE;
1480
1481 num_elem = 0;
1482
1483 if(comb_numb > 0)
1484 {
1485 For(i, comb_numb)
1486 {
1487 x = 0;
1488 For(j, n_otu - 1)
1489 {
1490
1491 dif[j] = combinations[i*(n_otu-1) + j] - cur_slices_cpy[j];
1492 /* dif[j] = combinations[i][j] - cur_slices_cpy[j]; */
1493 if(dif[j] == 0) x++;
1494 /* printf(". [%d] ", dif[j]); */
1495 }
1496 if(x == n_otu - 1) f = TRUE;
1497 }
1498 }
1499
1500
1501 if(!f)
1502 {
1503 For(i, n_otu - 1) combinations[comb_numb*(n_otu-1) + i] = cur_slices_cpy[i];
1504 /* printf("\n"); */
1505 /* printf(" [2][CUR SLICES PROPOSED] "); */
1506 /* printf(" [COMB NUMBER] = [%d] --- ", comb_numb); */
1507 /* For(i, n_otu - 1) printf(". [%d] .", combinations[comb_numb*(n_otu-1) + i]); */
1508 /* printf("\n"); */
1509
1510
1511 For(i, shr_num_slices)
1512 {
1513 Search_Root_Node_In_Slice(tree -> n_root, tree -> n_root -> v[1], root_nodes, &num_elem, t_slice_min[cur_slices_shr[i]], t_slice_max[cur_slices_shr[i]], t_slice_min_f, t_slice_max_f, tree);
1514 Search_Root_Node_In_Slice(tree -> n_root, tree -> n_root -> v[2], root_nodes, &num_elem, t_slice_min[cur_slices_shr[i]], t_slice_max[cur_slices_shr[i]], t_slice_min_f, t_slice_max_f, tree);
1515 }
1516 For(j, num_elem)
1517 {
1518 n_1 = 0;
1519 n_2 = 0;
1520
1521 Number_Of_Nodes_In_Slice(tree -> a_nodes[root_nodes[j] + n_otu], tree -> a_nodes[root_nodes[j] + n_otu] -> v[1], &n_1, t_slice_min_f, t_slice_max_f, tree);
1522 Number_Of_Nodes_In_Slice(tree -> a_nodes[root_nodes[j] + n_otu], tree -> a_nodes[root_nodes[j] + n_otu] -> v[2], &n_2, t_slice_min_f, t_slice_max_f, tree);
1523 /* printf("\n. n_1 [%d] n_2 [%d]\n", n_1, n_2); */
1524
1525 K_part = K_part + log(1) - log(n_1 + n_2 + 1) - LnGamma(n_1 + 1) - LnGamma(n_2 + 1);
1526 /* printf("\n. [CONT] log(m_i) [%f] \n", log(1) - log(n_1 + n_2 + 1) - LnGamma(n_1 + 1) - LnGamma(n_2 + 1)); */
1527 }
1528 /* printf("\n. [CONT] log(m_i) [%f] \n", K_part); */
1529 /* printf("\n. K_part [%f] \n", K_part); */
1530
1531 log_g_i = 0.0;
1532 For(i, n_otu - 2)
1533 log_g_i += log_g_i(lmbd,
1534 t_slice_max_f[i],
1535 t_slice_min_f[i],
1536 t_prior_max[i + n_otu],
1537 MAX(t_prior_min[i + n_otu],tree->times->nd_t[tree->n_root->num]));
1538 /* printf("\n. [START] log(g_i) [%f] \n", log_g_i); */
1539
1540 K_part = exp(K_part + log_g_i + scl_const);
1541
1542 if(K_part > max_K_val)
1543 {
1544 For(i, n_otu - 1) max_combination[i] = cur_slices_cpy[i];
1545 max_K_val = K_part;
1546 }
1547
1548 /* printf("\n. [START] m_i * g_i [%f] \n", K_part); */
1549
1550 K_val_approx[numb_approx] = K_part;
1551 numb_approx++;
1552
1553 K_total_cur = K_total_cur + K_part;
1554
1555 comb_numb++;
1556 if(comb_numb > max_size)
1557 {
1558 combinations = (int *)mRealloc(combinations, max_size*(n_otu - 1) + (n_otu - 1),sizeof(char));
1559 max_size = max_size + 1;
1560 }
1561 c++;
1562 }
1563 }
1564 }
1565 }
1566 }
1567 /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
1568 K_total = K_total + K_total_cur;
1569 /* printf("\n. [APPROX TOTAL] Approximated constant (K_total) = [%f] \n", (K_total)); */
1570 /* printf("\n%G %d", K_total, c); */
1571 if(Are_Equal(c, 0, 1.E-10)) numb_unsuc++;
1572 else numb_unsuc = 0;
1573 }
1574 while(numb_unsuc < 100);
1575
1576 /* printf("\n"); */
1577 /* printf("\n. [APPROX TOTAL] log(K_total) = [%f] \n", log(K_total)); */
1578 /* printf("\n. [APPROX TOTAL] log(Constant) = scl_const - log(K_total) = [%f] \n", scl_const - log(K_total)); */
1579 /* printf("\n. [APPROX TOTAL] Approximated constant 1 / (K_total) = [%f] \n", 1 / (K_total)); */
1580 /* printf("\n ____________________________________________________________________________________________________ \n"); */
1581 /* printf("\n. [APPROX TOTAL] Numb_approx = [%d] \n", numb_approx); */
1582 /* printf("\n"); */
1583 /* } */
1584
1585 /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
1586 /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
1587 /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
1588
1589
1590 free(combinations);
1591 free(max_combination);
1592 free(dif);
1593
1594 free(t_cur_slice_min);
1595 free(t_cur_slice_max);
1596 free(cur_slices);
1597 free(cur_slices_cpy);
1598 free(slices_start_node);
1599 free(cur_slices_shr);
1600 free(t_slice);
1601 free(t_slice_min);
1602 free(t_slice_max);
1603 free(t_slice_min_f);
1604 free(t_slice_max_f);
1605 free(indic);
1606 free(slice_numbers);
1607 free(root_nodes);
1608 free(n_slice);
1609 free(g_i_node);
1610 free(K_val_approx);
1611
1612 return(-log(K_total));
1613 }
1614
1615 ////////////////////////////////////////////////////////////////////////////
1616 ////////////////////////////////////////////////////////////////////////////
Number_Of_Comb_Slices(int m,int num_elem,int * n_slice)1617 int Number_Of_Comb_Slices(int m, int num_elem, int *n_slice)
1618 {
1619 int i, num_comb;
1620
1621 i = 0;
1622 num_comb = 1;
1623
1624 for(i = m; i < num_elem; i++) num_comb = num_comb * n_slice[i];
1625
1626 return(num_comb);
1627 }
1628
1629
1630
1631 //////////////////////////////////////////////////////////////
1632 //////////////////////////////////////////////////////////////
1633 //Check the combination of the time slices to be set correctly.
Check_Time_Slices(t_node * a,t_node * d,int * result,phydbl * t_cur_slice_min,phydbl * t_cur_slice_max,t_tree * tree)1634 void Check_Time_Slices(t_node *a, t_node *d, int *result, phydbl *t_cur_slice_min, phydbl *t_cur_slice_max, t_tree *tree)
1635 {
1636 int n_otu;
1637
1638 n_otu = tree -> n_otu;
1639
1640
1641 d -> anc = a;
1642 if(d -> tax) return;
1643 else
1644 {
1645 if(t_cur_slice_max[d -> num - n_otu] < t_cur_slice_max[a -> num - n_otu])
1646 {
1647 *result = FALSE;
1648 }
1649
1650 int i;
1651 for(i=0;i<3;i++)
1652 if((d -> v[i] != d -> anc) && (d -> b[i] != tree -> e_root))
1653 Check_Time_Slices(d, d -> v[i], result, t_cur_slice_min, t_cur_slice_max, tree);
1654 }
1655 }
1656
1657 //////////////////////////////////////////////////////////////
1658 /////////////////////////////////////////////////////////////
1659 //Getting the number of nodes on both sides from the node d_start, that are in the slice of that node.
Number_Of_Nodes_In_Slice(t_node * d_start,t_node * d,int * n,phydbl * t_cur_slice_min,phydbl * t_cur_slice_max,t_tree * tree)1660 void Number_Of_Nodes_In_Slice(t_node *d_start, t_node *d, int *n, phydbl *t_cur_slice_min, phydbl *t_cur_slice_max, t_tree *tree)
1661 {
1662 int n_otu;
1663
1664 n_otu = tree -> n_otu;
1665
1666
1667 if(d -> tax) return;
1668 else
1669 {
1670 if(Are_Equal(t_cur_slice_max[d_start -> num - n_otu], t_cur_slice_max[d -> num - n_otu], 1.E-10) && Are_Equal(t_cur_slice_min[d_start -> num - n_otu], t_cur_slice_min[d -> num - n_otu], 1.E-10))
1671 {
1672 (*n)++;
1673 int i;
1674 for(i=0;i<3;i++)
1675 if((d -> v[i] != d -> anc) && (d -> b[i] != tree -> e_root))
1676 Number_Of_Nodes_In_Slice(d_start, d -> v[i], n, t_cur_slice_min, t_cur_slice_max, tree);
1677 }
1678 }
1679 }
1680
1681
1682 //////////////////////////////////////////////////////////////
1683 /////////////////////////////////////////////////////////////
1684 //Returnig the root node in the given slice.
Search_Root_Node_In_Slice(t_node * d_start,t_node * d,int * root_nodes,int * num_elem,phydbl t_slice_min,phydbl t_slice_max,phydbl * t_cur_slice_min,phydbl * t_cur_slice_max,t_tree * tree)1685 void Search_Root_Node_In_Slice(t_node *d_start, t_node *d, int *root_nodes, int *num_elem, phydbl t_slice_min, phydbl t_slice_max, phydbl *t_cur_slice_min, phydbl *t_cur_slice_max, t_tree *tree)
1686 {
1687 int j, n_otu, f;
1688 j = 0;
1689 f = FALSE;
1690 n_otu = tree -> n_otu;
1691
1692 if(Are_Equal(t_cur_slice_max[d_start -> num - n_otu], t_slice_max, 1.E-10) && Are_Equal(t_cur_slice_min[d_start -> num - n_otu], t_slice_min, 1.E-10))
1693 {
1694 For(j, *num_elem) if(d_start -> num - n_otu == (root_nodes[j])) f = TRUE;
1695 if(f != TRUE)
1696 {
1697 (root_nodes[(*num_elem)]) = d_start -> num - n_otu;
1698 (*num_elem)++;
1699 return;
1700 }
1701
1702 }
1703 else
1704 {
1705 d -> anc = d_start;
1706 if(d -> tax) return;
1707 else
1708 {
1709 if(Are_Equal(t_cur_slice_max[d -> num - n_otu], t_slice_max, 1.E-10) && Are_Equal(t_cur_slice_min[d -> num - n_otu], t_slice_min, 1.E-10))
1710 {
1711 (root_nodes[*num_elem]) = d -> num - n_otu;
1712 (*num_elem)++;
1713 return;
1714 }
1715
1716 int i;
1717 for(i=0;i<3;i++)
1718 if((d -> v[i] != d -> anc) && (d -> b[i] != tree -> e_root))
1719 Search_Root_Node_In_Slice(d, d -> v[i], root_nodes, num_elem, t_slice_min, t_slice_max, t_cur_slice_min, t_cur_slice_max, tree);
1720 }
1721 }
1722 }
1723
1724
1725 //////////////////////////////////////////////////////////////
1726 //////////////////////////////////////////////////////////////
Factorial(int base)1727 int Factorial(int base)
1728 {
1729 if(base == 0) return(1);
1730 if(base == 1) return(1);
1731 return(base * Factorial(base-1));
1732 }
1733
1734
1735 //////////////////////////////////////////////////////////////
1736 //////////////////////////////////////////////////////////////
1737 //Calculate the vector of the norm.constants for prior on node times.
1738 //The length of the vector is the total number of combinations of calibrations.
Norm_Constant_Prior_Times(t_tree * tree)1739 phydbl *Norm_Constant_Prior_Times(t_tree *tree)
1740 {
1741
1742 phydbl *log_K_val;
1743 int i, tot_num_comb;
1744 t_cal *calib;
1745
1746 calib = tree -> rates -> calib;
1747
1748 tot_num_comb = Number_Of_Comb(calib);
1749
1750
1751 log_K_val = (phydbl *)mCalloc(tot_num_comb, sizeof(phydbl));
1752
1753 For(i, tot_num_comb)
1754 {
1755
1756 Set_Current_Calibration(i, tree);
1757
1758 int result = TRUE;
1759
1760 TIMES_Set_All_Node_Priors_S(&result, tree);
1761
1762 if(result == TRUE) log_K_val[i] = K_Constant_Prior_Times_Log(tree);
1763
1764 while(calib -> prev) calib = calib -> prev;
1765 }
1766 return(log_K_val);
1767 }
1768
1769
1770 //////////////////////////////////////////////////////////////
1771 //////////////////////////////////////////////////////////////
1772 //Sets a vector of the partial probabilities for each combination of calibrations
TIMES_Calib_Partial_Proba(t_tree * tree)1773 void TIMES_Calib_Partial_Proba(t_tree *tree)
1774 {
1775
1776 phydbl *times_partial_proba, proba, *t_prior_min, *t_prior_max;
1777 int i, j, k, tot_num_comb;
1778 t_cal *calib;
1779 short int *t_has_prior;
1780
1781 proba = 0.0;
1782
1783 times_partial_proba = tree -> rates -> times_partial_proba;
1784 calib = tree -> rates -> calib;
1785 t_prior_min = tree -> rates -> t_prior_min;
1786 t_prior_max = tree -> rates -> t_prior_max;
1787 t_has_prior = tree -> rates -> t_has_prior;
1788
1789 tot_num_comb = Number_Of_Comb(calib);
1790
1791 For(i, tot_num_comb)
1792 {
1793 times_partial_proba[i] = 1.0;
1794 for(j = tree -> n_otu; j < 2 * tree -> n_otu - 1; j++)
1795 {
1796 t_prior_min[j] = -BIG;
1797 t_prior_max[j] = BIG;
1798 t_has_prior[j] = NO;
1799 }
1800 do
1801 {
1802 k = (i % Number_Of_Comb(calib)) / Number_Of_Comb(calib -> next);
1803 if(calib -> all_applies_to[k] -> num)
1804 {
1805 t_prior_min[calib -> all_applies_to[k] -> num] = MAX(t_prior_min[calib -> all_applies_to[k] -> num], calib -> lower);
1806 t_prior_max[calib -> all_applies_to[k] -> num] = MIN(t_prior_max[calib -> all_applies_to[k] -> num], calib -> upper);
1807 t_has_prior[calib -> all_applies_to[k] -> num] = YES;
1808 proba = calib -> proba[calib -> all_applies_to[k] -> num];
1809 times_partial_proba[i] *= proba;
1810 }
1811 else
1812 {
1813 proba = calib -> proba[2 * tree -> n_otu - 1];
1814 times_partial_proba[i] *= proba;
1815
1816 }
1817
1818 if(calib -> next) calib = calib -> next;
1819 else break;
1820 }
1821 while(calib);
1822
1823 int result;
1824
1825 result = TRUE;
1826
1827 TIMES_Set_All_Node_Priors_S(&result, tree);
1828
1829 if(result != TRUE) times_partial_proba[i] = 0; /* printf("\n. [4] Partial Proba [%f] \n", times_partial_proba[i]); */
1830
1831 while(calib -> prev) calib = calib -> prev;
1832
1833 }
1834
1835 phydbl sum_proba;
1836 sum_proba = 0.0;
1837
1838 For(i, tot_num_comb) sum_proba += times_partial_proba[i];
1839 if(!Are_Equal(sum_proba, 1.0, 1.E-10))
1840 {
1841 For(i, tot_num_comb) times_partial_proba[i] = times_partial_proba[i] / sum_proba;
1842 }
1843
1844 }
1845
1846
1847
1848 //////////////////////////////////////////////////////////////
1849 //////////////////////////////////////////////////////////////
1850 //Function checks if the randomized node times are within the
1851 //upper and lower time limits, taken into account the times of
1852 //the ancestor and descendent.
Check_Node_Time(t_node * a,t_node * d,int * result,t_tree * tree)1853 void Check_Node_Time(t_node *a, t_node *d, int *result, t_tree *tree)
1854 {
1855 phydbl t_low, t_up;
1856 phydbl *t_prior_min, *t_prior_max, *nd_t;
1857
1858 t_prior_min = tree -> rates -> t_prior_min;
1859 t_prior_max = tree -> rates -> t_prior_max;
1860 nd_t = tree -> rates -> nd_t;
1861
1862 if((a == tree -> n_root) &&
1863 ((nd_t[a -> num] > MIN(t_prior_max[a -> num], MIN(nd_t[a -> v[1] -> num], nd_t[a -> v[2] -> num]))) ||
1864 (nd_t[a -> num] < t_prior_min[a -> num])))
1865
1866 {
1867 *result = FALSE;
1868 return;
1869 }
1870
1871 if(d -> tax) return;
1872 else
1873 {
1874 t_low = MAX(t_prior_min[d -> num], nd_t[d -> anc -> num]);
1875 t_up = MIN(t_prior_max[d -> num], MIN(nd_t[d -> v[1] -> num], nd_t[d -> v[2] -> num]));
1876 /* printf("\n. CHECK: %d t:%f u:%f d:%f",d->num,nd_t[d->num],t_up,t_low); */
1877 if(nd_t[d -> num] < t_low || nd_t[d -> num] > t_up)
1878 {
1879 *result = FALSE;
1880 return;
1881 }
1882
1883 int i;
1884 for(i=0;i<3;i++)
1885 if((d -> v[i] != d -> anc) && (d -> b[i] != tree -> e_root))
1886 Check_Node_Time(d, d -> v[i], result, tree);
1887 }
1888 }
1889
1890 //////////////////////////////////////////////////////////////
1891 //////////////////////////////////////////////////////////////
1892 //Function calculates the TOTAL number of calibration combinations,
1893 //given the number of nodes to which each calibartion applies to.
Number_Of_Comb(t_cal * calib)1894 int Number_Of_Comb(t_cal *calib)
1895 {
1896
1897 int num_comb;
1898
1899 if(!calib) return(1);
1900 num_comb = 1;
1901 do
1902 {
1903 num_comb *= calib -> n_all_applies_to;
1904 if(calib -> next) calib = calib -> next;
1905 else break;
1906 }
1907 while(calib);
1908 return(num_comb);
1909 }
1910
1911
1912 //////////////////////////////////////////////////////////////
1913 //////////////////////////////////////////////////////////////
1914 //Function calculates the TOTAL number of calibration combinations,
1915 //given the number of nodes to which each calibartion applies to.
Number_Of_Calib(t_cal * calib)1916 int Number_Of_Calib(t_cal *calib)
1917 {
1918
1919 int num_calib;
1920
1921
1922 num_calib = 0;
1923 do
1924 {
1925 num_calib++;
1926 if(calib -> next) calib = calib -> next;
1927 else break;
1928 }
1929 while(calib);
1930 return(num_calib);
1931 }
1932
1933
1934 //////////////////////////////////////////////////////////////
1935 //////////////////////////////////////////////////////////////
1936 // Function sets current calibration in the following way:
1937 // Suppose we have a vector of calibrations C=(C1, C2, C3), each calibration
1938 // applies to a set of nodes. We can reach each node number through the indexes (corresponds
1939 // to the number the information was read). C1={0,1,2}, C2={0,1}, C3={0};
1940 // The total number of combinations is 3*2*1=6. The first combination with row number 0
1941 // will be {0,0,0}, the second row will be {0,1,0} and so on. Calling the node numbers with
1942 // the above indexes will return current calibration. Also sets the vector of probabilities
1943 // for current calibration combination.
Set_Current_Calibration(int row,t_tree * tree)1944 void Set_Current_Calibration(int row, t_tree *tree)
1945 {
1946 t_cal *calib;
1947 phydbl *t_prior_min, *t_prior_max;
1948 short int *t_has_prior;
1949 int k, i, j, *curr_nd_for_cal;
1950
1951 calib = tree -> rates -> calib;
1952 t_prior_min = tree -> rates -> t_prior_min;
1953 t_prior_max = tree -> rates -> t_prior_max;
1954 t_has_prior = tree -> rates -> t_has_prior;
1955 curr_nd_for_cal = tree -> rates -> curr_nd_for_cal;
1956 /* printf("\n COMBINATION NUMBER [%d] \n", row); */
1957 for(j = tree -> n_otu; j < 2 * tree -> n_otu - 1; j++)
1958 {
1959 t_prior_min[j] = -BIG;
1960 t_prior_max[j] = BIG;
1961 t_has_prior[j] = NO;
1962 }
1963
1964 k = -1;
1965 i = 0;
1966 do
1967 {
1968 k = (row % Number_Of_Comb(calib)) / Number_Of_Comb(calib -> next);
1969 if(calib -> all_applies_to[k] -> num)
1970 {
1971 /* printf("\n"); */
1972 /* printf(" %f %f %f %f ", calib -> lower, calib -> upper, t_prior_min[calib -> all_applies_to[k] -> num], t_prior_max[calib -> all_applies_to[k] -> num]); */
1973 /* printf("\n"); */
1974 /* printf("\n"); */
1975 /* printf(" Node number [%d] ", calib -> all_applies_to[k] -> num); */
1976 /* printf("\n"); */
1977 t_prior_min[calib -> all_applies_to[k] -> num] = MAX(t_prior_min[calib -> all_applies_to[k] -> num], calib -> lower); /* printf("\n Prior min [%f] \n", t_prior_min[calib -> all_applies_to[k] -> num]); */
1978 t_prior_max[calib -> all_applies_to[k] -> num] = MIN(t_prior_max[calib -> all_applies_to[k] -> num], calib -> upper); /* printf("\n Prior max [%f] \n", t_prior_max[calib -> all_applies_to[k] -> num]); */
1979 t_has_prior[calib -> all_applies_to[k] -> num] = YES;
1980 curr_nd_for_cal[i] = calib -> all_applies_to[k] -> num;
1981 i++;
1982 }
1983 if(calib->next) calib = calib->next;
1984 else break;
1985 }
1986 while(calib);
1987 while(calib -> prev) calib = calib -> prev;
1988
1989 }
1990
1991 //////////////////////////////////////////////////////////////
1992 //////////////////////////////////////////////////////////////
1993 //Randomly choose a combination of calibrations drawing an index of calibration combination,
1994 //used function Set_Cur_Calibration.
Random_Calibration(t_tree * tree)1995 void Random_Calibration(t_tree *tree)
1996 {
1997 int rnd, num_comb;
1998 t_cal *calib;
1999
2000 calib = tree -> rates -> calib;
2001
2002 num_comb = Number_Of_Comb(calib);
2003
2004 srand(time(NULL));
2005 rnd = rand()%(num_comb);
2006
2007 Set_Current_Calibration(rnd, tree);
2008 TIMES_Set_All_Node_Priors(tree);
2009
2010 }
2011
2012
2013
2014 //////////////////////////////////////////////////////////////
2015 //////////////////////////////////////////////////////////////
2016 //Variable curr_nd_for_cal is a vector of node numbers, the length of that vector is a number of calibrations.
2017 //Function randomly updates that vector by randomly changing one node and setting times limits with respect
2018 //to a new vector.
RND_Calibration_And_Node_Number(t_tree * tree)2019 int RND_Calibration_And_Node_Number(t_tree *tree)
2020 {
2021 int i, j, tot_num_cal, cal_num, node_ind, node_num, *curr_nd_for_cal;
2022 phydbl *t_prior_min, *t_prior_max; //*times_partial_proba;
2023 short int *t_has_prior;
2024 t_cal *cal;
2025
2026 tot_num_cal = tree -> rates -> tot_num_cal;
2027 t_prior_min = tree -> rates -> t_prior_min;
2028 t_prior_max = tree -> rates -> t_prior_max;
2029 t_has_prior = tree -> rates -> t_has_prior;
2030 curr_nd_for_cal = tree -> rates -> curr_nd_for_cal;
2031 cal = tree -> rates -> calib;
2032
2033 cal_num = rand()%(tot_num_cal - 1);
2034
2035 i = 0;
2036 while (i != cal_num)
2037 {
2038 cal = cal -> next;
2039 i++;
2040 }
2041
2042 node_ind = rand()%(cal -> n_all_applies_to);
2043 node_num = cal -> all_applies_to[node_ind] -> num;
2044
2045 curr_nd_for_cal[cal_num] = node_num;
2046
2047 for(j = tree -> n_otu; j < 2 * tree -> n_otu - 1; j++)
2048 {
2049 t_prior_min[j] = -BIG;
2050 t_prior_max[j] = BIG;
2051 t_has_prior[j] = NO;
2052 }
2053
2054 while(cal -> prev) cal = cal -> prev;
2055
2056 i = 0;
2057 do
2058 {
2059 t_prior_min[curr_nd_for_cal[i]] = cal -> lower;
2060 t_prior_max[curr_nd_for_cal[i]] = cal -> upper;
2061 t_has_prior[curr_nd_for_cal[i]] = YES;
2062 i++;
2063 if(cal->next) cal = cal -> next;
2064 else break;
2065 }
2066 while(cal);
2067
2068 while(cal -> prev) cal = cal -> prev;
2069
2070 TIMES_Set_All_Node_Priors(tree);
2071
2072 return(node_num);
2073 }
2074
2075
2076
2077 //////////////////////////////////////////////////////////////
2078 //////////////////////////////////////////////////////////////
2079 //Return the value uniformly distributed between two values.
Randomize_One_Node_Time(phydbl min,phydbl max)2080 phydbl Randomize_One_Node_Time(phydbl min, phydbl max)
2081 {
2082 phydbl u;
2083
2084 u = Uni();
2085 u *= (max - min);
2086 u += min;
2087
2088 return(u);
2089 }
2090
2091
2092
2093 //////////////////////////////////////////////////////////////
2094 //////////////////////////////////////////////////////////////
2095 //Calculates the Hastings ratio for the analysis. Used in case of
2096 //calibration conditional jump. NOT THE RIGHT ONE TO USE!
Lk_Hastings_Ratio_Times(t_node * a,t_node * d,phydbl * tot_prob,t_tree * tree)2097 void Lk_Hastings_Ratio_Times(t_node *a, t_node *d, phydbl *tot_prob, t_tree *tree)
2098 {
2099 phydbl t_low, t_up;
2100 phydbl *t_prior_min, *t_prior_max, *nd_t;
2101
2102 t_prior_min = tree -> rates -> t_prior_min;
2103 t_prior_max = tree -> rates -> t_prior_max;
2104 nd_t = tree -> rates -> nd_t;
2105
2106 if(d -> tax) return;
2107 else
2108 {
2109 t_low = MAX(t_prior_min[d -> num], nd_t[d -> anc -> num]);
2110 t_up = MIN(t_prior_max[d -> num], MIN(nd_t[d -> v[1] -> num], nd_t[d -> v[2] -> num]));
2111
2112 (*tot_prob) += log(1) - log(t_up - t_low);
2113
2114 int i;
2115 for(i=0;i<3;i++)
2116 if((d -> v[i] != d -> anc) && (d -> b[i] != tree -> e_root))
2117 {
2118 Lk_Hastings_Ratio_Times(d, d -> v[i], tot_prob, tree);
2119 }
2120 }
2121 }
2122
2123
2124
2125 //////////////////////////////////////////////////////////////
2126 //////////////////////////////////////////////////////////////
2127 //Updates nodes which are below a randomized node in case if new proposed time
2128 //for that node is below the current value.
Update_Descendent_Cond_Jump(t_node * a,t_node * d,phydbl * L_Hast_ratio,t_tree * tree)2129 void Update_Descendent_Cond_Jump(t_node *a, t_node *d, phydbl *L_Hast_ratio, t_tree *tree)
2130 {
2131 int result = TRUE;
2132 phydbl t_low, t_up;
2133 phydbl *t_prior_min, *t_prior_max, *nd_t;
2134
2135 t_prior_min = tree -> rates -> t_prior_min;
2136 t_prior_max = tree -> rates -> t_prior_max;
2137 nd_t = tree -> rates -> nd_t;
2138
2139 Check_Node_Time(tree -> n_root, tree -> n_root -> v[1], &result, tree);
2140 Check_Node_Time(tree -> n_root, tree -> n_root -> v[2], &result, tree);
2141
2142 if(d -> tax) return;
2143 else
2144 {
2145 if(result != TRUE)
2146 {
2147 int i;
2148 t_low = MAX(nd_t[a -> num], t_prior_min[d -> num]);
2149 if(t_low < MIN(nd_t[d -> v[1] -> num], nd_t[d -> v[2] -> num])) t_up = MIN(t_prior_max[d -> num], MIN(nd_t[d -> v[1] -> num], nd_t[d -> v[2] -> num]));
2150 else t_up = t_prior_max[d -> num];
2151 nd_t[d -> num] = Randomize_One_Node_Time(t_low, t_up);
2152 (*L_Hast_ratio) += log(1) - log(t_up - t_low);
2153 for(i=0;i<3;i++)
2154 if((d -> v[i] != d -> anc) && (d -> b[i] != tree -> e_root))
2155 Update_Descendent_Cond_Jump(d, d -> v[i], L_Hast_ratio, tree);
2156 }
2157 else return;
2158 }
2159 }
2160
2161
2162
2163 //////////////////////////////////////////////////////////////
2164 //////////////////////////////////////////////////////////////
2165 //Updates nodes which are above a randomized node in case if new proposed time
2166 //for that node is above the current value.
Update_Ancestor_Cond_Jump(t_node * d,phydbl * L_Hast_ratio,t_tree * tree)2167 void Update_Ancestor_Cond_Jump(t_node *d, phydbl *L_Hast_ratio, t_tree *tree)
2168 {
2169 int result = TRUE;
2170 phydbl t_low, t_up;
2171 phydbl *t_prior_min, *t_prior_max, *nd_t;
2172
2173 t_prior_min = tree -> rates -> t_prior_min;
2174 t_prior_max = tree -> rates -> t_prior_max;
2175 nd_t = tree -> rates -> nd_t;
2176
2177 Check_Node_Time(tree -> n_root, tree -> n_root -> v[1], &result, tree);
2178 Check_Node_Time(tree -> n_root, tree -> n_root -> v[2], &result, tree);
2179
2180 if(result != TRUE)
2181 {
2182 if(d == tree -> n_root)
2183 {
2184
2185 t_low = t_prior_min[d -> num];
2186 t_up = MIN(t_prior_max[d -> num], MIN(nd_t[d -> v[1] -> num], nd_t[d -> v[2] -> num]));
2187 nd_t[d -> num] = Randomize_One_Node_Time(t_low, t_up);
2188 (*L_Hast_ratio) += log(1) - log(t_up - t_low);
2189 return;
2190 }
2191 else
2192 {
2193 t_up = MIN(t_prior_max[d -> num], MIN(nd_t[d -> v[1] -> num], nd_t[d -> v[2] -> num]));
2194 if(nd_t[d -> anc -> num] > t_up) t_low = t_prior_min[d -> num];
2195 else t_low = MAX(t_prior_min[d -> num], nd_t[d -> anc -> num]);
2196 nd_t[d -> num] = Randomize_One_Node_Time(t_low, t_up);
2197 (*L_Hast_ratio) += log(1) - log(t_up - t_low);
2198 Update_Ancestor_Cond_Jump(d -> anc, L_Hast_ratio, tree);
2199 }
2200
2201 }
2202 else return;
2203 }
2204
2205 //////////////////////////////////////////////////////////////
2206 //////////////////////////////////////////////////////////////
2207 //when made a calibration conditional jump, updates node times
2208 //with respect to the new calibration which was made with respect
2209 //to the randomly chosen node, the root is fixed. Updates only those nodes
2210 //that are not within new intervals. Traverse up and down.
Update_Times_RND_Node_Ancestor_Descendant(int rnd_node,phydbl * L_Hast_ratio,t_tree * tree)2211 void Update_Times_RND_Node_Ancestor_Descendant(int rnd_node, phydbl *L_Hast_ratio, t_tree *tree)
2212 {
2213 int i;
2214 phydbl *t_prior_min, *t_prior_max, *nd_t;
2215 phydbl new_time_rnd_node = 0.0;
2216
2217 t_prior_min = tree -> rates -> t_prior_min;
2218 t_prior_max = tree -> rates -> t_prior_max;
2219 nd_t = tree -> rates -> nd_t;
2220
2221 new_time_rnd_node = Randomize_One_Node_Time(t_prior_min[rnd_node], t_prior_max[rnd_node]);
2222
2223 nd_t[rnd_node] = new_time_rnd_node;
2224
2225 Update_Ancestor_Cond_Jump(tree -> a_nodes[rnd_node] -> anc, L_Hast_ratio, tree);
2226 for(i=0;i<3;i++)
2227 if((tree -> a_nodes[rnd_node] -> v[i] != tree -> a_nodes[rnd_node] -> anc) && (tree -> a_nodes[rnd_node] -> b[i] != tree -> e_root))
2228 Update_Descendent_Cond_Jump(tree -> a_nodes[rnd_node], tree -> a_nodes[rnd_node] -> v[i], L_Hast_ratio, tree);
2229
2230 }
2231
2232
2233
2234 //////////////////////////////////////////////////////////////
2235 //////////////////////////////////////////////////////////////
2236 //when made a calibration conditional jump, updates node times
2237 //with respect to the new calibration which was made with respect
2238 //to the randomly chosen node, starting from the root down to the tips.
2239 //Updates only those nodes that are not within new intervals.
Update_Times_Down_Tree(t_node * a,t_node * d,phydbl * L_Hastings_ratio,t_tree * tree)2240 void Update_Times_Down_Tree(t_node *a, t_node *d, phydbl *L_Hastings_ratio, t_tree *tree)
2241 {
2242 int i;
2243 phydbl *t_prior_min, *t_prior_max, *nd_t, t_low, t_up;
2244
2245 t_prior_min = tree -> rates -> t_prior_min;
2246 t_prior_max = tree -> rates -> t_prior_max;
2247 nd_t = tree -> rates -> nd_t;
2248
2249
2250 t_low = MAX(t_prior_min[d -> num], nd_t[a -> num]);
2251 t_up = t_prior_max[d -> num];
2252
2253 //printf("\n. [1] Node number: [%d] \n", d -> num);
2254 if(d -> tax) return;
2255 else
2256 {
2257 if(nd_t[d -> num] > t_up || nd_t[d -> num] < t_low)
2258 {
2259 //printf("\n. [2] Node number: [%d] \n", d -> num);
2260 //(*L_Hastings_ratio) += (log(1) - log(t_up - t_low));
2261 (*L_Hastings_ratio) += (- log(t_up - t_low));
2262 nd_t[d -> num] = Randomize_One_Node_Time(t_low, t_up);
2263 /* t_prior_min[d -> num] = t_low; */
2264 /* t_prior_max[d -> num] = t_up; */
2265 }
2266
2267 for(i=0;i<3;i++)
2268 if((d -> v[i] != d -> anc) && (d -> b[i] != tree -> e_root))
2269 Update_Times_Down_Tree(d, d -> v[i], L_Hastings_ratio, tree);
2270 }
2271 }
2272
2273 //////////////////////////////////////////////////////////////
2274 //////////////////////////////////////////////////////////////
2275
XML_Search_Node_Attribute_Value_Clade(char * attr_name,char * value,int skip,xml_node * node)2276 xml_node *XML_Search_Node_Attribute_Value_Clade(char *attr_name, char *value, int skip, xml_node *node)
2277 {
2278 xml_node *match;
2279
2280 //printf("\n. Node name [%s] Attr name [%s] Attr value [%s] \n", node -> name, attr_name, value);
2281 if(!node)
2282 {
2283 PhyML_Printf("\n== node: %p attr: %p",node,node?node->attr:NULL);
2284 PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
2285 Exit("\n");
2286 }
2287
2288 match = NULL;
2289 if(skip == NO && node -> attr)
2290 {
2291 xml_attr *attr;
2292 attr = node -> attr;
2293 do
2294 {
2295 if(!strcmp(attr -> name, attr_name) && !strcmp(attr -> value, value))
2296 {
2297 match = node;
2298 break;
2299 }
2300 attr = attr->next;
2301 if(!attr) break;
2302 }
2303 while(1);
2304 }
2305 if(match) return(match);
2306 if(node -> next && !match)
2307 {
2308 match = XML_Search_Node_Attribute_Value_Clade(attr_name, value, NO, node -> next);
2309 return match;
2310 }
2311 return NULL;
2312 }
2313
2314 //////////////////////////////////////////////////////////////
2315 //////////////////////////////////////////////////////////////
2316
2317
2318 //////////////////////////////////////////////////////////////
2319 //////////////////////////////////////////////////////////////
2320
2321
2322 //////////////////////////////////////////////////////////////
2323 //////////////////////////////////////////////////////////////
TIMES_Set_All_Node_Priors_S(int * result,t_tree * tree)2324 void TIMES_Set_All_Node_Priors_S(int *result, t_tree *tree)
2325 {
2326 int i;
2327 phydbl min_prior;
2328
2329
2330 /* Set all t_prior_max values */
2331 TIMES_Set_All_Node_Priors_Bottom_Up_S(tree->n_root,tree->n_root->v[2], result, tree);
2332 TIMES_Set_All_Node_Priors_Bottom_Up_S(tree->n_root,tree->n_root->v[1], result, tree);
2333
2334 tree->times->t_prior_max[tree->n_root->num] =
2335 MIN(tree->times->t_prior_max[tree->n_root->num],
2336 MIN(tree->times->t_prior_max[tree->n_root->v[2]->num],
2337 tree->times->t_prior_max[tree->n_root->v[1]->num]));
2338
2339
2340 /* Set all t_prior_min values */
2341 if(!tree->times->t_has_prior[tree->n_root->num])
2342 {
2343 min_prior = 1.E+10;
2344 For(i,2*tree->n_otu-2)
2345 {
2346 if(tree->times->t_has_prior[i])
2347 {
2348 if(tree->times->t_prior_min[i] < min_prior)
2349 min_prior = tree->times->t_prior_min[i];
2350 }
2351 }
2352 tree->times->t_prior_min[tree->n_root->num] = 2.0 * min_prior;
2353 }
2354
2355 if(tree->times->t_prior_min[tree->n_root->num] > 0.0)
2356 {
2357 *result = FALSE;
2358 }
2359
2360
2361 TIMES_Set_All_Node_Priors_Top_Down_S(tree->n_root,tree->n_root->v[2], result, tree);
2362 TIMES_Set_All_Node_Priors_Top_Down_S(tree->n_root,tree->n_root->v[1], result, tree);
2363
2364
2365 }
2366
2367 //////////////////////////////////////////////////////////////
2368 //////////////////////////////////////////////////////////////
2369
2370
TIMES_Set_All_Node_Priors_Bottom_Up_S(t_node * a,t_node * d,int * result,t_tree * tree)2371 void TIMES_Set_All_Node_Priors_Bottom_Up_S(t_node *a, t_node *d, int *result, t_tree *tree)
2372 {
2373 int i;
2374 phydbl t_sup;
2375
2376 if(d->tax) return;
2377 else
2378 {
2379 t_node *v1, *v2; /* the two sons of d */
2380
2381 for(i=0;i<3;i++)
2382 {
2383 if((d->v[i] != a) && (d->b[i] != tree->e_root))
2384 {
2385 TIMES_Set_All_Node_Priors_Bottom_Up_S(d,d->v[i], result, tree);
2386 }
2387 }
2388
2389 v1 = v2 = NULL;
2390 for(i=0;i<3;i++) if((d->v[i] != a) && (d->b[i] != tree->e_root))
2391 {
2392 if(!v1) v1 = d->v[i];
2393 else v2 = d->v[i];
2394 }
2395
2396 if(tree->times->t_has_prior[d->num] == YES)
2397 {
2398 t_sup = MIN(tree->times->t_prior_max[d->num],
2399 MIN(tree->times->t_prior_max[v1->num],
2400 tree->times->t_prior_max[v2->num]));
2401
2402 tree->times->t_prior_max[d->num] = t_sup;
2403
2404
2405 if(tree->times->t_prior_max[d->num] < tree->times->t_prior_min[d->num])
2406 {
2407
2408 *result = FALSE;
2409
2410 }
2411 }
2412 else
2413 {
2414 tree->times->t_prior_max[d->num] =
2415 MIN(tree->times->t_prior_max[v1->num],
2416 tree->times->t_prior_max[v2->num]);
2417 }
2418 }
2419 }
2420
2421 //////////////////////////////////////////////////////////////
2422 //////////////////////////////////////////////////////////////
2423
2424
TIMES_Set_All_Node_Priors_Top_Down_S(t_node * a,t_node * d,int * result,t_tree * tree)2425 void TIMES_Set_All_Node_Priors_Top_Down_S(t_node *a, t_node *d, int *result, t_tree *tree)
2426 {
2427 if(d->tax) return;
2428 else
2429 {
2430 int i;
2431
2432 if(tree->times->t_has_prior[d->num] == YES)
2433 {
2434 tree->times->t_prior_min[d->num] = MAX(tree->times->t_prior_min[d->num],tree->times->t_prior_min[a->num]);
2435
2436
2437 if(tree->times->t_prior_max[d->num] < tree->times->t_prior_min[d->num])
2438 {
2439
2440 *result = FALSE;
2441
2442 }
2443 }
2444 else
2445 {
2446 tree->times->t_prior_min[d->num] = tree->times->t_prior_min[a->num];
2447 }
2448
2449 for(i=0;i<3;i++)
2450 {
2451 if((d->v[i] != a) && (d->b[i] != tree->e_root))
2452 {
2453 TIMES_Set_All_Node_Priors_Top_Down_S(d,d->v[i], result, tree);
2454 }
2455 }
2456 }
2457 }
2458
2459 //////////////////////////////////////////////////////////////
2460 //////////////////////////////////////////////////////////////
2461
log_g_i(phydbl lmbd,phydbl t_slice_max,phydbl t_slice_min,phydbl t_prior_max,phydbl t_prior_min)2462 phydbl log_g_i(phydbl lmbd, phydbl t_slice_max, phydbl t_slice_min, phydbl t_prior_max, phydbl t_prior_min)
2463 {
2464 phydbl result = 0.0;
2465
2466
2467 if(lmbd * t_prior_min < -10.0)
2468 {
2469 phydbl K;
2470 K = -10.0 - lmbd * t_prior_min;
2471 /* printf("\n. K = [%f] \n", K); */
2472 if(lmbd * t_prior_max + K > 700.0)
2473 {
2474 PhyML_Printf("\n. Please scale your calibration intervals. \n");
2475 PhyML_Printf("\n. Cannot calculate exp(-lmbd * t_prior_max). \n");
2476 PhyML_Printf("\n. Err. in file %s at line %d\n\n",__FILE__,__LINE__);
2477 Warn_And_Exit("\n");
2478 }
2479 else
2480 {
2481 result = log(exp(lmbd * t_slice_max + K) - exp(lmbd * t_slice_min + K)) - log(exp(lmbd * t_prior_max + K) - exp(lmbd * t_prior_min + K));
2482 }
2483 }
2484 else
2485 {
2486 result = log(exp(lmbd * t_slice_max) - exp(lmbd * t_slice_min)) - log(exp(lmbd * t_prior_max) - exp(lmbd * t_prior_min));
2487 }
2488 return(result);
2489 }
2490
2491 //////////////////////////////////////////////////////////////
2492 //////////////////////////////////////////////////////////////
2493
2494 //////////////////////////////////////////////////////////////
2495 //////////////////////////////////////////////////////////////
2496
CombineInt(int int1,int int2)2497 int CombineInt(int int1, int int2)
2498 {
2499 int mem;
2500
2501 mem = 500 * 2;
2502
2503 char cResult[mem];
2504
2505 sprintf(cResult, "%d%d", int1, int2);
2506 return atoi(cResult);
2507 }
2508
2509 //////////////////////////////////////////////////////////////
2510 //////////////////////////////////////////////////////////////
2511
Jump_Calibration_Move_Pre(t_node * a,t_node * d,phydbl old_ta,phydbl * log_hastings_ratio,t_tree * tree)2512 void Jump_Calibration_Move_Pre(t_node *a, t_node *d, phydbl old_ta, phydbl *log_hastings_ratio, t_tree *tree)
2513 {
2514 int i;
2515 phydbl *t_prior_min_new, *t_prior_max_new, t_low_new, t_up_new;
2516 phydbl *t_prior_min_old, *t_prior_max_old, t_low_old, t_up_old;
2517 phydbl *nd_t, old_t;
2518 phydbl eps = DBL_MIN;
2519 int move_anyway;
2520
2521 t_prior_min_new = tree -> rates -> t_prior_min;
2522 t_prior_max_new = tree -> rates -> t_prior_max;
2523
2524 t_prior_min_old = tree -> rates -> t_prior_min_ori;
2525 t_prior_max_old = tree -> rates -> t_prior_max_ori;
2526
2527 nd_t = tree -> rates -> nd_t;
2528
2529 t_low_new = MAX(t_prior_min_new[d -> num], nd_t[a -> num]);
2530 t_up_new = t_prior_max_new[d -> num];
2531
2532 t_low_old = MAX(t_prior_min_old[d -> num], old_ta);
2533 t_up_old = t_prior_max_old[d -> num];
2534
2535 old_t = nd_t[d -> num];
2536
2537 /* move_anyway = NO; */
2538 /* if(Uni() < .5) move_anyway = YES; */
2539 move_anyway = YES;
2540
2541
2542 if(d -> tax) return;
2543 else
2544 {
2545 if((nd_t[d -> num] > t_up_new || nd_t[d -> num] < t_low_new) || (move_anyway == YES)) /* Hastings ratio */
2546 {
2547 (*log_hastings_ratio) += log(t_up_new - t_low_new + eps);
2548 }
2549
2550 if((nd_t[d -> num] > t_up_new || nd_t[d -> num] < t_low_new) || (move_anyway == YES)) /* Do the jump */
2551 {
2552 nd_t[d -> num] = Uni()*(t_up_new - t_low_new) + t_low_new;
2553 }
2554
2555
2556 if((nd_t[d -> num] > t_up_old || nd_t[d -> num] < t_low_old) || (move_anyway == YES)) /* Hastings ratio */
2557 {
2558 (*log_hastings_ratio) -= log(t_up_old - t_low_old + eps);
2559 }
2560
2561
2562 for(i=0;i<3;i++)
2563 if((d -> v[i] != d -> anc) && (d -> b[i] != tree -> e_root))
2564 Jump_Calibration_Move_Pre(d, d -> v[i], old_t, log_hastings_ratio, tree);
2565 }
2566 }
2567
2568 //////////////////////////////////////////////////////////////
2569 //////////////////////////////////////////////////////////////
2570
Multiple_Time_Proposal_Density(t_node * a,t_node * d,phydbl * time_proposal_density,t_tree * tree)2571 void Multiple_Time_Proposal_Density(t_node *a, t_node *d, phydbl *time_proposal_density, t_tree *tree)
2572 {
2573 int i;
2574 phydbl *t_prior_min, *t_prior_max, *nd_t, t_low, t_up;
2575
2576 t_prior_min = tree -> rates -> t_prior_min;
2577 t_prior_max = tree -> rates -> t_prior_max;
2578 nd_t = tree -> rates -> nd_t;
2579
2580
2581 //printf("\n. [1] Node number: [%d] \n", d -> num);
2582 if(d -> tax) return;
2583 else
2584 {
2585
2586 t_low = MAX(t_prior_min[d -> num], nd_t[a -> num]);
2587 t_up = t_prior_max[d -> num];
2588 /* t_up = MIN(t_prior_max[d -> num], MIN(nd_t[d -> v[1] -> num], nd_t[d -> v[2] -> num])); */
2589 /* printf("\n. Low [%f] Up [%f] \n", t_low, t_up); */
2590
2591 (*time_proposal_density) += (- log(t_up - t_low));
2592
2593 for(i=0;i<3;i++)
2594 if((d -> v[i] != d -> anc) && (d -> b[i] != tree -> e_root))
2595 Multiple_Time_Proposal_Density(d, d -> v[i], time_proposal_density, tree);
2596 }
2597 }
2598
2599 //////////////////////////////////////////////////////////////
2600 //////////////////////////////////////////////////////////////
2601
2602
2603