1 /*
2
3 PhyML: a program that computes maximum likelihood phylogenies from
4 DNA or AA homologous sequences.
5
6 Copyright (C) Stephane Guindon. Oct 2003 onward.
7
8 All parts of the source except where indicated are distributed under
9 the GNU public licence. See http://www.opensource.org for details.
10
11 */
12
13
14 #include "geo.h"
15
16 //////////////////////////////////////////////////////////////
17 //////////////////////////////////////////////////////////////
18
GEO_Main(int argc,char ** argv)19 int GEO_Main(int argc, char **argv)
20 {
21 /* GEO_Simulate_Estimate(argc,argv); */
22 GEO_Estimate(argc,argv);
23 return(1);
24 }
25
26 //////////////////////////////////////////////////////////////
27 //////////////////////////////////////////////////////////////
28
GEO_Estimate(int argc,char ** argv)29 int GEO_Estimate(int argc, char **argv)
30 {
31 t_geo *t;
32 int seed;
33 FILE *fp;
34 t_tree *tree;
35 phydbl *ldscp;
36 int *loc_hash;
37 int i,j;
38 phydbl *probs;
39 phydbl sum;
40
41 // geo ./ban
42
43
44 seed = getpid();
45 /* seed = 28224; */
46 /* seed = 21249; */
47 /* seed = 21596; */
48
49 printf("\n. Seed = %d",seed);
50 srand(seed);
51
52 t = GEO_Make_Geo_Basic();
53 GEO_Init_Geo_Struct(t);
54
55 fp = Openfile(argv[1],0); /* Open tree file */
56
57 tree = Read_Tree_File_Phylip(fp); /* Read it */
58
59 Update_Ancestors(tree->n_root,tree->n_root->v[2],tree);
60 Update_Ancestors(tree->n_root,tree->n_root->v[1],tree);
61
62 tree->rates = RATES_Make_Rate_Struct(tree->n_otu);
63 RATES_Init_Rate_Struct(tree->rates,NULL,tree->n_otu);
64
65 tree->times = TIMES_Make_Time_Struct(tree->n_otu);
66 TIMES_Init_Time_Struct(tree->times,NULL,tree->n_otu);
67
68 Branch_To_Time(tree);
69
70 tree->geo = t;
71
72 GEO_Read_In_Landscape(argv[2],t,&ldscp,&loc_hash,tree);
73
74 GEO_Make_Geo_Complete(t->ldscape_sz,t->n_dim,tree->n_otu,t);
75
76
77 j = 0;
78 for(i=0;i<t->ldscape_sz;i++)
79 {
80 t->coord_loc[i]->lonlat[0] = ldscp[j];
81 t->coord_loc[i]->lonlat[1] = ldscp[j+1];
82 PhyML_Printf("\n. Location %d: %13f %13f",i+1,t->coord_loc[i]->lonlat[0],t->coord_loc[i]->lonlat[1]);
83 j+=2;
84 }
85
86 for(i=0;i<tree->n_otu;i++) t->idx_loc[i] = loc_hash[i];
87
88 t->cov[0*t->n_dim+0] = t->sigma;
89 t->cov[1*t->n_dim+1] = t->sigma;
90 t->cov[0*t->n_dim+1] = 0.0;
91 t->cov[1*t->n_dim+0] = 0.0;
92
93 GEO_Get_Sigma_Max(t);
94
95 t->max_sigma = t->sigma_thresh * 2.;
96
97 PhyML_Printf("\n. Sigma max: %f",t->sigma_thresh);
98
99 GEO_Get_Locations_Beneath(t,tree);
100
101 probs = (phydbl *)mCalloc(tree->geo->ldscape_sz,sizeof(phydbl));
102
103 sum = 0.0;
104 for(i=0;i<tree->geo->ldscape_sz;i++) sum += tree->geo->idx_loc_beneath[tree->n_root->num * tree->geo->ldscape_sz + i];
105 for(i=0;i<tree->geo->ldscape_sz;i++) probs[i] = tree->geo->idx_loc_beneath[tree->n_root->num * tree->geo->ldscape_sz + i]/sum;
106 tree->geo->idx_loc[tree->n_root->num] = Sample_i_With_Proba_pi(probs,tree->geo->ldscape_sz);
107 Free(probs);
108
109 GEO_Randomize_Locations(tree->n_root,t,tree);
110
111
112 GEO_Update_Occup(t,tree);
113 GEO_Lk(t,tree);
114
115 PhyML_Printf("\n. Init loglk: %f",tree->geo->c_lnL);
116
117 /* DR_Draw_Tree("essai.ps",tree); */
118
119 GEO_MCMC(tree);
120
121 fclose(fp);
122 Free(ldscp);
123 Free(loc_hash);
124
125 return(1);
126 }
127
128 //////////////////////////////////////////////////////////////
129 //////////////////////////////////////////////////////////////
130
GEO_Simulate_Estimate(int argc,char ** argv)131 int GEO_Simulate_Estimate(int argc, char **argv)
132 {
133 t_geo *t;
134 int n_tax;
135 t_tree *tree,*ori_tree;
136 int seed;
137 phydbl *res;
138 FILE *fp;
139 int pid;
140 char *s;
141 int rand_loc;
142 phydbl *probs,sum;
143 int i;
144 phydbl mantel_p_val;
145 phydbl rf;
146
147 s = (char *)mCalloc(T_MAX_NAME,sizeof(char));
148
149 strcpy(s,"geo.out");
150 pid = getpid();
151 sprintf(s+strlen(s),".%d",pid);
152
153 fp = fopen(s,"w");
154
155 seed = getpid();
156 /* seed = 15520; */
157 /* seed = 5023; */
158 printf("\n. Seed = %d",seed);
159 srand(seed);
160
161 t = GEO_Make_Geo_Basic();
162 GEO_Init_Geo_Struct(t);
163
164 /* t->tau = 3.0; */
165 /* t->lbda = 0.02; */
166 /* t->sigma = 10.; */
167
168
169 t->ldscape_sz = (int)atoi(argv[1]);
170 t->n_dim = 2;
171 n_tax = (int)atoi(argv[2]);
172
173 GEO_Make_Geo_Complete(t->ldscape_sz,t->n_dim,n_tax,t);
174
175 t->cov[0*t->n_dim+0] = t->sigma;
176 t->cov[1*t->n_dim+1] = t->sigma;
177 t->cov[0*t->n_dim+1] = 0.0;
178 t->cov[1*t->n_dim+0] = 0.0;
179
180 GEO_Simulate_Coordinates(t->ldscape_sz,t);
181
182 GEO_Get_Sigma_Max(t);
183
184 t->max_sigma = t->sigma_thresh * 2.;
185
186 PhyML_Printf("\n. sigma max: %f threshold: %f",t->max_sigma,t->sigma_thresh);
187
188 /* t->tau = Uni()*(t->max_tau/100.-t->min_tau*10.) + t->min_tau*10.; */
189 /* t->lbda = exp(Uni()*(log(t->max_lbda/100.)-log(t->min_lbda*10.)) + log(t->min_lbda*10.)); */
190 /* t->sigma = Uni()*(t->max_sigma-t->min_sigma) + t->min_sigma; */
191
192 t->lbda = 1.;
193 t->sigma = 0.3;
194 t->tau = 1.;
195
196 PhyML_Fprintf(fp,"\n# SigmaTrue\t SigmaThresh\t LbdaTrue\t TauTrue\txTrue\t yTrue\t xRand\t yRand\t RF\t Sigma5\t Sigma50\t Sigma95\t ProbSigmaInfThresh\t Lbda5\t Lbda50\t Lbda95\t ProbLbdaInf1\t Tau5\t Tau50\t Tau95\t X5\t X50\t X95\t Y5\t Y50\t Y95\t RandX5\t RandX50\t RandX95\t RandY5\t RandY50\t RandY95\t Mantel\t");
197 PhyML_Fprintf(fp,"\n");
198
199 tree = GEO_Simulate(t,n_tax);
200
201 ori_tree = Make_Tree_From_Scratch(tree->n_otu,NULL);
202 Copy_Tree(tree,ori_tree);
203
204 Random_SPRs_On_Rooted_Tree(tree);
205
206 Free_Bip(ori_tree);
207 Alloc_Bip(ori_tree);
208 Get_Bip(ori_tree->a_nodes[0],ori_tree->a_nodes[0]->v[0],ori_tree);
209
210 Free_Bip(tree);
211 Alloc_Bip(tree);
212 Get_Bip(tree->a_nodes[0],tree->a_nodes[0]->v[0],tree);
213 Match_Tip_Numbers(tree,ori_tree);
214
215 rf = (phydbl)Compare_Bip(ori_tree,tree,NO)/(tree->n_otu-3);
216 PhyML_Printf("\n. rf: %f",rf);
217
218 phydbl scale;
219 scale = exp(Rnorm(0,0.2));
220 PhyML_Printf("\n. Scale: %f",scale);
221 For(i,2*tree->n_otu-1) tree->times->nd_t[i] *= scale;
222
223
224 phydbl *tree_dist,*geo_dist;
225 int j;
226
227 Time_To_Branch(tree);
228 tree_dist = Dist_Btw_Tips(tree);
229
230 geo_dist = (phydbl *)mCalloc(tree->n_otu*tree->n_otu,sizeof(phydbl));
231
232 for(i=0;i<tree->n_otu;i++)
233 {
234 for(j=0;j<tree->n_otu;j++)
235 {
236 /* geo_dist[i*tree->n_otu+j] = */
237 /* FABS(t->ldscape[t->idx_loc[i]*t->n_dim+0] - */
238 /* t->ldscape[t->idx_loc[j]*t->n_dim+0])+ */
239 /* FABS(t->ldscape[t->idx_loc[i]*t->n_dim+1] - */
240 /* t->ldscape[t->idx_loc[j]*t->n_dim+1]); */
241 geo_dist[i*tree->n_otu+j] =
242 FABS(t->coord_loc[t->idx_loc[i]]->lonlat[0] -
243 t->coord_loc[t->idx_loc[j]]->lonlat[0])+
244 FABS(t->coord_loc[t->idx_loc[i]]->lonlat[1] -
245 t->coord_loc[t->idx_loc[j]]->lonlat[1]);
246
247 }
248 }
249
250 printf("\n. tau: %f lbda: %f sigma: %f",t->tau,t->lbda,t->sigma);
251 mantel_p_val = Mantel(tree_dist,geo_dist,tree->n_otu,tree->n_otu);
252 printf("\n. Mantel test p-value: %f",mantel_p_val);
253
254 Free(tree_dist);
255 Free(geo_dist);
256
257 rand_loc = Rand_Int(0,t->ldscape_sz-1);
258
259 PhyML_Printf("\n. sigma: %f\t lbda: %f\t tau:%f\t x:%f\t y:%f rand.x:%f\t rand.y:%f\t",
260 t->sigma,
261 t->lbda,
262 t->tau,
263 /* t->ldscape[t->idx_loc[tree->n_root->num]*t->n_dim+0], */
264 /* t->ldscape[t->idx_loc[tree->n_root->num]*t->n_dim+1], */
265 /* t->ldscape[rand_loc*t->n_dim+0], */
266 /* t->ldscape[rand_loc*t->n_dim+1]); */
267 t->coord_loc[t->idx_loc[tree->n_root->num]]->lonlat[0],
268 t->coord_loc[t->idx_loc[tree->n_root->num]]->lonlat[1],
269 t->coord_loc[rand_loc]->lonlat[0],
270 t->coord_loc[rand_loc]->lonlat[1]);
271
272 PhyML_Fprintf(fp,"%f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t",
273 t->sigma,
274 t->sigma_thresh,
275 t->lbda,
276 t->tau,
277 t->coord_loc[t->idx_loc[tree->n_root->num]]->lonlat[0],
278 t->coord_loc[t->idx_loc[tree->n_root->num]]->lonlat[1],
279 t->coord_loc[rand_loc]->lonlat[0],
280 t->coord_loc[rand_loc]->lonlat[1],
281 rf);
282
283 GEO_Get_Locations_Beneath(t,tree);
284
285 probs = (phydbl *)mCalloc(tree->geo->ldscape_sz,sizeof(phydbl));
286
287 sum = 0.0;
288 for(i=0;i<tree->geo->ldscape_sz;i++) sum += tree->geo->idx_loc_beneath[tree->n_root->num * tree->geo->ldscape_sz + i];
289 for(i=0;i<tree->geo->ldscape_sz;i++) probs[i] = tree->geo->idx_loc_beneath[tree->n_root->num * tree->geo->ldscape_sz + i]/sum;
290 tree->geo->idx_loc[tree->n_root->num] = Sample_i_With_Proba_pi(probs,tree->geo->ldscape_sz);
291 Free(probs);
292 GEO_Randomize_Locations(tree->n_root,tree->geo,tree);
293
294 GEO_Update_Occup(t,tree);
295 GEO_Lk(t,tree);
296 PhyML_Printf("\n. Init loglk: %f",tree->geo->c_lnL);
297
298
299 res = GEO_MCMC(tree);
300
301 PhyML_Fprintf(fp,"%f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f \n",
302
303 /* Sigma5 */ Quantile(res+0*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.025),
304 /* Sigma50*/ Quantile(res+0*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.50),
305 /* Sigma95*/ Quantile(res+0*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.975),
306
307 /* ProbInfThesh */ Prob(res+0*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval,t->sigma_thresh),
308
309 /* Lbda5 */ Quantile(res+1*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.025),
310 /* Lbda50 */ Quantile(res+1*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.50),
311 /* Lbda95 */ Quantile(res+1*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.975),
312
313 /* ProbInf1 */ Prob(res+1*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval,1.0),
314
315 /* Tau5 */ Quantile(res+2*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.025),
316 /* Tau50 */ Quantile(res+2*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.50),
317 /* Tau 95 */ Quantile(res+2*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.975),
318
319 /* X5 */ Quantile(res+4*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.025),
320 /* X50 */ Quantile(res+4*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.50),
321 /* X95 */ Quantile(res+4*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.975),
322
323 /* Y5 */ Quantile(res+5*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.025),
324 /* Y50 */ Quantile(res+5*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.50),
325 /* Y95 */ Quantile(res+5*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.975),
326
327 /* RandX5 */ Quantile(res+6*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.025),
328 /* RandX50*/ Quantile(res+6*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.50),
329 /* RandX95*/ Quantile(res+6*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.975),
330
331 /* RandY5 */ Quantile(res+7*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.025),
332 /* RandY50*/ Quantile(res+7*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.50),
333 /* RandY95*/ Quantile(res+7*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.975),
334
335 mantel_p_val
336 );
337 fflush(NULL);
338
339 Free(s);
340 Free(res);
341
342 fclose(fp);
343
344 return 1;
345 }
346
347 //////////////////////////////////////////////////////////////
348 //////////////////////////////////////////////////////////////
349
GEO_MCMC(t_tree * tree)350 phydbl *GEO_MCMC(t_tree *tree)
351 {
352 phydbl *res;
353 int n_vars;
354 int rand_loc;
355 t_geo *t;
356
357 t = tree->geo;
358
359 tree->mcmc = MCMC_Make_MCMC_Struct();
360 MCMC_Complete_MCMC(tree->mcmc,tree);
361
362 tree->mcmc->io = NULL;
363 tree->mcmc->is = NO;
364 tree->mcmc->run = 0;
365 tree->mcmc->sample_interval = 1E+3;
366 tree->mcmc->chain_len = 1E+6;
367 tree->mcmc->chain_len_burnin = 1E+5;
368 tree->mcmc->randomize = YES;
369 tree->mcmc->norm_freq = 1E+3;
370 tree->mcmc->max_tune = 1.E+20;
371 tree->mcmc->min_tune = 1.E-10;
372 tree->mcmc->print_every = 2;
373 tree->mcmc->is_burnin = NO;
374 tree->mcmc->nd_t_digits = 1;
375
376 t->tau = 1.0;
377 t->lbda = 1.0;
378 t->sigma = 1.0;
379 t->dum = 1.0;
380
381 tree->mcmc->chain_len = 1.E+8;
382 tree->mcmc->sample_interval = 50;
383
384 MCMC_Complete_MCMC(tree->mcmc,tree);
385
386 GEO_Update_Occup(t,tree);
387 GEO_Lk(t,tree);
388
389 tree->mcmc->start_ess[tree->mcmc->num_move_geo_sigma] = YES;
390 tree->mcmc->start_ess[tree->mcmc->num_move_geo_lambda] = YES;
391 tree->mcmc->start_ess[tree->mcmc->num_move_geo_tau] = YES;
392 tree->mcmc->start_ess[tree->mcmc->num_move_geo_dum] = YES;
393
394 n_vars = 10;
395 res = (phydbl *)mCalloc(tree->mcmc->chain_len / tree->mcmc->sample_interval * n_vars,sizeof(phydbl));
396
397 PhyML_Printf("\n. Run Sigma [ESS] Lambda [ESS] Tau [ESS] Dummy [ESS] LogLk");
398
399 tree->mcmc->run = 0;
400 do
401 {
402 MCMC_GEO_Lbda(tree);
403 MCMC_GEO_Tau(tree);
404 /* MCMC_Geo_Dum(tree); */
405 MCMC_GEO_Loc(tree);
406
407 t->update_fmat = YES;
408 MCMC_GEO_Sigma(tree);
409 t->update_fmat = NO;
410
411
412 /* t->update_fmat = YES; */
413 /* MCMC_Geo_Updown_Lbda_Sigma(tree); */
414 /* t->update_fmat = NO; */
415
416
417 /* MCMC_Geo_Updown_Tau_Lbda(tree); */
418 /* MCMC_Geo_Updown_Tau_Lbda(tree); */
419 /* MCMC_Geo_Updown_Tau_Lbda(tree); */
420
421
422 /* printf("\n"); */
423 /* int i; */
424 /* For(i,2*tree->n_otu-1) */
425 /* { */
426 /* if(tree->a_nodes[i]->tax == NO) */
427 /* { */
428 /* printf("%2d ",tree->geo->idx_loc[i]); */
429 /* } */
430 /* } */
431
432
433 if(tree->mcmc->run%tree->mcmc->sample_interval == 0)
434 {
435 MCMC_Copy_To_New_Param_Val(tree->mcmc,tree);
436
437 MCMC_Update_Effective_Sample_Size(tree->mcmc->num_move_geo_lambda,tree->mcmc,tree);
438 MCMC_Update_Effective_Sample_Size(tree->mcmc->num_move_geo_sigma,tree->mcmc,tree);
439 MCMC_Update_Effective_Sample_Size(tree->mcmc->num_move_geo_tau,tree->mcmc,tree);
440
441 /* printf("\n. lbda:%f,%f tau:%f,%f sigma:%f,%f", */
442 /* tree->mcmc->acc_rate[tree->mcmc->num_move_geo_lambda], */
443 /* tree->mcmc->tune_move[tree->mcmc->num_move_geo_lambda], */
444 /* tree->mcmc->acc_rate[tree->mcmc->num_move_geo_tau], */
445 /* tree->mcmc->tune_move[tree->mcmc->num_move_geo_tau], */
446 /* tree->mcmc->acc_rate[tree->mcmc->num_move_geo_sigma], */
447 /* tree->mcmc->tune_move[tree->mcmc->num_move_geo_sigma]); */
448
449 PhyML_Printf("\n. %6d %12f %4.0f %12f %4.0f %12f %4.0f %12f %4.0f %12f",
450 tree->mcmc->run,
451
452 t->sigma,
453 tree->mcmc->ess[tree->mcmc->num_move_geo_sigma],
454
455 t->lbda,
456 tree->mcmc->ess[tree->mcmc->num_move_geo_lambda],
457
458 t->tau,
459 tree->mcmc->ess[tree->mcmc->num_move_geo_tau],
460
461 t->dum,
462 tree->mcmc->ess[tree->mcmc->num_move_geo_dum],
463
464 t->c_lnL);
465
466
467 /* PhyML_Printf("\n%12f %12f %12f %12f", */
468 /* t->sigma, */
469 /* t->lbda, */
470 /* t->tau, */
471 /* t->dum); */
472
473
474 rand_loc = Rand_Int(0,t->ldscape_sz-1);
475
476 res[0 * tree->mcmc->chain_len / tree->mcmc->sample_interval + tree->mcmc->run / tree->mcmc->sample_interval] = t->sigma;
477 res[1 * tree->mcmc->chain_len / tree->mcmc->sample_interval + tree->mcmc->run / tree->mcmc->sample_interval] = t->lbda;
478 res[2 * tree->mcmc->chain_len / tree->mcmc->sample_interval + tree->mcmc->run / tree->mcmc->sample_interval] = t->tau;
479 res[3 * tree->mcmc->chain_len / tree->mcmc->sample_interval + tree->mcmc->run / tree->mcmc->sample_interval] = t->c_lnL;
480
481 /* res[4 * tree->mcmc->chain_len / tree->mcmc->sample_interval + tree->mcmc->run / tree->mcmc->sample_interval] = t->ldscape[t->idx_loc[tree->n_root->num]*t->n_dim+0]; */
482 /* res[5 * tree->mcmc->chain_len / tree->mcmc->sample_interval + tree->mcmc->run / tree->mcmc->sample_interval] = t->ldscape[t->idx_loc[tree->n_root->num]*t->n_dim+1]; */
483 res[4 * tree->mcmc->chain_len / tree->mcmc->sample_interval + tree->mcmc->run / tree->mcmc->sample_interval] = t->coord_loc[t->idx_loc[tree->n_root->num]]->lonlat[0];
484 res[5 * tree->mcmc->chain_len / tree->mcmc->sample_interval + tree->mcmc->run / tree->mcmc->sample_interval] = t->coord_loc[t->idx_loc[tree->n_root->num]]->lonlat[1];
485
486 /* res[6 * tree->mcmc->chain_len / tree->mcmc->sample_interval + tree->mcmc->run / tree->mcmc->sample_interval] = t->ldscape[rand_loc*t->n_dim+0]; */
487 /* res[7 * tree->mcmc->chain_len / tree->mcmc->sample_interval + tree->mcmc->run / tree->mcmc->sample_interval] = t->ldscape[rand_loc*t->n_dim+1]; */
488 res[6 * tree->mcmc->chain_len / tree->mcmc->sample_interval + tree->mcmc->run / tree->mcmc->sample_interval] = t->coord_loc[rand_loc]->lonlat[0];
489 res[7 * tree->mcmc->chain_len / tree->mcmc->sample_interval + tree->mcmc->run / tree->mcmc->sample_interval] = t->coord_loc[rand_loc]->lonlat[1];
490 }
491
492 tree->mcmc->run++;
493
494 if(tree->mcmc->ess[tree->mcmc->num_move_geo_sigma] > 200.) tree->mcmc->adjust_tuning[tree->mcmc->num_move_geo_sigma] = NO;
495 if(tree->mcmc->ess[tree->mcmc->num_move_geo_tau] > 200.) tree->mcmc->adjust_tuning[tree->mcmc->num_move_geo_tau] = NO;
496 if(tree->mcmc->ess[tree->mcmc->num_move_geo_lambda]> 200.) tree->mcmc->adjust_tuning[tree->mcmc->num_move_geo_lambda] = NO;
497
498 MCMC_Get_Acc_Rates(tree->mcmc);
499
500 if(tree->mcmc->ess[tree->mcmc->num_move_geo_sigma] > 1000. &&
501 tree->mcmc->ess[tree->mcmc->num_move_geo_tau] > 1000. &&
502 tree->mcmc->ess[tree->mcmc->num_move_geo_lambda]> 1000.) break;
503 }
504 while(tree->mcmc->run < tree->mcmc->chain_len);
505
506 return(res);
507
508 }
509
510 //////////////////////////////////////////////////////////////
511 //////////////////////////////////////////////////////////////
512
513 //////////////////////////////////////////////////////////////
514 //////////////////////////////////////////////////////////////
515
516 // Update F matrix. Assume a diagonal covariance matrix.
GEO_Update_Fmat(t_geo * t)517 void GEO_Update_Fmat(t_geo *t)
518 {
519 phydbl *loc1, *loc2;
520 int i,j,k;
521 int err;
522 phydbl lognloc;
523
524 for(i=0;i<t->n_dim;i++) t->cov[i*t->n_dim+i] = t->sigma; // Diagonal covariance matrix. Same variance in every direction
525
526 lognloc = log(t->ldscape_sz);
527
528 // Fill in F matrix;
529 for(i=0;i<t->ldscape_sz;i++)
530 {
531 loc1 = t->coord_loc[i]->lonlat;
532
533 for(j=i;j<t->ldscape_sz;j++)
534 {
535 loc2 = t->coord_loc[j]->lonlat;
536
537 t->f_mat[i*t->ldscape_sz+j] = .0;
538
539 // Calculate log(f(l_i,l_j)) - log(f(l_i,l_i) - log(m)
540 for(k=0;k<t->n_dim;k++) t->f_mat[i*t->ldscape_sz+j] += Log_Dnorm(loc2[k],loc1[k],t->cov[k*t->n_dim+k],&err);
541 t->f_mat[i*t->ldscape_sz+j] -= lognloc;
542 for(k=0;k<t->n_dim;k++) t->f_mat[i*t->ldscape_sz+j] -= Log_Dnorm(loc1[k],loc1[k],t->cov[k*t->n_dim+k],&err);
543
544 // Take the exponential
545 t->f_mat[i*t->ldscape_sz+j] = exp(t->f_mat[i*t->ldscape_sz+j]);
546
547 // Matrix is symmetric
548 t->f_mat[j*t->ldscape_sz+i] = t->f_mat[i*t->ldscape_sz+j];
549
550 /* printf("\n. f[%d,%d] = %f (1:[%f;%f] 2:[%f;%f]) sigma=%f",i,j,t->f_mat[i*t->ldscape_sz+j],loc1[0],loc1[1],loc2[0],loc2[1],SQRT(t->cov[0*t->n_dim+0])); */
551 }
552 }
553 }
554
555 //////////////////////////////////////////////////////////////
556 //////////////////////////////////////////////////////////////
557
558 // Sort node heights from oldest to youngest age.
GEO_Update_Sorted_Nd(t_geo * t,t_tree * tree)559 void GEO_Update_Sorted_Nd(t_geo *t, t_tree *tree)
560 {
561 int i;
562 int swap;
563 t_node *buff;
564
565 buff = NULL;
566
567 For(i,2*tree->n_otu-1) t->sorted_nd[i] = tree->a_nodes[i];
568
569 // Bubble sort of the node heights
570 do
571 {
572 swap = NO;
573 For(i,2*tree->n_otu-2)
574 {
575 if(tree->times->nd_t[t->sorted_nd[i+1]->num] < tree->times->nd_t[t->sorted_nd[i]->num])
576 {
577 buff = t->sorted_nd[i];
578 t->sorted_nd[i] = t->sorted_nd[i+1];
579 t->sorted_nd[i+1] = buff;
580
581 swap = YES;
582 }
583 }
584 }
585 while(swap == YES);
586 }
587
588 //////////////////////////////////////////////////////////////
589 //////////////////////////////////////////////////////////////
590
591 // Update the set of vectors of occupation along the tree
GEO_Update_Occup(t_geo * t,t_tree * tree)592 void GEO_Update_Occup(t_geo *t, t_tree *tree)
593 {
594 int i,j;
595 t_node *v1, *v2;
596
597 GEO_Update_Sorted_Nd(t,tree);
598
599 For(i,t->ldscape_sz*(2*tree->n_otu-1)) t->occup[i] = 0;
600
601 t->occup[tree->n_root->num*t->ldscape_sz + t->idx_loc[tree->n_root->num]] = 1;
602
603 for(i=1;i<2*tree->n_otu-1;i++)
604 {
605 for(j=0;j<t->ldscape_sz;j++)
606 {
607 t->occup[t->sorted_nd[i]->num*t->ldscape_sz + j] =
608 t->occup[t->sorted_nd[i-1]->num*t->ldscape_sz + j];
609 }
610
611
612 if(t->sorted_nd[i-1]->tax == NO)
613 {
614 v1 = v2 = NULL;
615 if(t->sorted_nd[i-1] == tree->n_root)
616 {
617 v1 = tree->n_root->v[1];
618 v2 = tree->n_root->v[2];
619 }
620 else
621 {
622 for(j=0;j<3;j++)
623 {
624 if(t->sorted_nd[i-1]->v[j] != t->sorted_nd[i-1]->anc &&
625 t->sorted_nd[i-1]->b[j] != tree->e_root)
626 {
627 if(!v1) v1 = t->sorted_nd[i-1]->v[j];
628 else v2 = t->sorted_nd[i-1]->v[j];
629 }
630 }
631 }
632
633
634 if(t->idx_loc[v1->num] != t->idx_loc[t->sorted_nd[i-1]->num])
635 {
636 t->occup[t->sorted_nd[i]->num * t->ldscape_sz + t->idx_loc[v1->num]]++;
637 }
638 else
639 {
640 t->occup[t->sorted_nd[i]->num * t->ldscape_sz + t->idx_loc[v2->num]]++;
641 }
642 }
643 }
644
645 /* printf("\n"); */
646 /* For(i,2*tree->n_otu-1) */
647 /* { */
648 /* printf("\n. Node %3d: ",t->sorted_nd[i]->num); */
649 /* for(j=0;j<t->ldscape_sz;j++) */
650 /* { */
651 /* /\* printf("%3d [%12f;%12f] ", *\/ */
652 /* /\* t->occup[t->sorted_nd[i]->num*t->ldscape_sz + j], *\/ */
653 /* /\* t->ldscape[j*t->n_dim+0],t->ldscape[j*t->n_dim+1]); *\/ */
654 /* /\* printf("%3d ", *\/ */
655 /* /\* t->occup[t->sorted_nd[i]->num*t->ldscape_sz + j]); *\/ */
656 /* } */
657 /* } */
658 }
659
660 //////////////////////////////////////////////////////////////
661 //////////////////////////////////////////////////////////////
662
663 // Calculate R mat at node n
GEO_Update_Rmat(t_node * n,t_geo * t,t_tree * tree)664 void GEO_Update_Rmat(t_node *n, t_geo *t, t_tree *tree)
665 {
666 int i,j;
667 phydbl lbda_j;
668
669 for(i=0;i<t->ldscape_sz;i++)
670 {
671 for(j=0;j<t->ldscape_sz;j++)
672 {
673 lbda_j = ((t->occup[n->num*t->ldscape_sz + j]==0) ? (1.0) : (t->lbda));
674 t->r_mat[i*t->ldscape_sz+j] = t->f_mat[i*t->ldscape_sz+j] * lbda_j * t->tau * t->dum;
675 }
676 }
677 }
678
679 //////////////////////////////////////////////////////////////
680 //////////////////////////////////////////////////////////////
681
GEO_Lk(t_geo * t,t_tree * tree)682 phydbl GEO_Lk(t_geo *t, t_tree *tree)
683 {
684 int i,j;
685 phydbl loglk;
686 phydbl R;
687 int dep,arr; // departure and arrival location indices;
688 t_node *curr_n,*prev_n,*v1,*v2;
689 phydbl sum;
690
691 GEO_Update_Occup(t,tree); // Same here.
692
693 if(t->update_fmat == YES) GEO_Update_Fmat(t);
694
695 prev_n = NULL;
696 curr_n = NULL;
697 loglk = .0;
698 for(i=1;i<tree->n_otu-1;i++) // Consider all the time slices, from oldest to youngest.
699 // Start at first node below root
700 {
701 prev_n = t->sorted_nd[i-1]; // node just above
702 curr_n = t->sorted_nd[i]; // current node
703
704 GEO_Update_Rmat(curr_n,t,tree); // NOTE: don't need to do that every time. Add check later.
705
706 R = GEO_Total_Migration_Rate(curr_n,t); // Total migration rate calculated at node n
707
708 /* if(i < 2) */
709 /* { */
710 /* printf("\n. t0: %f t1: %f R: %f tau: %f sigma: %f lbda: %f x1: %f y1: %f x2: %f y2: %f log0: %d loc1: %d rij: %G fij: %G", */
711 /* tree->times->nd_t[t->sorted_nd[i-1]->num], */
712 /* tree->times->nd_t[t->sorted_nd[i]->num], */
713 /* R, */
714 /* t->tau, */
715 /* t->lbda, */
716 /* t->sigma, */
717 /* t->ldscape[t->idx_loc[tree->geo->sorted_nd[i-1]->num]*t->n_dim+0], */
718 /* t->ldscape[t->idx_loc[tree->geo->sorted_nd[i-1]->num]*t->n_dim+1], */
719 /* t->ldscape[t->idx_loc[tree->geo->sorted_nd[i]->num]*t->n_dim+0], */
720 /* t->ldscape[t->idx_loc[tree->geo->sorted_nd[i]->num]*t->n_dim+1], */
721 /* t->idx_loc[tree->geo->sorted_nd[i-1]->num], */
722 /* t->idx_loc[tree->geo->sorted_nd[i]->num], */
723 /* t->r_mat[t->idx_loc[tree->geo->sorted_nd[i-1]->num] * t->ldscape_sz + t->idx_loc[tree->geo->sorted_nd[i]->num]], */
724 /* t->f_mat[t->idx_loc[tree->geo->sorted_nd[i-1]->num] * t->ldscape_sz + t->idx_loc[tree->geo->sorted_nd[i]->num]] */
725 /* ); */
726
727 /* printf("\n >> "); */
728 /* int j; */
729 /* for(j=0;j<t->ldscape_sz;j++) */
730 /* if(t->occup[t->sorted_nd[i]->num * t->ldscape_sz + j] > 0) */
731 /* printf("%f %f -- ", */
732 /* t->ldscape[j*t->n_dim+0], */
733 /* t->ldscape[j*t->n_dim+1]); */
734 /* } */
735
736
737 /* printf("\n. %d %d (%d) %f %p %p \n",i,curr_n->num,curr_n->tax,tree->times->nd_t[curr_n->num],curr_n->v[1],curr_n->v[2]); */
738
739 v1 = v2 = NULL;
740 for(j=0;j<3;j++)
741 if(curr_n->v[j] != curr_n->anc && curr_n->b[j] != tree->e_root)
742 {
743 if(!v1) v1 = curr_n->v[j];
744 else v2 = curr_n->v[j];
745 }
746
747 dep = t->idx_loc[curr_n->num]; // departure location
748 arr = // arrival location
749 (t->idx_loc[v1->num] == t->idx_loc[curr_n->num] ?
750 t->idx_loc[v2->num] :
751 t->idx_loc[v1->num]);
752
753 /* printf("\n%f\t%f", */
754 /* t->ldscape[arr*t->n_dim+0]-t->ldscape[dep*t->n_dim+0], */
755 /* t->ldscape[arr*t->n_dim+1]-t->ldscape[dep*t->n_dim+1]); */
756
757 loglk -= R * FABS(tree->times->nd_t[curr_n->num] - tree->times->nd_t[prev_n->num]);
758 loglk += log(t->r_mat[dep * t->ldscape_sz + arr]);
759
760 /* printf("\n <> %d %f %f %d %d v1:%d v2:%d anc:%d %d %d %d", */
761 /* curr_n->num, */
762 /* R * FABS(tree->times->nd_t[curr_n->num] - tree->times->nd_t[prev_n->num]), */
763 /* log(t->r_mat[dep * t->ldscape_sz + arr]), */
764 /* dep,arr,v1->num,v2->num,curr_n->anc->num,curr_n->v[0]->num,curr_n->v[1]->num,curr_n->v[2]->num); */
765
766 /* if(i<2) */
767 /* { */
768 /* printf("\n. R = %f r_mat = %f f_mat = %f dt = %f loglk = %f", */
769 /* R, */
770 /* t->r_mat[dep * t->ldscape_sz + arr], */
771 /* t->f_mat[dep * t->ldscape_sz + arr], */
772 /* FABS(t->sorted_nd[i] - t->sorted_nd[i-1]),loglk); */
773 /* Exit("\n"); */
774 /* } */
775
776 }
777
778
779 // Likelihood for the first 'slice' (i.e., the part just below the root down to
780 // the next node)
781 GEO_Update_Rmat(tree->n_root,t,tree);
782
783 loglk -= log(t->ldscape_sz);
784 dep = t->idx_loc[tree->n_root->num];
785 arr =
786 (t->idx_loc[tree->n_root->num] != t->idx_loc[tree->n_root->v[1]->num] ?
787 t->idx_loc[tree->n_root->v[1]->num] :
788 t->idx_loc[tree->n_root->v[2]->num]);
789
790 /* printf("\n %f %f",t->ldscape[dep],t->ldscape[arr]); */
791
792 loglk += log(t->r_mat[dep * t->ldscape_sz + arr]);
793
794 sum = .0;
795 for(i=0;i<t->ldscape_sz;i++) sum += t->r_mat[dep * t->ldscape_sz + i];
796
797 loglk -= log(sum);
798
799 tree->geo->c_lnL = loglk;
800
801 return loglk;
802 }
803
804 //////////////////////////////////////////////////////////////
805 //////////////////////////////////////////////////////////////
806
GEO_Init_Tloc_Tips(t_geo * t,t_tree * tree)807 void GEO_Init_Tloc_Tips(t_geo *t, t_tree *tree)
808 {
809 int i;
810
811 // TO DO
812 for(i=0;i<tree->n_otu;i++)
813 {
814 t->idx_loc[i] = i;
815 }
816 }
817
818 //////////////////////////////////////////////////////////////
819 //////////////////////////////////////////////////////////////
820
821 // Do not forget to call GEO_Update_Rmat (with node n) before calling this function
GEO_Total_Migration_Rate(t_node * n,t_geo * t)822 phydbl GEO_Total_Migration_Rate(t_node *n, t_geo *t)
823 {
824 phydbl R;
825 int i,j;
826
827 R = .0;
828
829 for(i=0;i<t->ldscape_sz;i++)
830 {
831 for(j=0;j<t->ldscape_sz;j++)
832 {
833 R +=
834 t->r_mat[i * t->ldscape_sz + j] *
835 t->occup[n->num * t->ldscape_sz + i];
836 }
837 }
838
839 return R;
840 }
841
842 //////////////////////////////////////////////////////////////
843 //////////////////////////////////////////////////////////////
844
845 // Find the arrival location for the migration leaving from n
GEO_Get_Arrival_Location(t_node * n,t_geo * t,t_tree * tree)846 int GEO_Get_Arrival_Location(t_node *n, t_geo *t, t_tree *tree)
847 {
848 int i;
849 t_node *v1, *v2; // the two daughters of n
850
851 v1 = v2 = NULL;
852
853 for(i=0;i<3;i++)
854 {
855 if(n->v[i] && n->v[i] != n->anc)
856 {
857 if(!v1) v1 = n->v[i];
858 else v2 = n->v[i];
859 }
860 }
861
862 if(t->idx_loc[v1->num] == t->idx_loc[v2->num]) // Migrated to the same location as that of n
863 {
864 if(t->idx_loc[n->num] != t->idx_loc[v1->num])
865 {
866 PhyML_Printf("\n== Error detected in location labeling.");
867 PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__);
868 Exit("\n");
869 }
870 else
871 return t->idx_loc[n->num];
872 }
873 else // Migrated to a different spot
874 {
875 if((t->idx_loc[v1->num] != t->idx_loc[n->num]) && (t->idx_loc[v2->num] != t->idx_loc[n->num]))
876 {
877 PhyML_Printf("\n== Error detected in location labeling.");
878 PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__);
879 Exit("\n");
880 }
881 else
882 {
883 if(t->idx_loc[v1->num] == t->idx_loc[n->num]) return t->idx_loc[v2->num]; // v2 gets the new location, v1 inheritates from n
884 else return t->idx_loc[v1->num]; // v1 gets the new location, v2 inheritates from n
885 }
886 }
887 return -1;
888 }
889
890 //////////////////////////////////////////////////////////////
891 //////////////////////////////////////////////////////////////
892
GEO_Simulate(t_geo * t,int n_otu)893 t_tree *GEO_Simulate(t_geo *t, int n_otu)
894 {
895 t_tree *tree;
896 int n_branching_nodes;
897 t_node **branching_nodes; // vector of nodes available for branching out
898 phydbl *p_branch; // p_branch[i]: proba that the i-th node in branching_nodes branches out (p_x vector in the article)
899 phydbl *p_mig; // p_branch[i]: proba of migrating to location i from the location of the edge branching out (q_i vector in the article)
900 int hit;
901 phydbl time;
902 int dep, arr;
903 int i,j;
904 phydbl sum;
905 phydbl R;
906 int *occup; // occupation vector. Updated as we move down the tree
907 int nd_idx;
908 t_node *buff_nd;
909 phydbl buff_t;
910 int buff_l;
911 int swap;
912
913 tree = Make_Tree_From_Scratch(n_otu,NULL);
914
915 tree->rates = RATES_Make_Rate_Struct(tree->n_otu);
916 RATES_Init_Rate_Struct(tree->rates,NULL,tree->n_otu);
917
918 tree->times = TIMES_Make_Time_Struct(tree->n_otu);
919 TIMES_Init_Time_Struct(tree->times,NULL,tree->n_otu);
920
921 tree->n_root = tree->a_nodes[2*tree->n_otu-2]; // Set the root node to the last element in the list of nodes
922 tree->geo = t;
923
924
925 For(i,2*tree->n_otu-2) tree->times->nd_t[i] = -1.;
926
927 occup = (int *)mCalloc(t->ldscape_sz,sizeof(int));
928
929 GEO_Update_Fmat(t);
930
931 branching_nodes = (t_node **)mCalloc(tree->n_otu,sizeof(t_node *));
932 branching_nodes[0] = tree->n_root;
933 n_branching_nodes = 1;
934 nd_idx = 0;
935
936
937 p_branch = (phydbl *)mCalloc(tree->n_otu,sizeof(phydbl ));
938 p_branch[0] = 1.0;
939
940 p_mig = (phydbl *)mCalloc(t->ldscape_sz,sizeof(phydbl ));
941
942 time = 0.0; // Time at the root node (this will be changed afterward)
943
944 // Sample a location uniformly for the root
945 t->idx_loc[tree->n_root->num] = Rand_Int(0,t->ldscape_sz-1);
946
947 // Update the occupancy vector
948 occup[t->idx_loc[tree->n_root->num]] = 1;
949
950 dep = arr = -1;
951
952
953 // total migration rate
954 R = 0.0;
955 for(i=0;i<t->ldscape_sz;i++)
956 {
957 R +=
958 t->f_mat[t->idx_loc[tree->n_root->num]*t->ldscape_sz+i] *
959 ((occup[i] == 0) ? (1.0) : (t->lbda)) *
960 t->tau * t->dum;
961 }
962
963 do
964 {
965 // Select the node that branches out
966 hit = Sample_i_With_Proba_pi(p_branch,n_branching_nodes);
967
968 /* printf("\n. [%d] Select node %d (location %d)",n_branching_nodes,branching_nodes[hit]->num,t->idx_loc[branching_nodes[hit]->num]); */
969
970 // Set the time for the branching node
971 tree->times->nd_t[branching_nodes[hit]->num] = time;
972
973
974 /* printf("\n. Set its time to %f",time); */
975
976 // Select the destination location
977 dep = t->idx_loc[branching_nodes[hit]->num]; // Departure point
978
979 sum = .0;
980 for(i=0;i<t->ldscape_sz;i++) // Total rate of migration out of departure point
981 {
982 p_mig[i] =
983 t->f_mat[dep*t->ldscape_sz+i] *
984 ((occup[i] == 0) ? (1.0) : (t->lbda)) *
985 t->tau * t->dum;
986
987 sum += p_mig[i];
988 }
989 for(i=0;i<t->ldscape_sz;i++) p_mig[i] /= sum;
990
991 arr = Sample_i_With_Proba_pi(p_mig,t->ldscape_sz);
992
993 /* printf("\n. Migrate from %d [%5.2f,%5.2f] to %d [%5.2f,%5.2f]", */
994 /* dep, */
995 /* t->ldscape[dep*t->n_dim+0], */
996 /* t->ldscape[dep*t->n_dim+1], */
997 /* arr, */
998 /* t->ldscape[arr*t->n_dim+0], */
999 /* t->ldscape[arr*t->n_dim+1]); */
1000
1001 /* printf("\n%f\t%f", */
1002 /* t->ldscape[arr*t->n_dim+0]-t->ldscape[dep*t->n_dim+0], */
1003 /* t->ldscape[arr*t->n_dim+1]-t->ldscape[dep*t->n_dim+1]); */
1004
1005
1006 // Update vector of occupation
1007 occup[arr]++;
1008
1009 /* printf("\n. Remove %d. Add %d and %d",branching_nodes[hit]->num,tree->a_nodes[nd_idx]->num,tree->a_nodes[nd_idx+1]->num); */
1010 // Connect two new nodes to the node undergoing a branching event
1011 tree->a_nodes[nd_idx]->v[0] = branching_nodes[hit];
1012 tree->a_nodes[nd_idx+1]->v[0] = branching_nodes[hit];
1013 branching_nodes[hit]->v[1] = tree->a_nodes[nd_idx];
1014 branching_nodes[hit]->v[2] = tree->a_nodes[nd_idx+1];
1015
1016 // update branching_nodes vector. Element 'hit' is being replaced so that the corresponding node can no longer branch out
1017 branching_nodes[hit] = tree->a_nodes[nd_idx];
1018 branching_nodes[n_branching_nodes] = tree->a_nodes[nd_idx+1];
1019
1020 // Update t_loc vector.
1021 t->idx_loc[tree->a_nodes[nd_idx]->num] = dep;
1022 t->idx_loc[tree->a_nodes[nd_idx+1]->num] = arr;
1023
1024 // Update total migration rate
1025 R = .0;
1026 for(i=0;i<t->ldscape_sz;i++)
1027 {
1028 if(occup[i] > 0)
1029 {
1030 for(j=0;j<t->ldscape_sz;j++)
1031 {
1032 R +=
1033 occup[i] *
1034 t->f_mat[i*t->ldscape_sz+j] *
1035 ((occup[j] == 0) ? (1.0) : (t->lbda)) *
1036 t->tau * t->dum;
1037 }
1038 }
1039 }
1040
1041 // Set the time until next branching event
1042 time = time + Rexp(R);
1043
1044 // Update p_branch vector
1045 For(i,n_branching_nodes+1)
1046 {
1047 dep = t->idx_loc[branching_nodes[i]->num];
1048 p_branch[i] = 0.0;
1049 for(j=0;j<t->ldscape_sz;j++)
1050 {
1051 p_branch[i] +=
1052 t->f_mat[dep*t->ldscape_sz+j] *
1053 ((occup[j] == 0) ? (1.0) : (t->lbda)) *
1054 t->tau * t->dum / R;
1055
1056 /* printf("\n. %f %f %f %f", */
1057 /* R, */
1058 /* t->f_mat[dep*t->ldscape_sz+j], */
1059 /* ((occup[j]>0) ? (t->lbda) : (1.0)), */
1060 /* t->tau); */
1061 }
1062 /* printf("\n. %f ",p_branch[i]); */
1063 }
1064
1065
1066 // Increase the number of branching nodes by one (you just added 2 new and removed 1 old)
1067 n_branching_nodes++;
1068 nd_idx += 2;
1069
1070 }
1071 while(n_branching_nodes < n_otu);
1072
1073
1074 // Set the times at the tips
1075 For(i,2*tree->n_otu-1) if(tree->times->nd_t[i] < 0.0) tree->times->nd_t[i] = time;
1076
1077 // Reverse time scale
1078 For(i,2*tree->n_otu-1) tree->times->nd_t[i] -= time;
1079 /* For(i,2*tree->n_otu-1) tree->times->nd_t[i] = FABS(tree->times->nd_t[i]); */
1080
1081 // Bubble sort to put all the tips at the top of the tree->a_nodes array
1082 do
1083 {
1084 swap = NO;
1085 For(i,2*tree->n_otu-2)
1086 {
1087 if(!tree->a_nodes[i+1]->v[1] && tree->a_nodes[i]->v[1])
1088 {
1089 buff_nd = tree->a_nodes[i+1];
1090 tree->a_nodes[i+1] = tree->a_nodes[i];
1091 tree->a_nodes[i] = buff_nd;
1092
1093 buff_t = tree->times->nd_t[i+1];
1094 tree->times->nd_t[i+1] = tree->times->nd_t[i];
1095 tree->times->nd_t[i] = buff_t;
1096
1097 buff_l = t->idx_loc[i+1];
1098 t->idx_loc[i+1] = t->idx_loc[i];
1099 t->idx_loc[i] = buff_l;
1100
1101 swap = YES;
1102 }
1103 }
1104 }
1105 while(swap == YES);
1106
1107
1108 // The rest below is just bookeeping...
1109
1110
1111 For(i,2*tree->n_otu-1) tree->a_nodes[i]->num = i;
1112 For(i,2*tree->n_otu-1)
1113 {
1114 if(i < tree->n_otu) tree->a_nodes[i]->tax = YES;
1115 else tree->a_nodes[i]->tax = NO;
1116 }
1117
1118 /* printf("\n++++++++++++++++++\n"); */
1119 /* For(i,2*tree->n_otu-1) */
1120 /* { */
1121 /* printf("\n. Node %3d [%p] anc:%3d v1:%3d v2:%3d time: %f", */
1122 /* tree->a_nodes[i]->num, */
1123 /* (void *)tree->a_nodes[i], */
1124 /* tree->a_nodes[i]->v[0] ? tree->a_nodes[i]->v[0]->num : -1, */
1125 /* tree->a_nodes[i]->v[1] ? tree->a_nodes[i]->v[1]->num : -1, */
1126 /* tree->a_nodes[i]->v[2] ? tree->a_nodes[i]->v[2]->num : -1, */
1127 /* tree->times->nd_t[i]); */
1128 /* } */
1129
1130
1131 for(i=0;i<tree->n_otu;i++)
1132 {
1133 if(!tree->a_nodes[i]->name) tree->a_nodes[i]->name = (char *)mCalloc(T_MAX_NAME,sizeof(char));
1134 strcpy(tree->a_nodes[i]->name,"x");
1135 sprintf(tree->a_nodes[i]->name+1,"%d",i);
1136 }
1137
1138 tree->n_root->v[1]->v[0] = tree->n_root->v[2];
1139 tree->n_root->v[2]->v[0] = tree->n_root->v[1];
1140
1141 tree->num_curr_branch_available = 0;
1142 Connect_Edges_To_Nodes_Recur(tree->a_nodes[0],tree->a_nodes[0]->v[0],tree);
1143
1144 tree->e_root = tree->n_root->v[1]->b[0];
1145
1146 For(i,2*tree->n_otu-3)
1147 {
1148 tree->a_edges[i]->l->v = FABS(tree->times->nd_t[tree->a_edges[i]->left->num] -
1149 tree->times->nd_t[tree->a_edges[i]->rght->num]);
1150 }
1151
1152 tree->e_root->l->v =
1153 FABS(tree->times->nd_t[tree->n_root->v[1]->num] -
1154 tree->times->nd_t[tree->n_root->num]) +
1155 FABS(tree->times->nd_t[tree->n_root->v[2]->num] -
1156 tree->times->nd_t[tree->n_root->num]);
1157
1158 tree->n_root->l[1] = FABS(tree->times->nd_t[tree->n_root->v[1]->num] -
1159 tree->times->nd_t[tree->n_root->num]);
1160
1161 tree->n_root->l[2] = FABS(tree->times->nd_t[tree->n_root->v[2]->num] -
1162 tree->times->nd_t[tree->n_root->num]);
1163
1164 tree->n_root_pos =
1165 FABS(tree->times->nd_t[tree->n_root->v[2]->num] -
1166 tree->times->nd_t[tree->n_root->num]) / tree->e_root->l->v;
1167
1168 /* printf("\n. %s ",Write_Tree(tree)); */
1169
1170 DR_Draw_Tree("essai.ps",tree);
1171
1172 /* for(i=0;i<tree->n_otu;i++) */
1173 /* printf("\n. %4s %4d [%5.2f %5.2f]",tree->a_nodes[i]->name, */
1174 /* t->idx_loc[i], */
1175 /* t->ldscape[t->idx_loc[i]*t->n_dim+0], */
1176 /* t->ldscape[t->idx_loc[i]*t->n_dim+1]); */
1177
1178
1179 Update_Ancestors(tree->n_root,tree->n_root->v[2],tree);
1180 Update_Ancestors(tree->n_root,tree->n_root->v[1],tree);
1181
1182 Free(branching_nodes);
1183 Free(p_branch);
1184 Free(p_mig);
1185
1186 return(tree);
1187 }
1188
1189 //////////////////////////////////////////////////////////////
1190 //////////////////////////////////////////////////////////////
1191
1192 // Simualte n coordinates (in 2D)
GEO_Simulate_Coordinates(int n,t_geo * t)1193 void GEO_Simulate_Coordinates(int n, t_geo *t)
1194 {
1195 int i;
1196 phydbl width;
1197
1198 width = 5.;
1199
1200 for(i=0;i<n;i++)
1201 {
1202 /* t->ldscape[i*t->n_dim+0] = -width/2. + Uni()*width; */
1203 /* t->ldscape[i*t->n_dim+1] = width/2.; */
1204 t->coord_loc[i]->lonlat[0] = -width/2. + Uni()*width;
1205 t->coord_loc[i]->lonlat[1] = width/2.;
1206 }
1207
1208 /* t->ldscape[0*t->n_dim+0] = 0.0; */
1209 /* t->ldscape[0*t->n_dim+1] = 0.0; */
1210
1211 /* t->ldscape[1*t->n_dim+0] = 0.1; */
1212 /* t->ldscape[1*t->n_dim+1] = 0.1; */
1213
1214 }
1215
1216 //////////////////////////////////////////////////////////////
1217 //////////////////////////////////////////////////////////////
1218
GEO_Optimize_Sigma(t_geo * t,t_tree * tree)1219 void GEO_Optimize_Sigma(t_geo *t, t_tree *tree)
1220 {
1221 Generic_Brent_Lk(&(t->sigma),
1222 t->min_sigma,
1223 t->max_sigma,
1224 1.E-5,
1225 1000,
1226 NO,
1227 GEO_Wrap_Lk,NULL,tree,NULL,NO);
1228 }
1229
1230 //////////////////////////////////////////////////////////////
1231 //////////////////////////////////////////////////////////////
1232
GEO_Optimize_Lambda(t_geo * t,t_tree * tree)1233 void GEO_Optimize_Lambda(t_geo *t, t_tree *tree)
1234 {
1235 Generic_Brent_Lk(&(t->lbda),
1236 t->min_lbda,
1237 t->max_lbda,
1238 1.E-5,
1239 1000,
1240 NO,
1241 GEO_Wrap_Lk,NULL,tree,NULL,NO);
1242 }
1243
1244 //////////////////////////////////////////////////////////////
1245 //////////////////////////////////////////////////////////////
1246
GEO_Optimize_Tau(t_geo * t,t_tree * tree)1247 void GEO_Optimize_Tau(t_geo *t, t_tree *tree)
1248 {
1249 Generic_Brent_Lk(&(t->tau),
1250 t->min_tau,
1251 t->max_tau,
1252 1.E-5,
1253 1000,
1254 NO,
1255 GEO_Wrap_Lk,NULL,tree,NULL,NO);
1256 }
1257
1258 //////////////////////////////////////////////////////////////
1259 //////////////////////////////////////////////////////////////
1260
GEO_Wrap_Lk(t_edge * b,t_tree * tree,supert_tree * stree)1261 phydbl GEO_Wrap_Lk(t_edge *b, t_tree *tree, supert_tree *stree)
1262 {
1263 return GEO_Lk(tree->geo,tree);
1264 }
1265
1266 //////////////////////////////////////////////////////////////
1267 //////////////////////////////////////////////////////////////
1268
GEO_Init_Geo_Struct(t_geo * t)1269 void GEO_Init_Geo_Struct(t_geo *t)
1270 {
1271 t->c_lnL = UNLIKELY;
1272
1273 t->sigma = 1.0;
1274 t->min_sigma = 1.E-3;
1275 t->max_sigma = 1.E+3;
1276 t->sigma_thresh = t->max_sigma;
1277
1278 t->lbda = 1.0;
1279 t->min_lbda = 1.E-3;
1280 t->max_lbda = 1.E+3;
1281
1282 t->tau = 1.0;
1283 t->min_tau = 1.E-3;
1284 t->max_tau = 1.E+3;
1285
1286 t->dum = 1.0;
1287 t->min_dum = 1.E-3;
1288 t->max_dum = 1.E+3;
1289
1290 t->n_dim = 2;
1291 t->ldscape_sz = 1;
1292
1293 t->update_fmat = YES;
1294 }
1295
1296 //////////////////////////////////////////////////////////////
1297 //////////////////////////////////////////////////////////////
1298
GEO_Randomize_Locations(t_node * n,t_geo * t,t_tree * tree)1299 void GEO_Randomize_Locations(t_node *n, t_geo *t, t_tree *tree)
1300 {
1301 if(n->tax == YES) return;
1302 else
1303 {
1304 t_node *v1, *v2;
1305 int i;
1306 phydbl *probs; // vector of probability of picking each location
1307 phydbl sum;
1308 phydbl u;
1309
1310
1311 probs = (phydbl *)mCalloc(t->ldscape_sz,sizeof(phydbl));
1312
1313 v1 = v2 = NULL;
1314 for(i=0;i<3;i++)
1315 {
1316 if(n->v[i] != n->anc && n->b[i] != tree->e_root)
1317 {
1318 if(!v1) v1 = n->v[i];
1319 else v2 = n->v[i];
1320 }
1321 }
1322
1323 if(v1->tax && v2->tax)
1324 {
1325 Free(probs);
1326 return;
1327 }
1328 else if(v1->tax && !v2->tax && t->idx_loc[v1->num] != t->idx_loc[n->num])
1329 {
1330 t->idx_loc[v2->num] = t->idx_loc[n->num];
1331 }
1332 else if(v2->tax && !v1->tax && t->idx_loc[v2->num] != t->idx_loc[n->num])
1333 {
1334 t->idx_loc[v1->num] = t->idx_loc[n->num];
1335 }
1336 else if(v1->tax && !v2->tax && t->idx_loc[v1->num] == t->idx_loc[n->num])
1337 {
1338 sum = 0.0;
1339 for(i=0;i<t->ldscape_sz;i++) sum += t->idx_loc_beneath[v2->num * t->ldscape_sz + i];
1340 for(i=0;i<t->ldscape_sz;i++) probs[i] = t->idx_loc_beneath[v2->num * t->ldscape_sz + i]/sum;
1341
1342 t->idx_loc[v2->num] = Sample_i_With_Proba_pi(probs,t->ldscape_sz);
1343 }
1344 else if(v2->tax && !v1->tax && t->idx_loc[v2->num] == t->idx_loc[n->num])
1345 {
1346 sum = 0.0;
1347 for(i=0;i<t->ldscape_sz;i++) sum += t->idx_loc_beneath[v1->num * t->ldscape_sz + i];
1348 for(i=0;i<t->ldscape_sz;i++) probs[i] = t->idx_loc_beneath[v1->num * t->ldscape_sz + i]/sum;
1349
1350 t->idx_loc[v1->num] = Sample_i_With_Proba_pi(probs,t->ldscape_sz);
1351 }
1352 else
1353 {
1354 int n_v1, n_v2;
1355 phydbl p;
1356
1357 n_v1 = t->idx_loc_beneath[v1->num * t->ldscape_sz + t->idx_loc[n->num]];
1358 n_v2 = t->idx_loc_beneath[v2->num * t->ldscape_sz + t->idx_loc[n->num]];
1359
1360 if(n_v1 + n_v2 < 1)
1361 {
1362 PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__);
1363 Exit("\n");
1364 }
1365
1366
1367 p = n_v1 / (n_v1 + n_v2);
1368
1369 u = Uni();
1370
1371 if(u < p)
1372 {
1373 sum = 0.0;
1374 for(i=0;i<t->ldscape_sz;i++) sum += t->idx_loc_beneath[v2->num * t->ldscape_sz + i];
1375 for(i=0;i<t->ldscape_sz;i++) probs[i] = t->idx_loc_beneath[v2->num * t->ldscape_sz + i]/sum;
1376
1377 t->idx_loc[v2->num] = Sample_i_With_Proba_pi(probs,t->ldscape_sz);
1378 t->idx_loc[v1->num] = t->idx_loc[n->num];
1379 }
1380 else
1381 {
1382 if(t->idx_loc_beneath[v2->num * t->ldscape_sz + t->idx_loc[n->num]] > 0)
1383 {
1384 sum = 0.0;
1385 for(i=0;i<t->ldscape_sz;i++) sum += t->idx_loc_beneath[v1->num * t->ldscape_sz + i];
1386 for(i=0;i<t->ldscape_sz;i++) probs[i] = t->idx_loc_beneath[v1->num * t->ldscape_sz + i]/sum;
1387
1388 t->idx_loc[v1->num] = Sample_i_With_Proba_pi(probs,t->ldscape_sz);
1389 t->idx_loc[v2->num] = t->idx_loc[n->num];
1390 }
1391 else
1392 {
1393 sum = 0.0;
1394 for(i=0;i<t->ldscape_sz;i++) sum += t->idx_loc_beneath[v2->num * t->ldscape_sz + i];
1395 for(i=0;i<t->ldscape_sz;i++) probs[i] = t->idx_loc_beneath[v2->num * t->ldscape_sz + i]/sum;
1396
1397 t->idx_loc[v2->num] = Sample_i_With_Proba_pi(probs,t->ldscape_sz);
1398 t->idx_loc[v1->num] = t->idx_loc[n->num];
1399 }
1400 }
1401
1402 if(t->idx_loc[v1->num] != t->idx_loc[n->num] && t->idx_loc[v2->num] != t->idx_loc[n->num])
1403 {
1404 PhyML_Printf("\n. %d %d %d",t->idx_loc[v1->num],t->idx_loc[v2->num],t->idx_loc[n->num]);
1405 PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__);
1406 Exit("\n");
1407 }
1408
1409 if(t->idx_loc_beneath[v1->num * t->ldscape_sz + t->idx_loc[v1->num]] < 1)
1410 {
1411 PhyML_Printf("\n. %d %d %d",t->idx_loc[v1->num],t->idx_loc[v2->num],t->idx_loc[n->num]);
1412 PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__);
1413 Exit("\n");
1414 }
1415
1416 if(t->idx_loc_beneath[v2->num * t->ldscape_sz + t->idx_loc[v2->num]] < 1)
1417 {
1418 PhyML_Printf("\n. %d %d %d",t->idx_loc[v1->num],t->idx_loc[v2->num],t->idx_loc[n->num]);
1419 PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__);
1420 Exit("\n");
1421 }
1422
1423 }
1424
1425 Free(probs);
1426
1427 for(i=0;i<3;i++)
1428 if(n->v[i] != n->anc && n->b[i] != tree->e_root)
1429 GEO_Randomize_Locations(n->v[i],t,tree);
1430 }
1431 }
1432
1433 //////////////////////////////////////////////////////////////
1434 //////////////////////////////////////////////////////////////
1435
GEO_Get_Locations_Beneath(t_geo * t,t_tree * tree)1436 void GEO_Get_Locations_Beneath(t_geo *t, t_tree *tree)
1437 {
1438 int i;
1439 GEO_Get_Locations_Beneath_Post(tree->n_root,tree->n_root->v[1],t,tree);
1440 GEO_Get_Locations_Beneath_Post(tree->n_root,tree->n_root->v[2],t,tree);
1441
1442 for(i=0;i<t->ldscape_sz;i++)
1443 {
1444 t->idx_loc_beneath[tree->n_root->num*t->ldscape_sz+i] =
1445 t->idx_loc_beneath[tree->n_root->v[1]->num*t->ldscape_sz+i] +
1446 t->idx_loc_beneath[tree->n_root->v[2]->num*t->ldscape_sz+i];
1447 }
1448 }
1449
1450 //////////////////////////////////////////////////////////////
1451 //////////////////////////////////////////////////////////////
1452
GEO_Get_Locations_Beneath_Post(t_node * a,t_node * d,t_geo * t,t_tree * tree)1453 void GEO_Get_Locations_Beneath_Post(t_node *a, t_node *d, t_geo *t, t_tree *tree)
1454 {
1455
1456 if(d->tax)
1457 {
1458 t->idx_loc_beneath[d->num*t->ldscape_sz+t->idx_loc[d->num]] = 1;
1459 return;
1460 }
1461 else
1462 {
1463 int i;
1464 t_node *v1, *v2;
1465
1466 for(i=0;i<3;i++)
1467 {
1468 if(d->v[i] != a && d->b[i] != tree->e_root)
1469 {
1470 GEO_Get_Locations_Beneath_Post(d,d->v[i],t,tree);
1471 }
1472 }
1473
1474
1475 v1 = v2 = NULL;
1476 for(i=0;i<3;i++)
1477 {
1478 if(d->v[i] != a && d->b[i] != tree->e_root)
1479 {
1480 if(!v1) v1 = d->v[i];
1481 else v2 = d->v[i];
1482 }
1483 }
1484
1485
1486 for(i=0;i<t->ldscape_sz;i++)
1487 {
1488 t->idx_loc_beneath[ d->num*t->ldscape_sz+i] =
1489 t->idx_loc_beneath[v1->num*t->ldscape_sz+i] +
1490 t->idx_loc_beneath[v2->num*t->ldscape_sz+i] ;
1491 }
1492 }
1493 }
1494
1495 //////////////////////////////////////////////////////////////
1496 //////////////////////////////////////////////////////////////
1497
GEO_Get_Sigma_Max(t_geo * t)1498 void GEO_Get_Sigma_Max(t_geo *t)
1499 {
1500 int i,j;
1501 phydbl mean_dist,inv_mean_dist;
1502 phydbl sigma_a, sigma_b, sigma_c;
1503 phydbl overlap_a, overlap_b, overlap_c;
1504 phydbl d_intersect;
1505 phydbl overlap_target;
1506 phydbl eps;
1507 int n_iter,n_iter_max;
1508
1509 eps = 1.E-6;
1510 overlap_target = 0.95;
1511 n_iter_max = 100;
1512
1513 mean_dist = -1.;
1514 inv_mean_dist = -1.;
1515 for(i=0;i<t->ldscape_sz-1;i++)
1516 {
1517 for(j=i+1;j<t->ldscape_sz;j++)
1518 {
1519 /* dist = POW(t->ldscape[i*t->n_dim+0] - t->ldscape[j*t->n_dim+0],1); */
1520 /* if(dist > mean_dist) mean_dist = dist; */
1521 /* dist = POW(t->ldscape[i*t->n_dim+1] - t->ldscape[j*t->n_dim+1],1); */
1522 /* if(dist > mean_dist) mean_dist = dist; */
1523 /* mean_dist += FABS(t->ldscape[i*t->n_dim+0] - t->ldscape[j*t->n_dim+0]); */
1524 /* mean_dist += FABS(t->ldscape[i*t->n_dim+1] - t->ldscape[j*t->n_dim+1]); */
1525 mean_dist += FABS(t->coord_loc[i]->lonlat[0] - t->coord_loc[j]->lonlat[0]);
1526 mean_dist += FABS(t->coord_loc[i]->lonlat[1] - t->coord_loc[j]->lonlat[1]);
1527 }
1528 }
1529
1530 mean_dist /= t->ldscape_sz*(t->ldscape_sz-1)/2.;
1531 inv_mean_dist = 1./mean_dist;
1532
1533 PhyML_Printf("\n. Mean distance between locations: %f",mean_dist);
1534
1535 sigma_a = t->min_sigma; sigma_b = 1.0; sigma_c = t->max_sigma;
1536 /* sigma_a = t->min_sigma; sigma_b = 1.0; sigma_c = 10.; */
1537 n_iter = 0;
1538 do
1539 {
1540 d_intersect = Inverse_Truncated_Normal(inv_mean_dist,0.0,sigma_a,0.0,mean_dist);
1541 overlap_a =
1542 (Pnorm(mean_dist,0.0,sigma_a) - Pnorm(d_intersect,0.0,sigma_a))/
1543 (Pnorm(mean_dist,0.0,sigma_a) - Pnorm(0.0,0.0,sigma_a)) +
1544 d_intersect / mean_dist;
1545 /* printf("\n. inter: %f %f [%f]",d_intersect,mean_dist,d_intersect / mean_dist); */
1546
1547 d_intersect = Inverse_Truncated_Normal(inv_mean_dist,0.0,sigma_b,0.0,mean_dist);
1548 overlap_b =
1549 (Pnorm(mean_dist,0.0,sigma_b) - Pnorm(d_intersect,0.0,sigma_b))/
1550 (Pnorm(mean_dist,0.0,sigma_b) - Pnorm(0.0,0.0,sigma_b)) +
1551 d_intersect / mean_dist;
1552 /* printf("\n. inter: %f %f [%f]",d_intersect,mean_dist,d_intersect / mean_dist); */
1553
1554 d_intersect = Inverse_Truncated_Normal(inv_mean_dist,0.0,sigma_c,0.0,mean_dist);
1555 overlap_c =
1556 (Pnorm(mean_dist,0.0,sigma_c) - Pnorm(d_intersect,0.0,sigma_c))/
1557 (Pnorm(mean_dist,0.0,sigma_c) - Pnorm(0.0,0.0,sigma_c)) +
1558 d_intersect / mean_dist;
1559
1560 /* printf("\n. inter: %f %f [%f]",d_intersect,mean_dist,d_intersect / mean_dist); */
1561
1562 /* printf("\n. sigma_a:%f overlap_a:%f sigma_b:%f overlap_b:%f sigma_c:%f overlap_c:%f", */
1563 /* sigma_a,overlap_a, */
1564 /* sigma_b,overlap_b, */
1565 /* sigma_c,overlap_c); */
1566
1567 if(overlap_target > overlap_a && overlap_target < overlap_b)
1568 {
1569 sigma_c = sigma_b;
1570 sigma_b = sigma_a + (sigma_c - sigma_a)/2.;
1571 }
1572 else if(overlap_target > overlap_b && overlap_target < overlap_c)
1573 {
1574 sigma_a = sigma_b;
1575 sigma_b = sigma_a + (sigma_c - sigma_a)/2.;
1576 }
1577 else if(overlap_target < overlap_a)
1578 {
1579 sigma_a /= 2.;
1580 }
1581 else if(overlap_target > overlap_c)
1582 {
1583 sigma_c *= 2.;
1584 }
1585
1586 n_iter++;
1587
1588 }
1589 while(sigma_c - sigma_a > eps && n_iter < n_iter_max);
1590
1591 /* if(sigma_c - sigma_a > eps) */
1592 /* { */
1593 /* PhyML_Printf("\n== Error detected in getting maximum value of sigma."); */
1594 /* PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__); */
1595 /* Exit("\n"); */
1596 /* } */
1597 /* else */
1598 /* { */
1599 /* PhyML_Printf("\n== Threshold for sigma: %f",sigma_b); */
1600 /* } */
1601
1602 t->sigma_thresh = sigma_b;
1603 }
1604
1605 //////////////////////////////////////////////////////////////
1606 //////////////////////////////////////////////////////////////
1607
MCMC_Geo_Updown_Tau_Lbda(t_tree * tree)1608 void MCMC_Geo_Updown_Tau_Lbda(t_tree *tree)
1609 {
1610 phydbl K,mult,u,alpha,ratio;
1611 phydbl cur_lnL,new_lnL;
1612 phydbl cur_tau,new_tau;
1613 phydbl cur_lbda,new_lbda;
1614
1615 K = tree->mcmc->tune_move[tree->mcmc->num_move_geo_updown_tau_lbda];
1616 cur_lnL = tree->geo->c_lnL;
1617 new_lnL = tree->geo->c_lnL;
1618 cur_tau = tree->geo->tau;
1619 new_tau = tree->geo->tau;
1620 cur_lbda = tree->geo->lbda;
1621 new_lbda = tree->geo->lbda;
1622
1623 u = Uni();
1624 mult = exp(K*(u-0.5));
1625
1626 /* Multiply tau by K */
1627 new_tau = cur_tau * K;
1628
1629 /* Divide lbda by same amount */
1630 new_lbda = cur_lbda / K;
1631
1632
1633 if(
1634 new_lbda < tree->geo->min_lbda || new_lbda > tree->geo->max_lbda ||
1635 new_tau < tree->geo->min_tau || new_tau > tree->geo->max_tau
1636 )
1637 {
1638 tree->mcmc->run_move[tree->mcmc->num_move_geo_updown_tau_lbda]++;
1639 return;
1640 }
1641
1642 tree->geo->tau = new_tau;
1643 tree->geo->lbda = new_lbda;
1644
1645 if(tree->eval_alnL) new_lnL = GEO_Lk(tree->geo,tree);
1646
1647 ratio = 0.0;
1648 /* Proposal ratio: 2n-2=> number of multiplications, 1=>number of divisions */
1649 ratio += 0.0*log(mult); /* (1-1)*log(mult); */
1650 /* Likelihood density ratio */
1651 ratio += (new_lnL - cur_lnL);
1652
1653 /* printf("\n. new_tau: %f new_lbda:%f cur_lnL:%f new_lnL:%f",new_tau,new_lbda,cur_lnL,new_lnL); */
1654
1655
1656 ratio = exp(ratio);
1657 alpha = MIN(1.,ratio);
1658 u = Uni();
1659
1660 if(u > alpha) /* Reject */
1661 {
1662 tree->geo->tau = cur_tau;
1663 tree->geo->lbda = cur_lbda;
1664 tree->geo->c_lnL = cur_lnL;
1665 }
1666 else
1667 {
1668 tree->mcmc->acc_move[tree->mcmc->num_move_geo_updown_tau_lbda]++;
1669 }
1670
1671 tree->mcmc->run_move[tree->mcmc->num_move_geo_updown_tau_lbda]++;
1672 }
1673
1674 //////////////////////////////////////////////////////////////
1675 //////////////////////////////////////////////////////////////
1676
1677
MCMC_Geo_Updown_Lbda_Sigma(t_tree * tree)1678 void MCMC_Geo_Updown_Lbda_Sigma(t_tree *tree)
1679 {
1680 phydbl K,mult,u,alpha,ratio;
1681 phydbl cur_lnL,new_lnL;
1682 phydbl cur_lbda,new_lbda;
1683 phydbl cur_sigma,new_sigma;
1684
1685 K = tree->mcmc->tune_move[tree->mcmc->num_move_geo_updown_lbda_sigma];
1686 cur_lnL = tree->geo->c_lnL;
1687 new_lnL = tree->geo->c_lnL;
1688 cur_lbda = tree->geo->lbda;
1689 new_lbda = tree->geo->lbda;
1690 cur_sigma = tree->geo->sigma;
1691 new_sigma = tree->geo->sigma;
1692
1693 u = Uni();
1694 mult = exp(K*(u-0.5));
1695
1696 /* Multiply lbda by K */
1697 new_lbda = cur_lbda * K;
1698
1699 /* Divide sigma by same amount */
1700 new_sigma = cur_sigma / K;
1701
1702
1703 if(
1704 new_sigma < tree->geo->min_sigma || new_sigma > tree->geo->max_sigma ||
1705 new_lbda < tree->geo->min_lbda || new_lbda > tree->geo->max_lbda
1706 )
1707 {
1708 tree->mcmc->run_move[tree->mcmc->num_move_geo_updown_lbda_sigma]++;
1709 return;
1710 }
1711
1712 tree->geo->lbda = new_lbda;
1713 tree->geo->sigma = new_sigma;
1714
1715 if(tree->eval_alnL) new_lnL = GEO_Lk(tree->geo,tree);
1716
1717 ratio = 0.0;
1718 /* Proposal ratio: 2n-2=> number of multiplications, 1=>number of divisions */
1719 ratio += 0.0*log(mult); /* (1-1)*log(mult); */
1720 /* Likelihood density ratio */
1721 ratio += (new_lnL - cur_lnL);
1722
1723 /* printf("\n. new_lbda: %f new_sigma:%f cur_lnL:%f new_lnL:%f",new_lbda,new_sigma,cur_lnL,new_lnL); */
1724
1725
1726 ratio = exp(ratio);
1727 alpha = MIN(1.,ratio);
1728 u = Uni();
1729
1730 if(u > alpha) /* Reject */
1731 {
1732 tree->geo->lbda = cur_lbda;
1733 tree->geo->sigma = cur_sigma;
1734 tree->geo->c_lnL = cur_lnL;
1735 }
1736 else
1737 {
1738 tree->mcmc->acc_move[tree->mcmc->num_move_geo_updown_lbda_sigma]++;
1739 }
1740
1741 tree->mcmc->run_move[tree->mcmc->num_move_geo_updown_lbda_sigma]++;
1742 }
1743
1744 //////////////////////////////////////////////////////////////
1745 //////////////////////////////////////////////////////////////
1746
GEO_Read_In_Landscape(char * file_name,t_geo * t,phydbl ** ldscape,int ** loc_hash,t_tree * tree)1747 void GEO_Read_In_Landscape(char *file_name, t_geo *t, phydbl **ldscape, int **loc_hash, t_tree *tree)
1748 {
1749 FILE *fp;
1750 char *s;
1751 phydbl longitude, lattitude;
1752 int tax,loc;
1753
1754 PhyML_Printf("\n");
1755 PhyML_Printf("\n. Reading landscape file '%s'.\n",file_name);
1756
1757 s = (char *)mCalloc(T_MAX_LINE,sizeof(char));
1758 (*ldscape) = (phydbl *)mCalloc(10*t->n_dim,sizeof(phydbl));
1759 (*loc_hash) = (int *)mCalloc(tree->n_otu,sizeof(int));
1760
1761 fp = Openfile(file_name,0);
1762
1763 tax = loc = -1;
1764
1765 t->ldscape_sz = 0;
1766
1767 do
1768 {
1769 if(fscanf(fp,"%s",s) == EOF) break;
1770
1771 if(strlen(s) > 0) for(tax=0;tax<tree->n_otu;tax++) if(!strcmp(tree->a_nodes[tax]->name,s)) break;
1772
1773 if(tax == tree->n_otu)
1774 {
1775 PhyML_Printf("\n== Could not find a taxon with name '%s' in the tree provided.",s);
1776 /* PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__); */
1777 /* Exit("\n"); */
1778 continue;
1779 }
1780
1781 /* sscanf(line+pos,"%lf %lf",&longitude,&lattitude); */
1782 if(fscanf(fp,"%lf",&longitude) == EOF) break;
1783 if(fscanf(fp,"%lf",&lattitude) == EOF) break;
1784
1785 /* printf("\n. s: %s %f %f",s,longitude,lattitude); */
1786
1787 for(loc=0;loc<t->ldscape_sz;loc++)
1788 {
1789 if(FABS(longitude-(*ldscape)[loc*t->n_dim+0]) < 1.E-10 &&
1790 FABS(lattitude-(*ldscape)[loc*t->n_dim+1]) < 1.E-10)
1791 {
1792 break;
1793 }
1794 }
1795
1796 if(loc == t->ldscape_sz)
1797 {
1798 t->ldscape_sz++;
1799 (*ldscape)[(t->ldscape_sz-1)*t->n_dim+0] = longitude;
1800 (*ldscape)[(t->ldscape_sz-1)*t->n_dim+1] = lattitude;
1801
1802 if(!(t->ldscape_sz%10))
1803 {
1804 (*ldscape) = (phydbl *)mRealloc((*ldscape),(t->ldscape_sz+10)*t->n_dim,sizeof(phydbl));
1805 }
1806 }
1807
1808 (*loc_hash)[tax] = loc;
1809
1810 }
1811 while(1);
1812
1813 for(tax=0;tax<tree->n_otu;tax++)
1814 PhyML_Printf("\n. Taxon %30s, longitude: %12f, lattitude: %12f [%4d]",
1815 tree->a_nodes[tax]->name,
1816 (*ldscape)[(*loc_hash)[tax]*t->n_dim+0],
1817 (*ldscape)[(*loc_hash)[tax]*t->n_dim+1],
1818 (*loc_hash)[tax]);
1819
1820
1821 }
1822
1823 //////////////////////////////////////////////////////////////
1824 //////////////////////////////////////////////////////////////
1825
1826 //////////////////////////////////////////////////////////////
1827 //////////////////////////////////////////////////////////////
1828
1829 //////////////////////////////////////////////////////////////
1830 //////////////////////////////////////////////////////////////
1831
1832
1833