1 /*
2 
3 PhyML:  a program that  computes maximum likelihood phylogenies from
4 DNA or AA homologous sequences.
5 
6 Copyright (C) Stephane Guindon. Oct 2003 onward.
7 
8 All parts of the source except where indicated are distributed under
9 the GNU public licence. See http://www.opensource.org for details.
10 
11 */
12 
13 
14 #include "geo.h"
15 
16 //////////////////////////////////////////////////////////////
17 //////////////////////////////////////////////////////////////
18 
GEO_Main(int argc,char ** argv)19 int GEO_Main(int argc, char **argv)
20 {
21   /* GEO_Simulate_Estimate(argc,argv); */
22   GEO_Estimate(argc,argv);
23   return(1);
24 }
25 
26 //////////////////////////////////////////////////////////////
27 //////////////////////////////////////////////////////////////
28 
GEO_Estimate(int argc,char ** argv)29 int GEO_Estimate(int argc, char **argv)
30 {
31   t_geo *t;
32   int seed;
33   FILE *fp;
34   t_tree *tree;
35   phydbl *ldscp;
36   int *loc_hash;
37   int i,j;
38   phydbl *probs;
39   phydbl sum;
40 
41   // geo ./ban
42 
43 
44   seed = getpid();
45   /* seed = 28224; */
46   /* seed = 21249; */
47   /* seed = 21596; */
48 
49   printf("\n. Seed = %d",seed);
50   srand(seed);
51 
52   t = GEO_Make_Geo_Basic();
53   GEO_Init_Geo_Struct(t);
54 
55   fp = Openfile(argv[1],0); /* Open tree file  */
56 
57   tree = Read_Tree_File_Phylip(fp); /* Read it */
58 
59   Update_Ancestors(tree->n_root,tree->n_root->v[2],tree);
60   Update_Ancestors(tree->n_root,tree->n_root->v[1],tree);
61 
62   tree->rates = RATES_Make_Rate_Struct(tree->n_otu);
63   RATES_Init_Rate_Struct(tree->rates,NULL,tree->n_otu);
64 
65   tree->times = TIMES_Make_Time_Struct(tree->n_otu);
66   TIMES_Init_Time_Struct(tree->times,NULL,tree->n_otu);
67 
68   Branch_To_Time(tree);
69 
70   tree->geo = t;
71 
72   GEO_Read_In_Landscape(argv[2],t,&ldscp,&loc_hash,tree);
73 
74   GEO_Make_Geo_Complete(t->ldscape_sz,t->n_dim,tree->n_otu,t);
75 
76 
77   j = 0;
78   for(i=0;i<t->ldscape_sz;i++)
79     {
80       t->coord_loc[i]->lonlat[0] = ldscp[j];
81       t->coord_loc[i]->lonlat[1] = ldscp[j+1];
82       PhyML_Printf("\n. Location %d: %13f %13f",i+1,t->coord_loc[i]->lonlat[0],t->coord_loc[i]->lonlat[1]);
83       j+=2;
84     }
85 
86   for(i=0;i<tree->n_otu;i++) t->idx_loc[i] = loc_hash[i];
87 
88   t->cov[0*t->n_dim+0] = t->sigma;
89   t->cov[1*t->n_dim+1] = t->sigma;
90   t->cov[0*t->n_dim+1] = 0.0;
91   t->cov[1*t->n_dim+0] = 0.0;
92 
93   GEO_Get_Sigma_Max(t);
94 
95   t->max_sigma = t->sigma_thresh * 2.;
96 
97   PhyML_Printf("\n. Sigma max: %f",t->sigma_thresh);
98 
99   GEO_Get_Locations_Beneath(t,tree);
100 
101   probs = (phydbl *)mCalloc(tree->geo->ldscape_sz,sizeof(phydbl));
102 
103   sum = 0.0;
104   for(i=0;i<tree->geo->ldscape_sz;i++) sum += tree->geo->idx_loc_beneath[tree->n_root->num * tree->geo->ldscape_sz + i];
105   for(i=0;i<tree->geo->ldscape_sz;i++) probs[i] = tree->geo->idx_loc_beneath[tree->n_root->num * tree->geo->ldscape_sz + i]/sum;
106   tree->geo->idx_loc[tree->n_root->num] = Sample_i_With_Proba_pi(probs,tree->geo->ldscape_sz);
107   Free(probs);
108 
109   GEO_Randomize_Locations(tree->n_root,t,tree);
110 
111 
112   GEO_Update_Occup(t,tree);
113   GEO_Lk(t,tree);
114 
115   PhyML_Printf("\n. Init loglk: %f",tree->geo->c_lnL);
116 
117   /* DR_Draw_Tree("essai.ps",tree); */
118 
119   GEO_MCMC(tree);
120 
121   fclose(fp);
122   Free(ldscp);
123   Free(loc_hash);
124 
125   return(1);
126 }
127 
128 //////////////////////////////////////////////////////////////
129 //////////////////////////////////////////////////////////////
130 
GEO_Simulate_Estimate(int argc,char ** argv)131 int GEO_Simulate_Estimate(int argc, char **argv)
132 {
133   t_geo *t;
134   int n_tax;
135   t_tree *tree,*ori_tree;
136   int seed;
137   phydbl *res;
138   FILE *fp;
139   int pid;
140   char *s;
141   int rand_loc;
142   phydbl *probs,sum;
143   int i;
144   phydbl mantel_p_val;
145   phydbl rf;
146 
147   s = (char *)mCalloc(T_MAX_NAME,sizeof(char));
148 
149   strcpy(s,"geo.out");
150   pid = getpid();
151   sprintf(s+strlen(s),".%d",pid);
152 
153   fp = fopen(s,"w");
154 
155   seed = getpid();
156   /* seed = 15520; */
157   /* seed = 5023; */
158   printf("\n. Seed = %d",seed);
159   srand(seed);
160 
161   t = GEO_Make_Geo_Basic();
162   GEO_Init_Geo_Struct(t);
163 
164   /* t->tau        = 3.0; */
165   /* t->lbda       = 0.02; */
166   /* t->sigma      = 10.; */
167 
168 
169   t->ldscape_sz = (int)atoi(argv[1]);
170   t->n_dim      = 2;
171   n_tax         = (int)atoi(argv[2]);
172 
173   GEO_Make_Geo_Complete(t->ldscape_sz,t->n_dim,n_tax,t);
174 
175   t->cov[0*t->n_dim+0] = t->sigma;
176   t->cov[1*t->n_dim+1] = t->sigma;
177   t->cov[0*t->n_dim+1] = 0.0;
178   t->cov[1*t->n_dim+0] = 0.0;
179 
180   GEO_Simulate_Coordinates(t->ldscape_sz,t);
181 
182   GEO_Get_Sigma_Max(t);
183 
184   t->max_sigma = t->sigma_thresh * 2.;
185 
186   PhyML_Printf("\n. sigma max: %f threshold: %f",t->max_sigma,t->sigma_thresh);
187 
188   /* t->tau   = Uni()*(t->max_tau/100.-t->min_tau*10.)  + t->min_tau*10.; */
189   /* t->lbda  = exp(Uni()*(log(t->max_lbda/100.)-log(t->min_lbda*10.))  + log(t->min_lbda*10.)); */
190   /* t->sigma = Uni()*(t->max_sigma-t->min_sigma) + t->min_sigma; */
191 
192   t->lbda  = 1.;
193   t->sigma = 0.3;
194   t->tau   = 1.;
195 
196   PhyML_Fprintf(fp,"\n# SigmaTrue\t SigmaThresh\t LbdaTrue\t TauTrue\txTrue\t yTrue\t xRand\t yRand\t RF\t Sigma5\t Sigma50\t Sigma95\t ProbSigmaInfThresh\t Lbda5\t Lbda50\t Lbda95\t ProbLbdaInf1\t Tau5\t Tau50\t Tau95\t X5\t X50\t X95\t Y5\t Y50\t Y95\t RandX5\t RandX50\t RandX95\t RandY5\t RandY50\t RandY95\t Mantel\t");
197   PhyML_Fprintf(fp,"\n");
198 
199   tree = GEO_Simulate(t,n_tax);
200 
201   ori_tree = Make_Tree_From_Scratch(tree->n_otu,NULL);
202   Copy_Tree(tree,ori_tree);
203 
204   Random_SPRs_On_Rooted_Tree(tree);
205 
206   Free_Bip(ori_tree);
207   Alloc_Bip(ori_tree);
208   Get_Bip(ori_tree->a_nodes[0],ori_tree->a_nodes[0]->v[0],ori_tree);
209 
210   Free_Bip(tree);
211   Alloc_Bip(tree);
212   Get_Bip(tree->a_nodes[0],tree->a_nodes[0]->v[0],tree);
213   Match_Tip_Numbers(tree,ori_tree);
214 
215   rf = (phydbl)Compare_Bip(ori_tree,tree,NO)/(tree->n_otu-3);
216   PhyML_Printf("\n. rf: %f",rf);
217 
218   phydbl scale;
219   scale = exp(Rnorm(0,0.2));
220   PhyML_Printf("\n. Scale: %f",scale);
221   For(i,2*tree->n_otu-1) tree->times->nd_t[i] *= scale;
222 
223 
224   phydbl *tree_dist,*geo_dist;
225   int j;
226 
227   Time_To_Branch(tree);
228   tree_dist = Dist_Btw_Tips(tree);
229 
230   geo_dist = (phydbl *)mCalloc(tree->n_otu*tree->n_otu,sizeof(phydbl));
231 
232   for(i=0;i<tree->n_otu;i++)
233     {
234       for(j=0;j<tree->n_otu;j++)
235         {
236           /* geo_dist[i*tree->n_otu+j] = */
237           /*   FABS(t->ldscape[t->idx_loc[i]*t->n_dim+0] - */
238           /*        t->ldscape[t->idx_loc[j]*t->n_dim+0])+ */
239           /*   FABS(t->ldscape[t->idx_loc[i]*t->n_dim+1] - */
240           /*        t->ldscape[t->idx_loc[j]*t->n_dim+1]); */
241           geo_dist[i*tree->n_otu+j] =
242             FABS(t->coord_loc[t->idx_loc[i]]->lonlat[0] -
243                  t->coord_loc[t->idx_loc[j]]->lonlat[0])+
244             FABS(t->coord_loc[t->idx_loc[i]]->lonlat[1] -
245                  t->coord_loc[t->idx_loc[j]]->lonlat[1]);
246 
247         }
248     }
249 
250   printf("\n. tau: %f lbda: %f sigma: %f",t->tau,t->lbda,t->sigma);
251   mantel_p_val = Mantel(tree_dist,geo_dist,tree->n_otu,tree->n_otu);
252   printf("\n. Mantel test p-value: %f",mantel_p_val);
253 
254   Free(tree_dist);
255   Free(geo_dist);
256 
257   rand_loc = Rand_Int(0,t->ldscape_sz-1);
258 
259   PhyML_Printf("\n. sigma: %f\t lbda: %f\t tau:%f\t x:%f\t y:%f rand.x:%f\t rand.y:%f\t",
260                t->sigma,
261                t->lbda,
262                t->tau,
263                /* t->ldscape[t->idx_loc[tree->n_root->num]*t->n_dim+0], */
264                /* t->ldscape[t->idx_loc[tree->n_root->num]*t->n_dim+1], */
265                /* t->ldscape[rand_loc*t->n_dim+0], */
266                /* t->ldscape[rand_loc*t->n_dim+1]); */
267                t->coord_loc[t->idx_loc[tree->n_root->num]]->lonlat[0],
268                t->coord_loc[t->idx_loc[tree->n_root->num]]->lonlat[1],
269                t->coord_loc[rand_loc]->lonlat[0],
270                t->coord_loc[rand_loc]->lonlat[1]);
271 
272   PhyML_Fprintf(fp,"%f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t",
273                 t->sigma,
274                 t->sigma_thresh,
275                 t->lbda,
276                 t->tau,
277                 t->coord_loc[t->idx_loc[tree->n_root->num]]->lonlat[0],
278                 t->coord_loc[t->idx_loc[tree->n_root->num]]->lonlat[1],
279                 t->coord_loc[rand_loc]->lonlat[0],
280                 t->coord_loc[rand_loc]->lonlat[1],
281                 rf);
282 
283   GEO_Get_Locations_Beneath(t,tree);
284 
285   probs = (phydbl *)mCalloc(tree->geo->ldscape_sz,sizeof(phydbl));
286 
287   sum = 0.0;
288   for(i=0;i<tree->geo->ldscape_sz;i++) sum += tree->geo->idx_loc_beneath[tree->n_root->num * tree->geo->ldscape_sz + i];
289   for(i=0;i<tree->geo->ldscape_sz;i++) probs[i] = tree->geo->idx_loc_beneath[tree->n_root->num * tree->geo->ldscape_sz + i]/sum;
290   tree->geo->idx_loc[tree->n_root->num] = Sample_i_With_Proba_pi(probs,tree->geo->ldscape_sz);
291   Free(probs);
292   GEO_Randomize_Locations(tree->n_root,tree->geo,tree);
293 
294   GEO_Update_Occup(t,tree);
295   GEO_Lk(t,tree);
296   PhyML_Printf("\n. Init loglk: %f",tree->geo->c_lnL);
297 
298 
299   res = GEO_MCMC(tree);
300 
301   PhyML_Fprintf(fp,"%f\t %f\t %f\t %f\t  %f\t %f\t %f\t  %f\t  %f\t %f\t %f\t  %f\t %f\t %f\t  %f\t %f\t %f\t  %f\t %f\t %f\t  %f\t %f\t %f\t  %f \n",
302 
303                 /* Sigma5 */ Quantile(res+0*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.025),
304                 /* Sigma50*/ Quantile(res+0*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.50),
305                 /* Sigma95*/ Quantile(res+0*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.975),
306 
307                 /* ProbInfThesh */ Prob(res+0*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval,t->sigma_thresh),
308 
309                 /* Lbda5 */ Quantile(res+1*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.025),
310                 /* Lbda50 */ Quantile(res+1*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.50),
311                 /* Lbda95 */ Quantile(res+1*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.975),
312 
313                 /* ProbInf1 */ Prob(res+1*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval,1.0),
314 
315                 /* Tau5 */ Quantile(res+2*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.025),
316                 /* Tau50 */ Quantile(res+2*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.50),
317                 /* Tau 95 */ Quantile(res+2*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.975),
318 
319                 /* X5 */ Quantile(res+4*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.025),
320                 /* X50 */ Quantile(res+4*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.50),
321                 /* X95 */ Quantile(res+4*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.975),
322 
323                 /* Y5 */ Quantile(res+5*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.025),
324                 /* Y50 */ Quantile(res+5*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.50),
325                 /* Y95 */ Quantile(res+5*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.975),
326 
327                 /* RandX5 */ Quantile(res+6*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.025),
328                 /* RandX50*/ Quantile(res+6*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.50),
329                 /* RandX95*/ Quantile(res+6*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.975),
330 
331                 /* RandY5 */ Quantile(res+7*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.025),
332                 /* RandY50*/ Quantile(res+7*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.50),
333                 /* RandY95*/ Quantile(res+7*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.975),
334 
335                 mantel_p_val
336                 );
337   fflush(NULL);
338 
339   Free(s);
340   Free(res);
341 
342   fclose(fp);
343 
344   return 1;
345 }
346 
347 //////////////////////////////////////////////////////////////
348 //////////////////////////////////////////////////////////////
349 
GEO_MCMC(t_tree * tree)350 phydbl *GEO_MCMC(t_tree *tree)
351 {
352   phydbl *res;
353   int n_vars;
354   int rand_loc;
355   t_geo *t;
356 
357   t = tree->geo;
358 
359   tree->mcmc = MCMC_Make_MCMC_Struct();
360   MCMC_Complete_MCMC(tree->mcmc,tree);
361 
362   tree->mcmc->io               = NULL;
363   tree->mcmc->is               = NO;
364   tree->mcmc->run              = 0;
365   tree->mcmc->sample_interval  = 1E+3;
366   tree->mcmc->chain_len        = 1E+6;
367   tree->mcmc->chain_len_burnin = 1E+5;
368   tree->mcmc->randomize        = YES;
369   tree->mcmc->norm_freq        = 1E+3;
370   tree->mcmc->max_tune         = 1.E+20;
371   tree->mcmc->min_tune         = 1.E-10;
372   tree->mcmc->print_every      = 2;
373   tree->mcmc->is_burnin        = NO;
374   tree->mcmc->nd_t_digits      = 1;
375 
376   t->tau   = 1.0;
377   t->lbda  = 1.0;
378   t->sigma = 1.0;
379   t->dum   = 1.0;
380 
381   tree->mcmc->chain_len = 1.E+8;
382   tree->mcmc->sample_interval = 50;
383 
384   MCMC_Complete_MCMC(tree->mcmc,tree);
385 
386   GEO_Update_Occup(t,tree);
387   GEO_Lk(t,tree);
388 
389   tree->mcmc->start_ess[tree->mcmc->num_move_geo_sigma]  = YES;
390   tree->mcmc->start_ess[tree->mcmc->num_move_geo_lambda] = YES;
391   tree->mcmc->start_ess[tree->mcmc->num_move_geo_tau]    = YES;
392   tree->mcmc->start_ess[tree->mcmc->num_move_geo_dum]    = YES;
393 
394   n_vars = 10;
395   res = (phydbl *)mCalloc(tree->mcmc->chain_len / tree->mcmc->sample_interval * n_vars,sizeof(phydbl));
396 
397   PhyML_Printf("\n. Run  Sigma [ESS] Lambda [ESS] Tau [ESS] Dummy [ESS] LogLk");
398 
399   tree->mcmc->run = 0;
400   do
401     {
402       MCMC_GEO_Lbda(tree);
403       MCMC_GEO_Tau(tree);
404       /* MCMC_Geo_Dum(tree); */
405       MCMC_GEO_Loc(tree);
406 
407       t->update_fmat = YES;
408       MCMC_GEO_Sigma(tree);
409       t->update_fmat = NO;
410 
411 
412       /* t->update_fmat = YES; */
413       /* MCMC_Geo_Updown_Lbda_Sigma(tree); */
414       /* t->update_fmat = NO; */
415 
416 
417       /* MCMC_Geo_Updown_Tau_Lbda(tree); */
418       /* MCMC_Geo_Updown_Tau_Lbda(tree); */
419       /* MCMC_Geo_Updown_Tau_Lbda(tree); */
420 
421 
422       /* printf("\n"); */
423       /* int i; */
424       /* For(i,2*tree->n_otu-1) */
425       /*   { */
426       /*     if(tree->a_nodes[i]->tax == NO) */
427       /*       { */
428       /*         printf("%2d ",tree->geo->idx_loc[i]); */
429       /*       } */
430       /*   } */
431 
432 
433       if(tree->mcmc->run%tree->mcmc->sample_interval == 0)
434         {
435           MCMC_Copy_To_New_Param_Val(tree->mcmc,tree);
436 
437           MCMC_Update_Effective_Sample_Size(tree->mcmc->num_move_geo_lambda,tree->mcmc,tree);
438           MCMC_Update_Effective_Sample_Size(tree->mcmc->num_move_geo_sigma,tree->mcmc,tree);
439           MCMC_Update_Effective_Sample_Size(tree->mcmc->num_move_geo_tau,tree->mcmc,tree);
440 
441           /* printf("\n. lbda:%f,%f tau:%f,%f sigma:%f,%f", */
442           /*        tree->mcmc->acc_rate[tree->mcmc->num_move_geo_lambda], */
443           /*        tree->mcmc->tune_move[tree->mcmc->num_move_geo_lambda], */
444           /*        tree->mcmc->acc_rate[tree->mcmc->num_move_geo_tau], */
445           /*        tree->mcmc->tune_move[tree->mcmc->num_move_geo_tau], */
446           /*        tree->mcmc->acc_rate[tree->mcmc->num_move_geo_sigma], */
447           /*        tree->mcmc->tune_move[tree->mcmc->num_move_geo_sigma]); */
448 
449           PhyML_Printf("\n. %6d  %12f %4.0f %12f %4.0f %12f %4.0f %12f %4.0f %12f",
450                        tree->mcmc->run,
451 
452                        t->sigma,
453                        tree->mcmc->ess[tree->mcmc->num_move_geo_sigma],
454 
455                        t->lbda,
456                        tree->mcmc->ess[tree->mcmc->num_move_geo_lambda],
457 
458                        t->tau,
459                        tree->mcmc->ess[tree->mcmc->num_move_geo_tau],
460 
461                        t->dum,
462                        tree->mcmc->ess[tree->mcmc->num_move_geo_dum],
463 
464                        t->c_lnL);
465 
466 
467           /* PhyML_Printf("\n%12f %12f %12f %12f", */
468           /*              t->sigma, */
469           /*              t->lbda, */
470           /*              t->tau, */
471           /*              t->dum); */
472 
473 
474           rand_loc = Rand_Int(0,t->ldscape_sz-1);
475 
476           res[0 * tree->mcmc->chain_len / tree->mcmc->sample_interval +  tree->mcmc->run / tree->mcmc->sample_interval] = t->sigma;
477           res[1 * tree->mcmc->chain_len / tree->mcmc->sample_interval +  tree->mcmc->run / tree->mcmc->sample_interval] = t->lbda;
478           res[2 * tree->mcmc->chain_len / tree->mcmc->sample_interval +  tree->mcmc->run / tree->mcmc->sample_interval] = t->tau;
479           res[3 * tree->mcmc->chain_len / tree->mcmc->sample_interval +  tree->mcmc->run / tree->mcmc->sample_interval] = t->c_lnL;
480 
481           /* res[4 * tree->mcmc->chain_len / tree->mcmc->sample_interval +  tree->mcmc->run / tree->mcmc->sample_interval] = t->ldscape[t->idx_loc[tree->n_root->num]*t->n_dim+0]; */
482           /* res[5 * tree->mcmc->chain_len / tree->mcmc->sample_interval +  tree->mcmc->run / tree->mcmc->sample_interval] = t->ldscape[t->idx_loc[tree->n_root->num]*t->n_dim+1]; */
483           res[4 * tree->mcmc->chain_len / tree->mcmc->sample_interval +  tree->mcmc->run / tree->mcmc->sample_interval] = t->coord_loc[t->idx_loc[tree->n_root->num]]->lonlat[0];
484           res[5 * tree->mcmc->chain_len / tree->mcmc->sample_interval +  tree->mcmc->run / tree->mcmc->sample_interval] = t->coord_loc[t->idx_loc[tree->n_root->num]]->lonlat[1];
485 
486           /* res[6 * tree->mcmc->chain_len / tree->mcmc->sample_interval +  tree->mcmc->run / tree->mcmc->sample_interval] = t->ldscape[rand_loc*t->n_dim+0]; */
487           /* res[7 * tree->mcmc->chain_len / tree->mcmc->sample_interval +  tree->mcmc->run / tree->mcmc->sample_interval] = t->ldscape[rand_loc*t->n_dim+1]; */
488           res[6 * tree->mcmc->chain_len / tree->mcmc->sample_interval +  tree->mcmc->run / tree->mcmc->sample_interval] = t->coord_loc[rand_loc]->lonlat[0];
489           res[7 * tree->mcmc->chain_len / tree->mcmc->sample_interval +  tree->mcmc->run / tree->mcmc->sample_interval] = t->coord_loc[rand_loc]->lonlat[1];
490         }
491 
492       tree->mcmc->run++;
493 
494       if(tree->mcmc->ess[tree->mcmc->num_move_geo_sigma] > 200.) tree->mcmc->adjust_tuning[tree->mcmc->num_move_geo_sigma]  = NO;
495       if(tree->mcmc->ess[tree->mcmc->num_move_geo_tau]   > 200.) tree->mcmc->adjust_tuning[tree->mcmc->num_move_geo_tau]    = NO;
496       if(tree->mcmc->ess[tree->mcmc->num_move_geo_lambda]> 200.) tree->mcmc->adjust_tuning[tree->mcmc->num_move_geo_lambda] = NO;
497 
498       MCMC_Get_Acc_Rates(tree->mcmc);
499 
500       if(tree->mcmc->ess[tree->mcmc->num_move_geo_sigma] > 1000. &&
501          tree->mcmc->ess[tree->mcmc->num_move_geo_tau]   > 1000. &&
502          tree->mcmc->ess[tree->mcmc->num_move_geo_lambda]> 1000.) break;
503     }
504   while(tree->mcmc->run < tree->mcmc->chain_len);
505 
506   return(res);
507 
508 }
509 
510 //////////////////////////////////////////////////////////////
511 //////////////////////////////////////////////////////////////
512 
513 //////////////////////////////////////////////////////////////
514 //////////////////////////////////////////////////////////////
515 
516 // Update F matrix. Assume a diagonal covariance matrix.
GEO_Update_Fmat(t_geo * t)517 void GEO_Update_Fmat(t_geo *t)
518 {
519   phydbl *loc1, *loc2;
520   int i,j,k;
521   int err;
522   phydbl lognloc;
523 
524   for(i=0;i<t->n_dim;i++) t->cov[i*t->n_dim+i] = t->sigma; // Diagonal covariance matrix. Same variance in every direction
525 
526   lognloc = log(t->ldscape_sz);
527 
528   // Fill in F matrix;
529   for(i=0;i<t->ldscape_sz;i++)
530     {
531       loc1 = t->coord_loc[i]->lonlat;
532 
533       for(j=i;j<t->ldscape_sz;j++)
534         {
535           loc2 = t->coord_loc[j]->lonlat;
536 
537           t->f_mat[i*t->ldscape_sz+j] = .0;
538 
539           // Calculate log(f(l_i,l_j)) - log(f(l_i,l_i) - log(m)
540           for(k=0;k<t->n_dim;k++) t->f_mat[i*t->ldscape_sz+j] += Log_Dnorm(loc2[k],loc1[k],t->cov[k*t->n_dim+k],&err);
541           t->f_mat[i*t->ldscape_sz+j] -= lognloc;
542           for(k=0;k<t->n_dim;k++) t->f_mat[i*t->ldscape_sz+j] -= Log_Dnorm(loc1[k],loc1[k],t->cov[k*t->n_dim+k],&err);
543 
544           // Take the exponential
545           t->f_mat[i*t->ldscape_sz+j] = exp(t->f_mat[i*t->ldscape_sz+j]);
546 
547           // Matrix is symmetric
548           t->f_mat[j*t->ldscape_sz+i] = t->f_mat[i*t->ldscape_sz+j];
549 
550           /* printf("\n. f[%d,%d] = %f (1:[%f;%f] 2:[%f;%f]) sigma=%f",i,j,t->f_mat[i*t->ldscape_sz+j],loc1[0],loc1[1],loc2[0],loc2[1],SQRT(t->cov[0*t->n_dim+0])); */
551         }
552     }
553 }
554 
555 //////////////////////////////////////////////////////////////
556 //////////////////////////////////////////////////////////////
557 
558 // Sort node heights from oldest to youngest age.
GEO_Update_Sorted_Nd(t_geo * t,t_tree * tree)559 void GEO_Update_Sorted_Nd(t_geo *t, t_tree *tree)
560 {
561   int i;
562   int swap;
563   t_node *buff;
564 
565   buff = NULL;
566 
567   For(i,2*tree->n_otu-1) t->sorted_nd[i] = tree->a_nodes[i];
568 
569   // Bubble sort of the node heights
570   do
571     {
572       swap = NO;
573       For(i,2*tree->n_otu-2)
574         {
575           if(tree->times->nd_t[t->sorted_nd[i+1]->num] < tree->times->nd_t[t->sorted_nd[i]->num])
576             {
577               buff              = t->sorted_nd[i];
578               t->sorted_nd[i]   = t->sorted_nd[i+1];
579               t->sorted_nd[i+1] = buff;
580 
581               swap = YES;
582             }
583         }
584     }
585   while(swap == YES);
586 }
587 
588 //////////////////////////////////////////////////////////////
589 //////////////////////////////////////////////////////////////
590 
591 // Update the set of vectors of occupation along the tree
GEO_Update_Occup(t_geo * t,t_tree * tree)592 void GEO_Update_Occup(t_geo *t, t_tree *tree)
593 {
594   int i,j;
595   t_node *v1, *v2;
596 
597   GEO_Update_Sorted_Nd(t,tree);
598 
599   For(i,t->ldscape_sz*(2*tree->n_otu-1)) t->occup[i] = 0;
600 
601   t->occup[tree->n_root->num*t->ldscape_sz + t->idx_loc[tree->n_root->num]] = 1;
602 
603   for(i=1;i<2*tree->n_otu-1;i++)
604     {
605       for(j=0;j<t->ldscape_sz;j++)
606         {
607           t->occup[t->sorted_nd[i]->num*t->ldscape_sz + j] =
608             t->occup[t->sorted_nd[i-1]->num*t->ldscape_sz + j];
609         }
610 
611 
612       if(t->sorted_nd[i-1]->tax == NO)
613         {
614           v1 = v2 = NULL;
615           if(t->sorted_nd[i-1] == tree->n_root)
616             {
617               v1 = tree->n_root->v[1];
618               v2 = tree->n_root->v[2];
619             }
620           else
621             {
622               for(j=0;j<3;j++)
623                 {
624                   if(t->sorted_nd[i-1]->v[j] != t->sorted_nd[i-1]->anc &&
625                      t->sorted_nd[i-1]->b[j] != tree->e_root)
626                     {
627                       if(!v1) v1 = t->sorted_nd[i-1]->v[j];
628                       else    v2 = t->sorted_nd[i-1]->v[j];
629                     }
630                 }
631             }
632 
633 
634           if(t->idx_loc[v1->num] != t->idx_loc[t->sorted_nd[i-1]->num])
635             {
636               t->occup[t->sorted_nd[i]->num * t->ldscape_sz + t->idx_loc[v1->num]]++;
637             }
638           else
639             {
640               t->occup[t->sorted_nd[i]->num * t->ldscape_sz + t->idx_loc[v2->num]]++;
641             }
642         }
643     }
644 
645   /* printf("\n"); */
646   /* For(i,2*tree->n_otu-1) */
647   /*   { */
648   /*     printf("\n. Node %3d: ",t->sorted_nd[i]->num); */
649   /*     for(j=0;j<t->ldscape_sz;j++) */
650   /*       { */
651   /*         /\* printf("%3d [%12f;%12f]   ", *\/ */
652   /*         /\*        t->occup[t->sorted_nd[i]->num*t->ldscape_sz + j], *\/ */
653   /*         /\*        t->ldscape[j*t->n_dim+0],t->ldscape[j*t->n_dim+1]); *\/ */
654   /*         /\* printf("%3d ", *\/ */
655   /*         /\*        t->occup[t->sorted_nd[i]->num*t->ldscape_sz + j]); *\/ */
656   /*       } */
657   /*   } */
658 }
659 
660 //////////////////////////////////////////////////////////////
661 //////////////////////////////////////////////////////////////
662 
663 // Calculate R mat at node n
GEO_Update_Rmat(t_node * n,t_geo * t,t_tree * tree)664 void GEO_Update_Rmat(t_node *n, t_geo *t, t_tree *tree)
665 {
666   int i,j;
667   phydbl lbda_j;
668 
669   for(i=0;i<t->ldscape_sz;i++)
670     {
671       for(j=0;j<t->ldscape_sz;j++)
672         {
673           lbda_j = ((t->occup[n->num*t->ldscape_sz + j]==0) ? (1.0) : (t->lbda));
674           t->r_mat[i*t->ldscape_sz+j] = t->f_mat[i*t->ldscape_sz+j] * lbda_j * t->tau * t->dum;
675         }
676     }
677 }
678 
679 //////////////////////////////////////////////////////////////
680 //////////////////////////////////////////////////////////////
681 
GEO_Lk(t_geo * t,t_tree * tree)682 phydbl GEO_Lk(t_geo *t, t_tree *tree)
683 {
684   int i,j;
685   phydbl loglk;
686   phydbl R;
687   int dep,arr; // departure and arrival location indices;
688   t_node *curr_n,*prev_n,*v1,*v2;
689   phydbl sum;
690 
691   GEO_Update_Occup(t,tree);     // Same here.
692 
693   if(t->update_fmat == YES) GEO_Update_Fmat(t);
694 
695   prev_n = NULL;
696   curr_n = NULL;
697   loglk = .0;
698   for(i=1;i<tree->n_otu-1;i++) // Consider all the time slices, from oldest to youngest.
699                                // Start at first node below root
700     {
701       prev_n = t->sorted_nd[i-1]; // node just above
702       curr_n = t->sorted_nd[i];   // current node
703 
704       GEO_Update_Rmat(curr_n,t,tree); // NOTE: don't need to do that every time. Add check later.
705 
706       R = GEO_Total_Migration_Rate(curr_n,t); // Total migration rate calculated at node n
707 
708       /* if(i < 2) */
709       /*   { */
710       /*     printf("\n. t0: %f t1: %f  R: %f tau: %f sigma: %f lbda: %f x1: %f y1: %f x2: %f y2: %f log0: %d loc1: %d rij: %G fij: %G", */
711       /*            tree->times->nd_t[t->sorted_nd[i-1]->num], */
712       /*            tree->times->nd_t[t->sorted_nd[i]->num], */
713       /*            R, */
714       /*            t->tau,                  */
715       /*            t->lbda,                  */
716       /*            t->sigma,                  */
717       /*            t->ldscape[t->idx_loc[tree->geo->sorted_nd[i-1]->num]*t->n_dim+0], */
718       /*            t->ldscape[t->idx_loc[tree->geo->sorted_nd[i-1]->num]*t->n_dim+1], */
719       /*            t->ldscape[t->idx_loc[tree->geo->sorted_nd[i]->num]*t->n_dim+0], */
720       /*            t->ldscape[t->idx_loc[tree->geo->sorted_nd[i]->num]*t->n_dim+1], */
721       /*            t->idx_loc[tree->geo->sorted_nd[i-1]->num], */
722       /*            t->idx_loc[tree->geo->sorted_nd[i]->num], */
723       /*            t->r_mat[t->idx_loc[tree->geo->sorted_nd[i-1]->num] * t->ldscape_sz + t->idx_loc[tree->geo->sorted_nd[i]->num]], */
724       /*            t->f_mat[t->idx_loc[tree->geo->sorted_nd[i-1]->num] * t->ldscape_sz + t->idx_loc[tree->geo->sorted_nd[i]->num]] */
725       /*            );           */
726 
727       /*     printf("\n >> "); */
728       /*     int j; */
729       /*     for(j=0;j<t->ldscape_sz;j++)  */
730       /*       if(t->occup[t->sorted_nd[i]->num * t->ldscape_sz + j] > 0)  */
731       /*         printf("%f %f -- ", */
732       /*                t->ldscape[j*t->n_dim+0], */
733       /*                t->ldscape[j*t->n_dim+1]); */
734       /*   } */
735 
736 
737       /* printf("\n. %d %d (%d) %f %p %p \n",i,curr_n->num,curr_n->tax,tree->times->nd_t[curr_n->num],curr_n->v[1],curr_n->v[2]); */
738 
739       v1 = v2 = NULL;
740       for(j=0;j<3;j++)
741         if(curr_n->v[j] != curr_n->anc && curr_n->b[j] != tree->e_root)
742           {
743             if(!v1) v1 = curr_n->v[j];
744             else    v2 = curr_n->v[j];
745           }
746 
747       dep = t->idx_loc[curr_n->num]; // departure location
748       arr =                      // arrival location
749         (t->idx_loc[v1->num] == t->idx_loc[curr_n->num] ?
750          t->idx_loc[v2->num] :
751          t->idx_loc[v1->num]);
752 
753       /* printf("\n%f\t%f", */
754       /*        t->ldscape[arr*t->n_dim+0]-t->ldscape[dep*t->n_dim+0], */
755       /*        t->ldscape[arr*t->n_dim+1]-t->ldscape[dep*t->n_dim+1]); */
756 
757       loglk -= R * FABS(tree->times->nd_t[curr_n->num] - tree->times->nd_t[prev_n->num]);
758       loglk += log(t->r_mat[dep * t->ldscape_sz + arr]);
759 
760       /* printf("\n <> %d %f %f %d %d v1:%d v2:%d anc:%d %d %d %d", */
761       /*        curr_n->num, */
762       /*        R * FABS(tree->times->nd_t[curr_n->num] - tree->times->nd_t[prev_n->num]), */
763       /*        log(t->r_mat[dep * t->ldscape_sz + arr]), */
764       /*        dep,arr,v1->num,v2->num,curr_n->anc->num,curr_n->v[0]->num,curr_n->v[1]->num,curr_n->v[2]->num); */
765 
766       /* if(i<2) */
767       /*   { */
768           /* printf("\n. R = %f r_mat = %f f_mat = %f dt = %f loglk = %f", */
769           /*        R, */
770           /*        t->r_mat[dep * t->ldscape_sz + arr], */
771           /*        t->f_mat[dep * t->ldscape_sz + arr], */
772           /*        FABS(t->sorted_nd[i] - t->sorted_nd[i-1]),loglk); */
773           /* Exit("\n"); */
774         /* } */
775 
776     }
777 
778 
779   // Likelihood for the first 'slice' (i.e., the part just below the root down to
780   // the next node)
781   GEO_Update_Rmat(tree->n_root,t,tree);
782 
783   loglk -= log(t->ldscape_sz);
784   dep = t->idx_loc[tree->n_root->num];
785   arr =
786     (t->idx_loc[tree->n_root->num] != t->idx_loc[tree->n_root->v[1]->num] ?
787      t->idx_loc[tree->n_root->v[1]->num] :
788      t->idx_loc[tree->n_root->v[2]->num]);
789 
790   /* printf("\n %f %f",t->ldscape[dep],t->ldscape[arr]); */
791 
792   loglk += log(t->r_mat[dep * t->ldscape_sz + arr]);
793 
794   sum = .0;
795   for(i=0;i<t->ldscape_sz;i++) sum += t->r_mat[dep * t->ldscape_sz + i];
796 
797   loglk -= log(sum);
798 
799   tree->geo->c_lnL = loglk;
800 
801   return loglk;
802 }
803 
804 //////////////////////////////////////////////////////////////
805 //////////////////////////////////////////////////////////////
806 
GEO_Init_Tloc_Tips(t_geo * t,t_tree * tree)807 void GEO_Init_Tloc_Tips(t_geo *t, t_tree *tree)
808 {
809   int i;
810 
811   // TO DO
812   for(i=0;i<tree->n_otu;i++)
813     {
814       t->idx_loc[i] = i;
815     }
816 }
817 
818 //////////////////////////////////////////////////////////////
819 //////////////////////////////////////////////////////////////
820 
821 // Do not forget to call GEO_Update_Rmat (with node n) before calling this function
GEO_Total_Migration_Rate(t_node * n,t_geo * t)822 phydbl GEO_Total_Migration_Rate(t_node *n, t_geo *t)
823 {
824   phydbl R;
825   int i,j;
826 
827   R = .0;
828 
829   for(i=0;i<t->ldscape_sz;i++)
830     {
831       for(j=0;j<t->ldscape_sz;j++)
832         {
833           R +=
834             t->r_mat[i * t->ldscape_sz + j] *
835             t->occup[n->num * t->ldscape_sz + i];
836         }
837     }
838 
839   return R;
840 }
841 
842 //////////////////////////////////////////////////////////////
843 //////////////////////////////////////////////////////////////
844 
845 // Find the arrival location for the migration leaving from n
GEO_Get_Arrival_Location(t_node * n,t_geo * t,t_tree * tree)846 int GEO_Get_Arrival_Location(t_node *n, t_geo *t, t_tree *tree)
847 {
848   int i;
849   t_node *v1, *v2; // the two daughters of n
850 
851   v1 = v2 = NULL;
852 
853   for(i=0;i<3;i++)
854     {
855       if(n->v[i] && n->v[i] != n->anc)
856         {
857           if(!v1) v1 = n->v[i];
858           else    v2 = n->v[i];
859         }
860     }
861 
862   if(t->idx_loc[v1->num] == t->idx_loc[v2->num]) // Migrated to the same location as that of n
863     {
864       if(t->idx_loc[n->num] != t->idx_loc[v1->num])
865         {
866           PhyML_Printf("\n== Error detected in location labeling.");
867           PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__);
868           Exit("\n");
869         }
870       else
871         return t->idx_loc[n->num];
872     }
873   else // Migrated to a different spot
874     {
875       if((t->idx_loc[v1->num] != t->idx_loc[n->num]) && (t->idx_loc[v2->num] != t->idx_loc[n->num]))
876         {
877           PhyML_Printf("\n== Error detected in location labeling.");
878           PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__);
879           Exit("\n");
880         }
881       else
882         {
883           if(t->idx_loc[v1->num] == t->idx_loc[n->num]) return t->idx_loc[v2->num]; // v2 gets the new location, v1 inheritates from n
884           else                                  return t->idx_loc[v1->num]; // v1 gets the new location, v2 inheritates from n
885         }
886     }
887   return -1;
888 }
889 
890 //////////////////////////////////////////////////////////////
891 //////////////////////////////////////////////////////////////
892 
GEO_Simulate(t_geo * t,int n_otu)893 t_tree *GEO_Simulate(t_geo *t, int n_otu)
894 {
895   t_tree *tree;
896   int n_branching_nodes;
897   t_node **branching_nodes; // vector of nodes available for branching out
898   phydbl *p_branch; // p_branch[i]: proba that the i-th node in branching_nodes branches out (p_x vector in the article)
899   phydbl *p_mig; // p_branch[i]: proba of migrating to location i from the location of the edge branching out (q_i vector in the article)
900   int hit;
901   phydbl time;
902   int dep, arr;
903   int i,j;
904   phydbl sum;
905   phydbl R;
906   int *occup; // occupation vector. Updated as we move down the tree
907   int nd_idx;
908   t_node *buff_nd;
909   phydbl buff_t;
910   int buff_l;
911   int swap;
912 
913   tree = Make_Tree_From_Scratch(n_otu,NULL);
914 
915   tree->rates = RATES_Make_Rate_Struct(tree->n_otu);
916   RATES_Init_Rate_Struct(tree->rates,NULL,tree->n_otu);
917 
918   tree->times = TIMES_Make_Time_Struct(tree->n_otu);
919   TIMES_Init_Time_Struct(tree->times,NULL,tree->n_otu);
920 
921   tree->n_root = tree->a_nodes[2*tree->n_otu-2]; // Set the root node to the last element in the list of nodes
922   tree->geo = t;
923 
924 
925   For(i,2*tree->n_otu-2) tree->times->nd_t[i] = -1.;
926 
927   occup = (int *)mCalloc(t->ldscape_sz,sizeof(int));
928 
929   GEO_Update_Fmat(t);
930 
931   branching_nodes = (t_node **)mCalloc(tree->n_otu,sizeof(t_node *));
932   branching_nodes[0] = tree->n_root;
933   n_branching_nodes  = 1;
934   nd_idx = 0;
935 
936 
937   p_branch = (phydbl *)mCalloc(tree->n_otu,sizeof(phydbl ));
938   p_branch[0] = 1.0;
939 
940   p_mig = (phydbl *)mCalloc(t->ldscape_sz,sizeof(phydbl ));
941 
942   time = 0.0; // Time at the root node (this will be changed afterward)
943 
944   // Sample a location uniformly for the root
945   t->idx_loc[tree->n_root->num] = Rand_Int(0,t->ldscape_sz-1);
946 
947   // Update the occupancy vector
948   occup[t->idx_loc[tree->n_root->num]] = 1;
949 
950   dep = arr = -1;
951 
952 
953  // total migration rate
954   R = 0.0;
955   for(i=0;i<t->ldscape_sz;i++)
956     {
957       R +=
958         t->f_mat[t->idx_loc[tree->n_root->num]*t->ldscape_sz+i] *
959         ((occup[i] == 0) ? (1.0) : (t->lbda)) *
960         t->tau * t->dum;
961     }
962 
963   do
964     {
965       // Select the node that branches out
966       hit = Sample_i_With_Proba_pi(p_branch,n_branching_nodes);
967 
968       /* printf("\n. [%d] Select node %d (location %d)",n_branching_nodes,branching_nodes[hit]->num,t->idx_loc[branching_nodes[hit]->num]); */
969 
970       // Set the time for the branching node
971       tree->times->nd_t[branching_nodes[hit]->num] = time;
972 
973 
974       /* printf("\n. Set its time to %f",time); */
975 
976       // Select the destination location
977       dep = t->idx_loc[branching_nodes[hit]->num]; // Departure point
978 
979       sum = .0;
980       for(i=0;i<t->ldscape_sz;i++) // Total rate of migration out of departure point
981         {
982           p_mig[i] =
983             t->f_mat[dep*t->ldscape_sz+i] *
984             ((occup[i] == 0) ? (1.0) : (t->lbda)) *
985             t->tau * t->dum;
986 
987           sum += p_mig[i];
988         }
989       for(i=0;i<t->ldscape_sz;i++) p_mig[i] /= sum;
990 
991       arr = Sample_i_With_Proba_pi(p_mig,t->ldscape_sz);
992 
993       /* printf("\n. Migrate from %d [%5.2f,%5.2f] to %d [%5.2f,%5.2f]", */
994       /*        dep, */
995       /*        t->ldscape[dep*t->n_dim+0], */
996       /*        t->ldscape[dep*t->n_dim+1], */
997       /*        arr, */
998       /*        t->ldscape[arr*t->n_dim+0], */
999       /*        t->ldscape[arr*t->n_dim+1]); */
1000 
1001       /* printf("\n%f\t%f", */
1002       /*        t->ldscape[arr*t->n_dim+0]-t->ldscape[dep*t->n_dim+0], */
1003       /*        t->ldscape[arr*t->n_dim+1]-t->ldscape[dep*t->n_dim+1]); */
1004 
1005 
1006       // Update vector of occupation
1007       occup[arr]++;
1008 
1009       /* printf("\n. Remove %d. Add %d and %d",branching_nodes[hit]->num,tree->a_nodes[nd_idx]->num,tree->a_nodes[nd_idx+1]->num); */
1010       // Connect two new nodes to the node undergoing a branching event
1011       tree->a_nodes[nd_idx]->v[0]   = branching_nodes[hit];
1012       tree->a_nodes[nd_idx+1]->v[0] = branching_nodes[hit];
1013       branching_nodes[hit]->v[1] = tree->a_nodes[nd_idx];
1014       branching_nodes[hit]->v[2] = tree->a_nodes[nd_idx+1];
1015 
1016       // update branching_nodes vector. Element 'hit' is being replaced so that the corresponding node can no longer branch out
1017       branching_nodes[hit]                 = tree->a_nodes[nd_idx];
1018       branching_nodes[n_branching_nodes]   = tree->a_nodes[nd_idx+1];
1019 
1020       // Update t_loc vector.
1021       t->idx_loc[tree->a_nodes[nd_idx]->num]   = dep;
1022       t->idx_loc[tree->a_nodes[nd_idx+1]->num] = arr;
1023 
1024       // Update total migration rate
1025       R = .0;
1026       for(i=0;i<t->ldscape_sz;i++)
1027         {
1028           if(occup[i] > 0)
1029             {
1030               for(j=0;j<t->ldscape_sz;j++)
1031                 {
1032                   R +=
1033                     occup[i] *
1034                     t->f_mat[i*t->ldscape_sz+j] *
1035                     ((occup[j] == 0) ? (1.0) : (t->lbda)) *
1036                     t->tau * t->dum;
1037                 }
1038             }
1039         }
1040 
1041       // Set the time until next branching event
1042       time = time + Rexp(R);
1043 
1044       // Update p_branch vector
1045       For(i,n_branching_nodes+1)
1046         {
1047           dep = t->idx_loc[branching_nodes[i]->num];
1048           p_branch[i] = 0.0;
1049           for(j=0;j<t->ldscape_sz;j++)
1050             {
1051               p_branch[i] +=
1052                 t->f_mat[dep*t->ldscape_sz+j] *
1053                 ((occup[j] == 0) ? (1.0) : (t->lbda)) *
1054                 t->tau * t->dum / R;
1055 
1056               /* printf("\n. %f %f %f %f", */
1057               /*        R, */
1058               /*        t->f_mat[dep*t->ldscape_sz+j], */
1059               /*        ((occup[j]>0) ? (t->lbda) : (1.0)), */
1060               /*        t->tau); */
1061             }
1062           /* printf("\n. %f ",p_branch[i]); */
1063         }
1064 
1065 
1066       // Increase the number of branching nodes by one (you just added 2 new and removed 1 old)
1067       n_branching_nodes++;
1068       nd_idx += 2;
1069 
1070     }
1071   while(n_branching_nodes < n_otu);
1072 
1073 
1074   // Set the times at the tips
1075   For(i,2*tree->n_otu-1) if(tree->times->nd_t[i] < 0.0) tree->times->nd_t[i] = time;
1076 
1077   // Reverse time scale
1078   For(i,2*tree->n_otu-1) tree->times->nd_t[i] -= time;
1079   /* For(i,2*tree->n_otu-1) tree->times->nd_t[i] = FABS(tree->times->nd_t[i]); */
1080 
1081   //  Bubble sort to put all the tips at the top of the tree->a_nodes array
1082   do
1083     {
1084       swap = NO;
1085       For(i,2*tree->n_otu-2)
1086         {
1087           if(!tree->a_nodes[i+1]->v[1] && tree->a_nodes[i]->v[1])
1088             {
1089               buff_nd            = tree->a_nodes[i+1];
1090               tree->a_nodes[i+1] = tree->a_nodes[i];
1091               tree->a_nodes[i]   = buff_nd;
1092 
1093               buff_t                 = tree->times->nd_t[i+1];
1094               tree->times->nd_t[i+1] = tree->times->nd_t[i];
1095               tree->times->nd_t[i]   = buff_t;
1096 
1097               buff_l      = t->idx_loc[i+1];
1098               t->idx_loc[i+1] = t->idx_loc[i];
1099               t->idx_loc[i]   = buff_l;
1100 
1101               swap = YES;
1102             }
1103         }
1104     }
1105   while(swap == YES);
1106 
1107 
1108   // The rest below is just bookeeping...
1109 
1110 
1111   For(i,2*tree->n_otu-1) tree->a_nodes[i]->num = i;
1112   For(i,2*tree->n_otu-1)
1113     {
1114       if(i < tree->n_otu) tree->a_nodes[i]->tax = YES;
1115       else                tree->a_nodes[i]->tax = NO;
1116     }
1117 
1118   /* printf("\n++++++++++++++++++\n"); */
1119   /* For(i,2*tree->n_otu-1) */
1120   /*   { */
1121   /*     printf("\n. Node %3d [%p] anc:%3d v1:%3d v2:%3d time: %f", */
1122   /*            tree->a_nodes[i]->num, */
1123   /*            (void *)tree->a_nodes[i], */
1124   /*            tree->a_nodes[i]->v[0] ? tree->a_nodes[i]->v[0]->num : -1, */
1125   /*            tree->a_nodes[i]->v[1] ? tree->a_nodes[i]->v[1]->num : -1, */
1126   /*            tree->a_nodes[i]->v[2] ? tree->a_nodes[i]->v[2]->num : -1, */
1127   /*            tree->times->nd_t[i]);              */
1128     /* } */
1129 
1130 
1131   for(i=0;i<tree->n_otu;i++)
1132     {
1133       if(!tree->a_nodes[i]->name) tree->a_nodes[i]->name = (char *)mCalloc(T_MAX_NAME,sizeof(char));
1134       strcpy(tree->a_nodes[i]->name,"x");
1135       sprintf(tree->a_nodes[i]->name+1,"%d",i);
1136     }
1137 
1138   tree->n_root->v[1]->v[0] = tree->n_root->v[2];
1139   tree->n_root->v[2]->v[0] = tree->n_root->v[1];
1140 
1141   tree->num_curr_branch_available = 0;
1142   Connect_Edges_To_Nodes_Recur(tree->a_nodes[0],tree->a_nodes[0]->v[0],tree);
1143 
1144   tree->e_root = tree->n_root->v[1]->b[0];
1145 
1146   For(i,2*tree->n_otu-3)
1147     {
1148       tree->a_edges[i]->l->v = FABS(tree->times->nd_t[tree->a_edges[i]->left->num] -
1149                                     tree->times->nd_t[tree->a_edges[i]->rght->num]);
1150     }
1151 
1152   tree->e_root->l->v =
1153     FABS(tree->times->nd_t[tree->n_root->v[1]->num] -
1154          tree->times->nd_t[tree->n_root->num]) +
1155     FABS(tree->times->nd_t[tree->n_root->v[2]->num] -
1156          tree->times->nd_t[tree->n_root->num]);
1157 
1158   tree->n_root->l[1] = FABS(tree->times->nd_t[tree->n_root->v[1]->num] -
1159                             tree->times->nd_t[tree->n_root->num]);
1160 
1161   tree->n_root->l[2] = FABS(tree->times->nd_t[tree->n_root->v[2]->num] -
1162                             tree->times->nd_t[tree->n_root->num]);
1163 
1164   tree->n_root_pos =
1165     FABS(tree->times->nd_t[tree->n_root->v[2]->num] -
1166          tree->times->nd_t[tree->n_root->num]) / tree->e_root->l->v;
1167 
1168   /* printf("\n. %s ",Write_Tree(tree)); */
1169 
1170   DR_Draw_Tree("essai.ps",tree);
1171 
1172   /* for(i=0;i<tree->n_otu;i++) */
1173   /*   printf("\n. %4s %4d [%5.2f %5.2f]",tree->a_nodes[i]->name, */
1174   /*          t->idx_loc[i], */
1175   /*          t->ldscape[t->idx_loc[i]*t->n_dim+0], */
1176   /*          t->ldscape[t->idx_loc[i]*t->n_dim+1]); */
1177 
1178 
1179   Update_Ancestors(tree->n_root,tree->n_root->v[2],tree);
1180   Update_Ancestors(tree->n_root,tree->n_root->v[1],tree);
1181 
1182   Free(branching_nodes);
1183   Free(p_branch);
1184   Free(p_mig);
1185 
1186   return(tree);
1187 }
1188 
1189 //////////////////////////////////////////////////////////////
1190 //////////////////////////////////////////////////////////////
1191 
1192 // Simualte n coordinates (in 2D)
GEO_Simulate_Coordinates(int n,t_geo * t)1193 void GEO_Simulate_Coordinates(int n, t_geo *t)
1194 {
1195   int i;
1196   phydbl width;
1197 
1198   width = 5.;
1199 
1200   for(i=0;i<n;i++)
1201     {
1202       /* t->ldscape[i*t->n_dim+0] = -width/2. + Uni()*width; */
1203       /* t->ldscape[i*t->n_dim+1] = width/2.; */
1204       t->coord_loc[i]->lonlat[0] = -width/2. + Uni()*width;
1205       t->coord_loc[i]->lonlat[1] = width/2.;
1206     }
1207 
1208   /* t->ldscape[0*t->n_dim+0] = 0.0; */
1209   /* t->ldscape[0*t->n_dim+1] = 0.0; */
1210 
1211   /* t->ldscape[1*t->n_dim+0] = 0.1; */
1212   /* t->ldscape[1*t->n_dim+1] = 0.1; */
1213 
1214 }
1215 
1216 //////////////////////////////////////////////////////////////
1217 //////////////////////////////////////////////////////////////
1218 
GEO_Optimize_Sigma(t_geo * t,t_tree * tree)1219 void GEO_Optimize_Sigma(t_geo *t, t_tree *tree)
1220 {
1221   Generic_Brent_Lk(&(t->sigma),
1222                    t->min_sigma,
1223                    t->max_sigma,
1224                    1.E-5,
1225                    1000,
1226                    NO,
1227                    GEO_Wrap_Lk,NULL,tree,NULL,NO);
1228 }
1229 
1230 //////////////////////////////////////////////////////////////
1231 //////////////////////////////////////////////////////////////
1232 
GEO_Optimize_Lambda(t_geo * t,t_tree * tree)1233 void GEO_Optimize_Lambda(t_geo *t, t_tree *tree)
1234 {
1235   Generic_Brent_Lk(&(t->lbda),
1236                    t->min_lbda,
1237                    t->max_lbda,
1238                    1.E-5,
1239                    1000,
1240                    NO,
1241                    GEO_Wrap_Lk,NULL,tree,NULL,NO);
1242 }
1243 
1244 //////////////////////////////////////////////////////////////
1245 //////////////////////////////////////////////////////////////
1246 
GEO_Optimize_Tau(t_geo * t,t_tree * tree)1247 void GEO_Optimize_Tau(t_geo *t, t_tree *tree)
1248 {
1249   Generic_Brent_Lk(&(t->tau),
1250                    t->min_tau,
1251                    t->max_tau,
1252                    1.E-5,
1253                    1000,
1254                    NO,
1255                    GEO_Wrap_Lk,NULL,tree,NULL,NO);
1256 }
1257 
1258 //////////////////////////////////////////////////////////////
1259 //////////////////////////////////////////////////////////////
1260 
GEO_Wrap_Lk(t_edge * b,t_tree * tree,supert_tree * stree)1261 phydbl GEO_Wrap_Lk(t_edge *b, t_tree *tree, supert_tree *stree)
1262 {
1263   return GEO_Lk(tree->geo,tree);
1264 }
1265 
1266 //////////////////////////////////////////////////////////////
1267 //////////////////////////////////////////////////////////////
1268 
GEO_Init_Geo_Struct(t_geo * t)1269 void GEO_Init_Geo_Struct(t_geo *t)
1270 {
1271   t->c_lnL        = UNLIKELY;
1272 
1273   t->sigma        = 1.0;
1274   t->min_sigma    = 1.E-3;
1275   t->max_sigma    = 1.E+3;
1276   t->sigma_thresh = t->max_sigma;
1277 
1278   t->lbda         = 1.0;
1279   t->min_lbda     = 1.E-3;
1280   t->max_lbda     = 1.E+3;
1281 
1282   t->tau          = 1.0;
1283   t->min_tau      = 1.E-3;
1284   t->max_tau      = 1.E+3;
1285 
1286   t->dum          = 1.0;
1287   t->min_dum      = 1.E-3;
1288   t->max_dum      = 1.E+3;
1289 
1290   t->n_dim        = 2;
1291   t->ldscape_sz   = 1;
1292 
1293   t->update_fmat  = YES;
1294 }
1295 
1296 //////////////////////////////////////////////////////////////
1297 //////////////////////////////////////////////////////////////
1298 
GEO_Randomize_Locations(t_node * n,t_geo * t,t_tree * tree)1299 void GEO_Randomize_Locations(t_node *n, t_geo *t, t_tree *tree)
1300 {
1301   if(n->tax == YES) return;
1302   else
1303     {
1304       t_node *v1, *v2;
1305       int i;
1306       phydbl *probs; // vector of probability of picking each location
1307       phydbl sum;
1308       phydbl u;
1309 
1310 
1311       probs = (phydbl *)mCalloc(t->ldscape_sz,sizeof(phydbl));
1312 
1313       v1 = v2 = NULL;
1314       for(i=0;i<3;i++)
1315         {
1316           if(n->v[i] != n->anc && n->b[i] != tree->e_root)
1317             {
1318               if(!v1) v1 = n->v[i];
1319               else    v2 = n->v[i];
1320             }
1321         }
1322 
1323       if(v1->tax && v2->tax)
1324         {
1325           Free(probs);
1326           return;
1327         }
1328       else if(v1->tax && !v2->tax && t->idx_loc[v1->num] != t->idx_loc[n->num])
1329         {
1330           t->idx_loc[v2->num] = t->idx_loc[n->num];
1331         }
1332       else if(v2->tax && !v1->tax && t->idx_loc[v2->num] != t->idx_loc[n->num])
1333         {
1334           t->idx_loc[v1->num] = t->idx_loc[n->num];
1335         }
1336       else if(v1->tax && !v2->tax && t->idx_loc[v1->num] == t->idx_loc[n->num])
1337         {
1338           sum = 0.0;
1339           for(i=0;i<t->ldscape_sz;i++) sum += t->idx_loc_beneath[v2->num * t->ldscape_sz + i];
1340           for(i=0;i<t->ldscape_sz;i++) probs[i] = t->idx_loc_beneath[v2->num * t->ldscape_sz + i]/sum;
1341 
1342           t->idx_loc[v2->num] = Sample_i_With_Proba_pi(probs,t->ldscape_sz);
1343         }
1344       else if(v2->tax && !v1->tax && t->idx_loc[v2->num] == t->idx_loc[n->num])
1345         {
1346           sum = 0.0;
1347           for(i=0;i<t->ldscape_sz;i++) sum += t->idx_loc_beneath[v1->num * t->ldscape_sz + i];
1348           for(i=0;i<t->ldscape_sz;i++) probs[i] = t->idx_loc_beneath[v1->num * t->ldscape_sz + i]/sum;
1349 
1350           t->idx_loc[v1->num] = Sample_i_With_Proba_pi(probs,t->ldscape_sz);
1351         }
1352       else
1353         {
1354           int n_v1, n_v2;
1355           phydbl p;
1356 
1357           n_v1 = t->idx_loc_beneath[v1->num * t->ldscape_sz + t->idx_loc[n->num]];
1358           n_v2 = t->idx_loc_beneath[v2->num * t->ldscape_sz + t->idx_loc[n->num]];
1359 
1360           if(n_v1 + n_v2 < 1)
1361             {
1362               PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__);
1363               Exit("\n");
1364             }
1365 
1366 
1367           p = n_v1 / (n_v1 + n_v2);
1368 
1369           u = Uni();
1370 
1371           if(u < p)
1372             {
1373               sum = 0.0;
1374               for(i=0;i<t->ldscape_sz;i++) sum += t->idx_loc_beneath[v2->num * t->ldscape_sz + i];
1375               for(i=0;i<t->ldscape_sz;i++) probs[i] = t->idx_loc_beneath[v2->num * t->ldscape_sz + i]/sum;
1376 
1377               t->idx_loc[v2->num] = Sample_i_With_Proba_pi(probs,t->ldscape_sz);
1378               t->idx_loc[v1->num] = t->idx_loc[n->num];
1379             }
1380           else
1381             {
1382               if(t->idx_loc_beneath[v2->num * t->ldscape_sz + t->idx_loc[n->num]] > 0)
1383                 {
1384                   sum = 0.0;
1385                   for(i=0;i<t->ldscape_sz;i++) sum += t->idx_loc_beneath[v1->num * t->ldscape_sz + i];
1386                   for(i=0;i<t->ldscape_sz;i++) probs[i] = t->idx_loc_beneath[v1->num * t->ldscape_sz + i]/sum;
1387 
1388                   t->idx_loc[v1->num] = Sample_i_With_Proba_pi(probs,t->ldscape_sz);
1389                   t->idx_loc[v2->num] = t->idx_loc[n->num];
1390                 }
1391               else
1392                 {
1393                   sum = 0.0;
1394                   for(i=0;i<t->ldscape_sz;i++) sum += t->idx_loc_beneath[v2->num * t->ldscape_sz + i];
1395                   for(i=0;i<t->ldscape_sz;i++) probs[i] = t->idx_loc_beneath[v2->num * t->ldscape_sz + i]/sum;
1396 
1397                   t->idx_loc[v2->num] = Sample_i_With_Proba_pi(probs,t->ldscape_sz);
1398                   t->idx_loc[v1->num] = t->idx_loc[n->num];
1399                 }
1400             }
1401 
1402           if(t->idx_loc[v1->num] != t->idx_loc[n->num] && t->idx_loc[v2->num] != t->idx_loc[n->num])
1403             {
1404               PhyML_Printf("\n. %d %d %d",t->idx_loc[v1->num],t->idx_loc[v2->num],t->idx_loc[n->num]);
1405               PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__);
1406               Exit("\n");
1407             }
1408 
1409           if(t->idx_loc_beneath[v1->num * t->ldscape_sz + t->idx_loc[v1->num]] < 1)
1410             {
1411               PhyML_Printf("\n. %d %d %d",t->idx_loc[v1->num],t->idx_loc[v2->num],t->idx_loc[n->num]);
1412               PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__);
1413               Exit("\n");
1414             }
1415 
1416           if(t->idx_loc_beneath[v2->num * t->ldscape_sz + t->idx_loc[v2->num]] < 1)
1417             {
1418               PhyML_Printf("\n. %d %d %d",t->idx_loc[v1->num],t->idx_loc[v2->num],t->idx_loc[n->num]);
1419               PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__);
1420               Exit("\n");
1421             }
1422 
1423         }
1424 
1425       Free(probs);
1426 
1427       for(i=0;i<3;i++)
1428         if(n->v[i] != n->anc && n->b[i] != tree->e_root)
1429           GEO_Randomize_Locations(n->v[i],t,tree);
1430     }
1431 }
1432 
1433 //////////////////////////////////////////////////////////////
1434 //////////////////////////////////////////////////////////////
1435 
GEO_Get_Locations_Beneath(t_geo * t,t_tree * tree)1436 void GEO_Get_Locations_Beneath(t_geo *t, t_tree *tree)
1437 {
1438   int i;
1439   GEO_Get_Locations_Beneath_Post(tree->n_root,tree->n_root->v[1],t,tree);
1440   GEO_Get_Locations_Beneath_Post(tree->n_root,tree->n_root->v[2],t,tree);
1441 
1442   for(i=0;i<t->ldscape_sz;i++)
1443     {
1444       t->idx_loc_beneath[tree->n_root->num*t->ldscape_sz+i] =
1445         t->idx_loc_beneath[tree->n_root->v[1]->num*t->ldscape_sz+i] +
1446         t->idx_loc_beneath[tree->n_root->v[2]->num*t->ldscape_sz+i];
1447     }
1448 }
1449 
1450 //////////////////////////////////////////////////////////////
1451 //////////////////////////////////////////////////////////////
1452 
GEO_Get_Locations_Beneath_Post(t_node * a,t_node * d,t_geo * t,t_tree * tree)1453 void GEO_Get_Locations_Beneath_Post(t_node *a, t_node *d, t_geo *t, t_tree *tree)
1454 {
1455 
1456   if(d->tax)
1457     {
1458       t->idx_loc_beneath[d->num*t->ldscape_sz+t->idx_loc[d->num]] = 1;
1459       return;
1460     }
1461   else
1462     {
1463       int i;
1464       t_node *v1, *v2;
1465 
1466       for(i=0;i<3;i++)
1467         {
1468           if(d->v[i] != a && d->b[i] != tree->e_root)
1469             {
1470               GEO_Get_Locations_Beneath_Post(d,d->v[i],t,tree);
1471             }
1472         }
1473 
1474 
1475       v1 = v2 = NULL;
1476       for(i=0;i<3;i++)
1477         {
1478           if(d->v[i] != a && d->b[i] != tree->e_root)
1479             {
1480               if(!v1) v1 = d->v[i];
1481               else    v2 = d->v[i];
1482             }
1483         }
1484 
1485 
1486       for(i=0;i<t->ldscape_sz;i++)
1487         {
1488           t->idx_loc_beneath[ d->num*t->ldscape_sz+i] =
1489             t->idx_loc_beneath[v1->num*t->ldscape_sz+i] +
1490             t->idx_loc_beneath[v2->num*t->ldscape_sz+i] ;
1491         }
1492     }
1493 }
1494 
1495 //////////////////////////////////////////////////////////////
1496 //////////////////////////////////////////////////////////////
1497 
GEO_Get_Sigma_Max(t_geo * t)1498 void GEO_Get_Sigma_Max(t_geo *t)
1499 {
1500   int i,j;
1501   phydbl mean_dist,inv_mean_dist;
1502   phydbl sigma_a, sigma_b, sigma_c;
1503   phydbl overlap_a, overlap_b, overlap_c;
1504   phydbl d_intersect;
1505   phydbl overlap_target;
1506   phydbl eps;
1507   int n_iter,n_iter_max;
1508 
1509   eps = 1.E-6;
1510   overlap_target = 0.95;
1511   n_iter_max = 100;
1512 
1513   mean_dist = -1.;
1514   inv_mean_dist = -1.;
1515   for(i=0;i<t->ldscape_sz-1;i++)
1516     {
1517       for(j=i+1;j<t->ldscape_sz;j++)
1518         {
1519           /* dist = POW(t->ldscape[i*t->n_dim+0] - t->ldscape[j*t->n_dim+0],1); */
1520           /* if(dist > mean_dist) mean_dist = dist; */
1521           /* dist = POW(t->ldscape[i*t->n_dim+1] - t->ldscape[j*t->n_dim+1],1); */
1522           /* if(dist > mean_dist) mean_dist = dist; */
1523           /* mean_dist += FABS(t->ldscape[i*t->n_dim+0] - t->ldscape[j*t->n_dim+0]); */
1524           /* mean_dist += FABS(t->ldscape[i*t->n_dim+1] - t->ldscape[j*t->n_dim+1]); */
1525           mean_dist += FABS(t->coord_loc[i]->lonlat[0] - t->coord_loc[j]->lonlat[0]);
1526           mean_dist += FABS(t->coord_loc[i]->lonlat[1] - t->coord_loc[j]->lonlat[1]);
1527         }
1528     }
1529 
1530   mean_dist /= t->ldscape_sz*(t->ldscape_sz-1)/2.;
1531   inv_mean_dist = 1./mean_dist;
1532 
1533   PhyML_Printf("\n. Mean distance between locations: %f",mean_dist);
1534 
1535   sigma_a = t->min_sigma; sigma_b = 1.0; sigma_c = t->max_sigma;
1536   /* sigma_a = t->min_sigma; sigma_b = 1.0; sigma_c = 10.; */
1537   n_iter = 0;
1538   do
1539     {
1540       d_intersect = Inverse_Truncated_Normal(inv_mean_dist,0.0,sigma_a,0.0,mean_dist);
1541       overlap_a =
1542         (Pnorm(mean_dist,0.0,sigma_a) - Pnorm(d_intersect,0.0,sigma_a))/
1543         (Pnorm(mean_dist,0.0,sigma_a) - Pnorm(0.0,0.0,sigma_a)) +
1544         d_intersect / mean_dist;
1545       /* printf("\n. inter: %f %f [%f]",d_intersect,mean_dist,d_intersect / mean_dist); */
1546 
1547       d_intersect = Inverse_Truncated_Normal(inv_mean_dist,0.0,sigma_b,0.0,mean_dist);
1548       overlap_b =
1549         (Pnorm(mean_dist,0.0,sigma_b) - Pnorm(d_intersect,0.0,sigma_b))/
1550         (Pnorm(mean_dist,0.0,sigma_b) - Pnorm(0.0,0.0,sigma_b)) +
1551         d_intersect / mean_dist;
1552       /* printf("\n. inter: %f %f [%f]",d_intersect,mean_dist,d_intersect / mean_dist); */
1553 
1554       d_intersect = Inverse_Truncated_Normal(inv_mean_dist,0.0,sigma_c,0.0,mean_dist);
1555       overlap_c =
1556         (Pnorm(mean_dist,0.0,sigma_c) - Pnorm(d_intersect,0.0,sigma_c))/
1557         (Pnorm(mean_dist,0.0,sigma_c) - Pnorm(0.0,0.0,sigma_c)) +
1558         d_intersect / mean_dist;
1559 
1560       /* printf("\n. inter: %f %f [%f]",d_intersect,mean_dist,d_intersect / mean_dist); */
1561 
1562       /* printf("\n. sigma_a:%f overlap_a:%f sigma_b:%f overlap_b:%f sigma_c:%f overlap_c:%f", */
1563       /*        sigma_a,overlap_a, */
1564       /*        sigma_b,overlap_b, */
1565       /*        sigma_c,overlap_c); */
1566 
1567       if(overlap_target > overlap_a && overlap_target < overlap_b)
1568         {
1569           sigma_c = sigma_b;
1570           sigma_b = sigma_a + (sigma_c - sigma_a)/2.;
1571         }
1572       else if(overlap_target > overlap_b && overlap_target < overlap_c)
1573         {
1574           sigma_a = sigma_b;
1575           sigma_b = sigma_a + (sigma_c - sigma_a)/2.;
1576         }
1577       else if(overlap_target < overlap_a)
1578         {
1579           sigma_a /= 2.;
1580         }
1581       else if(overlap_target > overlap_c)
1582         {
1583           sigma_c *= 2.;
1584         }
1585 
1586       n_iter++;
1587 
1588     }
1589   while(sigma_c - sigma_a > eps && n_iter < n_iter_max);
1590 
1591   /* if(sigma_c - sigma_a > eps) */
1592   /*   { */
1593   /*     PhyML_Printf("\n== Error detected in getting maximum value of sigma."); */
1594   /*     PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__); */
1595   /*     Exit("\n");     */
1596   /*   } */
1597   /* else */
1598   /*   { */
1599   /*     PhyML_Printf("\n== Threshold for sigma: %f",sigma_b); */
1600   /*   } */
1601 
1602   t->sigma_thresh = sigma_b;
1603 }
1604 
1605 //////////////////////////////////////////////////////////////
1606 //////////////////////////////////////////////////////////////
1607 
MCMC_Geo_Updown_Tau_Lbda(t_tree * tree)1608 void MCMC_Geo_Updown_Tau_Lbda(t_tree *tree)
1609 {
1610   phydbl K,mult,u,alpha,ratio;
1611   phydbl cur_lnL,new_lnL;
1612   phydbl cur_tau,new_tau;
1613   phydbl cur_lbda,new_lbda;
1614 
1615   K        = tree->mcmc->tune_move[tree->mcmc->num_move_geo_updown_tau_lbda];
1616   cur_lnL  = tree->geo->c_lnL;
1617   new_lnL  = tree->geo->c_lnL;
1618   cur_tau  = tree->geo->tau;
1619   new_tau  = tree->geo->tau;
1620   cur_lbda = tree->geo->lbda;
1621   new_lbda = tree->geo->lbda;
1622 
1623   u = Uni();
1624   mult = exp(K*(u-0.5));
1625 
1626   /* Multiply tau by K */
1627   new_tau = cur_tau * K;
1628 
1629   /* Divide lbda by same amount */
1630   new_lbda = cur_lbda / K;
1631 
1632 
1633   if(
1634      new_lbda < tree->geo->min_lbda || new_lbda > tree->geo->max_lbda ||
1635      new_tau  < tree->geo->min_tau  || new_tau  > tree->geo->max_tau
1636      )
1637     {
1638       tree->mcmc->run_move[tree->mcmc->num_move_geo_updown_tau_lbda]++;
1639       return;
1640     }
1641 
1642   tree->geo->tau  = new_tau;
1643   tree->geo->lbda = new_lbda;
1644 
1645   if(tree->eval_alnL) new_lnL = GEO_Lk(tree->geo,tree);
1646 
1647   ratio = 0.0;
1648   /* Proposal ratio: 2n-2=> number of multiplications, 1=>number of divisions */
1649   ratio += 0.0*log(mult); /* (1-1)*log(mult); */
1650   /* Likelihood density ratio */
1651   ratio += (new_lnL - cur_lnL);
1652 
1653   /* printf("\n. new_tau: %f new_lbda:%f cur_lnL:%f new_lnL:%f",new_tau,new_lbda,cur_lnL,new_lnL); */
1654 
1655 
1656   ratio = exp(ratio);
1657   alpha = MIN(1.,ratio);
1658   u = Uni();
1659 
1660   if(u > alpha) /* Reject */
1661     {
1662       tree->geo->tau   = cur_tau;
1663       tree->geo->lbda  = cur_lbda;
1664       tree->geo->c_lnL = cur_lnL;
1665     }
1666   else
1667     {
1668       tree->mcmc->acc_move[tree->mcmc->num_move_geo_updown_tau_lbda]++;
1669     }
1670 
1671   tree->mcmc->run_move[tree->mcmc->num_move_geo_updown_tau_lbda]++;
1672 }
1673 
1674 //////////////////////////////////////////////////////////////
1675 //////////////////////////////////////////////////////////////
1676 
1677 
MCMC_Geo_Updown_Lbda_Sigma(t_tree * tree)1678 void MCMC_Geo_Updown_Lbda_Sigma(t_tree *tree)
1679 {
1680   phydbl K,mult,u,alpha,ratio;
1681   phydbl cur_lnL,new_lnL;
1682   phydbl cur_lbda,new_lbda;
1683   phydbl cur_sigma,new_sigma;
1684 
1685   K        = tree->mcmc->tune_move[tree->mcmc->num_move_geo_updown_lbda_sigma];
1686   cur_lnL  = tree->geo->c_lnL;
1687   new_lnL  = tree->geo->c_lnL;
1688   cur_lbda  = tree->geo->lbda;
1689   new_lbda  = tree->geo->lbda;
1690   cur_sigma = tree->geo->sigma;
1691   new_sigma = tree->geo->sigma;
1692 
1693   u = Uni();
1694   mult = exp(K*(u-0.5));
1695 
1696   /* Multiply lbda by K */
1697   new_lbda = cur_lbda * K;
1698 
1699   /* Divide sigma by same amount */
1700   new_sigma = cur_sigma / K;
1701 
1702 
1703   if(
1704      new_sigma < tree->geo->min_sigma || new_sigma > tree->geo->max_sigma ||
1705      new_lbda  < tree->geo->min_lbda  || new_lbda  > tree->geo->max_lbda
1706      )
1707     {
1708       tree->mcmc->run_move[tree->mcmc->num_move_geo_updown_lbda_sigma]++;
1709       return;
1710     }
1711 
1712   tree->geo->lbda   = new_lbda;
1713   tree->geo->sigma = new_sigma;
1714 
1715   if(tree->eval_alnL) new_lnL = GEO_Lk(tree->geo,tree);
1716 
1717   ratio = 0.0;
1718   /* Proposal ratio: 2n-2=> number of multiplications, 1=>number of divisions */
1719   ratio += 0.0*log(mult); /* (1-1)*log(mult); */
1720   /* Likelihood density ratio */
1721   ratio += (new_lnL - cur_lnL);
1722 
1723   /* printf("\n. new_lbda: %f new_sigma:%f cur_lnL:%f new_lnL:%f",new_lbda,new_sigma,cur_lnL,new_lnL); */
1724 
1725 
1726   ratio = exp(ratio);
1727   alpha = MIN(1.,ratio);
1728   u = Uni();
1729 
1730   if(u > alpha) /* Reject */
1731     {
1732       tree->geo->lbda   = cur_lbda;
1733       tree->geo->sigma  = cur_sigma;
1734       tree->geo->c_lnL = cur_lnL;
1735     }
1736   else
1737     {
1738       tree->mcmc->acc_move[tree->mcmc->num_move_geo_updown_lbda_sigma]++;
1739     }
1740 
1741   tree->mcmc->run_move[tree->mcmc->num_move_geo_updown_lbda_sigma]++;
1742 }
1743 
1744 //////////////////////////////////////////////////////////////
1745 //////////////////////////////////////////////////////////////
1746 
GEO_Read_In_Landscape(char * file_name,t_geo * t,phydbl ** ldscape,int ** loc_hash,t_tree * tree)1747 void GEO_Read_In_Landscape(char *file_name, t_geo *t, phydbl **ldscape, int **loc_hash, t_tree *tree)
1748 {
1749   FILE *fp;
1750   char *s;
1751   phydbl longitude, lattitude;
1752   int tax,loc;
1753 
1754   PhyML_Printf("\n");
1755   PhyML_Printf("\n. Reading landscape file '%s'.\n",file_name);
1756 
1757   s           = (char *)mCalloc(T_MAX_LINE,sizeof(char));
1758   (*ldscape)  = (phydbl *)mCalloc(10*t->n_dim,sizeof(phydbl));
1759   (*loc_hash) = (int *)mCalloc(tree->n_otu,sizeof(int));
1760 
1761   fp = Openfile(file_name,0);
1762 
1763   tax = loc = -1;
1764 
1765   t->ldscape_sz = 0;
1766 
1767   do
1768     {
1769       if(fscanf(fp,"%s",s) == EOF) break;
1770 
1771       if(strlen(s) > 0) for(tax=0;tax<tree->n_otu;tax++) if(!strcmp(tree->a_nodes[tax]->name,s)) break;
1772 
1773       if(tax == tree->n_otu)
1774         {
1775           PhyML_Printf("\n== Could not find a taxon with name '%s' in the tree provided.",s);
1776           /* PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__); */
1777           /* Exit("\n"); */
1778           continue;
1779         }
1780 
1781       /* sscanf(line+pos,"%lf %lf",&longitude,&lattitude); */
1782       if(fscanf(fp,"%lf",&longitude) == EOF) break;
1783       if(fscanf(fp,"%lf",&lattitude) == EOF) break;
1784 
1785       /* printf("\n. s: %s %f %f",s,longitude,lattitude); */
1786 
1787       for(loc=0;loc<t->ldscape_sz;loc++)
1788         {
1789           if(FABS(longitude-(*ldscape)[loc*t->n_dim+0]) < 1.E-10 &&
1790              FABS(lattitude-(*ldscape)[loc*t->n_dim+1]) < 1.E-10)
1791             {
1792               break;
1793             }
1794         }
1795 
1796       if(loc == t->ldscape_sz)
1797         {
1798           t->ldscape_sz++;
1799           (*ldscape)[(t->ldscape_sz-1)*t->n_dim+0] = longitude;
1800           (*ldscape)[(t->ldscape_sz-1)*t->n_dim+1] = lattitude;
1801 
1802           if(!(t->ldscape_sz%10))
1803             {
1804               (*ldscape)  = (phydbl *)mRealloc((*ldscape),(t->ldscape_sz+10)*t->n_dim,sizeof(phydbl));
1805             }
1806         }
1807 
1808       (*loc_hash)[tax] = loc;
1809 
1810     }
1811   while(1);
1812 
1813   for(tax=0;tax<tree->n_otu;tax++)
1814     PhyML_Printf("\n. Taxon %30s, longitude: %12f, lattitude: %12f [%4d]",
1815                  tree->a_nodes[tax]->name,
1816                  (*ldscape)[(*loc_hash)[tax]*t->n_dim+0],
1817                  (*ldscape)[(*loc_hash)[tax]*t->n_dim+1],
1818                  (*loc_hash)[tax]);
1819 
1820 
1821 }
1822 
1823 //////////////////////////////////////////////////////////////
1824 //////////////////////////////////////////////////////////////
1825 
1826 //////////////////////////////////////////////////////////////
1827 //////////////////////////////////////////////////////////////
1828 
1829 //////////////////////////////////////////////////////////////
1830 //////////////////////////////////////////////////////////////
1831 
1832 
1833