1 /*
2 
3 PhyML:  a program that  computes maximum likelihood phylogenies from
4 DNA or AA homologous sequences.
5 
6 Copyright (C) Stephane Guindon. Oct 2003 onward.
7 
8 All parts of the source except where indicated are distributed under
9 the GNU public licence. See http://www.opensource.org for details.
10 
11 */
12 
13 #include "slfv.h"
14 
15 
16 /*////////////////////////////////////////////////////////////
17 ////////////////////////////////////////////////////////////*/
18 
SLFV_Prob_Two_Lineages_Coal(t_ldsk * l0,t_ldsk * l1,t_tree * tree)19 phydbl SLFV_Prob_Two_Lineages_Coal(t_ldsk *l0, t_ldsk *l1, t_tree *tree)
20 {
21   phydbl eucl,prob,area,W,H;
22 
23   assert(tree->mmod->n_dim == 2);
24 
25   W = (tree->mmod->lim_up->lonlat[0]-tree->mmod->lim_do->lonlat[0]);
26   H = (tree->mmod->lim_up->lonlat[1]-tree->mmod->lim_do->lonlat[1]);
27 
28   area = W*H;
29 
30   eucl = Euclidean_Dist(l0->coord,l1->coord);
31 
32   prob = 0.0;
33   prob += 2.*log(tree->mmod->mu);
34   prob += 2.*log(tree->mmod->rad);
35   prob += log(PI);
36   prob -= log(4.*area);
37   prob -= 0.25*eucl*eucl/(tree->mmod->rad*tree->mmod->rad);
38 
39 
40   prob +=
41     log(erf((l0->coord->lonlat[0] + l1->coord->lonlat[0])/(2.*tree->mmod->rad)) +
42         erf((2.*W - l0->coord->lonlat[0] - l1->coord->lonlat[0])/(2.*tree->mmod->rad)));
43 
44   prob +=
45     log(erf((l0->coord->lonlat[1] + l1->coord->lonlat[1])/(2.*tree->mmod->rad)) +
46         erf((2.*H - l0->coord->lonlat[1] - l1->coord->lonlat[1])/(2.*tree->mmod->rad)));
47 
48   prob = exp(prob);
49 
50   return(prob);
51 }
52 
53 /*////////////////////////////////////////////////////////////
54 ////////////////////////////////////////////////////////////*/
55 
SLFV_Prob_Two_Random_Lineages_Coal_One_Event(phydbl w,phydbl h,phydbl mu,phydbl rad)56 phydbl SLFV_Prob_Two_Random_Lineages_Coal_One_Event(phydbl w, phydbl h, phydbl mu, phydbl rad)
57 {
58   phydbl cx,cy;
59   phydbl l1x,l1y;
60   phydbl l2x,l2y;
61   phydbl prob_hit;
62   int n_hit,n_trials,trial;
63 
64   n_trials = 10000000;
65   trial = 0;
66   n_hit = 0;
67   do
68     {
69       cx = Uni()*w;
70       cy = Uni()*h;
71 
72       l1x = Uni()*w;
73       l1y = Uni()*h;
74 
75       l2x = Uni()*w;
76       l2y = Uni()*h;
77 
78       prob_hit = log(mu);
79       prob_hit += -POW(l1x - cx,2)/(2.*pow(rad,2));
80       prob_hit += -POW(l1y - cy,2)/(2.*pow(rad,2));
81 
82       prob_hit += log(mu);
83       prob_hit += -POW(l2x - cx,2)/(2.*pow(rad,2));
84       prob_hit += -POW(l2y - cy,2)/(2.*pow(rad,2));
85 
86       prob_hit = exp(prob_hit);
87 
88       if(!(Uni() > prob_hit)) n_hit++;
89 
90       trial++;
91     }
92   while(trial != n_trials);
93 
94   return((phydbl)n_hit/n_trials);
95 }
96 
97 /*////////////////////////////////////////////////////////////
98 ////////////////////////////////////////////////////////////*/
99 // Coalescence rate with time expressed in calendar unit
SLFV_Coalescence_Rate(t_tree * tree)100 phydbl SLFV_Coalescence_Rate(t_tree *tree)
101 {
102   phydbl mu,theta,lbda,w,h;
103 
104   mu    = tree->mmod->mu;
105   theta = tree->mmod->rad;
106   lbda  = tree->mmod->lbda;
107   w     = (tree->mmod->lim_up->lonlat[0]-tree->mmod->lim_do->lonlat[0]);
108   h     = (tree->mmod->lim_up->lonlat[1]-tree->mmod->lim_do->lonlat[1]);
109 
110   return(4.*POW(PI,2)*POW(theta,4)*POW(mu,2)*lbda / (POW(w,2)*POW(h,2)));
111 }
112 
113 /*////////////////////////////////////////////////////////////
114 ////////////////////////////////////////////////////////////*/
115 
SLFV_Path_Logdensity(t_ldsk * young,t_ldsk * old,phydbl sd,t_tree * tree)116 phydbl SLFV_Path_Logdensity(t_ldsk *young, t_ldsk *old, phydbl sd, t_tree *tree)
117 {
118   int i,j,err;
119   t_ldsk *ldsk;
120   phydbl lnDens,mode;
121   phydbl X,Y,Xp,Yp;
122   phydbl slope,inter;
123   int dir_to_young;
124 
125   lnDens = 0.0;
126   mode   = 0.0;
127 
128   for(i=0;i<tree->mmod->n_dim;i++)
129     {
130       j    = 0;
131       ldsk = young->prev;
132 
133       X = old->coord->lonlat[i];
134       Y = old->disk->time;
135 
136       // Density up to the last jump (to old ldsk) which should
137       // not be accounted for (it is not part of the simulation
138       // when randomly generating a path using SLFV_Generate_Path
139       while(ldsk != old)
140         {
141 
142           if(ldsk->n_next > 1)
143             dir_to_young = PHYREX_Get_Next_Direction(young,ldsk);
144           else
145             dir_to_young = 0;
146 
147           Xp = ldsk->next[dir_to_young]->coord->lonlat[i];
148           Yp = ldsk->next[dir_to_young]->disk->time;
149 
150           slope = (Yp-Y)/(Xp-X);
151           inter = Y - slope*X;
152 
153           assert(ldsk != NULL);
154 
155           /* mode = (old->coord->lonlat[i] - young->coord->lonlat[i])/(n_evt+1.-j) + young->coord->lonlat[i]; */
156 
157           mode = (ldsk->disk->time - inter)/slope;
158 
159           lnDens += Log_Dnorm_Trunc(ldsk->coord->lonlat[i],
160                                     mode,
161                                     sd,
162                                     tree->mmod->lim_do->lonlat[i],
163                                     tree->mmod->lim_up->lonlat[i],&err);
164 
165           /* lnDens += Log_Dnorm_Trunc(ldsk->disk->centr->lonlat[i], */
166           /*                           mode, */
167           /*                           sd, */
168           /*                           0.0, */
169           /*                           tree->mmod->lim->lonlat[i],&err); */
170 
171           lnDens += Log_Dnorm_Trunc(ldsk->disk->centr->lonlat[i],
172                                     ldsk->coord->lonlat[i],
173                                     sd,
174                                     tree->mmod->lim_do->lonlat[i],
175                                     tree->mmod->lim_up->lonlat[i],&err);
176           /* lnDens += log(1./tree->mmod->lim->lonlat[i]); */
177 
178           ldsk = ldsk->prev;
179           j++;
180         }
181     }
182 
183   return(lnDens);
184 }
185 
186 /*////////////////////////////////////////////////////////////
187 ////////////////////////////////////////////////////////////*/
188 
SLFV_Sample_Path(t_ldsk * young,t_ldsk * old,phydbl sd,phydbl * global_hr,t_tree * tree)189 void SLFV_Sample_Path(t_ldsk *young, t_ldsk *old, phydbl sd, phydbl *global_hr, t_tree *tree)
190 {
191   phydbl new_ldsk_pos,cur_ldsk_pos;
192   phydbl new_centr_pos,cur_centr_pos;
193   phydbl new_glnL,cur_glnL;
194   phydbl u,alpha,ratio,hr;
195   int n_mcmc_iter;
196   int i,err;
197   t_ldsk *ldsk;
198   int accept,reject;
199   int path_len;
200 
201 
202   path_len = PHYREX_Path_Len(young,old)-2;
203   if(path_len == 0) return;
204 
205   assert(young != old);
206   new_glnL = cur_glnL = tree->mmod->c_lnL;
207 
208   n_mcmc_iter = 0;
209   accept = reject = 0;
210   do
211     {
212 
213       ldsk = young->prev;
214       do
215         {
216           assert(ldsk->prev);
217 
218           PHYREX_Store_Geo_Coord(ldsk->coord);
219           PHYREX_Store_Geo_Coord(ldsk->disk->centr);
220 
221           hr = 0.0;
222           ratio = 0.0;
223 
224           /* cur_glnL = SLFV_Lk_Range(young->disk,old->disk,tree); */
225           cur_glnL = SLFV_Lk_Range(ldsk->disk,ldsk->prev->disk,tree);
226           new_glnL = cur_glnL;
227 
228           for(i=0;i<tree->mmod->n_dim;i++)
229             {
230               cur_ldsk_pos  = ldsk->coord->lonlat[i];
231               cur_centr_pos = ldsk->disk->centr->lonlat[i];
232 
233               /* new_centr_pos = Rnorm_Trunc(cur_centr_pos, */
234               /*                             0.5*sd, */
235               /*                             0.0, */
236               /*                             tree->mmod->lim->lonlat[i],&err); */
237 
238               new_centr_pos = Uni() * (tree->mmod->lim_up->lonlat[i]-tree->mmod->lim_do->lonlat[i])+tree->mmod->lim_do->lonlat[i];
239 
240               new_ldsk_pos = Rnorm_Trunc(new_centr_pos,
241                                          sd,
242                                          tree->mmod->lim_do->lonlat[i],
243                                          tree->mmod->lim_up->lonlat[i],&err);
244 
245 
246               /* PhyML_Printf("\n. cur_centr: %12f new_centr: %12f",cur_centr_pos,new_centr_pos); */
247               /* PhyML_Printf(" d(new|cur)=%12f d(cur|new): %12f", */
248               /*              Log_Dnorm_Trunc(new_centr_pos, */
249               /*                              cur_centr_pos, */
250               /*                              0.5*sd, */
251               /*                              0.0, */
252               /*                              tree->mmod->lim->lonlat[i],&err), */
253               /*              Log_Dnorm_Trunc(cur_centr_pos, */
254               /*                              new_centr_pos, */
255               /*                              0.5*sd, */
256               /*                              0.0, */
257               /*                              tree->mmod->lim->lonlat[i],&err)); */
258 
259 
260               /* PhyML_Printf(" cur_ldsk: %12f new_ldsk: %12f",cur_ldsk_pos,new_ldsk_pos); */
261               /* PhyML_Printf(" d(new|cur)=%12f d(cur|new): %12f", */
262               /*              Log_Dnorm_Trunc(new_ldsk_pos, */
263               /*                              new_centr_pos, */
264               /*                              0.5*sd, */
265               /*                              0.0, */
266               /*                              tree->mmod->lim->lonlat[i],&err), */
267               /*              Log_Dnorm_Trunc(cur_ldsk_pos, */
268               /*                              cur_centr_pos, */
269               /*                              0.5*sd, */
270               /*                              0.0, */
271               /*                              tree->mmod->lim->lonlat[i],&err)); */
272 
273 
274               hr -= Log_Dnorm_Trunc(new_ldsk_pos,
275                                     new_centr_pos,
276                                     sd,
277                                     tree->mmod->lim_do->lonlat[i],
278                                     tree->mmod->lim_up->lonlat[i],&err);
279 
280               /* hr -= Log_Dnorm_Trunc(new_centr_pos, */
281               /*                       cur_centr_pos, */
282               /*                       0.5*sd, */
283               /*                       0.0, */
284               /*                       tree->mmod->lim->lonlat[i],&err); */
285 
286 
287               hr += Log_Dnorm_Trunc(cur_ldsk_pos,
288                                     cur_centr_pos,
289                                     sd,
290                                     tree->mmod->lim_do->lonlat[i],
291                                     tree->mmod->lim_up->lonlat[i],&err);
292 
293               /* hr += Log_Dnorm_Trunc(cur_centr_pos, */
294               /*                       new_centr_pos, */
295               /*                       0.5*sd, */
296               /*                       0.0, */
297               /*                       tree->mmod->lim->lonlat[i],&err); */
298 
299 
300               ldsk->coord->lonlat[i] = new_ldsk_pos;
301               ldsk->disk->centr->lonlat[i]  = new_centr_pos;
302             }
303 
304           /* new_glnL = SLFV_Lk_Range(young->disk,old->disk,tree); */
305           /* new_glnL = SLFV_Lk_Core_Range(young->disk,old->disk,tree); */
306           new_glnL = SLFV_Lk_Range(ldsk->disk,ldsk->prev->disk,tree);
307           /* new_glnL = SLFV_Lk(tree); */
308 
309 
310           /* PhyML_Printf("\n. k: %4d lnL: %f",k,cur_glnL); */
311 
312           ratio += (new_glnL - cur_glnL);
313           ratio += hr;
314 
315           /* (*global_hr) -= ratio; */
316 
317           ratio = exp(ratio);
318           alpha = MIN(1.,ratio);
319 
320           u = Uni();
321 
322           if(u > alpha) /* Reject */
323             {
324               PHYREX_Restore_Geo_Coord(ldsk->coord);
325               PHYREX_Restore_Geo_Coord(ldsk->disk->centr);
326               reject++;
327             }
328           else
329             {
330               cur_glnL = new_glnL;
331               accept++;
332             }
333 
334           ldsk = ldsk->prev;
335         }
336       while(ldsk != old);
337 
338       n_mcmc_iter++;
339     }
340   while(n_mcmc_iter < 100 * path_len);
341   /* while(n_mcmc_iter < 1); */
342   /* PhyML_Printf("\n. ratio: %f %d %d",(phydbl)(accept)/(phydbl)(accept+reject),accept,reject); */
343 }
344 
345 /*////////////////////////////////////////////////////////////
346 ////////////////////////////////////////////////////////////*/
347 
SLFV_Generate_Path(t_ldsk * young,t_ldsk * old,phydbl n_evt,phydbl sd,t_tree * tree)348 t_ldsk *SLFV_Generate_Path(t_ldsk *young, t_ldsk *old, phydbl n_evt, phydbl sd, t_tree *tree)
349 {
350   int i,j,swap,err;
351   phydbl *time,dum,mode;
352   t_ldsk *path,**ldsk_a;
353   t_dsk *disk;
354   phydbl X,Y,Xp,Yp;
355   phydbl slope,inter;
356 
357   path = NULL;
358 
359   if(n_evt == 0) return(NULL); // path is set to NULL
360 
361   time   = (phydbl *)mCalloc(n_evt,sizeof(phydbl));
362   ldsk_a = (t_ldsk **)mCalloc(n_evt,sizeof(t_ldsk *));
363 
364 
365   for(i=0;i<n_evt;i++) time[i] =  Uni()*(young->disk->time - old->disk->time) + old->disk->time;
366 
367   /* Bubble sort time in decreasing order */
368   do
369     {
370       swap = NO;
371       for(i=0;i<n_evt-1;i++)
372         {
373           if(time[i+1] > time[i])
374             {
375               swap = YES;
376               dum       = time[i+1];
377               time[i+1] = time[i];
378               time[i]   = dum;
379             }
380         }
381     }
382   while(swap == YES);
383 
384   for(i=0;i<n_evt;i++)
385     {
386       ldsk_a[i] = PHYREX_Make_Lindisk_Node(tree->mmod->n_dim);
387       disk = PHYREX_Make_Disk_Event(tree->mmod->n_dim,tree->n_otu);
388       PHYREX_Init_Lindisk_Node(ldsk_a[i],disk,tree->mmod->n_dim);
389       PHYREX_Make_Lindisk_Next(ldsk_a[i]);
390       PHYREX_Init_Disk_Event(disk,tree->mmod->n_dim,tree->mmod);
391       disk->ldsk = ldsk_a[i];
392       disk->time = time[i];
393     }
394 
395   for(i=0;i<n_evt-1;i++) ldsk_a[i]->prev = ldsk_a[i+1];
396   ldsk_a[i]->prev = NULL;
397 
398   for(i=1;i<n_evt;i++) ldsk_a[i]->next[0] = ldsk_a[i-1];
399   ldsk_a[0]->next[0] = NULL;
400 
401   path = ldsk_a[0];
402 
403   /* Instantiate path */
404   for(i=0;i<tree->mmod->n_dim;i++)
405     {
406       X = old->coord->lonlat[i];
407       Y = old->disk->time;
408 
409       for(j=0;j<n_evt;j++)
410         {
411 
412           if(j == 0)
413             {
414               Xp = young->coord->lonlat[i];
415               Yp = young->disk->time;
416             }
417           else
418             {
419               assert(ldsk_a[j]->n_next == 1);
420               Xp = ldsk_a[j]->next[0]->coord->lonlat[i];
421               Yp = ldsk_a[j]->next[0]->disk->time;
422             }
423 
424           slope = (Yp-Y)/(Xp-X);
425           inter = Y - slope*X;
426 
427           if(j == 0)
428             {
429               mode = (young->disk->time - inter)/slope;
430             }
431           else
432             {
433               mode = (ldsk_a[j]->next[0]->disk->time - inter)/slope;
434             }
435 
436           ldsk_a[j]->coord->lonlat[i] = Rnorm_Trunc(mode,
437                                                     sd,
438                                                     tree->mmod->lim_do->lonlat[i],
439                                                     tree->mmod->lim_up->lonlat[i],&err);
440 
441 
442           /* ldsk_a[j]->disk->centr->lonlat[i] = Rnorm_Trunc(mode, */
443           /*                                                 sd, */
444           /*                                                 0.0, */
445           /*                                                 tree->mmod->lim->lonlat[i],&err); */
446 
447 
448           ldsk_a[j]->disk->centr->lonlat[i] = Rnorm_Trunc(ldsk_a[j]->coord->lonlat[i],
449                                                           sd,
450                                                           tree->mmod->lim_do->lonlat[i],
451                                                           tree->mmod->lim_up->lonlat[i],&err);
452         }
453     }
454 
455   Free(ldsk_a);
456   Free(time);
457 
458   return(path);
459 }
460 
461 /*////////////////////////////////////////////////////////////
462 ////////////////////////////////////////////////////////////*/
463 
SLFV_Effective_Density(t_tree * tree)464 phydbl SLFV_Effective_Density(t_tree *tree)
465 {
466   return(SLFV_Neighborhood_Size(tree)/(4.*PI*SLFV_Update_Sigsq(tree)));
467 }
468 
469 /*////////////////////////////////////////////////////////////
470 ////////////////////////////////////////////////////////////*/
471 
SLFV_Rate_Per_Unit_Area(t_tree * tree)472 phydbl SLFV_Rate_Per_Unit_Area(t_tree *tree)
473 {
474   int i;
475   phydbl denom;
476 
477   denom = (tree->mmod->lim_up->lonlat[0]-tree->mmod->lim_do->lonlat[0]);
478   for(i=1;i<tree->mmod->n_dim;i++) denom *= (tree->mmod->lim_up->lonlat[i]-tree->mmod->lim_do->lonlat[i]);
479 
480   return(tree->mmod->lbda / denom);
481 
482 }
483 
484 /*////////////////////////////////////////////////////////////
485 ////////////////////////////////////////////////////////////*/
486 
SLFV_Sample_Rad_From_Prior(t_tree * tree)487 phydbl SLFV_Sample_Rad_From_Prior(t_tree *tree)
488 {
489   phydbl u,h,w,lbda,mu,b,a;
490 
491   h    = (tree->mmod->lim_up->lonlat[0]-tree->mmod->lim_do->lonlat[0]);
492   w    = (tree->mmod->lim_up->lonlat[1]-tree->mmod->lim_do->lonlat[1]);
493   lbda = tree->mmod->lbda;
494   mu   = tree->mmod->mu;
495   a    = tree->mmod->min_sigsq;
496   b    = tree->mmod->max_sigsq;
497 
498   u = Uni();
499 
500   return(POW(((b-a)*u+a)*h*w/(4.*PI*lbda*mu),0.25));
501 
502 }
503 
504 /*////////////////////////////////////////////////////////////
505 ////////////////////////////////////////////////////////////*/
506 
SLFV_Update_Radius(t_tree * tree)507 phydbl SLFV_Update_Radius(t_tree *tree)
508 {
509   switch(tree->mmod->id)
510     {
511     case SLFV_UNIFORM: { return(-1.0); break;}
512     case SLFV_GAUSSIAN:
513       {
514         return(POW(tree->mmod->sigsq/(SLFV_Rate_Per_Unit_Area(tree)*4.*PI*tree->mmod->mu),0.25));
515         break;
516       }
517     }
518   return(-1.);
519 }
520 
521 /*////////////////////////////////////////////////////////////
522 ////////////////////////////////////////////////////////////*/
523 
SLFV_Generation_Length(t_tree * tree)524 phydbl SLFV_Generation_Length(t_tree *tree)
525 {
526   return(1./(2.*tree->mmod->mu*POW(tree->mmod->rad,2)*PI*SLFV_Rate_Per_Unit_Area(tree)));
527 }
528 
529 /*////////////////////////////////////////////////////////////
530 ////////////////////////////////////////////////////////////*/
531 
SLFV_Neighborhood_Size(t_tree * tree)532 phydbl SLFV_Neighborhood_Size(t_tree *tree)
533 {
534   switch(tree->mmod->id)
535     {
536     case SLFV_UNIFORM: { return(1./tree->mmod->mu); break; }
537     case SLFV_GAUSSIAN:  { return(2./tree->mmod->mu); break; }
538     }
539   return(-1.);
540 }
541 
542 /*////////////////////////////////////////////////////////////
543 ////////////////////////////////////////////////////////////*/
544 
SLFV_Update_Sigsq(t_tree * tree)545 phydbl SLFV_Update_Sigsq(t_tree *tree)
546 {
547   switch(tree->mmod->id)
548     {
549     case SLFV_UNIFORM : { return(-1.0); break;}
550     case SLFV_GAUSSIAN :
551       {
552         return(4.*PI*
553                SLFV_Rate_Per_Unit_Area(tree) *
554                pow(tree->mmod->rad,4)*
555                tree->mmod->mu);
556         break;
557       }
558     }
559   return(-1.);
560 }
561 
562 /*////////////////////////////////////////////////////////////
563 ////////////////////////////////////////////////////////////*/
564 
565 /* Uses the regression technique described in Barton et al. TPB (2013)
566    to estimate the size of the neighborhood when the population evolve
567    according to a spatial Lambda-Fleming-Viot process.
568 */
SLFV_Neighborhood_Size_Regression(t_tree * tree)569 phydbl SLFV_Neighborhood_Size_Regression(t_tree *tree)
570 {
571   int i,j,pair;
572   t_node *anc;
573   phydbl *dist,min_dist;
574   phydbl QA,Qr,*fst,fst0,fst_min_dist;
575   phydbl cov_fst_dist,var_dist,slope;
576   phydbl eps;
577 
578   eps = 1.E-10;
579 
580   QA = Mean_Identity(tree->data);
581 
582   fst  = (phydbl *)mCalloc(tree->n_otu*(tree->n_otu-1)/2,sizeof(phydbl));
583   dist = (phydbl *)mCalloc(tree->n_otu*(tree->n_otu-1)/2,sizeof(phydbl));
584 
585   pair = 0;
586   fst0 = 0.0;
587   for(i=0;i<tree->n_otu-1;i++)
588     {
589       fst_min_dist = 0.0;
590       min_dist = MDBL_MAX;
591       for(j=i+1;j<tree->n_otu;j++)
592         {
593           anc = Find_Lca_Pair_Of_Nodes(tree->a_nodes[i],tree->a_nodes[j],tree);
594           if(anc == NULL)
595             {
596               PhyML_Fprintf(stderr,"\n. %s",Write_Tree(tree));
597               PhyML_Fprintf(stderr,"\n. %s %s",tree->a_nodes[i]->name,tree->a_nodes[j]->name);
598               Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
599             }
600 
601           dist[pair] = Euclidean_Dist(tree->a_nodes[i]->ldsk->coord,tree->a_nodes[j]->ldsk->coord);
602           dist[pair] = log(dist[pair]);
603 
604           Qr = Pairwise_Identity(i,j,tree->data);
605           fst[pair] = (Qr-QA)/(1.-QA);
606 
607           if(dist[pair] < min_dist)
608             {
609               min_dist     = dist[pair];
610               fst_min_dist = fst[pair];
611             }
612 
613           pair++;
614         }
615       fst0 += fst_min_dist;
616     }
617 
618   fst0 /= (phydbl)(tree->n_otu);
619 
620   cov_fst_dist = Covariance(dist,fst,pair);
621   var_dist = Variance(dist,pair);
622 
623   slope = cov_fst_dist / var_dist;
624 
625   Free(dist);
626   Free(fst);
627 
628   return((fst0-1.+eps)/(slope+eps));
629 }
630 
631 /*////////////////////////////////////////////////////////////
632 ////////////////////////////////////////////////////////////*/
633 
SLFV_Lk_Range(t_dsk * young,t_dsk * old,t_tree * tree)634 phydbl SLFV_Lk_Range(t_dsk *young, t_dsk *old, t_tree *tree)
635 {
636   if(tree->mmod->id == SLFV_GAUSSIAN) return(SLFV_Lk_Gaussian_Range(young,old,tree));
637   else if(tree->mmod->id == SLFV_UNIFORM) assert(FALSE);
638   else assert(FALSE);
639   return(-1.);
640 }
641 
642 /*////////////////////////////////////////////////////////////
643 ////////////////////////////////////////////////////////////*/
644 
SLFV_Lk_Gaussian_Range(t_dsk * young,t_dsk * old,t_tree * tree)645 phydbl SLFV_Lk_Gaussian_Range(t_dsk *young, t_dsk *old, t_tree *tree)
646 {
647   t_dsk *disk;
648   phydbl lnL,dt;
649   int n_evt;
650 
651   /* return(PHYREX_Lk(tree)); */
652 
653   if(PHYREX_Total_Number_Of_Intervals(tree) > tree->mmod->max_num_of_intervals) return UNLIKELY;
654 
655   assert(young);
656 
657   lnL  = 0.0;
658 
659   PHYREX_Update_Lindisk_List_Core(young,tree);
660   lnL += SLFV_Lk_Gaussian_Core(young,tree);
661 
662   dt = 0.0;
663   n_evt = 0;
664   disk = young->prev;
665   do
666     {
667       if(disk->age_fixed == NO)
668         {
669           dt += fabs(disk->next->time - disk->time);
670           n_evt++;
671         }
672 
673       assert(disk);
674       if(disk->time > disk->next->time) return UNLIKELY;
675       PHYREX_Update_Lindisk_List_Core(disk,tree);
676       lnL += SLFV_Lk_Gaussian_Core(disk,tree);
677       if(disk == old) break;
678       disk = disk->prev;
679     }
680   while(disk);
681 
682   lnL += n_evt * log(tree->mmod->lbda) - tree->mmod->lbda * dt;
683 
684   return(lnL);
685 }
686 
687 //////////////////////////////////////////////////////////////
688 //////////////////////////////////////////////////////////////
689 
SLFV_Lk_Gaussian_Core(t_dsk * disk,t_tree * tree)690 phydbl SLFV_Lk_Gaussian_Core(t_dsk *disk, t_tree *tree)
691 {
692   phydbl lnL,log_prob_hit,log_mu;
693   int was_hit,i,j,err;
694   phydbl two_theta_two;
695 
696   two_theta_two = 2.*pow(tree->mmod->rad,2);
697   lnL           = 0.0;
698   log_mu        = log(tree->mmod->mu);
699   was_hit       = (disk->ldsk != NULL);
700 
701   if(disk == tree->young_disk) return 0.0;
702   if(disk->age_fixed == YES) return 0.0;
703 
704   for(i=0;i<disk->n_ldsk_a;++i)
705     if(disk->ldsk_a[i]->prev->disk->time > disk->time)
706       return UNLIKELY;
707 
708   if(disk->ldsk != NULL)
709     {
710       for(i=0;i<disk->n_ldsk_a;++i) if(disk->ldsk_a[i]->prev == disk->ldsk) break;
711       if(i == disk->n_ldsk_a) return UNLIKELY;
712       /* assert(i != disk->n_ldsk_a); */
713     }
714 
715   for(i=0;i<disk->n_ldsk_a;++i)
716     {
717       if(PHYREX_Is_In_Ldscape(disk->ldsk_a[i],tree->mmod) == NO)
718         {
719           for(j=0;j<tree->mmod->n_dim;j++) PhyML_Fprintf(stdout,"\n. Found a lineage outside habitat (%20f)",disk->ldsk_a[i]->coord->lonlat[j]);
720           return(UNLIKELY);
721         }
722 
723       log_prob_hit = log_mu;
724       for(j=0;j<tree->mmod->n_dim;j++)
725         log_prob_hit += -pow(disk->ldsk_a[i]->coord->lonlat[j] - disk->centr->lonlat[j],2)/two_theta_two;
726 
727 
728       if(disk->ldsk_a[i]->prev == disk->ldsk) // disk->ldsk_a[i] was hit
729         {
730           lnL += log_prob_hit;
731         }
732       else // disk->ldsk_a[i] was not hit
733         {
734           lnL += log(1. - exp(log_prob_hit));
735         }
736     }
737 
738   /* a hit occurred */
739   if(was_hit == TRUE)
740     {
741       err = NO;
742       for(j=0;j<tree->mmod->n_dim;j++) lnL += Log_Dnorm_Trunc(disk->ldsk->coord->lonlat[j],
743                                                               disk->centr->lonlat[j],
744                                                               tree->mmod->rad,
745                                                               tree->mmod->lim_do->lonlat[j],
746                                                               tree->mmod->lim_up->lonlat[j],&err);
747     }
748 
749   /* Likelihood for the disk center */
750   for(j=0;j<tree->mmod->n_dim;j++) lnL += log(1./(tree->mmod->lim_up->lonlat[j]-tree->mmod->lim_do->lonlat[j]));
751 
752   return(lnL);
753 }
754 
755 //////////////////////////////////////////////////////////////
756 //////////////////////////////////////////////////////////////
757 
SLFV_Lk_Gaussian(t_tree * tree)758 phydbl SLFV_Lk_Gaussian(t_tree *tree)
759 {
760   t_dsk *disk;
761   int n_evt;
762 
763   assert(!tree->young_disk->next);
764   assert(tree->young_disk->prev);
765 
766   tree->mmod->c_lnL = 0.0;
767 
768   if(PHYREX_Total_Number_Of_Intervals(tree) > tree->mmod->max_num_of_intervals)
769    {
770      tree->mmod->c_lnL = UNLIKELY;
771      return UNLIKELY;
772    }
773 
774   if(isinf(tree->mmod->c_lnL) || isnan(tree->mmod->c_lnL))
775     {
776       tree->mmod->c_lnL = UNLIKELY;
777       return tree->mmod->c_lnL;
778     }
779 
780   tree->mmod->c_lnL += PHYREX_LnPrior_Radius(tree);
781 
782   PHYREX_Update_Lindisk_List(tree);
783 
784   tree->mmod->c_lnL += SLFV_Lk_Gaussian_Core(tree->young_disk,tree);
785 
786   n_evt = 0;
787   disk = tree->young_disk->prev;
788   do
789     {
790       if(disk->age_fixed == NO) n_evt++;
791 
792       assert(disk);
793       if(disk->time > disk->next->time)
794         {
795           PhyML_Printf("\n. Anomaly in ordering of disk times (disk->time: %f disk->next->time: %f)",disk->time,disk->next->time);
796           PhyML_Printf("\n. Currently doing following move: %s",tree->mcmc->move_name[tree->mcmc->move_idx]);
797           tree->mmod->c_lnL = UNLIKELY;
798           return tree->mmod->c_lnL;
799         }
800 
801       tree->mmod->c_lnL += SLFV_Lk_Gaussian_Core(disk,tree);
802       if(disk->prev == NULL) break;
803       disk = disk->prev;
804     }
805   while(1);
806 
807   tree->mmod->c_lnL += PHYREX_Lk_Time_Component(tree);
808 
809   /* tree->mmod->c_lnL += n_evt * log(tree->mmod->lbda) - tree->mmod->lbda * fabs(tree->young_disk->time - disk->time); */
810 
811   if(isinf(tree->mmod->c_lnL) || isnan(tree->mmod->c_lnL)) tree->mmod->c_lnL = UNLIKELY;
812 
813   return(tree->mmod->c_lnL);
814 }
815 
816 /*////////////////////////////////////////////////////////////
817 ////////////////////////////////////////////////////////////*/
818 // Simulate Etheridge-Barton model forwards in time, following n_otu lineages
819 // on a rectangle of dimension width x h
SLFV_Simulate_Forward_Core(int n_sites,t_tree * tree)820 t_sarea *SLFV_Simulate_Forward_Core(int n_sites, t_tree *tree)
821 {
822   t_dsk *disk,*new_disk;
823   t_ldsk *ldsk,**ldsk_a_pop,**ldsk_a_samp,**ldsk_a_tmp,**ldsk_a_tips,*new_ldsk;
824   t_ll *ldsk_list,*dum_ll;
825   int i,j,n_disk,n_dim,n_otu,init_pop_size,curr_pop_size,parent_id,n_lineages,sample_size,n_poly,*permut,n_sampled_demes;
826   phydbl sum,*parent_prob,prob_death,tree_height,max_x,max_y,trans_x,trans_y,t_sim, one_gen;
827   short int dies,n_remain;
828   t_phyrex_mod *mmod;
829   t_poly **poly;
830   t_sarea *area;
831   short int *is_sampled;
832   phydbl w,h;
833   phydbl cx,cy;
834   phydbl lx,ly;
835   int n_survive,n_offspring, n_gen;
836 
837   mmod          = tree->mmod;
838   n_dim         = 2;
839   n_otu         = tree->n_otu;
840   w             = (mmod->lim_up->lonlat[0]-mmod->lim_do->lonlat[0]);
841   h             = (mmod->lim_up->lonlat[1]-mmod->lim_do->lonlat[1]);
842   init_pop_size = mmod->rho*w*h;
843   n_gen         = 10; // number of generations to simulate
844   one_gen       = mmod->gen_cal_time; // time duration of one generation (calendar unit)
845   t_sim         = n_gen * one_gen; // total simulation duration (calendar unit)
846 
847 
848   /* Allocate and initialise first disk event */
849   disk = PHYREX_Make_Disk_Event(n_dim,init_pop_size);
850   PHYREX_Init_Disk_Event(disk,n_dim,NULL);
851   disk->time = 0.0;
852   disk->prev = NULL;
853   disk->n_ldsk_a = init_pop_size;
854 
855   /* Allocate coordinates for all individuals in the starting population */
856   ldsk_list = NULL;
857   i = 0;
858   do
859     {
860       ldsk = PHYREX_Make_Lindisk_Node(n_dim);
861       PHYREX_Init_Lindisk_Node(ldsk,disk,n_dim);
862       Push_Bottom_Linked_List(ldsk,&ldsk_list,NO);
863       i++;
864     }
865   while(i != init_pop_size);
866 
867 
868   /* Fill in array of lineages on the first disk */
869   dum_ll = ldsk_list->head;
870   i = 0;
871   do
872     {
873       disk->ldsk_a[i] = (t_ldsk *)dum_ll->v;
874       dum_ll = dum_ll->next;
875       i++;
876     }
877   while(dum_ll != NULL);
878 
879 
880 
881   /* Generate coordinates for all individuals in starting population */
882   for(i=0;i<init_pop_size;i++)
883     {
884       disk->ldsk_a[i]->coord->lonlat[0] = Uni()*w; // longitude
885       disk->ldsk_a[i]->coord->lonlat[1] = Uni()*h; // latitude
886     }
887 
888 
889   n_disk = 0;
890   do
891     {
892       /* Create new disk */
893       new_disk = PHYREX_Make_Disk_Event(n_dim,1);
894       Free(new_disk->ldsk_a);
895       new_disk->ldsk_a = NULL;
896       PHYREX_Init_Disk_Event(new_disk,n_dim,NULL);
897       new_disk->prev = disk;
898       disk->next = new_disk;
899       n_disk++;
900 
901 
902       /* Coordinates of event */
903       new_disk->centr->lonlat[0] = Uni()*(mmod->lim_up->lonlat[0]-mmod->lim_do->lonlat[0])+mmod->lim_do->lonlat[0];
904       new_disk->centr->lonlat[1] = Uni()*(mmod->lim_up->lonlat[1]-mmod->lim_do->lonlat[1])+mmod->lim_do->lonlat[1];
905 
906       cx = new_disk->centr->lonlat[0];
907       cy = new_disk->centr->lonlat[1];
908 
909 
910       /* Time of new disk */
911       new_disk->time = disk->time + Rexp(mmod->lbda);
912 
913       /* PhyML_Printf("\n. new_disk->time: %f t_sim: %f",new_disk->time,t_sim); */
914 
915       /* Size of current population */
916       curr_pop_size = disk->n_ldsk_a;
917 
918       if(curr_pop_size == 0)
919         {
920           PhyML_Fprintf(stderr,"\n== Population went extinct after %d events",n_disk);
921           Exit("\n");
922         }
923 
924 
925       if(new_disk->time < t_sim)
926         {
927           /* Select one parent */
928           parent_prob = (phydbl *)mCalloc(curr_pop_size,sizeof(phydbl));
929           for(i=0;i<curr_pop_size;++i)
930             {
931               lx = disk->ldsk_a[i]->coord->lonlat[0];
932               ly = disk->ldsk_a[i]->coord->lonlat[1];
933 
934               switch(mmod->id)
935                 {
936                 case SLFV_UNIFORM:
937                   {
938                     if(PHYREX_Is_In_Disk(disk->ldsk_a[i]->coord,new_disk,mmod) == YES)
939                       parent_prob[i] = 1.0;
940                     else
941                       parent_prob[i] = 0.0;
942                     break;
943                   }
944                 case SLFV_GAUSSIAN:
945                   {
946                     parent_prob[i] = 0.0;
947                     parent_prob[i] += -pow(lx - cx,2)/(2.*pow(mmod->rad,2));
948                     parent_prob[i] += -pow(ly - cy,2)/(2.*pow(mmod->rad,2));
949                     parent_prob[i] = exp(parent_prob[i]);
950                     break;
951                   }
952                 }
953             }
954 
955           sum = 0.0;
956           for(i=0;i<curr_pop_size;++i) sum += parent_prob[i];
957 
958           if(sum < 1.E-100)
959             {
960               sum = curr_pop_size;
961               for(i=0;i<curr_pop_size;++i) parent_prob[i] = 1.;
962             }
963 
964           for(i=0;i<curr_pop_size;++i) parent_prob[i] /= sum;
965 
966 
967 
968           parent_id = Sample_i_With_Proba_pi(parent_prob,curr_pop_size);
969           new_disk->ldsk = disk->ldsk_a[parent_id];
970 
971           Free(parent_prob);
972 
973 
974           /* Which lineages survive that event? */
975           n_survive = 0;
976           for(i=0;i<curr_pop_size;++i)
977             {
978               lx = disk->ldsk_a[i]->coord->lonlat[0];
979               ly = disk->ldsk_a[i]->coord->lonlat[1];
980 
981               prob_death = 0.0;
982               switch(mmod->id)
983                 {
984                 case SLFV_UNIFORM:
985                   {
986                     if(PHYREX_Is_In_Disk(disk->ldsk_a[i]->coord,new_disk,mmod) == YES)
987                       prob_death = mmod->mu;
988                     break;
989                   }
990                 case SLFV_GAUSSIAN:
991                   {
992                     prob_death = log(mmod->mu);
993                     prob_death += -pow(lx - cx,2)/(2.*pow(mmod->rad,2));
994                     prob_death += -pow(ly - cy,2)/(2.*pow(mmod->rad,2));
995                     prob_death = exp(prob_death);
996                     break;
997                   }
998                 }
999 
1000               dies = NO;
1001               if(Uni() < prob_death) dies = YES;
1002 
1003               if(dies == NO)
1004                 {
1005                   if(n_survive == 0) new_disk->ldsk_a = (t_ldsk **)mCalloc(1,sizeof(t_ldsk *));
1006                   else new_disk->ldsk_a = (t_ldsk **)mRealloc(new_disk->ldsk_a,n_survive+1,sizeof(t_ldsk *));
1007                   new_disk->ldsk_a[n_survive] = disk->ldsk_a[i];
1008                   n_survive++;
1009                 }
1010             }
1011 
1012           /* PhyML_Printf("\n. n_survive: %d",n_survive); */
1013 
1014           /* Offspring */
1015           /* How many? */
1016           phydbl r = mmod->rad;
1017           phydbl u = mmod->mu;
1018           phydbl rho = mmod->rho;
1019           phydbl mean =
1020             0.5*PI*rho*u*r*r*
1021             (erf(0.5*sqrt(2.)/r*(cy-0.0))*erf(0.5*sqrt(2.)/r*(cx-0.0)) +
1022              erf(0.5*sqrt(2.)/r*(cy-h))*erf(0.5*sqrt(2.)/r*(cx-w)) -
1023              erf(0.5*sqrt(2.)/r*(cy-0.0))*erf(0.5*sqrt(2.)/r*(cx-w)) -
1024              erf(0.5*sqrt(2.)/r*(cy-h))*erf(0.5*sqrt(2.)/r*(cx-0.0)));
1025 
1026           n_offspring = Rpois(mean);
1027 
1028           new_disk->n_ldsk_a = n_survive + n_offspring;
1029 
1030           /* PhyML_Printf("\n. n_offspring: %d",n_offspring); */
1031 
1032           /* Where */
1033           for(i=0;i<n_offspring;++i)
1034             {
1035               /* New lindisk */
1036               new_ldsk = PHYREX_Make_Lindisk_Node(n_dim);
1037               PHYREX_Init_Lindisk_Node(new_ldsk,new_disk,n_dim);
1038               Push_Bottom_Linked_List(new_ldsk,&ldsk_list,NO);
1039 
1040               if(new_disk->ldsk_a == NULL) new_disk->ldsk_a = (t_ldsk **)mCalloc(n_survive+1,sizeof(t_ldsk *));
1041               else new_disk->ldsk_a = (t_ldsk **)mRealloc(new_disk->ldsk_a,n_survive+i+1,sizeof(t_ldsk *));
1042 
1043               new_disk->ldsk_a[n_survive+i] = new_ldsk;
1044 
1045 
1046               /* Generate new location */
1047               switch(mmod->id)
1048                 {
1049                 case SLFV_UNIFORM: { PHYREX_Runif_Rectangle_Overlap(new_ldsk,new_disk,mmod); break; }
1050                 case SLFV_GAUSSIAN:  { PHYREX_Rnorm_Trunc(new_ldsk,new_disk,mmod); break; }
1051                 }
1052 
1053               /* Connect to parent */
1054               new_ldsk->prev = disk->ldsk_a[parent_id];
1055             }
1056         }
1057       else
1058         {
1059           new_disk->time = t_sim;
1060           new_disk->n_ldsk_a = disk->n_ldsk_a;
1061           new_disk->ldsk_a = (t_ldsk **)mCalloc(curr_pop_size,sizeof(t_ldsk *));
1062           for(i=0;i<curr_pop_size;++i) new_disk->ldsk_a[i] = disk->ldsk_a[i];
1063         }
1064 
1065 
1066       disk = new_disk;
1067 
1068       /* printf("\n. pop size: %6d # of events: %6d",disk->n_ldsk_a,n_disk); */
1069     }
1070   while(disk->time < t_sim);
1071 
1072 
1073   /* Dispersal stuff */
1074   /* { */
1075   /*   phydbl T = disk->time; // total simulation time (in calendar unit) */
1076   /*   t_ldsk *dum_ldsk = disk->ldsk_a[Rand_Int(0,disk->n_ldsk_a-1)]; // random selection of a lineage among all the lineages available at present time */
1077   /*   t_dsk *dum_dsk = disk; */
1078   /*   phydbl s = w*h; // area */
1079   /*   printf("\n. s: %f",s); fflush(NULL); */
1080   /*   phydbl gentime = 1./(2.*PI*mmod->rad*mmod->rad*mmod->mu*mmod->lbda/s); */
1081   /*   phydbl ssq = 0.0; // sum of squared difference between current and previous position of lineage, when previous pos != current pos. */
1082   /*   phydbl curr_pos,prev_pos; */
1083   /*   int curr_gen,prev_gen,nhits=0; */
1084   /*   prev_pos = dum_ldsk->coord->lonlat[0]; */
1085   /*   curr_pos = prev_pos; */
1086   /*   curr_gen = prev_gen = 1; */
1087   /*   // Compute sum of squared difference of positions */
1088   /*   do */
1089   /*     { */
1090   /*       curr_gen  = 1 + (int)(dum_dsk->time-T)/gentime; // increment the number of generations (generation time measured in calendar time units) */
1091   /*       curr_pos  = dum_ldsk->coord->lonlat[0]; // current position of lineage */
1092 
1093   /*       if(dum_ldsk->disk == dum_dsk) // lineage was born at that time */
1094   /*         { */
1095   /*           dum_ldsk = dum_ldsk->prev; // jump to parent */
1096   /*           nhits++; */
1097   /*         } */
1098 
1099   /*       if(curr_gen != prev_gen) */
1100   /*         { */
1101   /*           ssq += pow(curr_pos-prev_pos,2); */
1102   /*           prev_pos = curr_pos; */
1103   /*           prev_gen = curr_gen; */
1104   /*         } */
1105 
1106   /*       dum_dsk = dum_dsk->prev; */
1107   /*     } */
1108   /*   while(dum_dsk); */
1109 
1110   /*   PhyML_Printf("\n # var T nhits T/nhits gentime sigsq theta u lambda"); */
1111   /*   PhyML_Printf("\n a@z %G %f %d %f %f %f %f %f %f", */
1112   /*                (1./(T/gentime))*ssq, */
1113   /*                T, */
1114   /*                nhits, */
1115   /*                T/nhits, */
1116   /*                gentime, */
1117   /*                4*pow(mmod->rad,4)*mmod->lbda/s*PI*mmod->mu*gentime, */
1118   /*                mmod->rad, */
1119   /*                mmod->mu, */
1120   /*                mmod->lbda); */
1121 
1122   /*   Exit("\n"); */
1123   /* } */
1124 
1125 
1126   /* /\* Coalescence stuff *\/ */
1127   /* { */
1128   /*   t_ldsk *lin1, *lin2; */
1129   /*   int n_evts,have_coal,coal_evt; */
1130   /*   phydbl T,t; */
1131 
1132   /*   /\* Selection of two lineages at random *\/ */
1133   /*   while(disk->next) disk = disk->next; */
1134   /*   permut = Permutate(disk->n_ldsk_a); */
1135   /*   lin1 = disk->ldsk_a[permut[0]]; */
1136   /*   lin2 = disk->ldsk_a[permut[1]]; */
1137   /*   Free(permut); */
1138 
1139   /*   printf("\n. disk->time-lin1->prev->time: %f",disk->time-lin1->prev->disk->time); */
1140   /*   printf("\n. disk->time-lin2->prev->time: %f",disk->time-lin2->prev->disk->time); */
1141 
1142   /*   /\* Go back in time *\/ */
1143   /*   curr_t = 0.0; */
1144   /*   n_evts = 0; */
1145   /*   coal_evt = 0; */
1146   /*   have_coal = 0; */
1147   /*   T = disk->time; */
1148   /*   do */
1149   /*     { */
1150   /*       if(disk->ldsk) n_evts++; */
1151 
1152   /*       if(disk->ldsk && disk->ldsk == lin1->prev) lin1 = lin1->prev; */
1153   /*       if(disk->ldsk && disk->ldsk == lin2->prev) lin2 = lin2->prev; */
1154 
1155   /*       if(lin1 == lin2) */
1156   /*         { */
1157   /*           have_coal = 1; */
1158   /*           coal_evt  = n_evts; */
1159   /*           break; */
1160   /*         } */
1161 
1162   /*       disk = disk->prev; */
1163   /*       t=disk?disk->time:0.0; */
1164   /*       curr_t = (T-t); */
1165 
1166   /*       /\* printf("\n>>> coal %d n_evts: %d curr_t: %f disk->time: %f T: %f one_gen: %f", *\/ */
1167   /*       /\*        have_coal, *\/ */
1168   /*       /\*        n_evts, *\/ */
1169   /*       /\*        curr_t, *\/ */
1170   /*       /\*        t, *\/ */
1171   /*       /\*        T, *\/ */
1172   /*       /\*        one_gen); *\/ */
1173   /*     } */
1174   /*   /\* while(disk && n_evts < 2); *\/ */
1175   /*   while(disk && curr_t < one_gen); */
1176 
1177   /*   PhyML_Printf("\n. @ coal : %d %d %f",have_coal,coal_evt,curr_t); */
1178   /*   /\* Exit("\n"); *\/ */
1179   /* } */
1180 
1181 
1182 
1183 
1184   /* Allocate coordinates for all the tips first (will grow afterwards) */
1185   ldsk_a_samp = (t_ldsk **)mCalloc(n_otu,sizeof(t_ldsk *));
1186   ldsk_a_tips = (t_ldsk **)mCalloc(n_otu,sizeof(t_ldsk *));
1187   ldsk_a_tmp  = (t_ldsk **)mCalloc(n_otu,sizeof(t_ldsk *));
1188 
1189   while(disk->next) disk = disk->next;
1190   ldsk_a_pop = disk->ldsk_a;
1191 
1192 
1193   /* /\* Sample n_otu individuals uniformly at random... *\/   */
1194   /* while(disk->next) disk = disk->next;  */
1195   /* permut = Permutate(disk->n_ldsk_a); */
1196   /* for(i=0;i<n_otu;i++) ldsk_a_samp[i] = ldsk_a_pop[permut[i]]; */
1197   /* Free(permut); */
1198   /* n_sampled_demes = 1; */
1199 
1200 
1201   /* ... or take samples in random polygons */
1202   n_poly = n_sites;
1203   is_sampled = (short int *)mCalloc(n_poly,sizeof(short int));
1204 
1205   do
1206     {
1207       poly = (t_poly **)mCalloc(n_poly,sizeof(t_poly *));
1208       for(i=0;i<n_poly;++i) poly[i] = Rpoly(3); /* triangles */
1209       for(i=0;i<n_poly;++i)
1210         {
1211           for(j=0;j<poly[i]->n_poly_vert;++j)
1212             {
1213               poly[i]->poly_vert[j]->lonlat[0] *= (mmod->lim_up->lonlat[0]-mmod->lim_do->lonlat[0])*0.5;
1214               poly[i]->poly_vert[j]->lonlat[1] *= (mmod->lim_up->lonlat[1]-mmod->lim_do->lonlat[1])*0.5;
1215             }
1216 
1217           max_x = 0.0;
1218           max_y = 0.0;
1219           for(j=0;j<poly[i]->n_poly_vert;++j)
1220             {
1221               if(poly[i]->poly_vert[j]->lonlat[0] > max_x) max_x = poly[i]->poly_vert[j]->lonlat[0];
1222               if(poly[i]->poly_vert[j]->lonlat[1] > max_y) max_y = poly[i]->poly_vert[j]->lonlat[1];
1223             }
1224 
1225           trans_x = Uni()*(mmod->lim_up->lonlat[0] - max_x);
1226           trans_y = Uni()*(mmod->lim_up->lonlat[1] - max_y);
1227 
1228           for(j=0;j<poly[i]->n_poly_vert;++j)
1229             {
1230               poly[i]->poly_vert[j]->lonlat[0] += trans_x;
1231               poly[i]->poly_vert[j]->lonlat[1] += trans_y;
1232             }
1233         }
1234 
1235       for(i=0;i<n_otu;++i) ldsk_a_samp[i] = NULL;
1236 
1237       for(i=0;i<n_poly;++i) is_sampled[i] = NO;
1238 
1239       permut = Permutate(n_poly);
1240 
1241       sample_size = 0;
1242       for(i=0;i<curr_pop_size;++i)
1243         {
1244           for(j=0;j<n_poly;++j)
1245             {
1246               if(Is_In_Polygon(ldsk_a_pop[i]->coord,poly[permut[j]]) == YES)
1247                 {
1248                   char *s;
1249                   int k;
1250 
1251                   s = (char *)mCalloc((int)strlen(ldsk_a_pop[i]->coord->id)+1+50,sizeof(char));
1252                   For(k,(int)strlen(ldsk_a_pop[i]->coord->id)+1+20) s[k]='\0';
1253                   sprintf(s,"%d_",i);
1254                   strcat(s,ldsk_a_pop[i]->coord->id);
1255                   Free(ldsk_a_pop[i]->coord->id);
1256                   strcat(s,"_deme");
1257                   sprintf(s+strlen(s),"%d",permut[j]);
1258                   ldsk_a_pop[i]->coord->id = s;
1259 
1260 
1261                   ldsk_a_samp[sample_size] = ldsk_a_pop[i];
1262                   sample_size++;
1263                   PhyML_Printf("\n@ Coord: %f %f %s %p",
1264                                ldsk_a_samp[sample_size-1]->coord->lonlat[0],
1265                                ldsk_a_samp[sample_size-1]->coord->lonlat[1],
1266                                ldsk_a_pop[i]->coord->id,ldsk_a_pop[i]);
1267 
1268                   is_sampled[permut[j]] = YES;
1269                   break;
1270                 }
1271             }
1272           if(sample_size == n_otu) break;
1273         }
1274 
1275       Free(permut);
1276 
1277       if(i == curr_pop_size)
1278         {
1279           for(j=0;j<n_poly;j++) Free_Poly(poly[j]);
1280           Free(poly);
1281           /* PhyML_Printf("\n. Not enough individuals in polygon(s) (only %d found).",sample_size); */
1282           /* Generic_Exit(__FILE__,__LINE__,__FUNCTION__);       */
1283         }
1284       else break;
1285     }
1286   while(1);
1287 
1288   n_sampled_demes = 0;
1289   for(i=0;i<n_poly;++i) if(is_sampled[i] == YES) n_sampled_demes++;
1290 
1291   for(i=0;i<n_otu;++i) ldsk_a_tips[i] = ldsk_a_samp[i];
1292 
1293   area = Make_Sarea(n_sampled_demes);
1294   area->n_poly = n_sampled_demes;
1295   n_sampled_demes = 0;
1296   for(i=0;i<n_poly;++i) if(is_sampled[i] == YES) area->a_poly[n_sampled_demes++] = poly[i];
1297 
1298 
1299   for(i=0;i<area->n_poly;++i)
1300     {
1301       /* PhyML_Printf("\n@ Poly %3d area = %f",i,Area_Of_Poly_Monte_Carlo(area->a_poly[i],mmod->lim)); */
1302 
1303       for(j=0;j<area->a_poly[i]->n_poly_vert;++j)
1304         {
1305           PhyML_Printf("\n@ Poly %3d point %d (x,y) = (%f,%f)",
1306                        i,
1307                        j,
1308                        area->a_poly[i]->poly_vert[j]->lonlat[0],
1309                        area->a_poly[i]->poly_vert[j]->lonlat[1]);
1310         }
1311     }
1312 
1313   tree->young_disk       = disk;
1314   disk->ldsk_a           = ldsk_a_tips;
1315   disk->mmod             = tree->mmod;
1316   disk->centr->lonlat[0] = .5*(tree->mmod->lim_up->lonlat[0]-tree->mmod->lim_do->lonlat[0]);
1317   disk->centr->lonlat[1] = .5*(tree->mmod->lim_up->lonlat[1]-tree->mmod->lim_do->lonlat[1]);
1318   disk->n_ldsk_a         = n_otu;
1319 
1320   int n_discs = 0;
1321 
1322   n_lineages = n_otu;
1323   do
1324     {
1325       n_remain = 0;
1326       for(i=0;i<n_lineages;++i)
1327         {
1328 
1329           if((disk->prev->ldsk != NULL) && (disk->prev->ldsk == ldsk_a_samp[i]->prev)) /* Coalescent event is sampled */
1330             {
1331               PHYREX_Make_Lindisk_Next(disk->prev->ldsk);
1332               disk->prev->ldsk->next[disk->prev->ldsk->n_next-1] = ldsk_a_samp[i];
1333             }
1334           else
1335             {
1336               ldsk_a_tmp[n_remain] = ldsk_a_samp[i];
1337               n_remain++;
1338             }
1339         }
1340 
1341       for(i=0;i<n_remain;i++) ldsk_a_samp[i] = ldsk_a_tmp[i];
1342       if((disk->prev->ldsk != NULL) && (disk->prev->ldsk->n_next > 0)) ldsk_a_samp[i] = disk->prev->ldsk;
1343 
1344       if((disk->prev->ldsk != NULL) && (disk->prev->ldsk->n_next > 0))
1345         {
1346           n_lineages -= (disk->prev->ldsk->n_next);
1347           n_lineages += 1;
1348         }
1349 
1350       if(n_lineages != n_remain+(disk->prev->ldsk && disk->prev->ldsk->n_next>0)?1:0)
1351         {
1352           PhyML_Fprintf(stderr,"\n. n_lineages: %d n_remain: %d n_next: %d",
1353                         n_lineages,
1354                         n_remain,
1355                         disk->prev->ldsk->n_next);
1356           Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
1357         }
1358 
1359       /* None of the sampled lineages was hit */
1360       if((disk->prev->ldsk != NULL) && (disk->prev->ldsk->n_next == 0))
1361         {
1362           /* printf("\n. free %s @ %f",disk->ldsk->coord->id,disk->time); */
1363           Free_Ldisk(disk->prev->ldsk);
1364           disk->prev->ldsk = NULL;
1365         }
1366 
1367       disk = disk->prev;
1368 
1369       if(disk->prev == NULL)
1370         {
1371           PhyML_Fprintf(stderr,"\n. # lineages left: %d",n_remain);
1372           PhyML_Fprintf(stderr,"\n. Sample has not coalesced completely.");
1373           fflush(NULL);
1374           Exit("\n");
1375         }
1376     }
1377   while(n_lineages > 1);
1378 
1379 
1380   /* for(i=0;i<n_otu;i++) printf("\n> %s",tree->young_disk->ldsk_a[i]->coord->id); */
1381 
1382   disk->prev = NULL;
1383 
1384   disk = tree->young_disk;
1385   tree_height = disk->time;
1386   n_discs = 0;
1387   while(disk)
1388     {
1389       disk->time -= tree_height;
1390       disk = disk->prev;
1391       n_discs++;
1392     }
1393 
1394   Free(ldsk_a_tmp);
1395   Free(ldsk_a_samp);
1396   Free(ldsk_a_pop);
1397   Free(is_sampled);
1398 
1399   /* PHYREX_Print_Struct('#',tree); */
1400   /* Exit("\n"); */
1401 
1402   return(area);
1403 
1404 }
1405 
1406 
1407 /*////////////////////////////////////////////////////////////
1408 ////////////////////////////////////////////////////////////*/
1409 // Simulate Etheridge-Barton model backwards in time, following n_otu lineages
1410 // on a rectangle.
SLFV_Simulate_Backward_Core(t_dsk * init_disk,int avoid_multiple_mergers,t_tree * tree)1411 phydbl SLFV_Simulate_Backward_Core(t_dsk *init_disk, int avoid_multiple_mergers, t_tree *tree)
1412 {
1413   t_dsk *disk,*new_disk,*oldest_disk;
1414   int i,j,reached_oldest_disk,err;
1415   phydbl prob_hit,u,new_time,lnL;
1416   t_phyrex_mod *mmod;
1417   t_node *n;
1418 
1419   mmod = tree->mmod;
1420 
1421   Get_Node_Ranks_From_Tip_Times(tree);
1422 
1423   // Get to the youngest node
1424   n = tree->a_nodes[0];
1425   while(n->rk_next) n = n->rk_next;
1426 
1427   disk = init_disk;
1428   // Make sure at least one of the youngest nodes is on init_disk
1429   for(i=0;i<init_disk->n_ldsk_a;++i) if(init_disk->ldsk_a[i]->nd == n) break;
1430   assert(i != init_disk->n_ldsk_a);
1431 
1432   // Get to the oldest sampled disk
1433   disk = init_disk;
1434   while(disk->prev) disk = disk->prev;
1435   oldest_disk = disk;
1436   reached_oldest_disk = NO;
1437   if(init_disk->prev == NULL) reached_oldest_disk = YES; // Only one sampled disk
1438 
1439   lnL = 0.0;
1440   disk = init_disk;
1441   do
1442     {
1443       /* MRCA reached */
1444       if(PHYREX_Number_Of_Outgoing_Ldsks(disk) == 1 && reached_oldest_disk == YES) break;
1445 
1446       /* Proposed new time */
1447       new_time = disk->time - Rexp(mmod->lbda);
1448 
1449       lnL += log(mmod->lbda) - mmod->lbda * fabs(disk->time - new_time);
1450 
1451       /* New time is older than previous sampled disk (disk->prev) */
1452       if(disk->prev && new_time < disk->prev->time)
1453         {
1454           new_disk = disk->prev;
1455           new_time = disk->prev->time;
1456 
1457           // Reached oldest disk -> set it to NULL
1458           // so as to indicate that it is ok to stop
1459           // the simulation, continue otherwise
1460           if(new_disk == oldest_disk) reached_oldest_disk = YES;
1461 
1462           /* Connect disk and new_disk */
1463           disk->prev = new_disk;
1464           new_disk->next = disk;
1465 
1466           /* Time of next event */
1467           new_disk->time = new_time;
1468 
1469           /* Populate new_disk->ldsk_a array */
1470           PHYREX_Update_Lindisk_List_Core(new_disk,tree);
1471         }
1472       else
1473         {
1474           /* Create new disk */
1475           new_disk = PHYREX_Make_Disk_Event(mmod->n_dim,tree->n_otu);
1476           PHYREX_Init_Disk_Event(new_disk,mmod->n_dim,NULL);
1477 
1478           /* new_disk->prev now points to previous sampled disk */
1479           new_disk->prev = disk->prev;
1480           if(disk->prev) disk->prev->next = new_disk;
1481 
1482           /* Connect disk and new_disk */
1483           disk->prev = new_disk;
1484           new_disk->next = disk;
1485 
1486           /* Time of next event */
1487           new_disk->time = new_time;
1488 
1489           /* Coordinates of new event */
1490           for(j=0;j<mmod->n_dim;++j) new_disk->centr->lonlat[j] = Uni()*(mmod->lim_up->lonlat[j]-mmod->lim_do->lonlat[j])+mmod->lim_do->lonlat[j];
1491 
1492           for(j=0;j<mmod->n_dim;++j) lnL += log(1./(mmod->lim_up->lonlat[j]-mmod->lim_do->lonlat[j]));
1493 
1494 
1495           /* Populate new_disk->ldsk_a array */
1496           PHYREX_Update_Lindisk_List_Core(new_disk,tree);
1497 
1498           new_disk->ldsk = NULL;
1499           /* Which lineages in new_disk->ldsk_a are hit? */
1500           for(i=0;i<new_disk->n_ldsk_a;++i)
1501             {
1502               prob_hit = -1.;
1503               switch(mmod->id)
1504                 {
1505                 case SLFV_UNIFORM :
1506                   {
1507                     prob_hit = mmod->mu;
1508                     break;
1509                   }
1510                 case SLFV_GAUSSIAN : case RW : case RRW :
1511                   {
1512                     prob_hit = log(mmod->mu);
1513                     for(j=0;j<mmod->n_dim;++j)
1514                       {
1515                         prob_hit +=
1516                           -pow(new_disk->ldsk_a[i]->coord->lonlat[j] -
1517                                new_disk->centr->lonlat[j],2)/(2.*pow(mmod->rad,2));
1518                       }
1519                     prob_hit = exp(prob_hit);
1520                     break;
1521                   }
1522                 }
1523 
1524 
1525               u = Uni();
1526               if(!(u > prob_hit)) // disk->ldsk_a[i] is  hit
1527                 {
1528                   lnL += log(prob_hit);
1529                   // new_disk->ldsk_a[i] is hit -> coalesce (or just jump) to parent (i.e., new_disk->ldsk)
1530                   if(new_disk->ldsk == NULL)
1531                     {
1532                       new_disk->ldsk = PHYREX_Make_Lindisk_Node(mmod->n_dim);
1533                       PHYREX_Init_Lindisk_Node(new_disk->ldsk,new_disk,mmod->n_dim);
1534 
1535                       // Generate coordinates for newly created ldsk
1536                       switch(tree->mmod->id)
1537                         {
1538                         case SLFV_UNIFORM :
1539                           {
1540                             PHYREX_Runif_Rectangle_Overlap(new_disk->ldsk,new_disk,tree->mmod);
1541                             break;
1542                           }
1543                         case SLFV_GAUSSIAN : case RW : case RRW :
1544                           {
1545                             PHYREX_Rnorm_Trunc(new_disk->ldsk,new_disk,tree->mmod);
1546                             for(j=0;j<tree->mmod->n_dim;++j)
1547                               lnL += Log_Dnorm_Trunc(new_disk->ldsk->coord->lonlat[j],
1548                                                      new_disk->centr->lonlat[j],
1549                                                      tree->mmod->rad,
1550                                                      tree->mmod->lim_do->lonlat[j],
1551                                                      tree->mmod->lim_up->lonlat[j],&err);
1552                             break;
1553                           }
1554                         }
1555                     }
1556                   new_disk->ldsk_a[i]->prev = new_disk->ldsk;
1557                   PHYREX_Make_Lindisk_Next(new_disk->ldsk);
1558                   new_disk->ldsk->next[new_disk->ldsk->n_next-1] = new_disk->ldsk_a[i];
1559                 }
1560               else
1561                 {
1562                   lnL += log(1. - prob_hit);
1563                 }
1564               if(new_disk->ldsk && new_disk->ldsk->n_next == 2 && avoid_multiple_mergers == YES) break;
1565             }
1566         }
1567 
1568       disk = new_disk;
1569     }
1570   while(1);
1571   disk->prev = NULL;
1572 
1573   return(lnL);
1574 }
1575 
1576 //////////////////////////////////////////////////////////////
1577 //////////////////////////////////////////////////////////////
1578 // Simulate Etheridge-Barton model backwards in time, following n_otu lineages
1579 // on a rectangle of dimension width w x h
1580 // See Kelleher, Barton & Etheridge, Bioinformatics, 2013.
SLFV_Simulate(int n_otu,int n_sites,phydbl w,phydbl h,phydbl lbda,phydbl rad,phydbl mu,int r_seed)1581 t_tree *SLFV_Simulate(int n_otu, int n_sites, phydbl w, phydbl h, phydbl  lbda, phydbl rad, phydbl mu, int r_seed)
1582 {
1583   t_tree *tree;
1584   int n_dim,i;
1585   t_phyrex_mod *mmod;
1586   t_dsk *disk;
1587   option *io;
1588   t_mod *mod;
1589   t_opt *s_opt;
1590   calign *cdata;
1591   /* phydbl T; */
1592 
1593   n_dim = 2; // 2-dimensional landscape
1594 
1595   io    = (option *)Make_Input();
1596   mod   = (t_mod *)Make_Model_Basic();
1597   s_opt = (t_opt *)Make_Optimiz();
1598 
1599   Set_Defaults_Input(io);
1600   Set_Defaults_Model(mod);
1601   Set_Defaults_Optimiz(s_opt);
1602 
1603   io->mod      = mod;
1604   mod->io      = io;
1605   mod->s_opt   = s_opt;
1606   io->r_seed   = r_seed;
1607 
1608   io->n_otu    = n_otu;
1609   io->init_len = 500; /* sequence length */
1610 
1611   io->data = Make_Empty_Alignment(io);
1612 
1613   Make_Model_Complete(io->mod);
1614   Set_Model_Name(io->mod);
1615 
1616   io->colalias = NO;
1617   cdata = Compact_Data(io->data,io);
1618   Free_Seq(io->data,io->n_otu);
1619 
1620   tree = Make_Tree_From_Scratch(n_otu,cdata);
1621 
1622   Connect_CSeqs_To_Nodes(cdata,io,tree);
1623 
1624   tree->rates = RATES_Make_Rate_Struct(tree->n_otu);
1625   RATES_Init_Rate_Struct(tree->rates,io->rates,tree->n_otu);
1626 
1627   tree->times = TIMES_Make_Time_Struct(tree->n_otu);
1628   TIMES_Init_Time_Struct(tree->times,io->times,tree->n_otu);
1629 
1630   tree->data      = cdata;
1631   tree->mod       = mod;
1632   tree->io        = io;
1633   tree->n_pattern = tree->data->crunch_len;
1634 
1635 
1636   /* Allocate migrep model */
1637   mmod = PHYREX_Make_Migrep_Model(tree->n_otu,n_dim);
1638   tree->mmod = mmod;
1639   PHYREX_Init_Migrep_Mod(mmod,n_dim,0.0,0.0,w,h);
1640 
1641   mmod->lbda = lbda;
1642   mmod->rad = rad;
1643   mmod->mu = mu;
1644 
1645   if(mmod->lbda > mmod->max_lbda) mmod->lbda = mmod->max_lbda;
1646   if(mmod->rad > mmod->max_rad) mmod->rad = mmod->max_rad;
1647   if(mmod->mu > mmod->max_mu) mmod->mu = mmod->max_mu;
1648 
1649   if(mmod->lbda < mmod->min_lbda) mmod->lbda = mmod->min_lbda;
1650   if(mmod->rad < mmod->min_rad) mmod->rad = mmod->min_rad;
1651   if(mmod->mu < mmod->min_mu) mmod->mu = mmod->min_mu;
1652 
1653 
1654   mmod->sigsq = SLFV_Update_Sigsq(tree);
1655 
1656 
1657 
1658   // Duration of a generation in number of events
1659   mmod->gen_cal_time = 1./(2.*mmod->mu*pow(mmod->rad,2)/pow(w*h,2)*
1660                            (sqrt(2.)*mmod->rad*exp(-.5*pow(h/mmod->rad,2)) + h*sqrt(PI)*erf(sqrt(2.)*h/(2.*mmod->rad)) - sqrt(2.)*mmod->rad)*
1661                            (sqrt(2.)*mmod->rad*exp(-.5*pow(w/mmod->rad,2)) + w*sqrt(PI)*erf(sqrt(2.)*w/(2.*mmod->rad)) - sqrt(2.)*mmod->rad));
1662   // Divide by rate of events per calendar time unit to get duration of gen. in calendar time unit
1663   mmod->gen_cal_time *= 1./mmod->lbda;
1664 
1665 
1666   /* Prepare start disk */
1667   tree->young_disk = PHYREX_Make_Disk_Event(tree->mmod->n_dim,tree->n_otu);
1668   tree->young_disk->time = 0.0;
1669   tree->young_disk->age_fixed = YES;
1670   tree->young_disk->centr->lonlat[0] = .5*(tree->mmod->lim_up->lonlat[0]-tree->mmod->lim_do->lonlat[0]);
1671   tree->young_disk->centr->lonlat[1] = .5*(tree->mmod->lim_up->lonlat[1]-tree->mmod->lim_do->lonlat[1]);
1672   tree->young_disk->n_ldsk_a         = tree->n_otu;
1673 
1674   for(i=0;i<tree->n_otu;++i)
1675     {
1676       char *s;
1677       tree->young_disk->ldsk_a[i] = PHYREX_Make_Lindisk_Node(tree->mmod->n_dim);
1678       PHYREX_Init_Lindisk_Node(tree->young_disk->ldsk_a[i],tree->young_disk,tree->mmod->n_dim);
1679       s = (char *)mCalloc(strlen(tree->young_disk->ldsk_a[i]->coord->id)+1+20,sizeof(char));
1680       strcpy(s,tree->young_disk->ldsk_a[i]->coord->id);
1681       strcat(s,"_deme0\0");
1682       Free(tree->young_disk->ldsk_a[i]->coord->id);
1683       tree->young_disk->ldsk_a[i]->coord->id = s;
1684     }
1685 
1686   /* Generate coordinates for the tip nodes (uniform distribution on the rectangle) */
1687   for(i=0;i<tree->n_otu;i++)
1688     {
1689       tree->young_disk->ldsk_a[i]->coord->lonlat[0] = Uni()*(tree->mmod->lim_up->lonlat[0]-tree->mmod->lim_do->lonlat[0])+tree->mmod->lim_do->lonlat[0]; // longitude
1690       tree->young_disk->ldsk_a[i]->coord->lonlat[1] = Uni()*(tree->mmod->lim_up->lonlat[1]-tree->mmod->lim_do->lonlat[1])+tree->mmod->lim_do->lonlat[1]; // latitude
1691     }
1692 
1693   /* /\* !!!!!!!!!!!!!!!!!!!!!!!!!! *\/ */
1694   /* /\* Fix coordinates of first two tips *\/ */
1695   /* tree->young_disk->ldsk_a[0]->coord->lonlat[0] = (tree->mmod->lim_up->lonlat[0]+tree->mmod->lim_do->lonlat[0])/2; // longitude */
1696   /* tree->young_disk->ldsk_a[0]->coord->lonlat[1] = (tree->mmod->lim_up->lonlat[1]+tree->mmod->lim_do->lonlat[1])/2; // latitude */
1697   /* tree->young_disk->ldsk_a[1]->coord->lonlat[0] = (tree->mmod->lim_up->lonlat[1]+tree->mmod->lim_do->lonlat[1])/2 + 1; // longitude */
1698   /* tree->young_disk->ldsk_a[1]->coord->lonlat[1] = (tree->mmod->lim_up->lonlat[1]+tree->mmod->lim_do->lonlat[1])/2 + 1; // latitude */
1699 
1700 
1701   for(i=0;i<tree->n_otu;i++)
1702     {
1703       tree->young_disk->ldsk_a[i]->nd = tree->a_nodes[i];
1704       tree->a_nodes[i]->ldsk = tree->young_disk->ldsk_a[i];
1705     }
1706 
1707   SLFV_Simulate_Backward_Core(tree->young_disk,NO,tree);
1708 
1709   PHYREX_Ldsk_To_Tree(tree);
1710   Update_Ancestors(tree->n_root,tree->n_root->v[2],tree);
1711   Update_Ancestors(tree->n_root,tree->n_root->v[1],tree);
1712   RATES_Fill_Lca_Table(tree);
1713 
1714 
1715   /* min_rate = 1.E-5; */
1716   /* max_rate = 1.E-4; */
1717 
1718   phydbl T = PHYREX_Tree_Height(tree);
1719 
1720   tree->rates->bl_from_rt = YES;
1721   tree->rates->clock_r    = 0.1/fabs(2.*T);
1722   tree->rates->model      = STRICTCLOCK;
1723 
1724   RATES_Update_Cur_Bl(tree);
1725 
1726   Init_Model(cdata,mod,io);
1727 
1728   tree->mod->ras->n_catg   = 1;
1729   tree->mod->whichmodel    = HKY85;
1730   tree->mod->kappa->v      = 4.0;
1731 
1732   Make_Tree_For_Pars(tree);
1733   Make_Tree_For_Lk(tree);
1734   Make_Spr(tree);
1735   Evolve(tree->data,tree->mod,0,tree);
1736 
1737   PhyML_Printf("@@@ %G %G %G : ",mmod->lbda,mmod->mu,mmod->rad);
1738   for(int i=0;i<tree->n_otu-1;++i) for(int j=i+1;j<tree->n_otu;++j) PhyML_Printf("%G ",PHYREX_Dist_Between_Two_Ldsk(tree->a_nodes[i]->ldsk,tree->a_nodes[j]->ldsk,tree));
1739   PhyML_Printf(" : ");
1740   for(int i=0;i<tree->n_otu-1;++i) for(int j=i+1;j<tree->n_otu;++j) PhyML_Printf("%G ",Euclidean_Dist(tree->a_nodes[i]->ldsk->coord,tree->a_nodes[j]->ldsk->coord));
1741   PhyML_Printf("\n");
1742   for(int i=0;i<tree->n_otu;++i) PhyML_Printf("\n%20s %12G %12G",
1743                                               tree->a_nodes[i]->name,
1744                                               tree->a_nodes[i]->ldsk->coord->lonlat[0],
1745                                               tree->a_nodes[i]->ldsk->coord->lonlat[1]);
1746   PhyML_Printf("\n\n");
1747   PhyML_Printf(">> SEQUENCES\n");
1748   Print_CSeq(stdout,NO,tree->data,tree);
1749   PhyML_Printf("<< SEQUENCES");
1750 
1751   /* Init_Partial_Lk_Tips_Double(tree); */
1752   /* Init_Partial_Lk_Loc(tree); */
1753 
1754   disk = tree->young_disk->prev;
1755   while(disk->prev) disk = disk->prev;
1756 
1757   PhyML_Printf("\n. Parameter boundaries: lambda:[%G,%G]; mu=[%G,%G]; rad=[%G,%G]",
1758                mmod->min_lbda,mmod->max_lbda,
1759                mmod->min_mu,mmod->max_mu,
1760                mmod->min_rad,mmod->max_rad);
1761   PhyML_Printf("\n. Useful parameters: lambda=%G; mu=%G; rad=%G; clockr=%G; sigsq=%G",
1762                mmod->lbda,
1763                mmod->mu,
1764                mmod->rad,
1765                tree->rates->clock_r,
1766                mmod->sigsq);
1767 
1768   PhyML_Printf("\n. Useful statistics: t.root=%f n.int=%d n.coal=%d n.hit=%d root.x=%f root.y=%f nt.div=%f\n",
1769                disk->time,
1770                PHYREX_Total_Number_Of_Intervals(tree),
1771                PHYREX_Total_Number_Of_Coal_Disks(tree),
1772                PHYREX_Total_Number_Of_Hit_Disks(tree),
1773                disk->ldsk->coord->lonlat[0],
1774                disk->ldsk->coord->lonlat[1],
1775                Nucleotide_Diversity(tree->data));
1776 
1777   Exit("\n");
1778 
1779   /* PhyML_Printf("\n. Tree: "); */
1780   /* PhyML_Printf("\n. %s \n",Write_Tree(tree)); */
1781 
1782   /* PhyML_Printf("\n. Spatial coordinates: "); */
1783   /* for(i=0;i<tree->n_otu;i++) */
1784   /*   { */
1785   /*     PhyML_Printf("\n%15s %12f %12f", */
1786   /*                  tree->young_disk->ldsk_a[i]->nd->name, */
1787   /*                  tree->young_disk->ldsk_a[i]->coord->lonlat[0],                    */
1788   /*                  tree->young_disk->ldsk_a[i]->coord->lonlat[1]); */
1789   /*   } */
1790   /* PhyML_Printf("\n"); */
1791 
1792   /* for(int i=0;i<io->n_otu;i++) */
1793   /*   { */
1794   /*     PhyML_Printf("\n<clade id=\"clad%d\">",i+1); */
1795   /*     PhyML_Printf("\n\t<taxon value=\"%s\"/>",tree->a_nodes[i]->name); */
1796   /*     PhyML_Printf("\n</clade>"); */
1797   /*     PhyML_Printf("\n<calibration id=\"cal%d\">",i+1); */
1798   /*     PhyML_Printf("\n\t<lower>0.0</lower>"); */
1799   /*     PhyML_Printf("\n\t<upper>0.0</upper>"); */
1800   /*     PhyML_Printf("\n\t<appliesto clade.id=\"clad%d\"/>",i+1); */
1801   /*     PhyML_Printf("\n</calibration>"); */
1802   /*   } */
1803 
1804   return(tree);
1805 }
1806 
1807 //////////////////////////////////////////////////////////////
1808 //////////////////////////////////////////////////////////////
1809 
SLFV_Simulate_Independent_Loci(int n_otu,int n_loci,phydbl w,phydbl h,int r_seed)1810 t_tree *SLFV_Simulate_Independent_Loci(int n_otu, int n_loci, phydbl w, phydbl h, int r_seed)
1811 {
1812   t_tree *tree;
1813   int n_dim,i;
1814   t_phyrex_mod *mmod;
1815   option *io;
1816   t_mod *mod;
1817   t_opt *s_opt;
1818   calign *cdata;
1819   phydbl area;
1820   phydbl subst_rate;
1821   int locus_idx;
1822 
1823   n_dim = 2; // 2-dimensional landscape
1824   area  = w * h;
1825 
1826   io    = (option *)Make_Input();
1827   mod   = (t_mod *)Make_Model_Basic();
1828   s_opt = (t_opt *)Make_Optimiz();
1829 
1830   Set_Defaults_Input(io);
1831   Set_Defaults_Model(mod);
1832   Set_Defaults_Optimiz(s_opt);
1833 
1834 
1835   // io stuff
1836   io->mod      = mod;
1837   mod->io      = io;
1838   mod->s_opt   = s_opt;
1839   io->r_seed   = r_seed;
1840   io->n_otu    = n_otu;
1841   io->init_len = n_loci; /* sequence length */
1842   io->data     = Make_Empty_Alignment(io);
1843   io->colalias = NO;
1844 
1845   Make_Model_Complete(io->mod);
1846   Set_Model_Name(io->mod);
1847 
1848   cdata = Compact_Data(io->data,io);
1849   Free_Seq(io->data,io->n_otu);
1850 
1851   Print_Settings(io);
1852 
1853 
1854   // tree stuff
1855   tree = Make_Tree_From_Scratch(n_otu,cdata);
1856   Random_Tree(tree);
1857   Connect_CSeqs_To_Nodes(cdata,io,tree);
1858   tree->rates = RATES_Make_Rate_Struct(tree->n_otu);
1859   io->rates->model = STRICTCLOCK;
1860   RATES_Init_Rate_Struct(tree->rates,io->rates,tree->n_otu);
1861 
1862   tree->times = TIMES_Make_Time_Struct(tree->n_otu);
1863   TIMES_Init_Time_Struct(tree->times,io->times,tree->n_otu);
1864 
1865   tree->data      = cdata;
1866   tree->mod       = mod;
1867   tree->io        = io;
1868   tree->n_pattern = tree->data->crunch_len;
1869 
1870   Init_Model(cdata,mod,io);
1871 
1872   tree->mod->ras->n_catg = 1;
1873   tree->mod->whichmodel  = HKY85;
1874   tree->mod->kappa->v    = 4.0;
1875 
1876   Make_Tree_For_Pars(tree);
1877   Make_Tree_For_Lk(tree);
1878   Make_Spr(tree);
1879 
1880   // migrep model stuff */
1881   mmod = PHYREX_Make_Migrep_Model(tree->n_otu,n_dim);
1882   tree->mmod = mmod;
1883   PHYREX_Init_Migrep_Mod(mmod,n_dim,0.0,0.0,w,h);
1884 
1885 
1886   /* /\* Death proba param *\/ */
1887   /* mmod->mu = Uni(); */
1888   /* /\* Theta (radius) *\/ */
1889   /* mmod->rad = Uni()*(1.0 - 0.2) + 0.2; */
1890   /* /\* Rate of events *\/ */
1891   /* mmod->lbda = 1.0; */
1892   /* /\* Population density *\/ */
1893   /* mmod->rho = Uni()*(3.0 - 0.1) + 0.1; */
1894 
1895 
1896   /* Death proba param */
1897   mmod->mu = 0.8;
1898   /* Theta (radius) */
1899   mmod->rad = 0.3;
1900   /* Rate of events */
1901   mmod->lbda = 10.*w*h;
1902   /* (Actual, not effective) population density */
1903   mmod->rho = 1.;
1904 
1905 
1906 
1907   // Duration of a generation in number of events
1908   mmod->gen_cal_time = 1./(2.*mmod->mu*pow(mmod->rad,2)/pow(w*h,2)*
1909                            (sqrt(2.)*mmod->rad*exp(-.5*pow(h/mmod->rad,2)) + h*sqrt(PI)*erf(sqrt(2.)*h/(2.*mmod->rad)) - sqrt(2.)*mmod->rad)*
1910                            (sqrt(2.)*mmod->rad*exp(-.5*pow(w/mmod->rad,2)) + w*sqrt(PI)*erf(sqrt(2.)*w/(2.*mmod->rad)) - sqrt(2.)*mmod->rad));
1911   // Divide by rate of events per calendar time unit to get duration of gen. in calendar time unit
1912   mmod->gen_cal_time *= 1./mmod->lbda;
1913 
1914   /* Dispersal parameter (per generation) */
1915   mmod->sigsq = 4.*pow(mmod->rad,4)*mmod->lbda*PI*mmod->mu/area * mmod->gen_cal_time;
1916 
1917 
1918   phydbl p = SLFV_Prob_Two_Random_Lineages_Coal_One_Event(w,h,mmod->mu,mmod->rad);
1919   printf("\n. p.sim: %G p.appx: %G",p,4.*pow(mmod->mu,2)*pow(PI,2)*pow(mmod->rad,4)/(pow(w*h,2)));
1920   phydbl rhoe = 1./(1.-exp(-mmod->lbda*p*mmod->gen_cal_time));
1921   rhoe /= area;
1922 
1923   PhyML_Printf("\n. lambda=%G; mu=%G; rad=%G; clockr=%G; sigsq=%G; neigh=%G; rhoe(small rad)=%G rhoe(numerical)=%G rhoe(large rad)=%G one_gen=%G",
1924                mmod->lbda,
1925                mmod->mu,
1926                mmod->rad,
1927                tree->rates->clock_r,
1928                mmod->sigsq,
1929                2./mmod->mu,
1930                (1./(1.-exp(-4.*pow(mmod->mu,2)*pow(PI,2)*pow(mmod->rad,4)/(pow(w*h,2))*mmod->lbda*mmod->gen_cal_time)))/area,
1931                rhoe,
1932                /* (1./(1.-exp(-mmod->lbda*mmod->mu/(w*h))))/area, */
1933                (1./(1.-exp(-mmod->mu)))/area,
1934                mmod->gen_cal_time);
1935   fflush(NULL);
1936   /* Exit("\n"); */
1937 
1938 
1939   // Initialize position of sampled individuals
1940   tree->young_disk = PHYREX_Make_Disk_Event(mmod->n_dim,tree->n_otu);
1941   PHYREX_Init_Disk_Event(tree->young_disk,mmod->n_dim,NULL);
1942   tree->young_disk->time             = 0.0;
1943   tree->young_disk->mmod             = mmod;
1944   tree->young_disk->centr->lonlat[0] = .5*(mmod->lim_up->lonlat[0]-mmod->lim_do->lonlat[0]);
1945   tree->young_disk->centr->lonlat[1] = .5*(mmod->lim_up->lonlat[1]-mmod->lim_do->lonlat[1]);
1946   tree->young_disk->n_ldsk_a         = tree->n_otu;
1947 
1948   for(i=0;i<tree->n_otu;++i)
1949     {
1950       char *s;
1951       tree->young_disk->ldsk_a[i] = PHYREX_Make_Lindisk_Node(mmod->n_dim);
1952       PHYREX_Init_Lindisk_Node(tree->young_disk->ldsk_a[i],tree->young_disk,mmod->n_dim);
1953       s = (char *)mCalloc(strlen(tree->young_disk->ldsk_a[i]->coord->id)+1+20,sizeof(char));
1954       strcpy(s,tree->young_disk->ldsk_a[i]->coord->id);
1955       strcat(s,"_deme0\0");
1956       Free(tree->young_disk->ldsk_a[i]->coord->id);
1957       tree->young_disk->ldsk_a[i]->coord->id = s;
1958     }
1959 
1960   for(i=0;i<tree->n_otu;i++)
1961     {
1962       tree->young_disk->ldsk_a[i]->coord->lonlat[0] = Uni()*(tree->mmod->lim_up->lonlat[0]-tree->mmod->lim_do->lonlat[0]) + tree->mmod->lim_do->lonlat[0]; // longitude
1963       tree->young_disk->ldsk_a[i]->coord->lonlat[1] = Uni()*(tree->mmod->lim_up->lonlat[1]-tree->mmod->lim_do->lonlat[1]) + tree->mmod->lim_do->lonlat[1]; // latitude
1964     }
1965 
1966 
1967 
1968   // Generate sequences
1969   subst_rate = .0;
1970   locus_idx = 0;
1971   do
1972     {
1973       SLFV_Simulate_Backward_Core(tree->young_disk,NO,tree);
1974 
1975       // Random selection of a pair. Print out physical distances
1976       // between tips and time of coalescence for this pair, at this
1977       // particular locus
1978       {
1979         int *permut = Permutate(tree->n_otu);
1980         t_ldsk *lin1 = tree->young_disk->ldsk_a[permut[0]];
1981         t_ldsk *lin2 = tree->young_disk->ldsk_a[permut[1]];
1982         phydbl dist =
1983           sqrt(pow(lin1->coord->lonlat[0]-lin2->coord->lonlat[0],2) +
1984                pow(lin1->coord->lonlat[1]-lin2->coord->lonlat[1],2));
1985 
1986         t_dsk *disk = tree->young_disk;
1987         do
1988           {
1989             if(disk->ldsk == lin1->prev) lin1 = lin1->prev;
1990             if(disk->ldsk == lin2->prev) lin2 = lin2->prev;
1991             if(lin1 == lin2) break; // found MRCA
1992             disk = disk->prev;
1993           }
1994         while(disk);
1995 
1996         assert(lin1 && lin2);
1997 
1998         PhyML_Printf("\n. #$# locus %4d ; physical distance: %G time to coalescence: %G",locus_idx,dist,lin1->disk->time);
1999       }
2000 
2001       PHYREX_Ldsk_To_Tree(tree);
2002 
2003 
2004       Update_Ancestors(tree->n_root,tree->n_root->v[2],tree);
2005       Update_Ancestors(tree->n_root,tree->n_root->v[1],tree);
2006       RATES_Fill_Lca_Table(tree);
2007 
2008 
2009       tree->rates->bl_from_rt = YES;
2010       /* tree->rates->clock_r    = 1.E-3/FABS(T); // slow */
2011       /* tree->rates->clock_r    = 1.E-0/FABS(T); // fast */
2012       /* tree->rates->clock_r    = 1.E-1/FABS(T); // medium */
2013       /* tree->rates->clock_r = 1.E-7; // slow; */
2014       tree->rates->clock_r = 1.E-3; // medium;
2015       /* tree->rates->clock_r = 1.E-2; // fast; */
2016 
2017 
2018       PhyML_Printf("\n. #!# mutation rate at that locus: %G subst. per time unit",tree->rates->clock_r);
2019       subst_rate += tree->rates->clock_r;
2020 
2021       RATES_Update_Cur_Bl(tree);
2022 
2023       char *s = Write_Tree(tree);
2024       PhyML_Printf("\n. #@# tree: %s",s);
2025       Free(s);
2026 
2027       Evolve(tree->data,tree->mod,locus_idx,tree);
2028 
2029       t_dsk *disk = tree->young_disk->prev;
2030       while(disk->prev)
2031         {
2032           disk = disk->prev;
2033           if(disk->next->ldsk != NULL) Free_Ldisk(disk->next->ldsk);
2034           Free_Disk(disk->next);
2035         }
2036 
2037       locus_idx++;
2038     }
2039   while(locus_idx < n_loci);
2040 
2041   PhyML_Printf("\n. #s# average rate of mutation : %G",subst_rate/(phydbl)n_loci);
2042 
2043   tree->data->format = IBDSIM;
2044   PhyML_Printf("\n\n");
2045   Print_CSeq(stdout,NO,tree->data,tree);
2046 
2047 
2048   Exit("\n");
2049 
2050   return tree;
2051 }
2052 
2053 
2054 /*////////////////////////////////////////////////////////////
2055 ////////////////////////////////////////////////////////////*/
2056 
SLFV_Integrated_Coal_Rate(t_ldsk * l0,t_ldsk * l1,phydbl T,t_tree * tree)2057 void SLFV_Integrated_Coal_Rate(t_ldsk *l0, t_ldsk *l1, phydbl T, t_tree *tree)
2058 {
2059   t_ldsk *baseline;
2060   phydbl integrated_coal_rate,njumps,coaltime;
2061   int coal;
2062 
2063   PhyML_Printf("\n# ");
2064 
2065 
2066   coal = -1;
2067   baseline = l0;
2068   integrated_coal_rate = 0.0;
2069   njumps = 0.0;
2070   coaltime = 0.0;
2071   do
2072     {
2073       if(l0->prev->disk->time > l1->prev->disk->time)
2074         {
2075           assert(coal == -1);
2076           integrated_coal_rate +=
2077             tree->mmod->lbda * SLFV_Prob_Two_Lineages_Coal(l0,l1,tree) *
2078             fabs(baseline->disk->time - MAX(l0->prev->disk->time,l1->prev->disk->time));
2079 
2080           njumps+=1.0;
2081           baseline = l0->prev;
2082 
2083           l0 = l0->prev;
2084         }
2085       else if(l0->prev->disk->time < l1->prev->disk->time)
2086         {
2087           assert(coal == -1);
2088           integrated_coal_rate +=
2089             tree->mmod->lbda * SLFV_Prob_Two_Lineages_Coal(l0,l1,tree) *
2090             fabs(baseline->disk->time - MAX(l0->prev->disk->time,l1->prev->disk->time));
2091 
2092           njumps+=1.0;
2093           baseline = l1->prev;
2094 
2095           l1 = l1->prev;
2096         }
2097       else
2098         {
2099           assert(l0->prev);
2100           assert(l1->prev);
2101           assert(l0->prev == l1->prev);
2102           coaltime = l0->prev->disk->time;
2103           if(coaltime > T)
2104             {
2105               coal = 1;
2106             }
2107           else
2108             {
2109               coal = 0;
2110               integrated_coal_rate +=
2111                 tree->mmod->lbda * SLFV_Prob_Two_Lineages_Coal(l0,l1,tree) *
2112                 fabs(baseline->disk->time - T);
2113             }
2114 
2115           l0->prev = NULL;
2116           l1->prev = NULL;
2117         }
2118     }
2119   while(l0->prev && l1->prev);
2120 
2121   PhyML_Printf("%d %f %g %f",
2122                coal,
2123                integrated_coal_rate,
2124                njumps,
2125                coaltime);
2126   PhyML_Printf("\n");
2127   Exit("\n");
2128 }
2129 
2130 /*////////////////////////////////////////////////////////////
2131 ////////////////////////////////////////////////////////////*/
2132 // Calculation of Pr(T>n) where T is the proba of coalescence
SLFV_Sum_Coal_Rate(t_ldsk * l0,t_ldsk * l1,int n,t_tree * tree)2133 void SLFV_Sum_Coal_Rate(t_ldsk *l0, t_ldsk *l1, int n, t_tree *tree)
2134 {
2135   t_dsk *disk;
2136   phydbl prod_one_min_coal_prob;
2137   int coal,n_evts;
2138 
2139 
2140   PhyML_Printf("\n# ");
2141 
2142   prod_one_min_coal_prob = 0.0;
2143   n_evts = 1;
2144   coal = 0;
2145   disk = tree->young_disk->prev;
2146   do
2147     {
2148 
2149       if(n_evts <= n) prod_one_min_coal_prob += log(1. - SLFV_Prob_Two_Lineages_Coal(l0,l1,tree));
2150 
2151       if(l0->prev == disk->ldsk) l0 = disk->ldsk;
2152       if(l1->prev == disk->ldsk) l1 = disk->ldsk;
2153       if(l0 == l1 && n_evts <= n) coal = 1;
2154       disk = disk->prev;
2155       n_evts++;
2156     }
2157   while(disk);
2158 
2159   prod_one_min_coal_prob = exp(prod_one_min_coal_prob);
2160 
2161   PhyML_Printf("%d %d %f",coal,n_evts,prod_one_min_coal_prob);
2162   PhyML_Printf("\n");
2163   Exit("\n");
2164 }
2165