1 /*
2
3 PhyML: a program that computes maximum likelihood phylogenies from
4 DNA or AA homologous sequences.
5
6 Copyright (C) Stephane Guindon. Oct 2003 onward.
7
8 All parts of the source except where indicated are distributed under
9 the GNU public licence. See http://www.opensource.org for details.
10
11 */
12
13 #include "slfv.h"
14
15
16 /*////////////////////////////////////////////////////////////
17 ////////////////////////////////////////////////////////////*/
18
SLFV_Prob_Two_Lineages_Coal(t_ldsk * l0,t_ldsk * l1,t_tree * tree)19 phydbl SLFV_Prob_Two_Lineages_Coal(t_ldsk *l0, t_ldsk *l1, t_tree *tree)
20 {
21 phydbl eucl,prob,area,W,H;
22
23 assert(tree->mmod->n_dim == 2);
24
25 W = (tree->mmod->lim_up->lonlat[0]-tree->mmod->lim_do->lonlat[0]);
26 H = (tree->mmod->lim_up->lonlat[1]-tree->mmod->lim_do->lonlat[1]);
27
28 area = W*H;
29
30 eucl = Euclidean_Dist(l0->coord,l1->coord);
31
32 prob = 0.0;
33 prob += 2.*log(tree->mmod->mu);
34 prob += 2.*log(tree->mmod->rad);
35 prob += log(PI);
36 prob -= log(4.*area);
37 prob -= 0.25*eucl*eucl/(tree->mmod->rad*tree->mmod->rad);
38
39
40 prob +=
41 log(erf((l0->coord->lonlat[0] + l1->coord->lonlat[0])/(2.*tree->mmod->rad)) +
42 erf((2.*W - l0->coord->lonlat[0] - l1->coord->lonlat[0])/(2.*tree->mmod->rad)));
43
44 prob +=
45 log(erf((l0->coord->lonlat[1] + l1->coord->lonlat[1])/(2.*tree->mmod->rad)) +
46 erf((2.*H - l0->coord->lonlat[1] - l1->coord->lonlat[1])/(2.*tree->mmod->rad)));
47
48 prob = exp(prob);
49
50 return(prob);
51 }
52
53 /*////////////////////////////////////////////////////////////
54 ////////////////////////////////////////////////////////////*/
55
SLFV_Prob_Two_Random_Lineages_Coal_One_Event(phydbl w,phydbl h,phydbl mu,phydbl rad)56 phydbl SLFV_Prob_Two_Random_Lineages_Coal_One_Event(phydbl w, phydbl h, phydbl mu, phydbl rad)
57 {
58 phydbl cx,cy;
59 phydbl l1x,l1y;
60 phydbl l2x,l2y;
61 phydbl prob_hit;
62 int n_hit,n_trials,trial;
63
64 n_trials = 10000000;
65 trial = 0;
66 n_hit = 0;
67 do
68 {
69 cx = Uni()*w;
70 cy = Uni()*h;
71
72 l1x = Uni()*w;
73 l1y = Uni()*h;
74
75 l2x = Uni()*w;
76 l2y = Uni()*h;
77
78 prob_hit = log(mu);
79 prob_hit += -POW(l1x - cx,2)/(2.*pow(rad,2));
80 prob_hit += -POW(l1y - cy,2)/(2.*pow(rad,2));
81
82 prob_hit += log(mu);
83 prob_hit += -POW(l2x - cx,2)/(2.*pow(rad,2));
84 prob_hit += -POW(l2y - cy,2)/(2.*pow(rad,2));
85
86 prob_hit = exp(prob_hit);
87
88 if(!(Uni() > prob_hit)) n_hit++;
89
90 trial++;
91 }
92 while(trial != n_trials);
93
94 return((phydbl)n_hit/n_trials);
95 }
96
97 /*////////////////////////////////////////////////////////////
98 ////////////////////////////////////////////////////////////*/
99 // Coalescence rate with time expressed in calendar unit
SLFV_Coalescence_Rate(t_tree * tree)100 phydbl SLFV_Coalescence_Rate(t_tree *tree)
101 {
102 phydbl mu,theta,lbda,w,h;
103
104 mu = tree->mmod->mu;
105 theta = tree->mmod->rad;
106 lbda = tree->mmod->lbda;
107 w = (tree->mmod->lim_up->lonlat[0]-tree->mmod->lim_do->lonlat[0]);
108 h = (tree->mmod->lim_up->lonlat[1]-tree->mmod->lim_do->lonlat[1]);
109
110 return(4.*POW(PI,2)*POW(theta,4)*POW(mu,2)*lbda / (POW(w,2)*POW(h,2)));
111 }
112
113 /*////////////////////////////////////////////////////////////
114 ////////////////////////////////////////////////////////////*/
115
SLFV_Path_Logdensity(t_ldsk * young,t_ldsk * old,phydbl sd,t_tree * tree)116 phydbl SLFV_Path_Logdensity(t_ldsk *young, t_ldsk *old, phydbl sd, t_tree *tree)
117 {
118 int i,j,err;
119 t_ldsk *ldsk;
120 phydbl lnDens,mode;
121 phydbl X,Y,Xp,Yp;
122 phydbl slope,inter;
123 int dir_to_young;
124
125 lnDens = 0.0;
126 mode = 0.0;
127
128 for(i=0;i<tree->mmod->n_dim;i++)
129 {
130 j = 0;
131 ldsk = young->prev;
132
133 X = old->coord->lonlat[i];
134 Y = old->disk->time;
135
136 // Density up to the last jump (to old ldsk) which should
137 // not be accounted for (it is not part of the simulation
138 // when randomly generating a path using SLFV_Generate_Path
139 while(ldsk != old)
140 {
141
142 if(ldsk->n_next > 1)
143 dir_to_young = PHYREX_Get_Next_Direction(young,ldsk);
144 else
145 dir_to_young = 0;
146
147 Xp = ldsk->next[dir_to_young]->coord->lonlat[i];
148 Yp = ldsk->next[dir_to_young]->disk->time;
149
150 slope = (Yp-Y)/(Xp-X);
151 inter = Y - slope*X;
152
153 assert(ldsk != NULL);
154
155 /* mode = (old->coord->lonlat[i] - young->coord->lonlat[i])/(n_evt+1.-j) + young->coord->lonlat[i]; */
156
157 mode = (ldsk->disk->time - inter)/slope;
158
159 lnDens += Log_Dnorm_Trunc(ldsk->coord->lonlat[i],
160 mode,
161 sd,
162 tree->mmod->lim_do->lonlat[i],
163 tree->mmod->lim_up->lonlat[i],&err);
164
165 /* lnDens += Log_Dnorm_Trunc(ldsk->disk->centr->lonlat[i], */
166 /* mode, */
167 /* sd, */
168 /* 0.0, */
169 /* tree->mmod->lim->lonlat[i],&err); */
170
171 lnDens += Log_Dnorm_Trunc(ldsk->disk->centr->lonlat[i],
172 ldsk->coord->lonlat[i],
173 sd,
174 tree->mmod->lim_do->lonlat[i],
175 tree->mmod->lim_up->lonlat[i],&err);
176 /* lnDens += log(1./tree->mmod->lim->lonlat[i]); */
177
178 ldsk = ldsk->prev;
179 j++;
180 }
181 }
182
183 return(lnDens);
184 }
185
186 /*////////////////////////////////////////////////////////////
187 ////////////////////////////////////////////////////////////*/
188
SLFV_Sample_Path(t_ldsk * young,t_ldsk * old,phydbl sd,phydbl * global_hr,t_tree * tree)189 void SLFV_Sample_Path(t_ldsk *young, t_ldsk *old, phydbl sd, phydbl *global_hr, t_tree *tree)
190 {
191 phydbl new_ldsk_pos,cur_ldsk_pos;
192 phydbl new_centr_pos,cur_centr_pos;
193 phydbl new_glnL,cur_glnL;
194 phydbl u,alpha,ratio,hr;
195 int n_mcmc_iter;
196 int i,err;
197 t_ldsk *ldsk;
198 int accept,reject;
199 int path_len;
200
201
202 path_len = PHYREX_Path_Len(young,old)-2;
203 if(path_len == 0) return;
204
205 assert(young != old);
206 new_glnL = cur_glnL = tree->mmod->c_lnL;
207
208 n_mcmc_iter = 0;
209 accept = reject = 0;
210 do
211 {
212
213 ldsk = young->prev;
214 do
215 {
216 assert(ldsk->prev);
217
218 PHYREX_Store_Geo_Coord(ldsk->coord);
219 PHYREX_Store_Geo_Coord(ldsk->disk->centr);
220
221 hr = 0.0;
222 ratio = 0.0;
223
224 /* cur_glnL = SLFV_Lk_Range(young->disk,old->disk,tree); */
225 cur_glnL = SLFV_Lk_Range(ldsk->disk,ldsk->prev->disk,tree);
226 new_glnL = cur_glnL;
227
228 for(i=0;i<tree->mmod->n_dim;i++)
229 {
230 cur_ldsk_pos = ldsk->coord->lonlat[i];
231 cur_centr_pos = ldsk->disk->centr->lonlat[i];
232
233 /* new_centr_pos = Rnorm_Trunc(cur_centr_pos, */
234 /* 0.5*sd, */
235 /* 0.0, */
236 /* tree->mmod->lim->lonlat[i],&err); */
237
238 new_centr_pos = Uni() * (tree->mmod->lim_up->lonlat[i]-tree->mmod->lim_do->lonlat[i])+tree->mmod->lim_do->lonlat[i];
239
240 new_ldsk_pos = Rnorm_Trunc(new_centr_pos,
241 sd,
242 tree->mmod->lim_do->lonlat[i],
243 tree->mmod->lim_up->lonlat[i],&err);
244
245
246 /* PhyML_Printf("\n. cur_centr: %12f new_centr: %12f",cur_centr_pos,new_centr_pos); */
247 /* PhyML_Printf(" d(new|cur)=%12f d(cur|new): %12f", */
248 /* Log_Dnorm_Trunc(new_centr_pos, */
249 /* cur_centr_pos, */
250 /* 0.5*sd, */
251 /* 0.0, */
252 /* tree->mmod->lim->lonlat[i],&err), */
253 /* Log_Dnorm_Trunc(cur_centr_pos, */
254 /* new_centr_pos, */
255 /* 0.5*sd, */
256 /* 0.0, */
257 /* tree->mmod->lim->lonlat[i],&err)); */
258
259
260 /* PhyML_Printf(" cur_ldsk: %12f new_ldsk: %12f",cur_ldsk_pos,new_ldsk_pos); */
261 /* PhyML_Printf(" d(new|cur)=%12f d(cur|new): %12f", */
262 /* Log_Dnorm_Trunc(new_ldsk_pos, */
263 /* new_centr_pos, */
264 /* 0.5*sd, */
265 /* 0.0, */
266 /* tree->mmod->lim->lonlat[i],&err), */
267 /* Log_Dnorm_Trunc(cur_ldsk_pos, */
268 /* cur_centr_pos, */
269 /* 0.5*sd, */
270 /* 0.0, */
271 /* tree->mmod->lim->lonlat[i],&err)); */
272
273
274 hr -= Log_Dnorm_Trunc(new_ldsk_pos,
275 new_centr_pos,
276 sd,
277 tree->mmod->lim_do->lonlat[i],
278 tree->mmod->lim_up->lonlat[i],&err);
279
280 /* hr -= Log_Dnorm_Trunc(new_centr_pos, */
281 /* cur_centr_pos, */
282 /* 0.5*sd, */
283 /* 0.0, */
284 /* tree->mmod->lim->lonlat[i],&err); */
285
286
287 hr += Log_Dnorm_Trunc(cur_ldsk_pos,
288 cur_centr_pos,
289 sd,
290 tree->mmod->lim_do->lonlat[i],
291 tree->mmod->lim_up->lonlat[i],&err);
292
293 /* hr += Log_Dnorm_Trunc(cur_centr_pos, */
294 /* new_centr_pos, */
295 /* 0.5*sd, */
296 /* 0.0, */
297 /* tree->mmod->lim->lonlat[i],&err); */
298
299
300 ldsk->coord->lonlat[i] = new_ldsk_pos;
301 ldsk->disk->centr->lonlat[i] = new_centr_pos;
302 }
303
304 /* new_glnL = SLFV_Lk_Range(young->disk,old->disk,tree); */
305 /* new_glnL = SLFV_Lk_Core_Range(young->disk,old->disk,tree); */
306 new_glnL = SLFV_Lk_Range(ldsk->disk,ldsk->prev->disk,tree);
307 /* new_glnL = SLFV_Lk(tree); */
308
309
310 /* PhyML_Printf("\n. k: %4d lnL: %f",k,cur_glnL); */
311
312 ratio += (new_glnL - cur_glnL);
313 ratio += hr;
314
315 /* (*global_hr) -= ratio; */
316
317 ratio = exp(ratio);
318 alpha = MIN(1.,ratio);
319
320 u = Uni();
321
322 if(u > alpha) /* Reject */
323 {
324 PHYREX_Restore_Geo_Coord(ldsk->coord);
325 PHYREX_Restore_Geo_Coord(ldsk->disk->centr);
326 reject++;
327 }
328 else
329 {
330 cur_glnL = new_glnL;
331 accept++;
332 }
333
334 ldsk = ldsk->prev;
335 }
336 while(ldsk != old);
337
338 n_mcmc_iter++;
339 }
340 while(n_mcmc_iter < 100 * path_len);
341 /* while(n_mcmc_iter < 1); */
342 /* PhyML_Printf("\n. ratio: %f %d %d",(phydbl)(accept)/(phydbl)(accept+reject),accept,reject); */
343 }
344
345 /*////////////////////////////////////////////////////////////
346 ////////////////////////////////////////////////////////////*/
347
SLFV_Generate_Path(t_ldsk * young,t_ldsk * old,phydbl n_evt,phydbl sd,t_tree * tree)348 t_ldsk *SLFV_Generate_Path(t_ldsk *young, t_ldsk *old, phydbl n_evt, phydbl sd, t_tree *tree)
349 {
350 int i,j,swap,err;
351 phydbl *time,dum,mode;
352 t_ldsk *path,**ldsk_a;
353 t_dsk *disk;
354 phydbl X,Y,Xp,Yp;
355 phydbl slope,inter;
356
357 path = NULL;
358
359 if(n_evt == 0) return(NULL); // path is set to NULL
360
361 time = (phydbl *)mCalloc(n_evt,sizeof(phydbl));
362 ldsk_a = (t_ldsk **)mCalloc(n_evt,sizeof(t_ldsk *));
363
364
365 for(i=0;i<n_evt;i++) time[i] = Uni()*(young->disk->time - old->disk->time) + old->disk->time;
366
367 /* Bubble sort time in decreasing order */
368 do
369 {
370 swap = NO;
371 for(i=0;i<n_evt-1;i++)
372 {
373 if(time[i+1] > time[i])
374 {
375 swap = YES;
376 dum = time[i+1];
377 time[i+1] = time[i];
378 time[i] = dum;
379 }
380 }
381 }
382 while(swap == YES);
383
384 for(i=0;i<n_evt;i++)
385 {
386 ldsk_a[i] = PHYREX_Make_Lindisk_Node(tree->mmod->n_dim);
387 disk = PHYREX_Make_Disk_Event(tree->mmod->n_dim,tree->n_otu);
388 PHYREX_Init_Lindisk_Node(ldsk_a[i],disk,tree->mmod->n_dim);
389 PHYREX_Make_Lindisk_Next(ldsk_a[i]);
390 PHYREX_Init_Disk_Event(disk,tree->mmod->n_dim,tree->mmod);
391 disk->ldsk = ldsk_a[i];
392 disk->time = time[i];
393 }
394
395 for(i=0;i<n_evt-1;i++) ldsk_a[i]->prev = ldsk_a[i+1];
396 ldsk_a[i]->prev = NULL;
397
398 for(i=1;i<n_evt;i++) ldsk_a[i]->next[0] = ldsk_a[i-1];
399 ldsk_a[0]->next[0] = NULL;
400
401 path = ldsk_a[0];
402
403 /* Instantiate path */
404 for(i=0;i<tree->mmod->n_dim;i++)
405 {
406 X = old->coord->lonlat[i];
407 Y = old->disk->time;
408
409 for(j=0;j<n_evt;j++)
410 {
411
412 if(j == 0)
413 {
414 Xp = young->coord->lonlat[i];
415 Yp = young->disk->time;
416 }
417 else
418 {
419 assert(ldsk_a[j]->n_next == 1);
420 Xp = ldsk_a[j]->next[0]->coord->lonlat[i];
421 Yp = ldsk_a[j]->next[0]->disk->time;
422 }
423
424 slope = (Yp-Y)/(Xp-X);
425 inter = Y - slope*X;
426
427 if(j == 0)
428 {
429 mode = (young->disk->time - inter)/slope;
430 }
431 else
432 {
433 mode = (ldsk_a[j]->next[0]->disk->time - inter)/slope;
434 }
435
436 ldsk_a[j]->coord->lonlat[i] = Rnorm_Trunc(mode,
437 sd,
438 tree->mmod->lim_do->lonlat[i],
439 tree->mmod->lim_up->lonlat[i],&err);
440
441
442 /* ldsk_a[j]->disk->centr->lonlat[i] = Rnorm_Trunc(mode, */
443 /* sd, */
444 /* 0.0, */
445 /* tree->mmod->lim->lonlat[i],&err); */
446
447
448 ldsk_a[j]->disk->centr->lonlat[i] = Rnorm_Trunc(ldsk_a[j]->coord->lonlat[i],
449 sd,
450 tree->mmod->lim_do->lonlat[i],
451 tree->mmod->lim_up->lonlat[i],&err);
452 }
453 }
454
455 Free(ldsk_a);
456 Free(time);
457
458 return(path);
459 }
460
461 /*////////////////////////////////////////////////////////////
462 ////////////////////////////////////////////////////////////*/
463
SLFV_Effective_Density(t_tree * tree)464 phydbl SLFV_Effective_Density(t_tree *tree)
465 {
466 return(SLFV_Neighborhood_Size(tree)/(4.*PI*SLFV_Update_Sigsq(tree)));
467 }
468
469 /*////////////////////////////////////////////////////////////
470 ////////////////////////////////////////////////////////////*/
471
SLFV_Rate_Per_Unit_Area(t_tree * tree)472 phydbl SLFV_Rate_Per_Unit_Area(t_tree *tree)
473 {
474 int i;
475 phydbl denom;
476
477 denom = (tree->mmod->lim_up->lonlat[0]-tree->mmod->lim_do->lonlat[0]);
478 for(i=1;i<tree->mmod->n_dim;i++) denom *= (tree->mmod->lim_up->lonlat[i]-tree->mmod->lim_do->lonlat[i]);
479
480 return(tree->mmod->lbda / denom);
481
482 }
483
484 /*////////////////////////////////////////////////////////////
485 ////////////////////////////////////////////////////////////*/
486
SLFV_Sample_Rad_From_Prior(t_tree * tree)487 phydbl SLFV_Sample_Rad_From_Prior(t_tree *tree)
488 {
489 phydbl u,h,w,lbda,mu,b,a;
490
491 h = (tree->mmod->lim_up->lonlat[0]-tree->mmod->lim_do->lonlat[0]);
492 w = (tree->mmod->lim_up->lonlat[1]-tree->mmod->lim_do->lonlat[1]);
493 lbda = tree->mmod->lbda;
494 mu = tree->mmod->mu;
495 a = tree->mmod->min_sigsq;
496 b = tree->mmod->max_sigsq;
497
498 u = Uni();
499
500 return(POW(((b-a)*u+a)*h*w/(4.*PI*lbda*mu),0.25));
501
502 }
503
504 /*////////////////////////////////////////////////////////////
505 ////////////////////////////////////////////////////////////*/
506
SLFV_Update_Radius(t_tree * tree)507 phydbl SLFV_Update_Radius(t_tree *tree)
508 {
509 switch(tree->mmod->id)
510 {
511 case SLFV_UNIFORM: { return(-1.0); break;}
512 case SLFV_GAUSSIAN:
513 {
514 return(POW(tree->mmod->sigsq/(SLFV_Rate_Per_Unit_Area(tree)*4.*PI*tree->mmod->mu),0.25));
515 break;
516 }
517 }
518 return(-1.);
519 }
520
521 /*////////////////////////////////////////////////////////////
522 ////////////////////////////////////////////////////////////*/
523
SLFV_Generation_Length(t_tree * tree)524 phydbl SLFV_Generation_Length(t_tree *tree)
525 {
526 return(1./(2.*tree->mmod->mu*POW(tree->mmod->rad,2)*PI*SLFV_Rate_Per_Unit_Area(tree)));
527 }
528
529 /*////////////////////////////////////////////////////////////
530 ////////////////////////////////////////////////////////////*/
531
SLFV_Neighborhood_Size(t_tree * tree)532 phydbl SLFV_Neighborhood_Size(t_tree *tree)
533 {
534 switch(tree->mmod->id)
535 {
536 case SLFV_UNIFORM: { return(1./tree->mmod->mu); break; }
537 case SLFV_GAUSSIAN: { return(2./tree->mmod->mu); break; }
538 }
539 return(-1.);
540 }
541
542 /*////////////////////////////////////////////////////////////
543 ////////////////////////////////////////////////////////////*/
544
SLFV_Update_Sigsq(t_tree * tree)545 phydbl SLFV_Update_Sigsq(t_tree *tree)
546 {
547 switch(tree->mmod->id)
548 {
549 case SLFV_UNIFORM : { return(-1.0); break;}
550 case SLFV_GAUSSIAN :
551 {
552 return(4.*PI*
553 SLFV_Rate_Per_Unit_Area(tree) *
554 pow(tree->mmod->rad,4)*
555 tree->mmod->mu);
556 break;
557 }
558 }
559 return(-1.);
560 }
561
562 /*////////////////////////////////////////////////////////////
563 ////////////////////////////////////////////////////////////*/
564
565 /* Uses the regression technique described in Barton et al. TPB (2013)
566 to estimate the size of the neighborhood when the population evolve
567 according to a spatial Lambda-Fleming-Viot process.
568 */
SLFV_Neighborhood_Size_Regression(t_tree * tree)569 phydbl SLFV_Neighborhood_Size_Regression(t_tree *tree)
570 {
571 int i,j,pair;
572 t_node *anc;
573 phydbl *dist,min_dist;
574 phydbl QA,Qr,*fst,fst0,fst_min_dist;
575 phydbl cov_fst_dist,var_dist,slope;
576 phydbl eps;
577
578 eps = 1.E-10;
579
580 QA = Mean_Identity(tree->data);
581
582 fst = (phydbl *)mCalloc(tree->n_otu*(tree->n_otu-1)/2,sizeof(phydbl));
583 dist = (phydbl *)mCalloc(tree->n_otu*(tree->n_otu-1)/2,sizeof(phydbl));
584
585 pair = 0;
586 fst0 = 0.0;
587 for(i=0;i<tree->n_otu-1;i++)
588 {
589 fst_min_dist = 0.0;
590 min_dist = MDBL_MAX;
591 for(j=i+1;j<tree->n_otu;j++)
592 {
593 anc = Find_Lca_Pair_Of_Nodes(tree->a_nodes[i],tree->a_nodes[j],tree);
594 if(anc == NULL)
595 {
596 PhyML_Fprintf(stderr,"\n. %s",Write_Tree(tree));
597 PhyML_Fprintf(stderr,"\n. %s %s",tree->a_nodes[i]->name,tree->a_nodes[j]->name);
598 Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
599 }
600
601 dist[pair] = Euclidean_Dist(tree->a_nodes[i]->ldsk->coord,tree->a_nodes[j]->ldsk->coord);
602 dist[pair] = log(dist[pair]);
603
604 Qr = Pairwise_Identity(i,j,tree->data);
605 fst[pair] = (Qr-QA)/(1.-QA);
606
607 if(dist[pair] < min_dist)
608 {
609 min_dist = dist[pair];
610 fst_min_dist = fst[pair];
611 }
612
613 pair++;
614 }
615 fst0 += fst_min_dist;
616 }
617
618 fst0 /= (phydbl)(tree->n_otu);
619
620 cov_fst_dist = Covariance(dist,fst,pair);
621 var_dist = Variance(dist,pair);
622
623 slope = cov_fst_dist / var_dist;
624
625 Free(dist);
626 Free(fst);
627
628 return((fst0-1.+eps)/(slope+eps));
629 }
630
631 /*////////////////////////////////////////////////////////////
632 ////////////////////////////////////////////////////////////*/
633
SLFV_Lk_Range(t_dsk * young,t_dsk * old,t_tree * tree)634 phydbl SLFV_Lk_Range(t_dsk *young, t_dsk *old, t_tree *tree)
635 {
636 if(tree->mmod->id == SLFV_GAUSSIAN) return(SLFV_Lk_Gaussian_Range(young,old,tree));
637 else if(tree->mmod->id == SLFV_UNIFORM) assert(FALSE);
638 else assert(FALSE);
639 return(-1.);
640 }
641
642 /*////////////////////////////////////////////////////////////
643 ////////////////////////////////////////////////////////////*/
644
SLFV_Lk_Gaussian_Range(t_dsk * young,t_dsk * old,t_tree * tree)645 phydbl SLFV_Lk_Gaussian_Range(t_dsk *young, t_dsk *old, t_tree *tree)
646 {
647 t_dsk *disk;
648 phydbl lnL,dt;
649 int n_evt;
650
651 /* return(PHYREX_Lk(tree)); */
652
653 if(PHYREX_Total_Number_Of_Intervals(tree) > tree->mmod->max_num_of_intervals) return UNLIKELY;
654
655 assert(young);
656
657 lnL = 0.0;
658
659 PHYREX_Update_Lindisk_List_Core(young,tree);
660 lnL += SLFV_Lk_Gaussian_Core(young,tree);
661
662 dt = 0.0;
663 n_evt = 0;
664 disk = young->prev;
665 do
666 {
667 if(disk->age_fixed == NO)
668 {
669 dt += fabs(disk->next->time - disk->time);
670 n_evt++;
671 }
672
673 assert(disk);
674 if(disk->time > disk->next->time) return UNLIKELY;
675 PHYREX_Update_Lindisk_List_Core(disk,tree);
676 lnL += SLFV_Lk_Gaussian_Core(disk,tree);
677 if(disk == old) break;
678 disk = disk->prev;
679 }
680 while(disk);
681
682 lnL += n_evt * log(tree->mmod->lbda) - tree->mmod->lbda * dt;
683
684 return(lnL);
685 }
686
687 //////////////////////////////////////////////////////////////
688 //////////////////////////////////////////////////////////////
689
SLFV_Lk_Gaussian_Core(t_dsk * disk,t_tree * tree)690 phydbl SLFV_Lk_Gaussian_Core(t_dsk *disk, t_tree *tree)
691 {
692 phydbl lnL,log_prob_hit,log_mu;
693 int was_hit,i,j,err;
694 phydbl two_theta_two;
695
696 two_theta_two = 2.*pow(tree->mmod->rad,2);
697 lnL = 0.0;
698 log_mu = log(tree->mmod->mu);
699 was_hit = (disk->ldsk != NULL);
700
701 if(disk == tree->young_disk) return 0.0;
702 if(disk->age_fixed == YES) return 0.0;
703
704 for(i=0;i<disk->n_ldsk_a;++i)
705 if(disk->ldsk_a[i]->prev->disk->time > disk->time)
706 return UNLIKELY;
707
708 if(disk->ldsk != NULL)
709 {
710 for(i=0;i<disk->n_ldsk_a;++i) if(disk->ldsk_a[i]->prev == disk->ldsk) break;
711 if(i == disk->n_ldsk_a) return UNLIKELY;
712 /* assert(i != disk->n_ldsk_a); */
713 }
714
715 for(i=0;i<disk->n_ldsk_a;++i)
716 {
717 if(PHYREX_Is_In_Ldscape(disk->ldsk_a[i],tree->mmod) == NO)
718 {
719 for(j=0;j<tree->mmod->n_dim;j++) PhyML_Fprintf(stdout,"\n. Found a lineage outside habitat (%20f)",disk->ldsk_a[i]->coord->lonlat[j]);
720 return(UNLIKELY);
721 }
722
723 log_prob_hit = log_mu;
724 for(j=0;j<tree->mmod->n_dim;j++)
725 log_prob_hit += -pow(disk->ldsk_a[i]->coord->lonlat[j] - disk->centr->lonlat[j],2)/two_theta_two;
726
727
728 if(disk->ldsk_a[i]->prev == disk->ldsk) // disk->ldsk_a[i] was hit
729 {
730 lnL += log_prob_hit;
731 }
732 else // disk->ldsk_a[i] was not hit
733 {
734 lnL += log(1. - exp(log_prob_hit));
735 }
736 }
737
738 /* a hit occurred */
739 if(was_hit == TRUE)
740 {
741 err = NO;
742 for(j=0;j<tree->mmod->n_dim;j++) lnL += Log_Dnorm_Trunc(disk->ldsk->coord->lonlat[j],
743 disk->centr->lonlat[j],
744 tree->mmod->rad,
745 tree->mmod->lim_do->lonlat[j],
746 tree->mmod->lim_up->lonlat[j],&err);
747 }
748
749 /* Likelihood for the disk center */
750 for(j=0;j<tree->mmod->n_dim;j++) lnL += log(1./(tree->mmod->lim_up->lonlat[j]-tree->mmod->lim_do->lonlat[j]));
751
752 return(lnL);
753 }
754
755 //////////////////////////////////////////////////////////////
756 //////////////////////////////////////////////////////////////
757
SLFV_Lk_Gaussian(t_tree * tree)758 phydbl SLFV_Lk_Gaussian(t_tree *tree)
759 {
760 t_dsk *disk;
761 int n_evt;
762
763 assert(!tree->young_disk->next);
764 assert(tree->young_disk->prev);
765
766 tree->mmod->c_lnL = 0.0;
767
768 if(PHYREX_Total_Number_Of_Intervals(tree) > tree->mmod->max_num_of_intervals)
769 {
770 tree->mmod->c_lnL = UNLIKELY;
771 return UNLIKELY;
772 }
773
774 if(isinf(tree->mmod->c_lnL) || isnan(tree->mmod->c_lnL))
775 {
776 tree->mmod->c_lnL = UNLIKELY;
777 return tree->mmod->c_lnL;
778 }
779
780 tree->mmod->c_lnL += PHYREX_LnPrior_Radius(tree);
781
782 PHYREX_Update_Lindisk_List(tree);
783
784 tree->mmod->c_lnL += SLFV_Lk_Gaussian_Core(tree->young_disk,tree);
785
786 n_evt = 0;
787 disk = tree->young_disk->prev;
788 do
789 {
790 if(disk->age_fixed == NO) n_evt++;
791
792 assert(disk);
793 if(disk->time > disk->next->time)
794 {
795 PhyML_Printf("\n. Anomaly in ordering of disk times (disk->time: %f disk->next->time: %f)",disk->time,disk->next->time);
796 PhyML_Printf("\n. Currently doing following move: %s",tree->mcmc->move_name[tree->mcmc->move_idx]);
797 tree->mmod->c_lnL = UNLIKELY;
798 return tree->mmod->c_lnL;
799 }
800
801 tree->mmod->c_lnL += SLFV_Lk_Gaussian_Core(disk,tree);
802 if(disk->prev == NULL) break;
803 disk = disk->prev;
804 }
805 while(1);
806
807 tree->mmod->c_lnL += PHYREX_Lk_Time_Component(tree);
808
809 /* tree->mmod->c_lnL += n_evt * log(tree->mmod->lbda) - tree->mmod->lbda * fabs(tree->young_disk->time - disk->time); */
810
811 if(isinf(tree->mmod->c_lnL) || isnan(tree->mmod->c_lnL)) tree->mmod->c_lnL = UNLIKELY;
812
813 return(tree->mmod->c_lnL);
814 }
815
816 /*////////////////////////////////////////////////////////////
817 ////////////////////////////////////////////////////////////*/
818 // Simulate Etheridge-Barton model forwards in time, following n_otu lineages
819 // on a rectangle of dimension width x h
SLFV_Simulate_Forward_Core(int n_sites,t_tree * tree)820 t_sarea *SLFV_Simulate_Forward_Core(int n_sites, t_tree *tree)
821 {
822 t_dsk *disk,*new_disk;
823 t_ldsk *ldsk,**ldsk_a_pop,**ldsk_a_samp,**ldsk_a_tmp,**ldsk_a_tips,*new_ldsk;
824 t_ll *ldsk_list,*dum_ll;
825 int i,j,n_disk,n_dim,n_otu,init_pop_size,curr_pop_size,parent_id,n_lineages,sample_size,n_poly,*permut,n_sampled_demes;
826 phydbl sum,*parent_prob,prob_death,tree_height,max_x,max_y,trans_x,trans_y,t_sim, one_gen;
827 short int dies,n_remain;
828 t_phyrex_mod *mmod;
829 t_poly **poly;
830 t_sarea *area;
831 short int *is_sampled;
832 phydbl w,h;
833 phydbl cx,cy;
834 phydbl lx,ly;
835 int n_survive,n_offspring, n_gen;
836
837 mmod = tree->mmod;
838 n_dim = 2;
839 n_otu = tree->n_otu;
840 w = (mmod->lim_up->lonlat[0]-mmod->lim_do->lonlat[0]);
841 h = (mmod->lim_up->lonlat[1]-mmod->lim_do->lonlat[1]);
842 init_pop_size = mmod->rho*w*h;
843 n_gen = 10; // number of generations to simulate
844 one_gen = mmod->gen_cal_time; // time duration of one generation (calendar unit)
845 t_sim = n_gen * one_gen; // total simulation duration (calendar unit)
846
847
848 /* Allocate and initialise first disk event */
849 disk = PHYREX_Make_Disk_Event(n_dim,init_pop_size);
850 PHYREX_Init_Disk_Event(disk,n_dim,NULL);
851 disk->time = 0.0;
852 disk->prev = NULL;
853 disk->n_ldsk_a = init_pop_size;
854
855 /* Allocate coordinates for all individuals in the starting population */
856 ldsk_list = NULL;
857 i = 0;
858 do
859 {
860 ldsk = PHYREX_Make_Lindisk_Node(n_dim);
861 PHYREX_Init_Lindisk_Node(ldsk,disk,n_dim);
862 Push_Bottom_Linked_List(ldsk,&ldsk_list,NO);
863 i++;
864 }
865 while(i != init_pop_size);
866
867
868 /* Fill in array of lineages on the first disk */
869 dum_ll = ldsk_list->head;
870 i = 0;
871 do
872 {
873 disk->ldsk_a[i] = (t_ldsk *)dum_ll->v;
874 dum_ll = dum_ll->next;
875 i++;
876 }
877 while(dum_ll != NULL);
878
879
880
881 /* Generate coordinates for all individuals in starting population */
882 for(i=0;i<init_pop_size;i++)
883 {
884 disk->ldsk_a[i]->coord->lonlat[0] = Uni()*w; // longitude
885 disk->ldsk_a[i]->coord->lonlat[1] = Uni()*h; // latitude
886 }
887
888
889 n_disk = 0;
890 do
891 {
892 /* Create new disk */
893 new_disk = PHYREX_Make_Disk_Event(n_dim,1);
894 Free(new_disk->ldsk_a);
895 new_disk->ldsk_a = NULL;
896 PHYREX_Init_Disk_Event(new_disk,n_dim,NULL);
897 new_disk->prev = disk;
898 disk->next = new_disk;
899 n_disk++;
900
901
902 /* Coordinates of event */
903 new_disk->centr->lonlat[0] = Uni()*(mmod->lim_up->lonlat[0]-mmod->lim_do->lonlat[0])+mmod->lim_do->lonlat[0];
904 new_disk->centr->lonlat[1] = Uni()*(mmod->lim_up->lonlat[1]-mmod->lim_do->lonlat[1])+mmod->lim_do->lonlat[1];
905
906 cx = new_disk->centr->lonlat[0];
907 cy = new_disk->centr->lonlat[1];
908
909
910 /* Time of new disk */
911 new_disk->time = disk->time + Rexp(mmod->lbda);
912
913 /* PhyML_Printf("\n. new_disk->time: %f t_sim: %f",new_disk->time,t_sim); */
914
915 /* Size of current population */
916 curr_pop_size = disk->n_ldsk_a;
917
918 if(curr_pop_size == 0)
919 {
920 PhyML_Fprintf(stderr,"\n== Population went extinct after %d events",n_disk);
921 Exit("\n");
922 }
923
924
925 if(new_disk->time < t_sim)
926 {
927 /* Select one parent */
928 parent_prob = (phydbl *)mCalloc(curr_pop_size,sizeof(phydbl));
929 for(i=0;i<curr_pop_size;++i)
930 {
931 lx = disk->ldsk_a[i]->coord->lonlat[0];
932 ly = disk->ldsk_a[i]->coord->lonlat[1];
933
934 switch(mmod->id)
935 {
936 case SLFV_UNIFORM:
937 {
938 if(PHYREX_Is_In_Disk(disk->ldsk_a[i]->coord,new_disk,mmod) == YES)
939 parent_prob[i] = 1.0;
940 else
941 parent_prob[i] = 0.0;
942 break;
943 }
944 case SLFV_GAUSSIAN:
945 {
946 parent_prob[i] = 0.0;
947 parent_prob[i] += -pow(lx - cx,2)/(2.*pow(mmod->rad,2));
948 parent_prob[i] += -pow(ly - cy,2)/(2.*pow(mmod->rad,2));
949 parent_prob[i] = exp(parent_prob[i]);
950 break;
951 }
952 }
953 }
954
955 sum = 0.0;
956 for(i=0;i<curr_pop_size;++i) sum += parent_prob[i];
957
958 if(sum < 1.E-100)
959 {
960 sum = curr_pop_size;
961 for(i=0;i<curr_pop_size;++i) parent_prob[i] = 1.;
962 }
963
964 for(i=0;i<curr_pop_size;++i) parent_prob[i] /= sum;
965
966
967
968 parent_id = Sample_i_With_Proba_pi(parent_prob,curr_pop_size);
969 new_disk->ldsk = disk->ldsk_a[parent_id];
970
971 Free(parent_prob);
972
973
974 /* Which lineages survive that event? */
975 n_survive = 0;
976 for(i=0;i<curr_pop_size;++i)
977 {
978 lx = disk->ldsk_a[i]->coord->lonlat[0];
979 ly = disk->ldsk_a[i]->coord->lonlat[1];
980
981 prob_death = 0.0;
982 switch(mmod->id)
983 {
984 case SLFV_UNIFORM:
985 {
986 if(PHYREX_Is_In_Disk(disk->ldsk_a[i]->coord,new_disk,mmod) == YES)
987 prob_death = mmod->mu;
988 break;
989 }
990 case SLFV_GAUSSIAN:
991 {
992 prob_death = log(mmod->mu);
993 prob_death += -pow(lx - cx,2)/(2.*pow(mmod->rad,2));
994 prob_death += -pow(ly - cy,2)/(2.*pow(mmod->rad,2));
995 prob_death = exp(prob_death);
996 break;
997 }
998 }
999
1000 dies = NO;
1001 if(Uni() < prob_death) dies = YES;
1002
1003 if(dies == NO)
1004 {
1005 if(n_survive == 0) new_disk->ldsk_a = (t_ldsk **)mCalloc(1,sizeof(t_ldsk *));
1006 else new_disk->ldsk_a = (t_ldsk **)mRealloc(new_disk->ldsk_a,n_survive+1,sizeof(t_ldsk *));
1007 new_disk->ldsk_a[n_survive] = disk->ldsk_a[i];
1008 n_survive++;
1009 }
1010 }
1011
1012 /* PhyML_Printf("\n. n_survive: %d",n_survive); */
1013
1014 /* Offspring */
1015 /* How many? */
1016 phydbl r = mmod->rad;
1017 phydbl u = mmod->mu;
1018 phydbl rho = mmod->rho;
1019 phydbl mean =
1020 0.5*PI*rho*u*r*r*
1021 (erf(0.5*sqrt(2.)/r*(cy-0.0))*erf(0.5*sqrt(2.)/r*(cx-0.0)) +
1022 erf(0.5*sqrt(2.)/r*(cy-h))*erf(0.5*sqrt(2.)/r*(cx-w)) -
1023 erf(0.5*sqrt(2.)/r*(cy-0.0))*erf(0.5*sqrt(2.)/r*(cx-w)) -
1024 erf(0.5*sqrt(2.)/r*(cy-h))*erf(0.5*sqrt(2.)/r*(cx-0.0)));
1025
1026 n_offspring = Rpois(mean);
1027
1028 new_disk->n_ldsk_a = n_survive + n_offspring;
1029
1030 /* PhyML_Printf("\n. n_offspring: %d",n_offspring); */
1031
1032 /* Where */
1033 for(i=0;i<n_offspring;++i)
1034 {
1035 /* New lindisk */
1036 new_ldsk = PHYREX_Make_Lindisk_Node(n_dim);
1037 PHYREX_Init_Lindisk_Node(new_ldsk,new_disk,n_dim);
1038 Push_Bottom_Linked_List(new_ldsk,&ldsk_list,NO);
1039
1040 if(new_disk->ldsk_a == NULL) new_disk->ldsk_a = (t_ldsk **)mCalloc(n_survive+1,sizeof(t_ldsk *));
1041 else new_disk->ldsk_a = (t_ldsk **)mRealloc(new_disk->ldsk_a,n_survive+i+1,sizeof(t_ldsk *));
1042
1043 new_disk->ldsk_a[n_survive+i] = new_ldsk;
1044
1045
1046 /* Generate new location */
1047 switch(mmod->id)
1048 {
1049 case SLFV_UNIFORM: { PHYREX_Runif_Rectangle_Overlap(new_ldsk,new_disk,mmod); break; }
1050 case SLFV_GAUSSIAN: { PHYREX_Rnorm_Trunc(new_ldsk,new_disk,mmod); break; }
1051 }
1052
1053 /* Connect to parent */
1054 new_ldsk->prev = disk->ldsk_a[parent_id];
1055 }
1056 }
1057 else
1058 {
1059 new_disk->time = t_sim;
1060 new_disk->n_ldsk_a = disk->n_ldsk_a;
1061 new_disk->ldsk_a = (t_ldsk **)mCalloc(curr_pop_size,sizeof(t_ldsk *));
1062 for(i=0;i<curr_pop_size;++i) new_disk->ldsk_a[i] = disk->ldsk_a[i];
1063 }
1064
1065
1066 disk = new_disk;
1067
1068 /* printf("\n. pop size: %6d # of events: %6d",disk->n_ldsk_a,n_disk); */
1069 }
1070 while(disk->time < t_sim);
1071
1072
1073 /* Dispersal stuff */
1074 /* { */
1075 /* phydbl T = disk->time; // total simulation time (in calendar unit) */
1076 /* t_ldsk *dum_ldsk = disk->ldsk_a[Rand_Int(0,disk->n_ldsk_a-1)]; // random selection of a lineage among all the lineages available at present time */
1077 /* t_dsk *dum_dsk = disk; */
1078 /* phydbl s = w*h; // area */
1079 /* printf("\n. s: %f",s); fflush(NULL); */
1080 /* phydbl gentime = 1./(2.*PI*mmod->rad*mmod->rad*mmod->mu*mmod->lbda/s); */
1081 /* phydbl ssq = 0.0; // sum of squared difference between current and previous position of lineage, when previous pos != current pos. */
1082 /* phydbl curr_pos,prev_pos; */
1083 /* int curr_gen,prev_gen,nhits=0; */
1084 /* prev_pos = dum_ldsk->coord->lonlat[0]; */
1085 /* curr_pos = prev_pos; */
1086 /* curr_gen = prev_gen = 1; */
1087 /* // Compute sum of squared difference of positions */
1088 /* do */
1089 /* { */
1090 /* curr_gen = 1 + (int)(dum_dsk->time-T)/gentime; // increment the number of generations (generation time measured in calendar time units) */
1091 /* curr_pos = dum_ldsk->coord->lonlat[0]; // current position of lineage */
1092
1093 /* if(dum_ldsk->disk == dum_dsk) // lineage was born at that time */
1094 /* { */
1095 /* dum_ldsk = dum_ldsk->prev; // jump to parent */
1096 /* nhits++; */
1097 /* } */
1098
1099 /* if(curr_gen != prev_gen) */
1100 /* { */
1101 /* ssq += pow(curr_pos-prev_pos,2); */
1102 /* prev_pos = curr_pos; */
1103 /* prev_gen = curr_gen; */
1104 /* } */
1105
1106 /* dum_dsk = dum_dsk->prev; */
1107 /* } */
1108 /* while(dum_dsk); */
1109
1110 /* PhyML_Printf("\n # var T nhits T/nhits gentime sigsq theta u lambda"); */
1111 /* PhyML_Printf("\n a@z %G %f %d %f %f %f %f %f %f", */
1112 /* (1./(T/gentime))*ssq, */
1113 /* T, */
1114 /* nhits, */
1115 /* T/nhits, */
1116 /* gentime, */
1117 /* 4*pow(mmod->rad,4)*mmod->lbda/s*PI*mmod->mu*gentime, */
1118 /* mmod->rad, */
1119 /* mmod->mu, */
1120 /* mmod->lbda); */
1121
1122 /* Exit("\n"); */
1123 /* } */
1124
1125
1126 /* /\* Coalescence stuff *\/ */
1127 /* { */
1128 /* t_ldsk *lin1, *lin2; */
1129 /* int n_evts,have_coal,coal_evt; */
1130 /* phydbl T,t; */
1131
1132 /* /\* Selection of two lineages at random *\/ */
1133 /* while(disk->next) disk = disk->next; */
1134 /* permut = Permutate(disk->n_ldsk_a); */
1135 /* lin1 = disk->ldsk_a[permut[0]]; */
1136 /* lin2 = disk->ldsk_a[permut[1]]; */
1137 /* Free(permut); */
1138
1139 /* printf("\n. disk->time-lin1->prev->time: %f",disk->time-lin1->prev->disk->time); */
1140 /* printf("\n. disk->time-lin2->prev->time: %f",disk->time-lin2->prev->disk->time); */
1141
1142 /* /\* Go back in time *\/ */
1143 /* curr_t = 0.0; */
1144 /* n_evts = 0; */
1145 /* coal_evt = 0; */
1146 /* have_coal = 0; */
1147 /* T = disk->time; */
1148 /* do */
1149 /* { */
1150 /* if(disk->ldsk) n_evts++; */
1151
1152 /* if(disk->ldsk && disk->ldsk == lin1->prev) lin1 = lin1->prev; */
1153 /* if(disk->ldsk && disk->ldsk == lin2->prev) lin2 = lin2->prev; */
1154
1155 /* if(lin1 == lin2) */
1156 /* { */
1157 /* have_coal = 1; */
1158 /* coal_evt = n_evts; */
1159 /* break; */
1160 /* } */
1161
1162 /* disk = disk->prev; */
1163 /* t=disk?disk->time:0.0; */
1164 /* curr_t = (T-t); */
1165
1166 /* /\* printf("\n>>> coal %d n_evts: %d curr_t: %f disk->time: %f T: %f one_gen: %f", *\/ */
1167 /* /\* have_coal, *\/ */
1168 /* /\* n_evts, *\/ */
1169 /* /\* curr_t, *\/ */
1170 /* /\* t, *\/ */
1171 /* /\* T, *\/ */
1172 /* /\* one_gen); *\/ */
1173 /* } */
1174 /* /\* while(disk && n_evts < 2); *\/ */
1175 /* while(disk && curr_t < one_gen); */
1176
1177 /* PhyML_Printf("\n. @ coal : %d %d %f",have_coal,coal_evt,curr_t); */
1178 /* /\* Exit("\n"); *\/ */
1179 /* } */
1180
1181
1182
1183
1184 /* Allocate coordinates for all the tips first (will grow afterwards) */
1185 ldsk_a_samp = (t_ldsk **)mCalloc(n_otu,sizeof(t_ldsk *));
1186 ldsk_a_tips = (t_ldsk **)mCalloc(n_otu,sizeof(t_ldsk *));
1187 ldsk_a_tmp = (t_ldsk **)mCalloc(n_otu,sizeof(t_ldsk *));
1188
1189 while(disk->next) disk = disk->next;
1190 ldsk_a_pop = disk->ldsk_a;
1191
1192
1193 /* /\* Sample n_otu individuals uniformly at random... *\/ */
1194 /* while(disk->next) disk = disk->next; */
1195 /* permut = Permutate(disk->n_ldsk_a); */
1196 /* for(i=0;i<n_otu;i++) ldsk_a_samp[i] = ldsk_a_pop[permut[i]]; */
1197 /* Free(permut); */
1198 /* n_sampled_demes = 1; */
1199
1200
1201 /* ... or take samples in random polygons */
1202 n_poly = n_sites;
1203 is_sampled = (short int *)mCalloc(n_poly,sizeof(short int));
1204
1205 do
1206 {
1207 poly = (t_poly **)mCalloc(n_poly,sizeof(t_poly *));
1208 for(i=0;i<n_poly;++i) poly[i] = Rpoly(3); /* triangles */
1209 for(i=0;i<n_poly;++i)
1210 {
1211 for(j=0;j<poly[i]->n_poly_vert;++j)
1212 {
1213 poly[i]->poly_vert[j]->lonlat[0] *= (mmod->lim_up->lonlat[0]-mmod->lim_do->lonlat[0])*0.5;
1214 poly[i]->poly_vert[j]->lonlat[1] *= (mmod->lim_up->lonlat[1]-mmod->lim_do->lonlat[1])*0.5;
1215 }
1216
1217 max_x = 0.0;
1218 max_y = 0.0;
1219 for(j=0;j<poly[i]->n_poly_vert;++j)
1220 {
1221 if(poly[i]->poly_vert[j]->lonlat[0] > max_x) max_x = poly[i]->poly_vert[j]->lonlat[0];
1222 if(poly[i]->poly_vert[j]->lonlat[1] > max_y) max_y = poly[i]->poly_vert[j]->lonlat[1];
1223 }
1224
1225 trans_x = Uni()*(mmod->lim_up->lonlat[0] - max_x);
1226 trans_y = Uni()*(mmod->lim_up->lonlat[1] - max_y);
1227
1228 for(j=0;j<poly[i]->n_poly_vert;++j)
1229 {
1230 poly[i]->poly_vert[j]->lonlat[0] += trans_x;
1231 poly[i]->poly_vert[j]->lonlat[1] += trans_y;
1232 }
1233 }
1234
1235 for(i=0;i<n_otu;++i) ldsk_a_samp[i] = NULL;
1236
1237 for(i=0;i<n_poly;++i) is_sampled[i] = NO;
1238
1239 permut = Permutate(n_poly);
1240
1241 sample_size = 0;
1242 for(i=0;i<curr_pop_size;++i)
1243 {
1244 for(j=0;j<n_poly;++j)
1245 {
1246 if(Is_In_Polygon(ldsk_a_pop[i]->coord,poly[permut[j]]) == YES)
1247 {
1248 char *s;
1249 int k;
1250
1251 s = (char *)mCalloc((int)strlen(ldsk_a_pop[i]->coord->id)+1+50,sizeof(char));
1252 For(k,(int)strlen(ldsk_a_pop[i]->coord->id)+1+20) s[k]='\0';
1253 sprintf(s,"%d_",i);
1254 strcat(s,ldsk_a_pop[i]->coord->id);
1255 Free(ldsk_a_pop[i]->coord->id);
1256 strcat(s,"_deme");
1257 sprintf(s+strlen(s),"%d",permut[j]);
1258 ldsk_a_pop[i]->coord->id = s;
1259
1260
1261 ldsk_a_samp[sample_size] = ldsk_a_pop[i];
1262 sample_size++;
1263 PhyML_Printf("\n@ Coord: %f %f %s %p",
1264 ldsk_a_samp[sample_size-1]->coord->lonlat[0],
1265 ldsk_a_samp[sample_size-1]->coord->lonlat[1],
1266 ldsk_a_pop[i]->coord->id,ldsk_a_pop[i]);
1267
1268 is_sampled[permut[j]] = YES;
1269 break;
1270 }
1271 }
1272 if(sample_size == n_otu) break;
1273 }
1274
1275 Free(permut);
1276
1277 if(i == curr_pop_size)
1278 {
1279 for(j=0;j<n_poly;j++) Free_Poly(poly[j]);
1280 Free(poly);
1281 /* PhyML_Printf("\n. Not enough individuals in polygon(s) (only %d found).",sample_size); */
1282 /* Generic_Exit(__FILE__,__LINE__,__FUNCTION__); */
1283 }
1284 else break;
1285 }
1286 while(1);
1287
1288 n_sampled_demes = 0;
1289 for(i=0;i<n_poly;++i) if(is_sampled[i] == YES) n_sampled_demes++;
1290
1291 for(i=0;i<n_otu;++i) ldsk_a_tips[i] = ldsk_a_samp[i];
1292
1293 area = Make_Sarea(n_sampled_demes);
1294 area->n_poly = n_sampled_demes;
1295 n_sampled_demes = 0;
1296 for(i=0;i<n_poly;++i) if(is_sampled[i] == YES) area->a_poly[n_sampled_demes++] = poly[i];
1297
1298
1299 for(i=0;i<area->n_poly;++i)
1300 {
1301 /* PhyML_Printf("\n@ Poly %3d area = %f",i,Area_Of_Poly_Monte_Carlo(area->a_poly[i],mmod->lim)); */
1302
1303 for(j=0;j<area->a_poly[i]->n_poly_vert;++j)
1304 {
1305 PhyML_Printf("\n@ Poly %3d point %d (x,y) = (%f,%f)",
1306 i,
1307 j,
1308 area->a_poly[i]->poly_vert[j]->lonlat[0],
1309 area->a_poly[i]->poly_vert[j]->lonlat[1]);
1310 }
1311 }
1312
1313 tree->young_disk = disk;
1314 disk->ldsk_a = ldsk_a_tips;
1315 disk->mmod = tree->mmod;
1316 disk->centr->lonlat[0] = .5*(tree->mmod->lim_up->lonlat[0]-tree->mmod->lim_do->lonlat[0]);
1317 disk->centr->lonlat[1] = .5*(tree->mmod->lim_up->lonlat[1]-tree->mmod->lim_do->lonlat[1]);
1318 disk->n_ldsk_a = n_otu;
1319
1320 int n_discs = 0;
1321
1322 n_lineages = n_otu;
1323 do
1324 {
1325 n_remain = 0;
1326 for(i=0;i<n_lineages;++i)
1327 {
1328
1329 if((disk->prev->ldsk != NULL) && (disk->prev->ldsk == ldsk_a_samp[i]->prev)) /* Coalescent event is sampled */
1330 {
1331 PHYREX_Make_Lindisk_Next(disk->prev->ldsk);
1332 disk->prev->ldsk->next[disk->prev->ldsk->n_next-1] = ldsk_a_samp[i];
1333 }
1334 else
1335 {
1336 ldsk_a_tmp[n_remain] = ldsk_a_samp[i];
1337 n_remain++;
1338 }
1339 }
1340
1341 for(i=0;i<n_remain;i++) ldsk_a_samp[i] = ldsk_a_tmp[i];
1342 if((disk->prev->ldsk != NULL) && (disk->prev->ldsk->n_next > 0)) ldsk_a_samp[i] = disk->prev->ldsk;
1343
1344 if((disk->prev->ldsk != NULL) && (disk->prev->ldsk->n_next > 0))
1345 {
1346 n_lineages -= (disk->prev->ldsk->n_next);
1347 n_lineages += 1;
1348 }
1349
1350 if(n_lineages != n_remain+(disk->prev->ldsk && disk->prev->ldsk->n_next>0)?1:0)
1351 {
1352 PhyML_Fprintf(stderr,"\n. n_lineages: %d n_remain: %d n_next: %d",
1353 n_lineages,
1354 n_remain,
1355 disk->prev->ldsk->n_next);
1356 Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
1357 }
1358
1359 /* None of the sampled lineages was hit */
1360 if((disk->prev->ldsk != NULL) && (disk->prev->ldsk->n_next == 0))
1361 {
1362 /* printf("\n. free %s @ %f",disk->ldsk->coord->id,disk->time); */
1363 Free_Ldisk(disk->prev->ldsk);
1364 disk->prev->ldsk = NULL;
1365 }
1366
1367 disk = disk->prev;
1368
1369 if(disk->prev == NULL)
1370 {
1371 PhyML_Fprintf(stderr,"\n. # lineages left: %d",n_remain);
1372 PhyML_Fprintf(stderr,"\n. Sample has not coalesced completely.");
1373 fflush(NULL);
1374 Exit("\n");
1375 }
1376 }
1377 while(n_lineages > 1);
1378
1379
1380 /* for(i=0;i<n_otu;i++) printf("\n> %s",tree->young_disk->ldsk_a[i]->coord->id); */
1381
1382 disk->prev = NULL;
1383
1384 disk = tree->young_disk;
1385 tree_height = disk->time;
1386 n_discs = 0;
1387 while(disk)
1388 {
1389 disk->time -= tree_height;
1390 disk = disk->prev;
1391 n_discs++;
1392 }
1393
1394 Free(ldsk_a_tmp);
1395 Free(ldsk_a_samp);
1396 Free(ldsk_a_pop);
1397 Free(is_sampled);
1398
1399 /* PHYREX_Print_Struct('#',tree); */
1400 /* Exit("\n"); */
1401
1402 return(area);
1403
1404 }
1405
1406
1407 /*////////////////////////////////////////////////////////////
1408 ////////////////////////////////////////////////////////////*/
1409 // Simulate Etheridge-Barton model backwards in time, following n_otu lineages
1410 // on a rectangle.
SLFV_Simulate_Backward_Core(t_dsk * init_disk,int avoid_multiple_mergers,t_tree * tree)1411 phydbl SLFV_Simulate_Backward_Core(t_dsk *init_disk, int avoid_multiple_mergers, t_tree *tree)
1412 {
1413 t_dsk *disk,*new_disk,*oldest_disk;
1414 int i,j,reached_oldest_disk,err;
1415 phydbl prob_hit,u,new_time,lnL;
1416 t_phyrex_mod *mmod;
1417 t_node *n;
1418
1419 mmod = tree->mmod;
1420
1421 Get_Node_Ranks_From_Tip_Times(tree);
1422
1423 // Get to the youngest node
1424 n = tree->a_nodes[0];
1425 while(n->rk_next) n = n->rk_next;
1426
1427 disk = init_disk;
1428 // Make sure at least one of the youngest nodes is on init_disk
1429 for(i=0;i<init_disk->n_ldsk_a;++i) if(init_disk->ldsk_a[i]->nd == n) break;
1430 assert(i != init_disk->n_ldsk_a);
1431
1432 // Get to the oldest sampled disk
1433 disk = init_disk;
1434 while(disk->prev) disk = disk->prev;
1435 oldest_disk = disk;
1436 reached_oldest_disk = NO;
1437 if(init_disk->prev == NULL) reached_oldest_disk = YES; // Only one sampled disk
1438
1439 lnL = 0.0;
1440 disk = init_disk;
1441 do
1442 {
1443 /* MRCA reached */
1444 if(PHYREX_Number_Of_Outgoing_Ldsks(disk) == 1 && reached_oldest_disk == YES) break;
1445
1446 /* Proposed new time */
1447 new_time = disk->time - Rexp(mmod->lbda);
1448
1449 lnL += log(mmod->lbda) - mmod->lbda * fabs(disk->time - new_time);
1450
1451 /* New time is older than previous sampled disk (disk->prev) */
1452 if(disk->prev && new_time < disk->prev->time)
1453 {
1454 new_disk = disk->prev;
1455 new_time = disk->prev->time;
1456
1457 // Reached oldest disk -> set it to NULL
1458 // so as to indicate that it is ok to stop
1459 // the simulation, continue otherwise
1460 if(new_disk == oldest_disk) reached_oldest_disk = YES;
1461
1462 /* Connect disk and new_disk */
1463 disk->prev = new_disk;
1464 new_disk->next = disk;
1465
1466 /* Time of next event */
1467 new_disk->time = new_time;
1468
1469 /* Populate new_disk->ldsk_a array */
1470 PHYREX_Update_Lindisk_List_Core(new_disk,tree);
1471 }
1472 else
1473 {
1474 /* Create new disk */
1475 new_disk = PHYREX_Make_Disk_Event(mmod->n_dim,tree->n_otu);
1476 PHYREX_Init_Disk_Event(new_disk,mmod->n_dim,NULL);
1477
1478 /* new_disk->prev now points to previous sampled disk */
1479 new_disk->prev = disk->prev;
1480 if(disk->prev) disk->prev->next = new_disk;
1481
1482 /* Connect disk and new_disk */
1483 disk->prev = new_disk;
1484 new_disk->next = disk;
1485
1486 /* Time of next event */
1487 new_disk->time = new_time;
1488
1489 /* Coordinates of new event */
1490 for(j=0;j<mmod->n_dim;++j) new_disk->centr->lonlat[j] = Uni()*(mmod->lim_up->lonlat[j]-mmod->lim_do->lonlat[j])+mmod->lim_do->lonlat[j];
1491
1492 for(j=0;j<mmod->n_dim;++j) lnL += log(1./(mmod->lim_up->lonlat[j]-mmod->lim_do->lonlat[j]));
1493
1494
1495 /* Populate new_disk->ldsk_a array */
1496 PHYREX_Update_Lindisk_List_Core(new_disk,tree);
1497
1498 new_disk->ldsk = NULL;
1499 /* Which lineages in new_disk->ldsk_a are hit? */
1500 for(i=0;i<new_disk->n_ldsk_a;++i)
1501 {
1502 prob_hit = -1.;
1503 switch(mmod->id)
1504 {
1505 case SLFV_UNIFORM :
1506 {
1507 prob_hit = mmod->mu;
1508 break;
1509 }
1510 case SLFV_GAUSSIAN : case RW : case RRW :
1511 {
1512 prob_hit = log(mmod->mu);
1513 for(j=0;j<mmod->n_dim;++j)
1514 {
1515 prob_hit +=
1516 -pow(new_disk->ldsk_a[i]->coord->lonlat[j] -
1517 new_disk->centr->lonlat[j],2)/(2.*pow(mmod->rad,2));
1518 }
1519 prob_hit = exp(prob_hit);
1520 break;
1521 }
1522 }
1523
1524
1525 u = Uni();
1526 if(!(u > prob_hit)) // disk->ldsk_a[i] is hit
1527 {
1528 lnL += log(prob_hit);
1529 // new_disk->ldsk_a[i] is hit -> coalesce (or just jump) to parent (i.e., new_disk->ldsk)
1530 if(new_disk->ldsk == NULL)
1531 {
1532 new_disk->ldsk = PHYREX_Make_Lindisk_Node(mmod->n_dim);
1533 PHYREX_Init_Lindisk_Node(new_disk->ldsk,new_disk,mmod->n_dim);
1534
1535 // Generate coordinates for newly created ldsk
1536 switch(tree->mmod->id)
1537 {
1538 case SLFV_UNIFORM :
1539 {
1540 PHYREX_Runif_Rectangle_Overlap(new_disk->ldsk,new_disk,tree->mmod);
1541 break;
1542 }
1543 case SLFV_GAUSSIAN : case RW : case RRW :
1544 {
1545 PHYREX_Rnorm_Trunc(new_disk->ldsk,new_disk,tree->mmod);
1546 for(j=0;j<tree->mmod->n_dim;++j)
1547 lnL += Log_Dnorm_Trunc(new_disk->ldsk->coord->lonlat[j],
1548 new_disk->centr->lonlat[j],
1549 tree->mmod->rad,
1550 tree->mmod->lim_do->lonlat[j],
1551 tree->mmod->lim_up->lonlat[j],&err);
1552 break;
1553 }
1554 }
1555 }
1556 new_disk->ldsk_a[i]->prev = new_disk->ldsk;
1557 PHYREX_Make_Lindisk_Next(new_disk->ldsk);
1558 new_disk->ldsk->next[new_disk->ldsk->n_next-1] = new_disk->ldsk_a[i];
1559 }
1560 else
1561 {
1562 lnL += log(1. - prob_hit);
1563 }
1564 if(new_disk->ldsk && new_disk->ldsk->n_next == 2 && avoid_multiple_mergers == YES) break;
1565 }
1566 }
1567
1568 disk = new_disk;
1569 }
1570 while(1);
1571 disk->prev = NULL;
1572
1573 return(lnL);
1574 }
1575
1576 //////////////////////////////////////////////////////////////
1577 //////////////////////////////////////////////////////////////
1578 // Simulate Etheridge-Barton model backwards in time, following n_otu lineages
1579 // on a rectangle of dimension width w x h
1580 // See Kelleher, Barton & Etheridge, Bioinformatics, 2013.
SLFV_Simulate(int n_otu,int n_sites,phydbl w,phydbl h,phydbl lbda,phydbl rad,phydbl mu,int r_seed)1581 t_tree *SLFV_Simulate(int n_otu, int n_sites, phydbl w, phydbl h, phydbl lbda, phydbl rad, phydbl mu, int r_seed)
1582 {
1583 t_tree *tree;
1584 int n_dim,i;
1585 t_phyrex_mod *mmod;
1586 t_dsk *disk;
1587 option *io;
1588 t_mod *mod;
1589 t_opt *s_opt;
1590 calign *cdata;
1591 /* phydbl T; */
1592
1593 n_dim = 2; // 2-dimensional landscape
1594
1595 io = (option *)Make_Input();
1596 mod = (t_mod *)Make_Model_Basic();
1597 s_opt = (t_opt *)Make_Optimiz();
1598
1599 Set_Defaults_Input(io);
1600 Set_Defaults_Model(mod);
1601 Set_Defaults_Optimiz(s_opt);
1602
1603 io->mod = mod;
1604 mod->io = io;
1605 mod->s_opt = s_opt;
1606 io->r_seed = r_seed;
1607
1608 io->n_otu = n_otu;
1609 io->init_len = 500; /* sequence length */
1610
1611 io->data = Make_Empty_Alignment(io);
1612
1613 Make_Model_Complete(io->mod);
1614 Set_Model_Name(io->mod);
1615
1616 io->colalias = NO;
1617 cdata = Compact_Data(io->data,io);
1618 Free_Seq(io->data,io->n_otu);
1619
1620 tree = Make_Tree_From_Scratch(n_otu,cdata);
1621
1622 Connect_CSeqs_To_Nodes(cdata,io,tree);
1623
1624 tree->rates = RATES_Make_Rate_Struct(tree->n_otu);
1625 RATES_Init_Rate_Struct(tree->rates,io->rates,tree->n_otu);
1626
1627 tree->times = TIMES_Make_Time_Struct(tree->n_otu);
1628 TIMES_Init_Time_Struct(tree->times,io->times,tree->n_otu);
1629
1630 tree->data = cdata;
1631 tree->mod = mod;
1632 tree->io = io;
1633 tree->n_pattern = tree->data->crunch_len;
1634
1635
1636 /* Allocate migrep model */
1637 mmod = PHYREX_Make_Migrep_Model(tree->n_otu,n_dim);
1638 tree->mmod = mmod;
1639 PHYREX_Init_Migrep_Mod(mmod,n_dim,0.0,0.0,w,h);
1640
1641 mmod->lbda = lbda;
1642 mmod->rad = rad;
1643 mmod->mu = mu;
1644
1645 if(mmod->lbda > mmod->max_lbda) mmod->lbda = mmod->max_lbda;
1646 if(mmod->rad > mmod->max_rad) mmod->rad = mmod->max_rad;
1647 if(mmod->mu > mmod->max_mu) mmod->mu = mmod->max_mu;
1648
1649 if(mmod->lbda < mmod->min_lbda) mmod->lbda = mmod->min_lbda;
1650 if(mmod->rad < mmod->min_rad) mmod->rad = mmod->min_rad;
1651 if(mmod->mu < mmod->min_mu) mmod->mu = mmod->min_mu;
1652
1653
1654 mmod->sigsq = SLFV_Update_Sigsq(tree);
1655
1656
1657
1658 // Duration of a generation in number of events
1659 mmod->gen_cal_time = 1./(2.*mmod->mu*pow(mmod->rad,2)/pow(w*h,2)*
1660 (sqrt(2.)*mmod->rad*exp(-.5*pow(h/mmod->rad,2)) + h*sqrt(PI)*erf(sqrt(2.)*h/(2.*mmod->rad)) - sqrt(2.)*mmod->rad)*
1661 (sqrt(2.)*mmod->rad*exp(-.5*pow(w/mmod->rad,2)) + w*sqrt(PI)*erf(sqrt(2.)*w/(2.*mmod->rad)) - sqrt(2.)*mmod->rad));
1662 // Divide by rate of events per calendar time unit to get duration of gen. in calendar time unit
1663 mmod->gen_cal_time *= 1./mmod->lbda;
1664
1665
1666 /* Prepare start disk */
1667 tree->young_disk = PHYREX_Make_Disk_Event(tree->mmod->n_dim,tree->n_otu);
1668 tree->young_disk->time = 0.0;
1669 tree->young_disk->age_fixed = YES;
1670 tree->young_disk->centr->lonlat[0] = .5*(tree->mmod->lim_up->lonlat[0]-tree->mmod->lim_do->lonlat[0]);
1671 tree->young_disk->centr->lonlat[1] = .5*(tree->mmod->lim_up->lonlat[1]-tree->mmod->lim_do->lonlat[1]);
1672 tree->young_disk->n_ldsk_a = tree->n_otu;
1673
1674 for(i=0;i<tree->n_otu;++i)
1675 {
1676 char *s;
1677 tree->young_disk->ldsk_a[i] = PHYREX_Make_Lindisk_Node(tree->mmod->n_dim);
1678 PHYREX_Init_Lindisk_Node(tree->young_disk->ldsk_a[i],tree->young_disk,tree->mmod->n_dim);
1679 s = (char *)mCalloc(strlen(tree->young_disk->ldsk_a[i]->coord->id)+1+20,sizeof(char));
1680 strcpy(s,tree->young_disk->ldsk_a[i]->coord->id);
1681 strcat(s,"_deme0\0");
1682 Free(tree->young_disk->ldsk_a[i]->coord->id);
1683 tree->young_disk->ldsk_a[i]->coord->id = s;
1684 }
1685
1686 /* Generate coordinates for the tip nodes (uniform distribution on the rectangle) */
1687 for(i=0;i<tree->n_otu;i++)
1688 {
1689 tree->young_disk->ldsk_a[i]->coord->lonlat[0] = Uni()*(tree->mmod->lim_up->lonlat[0]-tree->mmod->lim_do->lonlat[0])+tree->mmod->lim_do->lonlat[0]; // longitude
1690 tree->young_disk->ldsk_a[i]->coord->lonlat[1] = Uni()*(tree->mmod->lim_up->lonlat[1]-tree->mmod->lim_do->lonlat[1])+tree->mmod->lim_do->lonlat[1]; // latitude
1691 }
1692
1693 /* /\* !!!!!!!!!!!!!!!!!!!!!!!!!! *\/ */
1694 /* /\* Fix coordinates of first two tips *\/ */
1695 /* tree->young_disk->ldsk_a[0]->coord->lonlat[0] = (tree->mmod->lim_up->lonlat[0]+tree->mmod->lim_do->lonlat[0])/2; // longitude */
1696 /* tree->young_disk->ldsk_a[0]->coord->lonlat[1] = (tree->mmod->lim_up->lonlat[1]+tree->mmod->lim_do->lonlat[1])/2; // latitude */
1697 /* tree->young_disk->ldsk_a[1]->coord->lonlat[0] = (tree->mmod->lim_up->lonlat[1]+tree->mmod->lim_do->lonlat[1])/2 + 1; // longitude */
1698 /* tree->young_disk->ldsk_a[1]->coord->lonlat[1] = (tree->mmod->lim_up->lonlat[1]+tree->mmod->lim_do->lonlat[1])/2 + 1; // latitude */
1699
1700
1701 for(i=0;i<tree->n_otu;i++)
1702 {
1703 tree->young_disk->ldsk_a[i]->nd = tree->a_nodes[i];
1704 tree->a_nodes[i]->ldsk = tree->young_disk->ldsk_a[i];
1705 }
1706
1707 SLFV_Simulate_Backward_Core(tree->young_disk,NO,tree);
1708
1709 PHYREX_Ldsk_To_Tree(tree);
1710 Update_Ancestors(tree->n_root,tree->n_root->v[2],tree);
1711 Update_Ancestors(tree->n_root,tree->n_root->v[1],tree);
1712 RATES_Fill_Lca_Table(tree);
1713
1714
1715 /* min_rate = 1.E-5; */
1716 /* max_rate = 1.E-4; */
1717
1718 phydbl T = PHYREX_Tree_Height(tree);
1719
1720 tree->rates->bl_from_rt = YES;
1721 tree->rates->clock_r = 0.1/fabs(2.*T);
1722 tree->rates->model = STRICTCLOCK;
1723
1724 RATES_Update_Cur_Bl(tree);
1725
1726 Init_Model(cdata,mod,io);
1727
1728 tree->mod->ras->n_catg = 1;
1729 tree->mod->whichmodel = HKY85;
1730 tree->mod->kappa->v = 4.0;
1731
1732 Make_Tree_For_Pars(tree);
1733 Make_Tree_For_Lk(tree);
1734 Make_Spr(tree);
1735 Evolve(tree->data,tree->mod,0,tree);
1736
1737 PhyML_Printf("@@@ %G %G %G : ",mmod->lbda,mmod->mu,mmod->rad);
1738 for(int i=0;i<tree->n_otu-1;++i) for(int j=i+1;j<tree->n_otu;++j) PhyML_Printf("%G ",PHYREX_Dist_Between_Two_Ldsk(tree->a_nodes[i]->ldsk,tree->a_nodes[j]->ldsk,tree));
1739 PhyML_Printf(" : ");
1740 for(int i=0;i<tree->n_otu-1;++i) for(int j=i+1;j<tree->n_otu;++j) PhyML_Printf("%G ",Euclidean_Dist(tree->a_nodes[i]->ldsk->coord,tree->a_nodes[j]->ldsk->coord));
1741 PhyML_Printf("\n");
1742 for(int i=0;i<tree->n_otu;++i) PhyML_Printf("\n%20s %12G %12G",
1743 tree->a_nodes[i]->name,
1744 tree->a_nodes[i]->ldsk->coord->lonlat[0],
1745 tree->a_nodes[i]->ldsk->coord->lonlat[1]);
1746 PhyML_Printf("\n\n");
1747 PhyML_Printf(">> SEQUENCES\n");
1748 Print_CSeq(stdout,NO,tree->data,tree);
1749 PhyML_Printf("<< SEQUENCES");
1750
1751 /* Init_Partial_Lk_Tips_Double(tree); */
1752 /* Init_Partial_Lk_Loc(tree); */
1753
1754 disk = tree->young_disk->prev;
1755 while(disk->prev) disk = disk->prev;
1756
1757 PhyML_Printf("\n. Parameter boundaries: lambda:[%G,%G]; mu=[%G,%G]; rad=[%G,%G]",
1758 mmod->min_lbda,mmod->max_lbda,
1759 mmod->min_mu,mmod->max_mu,
1760 mmod->min_rad,mmod->max_rad);
1761 PhyML_Printf("\n. Useful parameters: lambda=%G; mu=%G; rad=%G; clockr=%G; sigsq=%G",
1762 mmod->lbda,
1763 mmod->mu,
1764 mmod->rad,
1765 tree->rates->clock_r,
1766 mmod->sigsq);
1767
1768 PhyML_Printf("\n. Useful statistics: t.root=%f n.int=%d n.coal=%d n.hit=%d root.x=%f root.y=%f nt.div=%f\n",
1769 disk->time,
1770 PHYREX_Total_Number_Of_Intervals(tree),
1771 PHYREX_Total_Number_Of_Coal_Disks(tree),
1772 PHYREX_Total_Number_Of_Hit_Disks(tree),
1773 disk->ldsk->coord->lonlat[0],
1774 disk->ldsk->coord->lonlat[1],
1775 Nucleotide_Diversity(tree->data));
1776
1777 Exit("\n");
1778
1779 /* PhyML_Printf("\n. Tree: "); */
1780 /* PhyML_Printf("\n. %s \n",Write_Tree(tree)); */
1781
1782 /* PhyML_Printf("\n. Spatial coordinates: "); */
1783 /* for(i=0;i<tree->n_otu;i++) */
1784 /* { */
1785 /* PhyML_Printf("\n%15s %12f %12f", */
1786 /* tree->young_disk->ldsk_a[i]->nd->name, */
1787 /* tree->young_disk->ldsk_a[i]->coord->lonlat[0], */
1788 /* tree->young_disk->ldsk_a[i]->coord->lonlat[1]); */
1789 /* } */
1790 /* PhyML_Printf("\n"); */
1791
1792 /* for(int i=0;i<io->n_otu;i++) */
1793 /* { */
1794 /* PhyML_Printf("\n<clade id=\"clad%d\">",i+1); */
1795 /* PhyML_Printf("\n\t<taxon value=\"%s\"/>",tree->a_nodes[i]->name); */
1796 /* PhyML_Printf("\n</clade>"); */
1797 /* PhyML_Printf("\n<calibration id=\"cal%d\">",i+1); */
1798 /* PhyML_Printf("\n\t<lower>0.0</lower>"); */
1799 /* PhyML_Printf("\n\t<upper>0.0</upper>"); */
1800 /* PhyML_Printf("\n\t<appliesto clade.id=\"clad%d\"/>",i+1); */
1801 /* PhyML_Printf("\n</calibration>"); */
1802 /* } */
1803
1804 return(tree);
1805 }
1806
1807 //////////////////////////////////////////////////////////////
1808 //////////////////////////////////////////////////////////////
1809
SLFV_Simulate_Independent_Loci(int n_otu,int n_loci,phydbl w,phydbl h,int r_seed)1810 t_tree *SLFV_Simulate_Independent_Loci(int n_otu, int n_loci, phydbl w, phydbl h, int r_seed)
1811 {
1812 t_tree *tree;
1813 int n_dim,i;
1814 t_phyrex_mod *mmod;
1815 option *io;
1816 t_mod *mod;
1817 t_opt *s_opt;
1818 calign *cdata;
1819 phydbl area;
1820 phydbl subst_rate;
1821 int locus_idx;
1822
1823 n_dim = 2; // 2-dimensional landscape
1824 area = w * h;
1825
1826 io = (option *)Make_Input();
1827 mod = (t_mod *)Make_Model_Basic();
1828 s_opt = (t_opt *)Make_Optimiz();
1829
1830 Set_Defaults_Input(io);
1831 Set_Defaults_Model(mod);
1832 Set_Defaults_Optimiz(s_opt);
1833
1834
1835 // io stuff
1836 io->mod = mod;
1837 mod->io = io;
1838 mod->s_opt = s_opt;
1839 io->r_seed = r_seed;
1840 io->n_otu = n_otu;
1841 io->init_len = n_loci; /* sequence length */
1842 io->data = Make_Empty_Alignment(io);
1843 io->colalias = NO;
1844
1845 Make_Model_Complete(io->mod);
1846 Set_Model_Name(io->mod);
1847
1848 cdata = Compact_Data(io->data,io);
1849 Free_Seq(io->data,io->n_otu);
1850
1851 Print_Settings(io);
1852
1853
1854 // tree stuff
1855 tree = Make_Tree_From_Scratch(n_otu,cdata);
1856 Random_Tree(tree);
1857 Connect_CSeqs_To_Nodes(cdata,io,tree);
1858 tree->rates = RATES_Make_Rate_Struct(tree->n_otu);
1859 io->rates->model = STRICTCLOCK;
1860 RATES_Init_Rate_Struct(tree->rates,io->rates,tree->n_otu);
1861
1862 tree->times = TIMES_Make_Time_Struct(tree->n_otu);
1863 TIMES_Init_Time_Struct(tree->times,io->times,tree->n_otu);
1864
1865 tree->data = cdata;
1866 tree->mod = mod;
1867 tree->io = io;
1868 tree->n_pattern = tree->data->crunch_len;
1869
1870 Init_Model(cdata,mod,io);
1871
1872 tree->mod->ras->n_catg = 1;
1873 tree->mod->whichmodel = HKY85;
1874 tree->mod->kappa->v = 4.0;
1875
1876 Make_Tree_For_Pars(tree);
1877 Make_Tree_For_Lk(tree);
1878 Make_Spr(tree);
1879
1880 // migrep model stuff */
1881 mmod = PHYREX_Make_Migrep_Model(tree->n_otu,n_dim);
1882 tree->mmod = mmod;
1883 PHYREX_Init_Migrep_Mod(mmod,n_dim,0.0,0.0,w,h);
1884
1885
1886 /* /\* Death proba param *\/ */
1887 /* mmod->mu = Uni(); */
1888 /* /\* Theta (radius) *\/ */
1889 /* mmod->rad = Uni()*(1.0 - 0.2) + 0.2; */
1890 /* /\* Rate of events *\/ */
1891 /* mmod->lbda = 1.0; */
1892 /* /\* Population density *\/ */
1893 /* mmod->rho = Uni()*(3.0 - 0.1) + 0.1; */
1894
1895
1896 /* Death proba param */
1897 mmod->mu = 0.8;
1898 /* Theta (radius) */
1899 mmod->rad = 0.3;
1900 /* Rate of events */
1901 mmod->lbda = 10.*w*h;
1902 /* (Actual, not effective) population density */
1903 mmod->rho = 1.;
1904
1905
1906
1907 // Duration of a generation in number of events
1908 mmod->gen_cal_time = 1./(2.*mmod->mu*pow(mmod->rad,2)/pow(w*h,2)*
1909 (sqrt(2.)*mmod->rad*exp(-.5*pow(h/mmod->rad,2)) + h*sqrt(PI)*erf(sqrt(2.)*h/(2.*mmod->rad)) - sqrt(2.)*mmod->rad)*
1910 (sqrt(2.)*mmod->rad*exp(-.5*pow(w/mmod->rad,2)) + w*sqrt(PI)*erf(sqrt(2.)*w/(2.*mmod->rad)) - sqrt(2.)*mmod->rad));
1911 // Divide by rate of events per calendar time unit to get duration of gen. in calendar time unit
1912 mmod->gen_cal_time *= 1./mmod->lbda;
1913
1914 /* Dispersal parameter (per generation) */
1915 mmod->sigsq = 4.*pow(mmod->rad,4)*mmod->lbda*PI*mmod->mu/area * mmod->gen_cal_time;
1916
1917
1918 phydbl p = SLFV_Prob_Two_Random_Lineages_Coal_One_Event(w,h,mmod->mu,mmod->rad);
1919 printf("\n. p.sim: %G p.appx: %G",p,4.*pow(mmod->mu,2)*pow(PI,2)*pow(mmod->rad,4)/(pow(w*h,2)));
1920 phydbl rhoe = 1./(1.-exp(-mmod->lbda*p*mmod->gen_cal_time));
1921 rhoe /= area;
1922
1923 PhyML_Printf("\n. lambda=%G; mu=%G; rad=%G; clockr=%G; sigsq=%G; neigh=%G; rhoe(small rad)=%G rhoe(numerical)=%G rhoe(large rad)=%G one_gen=%G",
1924 mmod->lbda,
1925 mmod->mu,
1926 mmod->rad,
1927 tree->rates->clock_r,
1928 mmod->sigsq,
1929 2./mmod->mu,
1930 (1./(1.-exp(-4.*pow(mmod->mu,2)*pow(PI,2)*pow(mmod->rad,4)/(pow(w*h,2))*mmod->lbda*mmod->gen_cal_time)))/area,
1931 rhoe,
1932 /* (1./(1.-exp(-mmod->lbda*mmod->mu/(w*h))))/area, */
1933 (1./(1.-exp(-mmod->mu)))/area,
1934 mmod->gen_cal_time);
1935 fflush(NULL);
1936 /* Exit("\n"); */
1937
1938
1939 // Initialize position of sampled individuals
1940 tree->young_disk = PHYREX_Make_Disk_Event(mmod->n_dim,tree->n_otu);
1941 PHYREX_Init_Disk_Event(tree->young_disk,mmod->n_dim,NULL);
1942 tree->young_disk->time = 0.0;
1943 tree->young_disk->mmod = mmod;
1944 tree->young_disk->centr->lonlat[0] = .5*(mmod->lim_up->lonlat[0]-mmod->lim_do->lonlat[0]);
1945 tree->young_disk->centr->lonlat[1] = .5*(mmod->lim_up->lonlat[1]-mmod->lim_do->lonlat[1]);
1946 tree->young_disk->n_ldsk_a = tree->n_otu;
1947
1948 for(i=0;i<tree->n_otu;++i)
1949 {
1950 char *s;
1951 tree->young_disk->ldsk_a[i] = PHYREX_Make_Lindisk_Node(mmod->n_dim);
1952 PHYREX_Init_Lindisk_Node(tree->young_disk->ldsk_a[i],tree->young_disk,mmod->n_dim);
1953 s = (char *)mCalloc(strlen(tree->young_disk->ldsk_a[i]->coord->id)+1+20,sizeof(char));
1954 strcpy(s,tree->young_disk->ldsk_a[i]->coord->id);
1955 strcat(s,"_deme0\0");
1956 Free(tree->young_disk->ldsk_a[i]->coord->id);
1957 tree->young_disk->ldsk_a[i]->coord->id = s;
1958 }
1959
1960 for(i=0;i<tree->n_otu;i++)
1961 {
1962 tree->young_disk->ldsk_a[i]->coord->lonlat[0] = Uni()*(tree->mmod->lim_up->lonlat[0]-tree->mmod->lim_do->lonlat[0]) + tree->mmod->lim_do->lonlat[0]; // longitude
1963 tree->young_disk->ldsk_a[i]->coord->lonlat[1] = Uni()*(tree->mmod->lim_up->lonlat[1]-tree->mmod->lim_do->lonlat[1]) + tree->mmod->lim_do->lonlat[1]; // latitude
1964 }
1965
1966
1967
1968 // Generate sequences
1969 subst_rate = .0;
1970 locus_idx = 0;
1971 do
1972 {
1973 SLFV_Simulate_Backward_Core(tree->young_disk,NO,tree);
1974
1975 // Random selection of a pair. Print out physical distances
1976 // between tips and time of coalescence for this pair, at this
1977 // particular locus
1978 {
1979 int *permut = Permutate(tree->n_otu);
1980 t_ldsk *lin1 = tree->young_disk->ldsk_a[permut[0]];
1981 t_ldsk *lin2 = tree->young_disk->ldsk_a[permut[1]];
1982 phydbl dist =
1983 sqrt(pow(lin1->coord->lonlat[0]-lin2->coord->lonlat[0],2) +
1984 pow(lin1->coord->lonlat[1]-lin2->coord->lonlat[1],2));
1985
1986 t_dsk *disk = tree->young_disk;
1987 do
1988 {
1989 if(disk->ldsk == lin1->prev) lin1 = lin1->prev;
1990 if(disk->ldsk == lin2->prev) lin2 = lin2->prev;
1991 if(lin1 == lin2) break; // found MRCA
1992 disk = disk->prev;
1993 }
1994 while(disk);
1995
1996 assert(lin1 && lin2);
1997
1998 PhyML_Printf("\n. #$# locus %4d ; physical distance: %G time to coalescence: %G",locus_idx,dist,lin1->disk->time);
1999 }
2000
2001 PHYREX_Ldsk_To_Tree(tree);
2002
2003
2004 Update_Ancestors(tree->n_root,tree->n_root->v[2],tree);
2005 Update_Ancestors(tree->n_root,tree->n_root->v[1],tree);
2006 RATES_Fill_Lca_Table(tree);
2007
2008
2009 tree->rates->bl_from_rt = YES;
2010 /* tree->rates->clock_r = 1.E-3/FABS(T); // slow */
2011 /* tree->rates->clock_r = 1.E-0/FABS(T); // fast */
2012 /* tree->rates->clock_r = 1.E-1/FABS(T); // medium */
2013 /* tree->rates->clock_r = 1.E-7; // slow; */
2014 tree->rates->clock_r = 1.E-3; // medium;
2015 /* tree->rates->clock_r = 1.E-2; // fast; */
2016
2017
2018 PhyML_Printf("\n. #!# mutation rate at that locus: %G subst. per time unit",tree->rates->clock_r);
2019 subst_rate += tree->rates->clock_r;
2020
2021 RATES_Update_Cur_Bl(tree);
2022
2023 char *s = Write_Tree(tree);
2024 PhyML_Printf("\n. #@# tree: %s",s);
2025 Free(s);
2026
2027 Evolve(tree->data,tree->mod,locus_idx,tree);
2028
2029 t_dsk *disk = tree->young_disk->prev;
2030 while(disk->prev)
2031 {
2032 disk = disk->prev;
2033 if(disk->next->ldsk != NULL) Free_Ldisk(disk->next->ldsk);
2034 Free_Disk(disk->next);
2035 }
2036
2037 locus_idx++;
2038 }
2039 while(locus_idx < n_loci);
2040
2041 PhyML_Printf("\n. #s# average rate of mutation : %G",subst_rate/(phydbl)n_loci);
2042
2043 tree->data->format = IBDSIM;
2044 PhyML_Printf("\n\n");
2045 Print_CSeq(stdout,NO,tree->data,tree);
2046
2047
2048 Exit("\n");
2049
2050 return tree;
2051 }
2052
2053
2054 /*////////////////////////////////////////////////////////////
2055 ////////////////////////////////////////////////////////////*/
2056
SLFV_Integrated_Coal_Rate(t_ldsk * l0,t_ldsk * l1,phydbl T,t_tree * tree)2057 void SLFV_Integrated_Coal_Rate(t_ldsk *l0, t_ldsk *l1, phydbl T, t_tree *tree)
2058 {
2059 t_ldsk *baseline;
2060 phydbl integrated_coal_rate,njumps,coaltime;
2061 int coal;
2062
2063 PhyML_Printf("\n# ");
2064
2065
2066 coal = -1;
2067 baseline = l0;
2068 integrated_coal_rate = 0.0;
2069 njumps = 0.0;
2070 coaltime = 0.0;
2071 do
2072 {
2073 if(l0->prev->disk->time > l1->prev->disk->time)
2074 {
2075 assert(coal == -1);
2076 integrated_coal_rate +=
2077 tree->mmod->lbda * SLFV_Prob_Two_Lineages_Coal(l0,l1,tree) *
2078 fabs(baseline->disk->time - MAX(l0->prev->disk->time,l1->prev->disk->time));
2079
2080 njumps+=1.0;
2081 baseline = l0->prev;
2082
2083 l0 = l0->prev;
2084 }
2085 else if(l0->prev->disk->time < l1->prev->disk->time)
2086 {
2087 assert(coal == -1);
2088 integrated_coal_rate +=
2089 tree->mmod->lbda * SLFV_Prob_Two_Lineages_Coal(l0,l1,tree) *
2090 fabs(baseline->disk->time - MAX(l0->prev->disk->time,l1->prev->disk->time));
2091
2092 njumps+=1.0;
2093 baseline = l1->prev;
2094
2095 l1 = l1->prev;
2096 }
2097 else
2098 {
2099 assert(l0->prev);
2100 assert(l1->prev);
2101 assert(l0->prev == l1->prev);
2102 coaltime = l0->prev->disk->time;
2103 if(coaltime > T)
2104 {
2105 coal = 1;
2106 }
2107 else
2108 {
2109 coal = 0;
2110 integrated_coal_rate +=
2111 tree->mmod->lbda * SLFV_Prob_Two_Lineages_Coal(l0,l1,tree) *
2112 fabs(baseline->disk->time - T);
2113 }
2114
2115 l0->prev = NULL;
2116 l1->prev = NULL;
2117 }
2118 }
2119 while(l0->prev && l1->prev);
2120
2121 PhyML_Printf("%d %f %g %f",
2122 coal,
2123 integrated_coal_rate,
2124 njumps,
2125 coaltime);
2126 PhyML_Printf("\n");
2127 Exit("\n");
2128 }
2129
2130 /*////////////////////////////////////////////////////////////
2131 ////////////////////////////////////////////////////////////*/
2132 // Calculation of Pr(T>n) where T is the proba of coalescence
SLFV_Sum_Coal_Rate(t_ldsk * l0,t_ldsk * l1,int n,t_tree * tree)2133 void SLFV_Sum_Coal_Rate(t_ldsk *l0, t_ldsk *l1, int n, t_tree *tree)
2134 {
2135 t_dsk *disk;
2136 phydbl prod_one_min_coal_prob;
2137 int coal,n_evts;
2138
2139
2140 PhyML_Printf("\n# ");
2141
2142 prod_one_min_coal_prob = 0.0;
2143 n_evts = 1;
2144 coal = 0;
2145 disk = tree->young_disk->prev;
2146 do
2147 {
2148
2149 if(n_evts <= n) prod_one_min_coal_prob += log(1. - SLFV_Prob_Two_Lineages_Coal(l0,l1,tree));
2150
2151 if(l0->prev == disk->ldsk) l0 = disk->ldsk;
2152 if(l1->prev == disk->ldsk) l1 = disk->ldsk;
2153 if(l0 == l1 && n_evts <= n) coal = 1;
2154 disk = disk->prev;
2155 n_evts++;
2156 }
2157 while(disk);
2158
2159 prod_one_min_coal_prob = exp(prod_one_min_coal_prob);
2160
2161 PhyML_Printf("%d %d %f",coal,n_evts,prod_one_min_coal_prob);
2162 PhyML_Printf("\n");
2163 Exit("\n");
2164 }
2165