1 /*
2
3 PhyML: a program that computes maximum likelihood phylogenies from
4 DNA or AA homologous sequences.
5
6 Copyright (C) Stephane Guindon. Oct 2003 onward.
7
8 All parts of the source except where indicated are distributed under
9 the GNU public licence. See http://www.opensource.org for details.
10
11 */
12
13 #include "stats.h"
14
15
16 //////////////////////////////////////////////////////////////
17 //////////////////////////////////////////////////////////////
18
19 /* RANDOM VARIATES GENERATORS */
20 //////////////////////////////////////////////////////////////
21 //////////////////////////////////////////////////////////////
22
23
24 /*********************************************************************/
25 /* A C-function for TT800 : July 8th 1996 Version */
26 /* by M. Matsumoto, email: matumoto@math.keio.ac.jp */
27 /* tt800() generate one pseudorandom number with double precision */
28 /* which is uniformly distributed on [0,1]-interval */
29 /* for each call. One may choose any initial 25 seeds */
30 /* except all zeros. */
31
32 /* See: ACM Transactions on Modelling and Computer Simulation, */
33 /* Vol. 4, No. 3, 1994, pages 254-266. */
34
tt800()35 phydbl tt800()
36 {
37 int M=7;
38 unsigned long y;
39 static int k = 0;
40 static unsigned long x[25]={ /* initial 25 seeds, change as you wish */
41 0x95f24dab, 0x0b685215, 0xe76ccae7, 0xaf3ec239, 0x715fad23,
42 0x24a590ad, 0x69e4b5ef, 0xbf456141, 0x96bc1b7b, 0xa7bdf825,
43 0xc1de75b7, 0x8858a9c9, 0x2da87693, 0xb657f9dd, 0xffdc8a9f,
44 0x8121da71, 0x8b823ecb, 0x885d05f5, 0x4e20cd47, 0x5a9ad5d9,
45 0x512c0c03, 0xea857ccd, 0x4cc1d30f, 0x8891a8a1, 0xa6b7aadb
46 };
47 static unsigned long mag01[2]={
48 0x0, 0x8ebfd028 /* this is magic vector `a', don't change */
49 };
50 if (k==25) { /* generate 25 words at one time */
51 int kk;
52 for (kk=0;kk<25-M;kk++) {
53 x[kk] = x[kk+M] ^ (x[kk] >> 1) ^ mag01[x[kk] % 2];
54 }
55 for (; kk<25;kk++) {
56 x[kk] = x[kk+(M-25)] ^ (x[kk] >> 1) ^ mag01[x[kk] % 2];
57 }
58 k=0;
59 }
60 y = x[k];
61 y ^= (y << 7) & 0x2b5b2500; /* s and b, magic vectors */
62 y ^= (y << 15) & 0xdb8b0000; /* t and c, magic vectors */
63 y &= 0xffffffff; /* you may delete this line if word size = 32 */
64 /*
65 the following line was added by Makoto Matsumoto in the 1996 version
66 to improve lower bit's corellation.
67 Delete this line to o use the code published in 1994.
68 */
69 y ^= (y >> 16); /* added to the 1994 version */
70 k++;
71 return((phydbl)y / (unsigned long) 0xffffffff);
72 }
73
74 /*********************************************************************/
75
Uni()76 phydbl Uni()
77 {
78 phydbl r,mx;
79 mx = (phydbl)RAND_MAX;
80 r = (phydbl)rand();
81 r /= mx;
82 /* r = tt800(); */
83 return r;
84 }
85
86 /*********************************************************************/
87 // Return a uniform draw u s.t., min <= u <= max
Rand_Int(int min,int max)88 int Rand_Int(int min, int max)
89 {
90 /* phydbl u; */
91 /* u = (phydbl)rand(); */
92 /* u /= (RAND_MAX); */
93 /* u *= (max - min + 1); */
94 /* u += min; */
95 /* return (int)FLOOR(u); */
96
97 int u;
98 /* if(max < min) Generic_Exit(__FILE__,__LINE__,__FUNCTION__); */
99 u = rand();
100 return (u%(max+1-min)+min);
101
102 }
103
104 //////////////////////////////////////////////////////////////
105 //////////////////////////////////////////////////////////////
106
107
108
109 /********************* random Gamma generator ************************
110 * Properties:
111 * (1) X = Gamma(alpha,lambda) = Gamma(alpha,1)/lambda
112 * (2) X1 = Gamma(alpha1,1), X2 = Gamma(alpha2,1) independent
113 * then X = X1+X2 = Gamma(alpha1+alpha2,1)
114 * (3) alpha = k = integer then
115 * X = Gamma(k,1) = Erlang(k,1) = -sum(log(Ui)) = -log(prod(Ui))
116 * where U1,...Uk iid uniform(0,1)
117 *
118 * Decompose alpha = k+delta with k = [alpha], and 0<delta<1
119 * Apply (3) for Gamma(k,1)
120 * Apply Ahrens-Dieter algorithm for Gamma(delta,1)
121 */
122
Ahrensdietergamma(phydbl alpha)123 phydbl Ahrensdietergamma(phydbl alpha)
124 {
125 phydbl x = 0.;
126
127 if (alpha>0.)
128 {
129 phydbl y = 0.;
130 phydbl b = (alpha+exp(1.))/exp(1.);
131 phydbl p = 1./alpha;
132 int go = 0;
133 while (go==0)
134 {
135 phydbl u = Uni();
136 phydbl w = Uni();
137 phydbl v = b*u;
138 if (v<=1.)
139 {
140 x = POW(v,p);
141 y = exp(-x);
142 }
143 else
144 {
145 x = -log(p*(b-v));
146 y = POW(x,alpha-1.);
147 }
148 go = (w<y); // x is accepted when go=1
149 }
150 }
151 return x;
152 }
153
154 //////////////////////////////////////////////////////////////
155 //////////////////////////////////////////////////////////////
156
Rgamma(phydbl shape,phydbl scale)157 phydbl Rgamma(phydbl shape, phydbl scale)
158 {
159 /* Code below is stolen from R sources. Thanks to the R team! */
160 /* References:
161 [1] Shape parameter a >= 1. Algorithm GD in:
162
163 Ahrens, J.H. and Dieter, U. (1982).
164 Generating gamma variates by a modified
165 rejection technique.
166 Comm. ACM, 25, 47-54.
167
168
169 [2] Shape parameter 0 < a < 1. Algorithm GS in:
170
171 Ahrens, J.H. and Dieter, U. (1974).
172 Computer methods for sampling from gamma, beta,
173 poisson and binomial distributions.
174 Computing, 12, 223-246.
175 */
176
177
178 double a = (double)shape;
179 /* Constants : */
180 const static double sqrt32 = 5.656854;
181 const static double exp_m1 = 0.36787944117144232159;/* exp(-1) = 1/e */
182
183 /* Coefficients q[k] - for q0 = sum(q[k]*a^(-k))
184 * Coefficients a[k] - for q = q0+(t*t/2)*sum(a[k]*v^k)
185 * Coefficients e[k] - for exp(q)-1 = sum(e[k]*q^k)
186 */
187 const static double q1 = 0.04166669;
188 const static double q2 = 0.02083148;
189 const static double q3 = 0.00801191;
190 const static double q4 = 0.00144121;
191 const static double q5 = -7.388e-5;
192 const static double q6 = 2.4511e-4;
193 const static double q7 = 2.424e-4;
194
195 const static double a1 = 0.3333333;
196 const static double a2 = -0.250003;
197 const static double a3 = 0.2000062;
198 const static double a4 = -0.1662921;
199 const static double a5 = 0.1423657;
200 const static double a6 = -0.1367177;
201 const static double a7 = 0.1233795;
202
203 /* State variables [FIXME for threading!] :*/
204 static double aa = 0.;
205 static double aaa = 0.;
206 static double s, s2, d; /* no. 1 (step 1) */
207 static double q0, b, si, c;/* no. 2 (step 4) */
208
209 double e, p, q, r, t, u, v, w, x, ret_val;
210
211 if(a < 0.0 || scale <= 0.0) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
212
213 if (a < 1.)
214 { /* GS algorithm for parameters a < 1 */
215 if(a == 0) return 0.;
216 e = 1.0 + exp_m1 * a;
217 for(;;)
218 {
219 p = e * Uni();
220 if (p >= 1.0)
221 {
222 x = -log((e - p) / a);
223 if (Rexp(1.) >= (1.0 - a) * log(x))
224 break;
225 }
226 else
227 {
228 x = exp(log(p) / a);
229 if (Rexp(1.) >= x)
230 break;
231 }
232 }
233 return scale * x;
234 }
235
236 /* --- a >= 1 : GD algorithm --- */
237
238 /* Step 1: Recalculations of s2, s, d if a has changed */
239 if (a != aa)
240 {
241 aa = a;
242 s2 = a - 0.5;
243 s = SQRT(s2);
244 d = sqrt32 - s * 12.0;
245 }
246 /* Step 2: t = standard normal deviate,
247 x = (s,1/2) -normal deviate. */
248
249 /* immediate acceptance (i) */
250 t = Rnorm(0.0,1.0);
251 x = s + 0.5 * t;
252 ret_val = x * x;
253 if (t >= 0.0) return scale * ret_val;
254
255 /* Step 3: u = 0,1 - uniform sample. squeeze acceptance (s) */
256 u = Uni();
257 if (d * u <= t * t * t) return scale * ret_val;
258
259 /* Step 4: recalculations of q0, b, si, c if necessary */
260
261 if (a != aaa)
262 {
263 aaa = a;
264 r = 1.0 / a;
265 q0 = ((((((q7 * r + q6) * r + q5) * r + q4) * r + q3) * r
266 + q2) * r + q1) * r;
267
268 /* Approximation depending on size of parameter a */
269 /* The constants in the expressions for b, si and c */
270 /* were established by numerical experiments */
271
272 if (a <= 3.686)
273 {
274 b = 0.463 + s + 0.178 * s2;
275 si = 1.235;
276 c = 0.195 / s - 0.079 + 0.16 * s;
277 }
278 else if (a <= 13.022)
279 {
280 b = 1.654 + 0.0076 * s2;
281 si = 1.68 / s + 0.275;
282 c = 0.062 / s + 0.024;
283 }
284 else
285 {
286 b = 1.77;
287 si = 0.75;
288 c = 0.1515 / s;
289 }
290 }
291
292 /* Step 5: no quotient test if x not positive */
293 if (x > 0.0)
294 {
295 /* Step 6: calculation of v and quotient q */
296 v = t / (s + s);
297 if (fabs(v) <= 0.25)
298 q = q0 + 0.5 * t * t * ((((((a7 * v + a6) * v + a5) * v + a4) * v
299 + a3) * v + a2) * v + a1) * v;
300 else
301 q = q0 - s * t + 0.25 * t * t + (s2 + s2) * log(1.0 + v);
302
303
304 /* Step 7: quotient acceptance (q) */
305 if (log(1.0 - u) <= q)
306 return scale * ret_val;
307 }
308
309 for(;;)
310 {
311 /* Step 8: e = standard exponential deviate
312 * u = 0,1 -uniform deviate
313 * t = (b,si)-double exponential (laplace) sample */
314 e = Rexp(1.0);
315 u = Uni();
316 u = u + u - 1.0;
317 if (u < 0.0)
318 t = b - si * e;
319 else
320 t = b + si * e;
321 /* Step 9: rejection if t < tau(1) = -0.71874483771719 */
322 if (t >= -0.71874483771719) {
323 /* Step 10: calculation of v and quotient q */
324 v = t / (s + s);
325 if (fabs(v) <= 0.25)
326 q = q0 + 0.5 * t * t *
327 ((((((a7 * v + a6) * v + a5) * v + a4) * v + a3) * v
328 + a2) * v + a1) * v;
329 else
330 q = q0 - s * t + 0.25 * t * t + (s2 + s2) * log(1.0 + v);
331 /* Step 11: hat acceptance (h) */
332 /* (if q not positive go to step 8) */
333 if (q > 0.0) {
334 w = exp(q)-1.0;
335 /* if t is rejected sample again at step 8 */
336 if (c * fabs(u) <= w * exp(e - 0.5 * t * t))
337 break;
338 }
339 }
340 } /* repeat .. until `t' is accepted */
341 x = s + 0.5 * t;
342 return scale * x * x;
343 }
344
345 //////////////////////////////////////////////////////////////
346 //////////////////////////////////////////////////////////////
347
Rexp(phydbl lambda)348 phydbl Rexp(phydbl lambda)
349 {
350 return -log(Uni()+SMALL)/lambda;
351 }
352
353 /*////////////////////////////////////////////////////////////
354 ////////////////////////////////////////////////////////////*/
355
356 /* Returns a random deviates from an exponential distribution */
357 /* left-truncated at 'left' and right-truncated at 'rght' */
Rexp_Trunc(phydbl lambda,phydbl left,phydbl rght)358 phydbl Rexp_Trunc(phydbl lambda, phydbl left, phydbl rght)
359 {
360 phydbl u;
361 u = Uni();
362 return (left-log(1. - u*(1.-exp(-lambda*(rght-left))))/lambda);
363 }
364
365 /*////////////////////////////////////////////////////////////
366 ////////////////////////////////////////////////////////////*/
367
Rnorm(phydbl mean,phydbl sd)368 phydbl Rnorm(phydbl mean, phydbl sd)
369 {
370 /* Box-Muller transformation */
371 phydbl u1, u2, res;
372
373 /* u1=Uni(); */
374 /* u2=Uni(); */
375 /* u1 = SQRT(-2.*log(u1))*COS(6.28318530717959f*u2); */
376
377 /* Polar */
378 phydbl d,x,y;
379
380 do
381 {
382 u1=Uni();
383 u2=Uni();
384 x = 2.*u1-1.;
385 y = 2.*u2-1.;
386 d = x*x + y*y;
387 if(d>.0 && d<1.) break;
388 }
389 while(1);
390 u1 = x*SQRT((-2.*log(d))/d);
391
392 res = u1*sd+mean;
393
394 if(isnan(res) || isinf(res))
395 {
396 printf("\n. res=%f sd=%f mean=%f u1=%f u2=%f",res,sd,mean,u1,u2);
397 }
398 return res;
399 }
400
401 //////////////////////////////////////////////////////////////
402 //////////////////////////////////////////////////////////////
403
404
Rnorm_Multid(phydbl * mu,phydbl * cov,int dim)405 phydbl *Rnorm_Multid(phydbl *mu, phydbl *cov, int dim)
406 {
407 phydbl *L,*x,*y;
408 int i,j;
409
410 x = (phydbl *)mCalloc(dim,sizeof(phydbl));
411 y = (phydbl *)mCalloc(dim,sizeof(phydbl));
412
413 L = (phydbl *)Cholesky_Decomp(cov,dim);
414
415 for(i=0;i<dim;i++) x[i]=Rnorm(0.0,1.0);
416 for(i=0;i<dim;i++) for(j=0;j<dim;j++) y[i] += L[i*dim+j]*x[j];
417 for(i=0;i<dim;i++) y[i] += mu[i];
418
419 Free(L);
420 Free(x);
421
422 return(y);
423 }
424
425 //////////////////////////////////////////////////////////////
426 //////////////////////////////////////////////////////////////
427
Rnorm_Trunc_Inverse(phydbl mean,phydbl sd,phydbl min,phydbl max,int * error)428 phydbl Rnorm_Trunc_Inverse(phydbl mean, phydbl sd, phydbl min, phydbl max, int *error)
429 {
430
431 phydbl u, ret_val,eps;
432 phydbl z;
433 phydbl z_min,z_max;
434 phydbl cdf_min, cdf_max;
435
436 z = 0.0;
437 u = -1.0;
438 *error = 0;
439
440 if(sd < 1.E-100)
441 {
442 PhyML_Printf("\n. Small variance detected in Rnorm_Trunc.");
443 PhyML_Printf("\n. mean=%f sd=%f min=%f max=%f",mean,sd,min,max);
444 *error = 1;
445 return -1.0;
446 }
447
448 z_min = (min - mean)/sd;
449 z_max = (max - mean)/sd;
450
451 eps = (z_max-z_min)/1E+6;
452
453
454 /* Simple inversion method. Seems to work well. Needs more thorough testing though... */
455 cdf_min = Pnorm(z_min,0.0,1.0);
456 cdf_max = Pnorm(z_max,0.0,1.0);
457 u = cdf_min + (cdf_max-cdf_min) * Uni();
458 z = PointNormal(u);
459
460 if((z < z_min-eps) || (z > z_max+eps))
461 {
462 *error = 1;
463 PhyML_Printf("\n. Numerical precision issue detected in Rnorm_Trunc.");
464 PhyML_Printf("\n. z = %f",z);
465 PhyML_Printf("\n. mean=%f sd=%f z_min=%f z_max=%f min=%f max=%f",mean,sd,z_min,z_max,min,max);
466 ret_val = (max - min)/2.;
467 Exit("\n");
468 }
469
470 ret_val = z*sd+mean;
471
472 return ret_val;
473 }
474
475 //////////////////////////////////////////////////////////////
476 //////////////////////////////////////////////////////////////
477 /* Borrowed from https://github.com/olafmersmann/truncnorm/blob/afc91b696db8a3feda25d39435fd979bacd962c6/src/rtruncnorm.c */
478
Rnorm_Trunc_Algo1(phydbl alpha,phydbl beta)479 phydbl Rnorm_Trunc_Algo1(phydbl alpha, phydbl beta)
480 {
481 phydbl z = -DBL_MAX;
482 while(z < alpha || z > beta)
483 {
484 z = Rnorm(0.0,1.0);
485 }
486 return(z);
487 }
488
Rnorm_Trunc_Algo2(phydbl alpha,phydbl beta)489 phydbl Rnorm_Trunc_Algo2(phydbl alpha, phydbl beta)
490 {
491 phydbl z = 0.0;
492 phydbl d_alpha = Dnorm(alpha,0.0,1.0);
493 const double ub = alpha < 0.0 && beta > 0.0 ? M_1_SQRT_2PI : d_alpha;
494 do
495 {
496 z = Uni()*(beta-alpha) + alpha;
497 }
498 while(Uni() * ub > Dnorm(z,0.0,1.0));
499 return(z);
500 }
501
Rnorm_Trunc_Algo3(phydbl alpha,phydbl beta)502 phydbl Rnorm_Trunc_Algo3(phydbl alpha, phydbl beta)
503 {
504
505 phydbl z = alpha - 1.0;
506 while(z < alpha || z > beta)
507 {
508 z = Rnorm(0,1);
509 z = fabs(z);
510 }
511 return(z);
512 }
513
Rnorm_Trunc_Algo4(phydbl alpha,phydbl beta)514 phydbl Rnorm_Trunc_Algo4(phydbl alpha, phydbl beta)
515 {
516 phydbl z = 0.0;
517 const phydbl ainv = 1.0/alpha;
518 phydbl rho;
519 do
520 {
521 z = Rexp(ainv) + alpha;
522 rho = exp(-0.5 * pow((z-alpha),2));
523 }
524 while(Uni() > rho || z > beta);
525 return(z);
526 }
527
528
Rnorm_Trunc(phydbl mean,phydbl sd,phydbl min,phydbl max,int * error)529 phydbl Rnorm_Trunc(phydbl mean, phydbl sd, phydbl min, phydbl max, int *error)
530 {
531 phydbl alpha,beta;
532 phydbl d_alpha,d_beta;
533 phydbl z,ret_val;
534
535 alpha = (min - mean)/sd;
536 beta = (max - mean)/sd;
537
538 d_alpha = Dnorm(alpha,0.0,1.0);
539 d_beta = Dnorm(beta,0.0,1.0);
540
541 /* PhyML_Printf("\n. alpha: %f beta: %f d_alpha: %f d_beta: %f",alpha,beta,d_alpha,d_beta); */
542
543
544 if(beta < alpha) return NAN;
545 else
546 {
547 if(!(alpha > 0.0) && !(beta < 0.0))
548 {
549 if(!(d_alpha > 0.15) || !(d_beta > 0.15))
550 {
551 z = Rnorm_Trunc_Algo1(alpha,beta);
552 return(mean + sd * z);
553 }
554 else
555 {
556 z = Rnorm_Trunc_Algo2(alpha,beta);
557 return(mean + sd * z);
558 }
559 }
560 else if(alpha > 0.0)
561 {
562 if(!(d_alpha / d_beta > 2.18))
563 {
564 z = Rnorm_Trunc_Algo2(alpha,beta);
565 return(mean + sd * z);
566 }
567 else
568 {
569 if(!(alpha > 0.725))
570 {
571 z = Rnorm_Trunc_Algo3(alpha,beta);
572 return(mean + sd * z);
573 }
574 else
575 {
576 z = Rnorm_Trunc_Algo4(alpha,beta);
577 return(mean + sd * z);
578 }
579 }
580 }
581 else
582 {
583 if(!(d_beta / d_alpha > 2.18))
584 {
585 z = Rnorm_Trunc_Algo2(alpha,beta);
586 return(mean - sd * z);
587 }
588 else if(beta > -0.725)
589 {
590 z = Rnorm_Trunc_Algo3(alpha,beta);
591 return(mean - sd * z);
592 }
593 else
594 {
595 z = Rnorm_Trunc_Algo4(alpha,beta);
596 return(mean - sd * z);
597 }
598 }
599 }
600
601 ret_val = mean + sd*z;
602 return ret_val;
603 }
604
605 //////////////////////////////////////////////////////////////
606 //////////////////////////////////////////////////////////////
607
608
Rnorm_Multid_Trunc(phydbl * mean,phydbl * cov,phydbl * min,phydbl * max,int dim)609 phydbl *Rnorm_Multid_Trunc(phydbl *mean, phydbl *cov, phydbl *min, phydbl *max, int dim)
610 {
611 int i,j;
612 phydbl *L,*x, *u;
613 phydbl up, low, rec;
614 int err;
615
616 u = (phydbl *)mCalloc(dim,sizeof(phydbl));
617 x = (phydbl *)mCalloc(dim,sizeof(phydbl));
618
619 L = Cholesky_Decomp(cov,dim);
620
621 low = (min[0]-mean[0])/L[0*dim+0];
622 up = (max[0]-mean[0])/L[0*dim+0];
623 u[0] = Rnorm_Trunc(0.0,1.0,low,up,&err);
624
625 for(i=1;i<dim;i++)
626 {
627 rec = .0;
628 for(j=0;j<i;j++) rec += L[i*dim+j] * u[j];
629 low = (min[i]-mean[i]-rec)/L[i*dim+i];
630 up = (max[i]-mean[i]-rec)/L[i*dim+i];
631 u[i] = Rnorm_Trunc(0.0,1.0,low,up,&err);
632 }
633
634 x = Matrix_Mult(L,u,dim,dim,dim,1);
635
636 /* PhyML_Printf("\n>>>\n"); */
637 /* for(i=0;i<dim;i++) */
638 /* { */
639 /* for(j=0;j<dim;j++) */
640 /* { */
641 /* PhyML_Printf("%10lf ",L[i*dim+j]); */
642 /* } */
643 /* PhyML_Printf("\n"); */
644 /* } */
645 /* PhyML_Printf("\n"); */
646
647 /* for(i=0;i<dim;i++) PhyML_Printf("%f ",u[i]); */
648 /* PhyML_Printf("\n"); */
649
650
651 /* PhyML_Printf("\n"); */
652 /* for(i=0;i<dim;i++) PhyML_Printf("%10lf ",x[i]); */
653 /* PhyML_Printf("\n<<<\n"); */
654
655 for(i=0;i<dim;i++) x[i] += mean[i];
656
657 Free(L);
658 Free(u);
659
660 return x;
661 }
662
663 //////////////////////////////////////////////////////////////
664 //////////////////////////////////////////////////////////////
665 /* Inversion method for sampling from Geometric distibution */
Rgeom(phydbl p)666 phydbl Rgeom(phydbl p)
667 {
668 phydbl x,u;
669
670 u = Uni();
671 if(u < SMALL) return(0.0);
672 x = log(u) / log(1. - p);
673
674 return(CEIL(x));
675 }
676
677 //////////////////////////////////////////////////////////////
678 //////////////////////////////////////////////////////////////
679
Dgeom(phydbl k,phydbl p,int logit)680 phydbl Dgeom(phydbl k, phydbl p, int logit)
681 {
682 phydbl prob;
683
684 if(k < 1.) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
685 if(p > 1.-SMALL)
686 {
687 if(logit == YES) return(-INFINITY);
688 else return(0.0);
689 }
690
691 if(logit == YES)
692 prob = (k - 1.)*log(1. - p) + log(p);
693 else
694 prob = POW(1.-p,k-1.)*p;
695
696 return(prob);
697 }
698
699
700 //////////////////////////////////////////////////////////////
701 //////////////////////////////////////////////////////////////
702
Pgeom(phydbl k,phydbl p)703 phydbl Pgeom(phydbl k, phydbl p)
704 {
705
706 if(k < 1.) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
707 if(p > 1.-SMALL) return(0.0);
708
709 return(1. - POW((1. - p),k));
710
711 }
712
713
714 /*
715 * Random variates from the Poisson distribution. Completely stolen from R code.
716 *
717 * REFERENCE
718 *
719 * Ahrens, J.H. and Dieter, U. (1982).
720 * Computer generation of Poisson deviates
721 * from modified normal distributions.
722 * ACM Trans. Math. Software 8, 163-179.
723 */
Rpois(phydbl mmu)724 phydbl Rpois(phydbl mmu)
725 {
726 double mu = (double)mmu;
727
728 double a0 = -0.5;
729 double a1 = 0.3333333;
730 double a2 = -0.2500068;
731 double a3 = 0.2000118;
732 double a4 = -0.1661269;
733 double a5 = 0.1421878;
734 double a6 = -0.1384794;
735 double a7 = 0.1250060;
736 double one_7 = 0.1428571428571428571;
737 double one_12 = 0.0833333333333333333;
738 double one_24 = 0.0416666666666666667;
739
740 /* Factorial Table (0:9)! */
741 const static double fact[10] =
742 {
743 1., 1., 2., 6., 24., 120., 720., 5040., 40320., 362880.
744 };
745 /* These are static --- persistent between calls for same mu : */
746 static int l, m;
747 static double b1, b2, c, c0, c1, c2, c3;
748 static double pp[36], p0, p, q, s, d, omega;
749 static double big_l;/* integer "w/o overflow" */
750 static double muprev = 0., muprev2 = 0.;/*, muold = 0.*/
751 /* Local Vars [initialize some for -Wall]: */
752 double del, difmuk= 0., E= 0., fk= 0., fx, fy, g, px, py, t, u= 0., v, x;
753 double pois = -1.;
754 int k, kflag, big_mu, new_big_mu = FALSE;
755
756 if (isnan(mu) || mu < 0.0) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
757 if (mu <= 0.) return 0.;
758
759 big_mu = mu >= 10.;
760 if(big_mu) new_big_mu = FALSE;
761 if (!(big_mu && mu == muprev)) {/* maybe compute new persistent par.s */
762 if (big_mu)
763 {
764 new_big_mu = TRUE;
765 /* Case A. (recalculation of s,d,l because mu has changed):
766 * The poisson probabilities pk exceed the discrete normal
767 * probabilities fk whenever k >= m(mu).
768 */
769 muprev = mu;
770 s = sqrt(mu);
771 d = 6. * mu * mu;
772 big_l = floor(mu - 1.1484);
773 /* = an upper bound to m(mu) for all mu >= 10.*/
774 }
775 else
776 {
777 /* Small mu ( < 10) -- not using normal approx. */
778 /* Case B. (start new table and calculate p0 if necessary) */
779 /*muprev = 0.;-* such that next time, mu != muprev ..*/
780 if (mu != muprev)
781 {
782 muprev = mu;
783 m = MAX(1, (int) mu);
784 l = 0; /* pp[] is already ok up to pp[l] */
785 q = p0 = p = exp(-mu);
786 }
787 for(;;)
788 {
789 /* Step U. uniform sample for inversion method */
790 u = Uni();
791 if (u <= p0) return 0.;
792 /* Step T. table comparison until the end pp[l] of the
793 pp-table of cumulative poisson probabilities
794 (0.458 > ~= pp[9](= 0.45792971447) for mu=10 ) */
795 if (l != 0)
796 {
797 for (k = (u <= 0.458) ? 1 : MIN(l, m); k <= l; k++) if (u <= pp[k]) return((phydbl)k);
798 if (l == 35) /* u > pp[35] */
799 continue;
800 }
801 /* Step C. creation of new poisson
802 probabilities p[l..] and their cumulatives q =: pp[k] */
803 l++;
804 for (k = l; k <= 35; k++)
805 {
806 p *= mu / k;
807 q += p;
808 pp[k] = q;
809 if (u <= q)
810 {
811 l = k;
812 return((phydbl)k);
813 }
814 }
815 l = 35;
816 } /* end(repeat) */
817 }/* mu < 10 */
818 } /* end {initialize persistent vars} */
819 /* Only if mu >= 10 : ----------------------- */
820 /* Step N. normal sample */
821 g = mu + s * Rnorm(0.0,1.0);/* norm_rand() ~ N(0,1), standard normal */
822 if (g >= 0.)
823 {
824 pois = floor(g);
825 /* Step I. immediate acceptance if pois is large enough */
826 if (pois >= big_l)
827 return((phydbl)pois);
828 /* Step S. squeeze acceptance */
829 fk = pois;
830 difmuk = mu - fk;
831 u = Uni(); /* ~ U(0,1) - sample */
832 if (d * u >= difmuk * difmuk * difmuk)
833 return((phydbl)pois);
834 }
835 /* Step P. preparations for steps Q and H.
836 (recalculations of parameters if necessary) */
837 if (new_big_mu || mu != muprev2) {
838 /* Careful! muprev2 is not always == muprev
839 because one might have exited in step I or S
840 */
841 muprev2 = mu;
842 omega = M_1_SQRT_2PI / s;
843 /* The quantities b1, b2, c3, c2, c1, c0 are for the Hermite
844 * approximations to the discrete normal probabilities fk. */
845 b1 = one_24 / mu;
846 b2 = 0.3 * b1 * b1;
847 c3 = one_7 * b1 * b2;
848 c2 = b2 - 15. * c3;
849 c1 = b1 - 6. * b2 + 45. * c3;
850 c0 = 1. - b1 + 3. * b2 - 15. * c3;
851 c = 0.1069 / mu; /* guarantees majorization by the 'hat'-function. */
852 }
853 if (g >= 0.) {
854 /* 'Subroutine' F is called (kflag=0 for correct return) */
855 kflag = 0;
856 goto Step_F;
857 }
858 for(;;)
859 {
860 /* Step E. Exponential Sample */
861 E = Rexp(1.0); /* ~ Exp(1) (standard exponential) */
862 /* sample t from the laplace 'hat'
863 (if t <= -0.6744 then pk < fk for all mu >= 10.) */
864 u = 2. * Uni() - 1.;
865 /* t = 1.8 + fsign(E, u); */
866 t = 1.8 + ((u >= 0.0) ? fabs(E) : -fabs(E));
867 if (t > -0.6744)
868 {
869 pois = floor(mu + s * t);
870 fk = pois;
871 difmuk = mu - fk;
872 /* 'subroutine' F is called (kflag=1 for correct return) */
873 kflag = 1;
874 Step_F: /* 'subroutine' F : calculation of px,py,fx,fy. */
875 if (pois < 10) { /* use factorials from table fact[] */
876 px = -mu;
877 py = pow(mu, pois) / fact[(int)pois];
878 }
879 else {
880 /* Case pois >= 10 uses polynomial approximation
881 a0-a7 for accuracy when advisable */
882 del = one_12 / fk;
883 del = del * (1. - 4.8 * del * del);
884 v = difmuk / fk;
885 if (fabs(v) <= 0.25)
886 px = fk * v * v * (((((((a7 * v + a6) * v + a5) * v + a4) *
887 v + a3) * v + a2) * v + a1) * v + a0)
888 - del;
889 else /* |v| > 1/4 */
890 px = fk * log(1. + v) - difmuk - del;
891 py = M_1_SQRT_2PI / sqrt(fk);
892 }
893 x = (0.5 - difmuk) / s;
894 x *= x;/* x^2 */
895 fx = -0.5 * x;
896 fy = omega * (((c3 * x + c2) * x + c1) * x + c0);
897 if (kflag > 0) {
898 /* Step H. Hat acceptance (E is repeated on rejection) */
899 if (c * fabs(u) <= py * exp(px + E) - fy * exp(fx + E))
900 break;
901 } else
902 /* Step Q. Quotient acceptance (rare case) */
903 if (fy - u * fy <= py * exp(px - fx))
904 break;
905 }/* t > -.67.. */
906 }
907 return((phydbl)pois);
908 }
909
910
911 //////////////////////////////////////////////////////////////
912 //////////////////////////////////////////////////////////////
913
914 /* DENSITIES / PROBA */
915 //////////////////////////////////////////////////////////////
916 //////////////////////////////////////////////////////////////
917
918
Dnorm_Moments(phydbl x,phydbl mean,phydbl var)919 phydbl Dnorm_Moments(phydbl x, phydbl mean, phydbl var)
920 {
921 phydbl dens,sd,pi;
922
923 pi = 3.141593;
924 sd = SQRT(var);
925
926 dens = 1./(SQRT(2*pi)*sd)*exp(-((x-mean)*(x-mean)/(2.*sd*sd)));
927
928 return dens;
929 }
930
931 //////////////////////////////////////////////////////////////
932 //////////////////////////////////////////////////////////////
933
934
Dnorm(phydbl x,phydbl mean,phydbl sd)935 phydbl Dnorm(phydbl x, phydbl mean, phydbl sd)
936 {
937 phydbl dens;
938
939 /* dens = -(.5*LOG2PI+log(sd)) - .5*POW(x-mean,2)/POW(sd,2); */
940 /* return exp(dens); */
941
942 if(sd < SMALL && fabs(x-mean) < SMALL) return(1.0);
943
944 x = (x-mean)/sd;
945
946 dens = M_1_SQRT_2_PI * exp(-0.5*x*x);
947
948 return dens / sd;
949 }
950
951 //////////////////////////////////////////////////////////////
952 //////////////////////////////////////////////////////////////
953
Log_Dnorm(phydbl x,phydbl mean,phydbl sd,int * err)954 phydbl Log_Dnorm(phydbl x, phydbl mean, phydbl sd, int *err)
955 {
956 phydbl dens;
957
958 *err = NO;
959
960 if(sd < SMALL && fabs(x-mean) < SMALL) return(0.0);
961
962 x = (x-mean)/sd;
963
964 dens = -(phydbl)log(SQRT(2.*PI)) - x*x*0.5 - log(sd);
965
966 if(dens < -BIG)
967 {
968 PhyML_Printf("\n. dens=%f -- x=%f mean=%f sd=%f\n",dens,x,mean,sd);
969 *err = 1;
970 }
971
972 return dens;
973 }
974
975 /*////////////////////////////////////////////////////////////
976 ////////////////////////////////////////////////////////////*/
977
Log_Dnorm_Trunc(phydbl x,phydbl mean,phydbl sd,phydbl lo,phydbl up,int * err)978 phydbl Log_Dnorm_Trunc(phydbl x, phydbl mean, phydbl sd, phydbl lo, phydbl up, int *err)
979 {
980 phydbl log_dens;
981 phydbl cdf_up, cdf_lo;
982
983 if(x < lo || x > up) return -230.;
984
985 if(sd < SMALL && fabs(x-mean) < SMALL) return(0.0);
986
987 *err = NO;
988 cdf_lo = cdf_up = 0.0;
989
990 log_dens = Log_Dnorm(x,mean,sd,err);
991
992 if(*err == YES)
993 {
994 PhyML_Printf("\n== mean=%f sd=%f lo=%f up=%f cdf_lo=%G CDF_up=%G log_dens=%G",mean,sd,lo,up,cdf_lo,cdf_up,log_dens);
995 PhyML_Printf("\n== Warning in file %s at line %d\n",__FILE__,__LINE__);
996 *err = YES;
997 }
998
999 cdf_up = Pnorm(up,mean,sd);
1000 cdf_lo = Pnorm(lo,mean,sd);
1001
1002 if(cdf_up - cdf_lo < 1.E-20)
1003 {
1004 log_dens = -230.; /* ~log(1.E-100) */
1005 }
1006 else
1007 {
1008 log_dens -= log(cdf_up - cdf_lo);
1009 }
1010
1011 if(isnan(log_dens) || isinf(fabs(log_dens)))
1012 {
1013 PhyML_Printf("\n. x=%f mean=%f sd=%f lo=%f up=%f cdf_lo=%G CDF_up=%G log_dens=%G",x,mean,sd,lo,up,cdf_lo,cdf_up,log_dens);
1014 PhyML_Printf("\n. Warning in file %s at line %d\n",__FILE__,__LINE__);
1015 *err = YES;
1016 }
1017
1018 return log_dens;
1019 }
1020
1021 //////////////////////////////////////////////////////////////
1022 //////////////////////////////////////////////////////////////
1023
1024
Dnorm_Trunc(phydbl x,phydbl mean,phydbl sd,phydbl lo,phydbl up)1025 phydbl Dnorm_Trunc(phydbl x, phydbl mean, phydbl sd, phydbl lo, phydbl up)
1026 {
1027 phydbl dens;
1028 phydbl cdf_up, cdf_lo;
1029
1030 if(sd < SMALL && fabs(x-mean) < SMALL) return(1.0);
1031
1032 dens = Dnorm(x,mean,sd);
1033 cdf_up = Pnorm(up,mean,sd);
1034 cdf_lo = Pnorm(lo,mean,sd);
1035
1036 dens /= (cdf_up - cdf_lo);
1037
1038 if(isnan(dens) || isinf(fabs(dens)))
1039 {
1040 PhyML_Printf("\n== mean=%f sd=%f lo=%f up=%f cdf_lo=%G CDF_up=%G",mean,sd,lo,up,cdf_lo,cdf_up);
1041 PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
1042 Exit("\n");
1043 }
1044
1045 return dens;
1046 }
1047
1048 //////////////////////////////////////////////////////////////
1049 //////////////////////////////////////////////////////////////
1050
1051
Dnorm_Multi(phydbl * x,phydbl * mu,phydbl * cov,int size,int _log)1052 phydbl Dnorm_Multi(phydbl *x, phydbl *mu, phydbl *cov, int size, int _log)
1053 {
1054 phydbl *xmmu,*invcov;
1055 phydbl *buff1,*buff2;
1056 int i;
1057 phydbl det,density;
1058
1059 xmmu = (phydbl *)mCalloc(size,sizeof(phydbl));
1060 invcov = (phydbl *)mCalloc(size*size,sizeof(phydbl));
1061
1062 for(i=0;i<size;i++) xmmu[i] = x[i] - mu[i];
1063 For(i,size*size) invcov[i] = cov[i];
1064
1065 if(!Matinv(invcov,size,size,NO))
1066 {
1067 PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__);
1068 Exit("\n");
1069 }
1070
1071 buff1 = Matrix_Mult(xmmu,invcov,1,size,size,size);
1072 buff2 = Matrix_Mult(buff1,xmmu,1,size,size,1);
1073
1074 det = Matrix_Det(cov,size,NO);
1075 /* det_1D(cov,size,&det); */
1076
1077 density = size * LOG2PI + log(det) + buff2[0];
1078 density /= -2.;
1079
1080 /* density = (1./(POW(2.*PI,size/2.)*SQRT(fabs(det)))) * exp(-0.5*buff2[0]); */
1081
1082 Free(xmmu);
1083 Free(invcov);
1084 Free(buff1);
1085 Free(buff2);
1086
1087 return (_log)?(density):(exp(density));
1088 }
1089
1090 //////////////////////////////////////////////////////////////
1091 //////////////////////////////////////////////////////////////
1092
1093
Dnorm_Multi_Given_InvCov_Det(phydbl * x,phydbl * mu,phydbl * invcov,phydbl log_det,int size,int _log)1094 phydbl Dnorm_Multi_Given_InvCov_Det(phydbl *x, phydbl *mu, phydbl *invcov, phydbl log_det, int size, int _log)
1095 {
1096 phydbl *xmmu;
1097 phydbl *buff1,*buff2;
1098 int i;
1099 phydbl density;
1100
1101 xmmu = (phydbl *)mCalloc(size,sizeof(phydbl));
1102
1103 for(i=0;i<size;i++) xmmu[i] = x[i] - mu[i];
1104
1105 buff1 = Matrix_Mult(xmmu,invcov,1,size,size,size);
1106 buff2 = Matrix_Mult(buff1,xmmu,1,size,size,1);
1107
1108 density = size * LOG2PI + log_det + buff2[0];
1109 density /= -2.;
1110
1111 Free(xmmu);
1112 Free(buff1);
1113 Free(buff2);
1114
1115 return (_log)?(density):(exp(density));
1116 }
1117
1118 //////////////////////////////////////////////////////////////
1119 //////////////////////////////////////////////////////////////
1120
1121
Pbinom(int N,int ni,phydbl p)1122 phydbl Pbinom(int N, int ni, phydbl p)
1123 {
1124 return Bico(N,ni)*POW(p,ni)*POW(1-p,N-ni);
1125 }
1126
1127 //////////////////////////////////////////////////////////////
1128 //////////////////////////////////////////////////////////////
1129
1130
Bivariate_Normal_Density(phydbl x,phydbl y,phydbl mux,phydbl muy,phydbl sdx,phydbl sdy,phydbl rho)1131 phydbl Bivariate_Normal_Density(phydbl x, phydbl y, phydbl mux, phydbl muy, phydbl sdx, phydbl sdy, phydbl rho)
1132 {
1133 phydbl cx, cy;
1134 phydbl pi;
1135 phydbl dens;
1136 phydbl rho2;
1137
1138 pi = 3.141593;
1139
1140 cx = x - mux;
1141 cy = y - muy;
1142
1143 rho2 = rho*rho;
1144
1145 dens = 1./(2*pi*sdx*sdy*SQRT(1.-rho2));
1146 dens *= exp((-1./(2.*(1.-rho2)))*(cx*cx/(sdx*sdx)+cy*cy/(sdy*sdy)+2*rho*cx*cy/(sdx*sdy)));
1147
1148 return dens;
1149 }
1150
1151 //////////////////////////////////////////////////////////////
1152 //////////////////////////////////////////////////////////////
1153 // E(X) = n(1-p)/p ; V(X) = n(1-p)/p^2
Dnbinom(phydbl x,phydbl n,phydbl p,int logit)1154 phydbl Dnbinom(phydbl x, phydbl n, phydbl p, int logit)
1155 {
1156
1157 phydbl lnDens = LnGamma(x+n) - LnGamma(n) - LnFact(x) + n*log(p) + x*log(1.-p);
1158
1159 if(logit == TRUE)
1160 {
1161 return(lnDens);
1162 }
1163 else
1164 {
1165 return(exp(lnDens));
1166 }
1167 }
1168
1169 //////////////////////////////////////////////////////////////
1170 //////////////////////////////////////////////////////////////
1171
Rnbinom(phydbl n,phydbl p)1172 phydbl Rnbinom(phydbl n, phydbl p)
1173 {
1174 phydbl y,x;
1175 y = Rgamma(n,(1.-p)/p);
1176 x = Rpois(y);
1177 return(x);
1178 }
1179
1180 //////////////////////////////////////////////////////////////
1181 //////////////////////////////////////////////////////////////
1182
Dgamma_Moments(phydbl x,phydbl mean,phydbl var)1183 phydbl Dgamma_Moments(phydbl x, phydbl mean, phydbl var)
1184 {
1185 phydbl shape, scale;
1186
1187 if(var < 1.E-20)
1188 {
1189 /* var = 1.E-20; */
1190 PhyML_Printf("\n. var=%f mean=%f",var,mean);
1191 PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
1192 Exit("\n");
1193 }
1194
1195 if(mean < 1.E-20)
1196 {
1197 /* mean = 1.E-20; */
1198 PhyML_Printf("\n. var=%f mean=%f",var,mean);
1199 PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
1200 Exit("\n");
1201 }
1202
1203
1204 shape = mean * mean / var;
1205 scale = var / mean;
1206
1207 return(Dgamma(x,shape,scale));
1208 }
1209
1210 //////////////////////////////////////////////////////////////
1211 //////////////////////////////////////////////////////////////
1212
Dgamma(phydbl x,phydbl shape,phydbl scale)1213 phydbl Dgamma(phydbl x, phydbl shape, phydbl scale)
1214 {
1215 phydbl v;
1216
1217 if(x > INFINITY)
1218 {
1219 PhyML_Printf("\n. WARNING: huge value of x -> x = %G",x);
1220 x = 1.E+10;
1221 }
1222
1223 if(x < 1.E-200)
1224 {
1225 if(x < 0.0) return 0.0;
1226 else
1227 {
1228 PhyML_Printf("\n. WARNING: Dgamma -> small value of x = %G (shape: %G scale: %G <X>: %G)",x,shape,scale,shape*scale);
1229 x = 1.E-200;
1230 }
1231 }
1232
1233
1234 if(scale < 0.0 || shape < 0.0)
1235 {
1236 PhyML_Printf("\n. scale=%f shape=%f",scale,shape);
1237 Exit("\n");
1238 }
1239
1240
1241 v = (shape-1.) * log(x) - shape * log(scale) - x / scale - LnGamma(shape);
1242
1243
1244 if(v < 500.)
1245 {
1246 v = exp(v);
1247 }
1248 else
1249 {
1250 PhyML_Printf("\n. WARNING v=%f x=%f shape=%f scale=%f",v,x,shape,scale);
1251 PhyML_Printf("\n. log(x) = %G LnGamma(shape)=%G",log(x),LnGamma(shape));
1252 v = exp(v);
1253 /* PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__); */
1254 /* Exit("\n"); */
1255 }
1256
1257
1258 return v;
1259 }
1260
1261 /*////////////////////////////////////////////////////////////
1262 ////////////////////////////////////////////////////////////*/
1263
Dexp(phydbl x,phydbl param)1264 phydbl Dexp(phydbl x, phydbl param)
1265 {
1266 return param * exp(-param * x);
1267 }
1268
1269 /*////////////////////////////////////////////////////////////
1270 ////////////////////////////////////////////////////////////*/
1271
1272 /* Returns the density of an exponential distribution left-truncated */
1273 /* at 'left' and right-truncated at 'rght' */
1274
Dexp_Trunc(phydbl x,phydbl lambda,phydbl left,phydbl rght)1275 phydbl Dexp_Trunc(phydbl x, phydbl lambda, phydbl left, phydbl rght)
1276 {
1277 return (lambda * exp(-lambda * x))/(exp(-lambda * left) - exp(-lambda * rght));
1278 }
1279
1280 /*////////////////////////////////////////////////////////////
1281 ////////////////////////////////////////////////////////////*/
1282 /* Poisson probability */
Dpois(phydbl x,phydbl param,int logit)1283 phydbl Dpois(phydbl x, phydbl param, int logit)
1284 {
1285 phydbl v;
1286
1287 if(param < SMALL)
1288 {
1289 if(x < SMALL)
1290 {
1291 if(logit) return 0.0;
1292 else return 1.0;
1293 }
1294 else
1295 {
1296 if(logit) return -INFINITY;
1297 else return 0.0;
1298 }
1299 }
1300
1301 if(x < .0)
1302 {
1303 if(logit == YES) return(-INFINITY);
1304 else return 0.0;
1305 }
1306
1307 v = x * log(param) - param - Factln(x);
1308 if(logit == YES) return v;
1309 else
1310 {
1311 if(v < 500.)
1312 {
1313 v = exp(v);
1314 }
1315 else
1316 {
1317 PhyML_Printf("\n. WARNING v=%f x=%f param=%f",v,x,param);
1318 v = exp(500);
1319 }
1320 return v;
1321 }
1322 return(-1.0);
1323 }
1324
1325 //////////////////////////////////////////////////////////////
1326 //////////////////////////////////////////////////////////////
1327
1328
1329
1330 //////////////////////////////////////////////////////////////
1331 //////////////////////////////////////////////////////////////
1332
1333 /* CDFs */
1334 //////////////////////////////////////////////////////////////
1335 //////////////////////////////////////////////////////////////
1336 // Error function
Erf(phydbl x)1337 phydbl Erf(phydbl x)
1338 {
1339 return(2.*Pnorm(x*sqrt(2.),0.0,1.0)-1.);
1340 }
1341
Pnorm(phydbl x,phydbl mean,phydbl sd)1342 phydbl Pnorm(phydbl x, phydbl mean, phydbl sd)
1343 {
1344 /* const phydbl b1 = 0.319381530; */
1345 /* const phydbl b2 = -0.356563782; */
1346 /* const phydbl b3 = 1.781477937; */
1347 /* const phydbl b4 = -1.821255978; */
1348 /* const phydbl b5 = 1.330274429; */
1349 /* const phydbl p = 0.2316419; */
1350 /* const phydbl c = 0.39894228; */
1351
1352 x = (x-mean)/sd;
1353
1354 /* if(x >= 0.0) */
1355 /* { */
1356 /* phydbl t = 1.0 / ( 1.0 + p * x ); */
1357 /* return (1.0 - c * exp( -x * x / 2.0 ) * t * */
1358 /* ( t *( t * ( t * ( t * b5 + b4 ) + b3 ) + b2 ) + b1 )); */
1359 /* } */
1360 /* else */
1361 /* { */
1362 /* phydbl t = 1.0 / ( 1.0 - p * x ); */
1363 /* return ( c * exp( -x * x / 2.0 ) * t * */
1364 /* ( t *( t * ( t * ( t * b5 + b4 ) + b3 ) + b2 ) + b1 )); */
1365 /* } */
1366
1367 /* i_tail in {0,1,2} means: "lower", "upper", or "both" :
1368 if(lower) return *cum := P[X <= x]
1369 if(upper) return *ccum := P[X > x] = 1 - P[X <= x]
1370 */
1371
1372 /* return Pnorm_Marsaglia(x); */
1373 return Pnorm_Ihaka_Derived_From_Cody(x);
1374 }
1375
1376
1377 /* G. Marsaglia. "Evaluating the Normal distribution". Journal of Statistical Software. 2004. Vol. 11. Issue 4. */
Pnorm_Marsaglia(phydbl x)1378 phydbl Pnorm_Marsaglia(phydbl x)
1379 {
1380 long double s=x,t=0,b=x,q=x*x,i=1;
1381 while(s!=t) s=(t=s)+(b*=q/(i+=2));
1382 return .5+s*exp(-.5*q-.91893853320467274178L);
1383
1384 }
1385
1386
1387
1388 /* Stolen from R source code */
1389 #define SIXTEN 16
1390
Pnorm_Ihaka_Derived_From_Cody(phydbl x)1391 phydbl Pnorm_Ihaka_Derived_From_Cody(phydbl x)
1392 {
1393
1394 const static double a[5] = {
1395 2.2352520354606839287,
1396 161.02823106855587881,
1397 1067.6894854603709582,
1398 18154.981253343561249,
1399 0.065682337918207449113
1400 };
1401 const static double b[4] = {
1402 47.20258190468824187,
1403 976.09855173777669322,
1404 10260.932208618978205,
1405 45507.789335026729956
1406 };
1407 const static double c[9] = {
1408 0.39894151208813466764,
1409 8.8831497943883759412,
1410 93.506656132177855979,
1411 597.27027639480026226,
1412 2494.5375852903726711,
1413 6848.1904505362823326,
1414 11602.651437647350124,
1415 9842.7148383839780218,
1416 1.0765576773720192317e-8
1417 };
1418 const static double d[8] = {
1419 22.266688044328115691,
1420 235.38790178262499861,
1421 1519.377599407554805,
1422 6485.558298266760755,
1423 18615.571640885098091,
1424 34900.952721145977266,
1425 38912.003286093271411,
1426 19685.429676859990727
1427 };
1428 const static double p[6] = {
1429 0.21589853405795699,
1430 0.1274011611602473639,
1431 0.022235277870649807,
1432 0.001421619193227893466,
1433 2.9112874951168792e-5,
1434 0.02307344176494017303
1435 };
1436 const static double q[5] = {
1437 1.28426009614491121,
1438 0.468238212480865118,
1439 0.0659881378689285515,
1440 0.00378239633202758244,
1441 7.29751555083966205e-5
1442 };
1443
1444 double xden, xnum, temp, del, eps, xsq, y;
1445 int i, lower, upper;
1446 double cum,ccum;
1447 int i_tail;
1448
1449 i_tail = 0;
1450 cum = ccum = 0.0;
1451
1452 if(isnan(x)) { cum = ccum = x; return (phydbl)cum; }
1453
1454 /* Consider changing these : */
1455 eps = DBL_EPSILON * 0.5;
1456
1457 /* i_tail in {0,1,2} =^= {lower, upper, both} */
1458 lower = i_tail != 1;
1459 upper = i_tail != 0;
1460
1461 y = fabs(x);
1462 if (y <= 0.67448975) { /* qnorm(3/4) = .6744.... -- earlier had 0.66291 */
1463 if (y > eps) {
1464 xsq = x * x;
1465 xnum = a[4] * xsq;
1466 xden = xsq;
1467 for (i = 0; i < 3; ++i) {
1468 xnum = (xnum + a[i]) * xsq;
1469 xden = (xden + b[i]) * xsq;
1470 }
1471 } else xnum = xden = 0.0;
1472
1473 temp = x * (xnum + a[3]) / (xden + b[3]);
1474 if(lower) cum = 0.5 + temp;
1475 if(upper) ccum = 0.5 - temp;
1476 }
1477 else if (y <= M_SQRT_32) {
1478
1479 /* Evaluate pnorm for 0.674.. = qnorm(3/4) < |x| <= SQRT(32) ~= 5.657 */
1480
1481 xnum = c[8] * y;
1482 xden = y;
1483 for (i = 0; i < 7; ++i) {
1484 xnum = (xnum + c[i]) * y;
1485 xden = (xden + d[i]) * y;
1486 }
1487 temp = (xnum + c[7]) / (xden + d[7]);
1488
1489 #define do_del(X) \
1490 xsq = floor(X * SIXTEN) / SIXTEN; \
1491 del = (X - xsq) * (X + xsq); \
1492 cum = exp(-xsq * xsq * 0.5) * exp(-del * 0.5) * temp; \
1493 ccum = 1.0 - cum; \
1494
1495 #define swap_tail \
1496 if (x > 0.) {/* swap ccum <--> cum */ \
1497 temp = cum; if(lower) cum = ccum; ccum = temp; \
1498 }
1499
1500 do_del(y);
1501 swap_tail;
1502 }
1503
1504 /* else |x| > SQRT(32) = 5.657 :
1505 * the next two case differentiations were really for lower=T, log=F
1506 * Particularly *not* for log_p !
1507
1508 * Cody had (-37.5193 < x && x < 8.2924) ; R originally had y < 50
1509 *
1510 * Note that we do want symmetry(0), lower/upper -> hence use y
1511 */
1512 else if((lower && -37.5193 < x && x < 8.2924) || (upper && -8.2924 < x && x < 37.5193))
1513 {
1514 /* Evaluate pnorm for x in (-37.5, -5.657) union (5.657, 37.5) */
1515 xsq = 1.0 / (x * x);
1516 xnum = p[5] * xsq;
1517 xden = xsq;
1518 for (i = 0; i < 4; ++i) {
1519 xnum = (xnum + p[i]) * xsq;
1520 xden = (xden + q[i]) * xsq;
1521 }
1522 temp = xsq * (xnum + p[4]) / (xden + q[4]);
1523 temp = (M_1_SQRT_2_PI - temp) / y;
1524
1525 do_del(x);
1526 swap_tail;
1527 }
1528 else
1529 { /* no log_p , large x such that probs are 0 or 1 */
1530 if(x > 0) { cum = 1.; ccum = 0.; }
1531 else { cum = 0.; ccum = 1.; }
1532 }
1533
1534 return (phydbl)cum;
1535
1536
1537 }
1538
1539 //////////////////////////////////////////////////////////////
1540 //////////////////////////////////////////////////////////////
1541
Pgamma(phydbl x,phydbl shape,phydbl scale)1542 phydbl Pgamma(phydbl x, phydbl shape, phydbl scale)
1543 {
1544 return IncompleteGamma(x/scale,shape,LnGamma(shape));
1545 }
1546
1547 //////////////////////////////////////////////////////////////
1548 //////////////////////////////////////////////////////////////
1549
Ppois(phydbl x,phydbl param)1550 phydbl Ppois(phydbl x, phydbl param)
1551 {
1552 /* Press et al. (1990) approximation of the CDF for the Poisson distribution */
1553 if(param < SMALL || x < 0.0)
1554 {
1555 PhyML_Printf("\n== param = %G x=%G",param,x);
1556 PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
1557 Warn_And_Exit("");
1558 }
1559 return IncompleteGamma(x,param,LnGamma(param));
1560 }
1561
1562 //////////////////////////////////////////////////////////////
1563 //////////////////////////////////////////////////////////////
1564
1565 /* Inverse CDFs */
1566 //////////////////////////////////////////////////////////////
1567 //////////////////////////////////////////////////////////////
1568
1569
PointChi2(phydbl prob,phydbl v)1570 phydbl PointChi2 (phydbl prob, phydbl v)
1571 {
1572 /* returns z so that Prob{x<z}=prob where x is Chi2 distributed with df=v
1573 returns -1 if in error. 0.000002<prob<0.999998
1574 RATNEST FORTRAN by
1575 Best DJ & Roberts DE (1975) The percentage points of the
1576 Chi2 distribution. Applied Statistics 24: 385-388. (AS91)
1577 Converted into C by Ziheng Yang, Oct. 1993.
1578 */
1579 double aa=.6931471805, p=prob, g;
1580 double xx, c, ch, a=0,q=0,p1=0,p2=0,t=0,x=0,b=0,s1,s2,s3,s4,s5,s6;
1581 double e=.5e-6;
1582
1583 if (p<.000002 || p>.999998 || v<=0) return ((phydbl)-1);
1584
1585 g = (double)LnGamma(v/2);
1586 xx=v/2; c=xx-1;
1587 if (v >= -1.24*log(p)) goto l1;
1588
1589 ch=pow((p*xx*exp(g+xx*aa)), 1/xx);
1590 if (ch-e<0) return (ch);
1591 goto l4;
1592 l1:
1593 if (v>.32) goto l3;
1594 ch=0.4; a=log(1-p);
1595 l2:
1596 q=ch; p1=1+ch*(4.67+ch); p2=ch*(6.73+ch*(6.66+ch));
1597 t=-0.5+(4.67+2*ch)/p1 - (6.73+ch*(13.32+3*ch))/p2;
1598 ch-=(1-exp(a+g+.5*ch+c*aa)*p2/p1)/t;
1599 if (fabs(q/ch-1)-.01 <= 0) goto l4;
1600 else goto l2;
1601
1602 l3:
1603 x=(double)PointNormal (p);
1604 p1=0.222222/v; ch=v*pow((x*sqrt(p1)+1-p1), 3.0);
1605 if (ch>2.2*v+6) ch=-2*(log(1-p)-c*log(.5*ch)+g);
1606 l4:
1607 q=ch; p1=.5*ch;
1608 if ((t=(double)IncompleteGamma (p1, xx, g))<0) {
1609 PhyML_Printf ("\nerr IncompleteGamma");
1610 return ((phydbl)-1.);
1611 }
1612 p2=p-t;
1613 t=p2*exp(xx*aa+g+p1-c*log(ch));
1614 b=t/ch; a=0.5*t-b*c;
1615
1616 s1=(210+a*(140+a*(105+a*(84+a*(70+60*a))))) / 420;
1617 s2=(420+a*(735+a*(966+a*(1141+1278*a))))/2520;
1618 s3=(210+a*(462+a*(707+932*a)))/2520;
1619 s4=(252+a*(672+1182*a)+c*(294+a*(889+1740*a)))/5040;
1620 s5=(84+264*a+c*(175+606*a))/2520;
1621 s6=(120+c*(346+127*c))/5040;
1622 ch+=t*(1+0.5*t*s1-b*c*(s1-b*(s2-b*(s3-b*(s4-b*(s5-b*s6))))));
1623 if (fabs(q/ch-1) > e) goto l4;
1624
1625 return (phydbl)(ch);
1626 }
1627
1628 //////////////////////////////////////////////////////////////
1629 //////////////////////////////////////////////////////////////
1630
1631
1632
1633 /*
1634 The following function was extracted from the source code of R.
1635 It implements the methods referenced below.
1636 * REFERENCE
1637 *
1638 * Beasley, J. D. and S. G. Springer (1977).
1639 * Algorithm AS 111: The percentage points of the normal distribution,
1640 * Applied Statistics, 26, 118-121.
1641 *
1642 * Wichura, M.J. (1988).
1643 * Algorithm AS 241: The Percentage Points of the Normal Distribution.
1644 * Applied Statistics, 37, 477-484.
1645 */
1646
1647
PointNormal(phydbl prob)1648 phydbl PointNormal (phydbl prob)
1649 {
1650 /* returns z so that Prob{x<z}=prob where x ~ N(0,1) and (1e-12)<prob<1-(1e-12)
1651 returns (-9999) if in error
1652 Odeh RE & Evans JO (1974) The percentage points of the normal distribution.
1653 Applied Statistics 22: 96-97 (AS70)
1654
1655 Newer methods:
1656 Wichura MJ (1988) Algorithm AS 241: the percentage points of the
1657 normal distribution. 37: 477-484.
1658 Beasley JD & Springer SG (1977). Algorithm AS 111: the percentage
1659 points of the normal distribution. 26: 118-121.
1660 */
1661 phydbl a0=-.322232431088, a1=-1, a2=-.342242088547, a3=-.0204231210245;
1662 phydbl a4=-.453642210148e-4, b0=.0993484626060, b1=.588581570495;
1663 phydbl b2=.531103462366, b3=.103537752850, b4=.0038560700634;
1664 phydbl y, z=0, p=prob, p1;
1665
1666 p1 = (p<0.5 ? p : 1-p);
1667 if (p1<1e-20) z=999;
1668 else {
1669 y = SQRT (log(1/(p1*p1)));
1670 z = y + ((((y*a4+a3)*y+a2)*y+a1)*y+a0) / ((((y*b4+b3)*y+b2)*y+b1)*y+b0);
1671 }
1672 return (p<0.5 ? -z : z);
1673 }
1674
1675
1676 /* phydbl PointNormal(phydbl p) */
1677 /* { */
1678 /* double p_, q, r, val; */
1679
1680 /* p_ = p; */
1681 /* q = p_ - 0.5; */
1682
1683 /* /\*-- use AS 241 --- *\/ */
1684 /* /\* double ppnd16_(double *p, long *ifault)*\/ */
1685 /* /\* ALGORITHM AS241 APPL. STATIST. (1988) VOL. 37, NO. 3 */
1686
1687 /* Produces the normal deviate Z corresponding to a given lower */
1688 /* tail area of P; Z is accurate to about 1 part in 10**16. */
1689
1690 /* (original fortran code used PARAMETER(..) for the coefficients */
1691 /* and provided hash codes for checking them...) */
1692 /* *\/ */
1693 /* if (fabs(q) <= .425) */
1694 /* {/\* 0.075 <= p <= 0.925 *\/ */
1695 /* r = .180625 - q * q; */
1696 /* val = */
1697 /* q * (((((((r * 2509.0809287301226727 + */
1698 /* 33430.575583588128105) * r + 67265.770927008700853) * r + */
1699 /* 45921.953931549871457) * r + 13731.693765509461125) * r + */
1700 /* 1971.5909503065514427) * r + 133.14166789178437745) * r + */
1701 /* 3.387132872796366608) */
1702 /* / (((((((r * 5226.495278852854561 + */
1703 /* 28729.085735721942674) * r + 39307.89580009271061) * r + */
1704 /* 21213.794301586595867) * r + 5394.1960214247511077) * r + */
1705 /* 687.1870074920579083) * r + 42.313330701600911252) * r + 1.); */
1706 /* } */
1707 /* else */
1708 /* { /\* closer than 0.075 from {0,1} boundary *\/ */
1709
1710 /* /\* r = min(p, 1-p) < 0.075 *\/ */
1711 /* if (q > 0) */
1712 /* r = 1-p;/\* 1-p *\/ */
1713 /* else */
1714 /* r = p_;/\* = R_DT_Iv(p) ^= p *\/ */
1715
1716 /* r = sqrt(-log(r)); */
1717 /* /\* r = sqrt(-log(r)) <==> min(p, 1-p) = exp( - r^2 ) *\/ */
1718
1719 /* if (r <= 5.) { /\* <==> min(p,1-p) >= exp(-25) ~= 1.3888e-11 *\/ */
1720 /* r += -1.6; */
1721 /* val = (((((((r * 7.7454501427834140764e-4 + */
1722 /* .0227238449892691845833) * r + .24178072517745061177) * */
1723 /* r + 1.27045825245236838258) * r + */
1724 /* 3.64784832476320460504) * r + 5.7694972214606914055) * */
1725 /* r + 4.6303378461565452959) * r + */
1726 /* 1.42343711074968357734) */
1727 /* / (((((((r * */
1728 /* 1.05075007164441684324e-9 + 5.475938084995344946e-4) * */
1729 /* r + .0151986665636164571966) * r + */
1730 /* .14810397642748007459) * r + .68976733498510000455) * */
1731 /* r + 1.6763848301838038494) * r + */
1732 /* 2.05319162663775882187) * r + 1.); */
1733 /* } */
1734 /* else */
1735 /* { /\* very close to 0 or 1 *\/ */
1736 /* r += -5.; */
1737 /* val = (((((((r * 2.01033439929228813265e-7 + */
1738 /* 2.71155556874348757815e-5) * r + */
1739 /* .0012426609473880784386) * r + .026532189526576123093) * */
1740 /* r + .29656057182850489123) * r + */
1741 /* 1.7848265399172913358) * r + 5.4637849111641143699) * */
1742 /* r + 6.6579046435011037772) */
1743 /* / (((((((r * */
1744 /* 2.04426310338993978564e-15 + 1.4215117583164458887e-7)* */
1745 /* r + 1.8463183175100546818e-5) * r + */
1746 /* 7.868691311456132591e-4) * r + .0148753612908506148525) */
1747 /* * r + .13692988092273580531) * r + */
1748 /* .59983220655588793769) * r + 1.); */
1749 /* } */
1750
1751 /* if(q < 0.0) */
1752 /* val = -val; */
1753 /* /\* return (q >= 0.)? r : -r ;*\/ */
1754 /* } */
1755 /* return (phydbl)val; */
1756 /* } */
1757
1758 //////////////////////////////////////////////////////////////
1759 //////////////////////////////////////////////////////////////
1760
1761 /* MISCs */
1762 //////////////////////////////////////////////////////////////
1763 //////////////////////////////////////////////////////////////
1764
1765
1766 //////////////////////////////////////////////////////////////
1767 //////////////////////////////////////////////////////////////
1768
1769
Bico(int n,int k)1770 phydbl Bico(int n, int k)
1771 {
1772 return FLOOR(0.5+exp(Factln(n)-Factln(k)-Factln(n-k)));
1773 }
1774
1775
1776 //////////////////////////////////////////////////////////////
1777 //////////////////////////////////////////////////////////////
1778
Factln(int n)1779 phydbl Factln(int n)
1780 {
1781 /* static phydbl a[101]; */
1782
1783 /* if (n < 0) { Warn_And_Exit("\n== Err: negative factorial in routine FACTLN"); } */
1784 /* if (n <= 1) return 0.0; */
1785 /* if (n <= 100) return (a[n]>SMALL) ? a[n] : (a[n]=Gammln(n+1.0)); */
1786 /* else return Gammln(n+1.0); */
1787 return Gammln(n+1.0);
1788 }
1789
1790 //////////////////////////////////////////////////////////////
1791 //////////////////////////////////////////////////////////////
1792
1793
Gammln(phydbl xx)1794 phydbl Gammln(phydbl xx)
1795 {
1796 double x,tmp,ser;
1797 static double cof[6]={76.18009173,-86.50532033,24.01409822,
1798 -1.231739516,0.120858003e-2,-0.536382e-5};
1799 int j;
1800
1801 x=xx-1.0;
1802 tmp=x+5.5;
1803 tmp -= (x+0.5)*log(tmp);
1804 ser=1.0;
1805 for (j=0;j<=5;j++)
1806 {
1807 x += 1.0;
1808 ser += cof[j]/x;
1809 }
1810 return (phydbl)(-tmp+log(2.50662827465*ser));
1811 }
1812
1813 //////////////////////////////////////////////////////////////
1814 //////////////////////////////////////////////////////////////
1815
1816
1817 /* void Plim_Binom(phydbl pH0, int N, phydbl *pinf, phydbl *psup) */
1818 /* { */
1819 /* *pinf = pH0 - 1.64*SQRT(pH0*(1-pH0)/(phydbl)N); */
1820 /* if(*pinf < 0) *pinf = .0; */
1821 /* *psup = pH0 + 1.64*SQRT(pH0*(1-pH0)/(phydbl)N); */
1822 /* } */
1823
1824 //////////////////////////////////////////////////////////////
1825 //////////////////////////////////////////////////////////////
1826
1827
LnGamma(phydbl alpha)1828 phydbl LnGamma (phydbl alpha)
1829 {
1830 /* returns ln(gamma(alpha)) for alpha>0, accurate to 10 decimal places.
1831 Stirling's formula is used for the central polynomial part of the procedure.
1832 Pike MC & Hill ID (1966) Algorithm 291: Logarithm of the gamma function.
1833 Communications of the Association for Computing Machinery, 9:684
1834 */
1835 double x=alpha, f=0, z;
1836 if (x<7) {
1837 f=1; z=x-1;
1838 while (++z<7) f*=z;
1839 x=z; f=-log(f);
1840 }
1841 z = 1/(x*x);
1842 return (phydbl)(f + (x-0.5)*log(x) - x + .918938533204673
1843 + (((-.000595238095238*z+.000793650793651)*z-.002777777777778)*z
1844 +.083333333333333)/x);
1845 }
1846
1847 //////////////////////////////////////////////////////////////
1848 //////////////////////////////////////////////////////////////
1849
1850
IncompleteGamma(phydbl x,phydbl alpha,phydbl ln_gamma_alpha)1851 phydbl IncompleteGamma(phydbl x, phydbl alpha, phydbl ln_gamma_alpha)
1852 {
1853 /* returns the incomplete gamma ratio I(x,alpha) where x is the upper
1854 limit of the integration and alpha is the shape parameter.
1855 returns (-1) if in error
1856 ln_gamma_alpha = ln(Gamma(alpha)), is almost redundant.
1857 (1) series expansion if (alpha>x || x<=1)
1858 (2) continued fraction otherwise
1859 RATNEST FORTRAN by
1860 Bhattacharjee GP (1970) The incomplete gamma integral. Applied Statistics,
1861 19: 285-287 (AS32)
1862 */
1863 int i;
1864 double p=alpha, g=ln_gamma_alpha;
1865 double accurate=1e-8, overflow=1e30;
1866 double factor, gin=0, rn=0, a=0,b=0,an=0,dif=0, term=0, pn[6];
1867
1868 if (fabs(x) < SMALL) return ((phydbl).0);
1869 if (x<0 || p<=0) return ((phydbl)-1);
1870
1871 factor=exp(p*log(x)-x-g);
1872 if (x>1 && x>=p) goto l30;
1873 /* (1) series expansion */
1874 gin=1; term=1; rn=p;
1875 l20:
1876 rn++;
1877 term*=x/rn; gin+=term;
1878
1879 if (term > accurate) goto l20;
1880 gin*=factor/p;
1881 goto l50;
1882 l30:
1883 /* (2) continued fraction */
1884 a=1-p; b=a+x+1; term=0;
1885 pn[0]=1; pn[1]=x; pn[2]=x+1; pn[3]=x*b;
1886 gin=pn[2]/pn[3];
1887 l32:
1888 a++; b+=2; term++; an=a*term;
1889 for (i=0; i<2; i++) pn[i+4]=b*pn[i+2]-an*pn[i];
1890 if (fabs(pn[5]) < .0) goto l35;
1891 rn=pn[4]/pn[5]; dif=fabs(gin-rn);
1892 if (dif>accurate) goto l34;
1893 if (dif<=accurate*rn) goto l42;
1894 l34:
1895 gin=rn;
1896 l35:
1897 for (i=0; i<4; i++) pn[i]=pn[i+2];
1898 if (fabs(pn[4]) < overflow) goto l32;
1899 for (i=0; i<4; i++) pn[i]/=overflow;
1900 goto l32;
1901 l42:
1902 gin=1-factor*gin;
1903
1904 l50:
1905 return (phydbl)(gin);
1906 }
1907
1908
1909 //////////////////////////////////////////////////////////////
1910 //////////////////////////////////////////////////////////////
1911
1912
DiscreteGamma(phydbl freqK[],phydbl rK[],phydbl alfa,phydbl beta,int K,int median)1913 int DiscreteGamma (phydbl freqK[], phydbl rK[],
1914 phydbl alfa, phydbl beta, int K, int median)
1915 {
1916 /* discretization of gamma distribution with equal proportions in each
1917 category
1918 */
1919
1920 int i;
1921 phydbl gap05=1.0/(2.0*K), t, factor=alfa/beta*K, lnga1;
1922
1923 if(K==1)
1924 {
1925 freqK[0] = 1.0;
1926 rK[0] = 1.0;
1927 return 0;
1928 }
1929
1930 if (median)
1931 {
1932 for (i=0; i<K; i++) rK[i]=PointGamma((i*2.0+1)*gap05, alfa, beta);
1933 for (i=0,t=0; i<K; i++) t+=rK[i];
1934 for (i=0; i<K; i++) rK[i]*=factor/t;
1935 }
1936 else
1937 {
1938 lnga1=LnGamma(alfa+1);
1939 for (i=0; i<K-1; i++)
1940 {
1941 freqK[i]=PointGamma((i+1.0)/K, alfa, beta);
1942 }
1943
1944 for (i=0; i<K-1; i++)
1945 {
1946 freqK[i]=IncompleteGamma(freqK[i]*beta, alfa+1, lnga1);
1947 }
1948
1949 rK[0] = freqK[0]*factor;
1950 rK[K-1] = (1-freqK[K-2])*factor;
1951 for (i=1; i<K-1; i++) rK[i] = (freqK[i]-freqK[i-1])*factor;
1952 }
1953 for (i=0; i<K; i++) freqK[i]=1.0/K;
1954 return (0);
1955 }
1956
1957 //////////////////////////////////////////////////////////////
1958 //////////////////////////////////////////////////////////////
1959
1960
1961 /* Return log(n!) */
1962
LnFact(int n)1963 phydbl LnFact(int n)
1964 {
1965 int i;
1966 phydbl res;
1967
1968 res = .0;
1969 for(i=2;i<n+1;i++) res += log((phydbl)i);
1970
1971 return(res);
1972 }
1973
1974 //////////////////////////////////////////////////////////////
1975 //////////////////////////////////////////////////////////////
1976
Choose(int n,int k)1977 int Choose(int n, int k)
1978 {
1979 phydbl accum;
1980 int i;
1981
1982 if (k > n) return(0);
1983 if (k > n/2) k = n-k;
1984 if(!k) return(1);
1985
1986 accum = 1.;
1987 for(i=1;i<k+1;i++) accum = accum * (n-k+i) / i;
1988
1989 return((int)accum);
1990 }
1991
1992 //////////////////////////////////////////////////////////////
1993 //////////////////////////////////////////////////////////////
1994
LnChoose(int n,int k)1995 phydbl LnChoose(int n, int k)
1996 {
1997 return(LnFact(n) - LnFact(k) - LnFact(n-k));
1998 }
1999
2000 //////////////////////////////////////////////////////////////
2001 //////////////////////////////////////////////////////////////
2002
Covariance_Matrix(t_tree * tree)2003 phydbl *Covariance_Matrix(t_tree *tree)
2004 {
2005 phydbl *cov, *mean;
2006 int *ori_wght,*site_num;
2007 int dim,i,j,replicate,n_site,position,sample_size;
2008
2009 sample_size = 100;
2010 dim = 2*tree->n_otu-3;
2011
2012 cov = (phydbl *)mCalloc(dim*dim,sizeof(phydbl));
2013 mean = (phydbl *)mCalloc( dim,sizeof(phydbl));
2014 ori_wght = (int *)mCalloc(tree->data->crunch_len,sizeof(int));
2015 site_num = (int *)mCalloc(tree->data->init_len,sizeof(int));
2016
2017 for(i=0;i<tree->data->crunch_len;i++) ori_wght[i] = tree->data->wght[i];
2018
2019 n_site = 0;
2020 for(i=0;i<tree->data->crunch_len;i++) For(j,tree->data->wght[i])
2021 {
2022 site_num[n_site] = i;
2023 n_site++;
2024 }
2025
2026
2027 tree->verbose = VL0;
2028 for(replicate=0;replicate<sample_size;replicate++)
2029 {
2030 For(i,2*tree->n_otu-3) tree->a_edges[i]->l->v = .1;
2031
2032 for(i=0;i<tree->data->crunch_len;i++) tree->data->wght[i] = 0;
2033
2034 for(i=0;i<tree->data->init_len;i++)
2035 {
2036 position = Rand_Int(0,(int)(tree->data->init_len-1.0));
2037 tree->data->wght[site_num[position]] += 1;
2038 }
2039
2040 Round_Optimize(tree,ROUND_MAX);
2041
2042 For(i,2*tree->n_otu-3) For(j,2*tree->n_otu-3) cov[i*dim+j] += log(tree->a_edges[i]->l->v) * log(tree->a_edges[j]->l->v);
2043 For(i,2*tree->n_otu-3) mean[i] += log(tree->a_edges[i]->l->v);
2044
2045 PhyML_Printf("[%3d/%3d]",replicate,sample_size); fflush(NULL);
2046 /* PhyML_Printf("\n. %3d %12f %12f %12f ", */
2047 /* replicate, */
2048 /* cov[1*dim+1]/(replicate+1)-mean[1]*mean[1]/POW(replicate+1,2), */
2049 /* tree->a_edges[1]->l->v, */
2050 /* mean[1]/(replicate+1)); */
2051 }
2052
2053 For(i,2*tree->n_otu-3) mean[i] /= (phydbl)sample_size;
2054 For(i,2*tree->n_otu-3) For(j,2*tree->n_otu-3) cov[i*dim+j] /= (phydbl)sample_size;
2055 For(i,2*tree->n_otu-3) For(j,2*tree->n_otu-3) cov[i*dim+j] -= mean[i]*mean[j];
2056 /* For(i,2*tree->n_otu-3) if(cov[i*dim+i] < var_min) cov[i*dim+i] = var_min; */
2057
2058
2059 /* PhyML_Printf("\n"); */
2060 /* For(i,2*tree->n_otu-3) PhyML_Printf("%f %f\n",mean[i],tree->a_edges[i]->l->v); */
2061 /* PhyML_Printf("\n"); */
2062 /* PhyML_Printf("\n"); */
2063 /* For(i,2*tree->n_otu-3) */
2064 /* { */
2065 /* For(j,2*tree->n_otu-3) */
2066 /* { */
2067 /* PhyML_Printf("%G\n",cov[i*dim+j]); */
2068 /* } */
2069 /* PhyML_Printf("\n"); */
2070 /* } */
2071
2072 for(i=0;i<tree->data->crunch_len;i++) tree->data->wght[i] = ori_wght[i];
2073
2074 Free(mean);
2075 Free(ori_wght);
2076 Free(site_num);
2077
2078 return cov;
2079 }
2080
2081 //////////////////////////////////////////////////////////////
2082 //////////////////////////////////////////////////////////////
2083
2084 /* Work out the Hessian for the likelihood function. Only branch lengths are considered as variable.
2085 This function is very much inspired from Jeff Thorne's 'hessian' function in his program 'estbranches'. */
Hessian(t_tree * tree)2086 phydbl *Hessian(t_tree *tree)
2087 {
2088 phydbl *hessian;
2089 phydbl *plus_plus, *minus_minus, *plus_zero, *minus_zero, *plus_minus, zero_zero;
2090 phydbl *ori_bl,*inc,*buff;
2091 int *ok_edges,*is_ok;
2092 int dim;
2093 int n_ok_edges;
2094 int i,j;
2095 phydbl eps;
2096 phydbl lk;
2097 phydbl lnL,lnL1,lnL2,ori_lnL;
2098 phydbl l_inf;
2099
2100 dim = 2*tree->n_otu-3;
2101 eps = (tree->mod->log_l == YES)?(0.2):(1E-4);
2102
2103 hessian = (phydbl *)mCalloc((int)dim*dim,sizeof(phydbl));
2104 ori_bl = (phydbl *)mCalloc((int)dim,sizeof(phydbl));
2105 plus_plus = (phydbl *)mCalloc((int)dim*dim,sizeof(phydbl));
2106 minus_minus = (phydbl *)mCalloc((int)dim*dim,sizeof(phydbl));
2107 plus_minus = (phydbl *)mCalloc((int)dim*dim,sizeof(phydbl));
2108 plus_zero = (phydbl *)mCalloc((int)dim ,sizeof(phydbl));
2109 minus_zero = (phydbl *)mCalloc((int)dim ,sizeof(phydbl));
2110 inc = (phydbl *)mCalloc((int)dim ,sizeof(phydbl));
2111 buff = (phydbl *)mCalloc((int)dim*dim,sizeof(phydbl));
2112 ok_edges = (int *)mCalloc((int)dim,sizeof(int));
2113 is_ok = (int *)mCalloc((int)dim,sizeof(int));
2114
2115 lnL = lnL1 = lnL2 = UNLIKELY;
2116
2117 Set_Both_Sides(YES,tree);
2118 Lk(NULL,tree);
2119 ori_lnL = tree->c_lnL;
2120
2121
2122 for(i=0;i<dim;i++) ori_bl[i] = tree->a_edges[i]->l->v;
2123
2124 if(tree->mod->log_l == NO)
2125 l_inf = MAX(tree->mod->l_min,1./(phydbl)tree->data->init_len);
2126 else
2127 l_inf = MAX(tree->mod->l_min,-log((phydbl)tree->data->init_len));
2128
2129
2130 n_ok_edges = 0;
2131 for(i=0;i<dim;i++)
2132 {
2133 if(tree->a_edges[i]->l->v*(1.-eps) > l_inf)
2134 {
2135 inc[i] = eps * tree->a_edges[i]->l->v;
2136 ok_edges[n_ok_edges] = i;
2137 n_ok_edges++;
2138 is_ok[i] = 1;
2139 }
2140 else
2141 {
2142 inc[i] = -1.0;
2143 is_ok[i] = 0;
2144 }
2145 }
2146
2147
2148 /* Fine tune the increments */
2149 for(i=0;i<dim;i++)
2150 {
2151 do
2152 {
2153 tree->a_edges[i]->l->v += inc[i];
2154 lnL1 = Lk(tree->a_edges[i],tree);
2155 tree->a_edges[i]->l->v = ori_bl[i];
2156 inc[i] *= 1.1;
2157 }while((fabs(lnL1 - ori_lnL) < 1.E-1) &&
2158 (tree->a_edges[i]->l->v+inc[i] < tree->mod->l_max));
2159 inc[i] /= 1.1;
2160 }
2161
2162
2163
2164 /* zero zero */
2165 zero_zero = tree->c_lnL;
2166
2167 /* plus zero */
2168 for(i=0;i<dim;i++)
2169 {
2170 if(is_ok[i])
2171 {
2172 tree->a_edges[i]->l->v += inc[i];
2173 lk = Lk(tree->a_edges[i],tree);
2174 plus_zero[i] = lk;
2175 tree->a_edges[i]->l->v = ori_bl[i];
2176 }
2177 }
2178
2179
2180 /* minus zero */
2181 for(i=0;i<dim;i++)
2182 {
2183 if(is_ok[i])
2184 {
2185 tree->a_edges[i]->l->v -= inc[i];
2186 lk = Lk(tree->a_edges[i],tree);
2187 minus_zero[i] = lk;
2188 tree->a_edges[i]->l->v = ori_bl[i];
2189 }
2190 }
2191
2192
2193 for(i=0;i<dim;i++) Update_PMat_At_Given_Edge(tree->a_edges[i],tree);
2194
2195 /* plus plus */
2196 for(i=0;i<dim;i++)
2197 {
2198 if(is_ok[i])
2199 {
2200 tree->a_edges[i]->l->v += inc[i];
2201 Update_PMat_At_Given_Edge(tree->a_edges[i],tree);
2202
2203 for(j=0;j<3;j++)
2204 if((!tree->a_edges[i]->left->tax) && (tree->a_edges[i]->left->v[j] != tree->a_edges[i]->rght))
2205 Recurr_Hessian(tree->a_edges[i]->left,tree->a_edges[i]->left->v[j],1,inc,plus_plus+i*dim,is_ok,tree);
2206
2207 for(j=0;j<3;j++)
2208 if((!tree->a_edges[i]->rght->tax) && (tree->a_edges[i]->rght->v[j] != tree->a_edges[i]->left))
2209 Recurr_Hessian(tree->a_edges[i]->rght,tree->a_edges[i]->rght->v[j],1,inc,plus_plus+i*dim,is_ok,tree);
2210
2211 tree->a_edges[i]->l->v = ori_bl[i];
2212 Lk(NULL,tree);
2213 }
2214 }
2215
2216
2217 /* plus minus */
2218 for(i=0;i<dim;i++)
2219 {
2220 if(is_ok[i])
2221 {
2222 tree->a_edges[i]->l->v += inc[i];
2223 Update_PMat_At_Given_Edge(tree->a_edges[i],tree);
2224
2225 for(j=0;j<3;j++)
2226 if((!tree->a_edges[i]->left->tax) && (tree->a_edges[i]->left->v[j] != tree->a_edges[i]->rght))
2227 Recurr_Hessian(tree->a_edges[i]->left,tree->a_edges[i]->left->v[j],-1,inc,plus_minus+i*dim,is_ok,tree);
2228
2229 for(j=0;j<3;j++)
2230 if((!tree->a_edges[i]->rght->tax) && (tree->a_edges[i]->rght->v[j] != tree->a_edges[i]->left))
2231 Recurr_Hessian(tree->a_edges[i]->rght,tree->a_edges[i]->rght->v[j],-1,inc,plus_minus+i*dim,is_ok,tree);
2232
2233 tree->a_edges[i]->l->v = ori_bl[i];
2234 Lk(NULL,tree);
2235 }
2236 }
2237
2238
2239 /* minus minus */
2240 for(i=0;i<dim;i++)
2241 {
2242 if(is_ok[i])
2243 {
2244 tree->a_edges[i]->l->v -= inc[i];
2245
2246 Update_PMat_At_Given_Edge(tree->a_edges[i],tree);
2247
2248 for(j=0;j<3;j++)
2249 if((!tree->a_edges[i]->left->tax) && (tree->a_edges[i]->left->v[j] != tree->a_edges[i]->rght))
2250 Recurr_Hessian(tree->a_edges[i]->left,tree->a_edges[i]->left->v[j],-1,inc,minus_minus+i*dim,is_ok,tree);
2251
2252 for(j=0;j<3;j++)
2253 if((!tree->a_edges[i]->rght->tax) && (tree->a_edges[i]->rght->v[j] != tree->a_edges[i]->left))
2254 Recurr_Hessian(tree->a_edges[i]->rght,tree->a_edges[i]->rght->v[j],-1,inc,minus_minus+i*dim,is_ok,tree);
2255
2256 tree->a_edges[i]->l->v = ori_bl[i];
2257 Lk(NULL,tree);
2258 }
2259 }
2260
2261
2262
2263 for(i=0;i<dim;i++)
2264 {
2265 if(is_ok[i])
2266 {
2267 hessian[i*dim+i] = (plus_zero[i]-2*zero_zero+minus_zero[i])/(POW(inc[i],2));
2268
2269 for(j=i+1;j<dim;j++)
2270 {
2271 if(is_ok[j])
2272 {
2273 hessian[i*dim+j] =
2274 (plus_plus[i*dim+j]-plus_minus[i*dim+j]-plus_minus[j*dim+i]+minus_minus[i*dim+j])/
2275 (4*inc[i]*inc[j]);
2276 hessian[j*dim+i] = hessian[i*dim+j];
2277 }
2278 }
2279 }
2280 }
2281
2282 for(i=0;i<n_ok_edges;i++)
2283 {
2284 for(j=0;j<n_ok_edges;j++)
2285 {
2286 buff[i*n_ok_edges+j] = -1.0*hessian[ok_edges[i]*dim+ok_edges[j]];
2287 }
2288 }
2289
2290
2291 if(!Matinv(buff,n_ok_edges,n_ok_edges,NO))
2292 {
2293 PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
2294 Exit("\n");
2295 }
2296
2297 for(i=0;i<n_ok_edges;i++)
2298 {
2299 for(j=0;j<n_ok_edges;j++)
2300 {
2301 hessian[ok_edges[i]*dim+ok_edges[j]] = buff[i*n_ok_edges+j];
2302 }
2303 }
2304
2305 /* eps = 1./(phydbl)tree->data->init_len; */
2306 /* Approximate variance for very short branches */
2307 for(i=0;i<dim;i++)
2308 if(inc[i] < 0.0 || hessian[i*dim+i] < MIN_VAR_BL)
2309 {
2310 eps = 0.2 * tree->a_edges[i]->l->v;
2311 do
2312 {
2313 lnL = Lk(tree->a_edges[i],tree);
2314 tree->a_edges[i]->l->v += eps;
2315 lnL1 = Lk(tree->a_edges[i],tree);
2316 tree->a_edges[i]->l->v += eps;
2317 lnL2 = Lk(tree->a_edges[i],tree);
2318 tree->a_edges[i]->l->v -= 2.*eps;
2319
2320 hessian[i*dim+i] = (lnL2 - 2.*lnL1 + lnL) / POW(eps,2);
2321
2322 /* printf("\n* l=%G eps=%f lnL=%f lnL1=%f lnL2=%f var=%f",tree->a_edges[i]->l->v,eps,lnL,lnL1,lnL2,hessian[i*dim+i]); */
2323 eps *= 5.;
2324 }while(fabs(lnL2 - lnL) < 1.E-3);
2325
2326 hessian[i*dim+i] = -1.0 / hessian[i*dim+i];
2327
2328 }
2329
2330
2331 /* Fit a straight line to the log-likelihood (i.e., an exponential to the likelihood) */
2332 /* It is only a straight line when considering branch length (rather than log(branch lengths)) */
2333 for(i=0;i<dim;i++)
2334 if((tree->a_edges[i]->l->v / tree->mod->l_min < 1.1) &&
2335 (tree->a_edges[i]->l->v / tree->mod->l_min > 0.9))
2336 {
2337 phydbl *x,*y,l;
2338 phydbl cov,var;
2339
2340 x=plus_plus;
2341 y=minus_minus;
2342 l=(tree->mod->log_l == YES)?(exp(tree->a_edges[i]->l->v)):(tree->a_edges[i]->l->v); /* Get actual branch length */
2343
2344 for(j=0;j<dim;j++)
2345 {
2346 x[j] = l + (100.*l-l)*((phydbl)j/dim);
2347 tree->a_edges[i]->l->v = (tree->mod->log_l)?(log(x[j])):(x[j]); /* Transform to log if necessary */
2348 y[j] = Lk(tree->a_edges[i],tree);
2349 tree->a_edges[i]->l->v = (tree->mod->log_l)?(log(l)):(l); /* Go back to initial edge length */
2350 }
2351
2352 cov = Covariance(x,y,dim);
2353 var = Covariance(x,x,dim);
2354
2355 /* cov/var is minus the parameter of the exponential distribution.
2356 The variance is therefore : */
2357 hessian[i*dim+i] = 1.0 / pow(cov/var,2);
2358
2359 /* printf("\n. Hessian = %G cov=%G var=%G",hessian[i*dim+i],cov,var); */
2360 }
2361 /* } */
2362
2363
2364 for(i=0;i<dim;i++)
2365 if(hessian[i*dim+i] < 0.0)
2366 {
2367 PhyML_Printf("\n. l=%G var=%G",tree->a_edges[i]->l->v,hessian[i*dim+i]);
2368 /* PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__); */
2369 /* Exit("\n"); */
2370 hessian[i*dim+i] = MIN_VAR_BL;
2371 }
2372
2373 for(i=0;i<dim;i++)
2374 {
2375 if(hessian[i*dim+i] < MIN_VAR_BL)
2376 {
2377 PhyML_Printf("\n. l=%10G var(l)=%12G. WARNING: numerical precision issues may affect this analysis.",
2378 tree->a_edges[i]->l->v,hessian[i*dim+i]);
2379 hessian[i*dim+i] = MIN_VAR_BL;
2380 }
2381 if(hessian[i*dim+i] > MAX_VAR_BL)
2382 {
2383 PhyML_Printf("\n. l=%10G var(l)=%12G. WARNING: numerical precision issues may affect this analysis.",
2384 tree->a_edges[i]->l->v,hessian[i*dim+i]);
2385 hessian[i*dim+i] = MAX_VAR_BL;
2386 }
2387 }
2388
2389 Iter_Matinv(hessian,dim,dim,NO);
2390
2391 For(i,dim*dim) hessian[i] = -1.0*hessian[i];
2392
2393 for(i=0;i<dim;i++)
2394 {
2395 for(j=0;j<dim;j++)
2396 {
2397 if(fabs(hessian[i*dim+j]-hessian[j*dim+i]) > 1.E-3)
2398 {
2399 PhyML_Printf("\n. Hessian not symmetrical.");
2400 PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
2401 Exit("\n");
2402 }
2403 hessian[i*dim+j] = (hessian[i*dim+j] + hessian[j*dim+i]) / 2.;
2404 hessian[j*dim+i] = hessian[i*dim+j];
2405 }
2406 }
2407
2408 /* printf("\n"); */
2409 /* printf("HESSIAN\n"); */
2410 /* for(i=0;i<dim;i++) */
2411 /* { */
2412 /* PhyML_Printf("[%f] ",tree->a_edges[i]->l->v); */
2413 /* for(j=0;j<dim;j++) */
2414 /* { */
2415 /* PhyML_Printf("%12lf ",hessian[i*dim+j]); */
2416 /* } */
2417 /* PhyML_Printf("\n"); */
2418 /* } */
2419
2420 /* Matinv(hessian,dim,dim,NO); */
2421
2422 /* PhyML_Printf("\n"); */
2423
2424 /* for(i=0;i<dim;i++) */
2425 /* { */
2426 /* PhyML_Printf("[%f] ",tree->a_edges[i]->l->v); */
2427 /* for(j=0;j<dim;j++) */
2428 /* { */
2429 /* PhyML_Printf("%12G ",-hessian[i*dim+j]); */
2430 /* } */
2431 /* PhyML_Printf("\n"); */
2432 /* } */
2433 /* Exit("\n"); */
2434
2435
2436 /* Make sure to update likelihood before bailing out */
2437 Set_Both_Sides(YES,tree);
2438 Lk(NULL,tree);
2439
2440 Free(ori_bl);
2441 Free(plus_plus);
2442 Free(minus_minus);
2443 Free(plus_zero);
2444 Free(minus_zero);
2445 Free(plus_minus);
2446 Free(inc);
2447 Free(buff);
2448 Free(ok_edges);
2449 Free(is_ok);
2450
2451 return hessian;
2452
2453 }
2454
2455 //////////////////////////////////////////////////////////////
2456 //////////////////////////////////////////////////////////////
2457
2458
2459 /* Work out the gradient for the likelihood function. Only branch lengths are considered as variable.
2460 */
Gradient(t_tree * tree)2461 phydbl *Gradient(t_tree *tree)
2462 {
2463 phydbl *gradient;
2464 phydbl *plus, *minus;
2465 phydbl *ori_bl,*inc;
2466 int *is_ok;
2467 int dim;
2468 int i;
2469 phydbl eps;
2470 phydbl lk;
2471 phydbl lnL,lnL1,lnL2;
2472 phydbl l_inf;
2473
2474 dim = 2*tree->n_otu-3;
2475 eps = (tree->mod->log_l == YES)?(0.2):(1.E-6);
2476
2477 gradient = (phydbl *)mCalloc((int)dim,sizeof(phydbl));
2478 ori_bl = (phydbl *)mCalloc((int)dim,sizeof(phydbl));
2479 plus = (phydbl *)mCalloc((int)dim,sizeof(phydbl));
2480 minus = (phydbl *)mCalloc((int)dim,sizeof(phydbl));
2481 inc = (phydbl *)mCalloc((int)dim ,sizeof(phydbl));
2482 is_ok = (int *)mCalloc((int)dim,sizeof(int));
2483
2484 lnL = lnL1 = lnL2 = UNLIKELY;
2485
2486 Set_Both_Sides(YES,tree);
2487 Lk(NULL,tree);
2488
2489 for(i=0;i<dim;i++) ori_bl[i] = tree->a_edges[i]->l->v;
2490
2491 if(tree->mod->log_l == NO)
2492 l_inf = MAX(tree->mod->l_min,1./(phydbl)tree->data->init_len);
2493 else
2494 l_inf = MAX(tree->mod->l_min,-log((phydbl)tree->data->init_len));
2495
2496 for(i=0;i<dim;i++)
2497 {
2498 if(tree->a_edges[i]->l->v*(1.-eps) > l_inf)
2499 {
2500 inc[i] = eps * tree->a_edges[i]->l->v;
2501 is_ok[i] = YES;
2502 }
2503 else
2504 {
2505 inc[i] = -1.0;
2506 is_ok[i] = NO;
2507 }
2508 }
2509
2510 /* plus */
2511 for(i=0;i<dim;i++)
2512 {
2513 if(is_ok[i] == YES)
2514 {
2515 tree->a_edges[i]->l->v += inc[i];
2516 lk = Lk(tree->a_edges[i],tree);
2517 plus[i] = lk;
2518 tree->a_edges[i]->l->v = ori_bl[i];
2519 }
2520 }
2521
2522
2523 /* minus */
2524 for(i=0;i<dim;i++)
2525 {
2526 if(is_ok[i] == YES)
2527 {
2528 tree->a_edges[i]->l->v -= inc[i];
2529 lk = Lk(tree->a_edges[i],tree);
2530 minus[i] = lk;
2531 tree->a_edges[i]->l->v = ori_bl[i];
2532 }
2533 }
2534
2535
2536 for(i=0;i<dim;i++)
2537 {
2538 if(is_ok[i] == YES)
2539 {
2540 gradient[i] = (plus[i] - minus[i])/(2.*inc[i]);
2541 }
2542 }
2543
2544
2545 for(i=0;i<dim;i++)
2546 {
2547 if(is_ok[i] == NO)
2548 {
2549 eps = fabs(0.2 * tree->a_edges[i]->l->v);
2550 lnL = Lk(tree->a_edges[i],tree);
2551 tree->a_edges[i]->l->v += eps;
2552 lnL1 = Lk(tree->a_edges[i],tree);
2553 tree->a_edges[i]->l->v += eps;
2554 lnL2 = Lk(tree->a_edges[i],tree);
2555 tree->a_edges[i]->l->v -= eps;
2556 tree->a_edges[i]->l->v -= eps;
2557 gradient[i] = (4.*lnL1 - lnL2 - 3.*lnL) / (2.*eps);
2558 }
2559 }
2560
2561 /* Make sure to update likelihood before bailing out */
2562 Set_Both_Sides(YES,tree);
2563 Lk(NULL,tree);
2564
2565 Free(ori_bl);
2566 Free(plus);
2567 Free(minus);
2568 Free(inc);
2569 Free(is_ok);
2570
2571
2572 /* printf("\n"); */
2573 /* printf("GRADIENT\n"); */
2574 /* for(i=0;i<dim;i++) */
2575 /* { */
2576 /* PhyML_Printf("[%f] ",tree->a_edges[i]->l->v); */
2577 /* for(j=0;j<dim;j++) */
2578 /* { */
2579 /* printf("%12lf ",gradient[i]*gradient[j]); */
2580 /* } */
2581 /* printf("\n"); */
2582 /* } */
2583 /* printf("\n"); */
2584 /* for(i=0;i<dim;i++) */
2585 /* { */
2586 /* PhyML_Printf("[%f] [%f]\n",tree->a_edges[i]->l->v,gradient[i]); */
2587 /* } */
2588
2589 /* Exit("\n"); */
2590
2591 return gradient;
2592
2593 }
2594
2595 //////////////////////////////////////////////////////////////
2596 //////////////////////////////////////////////////////////////
2597
2598
2599 /* Work out the Hessian for the likelihood function using the method described by Seo et al., 2004, MBE.
2600 Corresponds to the outer product of the scores approach described in Porter, 2002. (matrix J1)
2601 */
Hessian_Seo(t_tree * tree)2602 phydbl *Hessian_Seo(t_tree *tree)
2603 {
2604 phydbl *hessian,*site_hessian;
2605 phydbl *gradient;
2606 phydbl *plus, *minus, *plusplus, *zero;
2607 phydbl *ori_bl,*inc_plus,*inc_minus,*inc;
2608 int *is_ok;
2609 int dim;
2610 int i,j,k;
2611 phydbl eps;
2612 phydbl ori_lnL,lnL1,lnL2;
2613 phydbl l_inf;
2614 int l,n;
2615 phydbl small_var;
2616
2617 dim = 2*tree->n_otu-3;
2618 eps = (tree->mod->log_l == YES)?(0.2):(1.E-4);
2619
2620 hessian = (phydbl *)mCalloc((int)dim*dim,sizeof(phydbl));
2621 site_hessian = (phydbl *)mCalloc((int)dim*dim,sizeof(phydbl));
2622 gradient = (phydbl *)mCalloc((int)dim,sizeof(phydbl));
2623 ori_bl = (phydbl *)mCalloc((int)dim,sizeof(phydbl));
2624 plus = (phydbl *)mCalloc((int)dim*tree->n_pattern,sizeof(phydbl));
2625 plusplus = (phydbl *)mCalloc((int)dim*tree->n_pattern,sizeof(phydbl));
2626 minus = (phydbl *)mCalloc((int)dim*tree->n_pattern,sizeof(phydbl));
2627 zero = (phydbl *)mCalloc((int)dim*tree->n_pattern,sizeof(phydbl));
2628 inc_plus = (phydbl *)mCalloc((int)dim,sizeof(phydbl));
2629 inc_minus = (phydbl *)mCalloc((int)dim,sizeof(phydbl));
2630 inc = (phydbl *)mCalloc((int)dim,sizeof(phydbl));
2631 is_ok = (int *)mCalloc((int)dim,sizeof(int));
2632
2633 lnL1 = lnL2 = UNLIKELY;
2634
2635 for(i=0;i<dim;i++) ori_bl[i] = tree->a_edges[i]->l->v;
2636
2637 Set_Both_Sides(YES,tree);
2638 Lk(NULL,tree);
2639 ori_lnL = tree->c_lnL;
2640
2641
2642 if(tree->mod->log_l == NO)
2643 l_inf = MAX(tree->mod->l_min,1./(phydbl)tree->data->init_len);
2644 else
2645 l_inf = MAX(tree->mod->l_min,-log((phydbl)tree->data->init_len));
2646
2647 for(i=0;i<dim;i++)
2648 {
2649 if(tree->a_edges[i]->l->v*(1.-eps) > l_inf)
2650 {
2651 inc_plus[i] = fabs(eps * MAX(tree->mod->l_min,tree->a_edges[i]->l->v));
2652 inc_minus[i] = fabs(eps * MAX(tree->mod->l_min,tree->a_edges[i]->l->v));
2653 is_ok[i] = YES;
2654 }
2655 else
2656 {
2657 inc_plus[i] = fabs(0.2 * MAX(tree->mod->l_min,tree->a_edges[i]->l->v));
2658 inc_minus[i] = fabs(0.2 * MAX(tree->mod->l_min,tree->a_edges[i]->l->v));
2659 is_ok[i] = NO;
2660 }
2661
2662
2663 }
2664
2665 /* Fine tune the increments */
2666 for(i=0;i<dim;i++)
2667 {
2668 do
2669 {
2670 tree->a_edges[i]->l->v += inc_plus[i];
2671 lnL1 = Lk(tree->a_edges[i],tree);
2672 tree->a_edges[i]->l->v = ori_bl[i];
2673 inc_plus[i] *= 1.1;
2674 }while((fabs(lnL1 - ori_lnL) < 1.E-1) && (tree->a_edges[i]->l->v+inc_plus[i] < tree->mod->l_max));
2675 inc_plus[i] /= 1.1;
2676 }
2677
2678 for(i=0;i<dim;i++)
2679 {
2680 do
2681 {
2682 tree->a_edges[i]->l->v -= inc_minus[i];
2683 lnL1 = Lk(tree->a_edges[i],tree);
2684 tree->a_edges[i]->l->v = ori_bl[i];
2685 inc_minus[i] *= 1.1;
2686 }while((fabs(lnL1 - ori_lnL) < 1.E-1) &&
2687 (tree->a_edges[i]->l->v -inc_minus[i] > tree->mod->l_min));
2688 inc_minus[i] /= 1.1;
2689 }
2690
2691 for(i=0;i<dim;i++)
2692 {
2693 inc[i] = MIN(inc_plus[i],inc_minus[i]);
2694 }
2695
2696 /* plus */
2697 for(i=0;i<dim;i++)
2698 {
2699 if(is_ok[i] == YES)
2700 {
2701 tree->a_edges[i]->l->v += inc[i];
2702 Lk(tree->a_edges[i],tree);
2703 for(j=0;j<tree->n_pattern;j++) plus[i*tree->n_pattern+j] = log(tree->cur_site_lk[j]);
2704 tree->a_edges[i]->l->v = ori_bl[i];
2705 }
2706 }
2707
2708
2709 /* minus */
2710 for(i=0;i<dim;i++)
2711 {
2712 if(is_ok[i] == YES)
2713 {
2714 tree->a_edges[i]->l->v -= inc[i];
2715 Lk(tree->a_edges[i],tree);
2716 for(j=0;j<tree->n_pattern;j++) minus[i*tree->n_pattern+j] = log(tree->cur_site_lk[j]);
2717 tree->a_edges[i]->l->v = ori_bl[i];
2718 }
2719 }
2720
2721
2722 for(i=0;i<dim;i++)
2723 {
2724 if(is_ok[i] == NO)
2725 {
2726 Lk(tree->a_edges[i],tree);
2727 for(j=0;j<tree->n_pattern;j++) zero[i*tree->n_pattern+j] = log(tree->cur_site_lk[j]);
2728
2729 tree->a_edges[i]->l->v += inc[i];
2730 lnL1 = Lk(tree->a_edges[i],tree);
2731 for(j=0;j<tree->n_pattern;j++) plus[i*tree->n_pattern+j] = log(tree->cur_site_lk[j]);
2732
2733 tree->a_edges[i]->l->v += inc[i];
2734 lnL2 = Lk(tree->a_edges[i],tree);
2735 for(j=0;j<tree->n_pattern;j++) plusplus[i*tree->n_pattern+j] = log(tree->cur_site_lk[j]);
2736
2737 tree->a_edges[i]->l->v = ori_bl[i];
2738
2739 }
2740 }
2741
2742 For(i,dim*dim) hessian[i] = 0.0;
2743
2744 for(k=0;k<tree->n_pattern;k++)
2745 {
2746 for(i=0;i<dim;i++)
2747 {
2748 if(is_ok[i] == YES)
2749 gradient[i] = (plus[i*tree->n_pattern+k] - minus[i*tree->n_pattern+k])/(inc[i] + inc[i]);
2750 else
2751 gradient[i] = (4.*plus[i*tree->n_pattern+k] - plusplus[i*tree->n_pattern+k] - 3.*zero[i*tree->n_pattern+k])/(inc[i] + inc[i]);
2752
2753 /* if(is_ok[i] == NO) */
2754 /* printf("\n. i=%d site=%d l=%G plus=%G plusplus=%G zero=%G num=%f grad=%G", */
2755 /* i,k,tree->a_edges[i]->l->v, */
2756 /* plus[i*tree->n_pattern+k],plusplus[i*tree->n_pattern+k],zero[i*tree->n_pattern+k], */
2757 /* (4.*plus[i*tree->n_pattern+k] - plusplus[i*tree->n_pattern+k] - 3.*zero[i*tree->n_pattern+k]), */
2758 /* gradient[i]); */
2759
2760 }
2761 for(i=0;i<dim;i++) for(j=0;j<dim;j++) site_hessian[i*dim+j] = gradient[i] * gradient[j];
2762 For(i,dim*dim) hessian[i] -= site_hessian[i] * tree->data->wght[k];
2763 }
2764
2765
2766 /* Make sure to update likelihood before bailing out */
2767 Set_Both_Sides(YES,tree);
2768 Lk(NULL,tree);
2769
2770 l = tree->data->init_len;
2771 n = tree->mod->ns;
2772 /* Delta method for variance. Assumes Jukes and Cantor with p=1/n */
2773 small_var = (1./(l*l))*(1.-1./l)*(n-1.)*(n-1.)/(n-1.-n/l);
2774 for(i=0;i<dim;i++)
2775 if(is_ok[i] == NO)
2776 {
2777 for(j=0;j<dim;j++)
2778 {
2779 hessian[i*dim+j] = 0.;
2780 hessian[j*dim+i] = 0.;
2781 }
2782 hessian[i*dim+i] = -1./small_var;
2783
2784 if(tree->mod->log_l == YES)
2785 {
2786 hessian[i*dim+i] = small_var * POW(exp(tree->a_edges[i]->l->v),-2);
2787 hessian[i*dim+i] = -1./hessian[i*dim+i];
2788 }
2789 }
2790
2791 for(i=0;i<dim;i++)
2792 if(is_ok[i] == YES && hessian[i*dim+i] < -1./small_var)
2793 {
2794 for(j=0;j<dim;j++)
2795 {
2796 hessian[i*dim+j] = 0.;
2797 hessian[j*dim+i] = 0.;
2798 }
2799 hessian[i*dim+i] = -1./small_var;
2800
2801 if(tree->mod->log_l == YES)
2802 {
2803 hessian[i*dim+i] = small_var * POW(exp(tree->a_edges[i]->l->v),-2);
2804 hessian[i*dim+i] = -1./hessian[i*dim+i];
2805 }
2806 }
2807
2808
2809 for(i=0;i<dim;i++)
2810 {
2811 for(j=0;j<dim;j++)
2812 {
2813 if(fabs(hessian[i*dim+j]-hessian[j*dim+i]) > 1.E-3)
2814 {
2815 PhyML_Printf("\n== Hessian not symmetrical.");
2816 PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__);
2817 Exit("\n");
2818 }
2819 hessian[i*dim+j] = (hessian[i*dim+j] + hessian[j*dim+i]) / 2.;
2820 hessian[j*dim+i] = hessian[i*dim+j];
2821 }
2822 }
2823
2824 /* printf("\n"); */
2825 /* printf("HESSIAN SEO\n"); */
2826 /* for(i=0;i<dim;i++) */
2827 /* { */
2828 /* PhyML_Printf("[%f] ",tree->a_edges[i]->l->v); */
2829 /* for(j=0;j<dim;j++) */
2830 /* { */
2831 /* PhyML_Printf("%12lf ",hessian[i*dim+j]); */
2832 /* } */
2833 /* PhyML_Printf("\n"); */
2834 /* } */
2835
2836 Free(site_hessian);
2837 Free(ori_bl);
2838 Free(plus);
2839 Free(minus);
2840 Free(plusplus);
2841 Free(zero);
2842 Free(inc);
2843 Free(inc_plus);
2844 Free(inc_minus);
2845 Free(is_ok);
2846 Free(gradient);
2847
2848 return hessian;
2849
2850 }
2851
2852 //////////////////////////////////////////////////////////////
2853 //////////////////////////////////////////////////////////////
2854
2855
Recurr_Hessian(t_node * a,t_node * d,int plus_minus,phydbl * inc,phydbl * res,int * is_ok,t_tree * tree)2856 void Recurr_Hessian(t_node *a, t_node *d, int plus_minus, phydbl *inc, phydbl *res, int *is_ok, t_tree *tree)
2857 {
2858 int i;
2859 phydbl ori_l;
2860
2861 for(i=0;i<3;i++)
2862 if(a->v[i] == d)
2863 {
2864 Update_Partial_Lk(tree,a->b[i],a);
2865
2866 ori_l = a->b[i]->l->v;
2867 if(is_ok[a->b[i]->num])
2868 {
2869 if(plus_minus > 0) a->b[i]->l->v += inc[a->b[i]->num];
2870 else a->b[i]->l->v -= inc[a->b[i]->num];
2871 res[a->b[i]->num] = Lk(a->b[i],tree);
2872 a->b[i]->l->v = ori_l;
2873 Update_PMat_At_Given_Edge(a->b[i],tree);
2874 }
2875 break;
2876 }
2877
2878 if(d->tax) return;
2879 else
2880 for(i=0;i<3;i++)
2881 if(d->v[i] != a)
2882 Recurr_Hessian(d,d->v[i],plus_minus,inc,res,is_ok,tree);
2883 }
2884
2885 //////////////////////////////////////////////////////////////
2886 //////////////////////////////////////////////////////////////
2887
2888
2889 /* Work out the Hessian for the likelihood function. Only logARITHM of branch lengths are considered as variable.
2890 This function is very much inspired from Jeff Thorne's 'hessian' function in his program 'estbranches'. */
Hessian_Log(t_tree * tree)2891 phydbl *Hessian_Log(t_tree *tree)
2892 {
2893 phydbl *hessian;
2894 phydbl *plus_plus, *minus_minus, *plus_zero, *minus_zero, *plus_minus, *zero_zero;
2895 phydbl *ori_bl,*inc,*buff;
2896 int *ok_edges,*is_ok;
2897 int dim;
2898 int n_ok_edges;
2899 int i,j;
2900 phydbl eps;
2901 phydbl lk;
2902
2903 dim = 2*tree->n_otu-3;
2904 eps = 1.E-4;
2905
2906 hessian = (phydbl *)mCalloc((int)dim*dim,sizeof(phydbl));
2907 ori_bl = (phydbl *)mCalloc((int)dim, sizeof(phydbl));
2908 plus_plus = (phydbl *)mCalloc((int)dim*dim,sizeof(phydbl));
2909 minus_minus = (phydbl *)mCalloc((int)dim*dim,sizeof(phydbl));
2910 plus_minus = (phydbl *)mCalloc((int)dim*dim,sizeof(phydbl));
2911 plus_zero = (phydbl *)mCalloc((int)dim ,sizeof(phydbl));
2912 minus_zero = (phydbl *)mCalloc((int)dim ,sizeof(phydbl));
2913 zero_zero = (phydbl *)mCalloc((int)dim ,sizeof(phydbl));
2914 inc = (phydbl *)mCalloc((int)dim ,sizeof(phydbl));
2915 buff = (phydbl *)mCalloc((int)dim*dim,sizeof(phydbl));
2916 ok_edges = (int *)mCalloc((int)dim, sizeof(int));
2917 is_ok = (int *)mCalloc((int)dim, sizeof(int));
2918
2919 Set_Both_Sides(YES,tree);
2920 Lk(NULL,tree);
2921
2922 for(i=0;i<dim;i++) ori_bl[i] = tree->a_edges[i]->l->v;
2923
2924 n_ok_edges = 0;
2925 for(i=0;i<dim;i++)
2926 {
2927 if(tree->a_edges[i]->l->v > 3.0/(phydbl)tree->data->init_len)
2928 {
2929 inc[i] = fabs(eps * tree->a_edges[i]->l->v);
2930 ok_edges[n_ok_edges] = i;
2931 n_ok_edges++;
2932 is_ok[i] = 1;
2933 }
2934 else is_ok[i] = 0;
2935 }
2936
2937 /* zero zero */
2938 lk = Log_Det(is_ok,tree);
2939 for(i=0;i<dim;i++) if(is_ok[i]) zero_zero[i] = tree->c_lnL+lk;
2940
2941 /* plus zero */
2942 for(i=0;i<dim;i++)
2943 {
2944 if(is_ok[i])
2945 {
2946 tree->a_edges[i]->l->v += inc[i];
2947 lk = Lk(tree->a_edges[i],tree);
2948 plus_zero[i] = lk+Log_Det(is_ok,tree);
2949 tree->a_edges[i]->l->v = ori_bl[i];
2950 }
2951 }
2952
2953
2954 /* minus zero */
2955 for(i=0;i<dim;i++)
2956 {
2957 if(is_ok[i])
2958 {
2959 tree->a_edges[i]->l->v -= inc[i];
2960 lk = Lk(tree->a_edges[i],tree);
2961 minus_zero[i] = lk+Log_Det(is_ok,tree);
2962 tree->a_edges[i]->l->v = ori_bl[i];
2963 }
2964 }
2965
2966 for(i=0;i<dim;i++) Update_PMat_At_Given_Edge(tree->a_edges[i],tree);
2967
2968 /* plus plus */
2969 for(i=0;i<dim;i++)
2970 {
2971 if(is_ok[i])
2972 {
2973 tree->a_edges[i]->l->v += inc[i];
2974 Update_PMat_At_Given_Edge(tree->a_edges[i],tree);
2975
2976 for(j=0;j<3;j++)
2977 if((!tree->a_edges[i]->left->tax) && (tree->a_edges[i]->left->v[j] != tree->a_edges[i]->rght))
2978 Recurr_Hessian_Log(tree->a_edges[i]->left,tree->a_edges[i]->left->v[j],1,inc,plus_plus+i*dim,is_ok,tree);
2979
2980 for(j=0;j<3;j++)
2981 if((!tree->a_edges[i]->rght->tax) && (tree->a_edges[i]->rght->v[j] != tree->a_edges[i]->left))
2982 Recurr_Hessian_Log(tree->a_edges[i]->rght,tree->a_edges[i]->rght->v[j],1,inc,plus_plus+i*dim,is_ok,tree);
2983
2984 /* for(j=0;j<dim;j++) */
2985 /* if(j != i) */
2986 /* { */
2987 /* if(inc[j] > 0.0) */
2988 /* { */
2989 /* tree->a_edges[j]->l->v += inc[j]; */
2990 /* Lk(tree); */
2991 /* plus_plus[i*dim+j]=tree->c_lnL; */
2992 /* tree->a_edges[j]->l->v = ori_bl[j]; */
2993 /* } */
2994 /* } */
2995
2996 tree->a_edges[i]->l->v = ori_bl[i];
2997 Lk(NULL,tree);
2998 }
2999 }
3000
3001 /* plus minus */
3002 for(i=0;i<dim;i++)
3003 {
3004 if(is_ok[i])
3005 {
3006 tree->a_edges[i]->l->v += inc[i];
3007 Update_PMat_At_Given_Edge(tree->a_edges[i],tree);
3008
3009 for(j=0;j<3;j++)
3010 if((!tree->a_edges[i]->left->tax) && (tree->a_edges[i]->left->v[j] != tree->a_edges[i]->rght))
3011 Recurr_Hessian_Log(tree->a_edges[i]->left,tree->a_edges[i]->left->v[j],-1,inc,plus_minus+i*dim,is_ok,tree);
3012
3013 for(j=0;j<3;j++)
3014 if((!tree->a_edges[i]->rght->tax) && (tree->a_edges[i]->rght->v[j] != tree->a_edges[i]->left))
3015 Recurr_Hessian_Log(tree->a_edges[i]->rght,tree->a_edges[i]->rght->v[j],-1,inc,plus_minus+i*dim,is_ok,tree);
3016
3017 /* for(j=0;j<dim;j++) */
3018 /* if(j != i) */
3019 /* { */
3020 /* if(inc[j] > 0.0) */
3021 /* { */
3022 /* tree->a_edges[j]->l->v -= inc[j]; */
3023 /* Lk(tree); */
3024 /* plus_minus[i*dim+j] = tree->c_lnL; */
3025 /* tree->a_edges[j]->l->v = ori_bl[j]; */
3026 /* } */
3027 /* } */
3028
3029 tree->a_edges[i]->l->v = ori_bl[i];
3030 Lk(NULL,tree);
3031 }
3032 }
3033
3034
3035 /* minus minus */
3036 for(i=0;i<dim;i++)
3037 {
3038 if(is_ok[i])
3039 {
3040 tree->a_edges[i]->l->v -= inc[i];
3041
3042 Update_PMat_At_Given_Edge(tree->a_edges[i],tree);
3043
3044 for(j=0;j<3;j++)
3045 if((!tree->a_edges[i]->left->tax) && (tree->a_edges[i]->left->v[j] != tree->a_edges[i]->rght))
3046 Recurr_Hessian_Log(tree->a_edges[i]->left,tree->a_edges[i]->left->v[j],-1,inc,minus_minus+i*dim,is_ok,tree);
3047
3048 for(j=0;j<3;j++)
3049 if((!tree->a_edges[i]->rght->tax) && (tree->a_edges[i]->rght->v[j] != tree->a_edges[i]->left))
3050 Recurr_Hessian_Log(tree->a_edges[i]->rght,tree->a_edges[i]->rght->v[j],-1,inc,minus_minus+i*dim,is_ok,tree);
3051
3052 /* for(j=0;j<dim;j++) */
3053 /* if(j != i) */
3054 /* { */
3055 /* if(inc[j] > 0.0) */
3056 /* { */
3057 /* tree->a_edges[j]->l->v -= inc[j]; */
3058 /* Lk(tree); */
3059 /* minus_minus[i*dim+j] = tree->c_lnL; */
3060 /* tree->a_edges[j]->l->v = ori_bl[j]; */
3061 /* } */
3062 /* } */
3063
3064 tree->a_edges[i]->l->v = ori_bl[i];
3065 Lk(NULL,tree);
3066 }
3067 }
3068
3069 /* for(i=0;i<dim;i++) if(is_ok[i]) inc[i] = POW(tree->a_edges[i]->l->v+inc[i],2)-POW(tree->a_edges[i]->l->v,2); */
3070 for(i=0;i<dim;i++) if(is_ok[i]) inc[i] = log(tree->a_edges[i]->l->v+inc[i])-log(tree->a_edges[i]->l->v);
3071 /* for(i=0;i<dim;i++) inc[i] = 2.*inc[i]; */
3072 /* for(i=0;i<dim;i++) if(is_ok[i]) inc[i] = SQRT(tree->a_edges[i]->l->v+inc[i])-SQRT(tree->a_edges[i]->l->v); */
3073
3074 for(i=0;i<dim;i++)
3075 {
3076 if(is_ok[i])
3077 {
3078 hessian[i*dim+i] = (plus_zero[i]-2*zero_zero[i]+minus_zero[i])/(POW(inc[i],2));
3079
3080 for(j=i+1;j<dim;j++)
3081 {
3082 if(is_ok[j])
3083 {
3084 hessian[i*dim+j] =
3085 (plus_plus[i*dim+j]-plus_minus[i*dim+j]-plus_minus[j*dim+i]+minus_minus[i*dim+j])/
3086 (4*inc[i]*inc[i]);
3087 hessian[j*dim+i] = hessian[i*dim+j];
3088 }
3089 }
3090 }
3091 }
3092
3093
3094 for(i=0;i<n_ok_edges;i++)
3095 {
3096 for(j=0;j<n_ok_edges;j++)
3097 {
3098 buff[i*n_ok_edges+j] = -hessian[ok_edges[i]*dim+ok_edges[j]];
3099 }
3100 }
3101
3102 if(!Matinv(buff,n_ok_edges,n_ok_edges,NO))
3103 {
3104 PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
3105 Exit("\n");
3106 }
3107
3108 for(i=0;i<n_ok_edges;i++)
3109 {
3110 for(j=0;j<n_ok_edges;j++)
3111 {
3112 hessian[ok_edges[i]*dim+ok_edges[j]] = buff[i*n_ok_edges+j];
3113 }
3114 }
3115
3116 /* Approximate variance for very short branches */
3117 for(i=0;i<dim;i++)
3118 if(!is_ok[i])
3119 {
3120 hessian[i*dim+i] = 1./POW(tree->data->init_len,2);
3121 }
3122
3123 if(!Matinv(hessian,dim,dim,NO))
3124 {
3125 PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
3126 Exit("\n");
3127 }
3128
3129 For(i,dim*dim) hessian[i] = -1.0*hessian[i];
3130
3131 /* for(i=0;i<dim;i++) */
3132 /* { */
3133 /* PhyML_Printf("[%f] ",tree->a_edges[i]->l->v); */
3134 /* For(j,i+1) */
3135 /* { */
3136 /* PhyML_Printf("%12lf ",hessian[i*dim+j]); */
3137 /* } */
3138 /* PhyML_Printf("\n"); */
3139 /* } */
3140
3141 /* Matinv(hessian,dim,dim); */
3142
3143 /* PhyML_Printf("\n"); */
3144
3145 for(i=0;i<dim;i++)
3146 {
3147 PhyML_Printf("[%f] ",tree->a_edges[i]->l->v);
3148 For(j,i+1)
3149 {
3150 PhyML_Printf("%12lf ",hessian[i*dim+j]);
3151 }
3152 PhyML_Printf("\n");
3153 }
3154 /* Exit("\n"); */
3155
3156
3157 Free(ori_bl);
3158 Free(plus_plus);
3159 Free(minus_minus);
3160 Free(plus_zero);
3161 Free(minus_zero);
3162 Free(plus_minus);
3163 Free(zero_zero);
3164 Free(inc);
3165 Free(buff);
3166 Free(ok_edges);
3167 Free(is_ok);
3168
3169 return hessian;
3170
3171 }
3172
3173 //////////////////////////////////////////////////////////////
3174 //////////////////////////////////////////////////////////////
3175
3176
Recurr_Hessian_Log(t_node * a,t_node * d,int plus_minus,phydbl * inc,phydbl * res,int * is_ok,t_tree * tree)3177 void Recurr_Hessian_Log(t_node *a, t_node *d, int plus_minus, phydbl *inc, phydbl *res, int *is_ok, t_tree *tree)
3178 {
3179 int i;
3180 phydbl ori_l;
3181
3182 for(i=0;i<3;i++)
3183 if(a->v[i] == d)
3184 {
3185 Update_Partial_Lk(tree,a->b[i],a);
3186
3187 ori_l = a->b[i]->l->v;
3188 if(is_ok[a->b[i]->num])
3189 {
3190 if(plus_minus > 0) a->b[i]->l->v += inc[a->b[i]->num];
3191 else a->b[i]->l->v -= inc[a->b[i]->num];
3192 res[a->b[i]->num] = Lk(a->b[i],tree);
3193 res[a->b[i]->num] += Log_Det(is_ok,tree);
3194 a->b[i]->l->v = ori_l;
3195 Update_PMat_At_Given_Edge(a->b[i],tree);
3196 }
3197 break;
3198 }
3199
3200 if(d->tax) return;
3201 else
3202 for(i=0;i<3;i++)
3203 if(d->v[i] != a)
3204 Recurr_Hessian_Log(d,d->v[i],plus_minus,inc,res,is_ok,tree);
3205 }
3206
3207 //////////////////////////////////////////////////////////////
3208 //////////////////////////////////////////////////////////////
3209
3210
Log_Det(int * is_ok,t_tree * tree)3211 phydbl Log_Det(int *is_ok, t_tree *tree)
3212 {
3213 int i;
3214 phydbl ldet;
3215
3216 ldet = 0.0;
3217 /* For(i,2*tree->n_otu-3) if(is_ok[i]) ldet += log(2.*SQRT(tree->a_edges[i]->l->v)); */
3218 For(i,2*tree->n_otu-3) if(is_ok[i]) ldet += log(tree->a_edges[i]->l->v);
3219 /* For(i,2*tree->n_otu-3) if(is_ok[i]) ldet -= log(2*tree->a_edges[i]->l->v); */
3220
3221 return ldet;
3222
3223 }
3224
3225 //////////////////////////////////////////////////////////////
3226 //////////////////////////////////////////////////////////////
3227
Normal_Trunc_Mean(phydbl mu,phydbl sd,phydbl min,phydbl max)3228 phydbl Normal_Trunc_Mean(phydbl mu, phydbl sd, phydbl min, phydbl max)
3229 {
3230 phydbl mean;
3231
3232 mean = mu + sd *
3233 (Dnorm((min-mu)/sd,0.,1.)-Dnorm((max-mu)/sd,0.,1.))/
3234 (Pnorm((max-mu)/sd,0.,1.)-Pnorm((min-mu)/sd,0.,1.));
3235 return mean;
3236 }
3237
3238 //////////////////////////////////////////////////////////////
3239 //////////////////////////////////////////////////////////////
3240
3241
Constraint_Normal_Trunc_Mean(phydbl wanted_mu,phydbl sd,phydbl min,phydbl max)3242 phydbl Constraint_Normal_Trunc_Mean(phydbl wanted_mu, phydbl sd, phydbl min, phydbl max)
3243 {
3244 int j;
3245 phydbl dx,f,fmid,xmid,rtb;
3246 phydbl x1, x2;
3247
3248 x1 = min;
3249 x2 = max;
3250
3251 f = Normal_Trunc_Mean(x1,sd,min,max) - wanted_mu;
3252 fmid = Normal_Trunc_Mean(x2,sd,min,max) - wanted_mu;
3253
3254 if(f*fmid >= 0.0)
3255 {
3256 PhyML_Printf("\n. Root must be bracketed for bisection!");
3257 PhyML_Printf("\n. f=%f fmid=%f",f,fmid);
3258 PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
3259 Exit("\n");
3260 }
3261
3262 rtb = f < 0.0 ? (dx=x2-x1,x1) : (dx=x1-x2,x2);
3263
3264 for(j=0;j<100;j++)
3265 {
3266 xmid=rtb+(dx *= 0.5);
3267 fmid=Normal_Trunc_Mean(xmid,sd,min,max)-wanted_mu;
3268 if(fmid <= 0.0) rtb=xmid;
3269 if(fmid > -1.E-10 && fmid < 1.E-10) return rtb;
3270 }
3271
3272 Exit("Too many bisections in RTBIS");
3273 return(-1.);
3274 }
3275
3276 //////////////////////////////////////////////////////////////
3277 //////////////////////////////////////////////////////////////
3278
3279
Matinv(phydbl * x,const int n,const int m,const int verbose)3280 int Matinv(phydbl *x, const int n, const int m, const int verbose)
3281 {
3282
3283 /* x[n*m] ... m>=n
3284 */
3285
3286 int i,j,k;
3287 int *irow;
3288 phydbl ee, t,t1,xmax;
3289 phydbl det;
3290
3291 ee = 1.0E-10;
3292 det = 1.0;
3293
3294 irow = (int *)mCalloc(n,sizeof(int));
3295
3296 for(i=0;i<n;++i)
3297 {
3298 xmax = 0.;
3299 for (j=i; j<n; j++)
3300 if (xmax < fabs(x[j*m+i]))
3301 {
3302 xmax = fabs(x[j*m+i]);
3303 irow[i]=j;
3304 }
3305
3306 det *= xmax;
3307 if (xmax < ee)
3308 {
3309 Free(irow);
3310 if(verbose)
3311 {
3312 PhyML_Printf("\n. xmax=%g",xmax);
3313 PhyML_Printf("\n. Determinant becomes zero at %3d!\t", i+1);
3314 PhyML_Printf("\n. Failed to invert the matrix.");
3315 }
3316 return(0);
3317 }
3318 if (irow[i] != i)
3319 {
3320 for (j=0;j<m;++j)
3321 {
3322 t = x[i*m+j];
3323 x[i*m+j] = x[irow[i]*m+j];
3324 x[irow[i]*m+j] = t;
3325 }
3326 }
3327 t = 1./x[i*m+i];
3328 for (j=0;j<n;++j)
3329 {
3330 if (j == i) continue;
3331 t1 = t*x[j*m+i];
3332 for(k=0;k<m;k++) x[j*m+k] -= t1*x[i*m+k];
3333 x[j*m+i] = -t1;
3334 }
3335 for(j=0;j<m;j++) x[i*m+j] *= t;
3336 x[i*m+i] = t;
3337 } /* i */
3338 for (i=n-1; i>=0; i--)
3339 {
3340 if (irow[i] == i) continue;
3341 for(j=0;j<n;j++)
3342 {
3343 t = x[j*m+i];
3344 x[j*m+i] = x[j*m + irow[i]];
3345 x[j*m + irow[i]] = t;
3346 }
3347 }
3348
3349 Free(irow);
3350 return (1);
3351
3352 /* int i, j, k, lower, upper; */
3353 /* phydbl temp; */
3354 /* phydbl *a; */
3355 /* int nsize; */
3356
3357 /* nsize = n; */
3358 /* a = x; */
3359
3360 /* /\*Gauss-Jordan reduction -- invert matrix a in place, */
3361 /* overwriting previous contents of a. On exit, matrix a */
3362 /* contains the inverse.*\/ */
3363 /* lower = 0; */
3364 /* upper = nsize-1; */
3365 /* for(i = lower; i <= upper; i++) */
3366 /* { */
3367 /* temp = 1.0 / a[i*n+i]; */
3368 /* a[i*n+i] = 1.0; */
3369 /* for (j = lower; j <= upper; j++) */
3370 /* { */
3371 /* a[i*n+j] *= temp; */
3372 /* } */
3373 /* for (j = lower; j <= upper; j++) */
3374 /* { */
3375 /* if (j != i) */
3376 /* { */
3377 /* temp = a[j*n+i]; */
3378 /* a[j*n+i] = 0.0; */
3379 /* for (k = lower; k <= upper; k++) */
3380 /* { */
3381 /* a[j*n+k] -= temp * a[i*n+k]; */
3382 /* } */
3383 /* } */
3384 /* } */
3385 /* } */
3386
3387 return(1);
3388
3389 }
3390
3391 //////////////////////////////////////////////////////////////
3392 //////////////////////////////////////////////////////////////
3393
3394
Iter_Matinv(phydbl * x,int n,int m,int verbose)3395 int Iter_Matinv(phydbl *x, int n, int m, int verbose)
3396 {
3397 phydbl *buff;
3398 int i,iter;
3399 phydbl scaler;
3400 int pb;
3401
3402 buff = (phydbl *)mCalloc(n*m,sizeof(phydbl));
3403
3404 pb = NO;
3405 iter = 0;
3406 scaler = 1.;
3407 For(i,n*m) buff[i] = x[i];
3408 while(!Matinv(buff,n,m,verbose))
3409 {
3410 pb = YES;
3411 For(i,n*m) buff[i] = x[i];
3412 scaler *= 10.;
3413 For(i,n*m) buff[i] *= scaler;
3414 iter++;
3415
3416 if(iter > 100)
3417 {
3418 PhyML_Printf("\n== Err in file %s at line %d.",__FILE__,__LINE__);
3419 return 0;
3420 }
3421 }
3422 if(pb) PhyML_Printf("\n== Managed to fix the problem by rescaling the matrix.");
3423 For(i,n*m) x[i] = buff[i]*scaler;
3424 Free(buff);
3425 return 1;
3426 }
3427
3428
3429 //////////////////////////////////////////////////////////////
3430 //////////////////////////////////////////////////////////////
3431
3432
Matrix_Mult(phydbl * A,phydbl * B,int nra,int nca,int nrb,int ncb)3433 phydbl *Matrix_Mult(phydbl *A, phydbl *B, int nra, int nca, int nrb, int ncb)
3434 {
3435 int i,j,k;
3436 phydbl *C;
3437
3438 C = (phydbl *)mCalloc(nra*ncb,sizeof(phydbl));
3439
3440 if(nca != nrb)
3441 {
3442 PhyML_Printf("\n. Matrices dimensions don't match.");
3443 PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
3444 Exit("\n");
3445 }
3446
3447 for(i=0;i<nra;i++)
3448 for(j=0;j<ncb;j++)
3449 for(k=0;k<nca;k++)
3450 C[i*ncb+j] += A[i*nca+k] * B[k*ncb+j];
3451
3452 return C;
3453 }
3454
3455 //////////////////////////////////////////////////////////////
3456 //////////////////////////////////////////////////////////////
3457
3458
Matrix_Transpose(phydbl * A,int dim)3459 phydbl *Matrix_Transpose(phydbl *A, int dim)
3460 {
3461 phydbl *tA,buff;
3462 int i,j;
3463
3464 tA = (phydbl *)mCalloc(dim*dim,sizeof(phydbl));
3465
3466 For(i,dim*dim) tA[i]=A[i];
3467
3468 for(i=0;i<dim;i++) for(j=i+1;j<dim;j++)
3469 {
3470 buff = tA[i*dim+j];
3471 tA[i*dim+j] = tA[j*dim+i];
3472 tA[j*dim+i] = buff;
3473 }
3474
3475 return tA;
3476 }
3477
3478 //////////////////////////////////////////////////////////////
3479 //////////////////////////////////////////////////////////////
3480
3481
Matrix_Det(phydbl * A,int size,int _log)3482 phydbl Matrix_Det(phydbl *A, int size, int _log)
3483 {
3484 phydbl *triA;
3485 int i;
3486 phydbl det;
3487
3488 triA = Cholesky_Decomp(A,size);
3489 det = 0.0;
3490 for(i=0;i<size;i++) det += log(triA[i*size+i]);
3491 Free(triA);
3492
3493 if(_log == NO)
3494 {
3495 det = exp(det);
3496 return det*det;
3497 }
3498 else
3499 {
3500 return 2.*det;
3501 }
3502 }
3503
3504 //////////////////////////////////////////////////////////////
3505 //////////////////////////////////////////////////////////////
3506
3507
3508 /* http://en.wikipedia.org/wiki/Multivariate_normal_distribution (Conditional distributions) */
Normal_Conditional(phydbl * mu,phydbl * cov,phydbl * a,int n,short int * is_1,int n1,phydbl * cond_mu,phydbl * cond_cov)3509 void Normal_Conditional(phydbl *mu, phydbl *cov, phydbl *a, int n, short int *is_1, int n1, phydbl *cond_mu, phydbl *cond_cov)
3510 {
3511 phydbl *mu1,*mu2;
3512 phydbl *sig11,*sig12,*sig21,*sig22,*sig12_invsig22,*buff;
3513 phydbl *ctrd_a;
3514 phydbl *cond_cov_norder,*cond_mu_norder;
3515 int n2;
3516 int i,j,nr,nc;
3517 phydbl *buff_mat;
3518
3519 n2 = n-n1;
3520
3521 mu1 = (phydbl *)mCalloc(n1, sizeof(phydbl));
3522 mu2 = (phydbl *)mCalloc(n2, sizeof(phydbl));
3523 sig11 = (phydbl *)mCalloc(n1*n1,sizeof(phydbl));
3524 sig12 = (phydbl *)mCalloc(n1*n2,sizeof(phydbl));
3525 sig21 = (phydbl *)mCalloc(n2*n1,sizeof(phydbl));
3526 sig22 = (phydbl *)mCalloc(n2*n2,sizeof(phydbl));
3527 ctrd_a = (phydbl *)mCalloc(n2, sizeof(phydbl));
3528 cond_cov_norder = (phydbl *)mCalloc(n1*n1,sizeof(phydbl));
3529 cond_mu_norder = (phydbl *)mCalloc(n1*n1,sizeof(phydbl));
3530 buff_mat = (phydbl *)mCalloc(n2*n2,sizeof(phydbl));
3531
3532 nr=0;
3533 for(i=0;i<n;i++) { if(!is_1[i]) { ctrd_a[nr] = a[i]-mu[i]; nr++; } }
3534
3535 nr=0;
3536 for(i=0;i<n;i++) { if( is_1[i]) { mu1[nr] = mu[i]; nr++; } }
3537
3538 nr=0;
3539 for(i=0;i<n;i++) { if(!is_1[i]) { mu2[nr] = mu[i]; nr++; } }
3540
3541 nr=0; nc=0;
3542 for(i=0;i<n;i++)
3543 {
3544 if(is_1[i])
3545 {
3546 nc = nr;
3547 for(j=i;j<n;j++)
3548 /* nc = 0; */
3549 /* for(j=0;j<n;j++) */
3550 {
3551 if(is_1[j])
3552 {
3553 sig11[nr*n1+nc] = cov[i*n+j];
3554 sig11[nc*n1+nr] = cov[i*n+j];
3555 nc++;
3556 }
3557 }
3558 nr++;
3559 }
3560 }
3561
3562
3563 nr=0; nc=0;
3564 for(i=0;i<n;i++)
3565 {
3566 if(is_1[i])
3567 {
3568 /* nc = nr; */
3569 /* for(j=i;j<n;j++) */
3570 nc = 0;
3571 for(j=0;j<n;j++)
3572 {
3573 if(!is_1[j])
3574 {
3575 sig12[nr*n2+nc] = cov[i*n+j];
3576 /* sig12[nc*n2+nr] = cov[i*n+j]; */
3577 nc++;
3578 }
3579 }
3580 nr++;
3581 }
3582 }
3583
3584 nr=0; nc=0;
3585 for(i=0;i<n;i++)
3586 {
3587 if(!is_1[i])
3588 {
3589 /* nc = nr; */
3590 /* for(j=i;j<n;j++) */
3591 nc = 0;
3592 for(j=0;j<n;j++)
3593 {
3594 if(is_1[j])
3595 {
3596 sig21[nr*n1+nc] = cov[i*n+j];
3597 /* sig21[nc*n1+nr] = cov[i*n+j]; */
3598 nc++;
3599 }
3600 }
3601 nr++;
3602 }
3603 }
3604
3605
3606 nr=0; nc=0;
3607 for(i=0;i<n;i++)
3608 {
3609 if(!is_1[i])
3610 {
3611 nc = nr;
3612 for(j=i;j<n;j++)
3613 /* nc = 0; */
3614 /* for(j=0;j<n;j++) */
3615 {
3616 if(!is_1[j])
3617 {
3618 sig22[nr*n2+nc] = cov[i*n+j];
3619 sig22[nc*n2+nr] = cov[i*n+j];
3620 nc++;
3621 }
3622 }
3623 nr++;
3624 }
3625 }
3626
3627 Iter_Matinv(sig22,n2,n2,NO);
3628
3629 sig12_invsig22 = Matrix_Mult(sig12,sig22,n1,n2,n2,n2);
3630
3631 buff = Matrix_Mult(sig12_invsig22,ctrd_a,n1,n2,n2,1);
3632 for(i=0;i<n1;i++) cond_mu_norder[i] = mu1[i]+buff[i];
3633 Free(buff);
3634
3635 buff = Matrix_Mult(sig12_invsig22,sig21,n1,n2,n2,n1);
3636 for(i=0;i<n1;i++) for(j=0;j<n1;j++) cond_cov_norder[i*n1+j] = sig11[i*n1+j] - buff[i*n1+j];
3637 Free(buff);
3638
3639 nr = 0;
3640 for(i=0;i<n;i++) if(is_1[i]) { cond_mu[i] = cond_mu_norder[nr]; nr++; }
3641
3642 nr = nc = 0;
3643 for(i=0;i<n;i++)
3644 {
3645 if(is_1[i])
3646 {
3647 nc = 0;
3648 for(j=0;j<n;j++)
3649 {
3650 if(is_1[j])
3651 {
3652 cond_cov[i*n+j] = cond_cov_norder[nr*n1+nc];
3653 nc++;
3654 }
3655 }
3656 nr++;
3657 }
3658 }
3659
3660 /* for(i=0;i<n1;i++) */
3661 /* { */
3662 /* for(j=i;j<n1;j++) */
3663 /* if(fabs(cond_cov_norder[i*n1+j] - cond_cov_norder[j*n1+i]) > 1.E-3) */
3664 /* { */
3665 /* PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__); */
3666 /* Warn_And_Exit(""); */
3667 /* } */
3668 /* } */
3669
3670
3671 for(i=0;i<n;i++)
3672 {
3673 for(j=i+1;j<n;j++)
3674 if(fabs(cond_cov[i*n+j] - cond_cov[j*n+i]) > 1.E-3)
3675 {
3676 PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
3677 Warn_And_Exit("");
3678 }
3679 }
3680
3681 Free(mu1);
3682 Free(mu2);
3683 Free(sig11);
3684 Free(sig12);
3685 Free(sig21);
3686 Free(sig22);
3687 Free(ctrd_a);
3688 Free(cond_cov_norder);
3689 Free(cond_mu_norder);
3690 Free(sig12_invsig22);
3691 Free(buff_mat);
3692 }
3693
3694
3695 //////////////////////////////////////////////////////////////
3696 //////////////////////////////////////////////////////////////
3697
3698
3699 /* http://en.wikipedia.org/wiki/Multivariate_normal_distribution (Conditional distributions) */
Normal_Conditional_Unsorted(phydbl * mu,phydbl * cov,phydbl * a,int n,short int * is_1,int n1,phydbl * cond_mu,phydbl * cond_cov)3700 void Normal_Conditional_Unsorted(phydbl *mu, phydbl *cov, phydbl *a, int n, short int *is_1, int n1, phydbl *cond_mu, phydbl *cond_cov)
3701 {
3702 phydbl *mu1,*mu2;
3703 phydbl *sig11,*sig12,*sig21,*sig22,*sig12_invsig22,*buff;
3704 phydbl *ctrd_a;
3705 int n2;
3706 int i,j,nr,nc;
3707
3708 n2 = n-n1;
3709
3710 mu1 = (phydbl *)mCalloc(n1, sizeof(phydbl));
3711 mu2 = (phydbl *)mCalloc(n2, sizeof(phydbl));
3712 sig11 = (phydbl *)mCalloc(n1*n1,sizeof(phydbl));
3713 sig12 = (phydbl *)mCalloc(n1*n2,sizeof(phydbl));
3714 sig21 = (phydbl *)mCalloc(n2*n1,sizeof(phydbl));
3715 sig22 = (phydbl *)mCalloc(n2*n2,sizeof(phydbl));
3716 ctrd_a = (phydbl *)mCalloc(n2, sizeof(phydbl));
3717
3718 nr=0;
3719 for(i=0;i<n;i++) { if(!is_1[i]) { ctrd_a[nr] = a[i]-mu[i]; nr++; } }
3720
3721 nr=0;
3722 for(i=0;i<n;i++) { if( is_1[i]) { mu1[nr] = mu[i]; nr++; } }
3723
3724 nr=0;
3725 for(i=0;i<n;i++) { if(!is_1[i]) { mu2[nr] = mu[i]; nr++; } }
3726
3727 nr=0; nc=0;
3728 for(i=0;i<n;i++)
3729 {
3730 if(is_1[i])
3731 {
3732 nc = nr;
3733 for(j=i;j<n;j++)
3734 /* nc = 0; */
3735 /* for(j=0;j<n;j++) */
3736 {
3737 if(is_1[j])
3738 {
3739 sig11[nr*n1+nc] = cov[i*n+j];
3740 sig11[nc*n1+nr] = cov[i*n+j];
3741 nc++;
3742 }
3743 }
3744 nr++;
3745 }
3746 }
3747
3748
3749 nr=0; nc=0;
3750 for(i=0;i<n;i++)
3751 {
3752 if(is_1[i])
3753 {
3754 /* nc = nr; */
3755 /* for(j=i;j<n;j++) */
3756 nc = 0;
3757 for(j=0;j<n;j++)
3758 {
3759 if(!is_1[j])
3760 {
3761 sig12[nr*n2+nc] = cov[i*n+j];
3762 /* sig12[nc*n2+nr] = cov[i*n+j]; */
3763 nc++;
3764 }
3765 }
3766 nr++;
3767 }
3768 }
3769
3770 nr=0; nc=0;
3771 for(i=0;i<n;i++)
3772 {
3773 if(!is_1[i])
3774 {
3775 /* nc = nr; */
3776 /* for(j=i;j<n;j++) */
3777 nc = 0;
3778 for(j=0;j<n;j++)
3779 {
3780 if(is_1[j])
3781 {
3782 sig21[nr*n1+nc] = cov[i*n+j];
3783 /* sig21[nc*n1+nr] = cov[i*n+j]; */
3784 nc++;
3785 }
3786 }
3787 nr++;
3788 }
3789 }
3790
3791
3792 nr=0; nc=0;
3793 for(i=0;i<n;i++)
3794 {
3795 if(!is_1[i])
3796 {
3797 nc = nr;
3798 for(j=i;j<n;j++)
3799 /* nc = 0; */
3800 /* for(j=0;j<n;j++) */
3801 {
3802 if(!is_1[j])
3803 {
3804 sig22[nr*n2+nc] = cov[i*n+j];
3805 sig22[nc*n2+nr] = cov[i*n+j];
3806 nc++;
3807 }
3808 }
3809 nr++;
3810 }
3811 }
3812
3813 if(!Matinv(sig22,n2,n2,NO))
3814 {
3815 PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
3816 Exit("\n");
3817 }
3818 sig12_invsig22 = Matrix_Mult(sig12,sig22,n1,n2,n2,n2);
3819
3820 buff = Matrix_Mult(sig12_invsig22,ctrd_a,n1,n2,n2,1);
3821 for(i=0;i<n1;i++) cond_mu[i] = mu1[i]+buff[i];
3822 Free(buff);
3823
3824 buff = Matrix_Mult(sig12_invsig22,sig21,n1,n2,n2,n1);
3825 for(i=0;i<n1;i++) for(j=0;j<n1;j++) cond_cov[i*n1+j] = sig11[i*n1+j] - buff[i*n1+j];
3826
3827
3828 Free(mu1);
3829 Free(mu2);
3830 Free(sig11);
3831 Free(sig12);
3832 Free(sig21);
3833 Free(sig22);
3834 Free(ctrd_a);
3835 Free(sig12_invsig22);
3836 }
3837
3838
3839 //////////////////////////////////////////////////////////////
3840 //////////////////////////////////////////////////////////////
3841
3842
3843 /* http://en.wikipedia.org/wiki/Multivariate_normal_distribution (Conditional distributions) */
Get_Reg_Coeff(phydbl * mu,phydbl * cov,phydbl * a,int n,short int * is_1,int n1,phydbl * reg_coeff)3844 void Get_Reg_Coeff(phydbl *mu, phydbl *cov, phydbl *a, int n, short int *is_1, int n1, phydbl *reg_coeff)
3845 {
3846 phydbl *sig12,*sig22,*sig12_invsig22;
3847 int n2;
3848 int i,j,nr,nc;
3849
3850 n2 = n-n1;
3851
3852 sig12 = (phydbl *)mCalloc(n1*n2,sizeof(phydbl));
3853 sig22 = (phydbl *)mCalloc(n2*n2,sizeof(phydbl));
3854
3855 nr=0; nc=0;
3856 for(i=0;i<n;i++)
3857 {
3858 if(is_1[i])
3859 {
3860 nc = 0;
3861 for(j=0;j<n;j++)
3862 {
3863 if(!is_1[j])
3864 {
3865 sig12[nr*n2+nc] = cov[i*n+j];
3866 nc++;
3867 }
3868 }
3869 nr++;
3870 }
3871 }
3872
3873
3874 nr=0; nc=0;
3875 for(i=0;i<n;i++)
3876 {
3877 if(!is_1[i])
3878 {
3879 nc = nr;
3880 for(j=i;j<n;j++)
3881 {
3882 if(!is_1[j])
3883 {
3884 sig22[nr*n2+nc] = cov[i*n+j];
3885 sig22[nc*n2+nr] = cov[i*n+j];
3886 nc++;
3887 }
3888 }
3889 nr++;
3890 }
3891 }
3892
3893
3894 if(!Matinv(sig22,n2,n2,NO))
3895 {
3896 PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
3897 Exit("\n");
3898 }
3899 sig12_invsig22 = Matrix_Mult(sig12,sig22,n1,n2,n2,n2);
3900
3901
3902 for(i=0;i<n;i++) reg_coeff[i] = 0.0;
3903
3904 /* nr = 0; */
3905 /* for(i=0;i<n;i++) if(!is_1[i]) { reg_coeff[i] = sig12_invsig22[nr]; nr++; } */
3906
3907 nc = 0;
3908 nr = 0;
3909 for(i=0;i<n1;i++)
3910 {
3911 nc = 0;
3912 for(j=0;j<n;j++)
3913 if(!is_1[j])
3914 {
3915 reg_coeff[i*n+j] = sig12_invsig22[nr*n2+nc];
3916 nc++;
3917 }
3918 nr++;
3919 }
3920
3921
3922 if(nc != n2 || nr != n1)
3923 {
3924 PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
3925 Exit("\n");
3926 }
3927
3928
3929 Free(sig12);
3930 Free(sig22);
3931 Free(sig12_invsig22);
3932 }
3933
3934
3935 //////////////////////////////////////////////////////////////
3936 //////////////////////////////////////////////////////////////
3937
3938
Norm_Trunc_Sd(phydbl mu,phydbl sd,phydbl a,phydbl b)3939 phydbl Norm_Trunc_Sd(phydbl mu, phydbl sd, phydbl a, phydbl b)
3940 {
3941 phydbl pdfa, pdfb;
3942 phydbl cdfa, cdfb;
3943 phydbl ctra, ctrb;
3944 phydbl cond_var;
3945 phydbl cdfbmcdfa;
3946
3947 ctra = (a - mu)/sd;
3948 ctrb = (b - mu)/sd;
3949
3950 pdfa = Dnorm(ctra,0.0,1.0);
3951 pdfb = Dnorm(ctrb,0.0,1.0);
3952
3953 cdfa = Pnorm(ctra,0.0,1.0);
3954 cdfb = Pnorm(ctrb,0.0,1.0);
3955
3956 cdfbmcdfa = cdfb - cdfa;
3957
3958 if(cdfbmcdfa < SMALL)
3959 {
3960 cdfbmcdfa = SMALL;
3961 PhyML_Printf("\n. mu=%G sd=%G a=%G b=%G",mu,sd,a,b);
3962 PhyML_Printf("\n. Numerical precision issue detected.");
3963 PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
3964 }
3965
3966 cond_var = sd*sd*(1. + (ctra*pdfa - ctrb*pdfb)/cdfbmcdfa - POW((pdfa - pdfb)/cdfbmcdfa,2));
3967
3968 return SQRT(cond_var);
3969 }
3970
3971 //////////////////////////////////////////////////////////////
3972 //////////////////////////////////////////////////////////////
3973
3974
Norm_Trunc_Mean(phydbl mu,phydbl sd,phydbl a,phydbl b)3975 phydbl Norm_Trunc_Mean(phydbl mu, phydbl sd, phydbl a, phydbl b)
3976 {
3977 phydbl pdfa, pdfb;
3978 phydbl cdfa, cdfb;
3979 phydbl ctra, ctrb;
3980 phydbl cond_mu;
3981 phydbl cdfbmcdfa;
3982
3983 ctra = (a - mu)/sd;
3984 ctrb = (b - mu)/sd;
3985
3986 pdfa = Dnorm(ctra,0.0,1.0);
3987 pdfb = Dnorm(ctrb,0.0,1.0);
3988
3989 cdfa = Pnorm(ctra,0.0,1.0);
3990 cdfb = Pnorm(ctrb,0.0,1.0);
3991
3992 cdfbmcdfa = cdfb - cdfa;
3993
3994 if(cdfbmcdfa < SMALL)
3995 {
3996 cdfbmcdfa = SMALL;
3997 PhyML_Printf("\n. mu=%G sd=%G a=%G b=%G",mu,sd,a,b);
3998 PhyML_Printf("\n. Numerical precision issue detected.");
3999 PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
4000 }
4001
4002 cond_mu = mu + sd*(pdfa - pdfb)/cdfbmcdfa;
4003
4004 return cond_mu;
4005 }
4006
4007 //////////////////////////////////////////////////////////////
4008 //////////////////////////////////////////////////////////////
4009
4010
Norm_Trunc_Mean_Sd(phydbl mu,phydbl sd,phydbl a,phydbl b,phydbl * trunc_mu,phydbl * trunc_sd)4011 int Norm_Trunc_Mean_Sd(phydbl mu, phydbl sd, phydbl a, phydbl b, phydbl *trunc_mu, phydbl *trunc_sd)
4012 {
4013
4014 phydbl pdfa, pdfb;
4015 phydbl cdfa, cdfb;
4016 phydbl ctra, ctrb;
4017 phydbl cdfbmcdfa;
4018
4019 ctra = (a - mu)/sd;
4020 ctrb = (b - mu)/sd;
4021
4022 pdfa = Dnorm(ctra,0.0,1.0);
4023 pdfb = Dnorm(ctrb,0.0,1.0);
4024
4025 cdfa = Pnorm(ctra,0.0,1.0);
4026 cdfb = Pnorm(ctrb,0.0,1.0);
4027
4028 cdfbmcdfa = cdfb - cdfa;
4029
4030 if(cdfbmcdfa < SMALL)
4031 {
4032 cdfbmcdfa = SMALL;
4033 PhyML_Printf("\n. mu=%G sd=%G a=%G b=%G",mu,sd,a,b);
4034 PhyML_Printf("\n. cdfa=%G cdfb=%G",cdfa,cdfb);
4035 PhyML_Printf("\n. Numerical precision issue detected.");
4036 PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
4037 return 0;
4038 }
4039
4040 *trunc_mu = mu + sd*(pdfa - pdfb)/cdfbmcdfa;
4041 *trunc_sd = sd*sd*(1. + (ctra*pdfa - ctrb*pdfb)/cdfbmcdfa - POW((pdfa - pdfb)/cdfbmcdfa,2));
4042 *trunc_sd = SQRT(*trunc_sd);
4043 return 1;
4044 }
4045
4046 //////////////////////////////////////////////////////////////
4047 //////////////////////////////////////////////////////////////
4048
VarCov_Approx_Likelihood(t_tree * tree)4049 void VarCov_Approx_Likelihood(t_tree *tree)
4050 {
4051 int i,j;
4052 phydbl *cov;
4053 phydbl *mean;
4054 int dim;
4055 int iter;
4056 phydbl cur_mean,new_mean,diff_mean,max_diff_mean;
4057 phydbl cur_cov,new_cov,diff_cov,max_diff_cov;
4058 FILE *fp;
4059
4060
4061 cov = tree->rates->cov_l;
4062 mean = tree->rates->mean_l;
4063 dim = 2*tree->n_otu-3;
4064
4065 fp = fopen("covariance","w");
4066 fprintf(fp,"\n");
4067 fprintf(fp,"Run\t");
4068 fprintf(fp,"lnL\t");
4069 for(i=0;i<dim;i++) fprintf(fp,"Edge%d[%f]\t",i,tree->rates->u_ml_l[i]);
4070
4071
4072 for(i=0;i<dim;i++) mean[i] = .0;
4073 For(i,dim*dim) cov[i] = .0;
4074
4075 MCMC_Randomize_Branch_Lengths(tree);
4076
4077 /* For(i,2*tree->n_otu-3) tree->a_edges[i]->l->v *= Rgamma(5.,1./5.); */
4078
4079 Set_Both_Sides(YES,tree);
4080 Lk(NULL,tree);
4081
4082 iter = 0;
4083 do
4084 {
4085 /* tree->both_sides = YES; */
4086 /* Lk(tree); */
4087 MCMC_Br_Lens(tree);
4088 /* MCMC_Scale_Br_Lens(tree); */
4089
4090
4091 max_diff_mean = 0.0;
4092 for(i=0;i<dim;i++)
4093 {
4094 cur_mean = mean[i];
4095
4096 mean[i] *= (phydbl)iter;
4097 mean[i] += tree->a_edges[i]->l->v;
4098 mean[i] /= (phydbl)(iter+1);
4099
4100 new_mean = mean[i];
4101 diff_mean = MAX(cur_mean,new_mean)/MIN(cur_mean,new_mean);
4102 if(diff_mean > max_diff_mean) max_diff_mean = diff_mean;
4103 /* printf("\n. %d diff_mean = %f %f %f %f",i,diff_mean,cur_mean,new_mean,tree->a_edges[i]->l->v); */
4104 }
4105
4106 max_diff_cov = 0.0;
4107 for(i=0;i<dim;i++)
4108 {
4109 for(j=0;j<dim;j++)
4110 {
4111 cur_cov = cov[i*dim+j];
4112
4113 cov[i*dim+j] *= (phydbl)iter;
4114 cov[i*dim+j] += tree->a_edges[i]->l->v * tree->a_edges[j]->l->v;
4115 cov[i*dim+j] /= (phydbl)(iter+1);
4116
4117 new_cov = cov[i*dim+j];
4118 diff_cov = MAX(cur_cov,new_cov)/MIN(cur_cov,new_cov);
4119 if(diff_cov > max_diff_cov) max_diff_cov = diff_cov;
4120 }
4121 }
4122 iter++;
4123
4124 /* if(!(iter%10)) */
4125 /* printf("\n. iter=%d max_diff_mean=%f max_diff_cov=%f",iter,max_diff_mean,max_diff_cov); */
4126
4127 /* if(iter && max_diff_mean < 1.01 && max_diff_cov < 1.01) break; */
4128
4129 if(!(iter%20))
4130 {
4131 fprintf(fp,"\n");
4132 fprintf(fp,"%d\t",iter);
4133 fprintf(fp,"%f\t",tree->c_lnL);
4134 for(i=0;i<dim;i++) fprintf(fp,"%f\t",tree->a_edges[i]->l->v);
4135 fflush(NULL);
4136 }
4137
4138 }while(iter < 5000);
4139
4140
4141 for(i=0;i<dim;i++)
4142 {
4143 for(j=0;j<dim;j++)
4144 {
4145 cov[i*dim+j] = cov[i*dim+j] - mean[i]*mean[j];
4146 if(i == j && cov[i*dim+j] < MIN_VAR_BL) cov[i*dim+j] = MIN_VAR_BL;
4147 }
4148 }
4149
4150 fclose(fp);
4151 }
4152
4153 //////////////////////////////////////////////////////////////
4154 //////////////////////////////////////////////////////////////
4155
4156
4157 /* Order statistic. x_is are uniformily distributed in [min,max] */
Dorder_Unif(phydbl x,int r,int n,phydbl min,phydbl max)4158 phydbl Dorder_Unif(phydbl x, int r, int n, phydbl min, phydbl max)
4159 {
4160 phydbl cons;
4161 phydbl Fx;
4162 phydbl dens;
4163
4164 if(x < min || x > max || min > max)
4165 {
4166 PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
4167 Exit("\n");
4168 }
4169
4170 cons = LnGamma(n+1) - LnGamma(r) - LnGamma(n-r+1);
4171 cons = exp(cons);
4172 cons = ROUND(cons);
4173
4174 Fx = (x-min)/(max-min);
4175
4176 dens = cons * pow(Fx,r-1) * pow(1.-Fx,n-r) * (1./(max-min));
4177
4178 /* printf("\n. x=%f r=%d n=%d min=%f max=%f dens=%f",x,r,n,min,max,dens); */
4179 /* Exit("\n"); */
4180
4181 return(dens);
4182 }
4183
4184 //////////////////////////////////////////////////////////////
4185 //////////////////////////////////////////////////////////////
4186
4187
Covariance(phydbl * x,phydbl * y,int n)4188 phydbl Covariance(phydbl *x, phydbl *y, int n)
4189 {
4190 int i;
4191 phydbl mean_x,mean_y,mean_xy;
4192
4193 mean_x = .0;
4194 for(i=0;i<n;i++) mean_x += x[i];
4195 mean_x /= (phydbl)n;
4196
4197 mean_y = .0;
4198 for(i=0;i<n;i++) mean_y += y[i];
4199 mean_y /= (phydbl)n;
4200
4201 mean_xy = .0;
4202 for(i=0;i<n;i++) mean_xy += x[i]*y[i];
4203 mean_xy /= (phydbl)n;
4204
4205 return (mean_xy - mean_x*mean_y);
4206 }
4207
4208 //////////////////////////////////////////////////////////////
4209 //////////////////////////////////////////////////////////////
4210
4211 /* Sample X from a multivariate normal with mean mu and covariance cov, within
4212 the interval [min,max], under the linear constraint X.lambda=k
4213 */
4214
Rnorm_Multid_Trunc_Constraint(phydbl * mu,phydbl * cov,phydbl * min,phydbl * max,phydbl * lambda,phydbl k,phydbl * res,int len)4215 phydbl *Rnorm_Multid_Trunc_Constraint(phydbl *mu, phydbl *cov, phydbl *min, phydbl *max, phydbl *lambda, phydbl k, phydbl *res, int len)
4216 {
4217
4218 phydbl *loc_res;
4219 int i,j,cond,iter;
4220 phydbl *x;
4221 phydbl cond_mean,cond_var;
4222 phydbl cov_zic,cov_zii,cov_zcc;
4223 phydbl mean_zi, mean_zc;
4224 phydbl alpha;
4225 phydbl sum;
4226 int err;
4227 phydbl zi;
4228
4229
4230 cond = 0;
4231
4232 loc_res = NULL;
4233 if(!res)
4234 {
4235 loc_res = (phydbl *)mCalloc(len,sizeof(phydbl));
4236 x = loc_res;
4237 }
4238 else x = res;
4239
4240
4241
4242 /* zi = x[i] . lambda[i] */
4243
4244 iter = 0;
4245 do
4246 {
4247 sum = 0.0;
4248 for(i=0;i<len;i++)
4249 {
4250 if(i != cond)
4251 {
4252 cov_zic = lambda[i] * lambda[cond] * cov[i*len+cond];
4253 cov_zii = lambda[i] * lambda[i] * cov[i*len+i];
4254 cov_zcc = lambda[cond] * lambda[cond] * cov[cond*len+cond];
4255
4256 mean_zi = lambda[i];
4257 mean_zc = lambda[cond];
4258
4259 /* alpha = k - \sum_{j != cond, j !=i} z_j */
4260 alpha = k;
4261 for(j=0;j<len;j++) if(j != cond && j != i) alpha -= lambda[j] * x[j];
4262
4263 cond_mean = mean_zi + (cov_zii + cov_zic) / (cov_zii + 2.*cov_zic + cov_zcc) * (alpha - mean_zi - mean_zc);
4264 cond_var = cov_zii - POW(cov_zii + cov_zic,2)/(cov_zii + 2.*cov_zic + cov_zcc);
4265
4266 if(lambda[i]*min[i] > alpha - lambda[cond]*min[i])
4267 {
4268 PhyML_Printf("\n. Cannot satisfy the constraint.\n");
4269 PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
4270 Exit("\n");
4271 }
4272
4273 err = NO;
4274 zi = Rnorm_Trunc(cond_mean,SQRT(cond_var),
4275 MAX(lambda[i]*min[i],alpha-lambda[cond]*max[cond]),
4276 MIN(lambda[i]*max[i],alpha-lambda[cond]*min[cond]),&err);
4277 if(err == YES)
4278 {
4279 PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
4280 Exit("\n");
4281 }
4282 sum += zi;
4283 x[i] = zi / lambda[i];
4284 }
4285 }
4286
4287 x[cond] = (k - sum)/lambda[cond];
4288
4289 }while(iter++ < 10);
4290
4291 return(loc_res);
4292
4293 }
4294
4295 //////////////////////////////////////////////////////////////
4296 //////////////////////////////////////////////////////////////
4297
Integrated_Brownian_Bridge_Moments(phydbl x_beg,phydbl x_end,phydbl y_beg,phydbl y_end,phydbl brownian_var,phydbl * mean,phydbl * var)4298 void Integrated_Brownian_Bridge_Moments(phydbl x_beg, phydbl x_end,
4299 phydbl y_beg, phydbl y_end,
4300 phydbl brownian_var, phydbl *mean, phydbl *var)
4301 {
4302 /* phydbl *y; */
4303 /* phydbl *y_mean; */
4304 /* int n_rep; */
4305 int n_breaks;
4306 int i;
4307 /* int j; */
4308 /* phydbl traj_mean, traj_sd; */
4309 /* phydbl x_prev, x_curr; */
4310 phydbl x;
4311 phydbl x_step;
4312 phydbl sum;
4313 /* phydbl sumsum; */
4314 phydbl scaled_var;
4315
4316 scaled_var = brownian_var/fabs(x_end - x_beg);
4317
4318 n_breaks = 100;
4319
4320
4321 /* n_rep = 500; */
4322
4323 /* x_step = (x_end - x_beg)/(n_breaks+1); */
4324
4325 /* y = (phydbl *)mCalloc(n_breaks+2,sizeof(phydbl)); */
4326 /* y_mean = (phydbl *)mCalloc(n_rep,sizeof(phydbl)); */
4327
4328 /* y[0] = y_beg; */
4329 /* y[n_breaks+1] = y_end; */
4330
4331 /* for(i=0;i<n_rep;i++) */
4332 /* { */
4333 /* for(j=1;j<n_breaks+1;j++) */
4334 /* { */
4335 /* x_prev = x_beg + (j-1)*x_step; */
4336 /* x_curr = x_prev + x_step; */
4337
4338 /* traj_mean = y[j-1] + (y_end - y[j-1])*(x_curr - x_prev)/(x_end - x_prev); */
4339 /* traj_sd = SQRT(scaled_var*(x_curr - x_prev)*(x_end - x_curr)/(x_end - x_prev)); */
4340
4341 /* if(isnan(traj_mean) || isnan(traj_sd)) */
4342 /* { */
4343 /* PhyML_Printf("\n. traj_mean=%f traj_sd=%f x_end=%f x_prev=%f x_step=%f [%f %f %f %f %f %f %f] j=%d n_breaks=%d", */
4344 /* traj_mean,traj_sd,x_end,x_prev,x_step, */
4345 /* y[j-1],y_end,y[j-1],x_curr,x_prev,x_end,x_prev,j,n_breaks); */
4346 /* Exit("\n"); */
4347 /* } */
4348
4349 /* y[j] = Rnorm(traj_mean,traj_sd); */
4350
4351 /* if(isnan(y[j]) || isinf(y[j])) */
4352 /* { */
4353 /* printf("\n. mean=%f sd=%f %f j=%d y[j]=%f",traj_sd,traj_mean,Rnorm(traj_mean,traj_sd),j,y[j]); */
4354 /* Exit("\n"); */
4355 /* } */
4356
4357 /* } */
4358
4359 /* sum = 0.0; */
4360 /* For(j,n_breaks+2) sum += fabs(y[j]); */
4361 /* y_mean[i] = sum/(n_breaks+2); */
4362 /* } */
4363
4364 /* sum = sumsum = 0.0; */
4365 /* for(i=0;i<n_rep;i++) */
4366 /* { */
4367 /* sum += y_mean[i]; */
4368 /* sumsum += y_mean[i] * y_mean[i]; */
4369 /* } */
4370
4371 /* *mean = sum/n_rep; */
4372 /* *var = sumsum/n_rep - (*mean) * (*mean); */
4373
4374 /* if(isnan(*mean) || isnan(*var)) */
4375 /* { */
4376 /* PhyML_Printf("\n. sum=%f sumsum=%f n_rep=%d",sum,sumsum,n_rep); */
4377 /* Exit("\n"); */
4378 /* } */
4379
4380 /* Free(y); */
4381 /* Free(y_mean); */
4382
4383 /* /\* printf("\n. [%f %f]",*mean,*var); *\/ */
4384
4385 phydbl mux,six;
4386
4387 x_step = (x_end - x_beg)/(n_breaks+1);
4388 sum = y_beg;
4389 for(i=1;i<n_breaks+1;i++)
4390 {
4391 x = x_beg + i*x_step;
4392
4393 mux = y_beg + (y_end - y_beg)*(x - x_beg)/(x_end - x_beg);
4394 six = SQRT(scaled_var*(x - x_beg)*(x_end - x)/(x_end - x_beg));
4395
4396 sum +=
4397 (2.*six)/SQRT(2.*PI)*exp(-POW(mux,2)/(2.*POW(six,2))) +
4398 2.*mux*Pnorm(mux/six,.0,1.) - mux;
4399 }
4400 sum += y_end;
4401
4402 (*mean) = sum / (n_breaks+2.);
4403 (*var) = (1./12.)*scaled_var*(x_end - x_beg);
4404
4405 /* printf(" [%f %f] -- x_beg=%f x_end=%f y_beg=%f y_end=%f sd=%f", */
4406 /* (*mean),(*var),x_beg,x_end,y_beg,y_end,brownian_var); */
4407 }
4408
4409
4410 //////////////////////////////////////////////////////////////
4411 //////////////////////////////////////////////////////////////
4412
4413 //////////////////////////////////////////////////////////////
4414 //////////////////////////////////////////////////////////////
4415
4416 //////////////////////////////////////////////////////////////
4417 //////////////////////////////////////////////////////////////
4418
4419
4420 /* Let X'(t) = A + (B-A)t/T + X(t) and X(t) = W(t) + t/T * W(T),
4421 i.e., X(t) is a Brownian bridge starting at 0 at t=0 and stopping
4422 at value 1 at time t=T. X'(t) starts at X'(t)=A at t=0 and stops at X'(t)=B at
4423 t=T. This function calculates the mean and variance of
4424 Z(T) = 1/T \int_0^T exp(X'(t)) dt.
4425 */
4426
Integrated_Geometric_Brownian_Bridge_Moments(phydbl T,phydbl A,phydbl B,phydbl u,phydbl * mean,phydbl * var)4427 void Integrated_Geometric_Brownian_Bridge_Moments(phydbl T, phydbl A, phydbl B, phydbl u, phydbl *mean, phydbl *var)
4428 {
4429 Integrated_Geometric_Brownian_Bridge_Mean(T,A,B,u,mean);
4430 Integrated_Geometric_Brownian_Bridge_Var(T,A,B,u,var);
4431 }
4432
4433 //////////////////////////////////////////////////////////////
4434 //////////////////////////////////////////////////////////////
4435
Integrated_Geometric_Brownian_Bridge_Mean(phydbl T,phydbl A,phydbl B,phydbl u,phydbl * mean)4436 void Integrated_Geometric_Brownian_Bridge_Mean(phydbl T, phydbl A, phydbl B, phydbl u, phydbl *mean)
4437 {
4438 phydbl pnormarg1 = log(B/A)/sqrt(u*u*T) + .5*sqrt(u*u*T);
4439 phydbl pnormarg2 = pnormarg1 - sqrt(u*u*T);
4440
4441 /* printf("\n. %G %G",pnormarg1,pnormarg2); */
4442
4443 if((pnormarg1 > 2.0 && pnormarg2 > 2.0) ||
4444 (pnormarg1 < -2.0 && pnormarg2 < -2.0))
4445 {
4446 // Proposition 5.1 in Privaut & Guindon, 2015, Journal of mathematical biology 71 (6-7), 1387-1409
4447 *mean = T*(B-A)/log(B/A)+u*u*T*T*((A+B)/(2.*pow(log(B/A),2)) - (B-A)/pow(log(B/A),3));
4448 }
4449 else
4450 {
4451 // Proposition 3.3 in Privaut & Guindon, 2015, Journal of mathematical biology 71 (6-7), 1387-1409
4452 *mean =
4453 (A/(u*u))*sqrt(2.*PI*u*u*T)*exp(pow(u*u*T/2.+log(B/A),2)/(2.*u*u*T))*
4454 (Pnorm(pnormarg1,0.,1.) -
4455 Pnorm(pnormarg2,0.,1.));
4456 }
4457
4458 *mean = *mean/T;
4459 }
4460
4461 //////////////////////////////////////////////////////////////
4462 //////////////////////////////////////////////////////////////
4463
Integrated_Geometric_Brownian_Bridge_Var(phydbl T,phydbl A,phydbl B,phydbl u,phydbl * var)4464 void Integrated_Geometric_Brownian_Bridge_Var(phydbl T, phydbl A, phydbl B, phydbl u, phydbl *var)
4465 {
4466
4467 double U = T*u*u;
4468 double logz = log(B/A);
4469 double z = B/A;
4470 double m;
4471
4472 phydbl pnormarg1 = log(B/A)/sqrt(u*u*T) + .5*sqrt(u*u*T);
4473 phydbl pnormarg2 = pnormarg1 - sqrt(u*u*T);
4474
4475 if((pnormarg1 > 2.0 && pnormarg2 > 2.0) ||
4476 (pnormarg1 < -2.0 && pnormarg2 < -2.0))
4477 {
4478 // Proposition 5.1 in Privaut & Guindon, 2015, Journal of mathematical biology 71 (6-7), 1387-1409
4479 *var = 0.0;
4480 }
4481 else
4482 {
4483 // Proposition 3.3 in Privaut & Guindon, 2015, Journal of mathematical biology 71 (6-7), 1387-1409
4484 *var =
4485 2.*sqrt(2.*PI*U)*exp(pow(U +logz,2)/(2.*U))*(Pnorm(logz/sqrt(U) + sqrt(U),0.0,1.0)-Pnorm(logz/sqrt(U) - sqrt(U),0.0,1.0)) -
4486 (1.+z)*2.*sqrt(2.*PI*U)*exp(pow(U/2.+logz,2)/(2.*U))*(Pnorm(logz/sqrt(U) + .5*sqrt(U),0.0,1.0)-Pnorm(logz/sqrt(U) - .5*sqrt(U),0.0,1.0));
4487 *var = *var * pow(A,2) / pow(u,4);
4488 Integrated_Geometric_Brownian_Bridge_Mean(T,A,B,u,&m);
4489 *var = *var / pow(T,2);
4490 *var = *var - m*m;
4491 }
4492 if(*var < 0.0) *var = 0.0;
4493 }
4494
4495 //////////////////////////////////////////////////////////////
4496 //////////////////////////////////////////////////////////////
4497 // Inverse method to sample from X where P(X=xi)=pi[i]
4498
Sample_i_With_Proba_pi(phydbl * pi,int len)4499 int Sample_i_With_Proba_pi(phydbl *pi, int len)
4500 {
4501 phydbl *cum_pi;
4502 int i;
4503 phydbl u;
4504
4505 cum_pi = (phydbl *)mCalloc(len,sizeof(phydbl));
4506
4507 u = .0;
4508 for(i=0;i<len;i++) u += pi[i];
4509 for(i=0;i<len;i++) cum_pi[i] = pi[i] / u;
4510 for(i=1;i<len;i++) cum_pi[i] += cum_pi[i-1];
4511
4512 if((cum_pi[i-1] > 1. + 1.E-10) || (cum_pi[i-1] < 1. - 1.E-10))
4513 {
4514 PhyML_Printf("\n== Sum of probabilities is different from 1.0 (%f).",cum_pi[i-1]);
4515 PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
4516 Exit("\n");
4517 }
4518
4519 i = 0;
4520 u = Uni();
4521 for(i=0;i<len;i++) if(cum_pi[i] > u) break;
4522
4523 if(i == len)
4524 {
4525 for(i=0;i<len;i++) printf("\n== idx:%d prob:%g",i,pi[i]);
4526 PhyML_Printf("\n== Len = %d",len);
4527 PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
4528 Exit("\n");
4529 }
4530 Free(cum_pi);
4531
4532 return(i);
4533 }
4534
4535 //////////////////////////////////////////////////////////////
4536 //////////////////////////////////////////////////////////////
4537 // Same method as previous one, but returning n_elts sampled
4538 // elements at once, avoiding to recreate cum_pi for each sample.
4539 // Alias Method (https://en.wikipedia.org/wiki/Alias_method)
4540 // Inspired from:
4541 // https://jugit.fz-juelich.de/mlz/ransampl/blob/master/lib/ransampl.c
4542 // and
4543 // https://possiblywrong.wordpress.com/2012/02/05/the-alias-method-and-double-precision/
Sample_n_i_With_Proba_pi(phydbl * pi,int len,int n_elts)4544 int* Sample_n_i_With_Proba_pi(phydbl *pi, int len, int n_elts)
4545 {
4546 int i,n;
4547 phydbl sum;
4548 int *sampled;
4549 int *alias;
4550 phydbl *prob, *p;
4551 int *small, *large;
4552 int num_small = 0, num_large = 0;
4553 int a, g;
4554
4555 alias = mCalloc(len, sizeof(int));
4556 prob = mCalloc(len, sizeof(phydbl));
4557 p = mCalloc(len, sizeof(phydbl));
4558 small = mCalloc(len, sizeof(int));
4559 large = mCalloc(len, sizeof(int));
4560
4561 sampled = (int *)mCalloc(n_elts,sizeof(int));
4562
4563 sum = .0;
4564 for(i=0;i<len;i++)
4565 {
4566 if( pi[i]<0 )
4567 {
4568 PhyML_Printf("\n== Probability < 0");
4569 PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
4570 Exit("\n");
4571 }
4572 sum += pi[i];
4573 }
4574
4575 if(sum == 0. )
4576 {
4577 PhyML_Printf("\n== Sum of probabilities is 0");
4578 PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
4579 Exit("\n");
4580 }
4581
4582 for(i=0;i<len;i++) p[i] = pi[i] * len / sum;
4583
4584 num_small = 0;
4585 num_large = 0;
4586 for ( i=len-1; i>=0; --i )
4587 {
4588 if ( p[i]<1 )
4589 small[num_small++] = i;
4590 else
4591 large[num_large++] = i;
4592 }
4593
4594 while ( num_small && num_large )
4595 {
4596 a = small[--num_small];
4597 g = large[--num_large];
4598 prob[a] = p[a];
4599 alias[a] = g;
4600 p[g] = p[g] + p[a] - 1;
4601 if ( p[g] < 1 )
4602 small[num_small++] = g;
4603 else
4604 large[num_large++] = g;
4605 }
4606
4607 while ( num_large )
4608 prob[ large[--num_large] ] = 1;
4609
4610 while ( num_small )
4611 prob[ small[--num_small] ] = 1;
4612
4613 Free( p );
4614 Free( small );
4615 Free( large );
4616
4617 for(n=0; n<n_elts;n++)
4618 {
4619 phydbl r1 = Uni();
4620 phydbl r2 = Uni();
4621 i = (int) (len * r1);
4622 sampled[n] = (r2 < prob[i] ? i : alias[i]);
4623 }
4624
4625 Free(alias );
4626 Free(prob );
4627
4628 return(sampled);
4629 }
4630
4631
4632 //////////////////////////////////////////////////////////////
4633 //////////////////////////////////////////////////////////////
4634
4635 // Return the value y such that Prob(x<y) = p
Quantile(phydbl * x,int len,phydbl p)4636 phydbl Quantile(phydbl *x, int len, phydbl p)
4637 {
4638 phydbl *y,q;
4639 int i;
4640 int swap;
4641 phydbl buff;
4642
4643 y = (phydbl *)mCalloc(len,sizeof(phydbl));
4644 for(i=0;i<len;i++) y[i] = x[i];
4645
4646 do
4647 {
4648 swap = NO;
4649 for(i=0;i<len-1;i++)
4650 {
4651 if(y[i+1] < y[i])
4652 {
4653 swap = YES;
4654
4655 buff = y[i+1];
4656 y[i+1] = y[i];
4657 y[i] = buff;
4658 }
4659 }
4660 }
4661 while(swap == YES);
4662
4663 q = y[(int)((len-1)*p)];
4664
4665 Free(y);
4666
4667 return(q);
4668
4669 }
4670
4671
4672 //////////////////////////////////////////////////////////////
4673 //////////////////////////////////////////////////////////////
4674
4675 // Return p such that Prob(x<z) = p
Prob(phydbl * x,int len,phydbl z)4676 phydbl Prob(phydbl *x, int len, phydbl z)
4677 {
4678 int i;
4679 phydbl hit;
4680
4681 hit = 0.;
4682 for(i=0;i<len;i++) if(x[i] < z) hit+=1.;
4683
4684 return(hit/(phydbl)len);
4685
4686 }
4687
4688 //////////////////////////////////////////////////////////////
4689 //////////////////////////////////////////////////////////////
4690
4691 // Return x where mu is the first moment of the normal density
4692 // and x is the value such that f(x;mu,sigma)=y
4693
Inverse_Truncated_Normal(phydbl y,phydbl mu,phydbl sigma,phydbl lim_inf,phydbl lim_sup)4694 phydbl Inverse_Truncated_Normal(phydbl y, phydbl mu, phydbl sigma, phydbl lim_inf, phydbl lim_sup)
4695 {
4696 phydbl p_inf, p_sup;
4697
4698 p_inf = Pnorm(lim_inf,mu,sigma);
4699 p_sup = Pnorm(lim_sup,mu,sigma);
4700
4701 /* return(mu + sigma * SQRT(-log( y * y * (p_sup - p_inf) * (p_sup - p_inf) * 2 * PI * sigma * sigma))); */
4702 return(mu + sigma * SQRT(-log( y * y * (p_sup - p_inf) * (p_sup - p_inf) * 2. * PI * sigma * sigma)));
4703 }
4704
4705 //////////////////////////////////////////////////////////////
4706 //////////////////////////////////////////////////////////////
4707 // Returns a vector with a permutation of all the integers from 0
4708 // to len-1. Fisher-Yates algorithm.
4709
Permutate(int len)4710 int *Permutate(int len)
4711 {
4712 int i,pos,tmp;
4713 int *x;
4714
4715 x = (int *)mCalloc(len,sizeof(int));
4716
4717 for(i=0;i<len;i++) x[i] = i;
4718
4719 for(i=0;i<len;i++)
4720 {
4721 pos = Rand_Int(i,len-1);
4722
4723 tmp = x[i];
4724 x[i] = x[pos];
4725 x[pos] = tmp;
4726 }
4727
4728 return(x);
4729 }
4730
4731 //////////////////////////////////////////////////////////////
4732 //////////////////////////////////////////////////////////////
4733 // Returns the p-value for the Mantel test of correlation between
4734 // matrices x and y.
4735
Mantel(phydbl * x,phydbl * y,int nrow,int ncol)4736 phydbl Mantel(phydbl *x, phydbl *y, int nrow, int ncol)
4737 {
4738
4739 phydbl obs_stat; // Value of the statistic on the observed data
4740 phydbl mc_stat; // Value of the statistics on the Monte Carlo generated data
4741 int N;
4742 phydbl sumx, sumy, sumxy, sumxx, sumyy;
4743 int i,j,k;
4744 int npermut;
4745 int *permut;
4746 phydbl p_val;
4747
4748 N = nrow*ncol;
4749
4750 sumx = .0;
4751 for(i=0;i<N;i++) sumx += x[i];
4752
4753 sumy = .0;
4754 for(i=0;i<N;i++) sumy += y[i];
4755
4756 sumxx = .0;
4757 for(i=0;i<N;i++) sumxx += x[i]*x[i];
4758
4759 sumyy = .0;
4760 for(i=0;i<N;i++) sumyy += y[i]*y[i];
4761
4762 sumxy = .0;
4763 for(i=0;i<N;i++) sumxy += x[i]*y[i];
4764
4765 obs_stat = (N * sumxy - sumx * sumy) / (SQRT((N-1)*sumxx - (sumx/N)*(sumx/N)) * SQRT((N-1)*sumyy - (sumy/N)*(sumy/N)));
4766
4767 npermut = 1000;
4768 p_val = 0.0;
4769 for(k=0;k<npermut;k++)
4770 {
4771 permut = Permutate(nrow);
4772
4773 sumxy = .0;
4774 for(i=0;i<nrow;i++)
4775 {
4776 for(j=0;j<ncol;j++)
4777 {
4778 sumxy += x[i*ncol+j] * y[permut[i]*ncol+permut[j]];
4779 }
4780 }
4781
4782 mc_stat = (N * sumxy - sumx * sumy) / (SQRT((N-1)*sumxx - (sumx/N)*(sumx/N)) * SQRT((N-1)*sumyy - (sumy/N)*(sumy/N)));
4783
4784 Free(permut);
4785
4786 if(mc_stat > obs_stat) p_val += 1.;
4787 }
4788
4789 return(p_val / (phydbl)npermut);
4790
4791 }
4792
4793 //////////////////////////////////////////////////////////////
4794 //////////////////////////////////////////////////////////////
4795
Weighted_Mean(phydbl * x,phydbl * w,int l)4796 phydbl Weighted_Mean(phydbl *x, phydbl *w, int l)
4797 {
4798 int i;
4799 phydbl wm;
4800 wm = .0;
4801 if(w) for(i=0;i<l;i++) wm += x[i]*w[i];
4802 else
4803 {
4804 for(i=0;i<l;i++) wm += x[i];
4805 wm /= (phydbl)l;
4806 }
4807 return(wm);
4808 }
4809
4810 //////////////////////////////////////////////////////////////
4811 //////////////////////////////////////////////////////////////
4812
Variance(phydbl * x,int l)4813 phydbl Variance(phydbl *x, int l)
4814 {
4815 phydbl mean,sum;
4816 int i;
4817
4818 mean = Weighted_Mean(x,NULL,l);
4819 sum = 0.0;
4820 for(i=0;i<l;i++) sum += x[i]*x[i];
4821
4822 return(sum/l - mean*mean);
4823 }
4824
4825 //////////////////////////////////////////////////////////////
4826 //////////////////////////////////////////////////////////////
4827
Sum_Bits(int value,int range)4828 int Sum_Bits(int value, int range)
4829 {
4830 int i;
4831 int sum;
4832
4833 if(range > 8*(int)sizeof(int))
4834 {
4835 PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
4836 Exit("\n");
4837 }
4838
4839 sum = 0;
4840 for(i=0;i<range;i++)
4841 {
4842 sum += (value >> i) & 1;
4843 }
4844
4845 return(sum);
4846 }
4847
4848 //////////////////////////////////////////////////////////////
4849 //////////////////////////////////////////////////////////////
4850
Modulo(int a,int b)4851 int Modulo (int a, int b)
4852 {
4853 if(b < 0)
4854 return Modulo(-a, -b);
4855 int ret = a % b;
4856 if(ret < 0)
4857 ret+=b;
4858 return ret;
4859 }
4860
4861 //////////////////////////////////////////////////////////////
4862 //////////////////////////////////////////////////////////////
4863
Runif_Disk(phydbl * sampled_x,phydbl * sampled_y,phydbl centrx,phydbl centry,phydbl radius)4864 void Runif_Disk(phydbl *sampled_x, phydbl *sampled_y, phydbl centrx, phydbl centry, phydbl radius)
4865 {
4866 phydbl r,theta;
4867
4868 r = Uni();
4869 theta = Uni()*2.*PI;
4870
4871 (*sampled_x) = SQRT(r)*COS(theta);
4872 (*sampled_y) = SQRT(r)*SIN(theta);
4873
4874 (*sampled_x) *= radius;
4875 (*sampled_y) *= radius;
4876
4877 (*sampled_x) += centrx;
4878 (*sampled_y) += centry;
4879
4880 }
4881
4882 //////////////////////////////////////////////////////////////
4883 //////////////////////////////////////////////////////////////
4884
Random_String(char * s,int len)4885 void Random_String(char *s, int len)
4886 {
4887 int i;
4888 for(i=0;i<len;i++) s[i] = Rand_Int(97,121);
4889 s[i] = '\0';
4890 }
4891
4892 //////////////////////////////////////////////////////////////
4893 //////////////////////////////////////////////////////////////
4894
Random_Permut(int n)4895 int *Random_Permut(int n)
4896 {
4897 int *permut;
4898 int i,j;
4899 int tmp;
4900
4901 if(n < 3)
4902 {
4903 PhyML_Printf("\n== Number of vertices in a polygon has to be at least 3.");
4904 Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
4905 }
4906
4907 permut = (int *)mCalloc(n,sizeof(int));
4908
4909 for(i=0;i<n;i++) permut[i] = i;
4910
4911 for(i=0;i<n-1;i++)
4912 {
4913 j = Rand_Int(i,n-1);
4914 tmp = permut[i];
4915 permut[i] = permut[j];
4916 permut[j] = tmp;
4917 }
4918
4919 return(permut);
4920 }
4921
4922 //////////////////////////////////////////////////////////////
4923 //////////////////////////////////////////////////////////////
4924
4925 /* Generate a random polygon with n vertices. Each point lies in [0,1] */
Rpoly(int n)4926 t_poly *Rpoly(int n)
4927 {
4928 t_poly *p;
4929 int i;
4930
4931 p = (t_poly *)Make_Poly(n);
4932 p->n_poly_vert = n;
4933
4934 for(i=0;i<n;i++)
4935 {
4936 p->poly_vert[i]->lonlat[0] = Uni();
4937 p->poly_vert[i]->lonlat[1] = Uni();
4938 }
4939
4940 return(p);
4941 }
4942
4943 //////////////////////////////////////////////////////////////
4944 //////////////////////////////////////////////////////////////
4945
Area_Of_Poly_Monte_Carlo(t_poly * poly,t_geo_coord * lim)4946 phydbl Area_Of_Poly_Monte_Carlo(t_poly *poly, t_geo_coord *lim)
4947 {
4948 int n_hit,n_trials,trial;
4949 t_geo_coord *point;
4950
4951 point = (t_geo_coord *)GEO_Make_Geo_Coord(2);
4952
4953 n_trials = 1E+7;
4954 trial = 0;
4955 n_hit = 0;
4956 do
4957 {
4958 point->lonlat[0] = Uni()*lim->lonlat[0];
4959 point->lonlat[1] = Uni()*lim->lonlat[1];
4960 if(Is_In_Polygon(point,poly) == YES) n_hit++;
4961 trial++;
4962 }
4963 while(trial < n_trials);
4964
4965
4966 Free_Geo_Coord(point);
4967
4968 return((phydbl)(n_hit)/n_trials*lim->lonlat[0]*lim->lonlat[1]);
4969 }
4970
4971 /*////////////////////////////////////////////////////////////
4972 ////////////////////////////////////////////////////////////*/
4973
Is_In_Polygon(t_geo_coord * point,t_poly * poly)4974 int Is_In_Polygon(t_geo_coord *point, t_poly *poly)
4975 {
4976 int i,j;
4977 phydbl x,y,x1,y1,x2,y2;
4978 phydbl x_intersect;
4979 short int is_in;
4980
4981 assert(point);
4982 assert(poly);
4983
4984 /* Coordinates of the point to test */
4985 x = point->lonlat[0];
4986 y = point->lonlat[1];
4987
4988 j = poly->n_poly_vert-1;
4989 is_in = NO;
4990 for(i=0;i<poly->n_poly_vert;i++)
4991 {
4992 /* Edge of polygon goes from (x1,y1) to (x2,y2) */
4993 x1 = poly->poly_vert[i]->lonlat[0];
4994 y1 = poly->poly_vert[i]->lonlat[1];
4995 x2 = poly->poly_vert[j]->lonlat[0];
4996 y2 = poly->poly_vert[j]->lonlat[1];
4997
4998 j = i;
4999
5000 /* Shoot an horizontal ray to the right. Find out if
5001 this ray hits the polygon edge */
5002 if((y1 < y && y2 > y) || (y2 < y && y1 > y))
5003 {
5004 /* Coordinates along X-axis of the intersection between ray and edge */
5005 x_intersect = (y-y1)/(y1-y2)*(x1-x2)+x1;
5006 if(x_intersect > x) /* Intersection is on the righthand side */
5007 is_in = (is_in == YES)?NO:YES;
5008 }
5009 }
5010
5011 return is_in;
5012 }
5013
5014 /*////////////////////////////////////////////////////////////
5015 ////////////////////////////////////////////////////////////*/
5016
5017 /* Modified Bessel function of the first kind. Stolen from Numerical Recipes in C. */
Bessi0(phydbl x)5018 phydbl Bessi0(phydbl x)
5019 {
5020 phydbl ax,ans;
5021 phydbl y;
5022
5023 if ((ax=fabs(x)) < 3.75)
5024 {
5025 y=x/3.75;
5026 y*=y;
5027 ans=1.0+y*(3.5156229+y*(3.0899424+y*(1.2067492+y*(0.2659732+y*(0.360768e-1+y*0.45813e-2)))));
5028 }
5029 else
5030 {
5031 y=3.75/ax;
5032 ans=(exp(ax)/sqrt(ax))*(0.39894228+y*(0.1328592e-1+y*(0.225319e-2+y*(-0.157565e-2+y*(0.916281e-2+y*(-0.2057706e-1+y*(0.2635537e-1+y*(-0.1647633e-1+y*0.392377e-2))))))));
5033 }
5034
5035 return ans;
5036 }
5037
5038
5039 /*////////////////////////////////////////////////////////////
5040 ////////////////////////////////////////////////////////////*/
5041
5042 /* Modified Bessel function of the second kind (degree 0). Stolen from Numerical Recipes in C. */
Bessk0(phydbl x)5043 phydbl Bessk0(phydbl x)
5044 {
5045 phydbl y,ans;
5046
5047 if (x <= 2.0)
5048 {
5049 y=x*x/4.0;
5050 ans=(-log(x/2.0)*Bessi0(x))+(-0.57721566+y*(0.42278420+y*(0.23069756+y*(0.3488590e-1+y*(0.262698e-2+y*(0.10750e-3+y*0.74e-5))))));
5051 }
5052 else
5053 {
5054 y=2.0/x;
5055 ans=(exp(-x)/sqrt(x))*(1.25331414+y*(-0.7832358e-1+y*(0.2189568e-1+y*(-0.1062446e-1+y*(0.587872e-2+y*(-0.251540e-2+y*0.53208e-3))))));
5056 }
5057
5058 return ans;
5059 }
5060
5061
5062 /*////////////////////////////////////////////////////////////
5063 ////////////////////////////////////////////////////////////*/
5064
Euclidean_Dist(t_geo_coord * x,t_geo_coord * y)5065 phydbl Euclidean_Dist(t_geo_coord *x, t_geo_coord *y)
5066 {
5067 int i;
5068 phydbl dist;
5069
5070 if(x->dim != y->dim)
5071 {
5072 PhyML_Printf("\n. x->dim: %d y->dim: %d",x->dim,y->dim);
5073 Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
5074 }
5075
5076 dist = 0.0;
5077 for(i=0;i<x->dim;i++) dist += pow(x->lonlat[i]-y->lonlat[i],2);
5078
5079 return(sqrt(dist));
5080 }
5081
5082 /*////////////////////////////////////////////////////////////
5083 ////////////////////////////////////////////////////////////*/
5084 /* Return a vector rk such that rk[i] gives the index of the i-th largest element in x */
Ranks(phydbl * x,int len)5085 int *Ranks(phydbl *x, int len)
5086 {
5087 int *rk,tmp;
5088 int i,swap;
5089
5090 rk = (int *)mCalloc(len,sizeof(int));
5091
5092 for(i=0;i<len;i++) rk[i] = i;
5093
5094 do
5095 {
5096 swap = NO;
5097 for(i=0;i<len-1;i++)
5098 {
5099 if(x[rk[i]] < x[rk[i+1]])
5100 {
5101 swap = YES;
5102 tmp = rk[i];
5103 rk[i] = rk[i+1];
5104 rk[i+1] = tmp;
5105 }
5106 }
5107 }
5108 while(swap == YES);
5109
5110 return(rk);
5111 }
5112
5113 /*////////////////////////////////////////////////////////////
5114 ////////////////////////////////////////////////////////////*/
5115
Brownian_Bridge_Generate(phydbl start,phydbl end,phydbl var,phydbl beg_time,phydbl end_time,int n_steps,phydbl * time)5116 phydbl *Brownian_Bridge_Generate(phydbl start, phydbl end, phydbl var, phydbl beg_time, phydbl end_time, int n_steps, phydbl *time)
5117 {
5118 phydbl *state,end_brown;
5119 int i;
5120
5121 if(n_steps == 0) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
5122 if(beg_time > end_time) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
5123 for(i=0;i<n_steps-1;i++) if(!(time[i+1] > time[i])) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
5124
5125 state = Brownian_Generate(var,n_steps,beg_time,time);
5126 end_brown = Rnorm(state[n_steps-1],SQRT((time[n_steps-1] - end_time)*var));
5127
5128 for(i=0;i<n_steps;i++)
5129 {
5130 state[i] = state[i] - (time[i]/end_time) * end_brown;
5131 state[i] = start + (end - start)/end_time * time[i] + state[i];
5132 }
5133
5134
5135 return(state);
5136 }
5137
5138 /*////////////////////////////////////////////////////////////
5139 ////////////////////////////////////////////////////////////*/
5140
Brownian_Generate(phydbl var,int n_steps,phydbl beg_time,phydbl * time)5141 phydbl *Brownian_Generate(phydbl var, int n_steps, phydbl beg_time, phydbl *time)
5142 {
5143 phydbl *state;
5144 int i;
5145
5146 if(n_steps == 0) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
5147
5148 state = (phydbl *)mCalloc(n_steps,sizeof(phydbl));
5149
5150 state[0] = Rnorm(0.0,SQRT((time[0] - beg_time)*var));
5151
5152 for(i=1;i<n_steps;i++)
5153 {
5154 state[i] = Rnorm(state[i-1],SQRT((time[i]-time[i-1])*var));
5155 if(time[i] < time[i-1]) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
5156 }
5157
5158 return(state);
5159 }
5160
5161 /*////////////////////////////////////////////////////////////
5162 ////////////////////////////////////////////////////////////*/
5163
Random_Walk_Bridged_Generate(phydbl start,phydbl end,phydbl var,int n_steps)5164 phydbl *Random_Walk_Bridged_Generate(phydbl start, phydbl end, phydbl var, int n_steps)
5165 {
5166 phydbl *state,end_walk;
5167 int i;
5168
5169 if(n_steps == 0) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
5170
5171 state = Random_Walk_Generate(var,n_steps);
5172 end_walk = Rnorm(state[n_steps-1],SQRT(var));
5173
5174 for(i=0;i<n_steps;i++)
5175 {
5176 state[i] = state[i] - ((phydbl)(i+1.)/n_steps) * end_walk;
5177 state[i] = start + (end - start)/n_steps * (i+1.) + state[i];
5178 }
5179
5180 return(state);
5181 }
5182
5183 /*////////////////////////////////////////////////////////////
5184 ////////////////////////////////////////////////////////////*/
5185
Random_Walk_Generate(phydbl var,int n_steps)5186 phydbl *Random_Walk_Generate(phydbl var, int n_steps)
5187 {
5188 phydbl *state;
5189 int i;
5190
5191 if(n_steps == 0) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
5192
5193 state = (phydbl *)mCalloc(n_steps,sizeof(phydbl));
5194
5195 state[0] = Rnorm(0.0,SQRT(var));
5196
5197 for(i=1;i<n_steps;i++) state[i] = Rnorm(state[i-1],SQRT(var));
5198
5199 return(state);
5200 }
5201
5202 /*////////////////////////////////////////////////////////////
5203 ////////////////////////////////////////////////////////////*/
5204
Reflected(phydbl x,phydbl down,phydbl up)5205 phydbl Reflected(phydbl x, phydbl down, phydbl up)
5206 {
5207 phydbl ref;
5208
5209 ref = x;
5210 do
5211 {
5212 if(ref > up) ref = up - (ref - up);
5213 if(ref < down) ref = down + (down - ref);
5214 }while(!(ref < up && ref > down));
5215
5216 return(ref);
5217 }
5218