1 /*
2 
3 PhyML:  a program that  computes maximum likelihood phylogenies from
4 DNA or AA homologous sequences.
5 
6 Copyright (C) Stephane Guindon. Oct 2003 onward.
7 
8 All parts of the source except where indicated are distributed under
9 the GNU public licence. See http://www.opensource.org for details.
10 
11 */
12 
13 #include "stats.h"
14 
15 
16 //////////////////////////////////////////////////////////////
17 //////////////////////////////////////////////////////////////
18 
19 /* RANDOM VARIATES GENERATORS */
20 //////////////////////////////////////////////////////////////
21 //////////////////////////////////////////////////////////////
22 
23 
24 /*********************************************************************/
25 /* A C-function for TT800 : July 8th 1996 Version */
26 /* by M. Matsumoto, email: matumoto@math.keio.ac.jp */
27 /* tt800() generate one pseudorandom number with double precision */
28 /* which is uniformly distributed on [0,1]-interval */
29 /* for each call.  One may choose any initial 25 seeds */
30 /* except all zeros. */
31 
32 /* See: ACM Transactions on Modelling and Computer Simulation, */
33 /* Vol. 4, No. 3, 1994, pages 254-266. */
34 
tt800()35 phydbl tt800()
36 {
37   int M=7;
38   unsigned long y;
39   static int k = 0;
40   static unsigned long x[25]={ /* initial 25 seeds, change as you wish */
41     0x95f24dab, 0x0b685215, 0xe76ccae7, 0xaf3ec239, 0x715fad23,
42     0x24a590ad, 0x69e4b5ef, 0xbf456141, 0x96bc1b7b, 0xa7bdf825,
43     0xc1de75b7, 0x8858a9c9, 0x2da87693, 0xb657f9dd, 0xffdc8a9f,
44     0x8121da71, 0x8b823ecb, 0x885d05f5, 0x4e20cd47, 0x5a9ad5d9,
45     0x512c0c03, 0xea857ccd, 0x4cc1d30f, 0x8891a8a1, 0xa6b7aadb
46   };
47   static unsigned long mag01[2]={
48     0x0, 0x8ebfd028 /* this is magic vector `a', don't change */
49   };
50   if (k==25) { /* generate 25 words at one time */
51     int kk;
52     for (kk=0;kk<25-M;kk++) {
53       x[kk] = x[kk+M] ^ (x[kk] >> 1) ^ mag01[x[kk] % 2];
54     }
55     for (; kk<25;kk++) {
56       x[kk] = x[kk+(M-25)] ^ (x[kk] >> 1) ^ mag01[x[kk] % 2];
57     }
58     k=0;
59   }
60   y = x[k];
61   y ^= (y << 7) & 0x2b5b2500; /* s and b, magic vectors */
62   y ^= (y << 15) & 0xdb8b0000; /* t and c, magic vectors */
63   y &= 0xffffffff; /* you may delete this line if word size = 32 */
64   /*
65      the following line was added by Makoto Matsumoto in the 1996 version
66      to improve lower bit's corellation.
67      Delete this line to o use the code published in 1994.
68   */
69   y ^= (y >> 16); /* added to the 1994 version */
70   k++;
71   return((phydbl)y / (unsigned long) 0xffffffff);
72 }
73 
74 /*********************************************************************/
75 
Uni()76 phydbl Uni()
77 {
78   phydbl r,mx;
79   mx = (phydbl)RAND_MAX;
80   r  = (phydbl)rand();
81   r /= mx;
82   /* r = tt800(); */
83   return r;
84 }
85 
86 /*********************************************************************/
87 // Return a uniform draw u s.t., min <= u <= max
Rand_Int(int min,int max)88 int Rand_Int(int min, int max)
89 {
90 /*   phydbl u;   */
91 /*   u = (phydbl)rand(); */
92 /*   u /=  (RAND_MAX); */
93 /*   u *= (max - min + 1); */
94 /*   u += min; */
95 /*   return (int)FLOOR(u); */
96 
97   int u;
98   /* if(max < min) Generic_Exit(__FILE__,__LINE__,__FUNCTION__); */
99   u = rand();
100   return (u%(max+1-min)+min);
101 
102 }
103 
104 //////////////////////////////////////////////////////////////
105 //////////////////////////////////////////////////////////////
106 
107 
108 
109 /********************* random Gamma generator ************************
110 * Properties:
111 * (1) X = Gamma(alpha,lambda) = Gamma(alpha,1)/lambda
112 * (2) X1 = Gamma(alpha1,1), X2 = Gamma(alpha2,1) independent
113 *     then X = X1+X2 = Gamma(alpha1+alpha2,1)
114 * (3) alpha = k = integer then
115 *     X = Gamma(k,1) = Erlang(k,1) = -sum(log(Ui)) = -log(prod(Ui))
116 *     where U1,...Uk iid uniform(0,1)
117 *
118 * Decompose alpha = k+delta with k = [alpha], and 0<delta<1
119 * Apply (3) for Gamma(k,1)
120 * Apply Ahrens-Dieter algorithm for Gamma(delta,1)
121 */
122 
Ahrensdietergamma(phydbl alpha)123 phydbl Ahrensdietergamma(phydbl alpha)
124 {
125   phydbl x = 0.;
126 
127   if (alpha>0.)
128     {
129       phydbl y = 0.;
130       phydbl b = (alpha+exp(1.))/exp(1.);
131       phydbl p = 1./alpha;
132       int go = 0;
133       while (go==0)
134 	{
135 	  phydbl u = Uni();
136 	  phydbl w = Uni();
137 	  phydbl v = b*u;
138 	  if (v<=1.)
139 	    {
140 	      x = POW(v,p);
141 	      y = exp(-x);
142 	    }
143 	  else
144 	    {
145 	      x = -log(p*(b-v));
146 	      y = POW(x,alpha-1.);
147 	    }
148 	  go = (w<y); // x is accepted when go=1
149 	}
150     }
151   return x;
152 }
153 
154 //////////////////////////////////////////////////////////////
155 //////////////////////////////////////////////////////////////
156 
Rgamma(phydbl shape,phydbl scale)157 phydbl Rgamma(phydbl shape, phydbl scale)
158 {
159   /* Code below is stolen from R sources. Thanks to the R team! */
160   /* References:
161      [1] Shape parameter a >= 1.  Algorithm GD in:
162 
163 	  Ahrens, J.H. and Dieter, U. (1982).
164 	  Generating gamma variates by a modified
165 	  rejection technique.
166 	  Comm. ACM, 25, 47-54.
167 
168 
169     [2] Shape parameter 0 < a < 1. Algorithm GS in:
170 
171 	  Ahrens, J.H. and Dieter, U. (1974).
172 	  Computer methods for sampling from gamma, beta,
173 	  poisson and binomial distributions.
174 	  Computing, 12, 223-246.
175   */
176 
177 
178   double a = (double)shape;
179   /* Constants : */
180   const static double sqrt32 = 5.656854;
181   const static double exp_m1 = 0.36787944117144232159;/* exp(-1) = 1/e */
182 
183   /* Coefficients q[k] - for q0 = sum(q[k]*a^(-k))
184    * Coefficients a[k] - for q = q0+(t*t/2)*sum(a[k]*v^k)
185    * Coefficients e[k] - for exp(q)-1 = sum(e[k]*q^k)
186    */
187   const static double q1 = 0.04166669;
188   const static double q2 = 0.02083148;
189   const static double q3 = 0.00801191;
190   const static double q4 = 0.00144121;
191   const static double q5 = -7.388e-5;
192   const static double q6 = 2.4511e-4;
193   const static double q7 = 2.424e-4;
194 
195   const static double a1 = 0.3333333;
196   const static double a2 = -0.250003;
197   const static double a3 = 0.2000062;
198   const static double a4 = -0.1662921;
199   const static double a5 = 0.1423657;
200   const static double a6 = -0.1367177;
201   const static double a7 = 0.1233795;
202 
203   /* State variables [FIXME for threading!] :*/
204   static double aa = 0.;
205   static double aaa = 0.;
206   static double s, s2, d;    /* no. 1 (step 1) */
207   static double q0, b, si, c;/* no. 2 (step 4) */
208 
209   double e, p, q, r, t, u, v, w, x, ret_val;
210 
211   if(a < 0.0 || scale <= 0.0) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
212 
213   if (a < 1.)
214     { /* GS algorithm for parameters a < 1 */
215       if(a == 0) return 0.;
216       e = 1.0 + exp_m1 * a;
217       for(;;)
218         {
219           p = e * Uni();
220           if (p >= 1.0)
221             {
222               x = -log((e - p) / a);
223               if (Rexp(1.) >= (1.0 - a) * log(x))
224                 break;
225 	    }
226           else
227             {
228               x = exp(log(p) / a);
229               if (Rexp(1.) >= x)
230                 break;
231 	    }
232 	}
233       return scale * x;
234     }
235 
236     /* --- a >= 1 : GD algorithm --- */
237 
238     /* Step 1: Recalculations of s2, s, d if a has changed */
239     if (a != aa)
240       {
241 	aa = a;
242 	s2 = a - 0.5;
243 	s = SQRT(s2);
244 	d = sqrt32 - s * 12.0;
245       }
246     /* Step 2: t = standard normal deviate,
247                x = (s,1/2) -normal deviate. */
248 
249     /* immediate acceptance (i) */
250     t = Rnorm(0.0,1.0);
251     x = s + 0.5 * t;
252     ret_val = x * x;
253     if (t >= 0.0) return scale * ret_val;
254 
255     /* Step 3: u = 0,1 - uniform sample. squeeze acceptance (s) */
256     u = Uni();
257     if (d * u <= t * t * t) return scale * ret_val;
258 
259     /* Step 4: recalculations of q0, b, si, c if necessary */
260 
261     if (a != aaa)
262       {
263 	aaa = a;
264 	r = 1.0 / a;
265 	q0 = ((((((q7 * r + q6) * r + q5) * r + q4) * r + q3) * r
266 	       + q2) * r + q1) * r;
267 
268 	/* Approximation depending on size of parameter a */
269 	/* The constants in the expressions for b, si and c */
270 	/* were established by numerical experiments */
271 
272 	if (a <= 3.686)
273           {
274             b = 0.463 + s + 0.178 * s2;
275             si = 1.235;
276             c = 0.195 / s - 0.079 + 0.16 * s;
277           }
278         else if (a <= 13.022)
279           {
280             b = 1.654 + 0.0076 * s2;
281             si = 1.68 / s + 0.275;
282 	    c = 0.062 / s + 0.024;
283           }
284         else
285           {
286             b = 1.77;
287             si = 0.75;
288 	    c = 0.1515 / s;
289           }
290       }
291 
292     /* Step 5: no quotient test if x not positive */
293     if (x > 0.0)
294       {
295 	/* Step 6: calculation of v and quotient q */
296 	v = t / (s + s);
297 	if (fabs(v) <= 0.25)
298           q = q0 + 0.5 * t * t * ((((((a7 * v + a6) * v + a5) * v + a4) * v
299                                     + a3) * v + a2) * v + a1) * v;
300 	else
301           q = q0 - s * t + 0.25 * t * t + (s2 + s2) * log(1.0 + v);
302 
303 
304 	/* Step 7: quotient acceptance (q) */
305 	if (log(1.0 - u) <= q)
306           return scale * ret_val;
307       }
308 
309     for(;;)
310       {
311 	/* Step 8: e = standard exponential deviate
312 	 *	u =  0,1 -uniform deviate
313 	 *	t = (b,si)-double exponential (laplace) sample */
314 	e = Rexp(1.0);
315 	u = Uni();
316 	u = u + u - 1.0;
317 	if (u < 0.0)
318 	    t = b - si * e;
319 	else
320 	    t = b + si * e;
321 	/* Step	 9:  rejection if t < tau(1) = -0.71874483771719 */
322 	if (t >= -0.71874483771719) {
323 	    /* Step 10:	 calculation of v and quotient q */
324 	    v = t / (s + s);
325 	    if (fabs(v) <= 0.25)
326 		q = q0 + 0.5 * t * t *
327 		    ((((((a7 * v + a6) * v + a5) * v + a4) * v + a3) * v
328 		      + a2) * v + a1) * v;
329 	    else
330 		q = q0 - s * t + 0.25 * t * t + (s2 + s2) * log(1.0 + v);
331 	    /* Step 11:	 hat acceptance (h) */
332 	    /* (if q not positive go to step 8) */
333 	    if (q > 0.0) {
334 		w = exp(q)-1.0;
335 		/* if t is rejected sample again at step 8 */
336 		if (c * fabs(u) <= w * exp(e - 0.5 * t * t))
337 		    break;
338 	    }
339 	}
340     } /* repeat .. until  `t' is accepted */
341     x = s + 0.5 * t;
342     return scale * x * x;
343 }
344 
345 //////////////////////////////////////////////////////////////
346 //////////////////////////////////////////////////////////////
347 
Rexp(phydbl lambda)348 phydbl Rexp(phydbl lambda)
349 {
350   return -log(Uni()+SMALL)/lambda;
351 }
352 
353 /*////////////////////////////////////////////////////////////
354 ////////////////////////////////////////////////////////////*/
355 
356 /* Returns a random deviates from an exponential distribution */
357 /* left-truncated at 'left' and right-truncated at 'rght' */
Rexp_Trunc(phydbl lambda,phydbl left,phydbl rght)358 phydbl Rexp_Trunc(phydbl lambda, phydbl left, phydbl rght)
359 {
360   phydbl u;
361   u = Uni();
362   return (left-log(1. - u*(1.-exp(-lambda*(rght-left))))/lambda);
363 }
364 
365 /*////////////////////////////////////////////////////////////
366 ////////////////////////////////////////////////////////////*/
367 
Rnorm(phydbl mean,phydbl sd)368 phydbl Rnorm(phydbl mean, phydbl sd)
369 {
370   /* Box-Muller transformation */
371   phydbl u1, u2, res;
372 
373   /* u1=Uni(); */
374   /* u2=Uni(); */
375   /* u1 = SQRT(-2.*log(u1))*COS(6.28318530717959f*u2); */
376 
377   /* Polar */
378   phydbl d,x,y;
379 
380   do
381     {
382       u1=Uni();
383       u2=Uni();
384       x = 2.*u1-1.;
385       y = 2.*u2-1.;
386       d = x*x + y*y;
387       if(d>.0 && d<1.) break;
388     }
389   while(1);
390   u1 = x*SQRT((-2.*log(d))/d);
391 
392   res = u1*sd+mean;
393 
394   if(isnan(res) || isinf(res))
395     {
396       printf("\n. res=%f sd=%f mean=%f u1=%f u2=%f",res,sd,mean,u1,u2);
397     }
398   return res;
399 }
400 
401 //////////////////////////////////////////////////////////////
402 //////////////////////////////////////////////////////////////
403 
404 
Rnorm_Multid(phydbl * mu,phydbl * cov,int dim)405 phydbl *Rnorm_Multid(phydbl *mu, phydbl *cov, int dim)
406 {
407   phydbl *L,*x,*y;
408   int i,j;
409 
410   x = (phydbl *)mCalloc(dim,sizeof(phydbl));
411   y = (phydbl *)mCalloc(dim,sizeof(phydbl));
412 
413   L = (phydbl *)Cholesky_Decomp(cov,dim);
414 
415   for(i=0;i<dim;i++) x[i]=Rnorm(0.0,1.0);
416   for(i=0;i<dim;i++) for(j=0;j<dim;j++) y[i] += L[i*dim+j]*x[j];
417   for(i=0;i<dim;i++) y[i] += mu[i];
418 
419   Free(L);
420   Free(x);
421 
422   return(y);
423 }
424 
425 //////////////////////////////////////////////////////////////
426 //////////////////////////////////////////////////////////////
427 
Rnorm_Trunc_Inverse(phydbl mean,phydbl sd,phydbl min,phydbl max,int * error)428 phydbl Rnorm_Trunc_Inverse(phydbl mean, phydbl sd, phydbl min, phydbl max, int *error)
429 {
430 
431   phydbl u, ret_val,eps;
432   phydbl z;
433   phydbl z_min,z_max;
434   phydbl cdf_min, cdf_max;
435 
436   z      = 0.0;
437   u      = -1.0;
438   *error = 0;
439 
440   if(sd < 1.E-100)
441     {
442       PhyML_Printf("\n. Small variance detected in Rnorm_Trunc.");
443       PhyML_Printf("\n. mean=%f sd=%f min=%f max=%f",mean,sd,min,max);
444       *error = 1;
445       return -1.0;
446     }
447 
448   z_min = (min - mean)/sd;
449   z_max = (max - mean)/sd;
450 
451   eps = (z_max-z_min)/1E+6;
452 
453 
454   /*       Simple inversion method. Seems to work well. Needs more thorough testing though... */
455   cdf_min = Pnorm(z_min,0.0,1.0);
456   cdf_max = Pnorm(z_max,0.0,1.0);
457   u = cdf_min + (cdf_max-cdf_min) * Uni();
458   z = PointNormal(u);
459 
460   if((z < z_min-eps) || (z > z_max+eps))
461     {
462       *error = 1;
463       PhyML_Printf("\n. Numerical precision issue detected in Rnorm_Trunc.");
464       PhyML_Printf("\n. z = %f",z);
465       PhyML_Printf("\n. mean=%f sd=%f z_min=%f z_max=%f min=%f max=%f",mean,sd,z_min,z_max,min,max);
466       ret_val = (max - min)/2.;
467       Exit("\n");
468     }
469 
470   ret_val = z*sd+mean;
471 
472   return ret_val;
473 }
474 
475 //////////////////////////////////////////////////////////////
476 //////////////////////////////////////////////////////////////
477 /* Borrowed from https://github.com/olafmersmann/truncnorm/blob/afc91b696db8a3feda25d39435fd979bacd962c6/src/rtruncnorm.c */
478 
Rnorm_Trunc_Algo1(phydbl alpha,phydbl beta)479 phydbl Rnorm_Trunc_Algo1(phydbl alpha, phydbl beta)
480 {
481   phydbl z = -DBL_MAX;
482   while(z < alpha || z > beta)
483     {
484       z = Rnorm(0.0,1.0);
485     }
486   return(z);
487 }
488 
Rnorm_Trunc_Algo2(phydbl alpha,phydbl beta)489 phydbl Rnorm_Trunc_Algo2(phydbl alpha, phydbl beta)
490 {
491   phydbl z = 0.0;
492   phydbl d_alpha = Dnorm(alpha,0.0,1.0);
493   const double ub = alpha < 0.0 && beta > 0.0 ? M_1_SQRT_2PI : d_alpha;
494   do
495     {
496       z = Uni()*(beta-alpha) + alpha;
497     }
498   while(Uni() * ub > Dnorm(z,0.0,1.0));
499   return(z);
500 }
501 
Rnorm_Trunc_Algo3(phydbl alpha,phydbl beta)502 phydbl Rnorm_Trunc_Algo3(phydbl alpha, phydbl beta)
503 {
504 
505   phydbl z = alpha - 1.0;
506   while(z < alpha || z > beta)
507     {
508       z = Rnorm(0,1);
509       z = fabs(z);
510     }
511   return(z);
512 }
513 
Rnorm_Trunc_Algo4(phydbl alpha,phydbl beta)514 phydbl Rnorm_Trunc_Algo4(phydbl alpha, phydbl beta)
515 {
516   phydbl z = 0.0;
517   const phydbl ainv = 1.0/alpha;
518   phydbl rho;
519   do
520     {
521       z = Rexp(ainv) + alpha;
522       rho = exp(-0.5 * pow((z-alpha),2));
523     }
524   while(Uni() > rho || z > beta);
525   return(z);
526 }
527 
528 
Rnorm_Trunc(phydbl mean,phydbl sd,phydbl min,phydbl max,int * error)529 phydbl Rnorm_Trunc(phydbl mean, phydbl sd, phydbl min, phydbl max, int *error)
530 {
531   phydbl alpha,beta;
532   phydbl d_alpha,d_beta;
533   phydbl z,ret_val;
534 
535   alpha = (min - mean)/sd;
536   beta  = (max - mean)/sd;
537 
538   d_alpha = Dnorm(alpha,0.0,1.0);
539   d_beta  = Dnorm(beta,0.0,1.0);
540 
541   /* PhyML_Printf("\n. alpha: %f beta: %f d_alpha: %f d_beta: %f",alpha,beta,d_alpha,d_beta); */
542 
543 
544   if(beta < alpha) return NAN;
545   else
546     {
547       if(!(alpha > 0.0) && !(beta < 0.0))
548         {
549           if(!(d_alpha > 0.15) || !(d_beta > 0.15))
550             {
551               z = Rnorm_Trunc_Algo1(alpha,beta);
552               return(mean + sd * z);
553             }
554           else
555             {
556               z = Rnorm_Trunc_Algo2(alpha,beta);
557               return(mean + sd * z);
558             }
559         }
560       else if(alpha > 0.0)
561         {
562           if(!(d_alpha / d_beta > 2.18))
563             {
564               z = Rnorm_Trunc_Algo2(alpha,beta);
565               return(mean + sd * z);
566             }
567           else
568             {
569               if(!(alpha > 0.725))
570                 {
571                   z = Rnorm_Trunc_Algo3(alpha,beta);
572                   return(mean + sd * z);
573                 }
574               else
575                 {
576                   z = Rnorm_Trunc_Algo4(alpha,beta);
577                   return(mean + sd * z);
578                 }
579             }
580         }
581       else
582         {
583           if(!(d_beta / d_alpha > 2.18))
584             {
585               z = Rnorm_Trunc_Algo2(alpha,beta);
586               return(mean - sd * z);
587             }
588           else if(beta > -0.725)
589             {
590               z = Rnorm_Trunc_Algo3(alpha,beta);
591               return(mean - sd * z);
592             }
593           else
594             {
595               z = Rnorm_Trunc_Algo4(alpha,beta);
596               return(mean - sd * z);
597             }
598         }
599     }
600 
601   ret_val = mean + sd*z;
602   return ret_val;
603 }
604 
605 //////////////////////////////////////////////////////////////
606 //////////////////////////////////////////////////////////////
607 
608 
Rnorm_Multid_Trunc(phydbl * mean,phydbl * cov,phydbl * min,phydbl * max,int dim)609 phydbl *Rnorm_Multid_Trunc(phydbl *mean, phydbl *cov, phydbl *min, phydbl *max, int dim)
610 {
611   int i,j;
612   phydbl *L,*x, *u;
613   phydbl up, low, rec;
614   int err;
615 
616   u = (phydbl *)mCalloc(dim,sizeof(phydbl));
617   x = (phydbl *)mCalloc(dim,sizeof(phydbl));
618 
619   L = Cholesky_Decomp(cov,dim);
620 
621   low = (min[0]-mean[0])/L[0*dim+0];
622   up  = (max[0]-mean[0])/L[0*dim+0];
623   u[0] = Rnorm_Trunc(0.0,1.0,low,up,&err);
624 
625   for(i=1;i<dim;i++)
626     {
627       rec = .0;
628       for(j=0;j<i;j++) rec += L[i*dim+j] * u[j];
629       low  = (min[i]-mean[i]-rec)/L[i*dim+i];
630       up   = (max[i]-mean[i]-rec)/L[i*dim+i];
631       u[i] = Rnorm_Trunc(0.0,1.0,low,up,&err);
632     }
633 
634   x = Matrix_Mult(L,u,dim,dim,dim,1);
635 
636 /*   PhyML_Printf("\n>>>\n"); */
637 /*   for(i=0;i<dim;i++) */
638 /*     { */
639 /*       for(j=0;j<dim;j++) */
640 /* 	{ */
641 /* 	  PhyML_Printf("%10lf ",L[i*dim+j]); */
642 /* 	} */
643 /*       PhyML_Printf("\n"); */
644 /*     } */
645 /*   PhyML_Printf("\n"); */
646 
647 /*   for(i=0;i<dim;i++) PhyML_Printf("%f ",u[i]); */
648 /*   PhyML_Printf("\n"); */
649 
650 
651 /*   PhyML_Printf("\n"); */
652 /*   for(i=0;i<dim;i++) PhyML_Printf("%10lf ",x[i]); */
653 /*   PhyML_Printf("\n<<<\n"); */
654 
655   for(i=0;i<dim;i++) x[i] += mean[i];
656 
657   Free(L);
658   Free(u);
659 
660   return x;
661 }
662 
663 //////////////////////////////////////////////////////////////
664 //////////////////////////////////////////////////////////////
665 /* Inversion method for sampling from Geometric distibution */
Rgeom(phydbl p)666 phydbl Rgeom(phydbl p)
667 {
668   phydbl x,u;
669 
670   u = Uni();
671   if(u < SMALL) return(0.0);
672   x = log(u) / log(1. - p);
673 
674   return(CEIL(x));
675 }
676 
677 //////////////////////////////////////////////////////////////
678 //////////////////////////////////////////////////////////////
679 
Dgeom(phydbl k,phydbl p,int logit)680 phydbl Dgeom(phydbl k, phydbl p, int logit)
681 {
682   phydbl prob;
683 
684   if(k < 1.) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
685   if(p > 1.-SMALL)
686     {
687       if(logit == YES) return(-INFINITY);
688       else return(0.0);
689     }
690 
691   if(logit == YES)
692     prob = (k - 1.)*log(1. - p) + log(p);
693   else
694     prob = POW(1.-p,k-1.)*p;
695 
696   return(prob);
697 }
698 
699 
700 //////////////////////////////////////////////////////////////
701 //////////////////////////////////////////////////////////////
702 
Pgeom(phydbl k,phydbl p)703 phydbl Pgeom(phydbl k, phydbl p)
704 {
705 
706   if(k < 1.) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
707   if(p > 1.-SMALL) return(0.0);
708 
709   return(1. - POW((1. - p),k));
710 
711 }
712 
713 
714 /*
715  * Random variates from the Poisson distribution. Completely stolen from R code.
716  *
717  * REFERENCE
718  *
719  * Ahrens, J.H. and Dieter, U. (1982).
720  * Computer generation of Poisson deviates
721  * from modified normal distributions.
722  * ACM Trans. Math. Software 8, 163-179.
723  */
Rpois(phydbl mmu)724 phydbl Rpois(phydbl mmu)
725 {
726   double mu = (double)mmu;
727 
728   double a0	= -0.5;
729   double a1	= 0.3333333;
730   double a2	= -0.2500068;
731   double a3	= 0.2000118;
732   double a4	= -0.1661269;
733   double a5	= 0.1421878;
734   double a6	= -0.1384794;
735   double a7	= 0.1250060;
736   double one_7	= 0.1428571428571428571;
737   double one_12	= 0.0833333333333333333;
738   double one_24	= 0.0416666666666666667;
739 
740   /* Factorial Table (0:9)! */
741   const static double fact[10] =
742     {
743       1., 1., 2., 6., 24., 120., 720., 5040., 40320., 362880.
744     };
745   /* These are static --- persistent between calls for same mu : */
746   static int l, m;
747   static double b1, b2, c, c0, c1, c2, c3;
748   static double pp[36], p0, p, q, s, d, omega;
749   static double big_l;/* integer "w/o overflow" */
750   static double muprev = 0., muprev2 = 0.;/*, muold = 0.*/
751   /* Local Vars [initialize some for -Wall]: */
752   double del, difmuk= 0., E= 0., fk= 0., fx, fy, g, px, py, t, u= 0., v, x;
753   double pois = -1.;
754   int k, kflag, big_mu, new_big_mu = FALSE;
755 
756   if (isnan(mu) || mu < 0.0) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
757   if (mu <= 0.) return 0.;
758 
759   big_mu = mu >= 10.;
760   if(big_mu) new_big_mu = FALSE;
761   if (!(big_mu && mu == muprev)) {/* maybe compute new persistent par.s */
762     if (big_mu)
763       {
764         new_big_mu = TRUE;
765         /* Case A. (recalculation of s,d,l because mu has changed):
766          * The poisson probabilities pk exceed the discrete normal
767          * probabilities fk whenever k >= m(mu).
768          */
769         muprev = mu;
770         s = sqrt(mu);
771         d = 6. * mu * mu;
772         big_l = floor(mu - 1.1484);
773         /* = an upper bound to m(mu) for all mu >= 10.*/
774       }
775     else
776       {
777         /* Small mu ( < 10) -- not using normal approx. */
778         /* Case B. (start new table and calculate p0 if necessary) */
779         /*muprev = 0.;-* such that next time, mu != muprev ..*/
780         if (mu != muprev)
781           {
782             muprev = mu;
783             m = MAX(1, (int) mu);
784             l = 0; /* pp[] is already ok up to pp[l] */
785             q = p0 = p = exp(-mu);
786           }
787         for(;;)
788           {
789             /* Step U. uniform sample for inversion method */
790             u = Uni();
791             if (u <= p0) return 0.;
792             /* Step T. table comparison until the end pp[l] of the
793                pp-table of cumulative poisson probabilities
794                (0.458 > ~= pp[9](= 0.45792971447) for mu=10 ) */
795             if (l != 0)
796               {
797                 for (k = (u <= 0.458) ? 1 : MIN(l, m); k <= l; k++) if (u <= pp[k]) return((phydbl)k);
798                 if (l == 35) /* u > pp[35] */
799                   continue;
800               }
801             /* Step C. creation of new poisson
802                probabilities p[l..] and their cumulatives q =: pp[k] */
803             l++;
804             for (k = l; k <= 35; k++)
805               {
806                 p *= mu / k;
807                 q += p;
808                 pp[k] = q;
809                 if (u <= q)
810                   {
811                     l = k;
812                     return((phydbl)k);
813                   }
814               }
815             l = 35;
816           } /* end(repeat) */
817       }/* mu < 10 */
818   } /* end {initialize persistent vars} */
819   /* Only if mu >= 10 : ----------------------- */
820   /* Step N. normal sample */
821   g = mu + s * Rnorm(0.0,1.0);/* norm_rand() ~ N(0,1), standard normal */
822   if (g >= 0.)
823     {
824       pois = floor(g);
825       /* Step I. immediate acceptance if pois is large enough */
826       if (pois >= big_l)
827         return((phydbl)pois);
828       /* Step S. squeeze acceptance */
829       fk = pois;
830       difmuk = mu - fk;
831       u = Uni(); /* ~ U(0,1) - sample */
832       if (d * u >= difmuk * difmuk * difmuk)
833         return((phydbl)pois);
834     }
835   /* Step P. preparations for steps Q and H.
836      (recalculations of parameters if necessary) */
837   if (new_big_mu || mu != muprev2) {
838     /* Careful! muprev2 is not always == muprev
839        because one might have exited in step I or S
840     */
841     muprev2 = mu;
842     omega = M_1_SQRT_2PI / s;
843     /* The quantities b1, b2, c3, c2, c1, c0 are for the Hermite
844      * approximations to the discrete normal probabilities fk. */
845     b1 = one_24 / mu;
846     b2 = 0.3 * b1 * b1;
847     c3 = one_7 * b1 * b2;
848     c2 = b2 - 15. * c3;
849     c1 = b1 - 6. * b2 + 45. * c3;
850     c0 = 1. - b1 + 3. * b2 - 15. * c3;
851     c = 0.1069 / mu; /* guarantees majorization by the 'hat'-function. */
852   }
853   if (g >= 0.) {
854     /* 'Subroutine' F is called (kflag=0 for correct return) */
855     kflag = 0;
856     goto Step_F;
857   }
858   for(;;)
859     {
860       /* Step E. Exponential Sample */
861       E = Rexp(1.0); /* ~ Exp(1) (standard exponential) */
862       /* sample t from the laplace 'hat'
863          (if t <= -0.6744 then pk < fk for all mu >= 10.) */
864       u = 2. * Uni() - 1.;
865       /* t = 1.8 + fsign(E, u); */
866       t = 1.8 + ((u >= 0.0) ? fabs(E) : -fabs(E));
867       if (t > -0.6744)
868         {
869           pois = floor(mu + s * t);
870           fk = pois;
871           difmuk = mu - fk;
872           /* 'subroutine' F is called (kflag=1 for correct return) */
873           kflag = 1;
874         Step_F: /* 'subroutine' F : calculation of px,py,fx,fy. */
875           if (pois < 10) { /* use factorials from table fact[] */
876             px = -mu;
877             py = pow(mu, pois) / fact[(int)pois];
878           }
879           else {
880             /* Case pois >= 10 uses polynomial approximation
881                a0-a7 for accuracy when advisable */
882             del = one_12 / fk;
883             del = del * (1. - 4.8 * del * del);
884             v = difmuk / fk;
885             if (fabs(v) <= 0.25)
886               px = fk * v * v * (((((((a7 * v + a6) * v + a5) * v + a4) *
887                                     v + a3) * v + a2) * v + a1) * v + a0)
888                 - del;
889             else /* |v| > 1/4 */
890               px = fk * log(1. + v) - difmuk - del;
891             py = M_1_SQRT_2PI / sqrt(fk);
892           }
893           x = (0.5 - difmuk) / s;
894           x *= x;/* x^2 */
895           fx = -0.5 * x;
896           fy = omega * (((c3 * x + c2) * x + c1) * x + c0);
897           if (kflag > 0) {
898             /* Step H. Hat acceptance (E is repeated on rejection) */
899             if (c * fabs(u) <= py * exp(px + E) - fy * exp(fx + E))
900               break;
901           } else
902             /* Step Q. Quotient acceptance (rare case) */
903             if (fy - u * fy <= py * exp(px - fx))
904               break;
905         }/* t > -.67.. */
906     }
907   return((phydbl)pois);
908 }
909 
910 
911 //////////////////////////////////////////////////////////////
912 //////////////////////////////////////////////////////////////
913 
914 /* DENSITIES / PROBA */
915 //////////////////////////////////////////////////////////////
916 //////////////////////////////////////////////////////////////
917 
918 
Dnorm_Moments(phydbl x,phydbl mean,phydbl var)919 phydbl Dnorm_Moments(phydbl x, phydbl mean, phydbl var)
920 {
921   phydbl dens,sd,pi;
922 
923   pi = 3.141593;
924   sd = SQRT(var);
925 
926   dens = 1./(SQRT(2*pi)*sd)*exp(-((x-mean)*(x-mean)/(2.*sd*sd)));
927 
928   return dens;
929 }
930 
931 //////////////////////////////////////////////////////////////
932 //////////////////////////////////////////////////////////////
933 
934 
Dnorm(phydbl x,phydbl mean,phydbl sd)935 phydbl Dnorm(phydbl x, phydbl mean, phydbl sd)
936 {
937   phydbl dens;
938 
939   /* dens = -(.5*LOG2PI+log(sd))  - .5*POW(x-mean,2)/POW(sd,2); */
940   /* return exp(dens); */
941 
942   if(sd < SMALL && fabs(x-mean) < SMALL) return(1.0);
943 
944   x = (x-mean)/sd;
945 
946   dens = M_1_SQRT_2_PI * exp(-0.5*x*x);
947 
948   return dens / sd;
949 }
950 
951 //////////////////////////////////////////////////////////////
952 //////////////////////////////////////////////////////////////
953 
Log_Dnorm(phydbl x,phydbl mean,phydbl sd,int * err)954 phydbl Log_Dnorm(phydbl x, phydbl mean, phydbl sd, int *err)
955 {
956   phydbl dens;
957 
958   *err = NO;
959 
960   if(sd < SMALL && fabs(x-mean) < SMALL) return(0.0);
961 
962   x = (x-mean)/sd;
963 
964   dens = -(phydbl)log(SQRT(2.*PI)) - x*x*0.5 - log(sd);
965 
966   if(dens < -BIG)
967     {
968       PhyML_Printf("\n. dens=%f -- x=%f mean=%f sd=%f\n",dens,x,mean,sd);
969       *err = 1;
970     }
971 
972   return dens;
973 }
974 
975 /*////////////////////////////////////////////////////////////
976 ////////////////////////////////////////////////////////////*/
977 
Log_Dnorm_Trunc(phydbl x,phydbl mean,phydbl sd,phydbl lo,phydbl up,int * err)978 phydbl Log_Dnorm_Trunc(phydbl x, phydbl mean, phydbl sd, phydbl lo, phydbl up, int *err)
979 {
980   phydbl log_dens;
981   phydbl cdf_up, cdf_lo;
982 
983   if(x < lo || x > up) return -230.;
984 
985   if(sd < SMALL && fabs(x-mean) < SMALL) return(0.0);
986 
987   *err = NO;
988   cdf_lo = cdf_up = 0.0;
989 
990   log_dens = Log_Dnorm(x,mean,sd,err);
991 
992   if(*err == YES)
993     {
994       PhyML_Printf("\n== mean=%f sd=%f lo=%f up=%f cdf_lo=%G CDF_up=%G log_dens=%G",mean,sd,lo,up,cdf_lo,cdf_up,log_dens);
995       PhyML_Printf("\n== Warning in file %s at line %d\n",__FILE__,__LINE__);
996       *err = YES;
997     }
998 
999   cdf_up = Pnorm(up,mean,sd);
1000   cdf_lo = Pnorm(lo,mean,sd);
1001 
1002   if(cdf_up - cdf_lo < 1.E-20)
1003     {
1004       log_dens = -230.; /* ~log(1.E-100) */
1005     }
1006   else
1007     {
1008       log_dens -= log(cdf_up - cdf_lo);
1009     }
1010 
1011   if(isnan(log_dens) || isinf(fabs(log_dens)))
1012     {
1013       PhyML_Printf("\n. x=%f mean=%f sd=%f lo=%f up=%f cdf_lo=%G CDF_up=%G log_dens=%G",x,mean,sd,lo,up,cdf_lo,cdf_up,log_dens);
1014       PhyML_Printf("\n. Warning in file %s at line %d\n",__FILE__,__LINE__);
1015       *err = YES;
1016     }
1017 
1018   return log_dens;
1019 }
1020 
1021 //////////////////////////////////////////////////////////////
1022 //////////////////////////////////////////////////////////////
1023 
1024 
Dnorm_Trunc(phydbl x,phydbl mean,phydbl sd,phydbl lo,phydbl up)1025 phydbl Dnorm_Trunc(phydbl x, phydbl mean, phydbl sd, phydbl lo, phydbl up)
1026 {
1027   phydbl dens;
1028   phydbl cdf_up, cdf_lo;
1029 
1030   if(sd < SMALL && fabs(x-mean) < SMALL) return(1.0);
1031 
1032   dens   = Dnorm(x,mean,sd);
1033   cdf_up = Pnorm(up,mean,sd);
1034   cdf_lo = Pnorm(lo,mean,sd);
1035 
1036   dens /= (cdf_up - cdf_lo);
1037 
1038   if(isnan(dens) || isinf(fabs(dens)))
1039     {
1040       PhyML_Printf("\n== mean=%f sd=%f lo=%f up=%f cdf_lo=%G CDF_up=%G",mean,sd,lo,up,cdf_lo,cdf_up);
1041       PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
1042       Exit("\n");
1043     }
1044 
1045   return dens;
1046 }
1047 
1048 //////////////////////////////////////////////////////////////
1049 //////////////////////////////////////////////////////////////
1050 
1051 
Dnorm_Multi(phydbl * x,phydbl * mu,phydbl * cov,int size,int _log)1052 phydbl Dnorm_Multi(phydbl *x, phydbl *mu, phydbl *cov, int size, int _log)
1053 {
1054   phydbl *xmmu,*invcov;
1055   phydbl *buff1,*buff2;
1056   int i;
1057   phydbl det,density;
1058 
1059   xmmu   = (phydbl *)mCalloc(size,sizeof(phydbl));
1060   invcov = (phydbl *)mCalloc(size*size,sizeof(phydbl));
1061 
1062   for(i=0;i<size;i++) xmmu[i] = x[i] - mu[i];
1063   For(i,size*size) invcov[i] = cov[i];
1064 
1065   if(!Matinv(invcov,size,size,NO))
1066     {
1067       PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__);
1068       Exit("\n");
1069     }
1070 
1071   buff1 = Matrix_Mult(xmmu,invcov,1,size,size,size);
1072   buff2 = Matrix_Mult(buff1,xmmu,1,size,size,1);
1073 
1074   det = Matrix_Det(cov,size,NO);
1075   /* det_1D(cov,size,&det); */
1076 
1077   density = size * LOG2PI + log(det) + buff2[0];
1078   density /= -2.;
1079 
1080 /*   density = (1./(POW(2.*PI,size/2.)*SQRT(fabs(det)))) * exp(-0.5*buff2[0]); */
1081 
1082   Free(xmmu);
1083   Free(invcov);
1084   Free(buff1);
1085   Free(buff2);
1086 
1087   return (_log)?(density):(exp(density));
1088 }
1089 
1090 //////////////////////////////////////////////////////////////
1091 //////////////////////////////////////////////////////////////
1092 
1093 
Dnorm_Multi_Given_InvCov_Det(phydbl * x,phydbl * mu,phydbl * invcov,phydbl log_det,int size,int _log)1094 phydbl Dnorm_Multi_Given_InvCov_Det(phydbl *x, phydbl *mu, phydbl *invcov, phydbl log_det, int size, int _log)
1095 {
1096   phydbl *xmmu;
1097   phydbl *buff1,*buff2;
1098   int i;
1099   phydbl density;
1100 
1101   xmmu = (phydbl *)mCalloc(size,sizeof(phydbl));
1102 
1103   for(i=0;i<size;i++) xmmu[i] = x[i] - mu[i];
1104 
1105   buff1 = Matrix_Mult(xmmu,invcov,1,size,size,size);
1106   buff2 = Matrix_Mult(buff1,xmmu,1,size,size,1);
1107 
1108   density = size * LOG2PI + log_det + buff2[0];
1109   density /= -2.;
1110 
1111   Free(xmmu);
1112   Free(buff1);
1113   Free(buff2);
1114 
1115   return (_log)?(density):(exp(density));
1116 }
1117 
1118 //////////////////////////////////////////////////////////////
1119 //////////////////////////////////////////////////////////////
1120 
1121 
Pbinom(int N,int ni,phydbl p)1122 phydbl Pbinom(int N, int ni, phydbl p)
1123 {
1124   return Bico(N,ni)*POW(p,ni)*POW(1-p,N-ni);
1125 }
1126 
1127 //////////////////////////////////////////////////////////////
1128 //////////////////////////////////////////////////////////////
1129 
1130 
Bivariate_Normal_Density(phydbl x,phydbl y,phydbl mux,phydbl muy,phydbl sdx,phydbl sdy,phydbl rho)1131 phydbl Bivariate_Normal_Density(phydbl x, phydbl y, phydbl mux, phydbl muy, phydbl sdx, phydbl sdy, phydbl rho)
1132 {
1133   phydbl cx, cy;
1134   phydbl pi;
1135   phydbl dens;
1136   phydbl rho2;
1137 
1138   pi = 3.141593;
1139 
1140   cx = x - mux;
1141   cy = y - muy;
1142 
1143   rho2 = rho*rho;
1144 
1145   dens = 1./(2*pi*sdx*sdy*SQRT(1.-rho2));
1146   dens *= exp((-1./(2.*(1.-rho2)))*(cx*cx/(sdx*sdx)+cy*cy/(sdy*sdy)+2*rho*cx*cy/(sdx*sdy)));
1147 
1148   return dens;
1149 }
1150 
1151 //////////////////////////////////////////////////////////////
1152 //////////////////////////////////////////////////////////////
1153 // E(X) = n(1-p)/p ; V(X) = n(1-p)/p^2
Dnbinom(phydbl x,phydbl n,phydbl p,int logit)1154 phydbl Dnbinom(phydbl x, phydbl n, phydbl p, int logit)
1155 {
1156 
1157   phydbl lnDens = LnGamma(x+n) - LnGamma(n) - LnFact(x) + n*log(p) + x*log(1.-p);
1158 
1159   if(logit == TRUE)
1160     {
1161       return(lnDens);
1162     }
1163   else
1164     {
1165       return(exp(lnDens));
1166     }
1167 }
1168 
1169 //////////////////////////////////////////////////////////////
1170 //////////////////////////////////////////////////////////////
1171 
Rnbinom(phydbl n,phydbl p)1172 phydbl Rnbinom(phydbl n, phydbl p)
1173 {
1174   phydbl y,x;
1175   y = Rgamma(n,(1.-p)/p);
1176   x = Rpois(y);
1177   return(x);
1178 }
1179 
1180 //////////////////////////////////////////////////////////////
1181 //////////////////////////////////////////////////////////////
1182 
Dgamma_Moments(phydbl x,phydbl mean,phydbl var)1183 phydbl Dgamma_Moments(phydbl x, phydbl mean, phydbl var)
1184 {
1185   phydbl shape, scale;
1186 
1187   if(var < 1.E-20)
1188     {
1189 /*       var  = 1.E-20;  */
1190       PhyML_Printf("\n. var=%f mean=%f",var,mean);
1191       PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
1192       Exit("\n");
1193     }
1194 
1195   if(mean < 1.E-20)
1196     {
1197 /*       mean = 1.E-20;  */
1198       PhyML_Printf("\n. var=%f mean=%f",var,mean);
1199       PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
1200       Exit("\n");
1201     }
1202 
1203 
1204   shape = mean * mean / var;
1205   scale = var / mean;
1206 
1207   return(Dgamma(x,shape,scale));
1208 }
1209 
1210 //////////////////////////////////////////////////////////////
1211 //////////////////////////////////////////////////////////////
1212 
Dgamma(phydbl x,phydbl shape,phydbl scale)1213 phydbl Dgamma(phydbl x, phydbl shape, phydbl scale)
1214 {
1215   phydbl v;
1216 
1217   if(x > INFINITY)
1218     {
1219       PhyML_Printf("\n. WARNING: huge value of x -> x = %G",x);
1220       x = 1.E+10;
1221     }
1222 
1223   if(x < 1.E-200)
1224     {
1225       if(x < 0.0) return 0.0;
1226       else
1227 	{
1228 	  PhyML_Printf("\n. WARNING: Dgamma -> small value of x = %G (shape: %G scale: %G <X>: %G)",x,shape,scale,shape*scale);
1229 	  x = 1.E-200;
1230 	}
1231     }
1232 
1233 
1234   if(scale < 0.0 || shape < 0.0)
1235     {
1236       PhyML_Printf("\n. scale=%f shape=%f",scale,shape);
1237       Exit("\n");
1238     }
1239 
1240 
1241   v = (shape-1.) * log(x) - shape * log(scale) - x / scale - LnGamma(shape);
1242 
1243 
1244   if(v < 500.)
1245     {
1246       v = exp(v);
1247     }
1248   else
1249     {
1250       PhyML_Printf("\n. WARNING v=%f x=%f shape=%f scale=%f",v,x,shape,scale);
1251       PhyML_Printf("\n. log(x) = %G LnGamma(shape)=%G",log(x),LnGamma(shape));
1252       v = exp(v);
1253       /* PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__); */
1254       /* Exit("\n"); */
1255     }
1256 
1257 
1258   return v;
1259 }
1260 
1261 /*////////////////////////////////////////////////////////////
1262 ////////////////////////////////////////////////////////////*/
1263 
Dexp(phydbl x,phydbl param)1264 phydbl Dexp(phydbl x, phydbl param)
1265 {
1266   return param * exp(-param * x);
1267 }
1268 
1269 /*////////////////////////////////////////////////////////////
1270 ////////////////////////////////////////////////////////////*/
1271 
1272 /* Returns the density of an exponential distribution left-truncated */
1273 /* at 'left' and right-truncated at 'rght' */
1274 
Dexp_Trunc(phydbl x,phydbl lambda,phydbl left,phydbl rght)1275 phydbl Dexp_Trunc(phydbl x, phydbl lambda, phydbl left, phydbl rght)
1276 {
1277   return (lambda * exp(-lambda * x))/(exp(-lambda * left) - exp(-lambda * rght));
1278 }
1279 
1280 /*////////////////////////////////////////////////////////////
1281 ////////////////////////////////////////////////////////////*/
1282 /* Poisson probability */
Dpois(phydbl x,phydbl param,int logit)1283 phydbl Dpois(phydbl x, phydbl param, int logit)
1284 {
1285   phydbl v;
1286 
1287   if(param < SMALL)
1288     {
1289       if(x < SMALL)
1290         {
1291           if(logit) return 0.0;
1292           else return 1.0;
1293         }
1294       else
1295         {
1296           if(logit) return -INFINITY;
1297           else return 0.0;
1298         }
1299     }
1300 
1301   if(x < .0)
1302     {
1303       if(logit == YES) return(-INFINITY);
1304       else return 0.0;
1305    }
1306 
1307   v = x * log(param) - param - Factln(x);
1308   if(logit == YES) return v;
1309   else
1310     {
1311       if(v < 500.)
1312         {
1313           v = exp(v);
1314         }
1315       else
1316         {
1317           PhyML_Printf("\n. WARNING v=%f x=%f param=%f",v,x,param);
1318           v = exp(500);
1319         }
1320       return v;
1321     }
1322   return(-1.0);
1323 }
1324 
1325 //////////////////////////////////////////////////////////////
1326 //////////////////////////////////////////////////////////////
1327 
1328 
1329 
1330 //////////////////////////////////////////////////////////////
1331 //////////////////////////////////////////////////////////////
1332 
1333 /* CDFs */
1334 //////////////////////////////////////////////////////////////
1335 //////////////////////////////////////////////////////////////
1336 // Error function
Erf(phydbl x)1337 phydbl Erf(phydbl x)
1338 {
1339   return(2.*Pnorm(x*sqrt(2.),0.0,1.0)-1.);
1340 }
1341 
Pnorm(phydbl x,phydbl mean,phydbl sd)1342 phydbl Pnorm(phydbl x, phydbl mean, phydbl sd)
1343 {
1344 /*   const phydbl b1 =  0.319381530; */
1345 /*   const phydbl b2 = -0.356563782; */
1346 /*   const phydbl b3 =  1.781477937; */
1347 /*   const phydbl b4 = -1.821255978; */
1348 /*   const phydbl b5 =  1.330274429; */
1349 /*   const phydbl p  =  0.2316419; */
1350 /*   const phydbl c  =  0.39894228; */
1351 
1352   x = (x-mean)/sd;
1353 
1354 /*   if(x >= 0.0) */
1355 /*     { */
1356 /*       phydbl t = 1.0 / ( 1.0 + p * x ); */
1357 /*       return (1.0 - c * exp( -x * x / 2.0 ) * t * */
1358 /* 	      ( t *( t * ( t * ( t * b5 + b4 ) + b3 ) + b2 ) + b1 )); */
1359 /*     } */
1360 /*   else */
1361 /*     { */
1362 /*       phydbl t = 1.0 / ( 1.0 - p * x ); */
1363 /*       return ( c * exp( -x * x / 2.0 ) * t * */
1364 /* 	       ( t *( t * ( t * ( t * b5 + b4 ) + b3 ) + b2 ) + b1 )); */
1365 /*     } */
1366 
1367 /* i_tail in {0,1,2} means: "lower", "upper", or "both" :
1368    if(lower) return  *cum := P[X <= x]
1369    if(upper) return *ccum := P[X >  x] = 1 - P[X <= x]
1370 */
1371 
1372 /*   return Pnorm_Marsaglia(x); */
1373   return Pnorm_Ihaka_Derived_From_Cody(x);
1374 }
1375 
1376 
1377 /* G. Marsaglia. "Evaluating the Normal distribution". Journal of Statistical Software. 2004. Vol. 11. Issue 4. */
Pnorm_Marsaglia(phydbl x)1378 phydbl  Pnorm_Marsaglia(phydbl x)
1379 {
1380   long double s=x,t=0,b=x,q=x*x,i=1;
1381   while(s!=t) s=(t=s)+(b*=q/(i+=2));
1382   return .5+s*exp(-.5*q-.91893853320467274178L);
1383 
1384 }
1385 
1386 
1387 
1388 /* Stolen from R source code */
1389 #define SIXTEN 16
1390 
Pnorm_Ihaka_Derived_From_Cody(phydbl x)1391 phydbl Pnorm_Ihaka_Derived_From_Cody(phydbl x)
1392 {
1393 
1394     const static double a[5] = {
1395 	2.2352520354606839287,
1396 	161.02823106855587881,
1397 	1067.6894854603709582,
1398 	18154.981253343561249,
1399 	0.065682337918207449113
1400     };
1401     const static double b[4] = {
1402 	47.20258190468824187,
1403 	976.09855173777669322,
1404 	10260.932208618978205,
1405 	45507.789335026729956
1406     };
1407     const static double c[9] = {
1408 	0.39894151208813466764,
1409 	8.8831497943883759412,
1410 	93.506656132177855979,
1411 	597.27027639480026226,
1412 	2494.5375852903726711,
1413 	6848.1904505362823326,
1414 	11602.651437647350124,
1415 	9842.7148383839780218,
1416 	1.0765576773720192317e-8
1417     };
1418     const static double d[8] = {
1419 	22.266688044328115691,
1420 	235.38790178262499861,
1421 	1519.377599407554805,
1422 	6485.558298266760755,
1423 	18615.571640885098091,
1424 	34900.952721145977266,
1425 	38912.003286093271411,
1426 	19685.429676859990727
1427     };
1428     const static double p[6] = {
1429 	0.21589853405795699,
1430 	0.1274011611602473639,
1431 	0.022235277870649807,
1432 	0.001421619193227893466,
1433 	2.9112874951168792e-5,
1434 	0.02307344176494017303
1435     };
1436     const static double q[5] = {
1437 	1.28426009614491121,
1438 	0.468238212480865118,
1439 	0.0659881378689285515,
1440 	0.00378239633202758244,
1441 	7.29751555083966205e-5
1442     };
1443 
1444     double xden, xnum, temp, del, eps, xsq, y;
1445     int i, lower, upper;
1446     double cum,ccum;
1447     int i_tail;
1448 
1449     i_tail = 0;
1450     cum = ccum = 0.0;
1451 
1452     if(isnan(x)) { cum = ccum = x; return (phydbl)cum; }
1453 
1454     /* Consider changing these : */
1455     eps = DBL_EPSILON * 0.5;
1456 
1457     /* i_tail in {0,1,2} =^= {lower, upper, both} */
1458     lower = i_tail != 1;
1459     upper = i_tail != 0;
1460 
1461     y = fabs(x);
1462     if (y <= 0.67448975) { /* qnorm(3/4) = .6744.... -- earlier had 0.66291 */
1463 	if (y > eps) {
1464 	    xsq = x * x;
1465 	    xnum = a[4] * xsq;
1466 	    xden = xsq;
1467 	    for (i = 0; i < 3; ++i) {
1468 		xnum = (xnum + a[i]) * xsq;
1469 		xden = (xden + b[i]) * xsq;
1470 	    }
1471 	} else xnum = xden = 0.0;
1472 
1473 	temp = x * (xnum + a[3]) / (xden + b[3]);
1474 	if(lower)  cum = 0.5 + temp;
1475 	if(upper) ccum = 0.5 - temp;
1476 	}
1477     else if (y <= M_SQRT_32) {
1478 
1479 	/* Evaluate pnorm for 0.674.. = qnorm(3/4) < |x| <= SQRT(32) ~= 5.657 */
1480 
1481 	xnum = c[8] * y;
1482 	xden = y;
1483 	for (i = 0; i < 7; ++i) {
1484 	    xnum = (xnum + c[i]) * y;
1485 	    xden = (xden + d[i]) * y;
1486 	}
1487 	temp = (xnum + c[7]) / (xden + d[7]);
1488 
1489 #define do_del(X)							\
1490 	xsq = floor(X * SIXTEN) / SIXTEN;			\
1491 	del = (X - xsq) * (X + xsq);					\
1492 	cum = exp(-xsq * xsq * 0.5) * exp(-del * 0.5) * temp;		\
1493 	ccum = 1.0 - cum;						\
1494 
1495 #define swap_tail						\
1496 	if (x > 0.) {/* swap  ccum <--> cum */			\
1497 	    temp = cum; if(lower) cum = ccum; ccum = temp;	\
1498 	}
1499 
1500 	do_del(y);
1501 	swap_tail;
1502     }
1503 
1504 /* else	  |x| > SQRT(32) = 5.657 :
1505  * the next two case differentiations were really for lower=T, log=F
1506  * Particularly	 *not*	for  log_p !
1507 
1508  * Cody had (-37.5193 < x  &&  x < 8.2924) ; R originally had y < 50
1509  *
1510  * Note that we do want symmetry(0), lower/upper -> hence use y
1511  */
1512     else if((lower && -37.5193 < x  &&  x < 8.2924) || (upper && -8.2924  < x  &&  x < 37.5193))
1513       {
1514 	/* Evaluate pnorm for x in (-37.5, -5.657) union (5.657, 37.5) */
1515 	xsq = 1.0 / (x * x);
1516 	xnum = p[5] * xsq;
1517 	xden = xsq;
1518 	for (i = 0; i < 4; ++i) {
1519 	    xnum = (xnum + p[i]) * xsq;
1520 	    xden = (xden + q[i]) * xsq;
1521 	}
1522 	temp = xsq * (xnum + p[4]) / (xden + q[4]);
1523 	temp = (M_1_SQRT_2_PI - temp) / y;
1524 
1525 	do_del(x);
1526 	swap_tail;
1527       }
1528     else
1529       { /* no log_p , large x such that probs are 0 or 1 */
1530 	if(x > 0) {	cum = 1.; ccum = 0.;	}
1531 	else {	        cum = 0.; ccum = 1.;	}
1532       }
1533 
1534     return (phydbl)cum;
1535 
1536 
1537 }
1538 
1539 //////////////////////////////////////////////////////////////
1540 //////////////////////////////////////////////////////////////
1541 
Pgamma(phydbl x,phydbl shape,phydbl scale)1542 phydbl Pgamma(phydbl x, phydbl shape, phydbl scale)
1543 {
1544   return IncompleteGamma(x/scale,shape,LnGamma(shape));
1545 }
1546 
1547 //////////////////////////////////////////////////////////////
1548 //////////////////////////////////////////////////////////////
1549 
Ppois(phydbl x,phydbl param)1550 phydbl Ppois(phydbl x, phydbl param)
1551 {
1552   /* Press et al. (1990) approximation of the CDF for the Poisson distribution */
1553   if(param < SMALL || x < 0.0)
1554     {
1555       PhyML_Printf("\n== param = %G x=%G",param,x);
1556       PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
1557       Warn_And_Exit("");
1558     }
1559   return IncompleteGamma(x,param,LnGamma(param));
1560 }
1561 
1562 //////////////////////////////////////////////////////////////
1563 //////////////////////////////////////////////////////////////
1564 
1565 /* Inverse CDFs */
1566 //////////////////////////////////////////////////////////////
1567 //////////////////////////////////////////////////////////////
1568 
1569 
PointChi2(phydbl prob,phydbl v)1570 phydbl PointChi2 (phydbl prob, phydbl v)
1571 {
1572 /* returns z so that Prob{x<z}=prob where x is Chi2 distributed with df=v
1573    returns -1 if in error.   0.000002<prob<0.999998
1574    RATNEST FORTRAN by
1575        Best DJ & Roberts DE (1975) The percentage points of the
1576        Chi2 distribution.  Applied Statistics 24: 385-388.  (AS91)
1577    Converted into C by Ziheng Yang, Oct. 1993.
1578 */
1579    double aa=.6931471805, p=prob, g;
1580    double xx, c, ch, a=0,q=0,p1=0,p2=0,t=0,x=0,b=0,s1,s2,s3,s4,s5,s6;
1581    double e=.5e-6;
1582 
1583    if (p<.000002 || p>.999998 || v<=0) return ((phydbl)-1);
1584 
1585    g = (double)LnGamma(v/2);
1586    xx=v/2;   c=xx-1;
1587    if (v >= -1.24*log(p)) goto l1;
1588 
1589    ch=pow((p*xx*exp(g+xx*aa)), 1/xx);
1590    if (ch-e<0) return (ch);
1591    goto l4;
1592 l1:
1593    if (v>.32) goto l3;
1594    ch=0.4;   a=log(1-p);
1595 l2:
1596    q=ch;  p1=1+ch*(4.67+ch);  p2=ch*(6.73+ch*(6.66+ch));
1597    t=-0.5+(4.67+2*ch)/p1 - (6.73+ch*(13.32+3*ch))/p2;
1598    ch-=(1-exp(a+g+.5*ch+c*aa)*p2/p1)/t;
1599    if (fabs(q/ch-1)-.01 <= 0) goto l4;
1600    else                       goto l2;
1601 
1602 l3:
1603    x=(double)PointNormal (p);
1604    p1=0.222222/v;   ch=v*pow((x*sqrt(p1)+1-p1), 3.0);
1605    if (ch>2.2*v+6)  ch=-2*(log(1-p)-c*log(.5*ch)+g);
1606 l4:
1607    q=ch;   p1=.5*ch;
1608    if ((t=(double)IncompleteGamma (p1, xx, g))<0) {
1609       PhyML_Printf ("\nerr IncompleteGamma");
1610       return ((phydbl)-1.);
1611    }
1612    p2=p-t;
1613    t=p2*exp(xx*aa+g+p1-c*log(ch));
1614    b=t/ch;  a=0.5*t-b*c;
1615 
1616    s1=(210+a*(140+a*(105+a*(84+a*(70+60*a))))) / 420;
1617    s2=(420+a*(735+a*(966+a*(1141+1278*a))))/2520;
1618    s3=(210+a*(462+a*(707+932*a)))/2520;
1619    s4=(252+a*(672+1182*a)+c*(294+a*(889+1740*a)))/5040;
1620    s5=(84+264*a+c*(175+606*a))/2520;
1621    s6=(120+c*(346+127*c))/5040;
1622    ch+=t*(1+0.5*t*s1-b*c*(s1-b*(s2-b*(s3-b*(s4-b*(s5-b*s6))))));
1623    if (fabs(q/ch-1) > e) goto l4;
1624 
1625    return (phydbl)(ch);
1626 }
1627 
1628 //////////////////////////////////////////////////////////////
1629 //////////////////////////////////////////////////////////////
1630 
1631 
1632 
1633 /*
1634   The following function was extracted from the source code of R.
1635   It implements the methods referenced below.
1636  *  REFERENCE
1637  *
1638  *	Beasley, J. D. and S. G. Springer (1977).
1639  *	Algorithm AS 111: The percentage points of the normal distribution,
1640  *	Applied Statistics, 26, 118-121.
1641  *
1642  *      Wichura, M.J. (1988).
1643  *      Algorithm AS 241: The Percentage Points of the Normal Distribution.
1644  *      Applied Statistics, 37, 477-484.
1645  */
1646 
1647 
PointNormal(phydbl prob)1648 phydbl PointNormal (phydbl prob)
1649 {
1650 /* returns z so that Prob{x<z}=prob where x ~ N(0,1) and (1e-12)<prob<1-(1e-12)
1651    returns (-9999) if in error
1652    Odeh RE & Evans JO (1974) The percentage points of the normal distribution.
1653    Applied Statistics 22: 96-97 (AS70)
1654 
1655    Newer methods:
1656      Wichura MJ (1988) Algorithm AS 241: the percentage points of the
1657        normal distribution.  37: 477-484.
1658      Beasley JD & Springer SG  (1977).  Algorithm AS 111: the percentage
1659        points of the normal distribution.  26: 118-121.
1660 */
1661    phydbl a0=-.322232431088, a1=-1, a2=-.342242088547, a3=-.0204231210245;
1662    phydbl a4=-.453642210148e-4, b0=.0993484626060, b1=.588581570495;
1663    phydbl b2=.531103462366, b3=.103537752850, b4=.0038560700634;
1664    phydbl y, z=0, p=prob, p1;
1665 
1666    p1 = (p<0.5 ? p : 1-p);
1667    if (p1<1e-20) z=999;
1668    else {
1669       y = SQRT (log(1/(p1*p1)));
1670       z = y + ((((y*a4+a3)*y+a2)*y+a1)*y+a0) / ((((y*b4+b3)*y+b2)*y+b1)*y+b0);
1671    }
1672    return (p<0.5 ? -z : z);
1673 }
1674 
1675 
1676 /* phydbl PointNormal(phydbl p) */
1677 /* { */
1678 /*     double p_, q, r, val; */
1679 
1680 /*     p_ = p; */
1681 /*     q = p_ - 0.5; */
1682 
1683 /*     /\*-- use AS 241 --- *\/ */
1684 /*     /\* double ppnd16_(double *p, long *ifault)*\/ */
1685 /*     /\*      ALGORITHM AS241  APPL. STATIST. (1988) VOL. 37, NO. 3 */
1686 
1687 /* 	    Produces the normal deviate Z corresponding to a given lower */
1688 /* 	    tail area of P; Z is accurate to about 1 part in 10**16. */
1689 
1690 /* 	    (original fortran code used PARAMETER(..) for the coefficients */
1691 /* 	    and provided hash codes for checking them...) */
1692 /* *\/ */
1693 /*     if (fabs(q) <= .425)  */
1694 /*       {/\* 0.075 <= p <= 0.925 *\/ */
1695 /* 	r = .180625 - q * q; */
1696 /* 	val = */
1697 /* 	  q * (((((((r * 2509.0809287301226727 + */
1698 /* 		     33430.575583588128105) * r + 67265.770927008700853) * r + */
1699 /* 		   45921.953931549871457) * r + 13731.693765509461125) * r + */
1700 /* 		 1971.5909503065514427) * r + 133.14166789178437745) * r + */
1701 /* 	       3.387132872796366608) */
1702 /* 	  / (((((((r * 5226.495278852854561 + */
1703 /* 		   28729.085735721942674) * r + 39307.89580009271061) * r + */
1704 /* 		 21213.794301586595867) * r + 5394.1960214247511077) * r + */
1705 /* 	       687.1870074920579083) * r + 42.313330701600911252) * r + 1.); */
1706 /*       } */
1707 /*     else  */
1708 /*       { /\* closer than 0.075 from {0,1} boundary *\/ */
1709 
1710 /* 	/\* r = min(p, 1-p) < 0.075 *\/ */
1711 /* 	if (q > 0) */
1712 /* 	  r = 1-p;/\* 1-p *\/ */
1713 /* 	else */
1714 /* 	  r = p_;/\* = R_DT_Iv(p) ^=  p *\/ */
1715 
1716 /* 	r = sqrt(-log(r)); */
1717 /*         /\* r = sqrt(-log(r))  <==>  min(p, 1-p) = exp( - r^2 ) *\/ */
1718 
1719 /*         if (r <= 5.) { /\* <==> min(p,1-p) >= exp(-25) ~= 1.3888e-11 *\/ */
1720 /* 	  r += -1.6; */
1721 /* 	  val = (((((((r * 7.7454501427834140764e-4 + */
1722 /*                        .0227238449892691845833) * r + .24178072517745061177) * */
1723 /*                      r + 1.27045825245236838258) * r + */
1724 /*                     3.64784832476320460504) * r + 5.7694972214606914055) * */
1725 /*                   r + 4.6303378461565452959) * r + */
1726 /*                  1.42343711074968357734) */
1727 /* 	    / (((((((r * */
1728 /* 		     1.05075007164441684324e-9 + 5.475938084995344946e-4) * */
1729 /* 		    r + .0151986665636164571966) * r + */
1730 /* 		   .14810397642748007459) * r + .68976733498510000455) * */
1731 /* 		 r + 1.6763848301838038494) * r + */
1732 /* 		2.05319162663775882187) * r + 1.); */
1733 /*         } */
1734 /*         else  */
1735 /* 	  { /\* very close to  0 or 1 *\/ */
1736 /* 	    r += -5.; */
1737 /* 	    val = (((((((r * 2.01033439929228813265e-7 + */
1738 /* 			 2.71155556874348757815e-5) * r + */
1739 /* 			.0012426609473880784386) * r + .026532189526576123093) * */
1740 /* 		      r + .29656057182850489123) * r + */
1741 /* 		     1.7848265399172913358) * r + 5.4637849111641143699) * */
1742 /* 		   r + 6.6579046435011037772) */
1743 /* 	      / (((((((r * */
1744 /* 		       2.04426310338993978564e-15 + 1.4215117583164458887e-7)* */
1745 /* 		      r + 1.8463183175100546818e-5) * r + */
1746 /* 		     7.868691311456132591e-4) * r + .0148753612908506148525) */
1747 /* 		   * r + .13692988092273580531) * r + */
1748 /* 		  .59983220655588793769) * r + 1.); */
1749 /* 	  } */
1750 
1751 /* 	if(q < 0.0) */
1752 /* 	  val = -val; */
1753 /*         /\* return (q >= 0.)? r : -r ;*\/ */
1754 /*       } */
1755 /*     return (phydbl)val; */
1756 /* } */
1757 
1758 //////////////////////////////////////////////////////////////
1759 //////////////////////////////////////////////////////////////
1760 
1761 /* MISCs */
1762 //////////////////////////////////////////////////////////////
1763 //////////////////////////////////////////////////////////////
1764 
1765 
1766 //////////////////////////////////////////////////////////////
1767 //////////////////////////////////////////////////////////////
1768 
1769 
Bico(int n,int k)1770 phydbl Bico(int n, int k)
1771 {
1772   return FLOOR(0.5+exp(Factln(n)-Factln(k)-Factln(n-k)));
1773 }
1774 
1775 
1776 //////////////////////////////////////////////////////////////
1777 //////////////////////////////////////////////////////////////
1778 
Factln(int n)1779 phydbl Factln(int n)
1780 {
1781   /* static phydbl a[101]; */
1782 
1783   /* if (n < 0)    { Warn_And_Exit("\n== Err: negative factorial in routine FACTLN"); } */
1784   /* if (n <= 1)     return 0.0; */
1785   /* if (n <= 100)   return (a[n]>SMALL) ? a[n] : (a[n]=Gammln(n+1.0)); */
1786   /* else return     Gammln(n+1.0); */
1787   return Gammln(n+1.0);
1788 }
1789 
1790 //////////////////////////////////////////////////////////////
1791 //////////////////////////////////////////////////////////////
1792 
1793 
Gammln(phydbl xx)1794 phydbl Gammln(phydbl xx)
1795 {
1796   double x,tmp,ser;
1797   static double cof[6]={76.18009173,-86.50532033,24.01409822,
1798 			-1.231739516,0.120858003e-2,-0.536382e-5};
1799   int j;
1800 
1801   x=xx-1.0;
1802   tmp=x+5.5;
1803   tmp -= (x+0.5)*log(tmp);
1804   ser=1.0;
1805   for (j=0;j<=5;j++)
1806     {
1807       x += 1.0;
1808       ser += cof[j]/x;
1809     }
1810   return (phydbl)(-tmp+log(2.50662827465*ser));
1811 }
1812 
1813 //////////////////////////////////////////////////////////////
1814 //////////////////////////////////////////////////////////////
1815 
1816 
1817 /* void Plim_Binom(phydbl pH0, int N, phydbl *pinf, phydbl *psup) */
1818 /* { */
1819 /*   *pinf = pH0 - 1.64*SQRT(pH0*(1-pH0)/(phydbl)N); */
1820 /*   if(*pinf < 0) *pinf = .0; */
1821 /*   *psup = pH0 + 1.64*SQRT(pH0*(1-pH0)/(phydbl)N); */
1822 /* } */
1823 
1824 //////////////////////////////////////////////////////////////
1825 //////////////////////////////////////////////////////////////
1826 
1827 
LnGamma(phydbl alpha)1828 phydbl LnGamma (phydbl alpha)
1829 {
1830 /* returns ln(gamma(alpha)) for alpha>0, accurate to 10 decimal places.
1831    Stirling's formula is used for the central polynomial part of the procedure.
1832    Pike MC & Hill ID (1966) Algorithm 291: Logarithm of the gamma function.
1833    Communications of the Association for Computing Machinery, 9:684
1834 */
1835    double x=alpha, f=0, z;
1836    if (x<7) {
1837       f=1;  z=x-1;
1838       while (++z<7)  f*=z;
1839       x=z;   f=-log(f);
1840    }
1841    z = 1/(x*x);
1842    return (phydbl)(f + (x-0.5)*log(x) - x + .918938533204673
1843 		   + (((-.000595238095238*z+.000793650793651)*z-.002777777777778)*z
1844 		      +.083333333333333)/x);
1845 }
1846 
1847 //////////////////////////////////////////////////////////////
1848 //////////////////////////////////////////////////////////////
1849 
1850 
IncompleteGamma(phydbl x,phydbl alpha,phydbl ln_gamma_alpha)1851 phydbl IncompleteGamma(phydbl x, phydbl alpha, phydbl ln_gamma_alpha)
1852 {
1853 /* returns the incomplete gamma ratio I(x,alpha) where x is the upper
1854 	   limit of the integration and alpha is the shape parameter.
1855    returns (-1) if in error
1856    ln_gamma_alpha = ln(Gamma(alpha)), is almost redundant.
1857    (1) series expansion     if (alpha>x || x<=1)
1858    (2) continued fraction   otherwise
1859    RATNEST FORTRAN by
1860    Bhattacharjee GP (1970) The incomplete gamma integral.  Applied Statistics,
1861    19: 285-287 (AS32)
1862 */
1863    int i;
1864    double p=alpha, g=ln_gamma_alpha;
1865    double accurate=1e-8, overflow=1e30;
1866    double factor, gin=0, rn=0, a=0,b=0,an=0,dif=0, term=0, pn[6];
1867 
1868    if (fabs(x) < SMALL) return ((phydbl).0);
1869    if (x<0 || p<=0)        return ((phydbl)-1);
1870 
1871    factor=exp(p*log(x)-x-g);
1872    if (x>1 && x>=p) goto l30;
1873    /* (1) series expansion */
1874    gin=1;  term=1;  rn=p;
1875  l20:
1876    rn++;
1877    term*=x/rn;   gin+=term;
1878 
1879    if (term > accurate) goto l20;
1880    gin*=factor/p;
1881    goto l50;
1882  l30:
1883    /* (2) continued fraction */
1884    a=1-p;   b=a+x+1;  term=0;
1885    pn[0]=1;  pn[1]=x;  pn[2]=x+1;  pn[3]=x*b;
1886    gin=pn[2]/pn[3];
1887  l32:
1888    a++;  b+=2;  term++;   an=a*term;
1889    for (i=0; i<2; i++) pn[i+4]=b*pn[i+2]-an*pn[i];
1890    if (fabs(pn[5]) < .0) goto l35;
1891    rn=pn[4]/pn[5];   dif=fabs(gin-rn);
1892    if (dif>accurate) goto l34;
1893    if (dif<=accurate*rn) goto l42;
1894  l34:
1895    gin=rn;
1896  l35:
1897    for (i=0; i<4; i++) pn[i]=pn[i+2];
1898    if (fabs(pn[4]) < overflow) goto l32;
1899    for (i=0; i<4; i++) pn[i]/=overflow;
1900    goto l32;
1901  l42:
1902    gin=1-factor*gin;
1903 
1904  l50:
1905    return (phydbl)(gin);
1906 }
1907 
1908 
1909 //////////////////////////////////////////////////////////////
1910 //////////////////////////////////////////////////////////////
1911 
1912 
DiscreteGamma(phydbl freqK[],phydbl rK[],phydbl alfa,phydbl beta,int K,int median)1913 int DiscreteGamma (phydbl freqK[], phydbl rK[],
1914 		   phydbl alfa, phydbl beta, int K, int median)
1915 {
1916   /* discretization of gamma distribution with equal proportions in each
1917      category
1918   */
1919 
1920   int i;
1921   phydbl gap05=1.0/(2.0*K), t, factor=alfa/beta*K, lnga1;
1922 
1923   if(K==1)
1924     {
1925       freqK[0] = 1.0;
1926       rK[0] = 1.0;
1927       return 0;
1928     }
1929 
1930    if (median)
1931      {
1932        for (i=0; i<K; i++)     rK[i]=PointGamma((i*2.0+1)*gap05, alfa, beta);
1933        for (i=0,t=0; i<K; i++) t+=rK[i];
1934        for (i=0; i<K; i++)     rK[i]*=factor/t;
1935      }
1936    else
1937      {
1938        lnga1=LnGamma(alfa+1);
1939        for (i=0; i<K-1; i++)
1940          {
1941            freqK[i]=PointGamma((i+1.0)/K, alfa, beta);
1942          }
1943 
1944        for (i=0; i<K-1; i++)
1945          {
1946            freqK[i]=IncompleteGamma(freqK[i]*beta, alfa+1, lnga1);
1947          }
1948 
1949        rK[0] = freqK[0]*factor;
1950        rK[K-1] = (1-freqK[K-2])*factor;
1951        for (i=1; i<K-1; i++)  rK[i] = (freqK[i]-freqK[i-1])*factor;
1952      }
1953    for (i=0; i<K; i++) freqK[i]=1.0/K;
1954    return (0);
1955 }
1956 
1957 //////////////////////////////////////////////////////////////
1958 //////////////////////////////////////////////////////////////
1959 
1960 
1961 /* Return log(n!) */
1962 
LnFact(int n)1963 phydbl LnFact(int n)
1964 {
1965   int i;
1966   phydbl res;
1967 
1968   res = .0;
1969   for(i=2;i<n+1;i++) res += log((phydbl)i);
1970 
1971   return(res);
1972 }
1973 
1974 //////////////////////////////////////////////////////////////
1975 //////////////////////////////////////////////////////////////
1976 
Choose(int n,int k)1977 int Choose(int n, int k)
1978 {
1979   phydbl accum;
1980   int i;
1981 
1982   if (k > n) return(0);
1983   if (k > n/2) k = n-k;
1984   if(!k) return(1);
1985 
1986   accum = 1.;
1987   for(i=1;i<k+1;i++) accum = accum * (n-k+i) / i;
1988 
1989   return((int)accum);
1990 }
1991 
1992 //////////////////////////////////////////////////////////////
1993 //////////////////////////////////////////////////////////////
1994 
LnChoose(int n,int k)1995 phydbl LnChoose(int n, int k)
1996 {
1997   return(LnFact(n) - LnFact(k) - LnFact(n-k));
1998 }
1999 
2000 //////////////////////////////////////////////////////////////
2001 //////////////////////////////////////////////////////////////
2002 
Covariance_Matrix(t_tree * tree)2003 phydbl *Covariance_Matrix(t_tree *tree)
2004 {
2005   phydbl *cov, *mean;
2006   int *ori_wght,*site_num;
2007   int dim,i,j,replicate,n_site,position,sample_size;
2008 
2009   sample_size = 100;
2010   dim = 2*tree->n_otu-3;
2011 
2012   cov      = (phydbl *)mCalloc(dim*dim,sizeof(phydbl));
2013   mean     = (phydbl *)mCalloc(    dim,sizeof(phydbl));
2014   ori_wght = (int *)mCalloc(tree->data->crunch_len,sizeof(int));
2015   site_num = (int *)mCalloc(tree->data->init_len,sizeof(int));
2016 
2017   for(i=0;i<tree->data->crunch_len;i++) ori_wght[i] = tree->data->wght[i];
2018 
2019   n_site = 0;
2020   for(i=0;i<tree->data->crunch_len;i++) For(j,tree->data->wght[i])
2021     {
2022       site_num[n_site] = i;
2023       n_site++;
2024     }
2025 
2026 
2027   tree->verbose = VL0;
2028   for(replicate=0;replicate<sample_size;replicate++)
2029     {
2030       For(i,2*tree->n_otu-3) tree->a_edges[i]->l->v = .1;
2031 
2032       for(i=0;i<tree->data->crunch_len;i++) tree->data->wght[i] = 0;
2033 
2034       for(i=0;i<tree->data->init_len;i++)
2035 	{
2036 	  position = Rand_Int(0,(int)(tree->data->init_len-1.0));
2037 	  tree->data->wght[site_num[position]] += 1;
2038 	}
2039 
2040       Round_Optimize(tree,ROUND_MAX);
2041 
2042       For(i,2*tree->n_otu-3) For(j,2*tree->n_otu-3) cov[i*dim+j] += log(tree->a_edges[i]->l->v) * log(tree->a_edges[j]->l->v);
2043       For(i,2*tree->n_otu-3) mean[i] += log(tree->a_edges[i]->l->v);
2044 
2045       PhyML_Printf("[%3d/%3d]",replicate,sample_size); fflush(NULL);
2046 /*       PhyML_Printf("\n. %3d %12f %12f %12f ", */
2047 /* 	     replicate, */
2048 /*  	     cov[1*dim+1]/(replicate+1)-mean[1]*mean[1]/POW(replicate+1,2), */
2049 /* 	     tree->a_edges[1]->l->v, */
2050 /* 	     mean[1]/(replicate+1)); */
2051    }
2052 
2053   For(i,2*tree->n_otu-3) mean[i] /= (phydbl)sample_size;
2054   For(i,2*tree->n_otu-3) For(j,2*tree->n_otu-3) cov[i*dim+j] /= (phydbl)sample_size;
2055   For(i,2*tree->n_otu-3) For(j,2*tree->n_otu-3) cov[i*dim+j] -= mean[i]*mean[j];
2056 /*   For(i,2*tree->n_otu-3) if(cov[i*dim+i] < var_min) cov[i*dim+i] = var_min; */
2057 
2058 
2059 /*   PhyML_Printf("\n"); */
2060 /*   For(i,2*tree->n_otu-3) PhyML_Printf("%f %f\n",mean[i],tree->a_edges[i]->l->v); */
2061 /*   PhyML_Printf("\n"); */
2062 /*   PhyML_Printf("\n"); */
2063 /*   For(i,2*tree->n_otu-3) */
2064 /*     { */
2065 /*       For(j,2*tree->n_otu-3) */
2066 /* 	{ */
2067 /* 	  PhyML_Printf("%G\n",cov[i*dim+j]); */
2068 /* 	} */
2069 /*       PhyML_Printf("\n"); */
2070 /*     } */
2071 
2072   for(i=0;i<tree->data->crunch_len;i++) tree->data->wght[i] = ori_wght[i];
2073 
2074   Free(mean);
2075   Free(ori_wght);
2076   Free(site_num);
2077 
2078   return cov;
2079 }
2080 
2081 //////////////////////////////////////////////////////////////
2082 //////////////////////////////////////////////////////////////
2083 
2084 /* Work out the Hessian for the likelihood function. Only branch lengths are considered as variable.
2085    This function is very much inspired from Jeff Thorne's 'hessian' function in his program 'estbranches'. */
Hessian(t_tree * tree)2086 phydbl *Hessian(t_tree *tree)
2087 {
2088   phydbl *hessian;
2089   phydbl *plus_plus, *minus_minus, *plus_zero, *minus_zero, *plus_minus, zero_zero;
2090   phydbl *ori_bl,*inc,*buff;
2091   int *ok_edges,*is_ok;
2092   int dim;
2093   int n_ok_edges;
2094   int i,j;
2095   phydbl eps;
2096   phydbl lk;
2097   phydbl lnL,lnL1,lnL2,ori_lnL;
2098   phydbl l_inf;
2099 
2100   dim = 2*tree->n_otu-3;
2101   eps = (tree->mod->log_l == YES)?(0.2):(1E-4);
2102 
2103   hessian     = (phydbl *)mCalloc((int)dim*dim,sizeof(phydbl));
2104   ori_bl      = (phydbl *)mCalloc((int)dim,sizeof(phydbl));
2105   plus_plus   = (phydbl *)mCalloc((int)dim*dim,sizeof(phydbl));
2106   minus_minus = (phydbl *)mCalloc((int)dim*dim,sizeof(phydbl));
2107   plus_minus  = (phydbl *)mCalloc((int)dim*dim,sizeof(phydbl));
2108   plus_zero   = (phydbl *)mCalloc((int)dim    ,sizeof(phydbl));
2109   minus_zero  = (phydbl *)mCalloc((int)dim    ,sizeof(phydbl));
2110   inc         = (phydbl *)mCalloc((int)dim    ,sizeof(phydbl));
2111   buff        = (phydbl *)mCalloc((int)dim*dim,sizeof(phydbl));
2112   ok_edges    = (int *)mCalloc((int)dim,sizeof(int));
2113   is_ok       = (int *)mCalloc((int)dim,sizeof(int));
2114 
2115   lnL = lnL1 = lnL2 = UNLIKELY;
2116 
2117   Set_Both_Sides(YES,tree);
2118   Lk(NULL,tree);
2119   ori_lnL = tree->c_lnL;
2120 
2121 
2122   for(i=0;i<dim;i++) ori_bl[i] = tree->a_edges[i]->l->v;
2123 
2124   if(tree->mod->log_l == NO)
2125     l_inf = MAX(tree->mod->l_min,1./(phydbl)tree->data->init_len);
2126   else
2127     l_inf = MAX(tree->mod->l_min,-log((phydbl)tree->data->init_len));
2128 
2129 
2130   n_ok_edges = 0;
2131   for(i=0;i<dim;i++)
2132     {
2133       if(tree->a_edges[i]->l->v*(1.-eps) > l_inf)
2134 	{
2135 	  inc[i] = eps * tree->a_edges[i]->l->v;
2136 	  ok_edges[n_ok_edges] = i;
2137 	  n_ok_edges++;
2138 	  is_ok[i] = 1;
2139 	}
2140       else
2141 	{
2142 	  inc[i] = -1.0;
2143 	  is_ok[i] = 0;
2144 	}
2145     }
2146 
2147 
2148   /* Fine tune the increments */
2149   for(i=0;i<dim;i++)
2150     {
2151       do
2152 	{
2153 	  tree->a_edges[i]->l->v += inc[i];
2154 	  lnL1 = Lk(tree->a_edges[i],tree);
2155 	  tree->a_edges[i]->l->v = ori_bl[i];
2156 	  inc[i] *= 1.1;
2157 	}while((fabs(lnL1 - ori_lnL) < 1.E-1) &&
2158 	       (tree->a_edges[i]->l->v+inc[i] < tree->mod->l_max));
2159       inc[i] /= 1.1;
2160     }
2161 
2162 
2163 
2164   /* zero zero */
2165   zero_zero = tree->c_lnL;
2166 
2167   /* plus zero */
2168   for(i=0;i<dim;i++)
2169     {
2170       if(is_ok[i])
2171 	{
2172 	  tree->a_edges[i]->l->v += inc[i];
2173 	  lk = Lk(tree->a_edges[i],tree);
2174 	  plus_zero[i] = lk;
2175 	  tree->a_edges[i]->l->v = ori_bl[i];
2176 	}
2177     }
2178 
2179 
2180   /* minus zero */
2181   for(i=0;i<dim;i++)
2182     {
2183       if(is_ok[i])
2184 	{
2185 	  tree->a_edges[i]->l->v -= inc[i];
2186 	  lk = Lk(tree->a_edges[i],tree);
2187 	  minus_zero[i] = lk;
2188 	  tree->a_edges[i]->l->v = ori_bl[i];
2189 	}
2190     }
2191 
2192 
2193   for(i=0;i<dim;i++) Update_PMat_At_Given_Edge(tree->a_edges[i],tree);
2194 
2195   /* plus plus  */
2196   for(i=0;i<dim;i++)
2197     {
2198       if(is_ok[i])
2199 	{
2200 	  tree->a_edges[i]->l->v += inc[i];
2201 	  Update_PMat_At_Given_Edge(tree->a_edges[i],tree);
2202 
2203 	  for(j=0;j<3;j++)
2204 	    if((!tree->a_edges[i]->left->tax) && (tree->a_edges[i]->left->v[j] != tree->a_edges[i]->rght))
2205 	      Recurr_Hessian(tree->a_edges[i]->left,tree->a_edges[i]->left->v[j],1,inc,plus_plus+i*dim,is_ok,tree);
2206 
2207 	  for(j=0;j<3;j++)
2208 	    if((!tree->a_edges[i]->rght->tax) && (tree->a_edges[i]->rght->v[j] != tree->a_edges[i]->left))
2209 	      Recurr_Hessian(tree->a_edges[i]->rght,tree->a_edges[i]->rght->v[j],1,inc,plus_plus+i*dim,is_ok,tree);
2210 
2211 	  tree->a_edges[i]->l->v = ori_bl[i];
2212 	  Lk(NULL,tree);
2213 	}
2214     }
2215 
2216 
2217   /* plus minus */
2218   for(i=0;i<dim;i++)
2219     {
2220       if(is_ok[i])
2221 	{
2222 	  tree->a_edges[i]->l->v += inc[i];
2223 	  Update_PMat_At_Given_Edge(tree->a_edges[i],tree);
2224 
2225 	  for(j=0;j<3;j++)
2226 	    if((!tree->a_edges[i]->left->tax) && (tree->a_edges[i]->left->v[j] != tree->a_edges[i]->rght))
2227 	      Recurr_Hessian(tree->a_edges[i]->left,tree->a_edges[i]->left->v[j],-1,inc,plus_minus+i*dim,is_ok,tree);
2228 
2229 	  for(j=0;j<3;j++)
2230 	    if((!tree->a_edges[i]->rght->tax) && (tree->a_edges[i]->rght->v[j] != tree->a_edges[i]->left))
2231 	      Recurr_Hessian(tree->a_edges[i]->rght,tree->a_edges[i]->rght->v[j],-1,inc,plus_minus+i*dim,is_ok,tree);
2232 
2233 	  tree->a_edges[i]->l->v = ori_bl[i];
2234 	  Lk(NULL,tree);
2235 	}
2236     }
2237 
2238 
2239   /* minus minus */
2240   for(i=0;i<dim;i++)
2241     {
2242       if(is_ok[i])
2243 	{
2244 	  tree->a_edges[i]->l->v -= inc[i];
2245 
2246 	  Update_PMat_At_Given_Edge(tree->a_edges[i],tree);
2247 
2248 	  for(j=0;j<3;j++)
2249 	    if((!tree->a_edges[i]->left->tax) && (tree->a_edges[i]->left->v[j] != tree->a_edges[i]->rght))
2250 	      Recurr_Hessian(tree->a_edges[i]->left,tree->a_edges[i]->left->v[j],-1,inc,minus_minus+i*dim,is_ok,tree);
2251 
2252 	  for(j=0;j<3;j++)
2253 	    if((!tree->a_edges[i]->rght->tax) && (tree->a_edges[i]->rght->v[j] != tree->a_edges[i]->left))
2254 	      Recurr_Hessian(tree->a_edges[i]->rght,tree->a_edges[i]->rght->v[j],-1,inc,minus_minus+i*dim,is_ok,tree);
2255 
2256 	  tree->a_edges[i]->l->v = ori_bl[i];
2257 	  Lk(NULL,tree);
2258 	}
2259     }
2260 
2261 
2262 
2263   for(i=0;i<dim;i++)
2264     {
2265       if(is_ok[i])
2266 	{
2267 	  hessian[i*dim+i] = (plus_zero[i]-2*zero_zero+minus_zero[i])/(POW(inc[i],2));
2268 
2269 	  for(j=i+1;j<dim;j++)
2270 	    {
2271 	      if(is_ok[j])
2272 		{
2273 		  hessian[i*dim+j] =
2274 		    (plus_plus[i*dim+j]-plus_minus[i*dim+j]-plus_minus[j*dim+i]+minus_minus[i*dim+j])/
2275 		    (4*inc[i]*inc[j]);
2276 		  hessian[j*dim+i] = hessian[i*dim+j];
2277 		}
2278 	    }
2279 	}
2280     }
2281 
2282   for(i=0;i<n_ok_edges;i++)
2283     {
2284       for(j=0;j<n_ok_edges;j++)
2285 	{
2286 	  buff[i*n_ok_edges+j] = -1.0*hessian[ok_edges[i]*dim+ok_edges[j]];
2287 	}
2288     }
2289 
2290 
2291   if(!Matinv(buff,n_ok_edges,n_ok_edges,NO))
2292     {
2293       PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
2294       Exit("\n");
2295     }
2296 
2297   for(i=0;i<n_ok_edges;i++)
2298     {
2299       for(j=0;j<n_ok_edges;j++)
2300 	{
2301 	  hessian[ok_edges[i]*dim+ok_edges[j]] = buff[i*n_ok_edges+j];
2302 	}
2303     }
2304 
2305 /*   eps = 1./(phydbl)tree->data->init_len; */
2306   /* Approximate variance for very short branches */
2307   for(i=0;i<dim;i++)
2308     if(inc[i] < 0.0 || hessian[i*dim+i] < MIN_VAR_BL)
2309       {
2310 	eps = 0.2 * tree->a_edges[i]->l->v;
2311 	do
2312 	  {
2313 	    lnL  = Lk(tree->a_edges[i],tree);
2314 	    tree->a_edges[i]->l->v += eps;
2315 	    lnL1 = Lk(tree->a_edges[i],tree);
2316 	    tree->a_edges[i]->l->v += eps;
2317 	    lnL2 = Lk(tree->a_edges[i],tree);
2318 	    tree->a_edges[i]->l->v -= 2.*eps;
2319 
2320 	    hessian[i*dim+i] = (lnL2 - 2.*lnL1 + lnL) / POW(eps,2);
2321 
2322 /* 	    printf("\n* l=%G eps=%f lnL=%f lnL1=%f lnL2=%f var=%f",tree->a_edges[i]->l->v,eps,lnL,lnL1,lnL2,hessian[i*dim+i]); */
2323 	    eps *= 5.;
2324 	  }while(fabs(lnL2 - lnL) < 1.E-3);
2325 
2326 	hessian[i*dim+i] = -1.0 / hessian[i*dim+i];
2327 
2328       }
2329 
2330 
2331   /* Fit a straight line to the log-likelihood (i.e., an exponential to the likelihood) */
2332   /* It is only a straight line when considering branch length (rather than log(branch lengths)) */
2333   for(i=0;i<dim;i++)
2334     if((tree->a_edges[i]->l->v / tree->mod->l_min < 1.1) &&
2335        (tree->a_edges[i]->l->v / tree->mod->l_min > 0.9))
2336       {
2337 	phydbl *x,*y,l;
2338 	phydbl cov,var;
2339 
2340 	x=plus_plus;
2341 	y=minus_minus;
2342 	l=(tree->mod->log_l == YES)?(exp(tree->a_edges[i]->l->v)):(tree->a_edges[i]->l->v); /* Get actual branch length */
2343 
2344 	for(j=0;j<dim;j++)
2345 	  {
2346 	    x[j] = l + (100.*l-l)*((phydbl)j/dim);
2347 	    tree->a_edges[i]->l->v = (tree->mod->log_l)?(log(x[j])):(x[j]); /* Transform to log if necessary */
2348 	    y[j] = Lk(tree->a_edges[i],tree);
2349 	    tree->a_edges[i]->l->v = (tree->mod->log_l)?(log(l)):(l); /* Go back to initial edge length */
2350 	  }
2351 
2352 	cov = Covariance(x,y,dim);
2353 	var = Covariance(x,x,dim);
2354 
2355 	/* cov/var is minus the parameter of the exponential distribution.
2356 	   The variance is therefore : */
2357 	hessian[i*dim+i] = 1.0 / pow(cov/var,2);
2358 
2359 	/* 	    printf("\n. Hessian = %G cov=%G var=%G",hessian[i*dim+i],cov,var); */
2360       }
2361   /*     } */
2362 
2363 
2364   for(i=0;i<dim;i++)
2365     if(hessian[i*dim+i] < 0.0)
2366       {
2367 	PhyML_Printf("\n. l=%G var=%G",tree->a_edges[i]->l->v,hessian[i*dim+i]);
2368 /* 	PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__); */
2369 /* 	Exit("\n"); */
2370 	hessian[i*dim+i] = MIN_VAR_BL;
2371       }
2372 
2373   for(i=0;i<dim;i++)
2374     {
2375       if(hessian[i*dim+i] < MIN_VAR_BL)
2376 	{
2377 	  PhyML_Printf("\n. l=%10G var(l)=%12G. WARNING: numerical precision issues may affect this analysis.",
2378 		       tree->a_edges[i]->l->v,hessian[i*dim+i]);
2379 	  hessian[i*dim+i] = MIN_VAR_BL;
2380 	}
2381       if(hessian[i*dim+i] > MAX_VAR_BL)
2382 	{
2383 	  PhyML_Printf("\n. l=%10G var(l)=%12G. WARNING: numerical precision issues may affect this analysis.",
2384 		       tree->a_edges[i]->l->v,hessian[i*dim+i]);
2385 	  hessian[i*dim+i] = MAX_VAR_BL;
2386 	}
2387     }
2388 
2389   Iter_Matinv(hessian,dim,dim,NO);
2390 
2391   For(i,dim*dim) hessian[i] = -1.0*hessian[i];
2392 
2393   for(i=0;i<dim;i++)
2394     {
2395       for(j=0;j<dim;j++)
2396 	{
2397 	  if(fabs(hessian[i*dim+j]-hessian[j*dim+i]) > 1.E-3)
2398 	    {
2399 	      PhyML_Printf("\n. Hessian not symmetrical.");
2400 	      PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
2401 	      Exit("\n");
2402 	    }
2403 	  hessian[i*dim+j] = (hessian[i*dim+j] + hessian[j*dim+i]) / 2.;
2404 	  hessian[j*dim+i] = hessian[i*dim+j];
2405 	}
2406     }
2407 
2408 /*   printf("\n"); */
2409 /*   printf("HESSIAN\n"); */
2410 /*   for(i=0;i<dim;i++) */
2411 /*     { */
2412 /*       PhyML_Printf("[%f] ",tree->a_edges[i]->l->v); */
2413 /*       for(j=0;j<dim;j++) */
2414 /* 	{ */
2415 /* 	  PhyML_Printf("%12lf ",hessian[i*dim+j]); */
2416 /* 	} */
2417 /*       PhyML_Printf("\n"); */
2418 /*     } */
2419 
2420   /* Matinv(hessian,dim,dim,NO); */
2421 
2422   /* PhyML_Printf("\n"); */
2423 
2424   /* for(i=0;i<dim;i++) */
2425   /*   { */
2426   /*     PhyML_Printf("[%f] ",tree->a_edges[i]->l->v); */
2427   /*     for(j=0;j<dim;j++) */
2428   /* 	{ */
2429   /* 	  PhyML_Printf("%12G ",-hessian[i*dim+j]); */
2430   /* 	} */
2431   /*     PhyML_Printf("\n"); */
2432   /*   } */
2433   /* Exit("\n"); */
2434 
2435 
2436   /* Make sure to update likelihood before bailing out */
2437   Set_Both_Sides(YES,tree);
2438   Lk(NULL,tree);
2439 
2440   Free(ori_bl);
2441   Free(plus_plus);
2442   Free(minus_minus);
2443   Free(plus_zero);
2444   Free(minus_zero);
2445   Free(plus_minus);
2446   Free(inc);
2447   Free(buff);
2448   Free(ok_edges);
2449   Free(is_ok);
2450 
2451   return hessian;
2452 
2453 }
2454 
2455 //////////////////////////////////////////////////////////////
2456 //////////////////////////////////////////////////////////////
2457 
2458 
2459 /* Work out the gradient for the likelihood function. Only branch lengths are considered as variable.
2460  */
Gradient(t_tree * tree)2461 phydbl *Gradient(t_tree *tree)
2462 {
2463   phydbl *gradient;
2464   phydbl *plus, *minus;
2465   phydbl *ori_bl,*inc;
2466   int *is_ok;
2467   int dim;
2468   int i;
2469   phydbl eps;
2470   phydbl lk;
2471   phydbl lnL,lnL1,lnL2;
2472   phydbl l_inf;
2473 
2474   dim = 2*tree->n_otu-3;
2475   eps = (tree->mod->log_l == YES)?(0.2):(1.E-6);
2476 
2477   gradient    = (phydbl *)mCalloc((int)dim,sizeof(phydbl));
2478   ori_bl      = (phydbl *)mCalloc((int)dim,sizeof(phydbl));
2479   plus        = (phydbl *)mCalloc((int)dim,sizeof(phydbl));
2480   minus       = (phydbl *)mCalloc((int)dim,sizeof(phydbl));
2481   inc         = (phydbl *)mCalloc((int)dim    ,sizeof(phydbl));
2482   is_ok       = (int *)mCalloc((int)dim,sizeof(int));
2483 
2484   lnL = lnL1 = lnL2 = UNLIKELY;
2485 
2486   Set_Both_Sides(YES,tree);
2487   Lk(NULL,tree);
2488 
2489   for(i=0;i<dim;i++) ori_bl[i] = tree->a_edges[i]->l->v;
2490 
2491   if(tree->mod->log_l == NO)
2492     l_inf = MAX(tree->mod->l_min,1./(phydbl)tree->data->init_len);
2493   else
2494     l_inf = MAX(tree->mod->l_min,-log((phydbl)tree->data->init_len));
2495 
2496   for(i=0;i<dim;i++)
2497     {
2498       if(tree->a_edges[i]->l->v*(1.-eps) > l_inf)
2499 	{
2500 	  inc[i] = eps * tree->a_edges[i]->l->v;
2501 	  is_ok[i] = YES;
2502 	}
2503       else
2504 	{
2505 	  inc[i] = -1.0;
2506 	  is_ok[i] = NO;
2507 	}
2508     }
2509 
2510   /* plus */
2511   for(i=0;i<dim;i++)
2512     {
2513       if(is_ok[i] == YES)
2514 	{
2515 	  tree->a_edges[i]->l->v += inc[i];
2516 	  lk = Lk(tree->a_edges[i],tree);
2517 	  plus[i] = lk;
2518 	  tree->a_edges[i]->l->v = ori_bl[i];
2519 	}
2520     }
2521 
2522 
2523   /* minus */
2524   for(i=0;i<dim;i++)
2525     {
2526       if(is_ok[i] == YES)
2527 	{
2528 	  tree->a_edges[i]->l->v -= inc[i];
2529 	  lk = Lk(tree->a_edges[i],tree);
2530 	  minus[i] = lk;
2531 	  tree->a_edges[i]->l->v = ori_bl[i];
2532 	}
2533     }
2534 
2535 
2536   for(i=0;i<dim;i++)
2537     {
2538       if(is_ok[i] == YES)
2539 	{
2540 	  gradient[i] = (plus[i] - minus[i])/(2.*inc[i]);
2541 	}
2542     }
2543 
2544 
2545   for(i=0;i<dim;i++)
2546     {
2547       if(is_ok[i] == NO)
2548 	{
2549 	  eps = fabs(0.2 * tree->a_edges[i]->l->v);
2550 	  lnL  = Lk(tree->a_edges[i],tree);
2551 	  tree->a_edges[i]->l->v += eps;
2552 	  lnL1 = Lk(tree->a_edges[i],tree);
2553 	  tree->a_edges[i]->l->v += eps;
2554 	  lnL2 = Lk(tree->a_edges[i],tree);
2555 	  tree->a_edges[i]->l->v -= eps;
2556 	  tree->a_edges[i]->l->v -= eps;
2557 	  gradient[i] = (4.*lnL1 - lnL2 - 3.*lnL) / (2.*eps);
2558 	}
2559     }
2560 
2561   /* Make sure to update likelihood before bailing out */
2562   Set_Both_Sides(YES,tree);
2563   Lk(NULL,tree);
2564 
2565   Free(ori_bl);
2566   Free(plus);
2567   Free(minus);
2568   Free(inc);
2569   Free(is_ok);
2570 
2571 
2572 /*   printf("\n"); */
2573 /*   printf("GRADIENT\n"); */
2574 /*   for(i=0;i<dim;i++) */
2575 /*     { */
2576 /*       PhyML_Printf("[%f] ",tree->a_edges[i]->l->v); */
2577 /*       for(j=0;j<dim;j++) */
2578 /* 	{ */
2579 /* 	  printf("%12lf ",gradient[i]*gradient[j]); */
2580 /* 	} */
2581 /*       printf("\n"); */
2582 /*     } */
2583 /*   printf("\n"); */
2584 /*   for(i=0;i<dim;i++) */
2585 /*     { */
2586 /*       PhyML_Printf("[%f] [%f]\n",tree->a_edges[i]->l->v,gradient[i]); */
2587 /*     } */
2588 
2589 /*   Exit("\n"); */
2590 
2591   return gradient;
2592 
2593 }
2594 
2595 //////////////////////////////////////////////////////////////
2596 //////////////////////////////////////////////////////////////
2597 
2598 
2599 /* Work out the Hessian for the likelihood function using the method described by Seo et al., 2004, MBE.
2600    Corresponds to the outer product of the scores approach described in Porter, 2002. (matrix J1)
2601 */
Hessian_Seo(t_tree * tree)2602 phydbl *Hessian_Seo(t_tree *tree)
2603 {
2604   phydbl *hessian,*site_hessian;
2605   phydbl *gradient;
2606   phydbl *plus, *minus, *plusplus, *zero;
2607   phydbl *ori_bl,*inc_plus,*inc_minus,*inc;
2608   int *is_ok;
2609   int dim;
2610   int i,j,k;
2611   phydbl eps;
2612   phydbl ori_lnL,lnL1,lnL2;
2613   phydbl l_inf;
2614   int l,n;
2615   phydbl small_var;
2616 
2617   dim = 2*tree->n_otu-3;
2618   eps = (tree->mod->log_l == YES)?(0.2):(1.E-4);
2619 
2620   hessian      = (phydbl *)mCalloc((int)dim*dim,sizeof(phydbl));
2621   site_hessian = (phydbl *)mCalloc((int)dim*dim,sizeof(phydbl));
2622   gradient     = (phydbl *)mCalloc((int)dim,sizeof(phydbl));
2623   ori_bl       = (phydbl *)mCalloc((int)dim,sizeof(phydbl));
2624   plus         = (phydbl *)mCalloc((int)dim*tree->n_pattern,sizeof(phydbl));
2625   plusplus     = (phydbl *)mCalloc((int)dim*tree->n_pattern,sizeof(phydbl));
2626   minus        = (phydbl *)mCalloc((int)dim*tree->n_pattern,sizeof(phydbl));
2627   zero         = (phydbl *)mCalloc((int)dim*tree->n_pattern,sizeof(phydbl));
2628   inc_plus     = (phydbl *)mCalloc((int)dim,sizeof(phydbl));
2629   inc_minus    = (phydbl *)mCalloc((int)dim,sizeof(phydbl));
2630   inc          = (phydbl *)mCalloc((int)dim,sizeof(phydbl));
2631   is_ok        = (int *)mCalloc((int)dim,sizeof(int));
2632 
2633   lnL1 = lnL2 = UNLIKELY;
2634 
2635   for(i=0;i<dim;i++) ori_bl[i] = tree->a_edges[i]->l->v;
2636 
2637   Set_Both_Sides(YES,tree);
2638   Lk(NULL,tree);
2639   ori_lnL = tree->c_lnL;
2640 
2641 
2642   if(tree->mod->log_l == NO)
2643     l_inf = MAX(tree->mod->l_min,1./(phydbl)tree->data->init_len);
2644   else
2645     l_inf = MAX(tree->mod->l_min,-log((phydbl)tree->data->init_len));
2646 
2647   for(i=0;i<dim;i++)
2648     {
2649       if(tree->a_edges[i]->l->v*(1.-eps) > l_inf)
2650 	{
2651 	  inc_plus[i]  = fabs(eps * MAX(tree->mod->l_min,tree->a_edges[i]->l->v));
2652 	  inc_minus[i] = fabs(eps * MAX(tree->mod->l_min,tree->a_edges[i]->l->v));
2653 	  is_ok[i]     = YES;
2654 	}
2655       else
2656 	{
2657 	  inc_plus[i]  = fabs(0.2 * MAX(tree->mod->l_min,tree->a_edges[i]->l->v));
2658 	  inc_minus[i] = fabs(0.2 * MAX(tree->mod->l_min,tree->a_edges[i]->l->v));
2659 	  is_ok[i]     = NO;
2660     }
2661 
2662 
2663     }
2664 
2665   /* Fine tune the increments */
2666   for(i=0;i<dim;i++)
2667     {
2668       do
2669 	{
2670 	  tree->a_edges[i]->l->v += inc_plus[i];
2671 	  lnL1 = Lk(tree->a_edges[i],tree);
2672 	  tree->a_edges[i]->l->v = ori_bl[i];
2673 	  inc_plus[i] *= 1.1;
2674 	}while((fabs(lnL1 - ori_lnL) < 1.E-1) && (tree->a_edges[i]->l->v+inc_plus[i] < tree->mod->l_max));
2675       inc_plus[i] /= 1.1;
2676     }
2677 
2678   for(i=0;i<dim;i++)
2679     {
2680       do
2681 	{
2682 	  tree->a_edges[i]->l->v -= inc_minus[i];
2683 	  lnL1 = Lk(tree->a_edges[i],tree);
2684 	  tree->a_edges[i]->l->v = ori_bl[i];
2685 	  inc_minus[i] *= 1.1;
2686 	}while((fabs(lnL1 - ori_lnL) < 1.E-1) &&
2687 	       (tree->a_edges[i]->l->v -inc_minus[i] > tree->mod->l_min));
2688       inc_minus[i] /= 1.1;
2689     }
2690 
2691   for(i=0;i<dim;i++)
2692     {
2693       inc[i] = MIN(inc_plus[i],inc_minus[i]);
2694     }
2695 
2696   /* plus */
2697   for(i=0;i<dim;i++)
2698     {
2699       if(is_ok[i] == YES)
2700 	{
2701 	  tree->a_edges[i]->l->v += inc[i];
2702 	  Lk(tree->a_edges[i],tree);
2703 	  for(j=0;j<tree->n_pattern;j++) plus[i*tree->n_pattern+j] = log(tree->cur_site_lk[j]);
2704 	  tree->a_edges[i]->l->v = ori_bl[i];
2705 	}
2706     }
2707 
2708 
2709   /* minus */
2710   for(i=0;i<dim;i++)
2711     {
2712       if(is_ok[i] == YES)
2713 	{
2714 	  tree->a_edges[i]->l->v -= inc[i];
2715 	  Lk(tree->a_edges[i],tree);
2716 	  for(j=0;j<tree->n_pattern;j++) minus[i*tree->n_pattern+j] = log(tree->cur_site_lk[j]);
2717 	  tree->a_edges[i]->l->v = ori_bl[i];
2718 	}
2719     }
2720 
2721 
2722   for(i=0;i<dim;i++)
2723     {
2724       if(is_ok[i] == NO)
2725 	{
2726 	  Lk(tree->a_edges[i],tree);
2727 	  for(j=0;j<tree->n_pattern;j++) zero[i*tree->n_pattern+j] = log(tree->cur_site_lk[j]);
2728 
2729 	  tree->a_edges[i]->l->v += inc[i];
2730 	  lnL1 = Lk(tree->a_edges[i],tree);
2731 	  for(j=0;j<tree->n_pattern;j++) plus[i*tree->n_pattern+j] = log(tree->cur_site_lk[j]);
2732 
2733 	  tree->a_edges[i]->l->v += inc[i];
2734 	  lnL2 = Lk(tree->a_edges[i],tree);
2735 	  for(j=0;j<tree->n_pattern;j++) plusplus[i*tree->n_pattern+j] = log(tree->cur_site_lk[j]);
2736 
2737 	  tree->a_edges[i]->l->v = ori_bl[i];
2738 
2739 	}
2740     }
2741 
2742   For(i,dim*dim) hessian[i] = 0.0;
2743 
2744   for(k=0;k<tree->n_pattern;k++)
2745     {
2746       for(i=0;i<dim;i++)
2747 	{
2748 	  if(is_ok[i] == YES)
2749 	    gradient[i] = (plus[i*tree->n_pattern+k] - minus[i*tree->n_pattern+k])/(inc[i] + inc[i]);
2750 	  else
2751 	    gradient[i] = (4.*plus[i*tree->n_pattern+k] - plusplus[i*tree->n_pattern+k] - 3.*zero[i*tree->n_pattern+k])/(inc[i] + inc[i]);
2752 
2753 	  /* if(is_ok[i] == NO) */
2754 	  /*   printf("\n. i=%d site=%d l=%G plus=%G plusplus=%G zero=%G num=%f grad=%G", */
2755 	  /* 	   i,k,tree->a_edges[i]->l->v, */
2756 	  /* 	   plus[i*tree->n_pattern+k],plusplus[i*tree->n_pattern+k],zero[i*tree->n_pattern+k], */
2757 	  /* 	   (4.*plus[i*tree->n_pattern+k] - plusplus[i*tree->n_pattern+k] - 3.*zero[i*tree->n_pattern+k]), */
2758 	  /* 	   gradient[i]); */
2759 
2760 	}
2761       for(i=0;i<dim;i++) for(j=0;j<dim;j++) site_hessian[i*dim+j] = gradient[i] * gradient[j];
2762       For(i,dim*dim) hessian[i] -= site_hessian[i] * tree->data->wght[k];
2763     }
2764 
2765 
2766   /* Make sure to update likelihood before bailing out */
2767   Set_Both_Sides(YES,tree);
2768   Lk(NULL,tree);
2769 
2770   l = tree->data->init_len;
2771   n = tree->mod->ns;
2772   /* Delta method for variance. Assumes Jukes and Cantor with p=1/n */
2773   small_var = (1./(l*l))*(1.-1./l)*(n-1.)*(n-1.)/(n-1.-n/l);
2774   for(i=0;i<dim;i++)
2775     if(is_ok[i] == NO)
2776       {
2777 	for(j=0;j<dim;j++)
2778 	  {
2779 	    hessian[i*dim+j] = 0.;
2780 	    hessian[j*dim+i] = 0.;
2781 	  }
2782 	hessian[i*dim+i] = -1./small_var;
2783 
2784 	if(tree->mod->log_l == YES)
2785 	  {
2786 	    hessian[i*dim+i] = small_var * POW(exp(tree->a_edges[i]->l->v),-2);
2787 	    hessian[i*dim+i] = -1./hessian[i*dim+i];
2788 	  }
2789       }
2790 
2791   for(i=0;i<dim;i++)
2792     if(is_ok[i] == YES && hessian[i*dim+i] < -1./small_var)
2793       {
2794   	for(j=0;j<dim;j++)
2795   	  {
2796   	    hessian[i*dim+j] = 0.;
2797   	    hessian[j*dim+i] = 0.;
2798   	  }
2799   	hessian[i*dim+i] = -1./small_var;
2800 
2801   	if(tree->mod->log_l == YES)
2802   	  {
2803   	    hessian[i*dim+i] = small_var * POW(exp(tree->a_edges[i]->l->v),-2);
2804   	    hessian[i*dim+i] = -1./hessian[i*dim+i];
2805   	  }
2806       }
2807 
2808 
2809   for(i=0;i<dim;i++)
2810     {
2811       for(j=0;j<dim;j++)
2812 	{
2813 	  if(fabs(hessian[i*dim+j]-hessian[j*dim+i]) > 1.E-3)
2814 	    {
2815 	      PhyML_Printf("\n== Hessian not symmetrical.");
2816 	      PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__);
2817 	      Exit("\n");
2818 	    }
2819 	  hessian[i*dim+j] = (hessian[i*dim+j] + hessian[j*dim+i]) / 2.;
2820 	  hessian[j*dim+i] = hessian[i*dim+j];
2821 	}
2822     }
2823 
2824   /* printf("\n"); */
2825   /* printf("HESSIAN SEO\n"); */
2826   /* for(i=0;i<dim;i++) */
2827   /*   { */
2828   /*     PhyML_Printf("[%f] ",tree->a_edges[i]->l->v); */
2829   /*     for(j=0;j<dim;j++) */
2830   /* 	{ */
2831   /* 	  PhyML_Printf("%12lf ",hessian[i*dim+j]); */
2832   /* 	} */
2833   /*     PhyML_Printf("\n"); */
2834   /*   } */
2835 
2836   Free(site_hessian);
2837   Free(ori_bl);
2838   Free(plus);
2839   Free(minus);
2840   Free(plusplus);
2841   Free(zero);
2842   Free(inc);
2843   Free(inc_plus);
2844   Free(inc_minus);
2845   Free(is_ok);
2846   Free(gradient);
2847 
2848   return hessian;
2849 
2850 }
2851 
2852 //////////////////////////////////////////////////////////////
2853 //////////////////////////////////////////////////////////////
2854 
2855 
Recurr_Hessian(t_node * a,t_node * d,int plus_minus,phydbl * inc,phydbl * res,int * is_ok,t_tree * tree)2856 void Recurr_Hessian(t_node *a, t_node *d, int plus_minus, phydbl *inc, phydbl *res, int *is_ok, t_tree *tree)
2857 {
2858   int i;
2859   phydbl ori_l;
2860 
2861   for(i=0;i<3;i++)
2862     if(a->v[i] == d)
2863       {
2864 	Update_Partial_Lk(tree,a->b[i],a);
2865 
2866 	ori_l = a->b[i]->l->v;
2867 	if(is_ok[a->b[i]->num])
2868 	  {
2869 	    if(plus_minus > 0) a->b[i]->l->v += inc[a->b[i]->num];
2870 	    else               a->b[i]->l->v -= inc[a->b[i]->num];
2871 	    res[a->b[i]->num] = Lk(a->b[i],tree);
2872 	    a->b[i]->l->v = ori_l;
2873 	    Update_PMat_At_Given_Edge(a->b[i],tree);
2874 	  }
2875 	break;
2876       }
2877 
2878   if(d->tax) return;
2879   else
2880     for(i=0;i<3;i++)
2881       if(d->v[i] != a)
2882 	Recurr_Hessian(d,d->v[i],plus_minus,inc,res,is_ok,tree);
2883 }
2884 
2885 //////////////////////////////////////////////////////////////
2886 //////////////////////////////////////////////////////////////
2887 
2888 
2889 /* Work out the Hessian for the likelihood function. Only logARITHM of branch lengths are considered as variable.
2890    This function is very much inspired from Jeff Thorne's 'hessian' function in his program 'estbranches'. */
Hessian_Log(t_tree * tree)2891 phydbl *Hessian_Log(t_tree *tree)
2892 {
2893   phydbl *hessian;
2894   phydbl *plus_plus, *minus_minus, *plus_zero, *minus_zero, *plus_minus, *zero_zero;
2895   phydbl *ori_bl,*inc,*buff;
2896   int *ok_edges,*is_ok;
2897   int dim;
2898   int n_ok_edges;
2899   int i,j;
2900   phydbl eps;
2901   phydbl lk;
2902 
2903   dim = 2*tree->n_otu-3;
2904   eps = 1.E-4;
2905 
2906   hessian     = (phydbl *)mCalloc((int)dim*dim,sizeof(phydbl));
2907   ori_bl      = (phydbl *)mCalloc((int)dim,    sizeof(phydbl));
2908   plus_plus   = (phydbl *)mCalloc((int)dim*dim,sizeof(phydbl));
2909   minus_minus = (phydbl *)mCalloc((int)dim*dim,sizeof(phydbl));
2910   plus_minus  = (phydbl *)mCalloc((int)dim*dim,sizeof(phydbl));
2911   plus_zero   = (phydbl *)mCalloc((int)dim    ,sizeof(phydbl));
2912   minus_zero  = (phydbl *)mCalloc((int)dim    ,sizeof(phydbl));
2913   zero_zero   = (phydbl *)mCalloc((int)dim    ,sizeof(phydbl));
2914   inc         = (phydbl *)mCalloc((int)dim    ,sizeof(phydbl));
2915   buff        = (phydbl *)mCalloc((int)dim*dim,sizeof(phydbl));
2916   ok_edges    = (int *)mCalloc((int)dim,       sizeof(int));
2917   is_ok       = (int *)mCalloc((int)dim,       sizeof(int));
2918 
2919   Set_Both_Sides(YES,tree);
2920   Lk(NULL,tree);
2921 
2922   for(i=0;i<dim;i++) ori_bl[i] = tree->a_edges[i]->l->v;
2923 
2924   n_ok_edges = 0;
2925   for(i=0;i<dim;i++)
2926     {
2927       if(tree->a_edges[i]->l->v > 3.0/(phydbl)tree->data->init_len)
2928 	{
2929 	  inc[i] = fabs(eps * tree->a_edges[i]->l->v);
2930 	  ok_edges[n_ok_edges] = i;
2931 	  n_ok_edges++;
2932 	  is_ok[i] = 1;
2933 	}
2934       else is_ok[i] = 0;
2935     }
2936 
2937   /* zero zero */
2938   lk = Log_Det(is_ok,tree);
2939   for(i=0;i<dim;i++) if(is_ok[i]) zero_zero[i] = tree->c_lnL+lk;
2940 
2941   /* plus zero */
2942   for(i=0;i<dim;i++)
2943     {
2944       if(is_ok[i])
2945 	{
2946 	  tree->a_edges[i]->l->v += inc[i];
2947 	  lk = Lk(tree->a_edges[i],tree);
2948 	  plus_zero[i] = lk+Log_Det(is_ok,tree);
2949 	  tree->a_edges[i]->l->v = ori_bl[i];
2950 	}
2951     }
2952 
2953 
2954   /* minus zero */
2955   for(i=0;i<dim;i++)
2956     {
2957       if(is_ok[i])
2958 	{
2959 	  tree->a_edges[i]->l->v -= inc[i];
2960 	  lk = Lk(tree->a_edges[i],tree);
2961 	  minus_zero[i] = lk+Log_Det(is_ok,tree);
2962 	  tree->a_edges[i]->l->v = ori_bl[i];
2963 	}
2964     }
2965 
2966   for(i=0;i<dim;i++) Update_PMat_At_Given_Edge(tree->a_edges[i],tree);
2967 
2968   /* plus plus  */
2969   for(i=0;i<dim;i++)
2970     {
2971       if(is_ok[i])
2972 	{
2973 	  tree->a_edges[i]->l->v += inc[i];
2974 	  Update_PMat_At_Given_Edge(tree->a_edges[i],tree);
2975 
2976 	  for(j=0;j<3;j++)
2977 	    if((!tree->a_edges[i]->left->tax) && (tree->a_edges[i]->left->v[j] != tree->a_edges[i]->rght))
2978 	      Recurr_Hessian_Log(tree->a_edges[i]->left,tree->a_edges[i]->left->v[j],1,inc,plus_plus+i*dim,is_ok,tree);
2979 
2980 	  for(j=0;j<3;j++)
2981 	    if((!tree->a_edges[i]->rght->tax) && (tree->a_edges[i]->rght->v[j] != tree->a_edges[i]->left))
2982 	      Recurr_Hessian_Log(tree->a_edges[i]->rght,tree->a_edges[i]->rght->v[j],1,inc,plus_plus+i*dim,is_ok,tree);
2983 
2984 /* 	  for(j=0;j<dim;j++)  */
2985 /* 	    if(j != i) */
2986 /* 	      { */
2987 /* 		if(inc[j] > 0.0) */
2988 /* 		  { */
2989 /* 		    tree->a_edges[j]->l->v += inc[j]; */
2990 /* 		    Lk(tree); */
2991 /* 		    plus_plus[i*dim+j]=tree->c_lnL; */
2992 /* 		    tree->a_edges[j]->l->v = ori_bl[j]; */
2993 /* 		  } */
2994 /* 	      } */
2995 
2996 	  tree->a_edges[i]->l->v = ori_bl[i];
2997 	  Lk(NULL,tree);
2998 	}
2999     }
3000 
3001   /* plus minus */
3002   for(i=0;i<dim;i++)
3003     {
3004       if(is_ok[i])
3005 	{
3006 	  tree->a_edges[i]->l->v += inc[i];
3007 	  Update_PMat_At_Given_Edge(tree->a_edges[i],tree);
3008 
3009 	  for(j=0;j<3;j++)
3010 	    if((!tree->a_edges[i]->left->tax) && (tree->a_edges[i]->left->v[j] != tree->a_edges[i]->rght))
3011 	      Recurr_Hessian_Log(tree->a_edges[i]->left,tree->a_edges[i]->left->v[j],-1,inc,plus_minus+i*dim,is_ok,tree);
3012 
3013 	  for(j=0;j<3;j++)
3014 	    if((!tree->a_edges[i]->rght->tax) && (tree->a_edges[i]->rght->v[j] != tree->a_edges[i]->left))
3015 	      Recurr_Hessian_Log(tree->a_edges[i]->rght,tree->a_edges[i]->rght->v[j],-1,inc,plus_minus+i*dim,is_ok,tree);
3016 
3017 /* 	  for(j=0;j<dim;j++)  */
3018 /* 	    if(j != i) */
3019 /* 	      { */
3020 /* 		if(inc[j] > 0.0) */
3021 /* 		  { */
3022 /* 		    tree->a_edges[j]->l->v -= inc[j]; */
3023 /* 		    Lk(tree); */
3024 /* 		    plus_minus[i*dim+j] = tree->c_lnL; */
3025 /* 		    tree->a_edges[j]->l->v = ori_bl[j]; */
3026 /* 		  } */
3027 /* 	      } */
3028 
3029 	  tree->a_edges[i]->l->v = ori_bl[i];
3030 	  Lk(NULL,tree);
3031 	}
3032     }
3033 
3034 
3035   /* minus minus */
3036   for(i=0;i<dim;i++)
3037     {
3038       if(is_ok[i])
3039 	{
3040 	  tree->a_edges[i]->l->v -= inc[i];
3041 
3042 	  Update_PMat_At_Given_Edge(tree->a_edges[i],tree);
3043 
3044 	  for(j=0;j<3;j++)
3045 	    if((!tree->a_edges[i]->left->tax) && (tree->a_edges[i]->left->v[j] != tree->a_edges[i]->rght))
3046 	      Recurr_Hessian_Log(tree->a_edges[i]->left,tree->a_edges[i]->left->v[j],-1,inc,minus_minus+i*dim,is_ok,tree);
3047 
3048 	  for(j=0;j<3;j++)
3049 	    if((!tree->a_edges[i]->rght->tax) && (tree->a_edges[i]->rght->v[j] != tree->a_edges[i]->left))
3050 	      Recurr_Hessian_Log(tree->a_edges[i]->rght,tree->a_edges[i]->rght->v[j],-1,inc,minus_minus+i*dim,is_ok,tree);
3051 
3052 /* 	  for(j=0;j<dim;j++)  */
3053 /* 	    if(j != i) */
3054 /* 	      { */
3055 /* 		if(inc[j] > 0.0) */
3056 /* 		  { */
3057 /* 		    tree->a_edges[j]->l->v -= inc[j]; */
3058 /* 		    Lk(tree); */
3059 /* 		    minus_minus[i*dim+j] = tree->c_lnL; */
3060 /* 		    tree->a_edges[j]->l->v = ori_bl[j]; */
3061 /* 		  } */
3062 /* 	      } */
3063 
3064 	  tree->a_edges[i]->l->v = ori_bl[i];
3065 	  Lk(NULL,tree);
3066 	}
3067     }
3068 
3069 /*   for(i=0;i<dim;i++) if(is_ok[i]) inc[i] = POW(tree->a_edges[i]->l->v+inc[i],2)-POW(tree->a_edges[i]->l->v,2); */
3070   for(i=0;i<dim;i++) if(is_ok[i]) inc[i] = log(tree->a_edges[i]->l->v+inc[i])-log(tree->a_edges[i]->l->v);
3071 /*   for(i=0;i<dim;i++) inc[i] = 2.*inc[i]; */
3072 /*   for(i=0;i<dim;i++) if(is_ok[i]) inc[i] = SQRT(tree->a_edges[i]->l->v+inc[i])-SQRT(tree->a_edges[i]->l->v); */
3073 
3074   for(i=0;i<dim;i++)
3075     {
3076       if(is_ok[i])
3077 	{
3078 	  hessian[i*dim+i] = (plus_zero[i]-2*zero_zero[i]+minus_zero[i])/(POW(inc[i],2));
3079 
3080 	  for(j=i+1;j<dim;j++)
3081 	    {
3082 	      if(is_ok[j])
3083 		{
3084 		  hessian[i*dim+j] =
3085 		    (plus_plus[i*dim+j]-plus_minus[i*dim+j]-plus_minus[j*dim+i]+minus_minus[i*dim+j])/
3086 		    (4*inc[i]*inc[i]);
3087 		  hessian[j*dim+i] = hessian[i*dim+j];
3088 		}
3089 	    }
3090 	}
3091     }
3092 
3093 
3094   for(i=0;i<n_ok_edges;i++)
3095     {
3096       for(j=0;j<n_ok_edges;j++)
3097 	{
3098 	  buff[i*n_ok_edges+j] = -hessian[ok_edges[i]*dim+ok_edges[j]];
3099 	}
3100     }
3101 
3102   if(!Matinv(buff,n_ok_edges,n_ok_edges,NO))
3103     {
3104       PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
3105       Exit("\n");
3106     }
3107 
3108   for(i=0;i<n_ok_edges;i++)
3109     {
3110       for(j=0;j<n_ok_edges;j++)
3111 	{
3112 	  hessian[ok_edges[i]*dim+ok_edges[j]] = buff[i*n_ok_edges+j];
3113 	}
3114     }
3115 
3116   /* Approximate variance for very short branches */
3117   for(i=0;i<dim;i++)
3118     if(!is_ok[i])
3119       {
3120 	hessian[i*dim+i] = 1./POW(tree->data->init_len,2);
3121       }
3122 
3123   if(!Matinv(hessian,dim,dim,NO))
3124     {
3125       PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
3126       Exit("\n");
3127     }
3128 
3129   For(i,dim*dim) hessian[i] = -1.0*hessian[i];
3130 
3131 /*   for(i=0;i<dim;i++) */
3132 /*     { */
3133 /*       PhyML_Printf("[%f] ",tree->a_edges[i]->l->v); */
3134 /*       For(j,i+1) */
3135 /* 	{ */
3136 /* 	  PhyML_Printf("%12lf ",hessian[i*dim+j]); */
3137 /* 	} */
3138 /*       PhyML_Printf("\n"); */
3139 /*     } */
3140 
3141 /*   Matinv(hessian,dim,dim); */
3142 
3143 /*   PhyML_Printf("\n"); */
3144 
3145   for(i=0;i<dim;i++)
3146     {
3147       PhyML_Printf("[%f] ",tree->a_edges[i]->l->v);
3148       For(j,i+1)
3149 	{
3150 	  PhyML_Printf("%12lf ",hessian[i*dim+j]);
3151 	}
3152       PhyML_Printf("\n");
3153     }
3154 /*   Exit("\n"); */
3155 
3156 
3157   Free(ori_bl);
3158   Free(plus_plus);
3159   Free(minus_minus);
3160   Free(plus_zero);
3161   Free(minus_zero);
3162   Free(plus_minus);
3163   Free(zero_zero);
3164   Free(inc);
3165   Free(buff);
3166   Free(ok_edges);
3167   Free(is_ok);
3168 
3169   return hessian;
3170 
3171 }
3172 
3173 //////////////////////////////////////////////////////////////
3174 //////////////////////////////////////////////////////////////
3175 
3176 
Recurr_Hessian_Log(t_node * a,t_node * d,int plus_minus,phydbl * inc,phydbl * res,int * is_ok,t_tree * tree)3177 void Recurr_Hessian_Log(t_node *a, t_node *d, int plus_minus, phydbl *inc, phydbl *res, int *is_ok, t_tree *tree)
3178 {
3179   int i;
3180   phydbl ori_l;
3181 
3182   for(i=0;i<3;i++)
3183     if(a->v[i] == d)
3184       {
3185 	Update_Partial_Lk(tree,a->b[i],a);
3186 
3187 	ori_l = a->b[i]->l->v;
3188 	if(is_ok[a->b[i]->num])
3189 	  {
3190 	    if(plus_minus > 0) a->b[i]->l->v += inc[a->b[i]->num];
3191 	    else               a->b[i]->l->v -= inc[a->b[i]->num];
3192 	    res[a->b[i]->num]  = Lk(a->b[i],tree);
3193 	    res[a->b[i]->num] += Log_Det(is_ok,tree);
3194 	    a->b[i]->l->v = ori_l;
3195 	    Update_PMat_At_Given_Edge(a->b[i],tree);
3196 	  }
3197 	break;
3198       }
3199 
3200   if(d->tax) return;
3201   else
3202     for(i=0;i<3;i++)
3203       if(d->v[i] != a)
3204 	Recurr_Hessian_Log(d,d->v[i],plus_minus,inc,res,is_ok,tree);
3205 }
3206 
3207 //////////////////////////////////////////////////////////////
3208 //////////////////////////////////////////////////////////////
3209 
3210 
Log_Det(int * is_ok,t_tree * tree)3211 phydbl Log_Det(int *is_ok, t_tree *tree)
3212 {
3213   int i;
3214   phydbl ldet;
3215 
3216   ldet = 0.0;
3217 /*   For(i,2*tree->n_otu-3) if(is_ok[i]) ldet += log(2.*SQRT(tree->a_edges[i]->l->v)); */
3218   For(i,2*tree->n_otu-3) if(is_ok[i]) ldet += log(tree->a_edges[i]->l->v);
3219 /*   For(i,2*tree->n_otu-3) if(is_ok[i]) ldet -= log(2*tree->a_edges[i]->l->v); */
3220 
3221   return ldet;
3222 
3223 }
3224 
3225 //////////////////////////////////////////////////////////////
3226 //////////////////////////////////////////////////////////////
3227 
Normal_Trunc_Mean(phydbl mu,phydbl sd,phydbl min,phydbl max)3228 phydbl Normal_Trunc_Mean(phydbl mu, phydbl sd, phydbl min, phydbl max)
3229 {
3230   phydbl mean;
3231 
3232   mean = mu + sd *
3233     (Dnorm((min-mu)/sd,0.,1.)-Dnorm((max-mu)/sd,0.,1.))/
3234     (Pnorm((max-mu)/sd,0.,1.)-Pnorm((min-mu)/sd,0.,1.));
3235   return mean;
3236 }
3237 
3238 //////////////////////////////////////////////////////////////
3239 //////////////////////////////////////////////////////////////
3240 
3241 
Constraint_Normal_Trunc_Mean(phydbl wanted_mu,phydbl sd,phydbl min,phydbl max)3242 phydbl Constraint_Normal_Trunc_Mean(phydbl wanted_mu, phydbl sd, phydbl min, phydbl max)
3243 {
3244   int j;
3245   phydbl dx,f,fmid,xmid,rtb;
3246   phydbl x1, x2;
3247 
3248   x1 = min;
3249   x2 = max;
3250 
3251   f    = Normal_Trunc_Mean(x1,sd,min,max) - wanted_mu;
3252   fmid = Normal_Trunc_Mean(x2,sd,min,max) - wanted_mu;
3253 
3254   if(f*fmid >= 0.0)
3255     {
3256       PhyML_Printf("\n. Root must be bracketed for bisection!");
3257       PhyML_Printf("\n. f=%f fmid=%f",f,fmid);
3258       PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
3259       Exit("\n");
3260     }
3261 
3262   rtb = f < 0.0 ? (dx=x2-x1,x1) : (dx=x1-x2,x2);
3263 
3264   for(j=0;j<100;j++)
3265     {
3266       xmid=rtb+(dx *= 0.5);
3267       fmid=Normal_Trunc_Mean(xmid,sd,min,max)-wanted_mu;
3268       if(fmid <= 0.0) rtb=xmid;
3269       if(fmid > -1.E-10 && fmid < 1.E-10) return rtb;
3270     }
3271 
3272   Exit("Too many bisections in RTBIS");
3273   return(-1.);
3274 }
3275 
3276 //////////////////////////////////////////////////////////////
3277 //////////////////////////////////////////////////////////////
3278 
3279 
Matinv(phydbl * x,const int n,const int m,const int verbose)3280 int Matinv(phydbl *x, const int n, const int m, const int verbose)
3281 {
3282 
3283 /* x[n*m]  ... m>=n
3284 */
3285 
3286    int i,j,k;
3287    int *irow;
3288    phydbl ee, t,t1,xmax;
3289    phydbl det;
3290 
3291    ee = 1.0E-10;
3292    det = 1.0;
3293 
3294    irow = (int *)mCalloc(n,sizeof(int));
3295 
3296    for(i=0;i<n;++i)
3297      {
3298        xmax = 0.;
3299        for (j=i; j<n; j++)
3300          if (xmax < fabs(x[j*m+i]))
3301 	   {
3302 	     xmax = fabs(x[j*m+i]);
3303 	     irow[i]=j;
3304 	   }
3305 
3306       det *= xmax;
3307       if (xmax < ee)
3308 	{
3309 	  Free(irow);
3310 	  if(verbose)
3311 	    {
3312 	      PhyML_Printf("\n. xmax=%g",xmax);
3313 	      PhyML_Printf("\n. Determinant becomes zero at %3d!\t", i+1);
3314 	      PhyML_Printf("\n. Failed to invert the matrix.");
3315 	    }
3316 	  return(0);
3317 	}
3318       if (irow[i] != i)
3319 	{
3320 	  for (j=0;j<m;++j)
3321 	    {
3322 	      t = x[i*m+j];
3323 	      x[i*m+j] = x[irow[i]*m+j];
3324 	      x[irow[i]*m+j] = t;
3325 	    }
3326 	}
3327       t = 1./x[i*m+i];
3328       for (j=0;j<n;++j)
3329 	{
3330 	  if (j == i) continue;
3331 	  t1 = t*x[j*m+i];
3332 	  for(k=0;k<m;k++)  x[j*m+k] -= t1*x[i*m+k];
3333 	  x[j*m+i] = -t1;
3334 	}
3335       for(j=0;j<m;j++)   x[i*m+j] *= t;
3336       x[i*m+i] = t;
3337    }                            /* i  */
3338    for (i=n-1; i>=0; i--)
3339      {
3340        if (irow[i] == i) continue;
3341        for(j=0;j<n;j++)
3342 	 {
3343 	   t = x[j*m+i];
3344 	   x[j*m+i] = x[j*m + irow[i]];
3345 	   x[j*m + irow[i]] = t;
3346 	 }
3347      }
3348 
3349    Free(irow);
3350    return (1);
3351 
3352 /*   int i, j, k, lower, upper; */
3353 /*   phydbl temp; */
3354 /*   phydbl *a; */
3355 /*   int nsize; */
3356 
3357 /*   nsize = n; */
3358 /*   a = x; */
3359 
3360 /*   /\*Gauss-Jordan reduction -- invert matrix a in place, */
3361 /*          overwriting previous contents of a.  On exit, matrix a */
3362 /*          contains the inverse.*\/ */
3363 /*   lower = 0; */
3364 /*   upper = nsize-1; */
3365 /*   for(i = lower; i <= upper; i++)  */
3366 /*     { */
3367 /*       temp = 1.0 / a[i*n+i]; */
3368 /*       a[i*n+i] = 1.0; */
3369 /*       for (j = lower; j <= upper; j++)  */
3370 /* 	{ */
3371 /* 	  a[i*n+j] *= temp; */
3372 /* 	} */
3373 /*       for (j = lower; j <= upper; j++)  */
3374 /* 	{ */
3375 /* 	  if (j != i)  */
3376 /* 	    { */
3377 /* 	      temp = a[j*n+i]; */
3378 /* 	      a[j*n+i] = 0.0; */
3379 /* 	      for (k = lower; k <= upper; k++)  */
3380 /* 		{ */
3381 /* 		  a[j*n+k] -= temp * a[i*n+k]; */
3382 /* 		}	       */
3383 /* 	    } */
3384 /* 	} */
3385 /*     } */
3386 
3387   return(1);
3388 
3389 }
3390 
3391 //////////////////////////////////////////////////////////////
3392 //////////////////////////////////////////////////////////////
3393 
3394 
Iter_Matinv(phydbl * x,int n,int m,int verbose)3395 int Iter_Matinv(phydbl *x, int n, int m, int verbose)
3396 {
3397   phydbl *buff;
3398   int i,iter;
3399   phydbl scaler;
3400   int pb;
3401 
3402   buff = (phydbl *)mCalloc(n*m,sizeof(phydbl));
3403 
3404   pb = NO;
3405   iter   = 0;
3406   scaler = 1.;
3407   For(i,n*m) buff[i] = x[i];
3408   while(!Matinv(buff,n,m,verbose))
3409     {
3410       pb = YES;
3411       For(i,n*m) buff[i] = x[i];
3412       scaler *= 10.;
3413       For(i,n*m) buff[i] *= scaler;
3414       iter++;
3415 
3416       if(iter > 100)
3417 	{
3418 	  PhyML_Printf("\n== Err in file %s at line %d.",__FILE__,__LINE__);
3419 	  return 0;
3420 	}
3421     }
3422   if(pb)  PhyML_Printf("\n== Managed to fix the problem by rescaling the matrix.");
3423   For(i,n*m) x[i] = buff[i]*scaler;
3424   Free(buff);
3425   return 1;
3426 }
3427 
3428 
3429 //////////////////////////////////////////////////////////////
3430 //////////////////////////////////////////////////////////////
3431 
3432 
Matrix_Mult(phydbl * A,phydbl * B,int nra,int nca,int nrb,int ncb)3433 phydbl *Matrix_Mult(phydbl *A, phydbl *B, int nra, int nca, int nrb, int ncb)
3434 {
3435   int i,j,k;
3436   phydbl *C;
3437 
3438   C = (phydbl *)mCalloc(nra*ncb,sizeof(phydbl));
3439 
3440   if(nca != nrb)
3441     {
3442       PhyML_Printf("\n. Matrices dimensions don't match.");
3443       PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
3444       Exit("\n");
3445     }
3446 
3447   for(i=0;i<nra;i++)
3448     for(j=0;j<ncb;j++)
3449        for(k=0;k<nca;k++)
3450          C[i*ncb+j] += A[i*nca+k] * B[k*ncb+j];
3451 
3452   return C;
3453 }
3454 
3455 //////////////////////////////////////////////////////////////
3456 //////////////////////////////////////////////////////////////
3457 
3458 
Matrix_Transpose(phydbl * A,int dim)3459 phydbl *Matrix_Transpose(phydbl *A, int dim)
3460 {
3461   phydbl *tA,buff;
3462   int i,j;
3463 
3464   tA = (phydbl *)mCalloc(dim*dim,sizeof(phydbl));
3465 
3466   For(i,dim*dim) tA[i]=A[i];
3467 
3468   for(i=0;i<dim;i++) for(j=i+1;j<dim;j++)
3469     {
3470       buff        = tA[i*dim+j];
3471       tA[i*dim+j] = tA[j*dim+i];
3472       tA[j*dim+i]  = buff;
3473     }
3474 
3475   return tA;
3476 }
3477 
3478 //////////////////////////////////////////////////////////////
3479 //////////////////////////////////////////////////////////////
3480 
3481 
Matrix_Det(phydbl * A,int size,int _log)3482 phydbl Matrix_Det(phydbl *A, int size, int _log)
3483 {
3484   phydbl *triA;
3485   int i;
3486   phydbl det;
3487 
3488   triA = Cholesky_Decomp(A,size);
3489   det = 0.0;
3490   for(i=0;i<size;i++) det += log(triA[i*size+i]);
3491   Free(triA);
3492 
3493   if(_log == NO)
3494     {
3495       det = exp(det);
3496       return det*det;
3497     }
3498   else
3499     {
3500       return 2.*det;
3501     }
3502 }
3503 
3504 //////////////////////////////////////////////////////////////
3505 //////////////////////////////////////////////////////////////
3506 
3507 
3508 /* http://en.wikipedia.org/wiki/Multivariate_normal_distribution (Conditional distributions) */
Normal_Conditional(phydbl * mu,phydbl * cov,phydbl * a,int n,short int * is_1,int n1,phydbl * cond_mu,phydbl * cond_cov)3509 void Normal_Conditional(phydbl *mu, phydbl *cov, phydbl *a, int n, short int *is_1, int n1, phydbl *cond_mu, phydbl *cond_cov)
3510 {
3511   phydbl *mu1,*mu2;
3512   phydbl *sig11,*sig12,*sig21,*sig22,*sig12_invsig22,*buff;
3513   phydbl *ctrd_a;
3514   phydbl *cond_cov_norder,*cond_mu_norder;
3515   int    n2;
3516   int i,j,nr,nc;
3517   phydbl *buff_mat;
3518 
3519   n2 = n-n1;
3520 
3521   mu1             = (phydbl *)mCalloc(n1,   sizeof(phydbl));
3522   mu2             = (phydbl *)mCalloc(n2,   sizeof(phydbl));
3523   sig11           = (phydbl *)mCalloc(n1*n1,sizeof(phydbl));
3524   sig12           = (phydbl *)mCalloc(n1*n2,sizeof(phydbl));
3525   sig21           = (phydbl *)mCalloc(n2*n1,sizeof(phydbl));
3526   sig22           = (phydbl *)mCalloc(n2*n2,sizeof(phydbl));
3527   ctrd_a          = (phydbl *)mCalloc(n2,   sizeof(phydbl));
3528   cond_cov_norder = (phydbl *)mCalloc(n1*n1,sizeof(phydbl));
3529   cond_mu_norder  = (phydbl *)mCalloc(n1*n1,sizeof(phydbl));
3530   buff_mat        = (phydbl *)mCalloc(n2*n2,sizeof(phydbl));
3531 
3532   nr=0;
3533   for(i=0;i<n;i++) { if(!is_1[i]) { ctrd_a[nr] = a[i]-mu[i]; nr++; } }
3534 
3535   nr=0;
3536   for(i=0;i<n;i++) { if( is_1[i]) { mu1[nr] = mu[i]; nr++; } }
3537 
3538   nr=0;
3539   for(i=0;i<n;i++) { if(!is_1[i]) { mu2[nr] = mu[i]; nr++; } }
3540 
3541   nr=0; nc=0;
3542   for(i=0;i<n;i++)
3543     {
3544       if(is_1[i])
3545 	{
3546 	  nc = nr;
3547  	  for(j=i;j<n;j++)
3548 /* 	  nc = 0; */
3549 /* 	  for(j=0;j<n;j++) */
3550 	    {
3551 	      if(is_1[j])
3552 		{
3553 		  sig11[nr*n1+nc] = cov[i*n+j];
3554 		  sig11[nc*n1+nr] = cov[i*n+j];
3555 		  nc++;
3556 		}
3557 	    }
3558 	  nr++;
3559 	}
3560     }
3561 
3562 
3563   nr=0; nc=0;
3564   for(i=0;i<n;i++)
3565     {
3566       if(is_1[i])
3567 	{
3568 /* 	  nc = nr; */
3569 /*  	  for(j=i;j<n;j++) */
3570 	  nc = 0;
3571 	  for(j=0;j<n;j++)
3572 	    {
3573 	      if(!is_1[j])
3574 		{
3575 		  sig12[nr*n2+nc] = cov[i*n+j];
3576 /* 		  sig12[nc*n2+nr] = cov[i*n+j]; */
3577 		  nc++;
3578 		}
3579 	    }
3580 	  nr++;
3581 	}
3582     }
3583 
3584   nr=0; nc=0;
3585   for(i=0;i<n;i++)
3586     {
3587       if(!is_1[i])
3588 	{
3589 /* 	  nc = nr; */
3590 /* 	  for(j=i;j<n;j++) */
3591 	  nc = 0;
3592 	  for(j=0;j<n;j++)
3593 	    {
3594 	      if(is_1[j])
3595 		{
3596 		  sig21[nr*n1+nc] = cov[i*n+j];
3597 /* 		  sig21[nc*n1+nr] = cov[i*n+j]; */
3598 		  nc++;
3599 		}
3600 	    }
3601 	  nr++;
3602 	}
3603     }
3604 
3605 
3606   nr=0; nc=0;
3607   for(i=0;i<n;i++)
3608     {
3609       if(!is_1[i])
3610 	{
3611 	  nc = nr;
3612 	  for(j=i;j<n;j++)
3613 /* 	  nc = 0; */
3614 /* 	  for(j=0;j<n;j++) */
3615 	    {
3616 	      if(!is_1[j])
3617 		{
3618 		  sig22[nr*n2+nc] = cov[i*n+j];
3619  		  sig22[nc*n2+nr] = cov[i*n+j];
3620 		  nc++;
3621 		}
3622 	    }
3623 	  nr++;
3624 	}
3625     }
3626 
3627   Iter_Matinv(sig22,n2,n2,NO);
3628 
3629   sig12_invsig22 = Matrix_Mult(sig12,sig22,n1,n2,n2,n2);
3630 
3631   buff = Matrix_Mult(sig12_invsig22,ctrd_a,n1,n2,n2,1);
3632   for(i=0;i<n1;i++) cond_mu_norder[i] = mu1[i]+buff[i];
3633   Free(buff);
3634 
3635   buff = Matrix_Mult(sig12_invsig22,sig21,n1,n2,n2,n1);
3636   for(i=0;i<n1;i++) for(j=0;j<n1;j++) cond_cov_norder[i*n1+j] = sig11[i*n1+j] - buff[i*n1+j];
3637   Free(buff);
3638 
3639   nr = 0;
3640   for(i=0;i<n;i++) if(is_1[i]) { cond_mu[i] = cond_mu_norder[nr]; nr++; }
3641 
3642   nr = nc = 0;
3643   for(i=0;i<n;i++)
3644     {
3645       if(is_1[i])
3646 	{
3647 	  nc = 0;
3648 	  for(j=0;j<n;j++)
3649 	    {
3650 	      if(is_1[j])
3651 		{
3652 		  cond_cov[i*n+j] = cond_cov_norder[nr*n1+nc];
3653 		  nc++;
3654 		}
3655 	    }
3656 	  nr++;
3657 	}
3658     }
3659 
3660 /*   for(i=0;i<n1;i++) */
3661 /*     { */
3662 /*       for(j=i;j<n1;j++) */
3663 /* 	if(fabs(cond_cov_norder[i*n1+j] - cond_cov_norder[j*n1+i]) > 1.E-3) */
3664 /* 	  { */
3665 /* 	    PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__); */
3666 /* 	    Warn_And_Exit(""); */
3667 /* 	  } */
3668 /*     } */
3669 
3670 
3671   for(i=0;i<n;i++)
3672     {
3673       for(j=i+1;j<n;j++)
3674 	if(fabs(cond_cov[i*n+j] - cond_cov[j*n+i]) > 1.E-3)
3675 	  {
3676 	    PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
3677 	    Warn_And_Exit("");
3678 	  }
3679     }
3680 
3681   Free(mu1);
3682   Free(mu2);
3683   Free(sig11);
3684   Free(sig12);
3685   Free(sig21);
3686   Free(sig22);
3687   Free(ctrd_a);
3688   Free(cond_cov_norder);
3689   Free(cond_mu_norder);
3690   Free(sig12_invsig22);
3691   Free(buff_mat);
3692 }
3693 
3694 
3695 //////////////////////////////////////////////////////////////
3696 //////////////////////////////////////////////////////////////
3697 
3698 
3699 /* http://en.wikipedia.org/wiki/Multivariate_normal_distribution (Conditional distributions) */
Normal_Conditional_Unsorted(phydbl * mu,phydbl * cov,phydbl * a,int n,short int * is_1,int n1,phydbl * cond_mu,phydbl * cond_cov)3700 void Normal_Conditional_Unsorted(phydbl *mu, phydbl *cov, phydbl *a, int n, short int *is_1, int n1, phydbl *cond_mu, phydbl *cond_cov)
3701 {
3702   phydbl *mu1,*mu2;
3703   phydbl *sig11,*sig12,*sig21,*sig22,*sig12_invsig22,*buff;
3704   phydbl *ctrd_a;
3705   int    n2;
3706   int i,j,nr,nc;
3707 
3708   n2 = n-n1;
3709 
3710   mu1             = (phydbl *)mCalloc(n1,   sizeof(phydbl));
3711   mu2             = (phydbl *)mCalloc(n2,   sizeof(phydbl));
3712   sig11           = (phydbl *)mCalloc(n1*n1,sizeof(phydbl));
3713   sig12           = (phydbl *)mCalloc(n1*n2,sizeof(phydbl));
3714   sig21           = (phydbl *)mCalloc(n2*n1,sizeof(phydbl));
3715   sig22           = (phydbl *)mCalloc(n2*n2,sizeof(phydbl));
3716   ctrd_a          = (phydbl *)mCalloc(n2,   sizeof(phydbl));
3717 
3718   nr=0;
3719   for(i=0;i<n;i++) { if(!is_1[i]) { ctrd_a[nr] = a[i]-mu[i]; nr++; } }
3720 
3721   nr=0;
3722   for(i=0;i<n;i++) { if( is_1[i]) { mu1[nr] = mu[i]; nr++; } }
3723 
3724   nr=0;
3725   for(i=0;i<n;i++) { if(!is_1[i]) { mu2[nr] = mu[i]; nr++; } }
3726 
3727   nr=0; nc=0;
3728   for(i=0;i<n;i++)
3729     {
3730       if(is_1[i])
3731 	{
3732 	  nc = nr;
3733  	  for(j=i;j<n;j++)
3734 /* 	  nc = 0; */
3735 /* 	  for(j=0;j<n;j++) */
3736 	    {
3737 	      if(is_1[j])
3738 		{
3739 		  sig11[nr*n1+nc] = cov[i*n+j];
3740 		  sig11[nc*n1+nr] = cov[i*n+j];
3741 		  nc++;
3742 		}
3743 	    }
3744 	  nr++;
3745 	}
3746     }
3747 
3748 
3749   nr=0; nc=0;
3750   for(i=0;i<n;i++)
3751     {
3752       if(is_1[i])
3753 	{
3754 /* 	  nc = nr; */
3755 /*  	  for(j=i;j<n;j++) */
3756 	  nc = 0;
3757 	  for(j=0;j<n;j++)
3758 	    {
3759 	      if(!is_1[j])
3760 		{
3761 		  sig12[nr*n2+nc] = cov[i*n+j];
3762 /* 		  sig12[nc*n2+nr] = cov[i*n+j]; */
3763 		  nc++;
3764 		}
3765 	    }
3766 	  nr++;
3767 	}
3768     }
3769 
3770   nr=0; nc=0;
3771   for(i=0;i<n;i++)
3772     {
3773       if(!is_1[i])
3774 	{
3775 /* 	  nc = nr; */
3776 /* 	  for(j=i;j<n;j++) */
3777 	  nc = 0;
3778 	  for(j=0;j<n;j++)
3779 	    {
3780 	      if(is_1[j])
3781 		{
3782 		  sig21[nr*n1+nc] = cov[i*n+j];
3783 /* 		  sig21[nc*n1+nr] = cov[i*n+j]; */
3784 		  nc++;
3785 		}
3786 	    }
3787 	  nr++;
3788 	}
3789     }
3790 
3791 
3792   nr=0; nc=0;
3793   for(i=0;i<n;i++)
3794     {
3795       if(!is_1[i])
3796 	{
3797 	  nc = nr;
3798 	  for(j=i;j<n;j++)
3799 /* 	  nc = 0; */
3800 /* 	  for(j=0;j<n;j++) */
3801 	    {
3802 	      if(!is_1[j])
3803 		{
3804 		  sig22[nr*n2+nc] = cov[i*n+j];
3805  		  sig22[nc*n2+nr] = cov[i*n+j];
3806 		  nc++;
3807 		}
3808 	    }
3809 	  nr++;
3810 	}
3811     }
3812 
3813   if(!Matinv(sig22,n2,n2,NO))
3814     {
3815       PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
3816       Exit("\n");
3817     }
3818   sig12_invsig22 = Matrix_Mult(sig12,sig22,n1,n2,n2,n2);
3819 
3820   buff = Matrix_Mult(sig12_invsig22,ctrd_a,n1,n2,n2,1);
3821   for(i=0;i<n1;i++) cond_mu[i] = mu1[i]+buff[i];
3822   Free(buff);
3823 
3824   buff = Matrix_Mult(sig12_invsig22,sig21,n1,n2,n2,n1);
3825   for(i=0;i<n1;i++) for(j=0;j<n1;j++) cond_cov[i*n1+j] = sig11[i*n1+j] - buff[i*n1+j];
3826 
3827 
3828   Free(mu1);
3829   Free(mu2);
3830   Free(sig11);
3831   Free(sig12);
3832   Free(sig21);
3833   Free(sig22);
3834   Free(ctrd_a);
3835   Free(sig12_invsig22);
3836 }
3837 
3838 
3839 //////////////////////////////////////////////////////////////
3840 //////////////////////////////////////////////////////////////
3841 
3842 
3843 /* http://en.wikipedia.org/wiki/Multivariate_normal_distribution (Conditional distributions) */
Get_Reg_Coeff(phydbl * mu,phydbl * cov,phydbl * a,int n,short int * is_1,int n1,phydbl * reg_coeff)3844 void Get_Reg_Coeff(phydbl *mu, phydbl *cov, phydbl *a, int n, short int *is_1, int n1, phydbl *reg_coeff)
3845 {
3846   phydbl *sig12,*sig22,*sig12_invsig22;
3847   int    n2;
3848   int    i,j,nr,nc;
3849 
3850   n2 = n-n1;
3851 
3852   sig12 = (phydbl *)mCalloc(n1*n2,sizeof(phydbl));
3853   sig22 = (phydbl *)mCalloc(n2*n2,sizeof(phydbl));
3854 
3855   nr=0; nc=0;
3856   for(i=0;i<n;i++)
3857     {
3858       if(is_1[i])
3859 	{
3860 	  nc = 0;
3861 	  for(j=0;j<n;j++)
3862 	    {
3863 	      if(!is_1[j])
3864 		{
3865 		  sig12[nr*n2+nc] = cov[i*n+j];
3866 		  nc++;
3867 		}
3868 	    }
3869 	  nr++;
3870 	}
3871     }
3872 
3873 
3874   nr=0; nc=0;
3875   for(i=0;i<n;i++)
3876     {
3877       if(!is_1[i])
3878 	{
3879 	  nc = nr;
3880 	  for(j=i;j<n;j++)
3881 	    {
3882 	      if(!is_1[j])
3883 		{
3884 		  sig22[nr*n2+nc] = cov[i*n+j];
3885  		  sig22[nc*n2+nr] = cov[i*n+j];
3886 		  nc++;
3887 		}
3888 	    }
3889 	  nr++;
3890 	}
3891     }
3892 
3893 
3894   if(!Matinv(sig22,n2,n2,NO))
3895     {
3896       PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
3897       Exit("\n");
3898     }
3899   sig12_invsig22 = Matrix_Mult(sig12,sig22,n1,n2,n2,n2);
3900 
3901 
3902   for(i=0;i<n;i++) reg_coeff[i] = 0.0;
3903 
3904 /*   nr = 0; */
3905 /*   for(i=0;i<n;i++) if(!is_1[i]) { reg_coeff[i] = sig12_invsig22[nr]; nr++; } */
3906 
3907   nc = 0;
3908   nr = 0;
3909   for(i=0;i<n1;i++)
3910     {
3911       nc = 0;
3912       for(j=0;j<n;j++)
3913 	if(!is_1[j])
3914 	  {
3915 	    reg_coeff[i*n+j] = sig12_invsig22[nr*n2+nc];
3916 	    nc++;
3917 	  }
3918       nr++;
3919     }
3920 
3921 
3922   if(nc != n2 || nr != n1)
3923     {
3924       PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
3925       Exit("\n");
3926     }
3927 
3928 
3929   Free(sig12);
3930   Free(sig22);
3931   Free(sig12_invsig22);
3932 }
3933 
3934 
3935 //////////////////////////////////////////////////////////////
3936 //////////////////////////////////////////////////////////////
3937 
3938 
Norm_Trunc_Sd(phydbl mu,phydbl sd,phydbl a,phydbl b)3939 phydbl Norm_Trunc_Sd(phydbl mu, phydbl sd, phydbl a, phydbl b)
3940 {
3941   phydbl pdfa, pdfb;
3942   phydbl cdfa, cdfb;
3943   phydbl ctra, ctrb;
3944   phydbl cond_var;
3945   phydbl cdfbmcdfa;
3946 
3947   ctra = (a - mu)/sd;
3948   ctrb = (b - mu)/sd;
3949 
3950   pdfa = Dnorm(ctra,0.0,1.0);
3951   pdfb = Dnorm(ctrb,0.0,1.0);
3952 
3953   cdfa = Pnorm(ctra,0.0,1.0);
3954   cdfb = Pnorm(ctrb,0.0,1.0);
3955 
3956   cdfbmcdfa = cdfb - cdfa;
3957 
3958   if(cdfbmcdfa < SMALL)
3959     {
3960       cdfbmcdfa = SMALL;
3961       PhyML_Printf("\n. mu=%G sd=%G a=%G b=%G",mu,sd,a,b);
3962       PhyML_Printf("\n. Numerical precision issue detected.");
3963       PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
3964     }
3965 
3966   cond_var = sd*sd*(1. + (ctra*pdfa - ctrb*pdfb)/cdfbmcdfa - POW((pdfa - pdfb)/cdfbmcdfa,2));
3967 
3968   return SQRT(cond_var);
3969 }
3970 
3971 //////////////////////////////////////////////////////////////
3972 //////////////////////////////////////////////////////////////
3973 
3974 
Norm_Trunc_Mean(phydbl mu,phydbl sd,phydbl a,phydbl b)3975 phydbl Norm_Trunc_Mean(phydbl mu, phydbl sd, phydbl a, phydbl b)
3976 {
3977   phydbl pdfa, pdfb;
3978   phydbl cdfa, cdfb;
3979   phydbl ctra, ctrb;
3980   phydbl cond_mu;
3981   phydbl cdfbmcdfa;
3982 
3983   ctra = (a - mu)/sd;
3984   ctrb = (b - mu)/sd;
3985 
3986   pdfa = Dnorm(ctra,0.0,1.0);
3987   pdfb = Dnorm(ctrb,0.0,1.0);
3988 
3989   cdfa = Pnorm(ctra,0.0,1.0);
3990   cdfb = Pnorm(ctrb,0.0,1.0);
3991 
3992   cdfbmcdfa = cdfb - cdfa;
3993 
3994   if(cdfbmcdfa < SMALL)
3995     {
3996       cdfbmcdfa = SMALL;
3997       PhyML_Printf("\n. mu=%G sd=%G a=%G b=%G",mu,sd,a,b);
3998       PhyML_Printf("\n. Numerical precision issue detected.");
3999       PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
4000     }
4001 
4002   cond_mu = mu + sd*(pdfa - pdfb)/cdfbmcdfa;
4003 
4004   return cond_mu;
4005 }
4006 
4007 //////////////////////////////////////////////////////////////
4008 //////////////////////////////////////////////////////////////
4009 
4010 
Norm_Trunc_Mean_Sd(phydbl mu,phydbl sd,phydbl a,phydbl b,phydbl * trunc_mu,phydbl * trunc_sd)4011 int Norm_Trunc_Mean_Sd(phydbl mu, phydbl sd, phydbl a, phydbl b, phydbl *trunc_mu, phydbl *trunc_sd)
4012 {
4013 
4014   phydbl pdfa, pdfb;
4015   phydbl cdfa, cdfb;
4016   phydbl ctra, ctrb;
4017   phydbl cdfbmcdfa;
4018 
4019   ctra = (a - mu)/sd;
4020   ctrb = (b - mu)/sd;
4021 
4022   pdfa = Dnorm(ctra,0.0,1.0);
4023   pdfb = Dnorm(ctrb,0.0,1.0);
4024 
4025   cdfa = Pnorm(ctra,0.0,1.0);
4026   cdfb = Pnorm(ctrb,0.0,1.0);
4027 
4028   cdfbmcdfa = cdfb - cdfa;
4029 
4030   if(cdfbmcdfa < SMALL)
4031     {
4032       cdfbmcdfa = SMALL;
4033       PhyML_Printf("\n. mu=%G sd=%G a=%G b=%G",mu,sd,a,b);
4034       PhyML_Printf("\n. cdfa=%G cdfb=%G",cdfa,cdfb);
4035       PhyML_Printf("\n. Numerical precision issue detected.");
4036       PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
4037       return 0;
4038     }
4039 
4040   *trunc_mu = mu + sd*(pdfa - pdfb)/cdfbmcdfa;
4041   *trunc_sd = sd*sd*(1. + (ctra*pdfa - ctrb*pdfb)/cdfbmcdfa - POW((pdfa - pdfb)/cdfbmcdfa,2));
4042   *trunc_sd = SQRT(*trunc_sd);
4043   return 1;
4044 }
4045 
4046 //////////////////////////////////////////////////////////////
4047 //////////////////////////////////////////////////////////////
4048 
VarCov_Approx_Likelihood(t_tree * tree)4049 void VarCov_Approx_Likelihood(t_tree *tree)
4050 {
4051   int i,j;
4052   phydbl *cov;
4053   phydbl *mean;
4054   int dim;
4055   int iter;
4056   phydbl cur_mean,new_mean,diff_mean,max_diff_mean;
4057   phydbl cur_cov,new_cov,diff_cov,max_diff_cov;
4058   FILE *fp;
4059 
4060 
4061   cov = tree->rates->cov_l;
4062   mean = tree->rates->mean_l;
4063   dim = 2*tree->n_otu-3;
4064 
4065   fp = fopen("covariance","w");
4066   fprintf(fp,"\n");
4067   fprintf(fp,"Run\t");
4068   fprintf(fp,"lnL\t");
4069   for(i=0;i<dim;i++) fprintf(fp,"Edge%d[%f]\t",i,tree->rates->u_ml_l[i]);
4070 
4071 
4072   for(i=0;i<dim;i++)     mean[i] = .0;
4073   For(i,dim*dim) cov[i]  = .0;
4074 
4075   MCMC_Randomize_Branch_Lengths(tree);
4076 
4077   /* For(i,2*tree->n_otu-3) tree->a_edges[i]->l->v *= Rgamma(5.,1./5.); */
4078 
4079   Set_Both_Sides(YES,tree);
4080   Lk(NULL,tree);
4081 
4082   iter = 0;
4083   do
4084     {
4085       /* tree->both_sides = YES; */
4086       /* Lk(tree); */
4087       MCMC_Br_Lens(tree);
4088       /* MCMC_Scale_Br_Lens(tree); */
4089 
4090 
4091       max_diff_mean = 0.0;
4092       for(i=0;i<dim;i++)
4093 	{
4094 	  cur_mean = mean[i];
4095 
4096 	  mean[i] *= (phydbl)iter;
4097 	  mean[i] += tree->a_edges[i]->l->v;
4098 	  mean[i] /= (phydbl)(iter+1);
4099 
4100 	  new_mean = mean[i];
4101 	  diff_mean = MAX(cur_mean,new_mean)/MIN(cur_mean,new_mean);
4102 	  if(diff_mean > max_diff_mean) max_diff_mean = diff_mean;
4103 	  /* printf("\n. %d diff_mean = %f %f %f %f",i,diff_mean,cur_mean,new_mean,tree->a_edges[i]->l->v); */
4104 	}
4105 
4106       max_diff_cov = 0.0;
4107       for(i=0;i<dim;i++)
4108 	{
4109 	  for(j=0;j<dim;j++)
4110 	    {
4111 	      cur_cov = cov[i*dim+j];
4112 
4113 	      cov[i*dim+j] *= (phydbl)iter;
4114 	      cov[i*dim+j] += tree->a_edges[i]->l->v * tree->a_edges[j]->l->v;
4115 	      cov[i*dim+j] /= (phydbl)(iter+1);
4116 
4117 	      new_cov = cov[i*dim+j];
4118 	      diff_cov = MAX(cur_cov,new_cov)/MIN(cur_cov,new_cov);
4119 	      if(diff_cov > max_diff_cov) max_diff_cov = diff_cov;
4120 	    }
4121 	}
4122       iter++;
4123 
4124       /* if(!(iter%10)) */
4125       /* printf("\n. iter=%d max_diff_mean=%f max_diff_cov=%f",iter,max_diff_mean,max_diff_cov); */
4126 
4127       /* if(iter && max_diff_mean < 1.01 && max_diff_cov < 1.01) break; */
4128 
4129       if(!(iter%20))
4130 	{
4131 	  fprintf(fp,"\n");
4132 	  fprintf(fp,"%d\t",iter);
4133 	  fprintf(fp,"%f\t",tree->c_lnL);
4134  	  for(i=0;i<dim;i++) fprintf(fp,"%f\t",tree->a_edges[i]->l->v);
4135 	  fflush(NULL);
4136 	}
4137 
4138     }while(iter < 5000);
4139 
4140 
4141   for(i=0;i<dim;i++)
4142     {
4143       for(j=0;j<dim;j++)
4144 	{
4145 	  cov[i*dim+j] = cov[i*dim+j] - mean[i]*mean[j];
4146 	  if(i == j && cov[i*dim+j] < MIN_VAR_BL) cov[i*dim+j] = MIN_VAR_BL;
4147 	}
4148     }
4149 
4150   fclose(fp);
4151 }
4152 
4153 //////////////////////////////////////////////////////////////
4154 //////////////////////////////////////////////////////////////
4155 
4156 
4157 /* Order statistic. x_is are uniformily distributed in [min,max] */
Dorder_Unif(phydbl x,int r,int n,phydbl min,phydbl max)4158 phydbl Dorder_Unif(phydbl x, int r, int n, phydbl min, phydbl max)
4159 {
4160   phydbl cons;
4161   phydbl Fx;
4162   phydbl dens;
4163 
4164   if(x < min || x > max || min > max)
4165     {
4166       PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
4167       Exit("\n");
4168     }
4169 
4170   cons = LnGamma(n+1) - LnGamma(r) - LnGamma(n-r+1);
4171   cons = exp(cons);
4172   cons = ROUND(cons);
4173 
4174   Fx = (x-min)/(max-min);
4175 
4176   dens = cons * pow(Fx,r-1) * pow(1.-Fx,n-r) * (1./(max-min));
4177 
4178   /* printf("\n. x=%f r=%d n=%d min=%f max=%f dens=%f",x,r,n,min,max,dens); */
4179   /* Exit("\n"); */
4180 
4181   return(dens);
4182 }
4183 
4184 //////////////////////////////////////////////////////////////
4185 //////////////////////////////////////////////////////////////
4186 
4187 
Covariance(phydbl * x,phydbl * y,int n)4188 phydbl Covariance(phydbl *x, phydbl *y, int n)
4189 {
4190   int i;
4191   phydbl mean_x,mean_y,mean_xy;
4192 
4193   mean_x = .0;
4194   for(i=0;i<n;i++) mean_x += x[i];
4195   mean_x /= (phydbl)n;
4196 
4197   mean_y = .0;
4198   for(i=0;i<n;i++) mean_y += y[i];
4199   mean_y /= (phydbl)n;
4200 
4201   mean_xy = .0;
4202   for(i=0;i<n;i++) mean_xy += x[i]*y[i];
4203   mean_xy /= (phydbl)n;
4204 
4205   return (mean_xy - mean_x*mean_y);
4206 }
4207 
4208 //////////////////////////////////////////////////////////////
4209 //////////////////////////////////////////////////////////////
4210 
4211 /* Sample X from a multivariate normal with mean mu and covariance cov, within
4212    the interval [min,max], under the linear constraint X.lambda=k
4213 */
4214 
Rnorm_Multid_Trunc_Constraint(phydbl * mu,phydbl * cov,phydbl * min,phydbl * max,phydbl * lambda,phydbl k,phydbl * res,int len)4215 phydbl *Rnorm_Multid_Trunc_Constraint(phydbl *mu, phydbl *cov, phydbl *min, phydbl *max, phydbl *lambda, phydbl k, phydbl *res, int len)
4216 {
4217 
4218   phydbl *loc_res;
4219   int i,j,cond,iter;
4220   phydbl *x;
4221   phydbl cond_mean,cond_var;
4222   phydbl cov_zic,cov_zii,cov_zcc;
4223   phydbl mean_zi, mean_zc;
4224   phydbl alpha;
4225   phydbl sum;
4226   int err;
4227   phydbl zi;
4228 
4229 
4230   cond    = 0;
4231 
4232   loc_res = NULL;
4233   if(!res)
4234     {
4235       loc_res = (phydbl *)mCalloc(len,sizeof(phydbl));
4236       x = loc_res;
4237     }
4238   else x = res;
4239 
4240 
4241 
4242   /* zi = x[i] . lambda[i] */
4243 
4244   iter = 0;
4245   do
4246     {
4247       sum = 0.0;
4248       for(i=0;i<len;i++)
4249 	{
4250 	  if(i != cond)
4251 	    {
4252 	      cov_zic = lambda[i]    * lambda[cond] * cov[i*len+cond];
4253 	      cov_zii = lambda[i]    * lambda[i]    * cov[i*len+i];
4254 	      cov_zcc = lambda[cond] * lambda[cond] * cov[cond*len+cond];
4255 
4256 	      mean_zi = lambda[i];
4257 	      mean_zc = lambda[cond];
4258 
4259 	      /* alpha = k - \sum_{j != cond, j !=i} z_j */
4260 	      alpha = k;
4261 	      for(j=0;j<len;j++) if(j != cond && j != i) alpha -= lambda[j] * x[j];
4262 
4263 	      cond_mean = mean_zi + (cov_zii + cov_zic) / (cov_zii + 2.*cov_zic + cov_zcc) * (alpha - mean_zi - mean_zc);
4264 	      cond_var  = cov_zii - POW(cov_zii + cov_zic,2)/(cov_zii + 2.*cov_zic + cov_zcc);
4265 
4266 	      if(lambda[i]*min[i] > alpha - lambda[cond]*min[i])
4267 		{
4268 		  PhyML_Printf("\n. Cannot satisfy the constraint.\n");
4269 		  PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
4270 		  Exit("\n");
4271 		}
4272 
4273 	      err = NO;
4274 	      zi = Rnorm_Trunc(cond_mean,SQRT(cond_var),
4275 			       MAX(lambda[i]*min[i],alpha-lambda[cond]*max[cond]),
4276 			       MIN(lambda[i]*max[i],alpha-lambda[cond]*min[cond]),&err);
4277 	      if(err == YES)
4278 		{
4279 		  PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
4280 		  Exit("\n");
4281 		}
4282 	      sum += zi;
4283 	      x[i] = zi / lambda[i];
4284 	    }
4285 	}
4286 
4287       x[cond] = (k - sum)/lambda[cond];
4288 
4289     }while(iter++ < 10);
4290 
4291   return(loc_res);
4292 
4293 }
4294 
4295 //////////////////////////////////////////////////////////////
4296 //////////////////////////////////////////////////////////////
4297 
Integrated_Brownian_Bridge_Moments(phydbl x_beg,phydbl x_end,phydbl y_beg,phydbl y_end,phydbl brownian_var,phydbl * mean,phydbl * var)4298 void Integrated_Brownian_Bridge_Moments(phydbl x_beg, phydbl x_end,
4299 					phydbl y_beg, phydbl y_end,
4300 					phydbl brownian_var, phydbl *mean, phydbl *var)
4301 {
4302   /* phydbl *y; */
4303   /* phydbl *y_mean; */
4304   /* int n_rep; */
4305   int n_breaks;
4306   int i;
4307   /* int j; */
4308   /* phydbl traj_mean, traj_sd; */
4309   /* phydbl x_prev, x_curr; */
4310   phydbl x;
4311   phydbl x_step;
4312   phydbl sum;
4313   /* phydbl sumsum; */
4314   phydbl scaled_var;
4315 
4316   scaled_var = brownian_var/fabs(x_end - x_beg);
4317 
4318   n_breaks = 100;
4319 
4320 
4321   /* n_rep    = 500;   */
4322 
4323   /* x_step   = (x_end - x_beg)/(n_breaks+1); */
4324 
4325   /* y      = (phydbl *)mCalloc(n_breaks+2,sizeof(phydbl)); */
4326   /* y_mean = (phydbl *)mCalloc(n_rep,sizeof(phydbl)); */
4327 
4328   /* y[0] = y_beg; */
4329   /* y[n_breaks+1] = y_end; */
4330 
4331   /* for(i=0;i<n_rep;i++) */
4332   /*   { */
4333   /*     for(j=1;j<n_breaks+1;j++) */
4334   /* 	{ */
4335   /* 	  x_prev = x_beg + (j-1)*x_step; */
4336   /* 	  x_curr = x_prev + x_step; */
4337 
4338   /* 	  traj_mean = y[j-1] + (y_end - y[j-1])*(x_curr - x_prev)/(x_end - x_prev); */
4339   /* 	  traj_sd   = SQRT(scaled_var*(x_curr - x_prev)*(x_end - x_curr)/(x_end - x_prev)); */
4340 
4341   /* 	  if(isnan(traj_mean) || isnan(traj_sd)) */
4342   /* 	    { */
4343   /* 	      PhyML_Printf("\n. traj_mean=%f traj_sd=%f x_end=%f x_prev=%f x_step=%f [%f %f %f %f %f %f %f] j=%d n_breaks=%d", */
4344   /* 			   traj_mean,traj_sd,x_end,x_prev,x_step, */
4345   /* 			   y[j-1],y_end,y[j-1],x_curr,x_prev,x_end,x_prev,j,n_breaks); */
4346   /* 	      Exit("\n"); */
4347   /* 	    } */
4348 
4349   /* 	  y[j] = Rnorm(traj_mean,traj_sd); */
4350 
4351   /* 	  if(isnan(y[j]) || isinf(y[j])) */
4352   /* 	    { */
4353   /* 	      printf("\n. mean=%f sd=%f %f j=%d y[j]=%f",traj_sd,traj_mean,Rnorm(traj_mean,traj_sd),j,y[j]); */
4354   /* 	      Exit("\n"); */
4355   /* 	    } */
4356 
4357   /* 	} */
4358 
4359   /*     sum = 0.0; */
4360   /*     For(j,n_breaks+2) sum += fabs(y[j]); */
4361   /*     y_mean[i] = sum/(n_breaks+2); */
4362   /*   } */
4363 
4364   /* sum = sumsum = 0.0; */
4365   /* for(i=0;i<n_rep;i++) */
4366   /*   { */
4367   /*     sum += y_mean[i]; */
4368   /*     sumsum += y_mean[i] * y_mean[i]; */
4369   /*   } */
4370 
4371   /* *mean = sum/n_rep; */
4372   /* *var = sumsum/n_rep - (*mean) * (*mean); */
4373 
4374   /* if(isnan(*mean) || isnan(*var)) */
4375   /*   { */
4376   /*     PhyML_Printf("\n. sum=%f sumsum=%f n_rep=%d",sum,sumsum,n_rep); */
4377   /*     Exit("\n"); */
4378   /*   } */
4379 
4380   /* Free(y); */
4381   /* Free(y_mean); */
4382 
4383   /* /\* printf("\n. [%f %f]",*mean,*var); *\/ */
4384 
4385   phydbl mux,six;
4386 
4387   x_step = (x_end - x_beg)/(n_breaks+1);
4388   sum = y_beg;
4389   for(i=1;i<n_breaks+1;i++)
4390     {
4391       x = x_beg + i*x_step;
4392 
4393       mux = y_beg + (y_end - y_beg)*(x - x_beg)/(x_end - x_beg);
4394       six = SQRT(scaled_var*(x - x_beg)*(x_end - x)/(x_end - x_beg));
4395 
4396       sum +=
4397 	(2.*six)/SQRT(2.*PI)*exp(-POW(mux,2)/(2.*POW(six,2))) +
4398 	2.*mux*Pnorm(mux/six,.0,1.) - mux;
4399     }
4400   sum += y_end;
4401 
4402   (*mean) = sum / (n_breaks+2.);
4403   (*var)  = (1./12.)*scaled_var*(x_end - x_beg);
4404 
4405   /* printf(" [%f %f] -- x_beg=%f x_end=%f y_beg=%f y_end=%f sd=%f", */
4406   /* 	 (*mean),(*var),x_beg,x_end,y_beg,y_end,brownian_var); */
4407 }
4408 
4409 
4410 //////////////////////////////////////////////////////////////
4411 //////////////////////////////////////////////////////////////
4412 
4413 //////////////////////////////////////////////////////////////
4414 //////////////////////////////////////////////////////////////
4415 
4416 //////////////////////////////////////////////////////////////
4417 //////////////////////////////////////////////////////////////
4418 
4419 
4420 /* Let X'(t) = A + (B-A)t/T + X(t) and X(t) = W(t) + t/T * W(T),
4421 i.e., X(t) is a Brownian bridge starting at 0 at t=0 and stopping
4422 at value 1 at time t=T. X'(t) starts at X'(t)=A at t=0 and stops at X'(t)=B at
4423 t=T. This function calculates the mean and variance of
4424 Z(T) = 1/T \int_0^T exp(X'(t)) dt.
4425 */
4426 
Integrated_Geometric_Brownian_Bridge_Moments(phydbl T,phydbl A,phydbl B,phydbl u,phydbl * mean,phydbl * var)4427 void Integrated_Geometric_Brownian_Bridge_Moments(phydbl T, phydbl A, phydbl B, phydbl u, phydbl *mean, phydbl *var)
4428 {
4429   Integrated_Geometric_Brownian_Bridge_Mean(T,A,B,u,mean);
4430   Integrated_Geometric_Brownian_Bridge_Var(T,A,B,u,var);
4431 }
4432 
4433 //////////////////////////////////////////////////////////////
4434 //////////////////////////////////////////////////////////////
4435 
Integrated_Geometric_Brownian_Bridge_Mean(phydbl T,phydbl A,phydbl B,phydbl u,phydbl * mean)4436 void Integrated_Geometric_Brownian_Bridge_Mean(phydbl T, phydbl A, phydbl B, phydbl u, phydbl *mean)
4437 {
4438   phydbl pnormarg1 = log(B/A)/sqrt(u*u*T) + .5*sqrt(u*u*T);
4439   phydbl pnormarg2 = pnormarg1 - sqrt(u*u*T);
4440 
4441   /* printf("\n. %G %G",pnormarg1,pnormarg2); */
4442 
4443   if((pnormarg1 >  2.0 && pnormarg2 >  2.0) ||
4444      (pnormarg1 < -2.0 && pnormarg2 < -2.0))
4445     {
4446       // Proposition 5.1 in Privaut & Guindon, 2015, Journal of mathematical biology 71 (6-7), 1387-1409
4447       *mean = T*(B-A)/log(B/A)+u*u*T*T*((A+B)/(2.*pow(log(B/A),2)) - (B-A)/pow(log(B/A),3));
4448     }
4449   else
4450     {
4451       // Proposition 3.3 in Privaut & Guindon, 2015, Journal of mathematical biology 71 (6-7), 1387-1409
4452       *mean =
4453         (A/(u*u))*sqrt(2.*PI*u*u*T)*exp(pow(u*u*T/2.+log(B/A),2)/(2.*u*u*T))*
4454         (Pnorm(pnormarg1,0.,1.) -
4455          Pnorm(pnormarg2,0.,1.));
4456     }
4457 
4458   *mean = *mean/T;
4459 }
4460 
4461 //////////////////////////////////////////////////////////////
4462 //////////////////////////////////////////////////////////////
4463 
Integrated_Geometric_Brownian_Bridge_Var(phydbl T,phydbl A,phydbl B,phydbl u,phydbl * var)4464 void Integrated_Geometric_Brownian_Bridge_Var(phydbl T, phydbl A, phydbl B, phydbl u, phydbl *var)
4465 {
4466 
4467   double U = T*u*u;
4468   double logz = log(B/A);
4469   double z = B/A;
4470   double m;
4471 
4472   phydbl pnormarg1 = log(B/A)/sqrt(u*u*T) + .5*sqrt(u*u*T);
4473   phydbl pnormarg2 = pnormarg1 - sqrt(u*u*T);
4474 
4475   if((pnormarg1 >  2.0 && pnormarg2 >  2.0) ||
4476      (pnormarg1 < -2.0 && pnormarg2 < -2.0))
4477     {
4478       // Proposition 5.1 in Privaut & Guindon, 2015, Journal of mathematical biology 71 (6-7), 1387-1409
4479       *var = 0.0;
4480     }
4481   else
4482     {
4483       // Proposition 3.3 in Privaut & Guindon, 2015, Journal of mathematical biology 71 (6-7), 1387-1409
4484       *var =
4485         2.*sqrt(2.*PI*U)*exp(pow(U   +logz,2)/(2.*U))*(Pnorm(logz/sqrt(U) +    sqrt(U),0.0,1.0)-Pnorm(logz/sqrt(U) -    sqrt(U),0.0,1.0)) -
4486  (1.+z)*2.*sqrt(2.*PI*U)*exp(pow(U/2.+logz,2)/(2.*U))*(Pnorm(logz/sqrt(U) + .5*sqrt(U),0.0,1.0)-Pnorm(logz/sqrt(U) - .5*sqrt(U),0.0,1.0));
4487       *var = *var * pow(A,2) / pow(u,4);
4488       Integrated_Geometric_Brownian_Bridge_Mean(T,A,B,u,&m);
4489       *var = *var / pow(T,2);
4490       *var = *var - m*m;
4491     }
4492   if(*var < 0.0) *var = 0.0;
4493 }
4494 
4495 //////////////////////////////////////////////////////////////
4496 //////////////////////////////////////////////////////////////
4497 // Inverse method to sample from X where P(X=xi)=pi[i]
4498 
Sample_i_With_Proba_pi(phydbl * pi,int len)4499 int Sample_i_With_Proba_pi(phydbl *pi, int len)
4500 {
4501   phydbl *cum_pi;
4502   int i;
4503   phydbl u;
4504 
4505   cum_pi = (phydbl *)mCalloc(len,sizeof(phydbl));
4506 
4507   u = .0;
4508   for(i=0;i<len;i++) u += pi[i];
4509   for(i=0;i<len;i++) cum_pi[i] = pi[i] / u;
4510   for(i=1;i<len;i++) cum_pi[i] += cum_pi[i-1];
4511 
4512   if((cum_pi[i-1] > 1. + 1.E-10) || (cum_pi[i-1] < 1. - 1.E-10))
4513     {
4514       PhyML_Printf("\n== Sum of probabilities is different from 1.0 (%f).",cum_pi[i-1]);
4515       PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
4516       Exit("\n");
4517     }
4518 
4519   i = 0;
4520   u = Uni();
4521   for(i=0;i<len;i++) if(cum_pi[i] > u) break;
4522 
4523   if(i == len)
4524     {
4525       for(i=0;i<len;i++) printf("\n== idx:%d prob:%g",i,pi[i]);
4526       PhyML_Printf("\n== Len = %d",len);
4527       PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
4528       Exit("\n");
4529     }
4530   Free(cum_pi);
4531 
4532   return(i);
4533 }
4534 
4535 //////////////////////////////////////////////////////////////
4536 //////////////////////////////////////////////////////////////
4537 // Same method as previous one, but returning n_elts sampled
4538 // elements at once, avoiding to recreate cum_pi for each sample.
4539 // Alias Method (https://en.wikipedia.org/wiki/Alias_method)
4540 // Inspired from:
4541 // https://jugit.fz-juelich.de/mlz/ransampl/blob/master/lib/ransampl.c
4542 // and
4543 // https://possiblywrong.wordpress.com/2012/02/05/the-alias-method-and-double-precision/
Sample_n_i_With_Proba_pi(phydbl * pi,int len,int n_elts)4544 int* Sample_n_i_With_Proba_pi(phydbl *pi, int len, int n_elts)
4545 {
4546   int i,n;
4547   phydbl sum;
4548   int *sampled;
4549   int *alias;
4550   phydbl *prob, *p;
4551   int *small, *large;
4552   int num_small = 0, num_large = 0;
4553   int a, g;
4554 
4555   alias = mCalloc(len, sizeof(int));
4556   prob = mCalloc(len, sizeof(phydbl));
4557   p = mCalloc(len, sizeof(phydbl));
4558   small = mCalloc(len, sizeof(int));
4559   large = mCalloc(len, sizeof(int));
4560 
4561   sampled = (int *)mCalloc(n_elts,sizeof(int));
4562 
4563   sum = .0;
4564   for(i=0;i<len;i++)
4565     {
4566       if( pi[i]<0 )
4567 	{
4568 	  PhyML_Printf("\n== Probability < 0");
4569 	  PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
4570 	  Exit("\n");
4571 	}
4572       sum += pi[i];
4573     }
4574 
4575   if(sum == 0. )
4576     {
4577       PhyML_Printf("\n== Sum of probabilities is 0");
4578       PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
4579       Exit("\n");
4580     }
4581 
4582   for(i=0;i<len;i++) p[i] = pi[i] * len / sum;
4583 
4584   num_small = 0;
4585   num_large = 0;
4586   for ( i=len-1; i>=0; --i )
4587     {
4588       if ( p[i]<1 )
4589 	small[num_small++] = i;
4590       else
4591 	large[num_large++] = i;
4592     }
4593 
4594   while ( num_small && num_large )
4595     {
4596       a = small[--num_small];
4597       g = large[--num_large];
4598       prob[a] = p[a];
4599       alias[a] = g;
4600       p[g] = p[g] + p[a] - 1;
4601       if ( p[g] < 1 )
4602 	small[num_small++] = g;
4603       else
4604 	large[num_large++] = g;
4605     }
4606 
4607   while ( num_large )
4608     prob[ large[--num_large] ] = 1;
4609 
4610   while ( num_small )
4611       prob[ small[--num_small] ] = 1;
4612 
4613   Free( p );
4614   Free( small );
4615   Free( large );
4616 
4617   for(n=0; n<n_elts;n++)
4618     {
4619       phydbl r1 = Uni();
4620       phydbl r2 = Uni();
4621       i = (int) (len * r1);
4622       sampled[n] = (r2 < prob[i] ? i : alias[i]);
4623     }
4624 
4625   Free(alias );
4626   Free(prob );
4627 
4628   return(sampled);
4629 }
4630 
4631 
4632 //////////////////////////////////////////////////////////////
4633 //////////////////////////////////////////////////////////////
4634 
4635 // Return the value y such that Prob(x<y) = p
Quantile(phydbl * x,int len,phydbl p)4636 phydbl Quantile(phydbl *x, int len, phydbl p)
4637 {
4638   phydbl *y,q;
4639   int i;
4640   int swap;
4641   phydbl buff;
4642 
4643   y = (phydbl *)mCalloc(len,sizeof(phydbl));
4644   for(i=0;i<len;i++) y[i] = x[i];
4645 
4646   do
4647     {
4648       swap = NO;
4649       for(i=0;i<len-1;i++)
4650         {
4651           if(y[i+1] < y[i])
4652             {
4653               swap = YES;
4654 
4655               buff = y[i+1];
4656               y[i+1] = y[i];
4657               y[i] = buff;
4658             }
4659         }
4660     }
4661   while(swap == YES);
4662 
4663   q = y[(int)((len-1)*p)];
4664 
4665   Free(y);
4666 
4667   return(q);
4668 
4669 }
4670 
4671 
4672 //////////////////////////////////////////////////////////////
4673 //////////////////////////////////////////////////////////////
4674 
4675 // Return p such that Prob(x<z) = p
Prob(phydbl * x,int len,phydbl z)4676 phydbl Prob(phydbl *x, int len, phydbl z)
4677 {
4678   int i;
4679   phydbl hit;
4680 
4681   hit = 0.;
4682   for(i=0;i<len;i++) if(x[i] < z) hit+=1.;
4683 
4684   return(hit/(phydbl)len);
4685 
4686 }
4687 
4688 //////////////////////////////////////////////////////////////
4689 //////////////////////////////////////////////////////////////
4690 
4691 // Return x  where mu is the first moment of the normal density
4692 // and x is the value such that f(x;mu,sigma)=y
4693 
Inverse_Truncated_Normal(phydbl y,phydbl mu,phydbl sigma,phydbl lim_inf,phydbl lim_sup)4694 phydbl Inverse_Truncated_Normal(phydbl y, phydbl mu, phydbl sigma, phydbl lim_inf, phydbl lim_sup)
4695 {
4696   phydbl p_inf, p_sup;
4697 
4698   p_inf = Pnorm(lim_inf,mu,sigma);
4699   p_sup = Pnorm(lim_sup,mu,sigma);
4700 
4701   /* return(mu + sigma * SQRT(-log( y * y * (p_sup - p_inf) * (p_sup - p_inf) * 2 * PI * sigma * sigma))); */
4702   return(mu + sigma * SQRT(-log( y * y * (p_sup - p_inf) * (p_sup - p_inf) * 2. * PI * sigma * sigma)));
4703 }
4704 
4705 //////////////////////////////////////////////////////////////
4706 //////////////////////////////////////////////////////////////
4707 // Returns a vector with a permutation of all the integers from 0
4708 // to len-1. Fisher-Yates algorithm.
4709 
Permutate(int len)4710 int *Permutate(int len)
4711 {
4712   int i,pos,tmp;
4713   int *x;
4714 
4715   x = (int *)mCalloc(len,sizeof(int));
4716 
4717   for(i=0;i<len;i++) x[i] = i;
4718 
4719   for(i=0;i<len;i++)
4720     {
4721       pos = Rand_Int(i,len-1);
4722 
4723       tmp    = x[i];
4724       x[i]   = x[pos];
4725       x[pos] = tmp;
4726     }
4727 
4728   return(x);
4729 }
4730 
4731 //////////////////////////////////////////////////////////////
4732 //////////////////////////////////////////////////////////////
4733 // Returns the p-value for the Mantel test of correlation between
4734 // matrices x and y.
4735 
Mantel(phydbl * x,phydbl * y,int nrow,int ncol)4736 phydbl Mantel(phydbl *x, phydbl *y, int nrow, int ncol)
4737 {
4738 
4739   phydbl obs_stat;  // Value of the statistic on the observed data
4740   phydbl mc_stat;   // Value of the statistics on the Monte Carlo generated data
4741   int N;
4742   phydbl sumx, sumy, sumxy, sumxx, sumyy;
4743   int i,j,k;
4744   int npermut;
4745   int *permut;
4746   phydbl p_val;
4747 
4748   N = nrow*ncol;
4749 
4750   sumx = .0;
4751   for(i=0;i<N;i++) sumx += x[i];
4752 
4753   sumy = .0;
4754   for(i=0;i<N;i++) sumy += y[i];
4755 
4756   sumxx = .0;
4757   for(i=0;i<N;i++) sumxx += x[i]*x[i];
4758 
4759   sumyy = .0;
4760   for(i=0;i<N;i++) sumyy += y[i]*y[i];
4761 
4762   sumxy = .0;
4763   for(i=0;i<N;i++) sumxy += x[i]*y[i];
4764 
4765   obs_stat = (N * sumxy - sumx * sumy) / (SQRT((N-1)*sumxx - (sumx/N)*(sumx/N)) * SQRT((N-1)*sumyy - (sumy/N)*(sumy/N)));
4766 
4767   npermut = 1000;
4768   p_val   = 0.0;
4769   for(k=0;k<npermut;k++)
4770     {
4771       permut = Permutate(nrow);
4772 
4773       sumxy = .0;
4774       for(i=0;i<nrow;i++)
4775         {
4776           for(j=0;j<ncol;j++)
4777             {
4778               sumxy += x[i*ncol+j] * y[permut[i]*ncol+permut[j]];
4779             }
4780         }
4781 
4782       mc_stat = (N * sumxy - sumx * sumy) / (SQRT((N-1)*sumxx - (sumx/N)*(sumx/N)) * SQRT((N-1)*sumyy - (sumy/N)*(sumy/N)));
4783 
4784       Free(permut);
4785 
4786       if(mc_stat > obs_stat) p_val += 1.;
4787     }
4788 
4789   return(p_val / (phydbl)npermut);
4790 
4791 }
4792 
4793 //////////////////////////////////////////////////////////////
4794 //////////////////////////////////////////////////////////////
4795 
Weighted_Mean(phydbl * x,phydbl * w,int l)4796 phydbl Weighted_Mean(phydbl *x, phydbl *w, int l)
4797 {
4798   int i;
4799   phydbl wm;
4800   wm = .0;
4801   if(w) for(i=0;i<l;i++) wm += x[i]*w[i];
4802   else
4803     {
4804       for(i=0;i<l;i++) wm += x[i];
4805       wm /= (phydbl)l;
4806     }
4807   return(wm);
4808 }
4809 
4810 //////////////////////////////////////////////////////////////
4811 //////////////////////////////////////////////////////////////
4812 
Variance(phydbl * x,int l)4813 phydbl Variance(phydbl *x, int l)
4814 {
4815   phydbl mean,sum;
4816   int i;
4817 
4818   mean = Weighted_Mean(x,NULL,l);
4819   sum = 0.0;
4820   for(i=0;i<l;i++) sum += x[i]*x[i];
4821 
4822   return(sum/l - mean*mean);
4823 }
4824 
4825 //////////////////////////////////////////////////////////////
4826 //////////////////////////////////////////////////////////////
4827 
Sum_Bits(int value,int range)4828 int Sum_Bits(int value, int range)
4829 {
4830   int i;
4831   int sum;
4832 
4833   if(range > 8*(int)sizeof(int))
4834     {
4835       PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
4836       Exit("\n");
4837     }
4838 
4839   sum = 0;
4840   for(i=0;i<range;i++)
4841     {
4842       sum += (value >> i) & 1;
4843     }
4844 
4845   return(sum);
4846 }
4847 
4848 //////////////////////////////////////////////////////////////
4849 //////////////////////////////////////////////////////////////
4850 
Modulo(int a,int b)4851 int Modulo (int a, int b)
4852 {
4853    if(b < 0)
4854      return Modulo(-a, -b);
4855    int ret = a % b;
4856    if(ret < 0)
4857      ret+=b;
4858    return ret;
4859 }
4860 
4861 //////////////////////////////////////////////////////////////
4862 //////////////////////////////////////////////////////////////
4863 
Runif_Disk(phydbl * sampled_x,phydbl * sampled_y,phydbl centrx,phydbl centry,phydbl radius)4864 void Runif_Disk(phydbl *sampled_x, phydbl *sampled_y, phydbl centrx, phydbl centry, phydbl radius)
4865 {
4866   phydbl r,theta;
4867 
4868   r     = Uni();
4869   theta = Uni()*2.*PI;
4870 
4871   (*sampled_x) = SQRT(r)*COS(theta);
4872   (*sampled_y) = SQRT(r)*SIN(theta);
4873 
4874   (*sampled_x) *= radius;
4875   (*sampled_y) *= radius;
4876 
4877   (*sampled_x) += centrx;
4878   (*sampled_y) += centry;
4879 
4880 }
4881 
4882 //////////////////////////////////////////////////////////////
4883 //////////////////////////////////////////////////////////////
4884 
Random_String(char * s,int len)4885 void Random_String(char *s, int len)
4886 {
4887   int i;
4888   for(i=0;i<len;i++) s[i] = Rand_Int(97,121);
4889   s[i] = '\0';
4890 }
4891 
4892 //////////////////////////////////////////////////////////////
4893 //////////////////////////////////////////////////////////////
4894 
Random_Permut(int n)4895 int *Random_Permut(int n)
4896 {
4897   int *permut;
4898   int i,j;
4899   int tmp;
4900 
4901   if(n < 3)
4902     {
4903       PhyML_Printf("\n== Number of vertices in a polygon has to be at least 3.");
4904       Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
4905     }
4906 
4907   permut = (int *)mCalloc(n,sizeof(int));
4908 
4909   for(i=0;i<n;i++) permut[i] = i;
4910 
4911   for(i=0;i<n-1;i++)
4912     {
4913       j = Rand_Int(i,n-1);
4914       tmp = permut[i];
4915       permut[i] = permut[j];
4916       permut[j] = tmp;
4917     }
4918 
4919   return(permut);
4920 }
4921 
4922 //////////////////////////////////////////////////////////////
4923 //////////////////////////////////////////////////////////////
4924 
4925 /* Generate a random polygon with n vertices. Each point lies in [0,1] */
Rpoly(int n)4926 t_poly *Rpoly(int n)
4927 {
4928   t_poly *p;
4929   int i;
4930 
4931   p = (t_poly *)Make_Poly(n);
4932   p->n_poly_vert = n;
4933 
4934   for(i=0;i<n;i++)
4935     {
4936       p->poly_vert[i]->lonlat[0] = Uni();
4937       p->poly_vert[i]->lonlat[1] = Uni();
4938     }
4939 
4940   return(p);
4941 }
4942 
4943 //////////////////////////////////////////////////////////////
4944 //////////////////////////////////////////////////////////////
4945 
Area_Of_Poly_Monte_Carlo(t_poly * poly,t_geo_coord * lim)4946 phydbl Area_Of_Poly_Monte_Carlo(t_poly *poly, t_geo_coord *lim)
4947 {
4948   int n_hit,n_trials,trial;
4949   t_geo_coord *point;
4950 
4951   point = (t_geo_coord *)GEO_Make_Geo_Coord(2);
4952 
4953   n_trials = 1E+7;
4954   trial = 0;
4955   n_hit = 0;
4956   do
4957     {
4958       point->lonlat[0] = Uni()*lim->lonlat[0];
4959       point->lonlat[1] = Uni()*lim->lonlat[1];
4960       if(Is_In_Polygon(point,poly) == YES) n_hit++;
4961       trial++;
4962     }
4963   while(trial < n_trials);
4964 
4965 
4966   Free_Geo_Coord(point);
4967 
4968   return((phydbl)(n_hit)/n_trials*lim->lonlat[0]*lim->lonlat[1]);
4969 }
4970 
4971 /*////////////////////////////////////////////////////////////
4972 ////////////////////////////////////////////////////////////*/
4973 
Is_In_Polygon(t_geo_coord * point,t_poly * poly)4974 int Is_In_Polygon(t_geo_coord *point, t_poly *poly)
4975 {
4976   int i,j;
4977   phydbl x,y,x1,y1,x2,y2;
4978   phydbl x_intersect;
4979   short int is_in;
4980 
4981   assert(point);
4982   assert(poly);
4983 
4984   /* Coordinates of the point to test */
4985   x = point->lonlat[0];
4986   y = point->lonlat[1];
4987 
4988   j = poly->n_poly_vert-1;
4989   is_in = NO;
4990   for(i=0;i<poly->n_poly_vert;i++)
4991     {
4992       /* Edge of polygon goes from (x1,y1) to (x2,y2) */
4993       x1 = poly->poly_vert[i]->lonlat[0];
4994       y1 = poly->poly_vert[i]->lonlat[1];
4995       x2 = poly->poly_vert[j]->lonlat[0];
4996       y2 = poly->poly_vert[j]->lonlat[1];
4997 
4998       j = i;
4999 
5000       /* Shoot an horizontal ray to the right. Find out if
5001          this ray hits the polygon edge */
5002       if((y1 < y && y2 > y) || (y2 < y && y1 > y))
5003         {
5004           /* Coordinates along X-axis of the intersection between ray and edge */
5005           x_intersect = (y-y1)/(y1-y2)*(x1-x2)+x1;
5006           if(x_intersect > x) /* Intersection is on the righthand side */
5007             is_in = (is_in == YES)?NO:YES;
5008         }
5009     }
5010 
5011   return is_in;
5012 }
5013 
5014 /*////////////////////////////////////////////////////////////
5015 ////////////////////////////////////////////////////////////*/
5016 
5017 /* Modified Bessel function of the first kind. Stolen from Numerical Recipes in C. */
Bessi0(phydbl x)5018 phydbl Bessi0(phydbl x)
5019 {
5020   phydbl ax,ans;
5021   phydbl y;
5022 
5023   if ((ax=fabs(x)) < 3.75)
5024     {
5025       y=x/3.75;
5026       y*=y;
5027       ans=1.0+y*(3.5156229+y*(3.0899424+y*(1.2067492+y*(0.2659732+y*(0.360768e-1+y*0.45813e-2)))));
5028     }
5029   else
5030     {
5031       y=3.75/ax;
5032       ans=(exp(ax)/sqrt(ax))*(0.39894228+y*(0.1328592e-1+y*(0.225319e-2+y*(-0.157565e-2+y*(0.916281e-2+y*(-0.2057706e-1+y*(0.2635537e-1+y*(-0.1647633e-1+y*0.392377e-2))))))));
5033     }
5034 
5035   return ans;
5036 }
5037 
5038 
5039 /*////////////////////////////////////////////////////////////
5040 ////////////////////////////////////////////////////////////*/
5041 
5042 /* Modified Bessel function of the second kind (degree 0). Stolen from Numerical Recipes in C. */
Bessk0(phydbl x)5043 phydbl Bessk0(phydbl x)
5044 {
5045   phydbl y,ans;
5046 
5047   if (x <= 2.0)
5048     {
5049       y=x*x/4.0;
5050       ans=(-log(x/2.0)*Bessi0(x))+(-0.57721566+y*(0.42278420+y*(0.23069756+y*(0.3488590e-1+y*(0.262698e-2+y*(0.10750e-3+y*0.74e-5))))));
5051     }
5052   else
5053     {
5054       y=2.0/x;
5055       ans=(exp(-x)/sqrt(x))*(1.25331414+y*(-0.7832358e-1+y*(0.2189568e-1+y*(-0.1062446e-1+y*(0.587872e-2+y*(-0.251540e-2+y*0.53208e-3))))));
5056     }
5057 
5058   return ans;
5059 }
5060 
5061 
5062 /*////////////////////////////////////////////////////////////
5063 ////////////////////////////////////////////////////////////*/
5064 
Euclidean_Dist(t_geo_coord * x,t_geo_coord * y)5065 phydbl Euclidean_Dist(t_geo_coord *x, t_geo_coord *y)
5066 {
5067   int i;
5068   phydbl dist;
5069 
5070   if(x->dim != y->dim)
5071     {
5072       PhyML_Printf("\n. x->dim: %d y->dim: %d",x->dim,y->dim);
5073       Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
5074     }
5075 
5076   dist = 0.0;
5077   for(i=0;i<x->dim;i++) dist += pow(x->lonlat[i]-y->lonlat[i],2);
5078 
5079   return(sqrt(dist));
5080 }
5081 
5082 /*////////////////////////////////////////////////////////////
5083 ////////////////////////////////////////////////////////////*/
5084 /* Return a vector rk such that rk[i] gives the index of the i-th largest element in x */
Ranks(phydbl * x,int len)5085 int *Ranks(phydbl *x, int len)
5086 {
5087   int *rk,tmp;
5088   int i,swap;
5089 
5090   rk = (int *)mCalloc(len,sizeof(int));
5091 
5092   for(i=0;i<len;i++) rk[i] = i;
5093 
5094   do
5095     {
5096       swap = NO;
5097       for(i=0;i<len-1;i++)
5098         {
5099           if(x[rk[i]] < x[rk[i+1]])
5100             {
5101               swap  = YES;
5102               tmp   = rk[i];
5103               rk[i] = rk[i+1];
5104               rk[i+1] = tmp;
5105             }
5106         }
5107     }
5108   while(swap == YES);
5109 
5110   return(rk);
5111 }
5112 
5113 /*////////////////////////////////////////////////////////////
5114 ////////////////////////////////////////////////////////////*/
5115 
Brownian_Bridge_Generate(phydbl start,phydbl end,phydbl var,phydbl beg_time,phydbl end_time,int n_steps,phydbl * time)5116 phydbl *Brownian_Bridge_Generate(phydbl start, phydbl end, phydbl var, phydbl beg_time, phydbl end_time, int n_steps, phydbl *time)
5117 {
5118   phydbl *state,end_brown;
5119   int i;
5120 
5121   if(n_steps == 0) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
5122   if(beg_time > end_time) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
5123   for(i=0;i<n_steps-1;i++) if(!(time[i+1] > time[i])) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
5124 
5125   state = Brownian_Generate(var,n_steps,beg_time,time);
5126   end_brown = Rnorm(state[n_steps-1],SQRT((time[n_steps-1] - end_time)*var));
5127 
5128   for(i=0;i<n_steps;i++)
5129     {
5130       state[i] = state[i] - (time[i]/end_time) * end_brown;
5131       state[i] = start + (end - start)/end_time * time[i] + state[i];
5132     }
5133 
5134 
5135   return(state);
5136 }
5137 
5138 /*////////////////////////////////////////////////////////////
5139 ////////////////////////////////////////////////////////////*/
5140 
Brownian_Generate(phydbl var,int n_steps,phydbl beg_time,phydbl * time)5141 phydbl *Brownian_Generate(phydbl var, int n_steps, phydbl beg_time, phydbl *time)
5142 {
5143   phydbl *state;
5144   int i;
5145 
5146   if(n_steps == 0) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
5147 
5148   state = (phydbl *)mCalloc(n_steps,sizeof(phydbl));
5149 
5150   state[0] = Rnorm(0.0,SQRT((time[0] - beg_time)*var));
5151 
5152   for(i=1;i<n_steps;i++)
5153     {
5154       state[i] = Rnorm(state[i-1],SQRT((time[i]-time[i-1])*var));
5155       if(time[i] < time[i-1]) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
5156     }
5157 
5158   return(state);
5159 }
5160 
5161 /*////////////////////////////////////////////////////////////
5162 ////////////////////////////////////////////////////////////*/
5163 
Random_Walk_Bridged_Generate(phydbl start,phydbl end,phydbl var,int n_steps)5164 phydbl *Random_Walk_Bridged_Generate(phydbl start, phydbl end, phydbl var, int n_steps)
5165 {
5166   phydbl *state,end_walk;
5167   int i;
5168 
5169   if(n_steps == 0) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
5170 
5171   state = Random_Walk_Generate(var,n_steps);
5172   end_walk = Rnorm(state[n_steps-1],SQRT(var));
5173 
5174   for(i=0;i<n_steps;i++)
5175     {
5176       state[i] = state[i] - ((phydbl)(i+1.)/n_steps) * end_walk;
5177       state[i] = start + (end - start)/n_steps * (i+1.) + state[i];
5178     }
5179 
5180   return(state);
5181 }
5182 
5183 /*////////////////////////////////////////////////////////////
5184 ////////////////////////////////////////////////////////////*/
5185 
Random_Walk_Generate(phydbl var,int n_steps)5186 phydbl *Random_Walk_Generate(phydbl var, int n_steps)
5187 {
5188   phydbl *state;
5189   int i;
5190 
5191   if(n_steps == 0) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
5192 
5193   state = (phydbl *)mCalloc(n_steps,sizeof(phydbl));
5194 
5195   state[0] = Rnorm(0.0,SQRT(var));
5196 
5197   for(i=1;i<n_steps;i++) state[i] = Rnorm(state[i-1],SQRT(var));
5198 
5199   return(state);
5200 }
5201 
5202 /*////////////////////////////////////////////////////////////
5203 ////////////////////////////////////////////////////////////*/
5204 
Reflected(phydbl x,phydbl down,phydbl up)5205 phydbl Reflected(phydbl x, phydbl down, phydbl up)
5206 {
5207   phydbl ref;
5208 
5209   ref = x;
5210   do
5211     {
5212       if(ref > up)   ref = up   - (ref  -  up);
5213       if(ref < down) ref = down + (down - ref);
5214     }while(!(ref < up && ref > down));
5215 
5216   return(ref);
5217 }
5218