1 /*------------------------------------------------------
2  Maximum likelihood estimation
3  of migration rate  and effectice population size
4  using a Metropolis-Hastings Monte Carlo algorithm
5  -------------------------------------------------------
6  R e p o r t e r   R O U T I N E  reports things progress=True or verbose
7 
8  Peter Beerli
9  beerli@fsu.edu
10 
11 Copyright 1996-2002 Peter Beerli and Joseph Felsenstein, Seattle WA
12 Copyright 2003-2012 Peter Beerli, Tallahassee FL
13 
14  Permission is hereby granted, free of charge, to any person obtaining
15  a copy of this software and associated documentation files (the
16  "Software"), to deal in the Software without restriction, including
17  without limitation the rights to use, copy, modify, merge, publish,
18  distribute, sublicense, and/or sell copies of the Software, and to
19  permit persons to whom the Software is furnished to do so, subject
20  to the following conditions:
21 
22  The above copyright notice and this permission notice shall be
23  included in all copies or substantial portions of the Software.
24 
25  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
28  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
29  ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
30  CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
31  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
32 
33 
34 $Id: reporter.c 2158 2013-04-29 01:56:20Z beerli $
35 
36 -------------------------------------------------------*/
37 /* \file reporter.c
38 Routines that report progress and also calculates Gelman-Rubin convergence statistic
39 */
40 
41 #include "migration.h"
42 #include "mcmc.h"
43 
44 #include "fst.h"
45 #include "random.h"
46 #include "tools.h"
47 #include "broyden.h"
48 #include "combroyden.h"
49 #include "options.h"
50 #include "sighandler.h"
51 #include "migrate_mpi.h"
52 
53 #ifdef DMALLOC_FUNC_CHECK
54 #include <dmalloc.h>
55 #endif
56 
57 
58 void both_chain_means (MYREAL *mc, MYREAL *lc, MYREAL *tc, long len,
59                        long lastn, long n);
60 void calc_gelmanb (MYREAL *gelmanb, MYREAL *mc, MYREAL *tc, MYREAL *lc,
61                    long len, long lastn, long n);
62 void calc_gelmanw (MYREAL *gelmanw, world_fmt * world, MYREAL *mc, MYREAL *tc,
63                    long len, long lastn, long n);
64 void calc_gelmanr (MYREAL *gelmanr, MYREAL *gelmanw, MYREAL *gelmanb,
65                    long len, long lastn, long n);
66 void calc_average_biggest_gelmanr (MYREAL *gelmanr, long len, MYREAL *meanR,
67                                    MYREAL *bigR);
68 void print_gelmanr (MYREAL average, MYREAL biggest);
69 MYREAL calc_s (long tthis, MYREAL *tc, world_fmt * world);
70 MYREAL calc_s_bayes (long tthis, MYREAL *tc, world_fmt * world);
71 void chain_means (MYREAL *thischainmeansm, world_fmt * world);
72 
73 void calc_gelmanw2 (MYREAL *gelmanw, MYREAL *s1, MYREAL *s2, long len);
74 void all_chain_means (MYREAL *mc, MYREAL *chainmeans, long *nmeans, long len, long maxreplicate);
75 void calc_allgelmanb (MYREAL *gelmanb, MYREAL *mc, MYREAL *chainmeans, long *nmeans, long len, long maxreplicate);
76 void calc_allgelmanw2 (MYREAL *gelmanw, MYREAL *chain_s, long *nmeans, long len, long maxreplicate);
77 void calc_allgelmanr (MYREAL *gelmanr, MYREAL *gelmanw, MYREAL *gelmanb, long *nmeans, long len, long maxreplicate);
78 
79 
80 
81 
82 //public functions
83 ///
84 /// convergence indicator for ML runs
85 void
convergence_check(world_fmt * world,boolean progress)86 convergence_check (world_fmt * world, boolean progress)
87 {
88     static MYREAL *lastchainmeans, *chainmeans, *thischainmeans;
89     static MYREAL *gelmanw, *gelmanb, *gelmanr;
90 
91     static boolean done = FALSE;
92     static long len = 0;
93     static long lastn = 0;
94     static boolean first = TRUE;
95 
96     long maxreplicate = (world->options->replicate
97                          && world->options->replicatenum >
98                          0) ? world->options->replicatenum : 1;
99 
100     long n = 0;
101     if (world->chains == 1 && maxreplicate <= 1)
102         return;
103 
104     if (world->start)
105         first = TRUE;
106     if (progress || world->options->gelman)
107     {
108         if (!done)
109         {
110             done = TRUE;
111             // len defines the length of arrays that
112             // have to hold all km, kt, p, and mindex means (ml) or parametes (bayes)
113 	    if(world->options->bayes_infer)
114 	      len = world->numpop2 + 1;
115 	    else
116 	      len = world->numpop2 + world->numpop * 3;
117 
118             lastchainmeans = (MYREAL *) mycalloc (len, sizeof (MYREAL));
119             thischainmeans = (MYREAL *) mycalloc (len, sizeof (MYREAL));
120             chainmeans = (MYREAL *) mycalloc (len, sizeof (MYREAL));
121             gelmanw = (MYREAL *) mycalloc (len, sizeof (MYREAL));
122             gelmanb = (MYREAL *) mycalloc (len, sizeof (MYREAL));
123             gelmanr = (MYREAL *) mycalloc (len, sizeof (MYREAL));
124         }
125         n = -1; // a dummy value because the gelman_ functions expect a dummy last value
126         memset (thischainmeans, 0, sizeof (MYREAL) * len);
127         if (first)
128         {
129             first = FALSE;
130             chain_means (lastchainmeans, world);
131             return;
132         }
133         else
134         {
135             chain_means (thischainmeans, world);
136             both_chain_means (chainmeans, lastchainmeans, thischainmeans, len,
137                               lastn, n);
138             calc_gelmanb (gelmanb, chainmeans, thischainmeans, lastchainmeans,
139                           len, lastn, n);
140             calc_gelmanw (gelmanw, world, thischainmeans, lastchainmeans, len,
141                           lastn, n);
142             calc_gelmanr (gelmanr, gelmanw, gelmanb, len, lastn, n);
143             calc_average_biggest_gelmanr (gelmanr, len, &world->convergence->gelmanmeanRall,
144                                           &world->convergence->gelmanmaxRall);
145             memcpy (lastchainmeans, thischainmeans, sizeof (MYREAL) * len);
146             lastn = n;
147         }
148     }
149 }
150 
report_values(MYREAL * vec,long len,char text[])151 void report_values(MYREAL *vec, long len, char text[])
152 {
153   long i;
154   fprintf(stdout,"%s ",text);
155   for(i=0;i<len;i++)
156     {
157       fprintf(stdout,"%f ",vec[i]);
158     }
159   fprintf(stdout,"\n");
160 }
161 
162 ///
163 /// convergence indicator for Bayesian runs
164 void
convergence_check_bayes(world_fmt * world,long maxreplicate)165 convergence_check_bayes (world_fmt *world,  long maxreplicate)
166 {
167     MYREAL *gelmanw, *gelmanb, *gelmanr;
168     long len = 0;
169     long n_i = 0;
170     long n_j = 0;
171     long i;
172     long j;
173 
174     MYREAL *chain_i;
175     MYREAL *chain_j;
176     MYREAL *s_i;
177     MYREAL *s_j;
178     MYREAL *chain_averages;
179     MYREAL *chain_means = world->convergence->chain_means;
180     long   *chain_counts = world->convergence->chain_counts;
181     MYREAL *chain_s = world->convergence->chain_s;
182     long   *nmeans;
183 
184     if (world->chains == 1 && maxreplicate <= 1)
185         return;
186     // len defines the length of arrays that
187     len = world->numpop2 + 1;
188     nmeans  = (long *) mycalloc (maxreplicate, sizeof (long));
189     gelmanw = (MYREAL *) mycalloc (len, sizeof (MYREAL));
190     gelmanb = (MYREAL *) mycalloc (len, sizeof (MYREAL));
191     gelmanr = (MYREAL *) mycalloc (len, sizeof (MYREAL));
192     chain_averages = (MYREAL *) mycalloc (len, sizeof (MYREAL));
193 
194     // can be done independently before this function
195     // evaluate the within_variances for each monitored parameter:
196     // wp[i] = 1/(n-1) Sum[p[i]- mean[p[i]]]
197     // evaluate the total_within_variance w = 1/m Sum[wp[i]]
198     // needs to be done here
199     // evaluate between_variance for each monitored parameter:
200     // bp[i] =  Sum[p[j]-mean[p[j]]]
201     // where m is the number of indpendent chains, m = 2 ... many, shall we evaluate all pairs?
202     // evaluate between_variance for all parameters
203     // b = n/(m-1) .... [check program?
204     // calculate expected variance V = (1-1/n) W + 1/n B
205     // Gelman-Rubin statistic sqrt(R) = Sqrt[V/W]
206 
207     for(i = 0; i < maxreplicate; i++)
208       {
209 	chain_i = &chain_means[i*len];
210 	s_i = &chain_s[i*len];
211 	n_i = chain_counts[i];
212 	nmeans[i] = n_i;
213 	//report_values(chain_i, len, "chain_i");
214 	// setting whether this replicate is already recorded or not
215 	// would allow to use asynchronous MPI stuff
216 	if(chain_i[0] <= 0.)
217 	  continue;
218 	for(j = 0; j < i; j++)
219 	  {
220 	    chain_j = &chain_means[j*len];
221 	    s_j = &chain_s[j*len];
222 	    n_j = chain_counts[j];
223 	    //report_values(chain_j, len, "chain_j");
224 	    if(chain_j[0] <= 0.)
225 	      continue;
226 	    both_chain_means (chain_averages, chain_i, chain_j, len, n_i, n_j);
227 	    //report_values(chain_averages, len, "averages");
228 	    calc_gelmanb (gelmanb, chain_averages, chain_i, chain_j, len, n_i, n_j);
229 	    //report_values(gelmanb, len, "gelmanb");
230 	    calc_gelmanw2 (gelmanw, s_i, s_j, len);
231 	    //report_values(gelmanw, len, "gelmanw");
232 	    calc_gelmanr (gelmanr, gelmanw, gelmanb, len, n_i, n_j);
233 	    //report_values(gelmanr, len, "gelmanr");
234 	    calc_average_biggest_gelmanr (gelmanr, len,
235 					  &world->convergence->gelmanmeanmaxR[j * maxreplicate + i],
236 					  &world->convergence->gelmanmeanmaxR[i * maxreplicate + j]);
237 	  }
238       }
239     all_chain_means(chain_averages,chain_means, nmeans, len, maxreplicate);
240     calc_allgelmanb(gelmanb,chain_averages, chain_means, nmeans, len, maxreplicate);
241     calc_allgelmanw2 (gelmanw, chain_s, nmeans, len, maxreplicate);
242     calc_allgelmanr (gelmanr, gelmanw, gelmanb, nmeans, len, maxreplicate);
243     calc_average_biggest_gelmanr (gelmanr, len,
244 				  &world->convergence->gelmanmeanRall,
245 				  &world->convergence->gelmanmaxRall);
246 
247     myfree(gelmanw);
248     myfree(gelmanb);
249     myfree(gelmanr);
250     myfree(chain_averages);
251     myfree(nmeans);
252 }
253 
254 
255 
256 void
both_chain_means(MYREAL * mc,MYREAL * lc,MYREAL * tc,long len,long lastn,long n)257 both_chain_means (MYREAL *mc, MYREAL *lc, MYREAL *tc, long len, long lastn,
258                   long n)
259 {
260     long i;
261 
262     for (i = 0; i < len; i++)
263     {
264         mc[i] = (lc[i] * lastn + tc[i] * n) / (n + lastn);
265     }
266 }
267 
268 ///
269 /// calculates the overall means of all replicate chains
270 void
all_chain_means(MYREAL * mc,MYREAL * chainmeans,long * nmeans,long len,long maxreplicate)271 all_chain_means (MYREAL *mc, MYREAL *chainmeans, long *nmeans, long len, long maxreplicate)
272 {
273     long i;
274     long j;
275     long nsum = 0;
276     MYREAL sum = 0.;
277     for (i = 0; i < len; i++)
278     {
279       sum = 0.;
280       nsum = 0;
281       for(j=0; j < maxreplicate; j++)
282 	{
283 	  if(nmeans[j]>1)
284 	    {
285 	      sum += chainmeans[j * len + i] * nmeans[j];
286 	      nsum += nmeans[j];
287 	    }
288 	}
289       mc[i] = sum / nsum;
290     }
291 }
292 
293 
294 void
calc_gelmanb(MYREAL * gelmanb,MYREAL * mc,MYREAL * tc,MYREAL * lc,long len,long lastn,long n)295 calc_gelmanb (MYREAL *gelmanb, MYREAL *mc, MYREAL *tc, MYREAL *lc, long len,
296               long lastn, long n)
297 {
298     long i;
299 
300     for (i = 0; i < len; i++)
301     {
302       gelmanb[i] = /* 1/(2-1)* */  (pow ((lc[i] - mc[i]), 2.) + (pow ((tc[i] - mc[i]), 2.)));
303     }
304 
305 }
306 void
calc_allgelmanb(MYREAL * gelmanb,MYREAL * mc,MYREAL * chainmeans,long * nmeans,long len,long maxreplicate)307 calc_allgelmanb (MYREAL *gelmanb, MYREAL *mc, MYREAL *chainmeans, long *nmeans, long len, long maxreplicate)
308 {
309     long i;
310     long j;
311     //long nsum = 0;;
312     MYREAL sum = 0.;
313     MYREAL val;
314     for (i = 0; i < len; i++)
315       {
316 	sum = 0.;
317 	//nsum = 0;
318 	for(j=0; j < maxreplicate; j++)
319 	  {
320 	    if(nmeans[j]>1)
321 	      {
322 		val = chainmeans[j * len + i] -  mc[i];
323 		sum += val * val;
324 		//nsum += nmeans[j];
325 	      }
326 	}
327       gelmanb[i] = sum / (maxreplicate -1);
328 
329       //gelmanb[i] = nn * (pow ((lc[i] - mc[i]), 2.) + (pow ((tc[i] - mc[i]), 2.)));
330     }
331 
332 }
333 
334 
335 ///
336 /// collect the autocorrelation values and the ESS values
collect_acceptance(world_fmt * world)337 void collect_acceptance(world_fmt *world)
338 {
339   const long np = world->numpop2;
340   const long nn = world->numpop2 + world->bayes->mu; //position of genealogy acceptance
341   // the archives have length of #parameters + #all_loci_murates + genealogy
342   long i;
343   for(i=0;i<np; i++)
344     {
345       world->accept_archive[i] += world->bayes->accept[i];
346       world->trials_archive[i] += world->bayes->trials[i];
347     }
348   if(world->bayes->mu)
349     {
350       i = world->locus + np;
351       world->accept_archive[i] += world->bayes->accept[np];
352       world->trials_archive[i] += world->bayes->trials[np];
353     }
354   world->accept_archive[nn] += world->bayes->accept[nn];
355   world->trials_archive[nn] += world->bayes->trials[nn];
356 
357 
358 }
359 
360 ///
361 /// collect the autocorrelation values and the ESS values
collect_ess_values(world_fmt * world)362 void collect_ess_values(world_fmt *world)
363 {
364   static long n=1;
365   const long nn = world->numpop2 + world->bayes->mu * world->loci + 1;// One is for Log(Prob(Data|Model)
366   long i;
367   // long maxrep = (world->options->replicate == TRUE) ? ((world->options->replicatenum > 0) ?
368   //		      world->options->replicatenum  : world->options->lchains ) : 1;
369 
370   for(i=0;i<nn; i++)
371     {
372       // onepass mean of autocorrelation
373       world->auto_archive[i] += (world->autocorrelation[i] - world->auto_archive[i])/n;
374       // summing ess values
375       world->ess_archive[i] += world->effective_sample[i];
376     }
377   n++;
378   //printf("%i> %li autoarchive %f autocorr %f n=%li\n", myID, world->rep,  world->auto_archive[0], world->autocorrelation[0], nn);
379 }
380 
381 
382 
383 ///
384 /// prints the effective sample size and autocorrelation
print_bayes_ess(FILE * file,world_fmt * world,long numparam,int offset,MYREAL * autocorr,MYREAL * effsample)385 void print_bayes_ess(FILE *file, world_fmt *world, long numparam, int offset,
386 			  MYREAL *autocorr, MYREAL *effsample)
387 {
388   long pa=0;
389   long pa0;
390   long numpop = world->numpop;
391   long numpop2 = world->numpop2;
392   long frompop, topop;
393   long frompop2, topop2;
394   long pos=0;
395   char *custm2 = world->options->custm2;
396   bayes_fmt * bayes = world->bayes;
397   char *buffer;
398   long start=0;
399 #ifdef MPI
400   long i;
401 #endif
402   //long locus = 1;
403   if(!world->options->bayes_infer)
404     start=world->numpop2;
405 
406   buffer = (char*) mycalloc(numparam * LINESIZE, sizeof(char));
407   pos = sprintf(buffer,"%*.*sParameter         Autocorrelation%s   Effective Sample size\n", offset, offset, " ",world->loci>1 ? "(*)" : "   ");
408   pos += sprintf(buffer+pos,"%*.*s---------         ---------------      ---------------------\n", offset, offset, " ");
409 
410   for(pa0=start;pa0<numparam;pa0++)
411     {
412       if(pa0 < numpop2)
413 	{
414 	  if(bayes->map[pa0][1] != INVALID)
415 	    {
416 	      pa = bayes->map[pa0][1];
417 	      if(pa0 < numpop)
418 		{
419 		  if((pa0 == pa) && (custm2[pa0] == '*'))
420 		    {
421 		      pos += sprintf(buffer+pos,"%*.*s%s_%li               % -5.5f           %10.2f\n",
422 				     offset, offset, " ", "Theta",pa0+1,autocorr[pa],effsample[pa]);
423 		      if(effsample[pa]<ESSMINIMUM && file==world->outfile)
424 			record_warnings(world,"Param %li: Effective sample size of run seems too short! ",pa+1);
425 		    }
426 		  else
427 		    {
428 		      pos += sprintf(buffer+pos,"%*.*s%s_%li=%li [%c]      % -5.5f        %10.2f\n", offset, offset, " ",
429 				     "Theta",pa0+1, pa+1, custm2[pa0],autocorr[pa],effsample[pa]);
430 		      if(effsample[pa]<ESSMINIMUM && file==world->outfile)
431 			record_warnings(world,"Param %li: Effective sample size of run seems too short! ",pa+1);
432 		    }
433 		}
434 	      else
435 		{
436 		  m2mm(pa0,numpop,&frompop,&topop);
437 		  if((pa0==pa) && (custm2[pa0]=='*'))
438 		    {
439 		      pos += sprintf(buffer+pos,"%*.*s%s_%li->%li                % -5.5f           %10.2f\n", offset, offset, " ", "M", frompop+1, topop+1,autocorr[pa],effsample[pa]);
440 		      if(effsample[pa]<ESSMINIMUM && file==world->outfile)
441 			record_warnings(world,"Param %li: Effective sample size of run seems too short! ",pa+1);
442 
443 		    }
444 		  else
445 		    {
446 		      m2mm(pa,numpop,&frompop2,&topop2);
447 		      pos += sprintf(buffer+pos,"%*.*s%s_(%li,%li) [%c]             % -5.5f           %10.2f\n",
448 				     offset, offset, " ", "M", frompop+1, topop+1, custm2[pa0], autocorr[pa], effsample[pa]);
449 		      if(effsample[pa]<ESSMINIMUM && file==world->outfile)
450 			record_warnings(world,"Param %li: Effective sample size of run seems too short! ",pa+1);
451 
452 		    }
453 		}
454 	    }
455 	}
456       else		  // do we estimate mutation rate changes?
457 	{
458 	  if(pa0+1 == numparam)
459 	    {
460 	      pos += sprintf(buffer+pos,"%*.*s%s         % -5.5f           %10.2f\n", offset, offset, " ", "Ln[Prob(D|P)]",autocorr[pa0],effsample[pa0]);
461 	      if(effsample[pa]<ESSMINIMUM && file==world->outfile)
462 		record_warnings(world,"Param %li: Effective sample size of run seems too short! ",pa0+1);
463 	    }
464 	  else
465 	    {
466 	      if(world->locus + world->numpop2 == pa0)
467 		{
468 		  pos += sprintf(buffer+pos,"%*.*s%s_%-5li            % -5.5f           %10.2f\n",
469 				 offset, offset, " ", "Rate", world->locus, autocorr[pa0],effsample[pa0]);
470 		  if(effsample[pa]<ESSMINIMUM && file==world->outfile)
471 		    record_warnings(world,"Param %li: Effective sample size of run seems too short! ",pa0+1);
472 		}
473 	    }
474 	}
475     }
476   if(world->loci>1 && file!=stdout)
477     {
478       //xcode pos +=
479       sprintf(buffer+pos,"%*.*s(*) averaged over loci.\n", offset, offset, " ");
480     }
481   //  printf("bufsize in print_bayes_ess_buffer: %li\n",pos);
482 #ifdef MPI
483   if(file==stdout)
484     {
485       if(pos>LINESIZE)
486 	{
487 	  //printf("bufsize in print_bayes_ess_buffer2: %li / %li\n",pos, numparam*LINESIZE);
488 	  fprintf(file,"[%3i] %*.*s\n", myID, (int) LINESIZE, (int) LINESIZE, buffer);
489 	  pos -= LINESIZE;
490 	  i=0;
491 	  while(pos>LINESIZE)
492 	    {
493 	      i += LINESIZE;
494 	      fprintf(file,"%*.*s\n",(int) LINESIZE, (int) LINESIZE, buffer+i);
495 	      pos -= LINESIZE;
496 	    }
497 	  fprintf(file,"%s\n", buffer+i);
498 	}
499       else
500 	{
501 	  //  printf("bufsize in print_bayes_ess_buffer3: %li\n",pos);
502 	fprintf(file,"[%3i] %s\n", myID, buffer);
503 	}
504     }
505   else
506     {
507       //      printf("bufsize in print_bayes_ess_buffer4: %li\n",pos);
508       if(pos>LINESIZE)
509 	{
510 	  //printf("bufsize in print_bayes_ess_buffer4.1: %li\n",pos);
511 	  fprintf(file,"[%3i] %*.*s\n", myID, (int) LINESIZE, (int) LINESIZE, buffer);
512 	  pos -= LINESIZE;
513 	  i=0;
514 	  while(pos>LINESIZE)
515 	    {
516 	      i += LINESIZE;
517 	      //  printf("bufsize in print_bayes_ess_buffer4.2 %li: %li\n",pos,i);
518 	      fprintf(file,"%*.*s\n", (int) LINESIZE,(int) LINESIZE, buffer+i);
519 	      pos -= LINESIZE;
520 	    }
521 	  //	  printf("bufsize in print_bayes_ess_buffer4.3za: %li %li\n",pos,i);
522 	  fprintf(file,"%s\n", buffer+i);
523 	}
524       else
525 	{
526 	  //	  printf("bufsize in print_bayes_ess_buffer5: %li\n",pos);
527 	  fprintf(file,"[%3i] %s\n", myID, buffer);
528 	}
529     }
530 #else
531   fprintf(file,"%s\n", buffer);
532 #endif
533   myfree(buffer);
534 }
535 
536 
537 
538 ///
539 /// returns variance and other measurement of a single chain used in Bayesian burnin_chain()
540 /// the standard deviation and the mean are calculated using a result of
541 /// B. P. Welford 1962 (from D. E. Knuth Art of computer programming Vol 2 page 232
542 /// the recursion form of the autocorrelation r was developed with Koffi Sampson
543 /// April 2007, see mathematica code below
544 /// newff[xx_] := Module[{},
545 ///		     nt = Length[xx];
546 ///		     r = 0;
547 ///		     x1 = xx[[1]];
548 ///		     mo = x1;
549 ///		     mn = x1;
550 ///		     n = 2 ;
551 ///		     s = 0;
552 ///		     While[n <= nt,
553 ///			   xo = xx[[n - 1]];
554 ///			   xn = xx[[n]];
555 ///			   mn = mn  + (xn - mn)/n;
556 ///			   s = s + (xn - mo) (xn - mn);
557 ///KOFFI       r = r  + xo xn + mo (n mo  - x1 - xo) - mn ((n + 1) mn - x1 - xn);
558 ///PETER       r = r + (xo - mo)(xn-mn)
559 ///			   mo = mn;
560 ///			   n++;
561 ///		     ];
562 ///  r/s
563 ///    ]
564 /// The effective sample is calculated as the length the sample n * (1-r)/(1+r)
565 ///
single_chain_var(world_fmt * world,long T,MYREAL * variance,MYREAL * autoc,MYREAL * effsample)566 MYREAL single_chain_var(world_fmt *world, long T, MYREAL *variance, MYREAL *autoc, MYREAL *effsample)
567 {
568   static double *mean;
569   static double *S;
570   static double *r;
571   static double *xold;
572   static double *xstart;
573   static boolean done=FALSE;
574   static long n = 0;
575   static long nn = 0;
576   long i;
577   long j;
578   long start;
579 
580   double x  = 0.0;
581   double xo = 0.0;
582   double x1 = 0.0;
583   double m  = 0.0;
584   double mo = 0.0;
585   double v  = 0.0;
586   double *delta;
587   double temp;
588   if(!done)
589     {
590       nn = world->numpop2 + world->bayes->mu * world->loci + 1;
591       mean = (double *) mycalloc(nn,sizeof(double));
592       S    = (double *) mycalloc(nn,sizeof(double));
593       r    = (double *) mycalloc(nn,sizeof(double));
594       xold = (double *) mycalloc(nn,sizeof(double));
595       xstart = (double *) mycalloc(nn,sizeof(double));
596       done=TRUE;
597     }
598   // reset static variable for each chain
599   if(world==NULL)
600     {
601       n=0;
602       memset(mean,0,nn * sizeof(double));
603       memset(S,0,nn * sizeof(double));
604       memset(r,0,nn * sizeof(double));
605       memset(xold,0,nn * sizeof(double));
606       memset(xstart,0,nn * sizeof(double));
607       return 0;
608     }
609   // syntax so that an init really works
610   if(T==0)
611     return 0;
612 
613   delta = (double *) mycalloc(nn,sizeof(double));
614   //handles BAYES or ML
615  start = (world->options->bayes_infer ? 0 : world->numpop2);
616   if(n==0) //initialization of mean calculator
617     {
618       n += 1;
619       for(i=start; i < nn; i++)
620 	{
621 	  if(i<world->numpop2)
622 	    {
623 	      if(world->bayes->map[i][1] == INVALID)
624 		continue;
625 	      else
626 		{
627 		  j  = world->bayes->map[i][1];
628 		  x         = world->param0[j];
629 		}
630 	    }
631 	  else
632 	    {
633 	      j = i;
634 	      if(j+1 == nn)
635 		{
636 		  x = world->likelihood[world->numlike-1];
637 		}
638 	      else
639 		{
640 		  x = world->options->mu_rates[world->locus];
641 		}
642 	    }
643 	  mean[j]   = x;
644 	  S[j]      = 0.0;
645 	  r[j]      = 0.0;
646 	  xstart[j] = x;
647 	  xold[j] = x;
648 	}
649       v = 0.0;
650       myfree(delta);
651     }
652   else
653     {
654       // n is at least 1
655       n += 1;
656       for(i=start; i < nn; i++)
657 	{
658 	  if(i<world->numpop2)
659 	    {
660 	      if(world->bayes->map[i][1] == INVALID)
661 		continue;
662 	      else
663 		{
664 		  j  = world->bayes->map[i][1];
665 		  x  = (double) world->param0[j];
666 		}
667 	    }
668 	  else
669 	    {
670 	      j = i;
671 	      if(j+1 == nn)
672 		{
673 		  x = (double) world->likelihood[world->numlike-1];
674 		}
675 	      else
676 		{
677 		  x = (double) world->options->mu_rates[world->locus];
678 		}
679 	    }
680 	  xo        = xold[j];
681 	  x1        = xstart[j];
682 	  delta[j]  = x - mean[j];
683 	  mo        = mean[j];
684 	  mean[j]  += delta[j]/n;
685 	  m         = mean[j];
686 	  S[j]     += delta[j] * (x - m);
687 	  temp = xo * x + mo * (n * mo - x1 - xo) - m * ((n+1) * m - x1 - x);
688 	  r[j]     += temp;
689 	  autoc[j]  = S[j] > 0.0 ? (MYREAL) (r[j]/S[j]): (MYREAL) 1.0;
690 	  effsample[j] = (MYREAL) (n * (1. - autoc[j])/(1. + autoc[j]));
691 	  v        += S[j];
692 	  //printf("[%li] n=%li effsample=%g atoc=%g r=%g (%f) s=%g mean=%g\n", j, n,effsample[j],autoc[j],r[j], temp, S[j],mean[j]);
693 	  xold[j] = x;
694 	}
695       myfree(delta);
696     }
697   *variance =  (MYREAL) v / n;
698   return *variance;
699 }
700 ///
701 /// this function mimics single_chain_var() but is used only in the reading bayesallfile function read_bayes_fromfile()
702 /// taking into account that there may be a mixture of different loci and some variables are not where one expect them
703 /// during the standard run
calculate_ess_frombayes(world_fmt * world,long T,MYREAL * params,long locus,MYREAL * autoc,MYREAL * effsample)704 void calculate_ess_frombayes(world_fmt *world, long T, MYREAL *params, long locus, MYREAL *autoc, MYREAL *effsample)
705 {
706   static double *mean;
707   static double *S;
708   static double *r;
709   static double *xold;
710   static double *xstart;
711   static boolean done=FALSE;
712   static long *n;
713   static long nn = 0;
714   static long nnbase = 0;
715   long i;
716   long j;
717   long z;
718   long start;
719 
720   double x  = 0.0;
721   double xo = 0.0;
722   double x1 = 0.0;
723   double m  = 0.0;
724   double mo = 0.0;
725   //static double *v;
726   double *delta;
727   double temp;
728   if(!done)
729     {
730       nnbase = (world->numpop2 + world->bayes->mu * world->loci + 1);
731       nn = world->loci * nnbase;
732       n = (long *) mycalloc(world->loci,sizeof(long));
733       mean = (double *) mycalloc(nn,sizeof(double));
734       S    = (double *) mycalloc(nn,sizeof(double));
735       r    = (double *) mycalloc(nn,sizeof(double));
736       xold = (double *) mycalloc(nn,sizeof(double));
737       xstart = (double *) mycalloc(nn,sizeof(double));
738       done=TRUE;
739     }
740   // syntax so that an init really works
741   if(T==0)
742     return;
743 
744   delta = (double *) mycalloc(nn,sizeof(double));
745   //handles BAYES or ML
746   start = (world->options->bayes_infer ? 0 : world->numpop2);
747   if(n[locus]==0) //initialization of mean calculator
748     {
749       n[locus] += 1;
750       for(i=start; i < nnbase; i++)
751 	{
752 	  if(i<world->numpop2)
753 	    {
754 	      if(world->bayes->map[i][1] == INVALID)
755 		continue;
756 	      else
757 		{
758 		  j  = world->bayes->map[i][1];
759 		  z  = locus * nnbase + j;
760 		  x  = params[j+2];
761 		}
762 	    }
763 	  else
764 	    {
765 	      j = i;
766 	      z  = locus * nnbase + j;
767 	      if(j+1 == nnbase)
768 		{
769 		  x = params[1];
770 		}
771 	      else
772 		{
773 		  x = world->options->mu_rates[locus];
774 		}
775 	    }
776 	  mean[z]   = x;
777 	  S[z]      = 0.0;
778 	  r[z]      = 0.0;
779 	  xstart[z] = x;
780 	  xold[z] = x;
781 	}
782     }
783   else
784     {
785       // n is at least 1
786       n[locus] += 1;
787       for(i=start; i < nnbase; i++)
788 	{
789 	  if(i<world->numpop2)
790 	    {
791 	      if(world->bayes->map[i][1] == INVALID)
792 		continue;
793 	      else
794 		{
795 		  j  = world->bayes->map[i][1];
796 		  z  = locus * nnbase + j;
797 		  x  = params[j+2];
798 		}
799 	    }
800 	  else
801 	    {
802 	      j = i;
803 	      z  = locus * nnbase + j;
804 	      if(j+1 == nnbase)
805 		{
806 		  x = params[1];
807 		}
808 	      else
809 		{
810 		  x = (double) world->options->mu_rates[locus];
811 		}
812 	    }
813 	  xo        = xold[z];
814 	  x1        = xstart[z];
815 	  delta[z]  = x - mean[z];
816 	  mo        = mean[z];
817 	  mean[z]  += delta[z]/n[locus];
818 	  m         = mean[z];
819 	  S[z]     += delta[z] * (x - m);
820 	  temp = xo * x + mo * (n[locus] * mo - x1 - xo) - m * ((n[locus]+1) * m - x1 - x);
821 	  r[z]     += temp;
822 	  autoc[z]  = S[z] > 0.0 ? (MYREAL) (r[z]/S[z]): (MYREAL) 1.0;
823 	  effsample[z] = (MYREAL) (n[locus] * (1. - autoc[z])/(1. + autoc[z]));
824 	  //v        += S[z];
825 	  //printf("[%li] n=%li effsample=%g atoc=%g r=%g (%f) s=%g mean=%g\n", j, n[locus],effsample[j],autoc[j],r[j], temp, S[j],mean[j]);
826 	  xold[z] = x;
827 	}
828     }
829   myfree(delta);
830   //  *variance =  (MYREAL) v[locus] / n[locus];
831   //return *variance;
832 }
833 
max_ess(const MYREAL * ess,const long n,const MYREAL minimum,MYREAL * miness)834 boolean max_ess(const MYREAL * ess, const long n, const MYREAL minimum, MYREAL *miness)
835 {
836   long i;
837   long z=0;
838   *miness = HUGE;
839   for(i=0; i < n; i++)
840     {
841       if (ess[i]< *miness)
842 	*miness = ess[i];
843       z += (long) (ess[i] >= minimum);
844     }
845   if(z==n)
846     {
847       return TRUE;
848     }
849   return FALSE;
850 }
851 
852 /// calculate effective sample size
853 /// which is the N' = N (1-r)/(1+r)
854 /// where r is the autocorrelation coefficient for lag 1.
855 /// with r(1) = sum[(x[i]-xmean)(x[i+1]-xmean)/standarddeviation(x),i,0,n-1]
856 
857 void
calc_gelmanw(MYREAL * gelmanw,world_fmt * world,MYREAL * mc,MYREAL * tc,long len,long lastn,long n)858 calc_gelmanw (MYREAL *gelmanw, world_fmt * world, MYREAL *mc, MYREAL *tc,
859               long len, long lastn, long n)
860 {
861     long i;
862     MYREAL s1, s2;
863     for (i = 0; i < len; i++)
864       {
865 	s1 = calc_s (i, tc, world);
866 	s2 = calc_s (i, mc, world);
867 	gelmanw[i] = 0.5 * (s1 + s2);
868       }
869 }
870 
871 void
calc_gelmanw2(MYREAL * gelmanw,MYREAL * s1,MYREAL * s2,long len)872 calc_gelmanw2 (MYREAL *gelmanw, MYREAL *s1, MYREAL *s2, long len)
873 {
874     long i;
875 
876     for (i = 0; i < len; i++)
877     {
878         gelmanw[i] = 0.5 * (s1[i] + s2[i]);
879     }
880 }
881 
882 ///
883 /// calculates the overall s^2 of all replicate chains
884 void
calc_allgelmanw2(MYREAL * gelmanw,MYREAL * chain_s,long * nmeans,long len,long maxreplicate)885 calc_allgelmanw2 (MYREAL *gelmanw, MYREAL *chain_s, long *nmeans, long len, long maxreplicate)
886 {
887     long i;
888     long j;
889     MYREAL sum = 0.;
890     for (i = 0; i < len; i++)
891     {
892       sum = 0.;
893       for(j=0; j < maxreplicate; j++)
894 	{
895 	  if(nmeans[j]>1)
896 	    {
897 	      sum += chain_s[j * len + i];
898 	    }
899 	}
900       gelmanw[i] = sum / maxreplicate;
901     }
902 }
903 
904 
905 
906 void
calc_gelmanr(MYREAL * gelmanr,MYREAL * gelmanw,MYREAL * gelmanb,long len,long lastn,long n)907 calc_gelmanr (MYREAL *gelmanr, MYREAL *gelmanw, MYREAL *gelmanb, long len,
908               long lastn, long n)
909 {
910     long i;
911     MYREAL nn = (n + lastn) / 2.;
912     MYREAL sqplus = 0.;
913     //    MYREAL v;
914     for (i = 0; i < len; i++)
915     {
916       sqplus = ((nn - 1.) / nn * gelmanw[i] + gelmanb[i]);
917       //v = 1.5 * sqplus;
918       gelmanr[i] = sqrt (sqplus / gelmanw[i]);// - (nn-1)/(2. * nn));
919     }
920 }
921 
922 void
calc_allgelmanr(MYREAL * gelmanr,MYREAL * gelmanw,MYREAL * gelmanb,long * nmeans,long len,long maxreplicate)923 calc_allgelmanr (MYREAL *gelmanr, MYREAL *gelmanw, MYREAL *gelmanb, long *nmeans, long len, long maxreplicate)
924 {
925     long i;
926     long j;
927     long nn=0;
928     MYREAL sqplus;
929     //    MYREAL v;
930 
931     for(j=0; j < maxreplicate; j++)
932       {
933 	nn += nmeans[j];
934       }
935     nn /= maxreplicate;
936 
937     for (i = 0; i < len; i++)
938     {
939       sqplus = (nn - 1.) / nn * gelmanw[i] + gelmanb[i];
940       //      v = (maxreplicate+1)/maxreplicate * sqplus;
941       gelmanr[i] = sqrt (sqplus / gelmanw[i]);// - (nn-1)/(maxreplicate * nn));
942     }
943 }
944 
945 void
calc_average_biggest_gelmanr(MYREAL * gelmanr,long len,MYREAL * meanR,MYREAL * bigR)946 calc_average_biggest_gelmanr (MYREAL *gelmanr, long len,
947                               MYREAL *meanR, MYREAL *bigR)
948 {
949     long i;
950     MYREAL average = 0;
951     MYREAL biggest = 0.;
952     for (i = 0; i < len; i++)
953     {
954         if (biggest < gelmanr[i])
955             biggest = gelmanr[i];
956         average += gelmanr[i];
957     }
958     if (len > 0)
959         *meanR = average / len;
960     else
961         *meanR = average;
962     *bigR = biggest;
963 }
964 
965 ///
966 /// calculate the variance of a chain tc is the parameter average
967 MYREAL
calc_s(long tthis,MYREAL * tc,world_fmt * world)968 calc_s (long tthis, MYREAL *tc, world_fmt * world)
969 {
970   timearchive_fmt * atl     = &world->atl[world->rep][world->locus];
971   long                    T = atl->T;
972   long              numpop  = world->numpop;
973   //  long              numpop2 = numpop * numpop;
974   long              startp;
975   long              startl;
976   long              startkm;
977   long              i;
978   long              j;
979   MYREAL            xx;
980   MYREAL            s = 0.0;
981 
982   startkm = numpop;
983   startp  = startkm + numpop;
984   startl  = startp + numpop;
985 
986   if (tthis < startkm)
987     {
988       i = tthis;
989       for (j = 0; j < T; j++)
990 	{
991 	  xx = atl->tl[j].kt[i] - tc[i];
992 	  s += xx * xx;
993 	}
994       if(s>0.)
995 	s /= T - 1.;
996       return s;
997     }
998     else
999     {
1000         if (tthis < startp)
1001         {
1002             i = tthis - startkm;
1003 	    for (j = 0; j < T; j++)
1004 	      {
1005 		xx = atl->tl[j].km[i] - tc[i];
1006 		s += xx * xx;
1007 	      }
1008 	    s /= T - 1.;
1009 	    return s;
1010         }
1011         else
1012         {
1013             if (tthis < startl)
1014 	      {
1015                 i = tthis - startp;
1016 		for (j = 0; j < T; j++)
1017 		  {
1018 		    xx = atl->tl[j].p[i] - tc[i];
1019 		    s += xx * xx;
1020 		  }
1021 		s /= T - 1.;
1022 		return s;
1023 	      }
1024             else
1025 	      {
1026                 i = tthis - startl;
1027 		for (j = 0; j < T; j++)
1028 		  {
1029 		    xx = atl->tl[j].mindex[i] - tc[i];
1030 		    s += xx * xx;
1031 		  }
1032 		s /= T - 1.;
1033 		return s;
1034 	      }
1035         }
1036     }
1037 }
1038 ///
1039 /// calculate the variance of a chain tc is the parameter average
1040 /// by supplying the length of the chain
1041 MYREAL
calc_s2(long tthis,MYREAL * tc,world_fmt * world,long T)1042 calc_s2 (long tthis, MYREAL *tc, world_fmt * world, long T)
1043 {
1044   timearchive_fmt * atl     = &world->atl[world->rep][world->locus];
1045   long              numpop  = world->numpop;
1046   //  long              numpop2 = numpop * numpop;
1047   long              startp;
1048   long              startl;
1049   long              startkm;
1050   long              i;
1051   long              j;
1052   MYREAL            xx;
1053   MYREAL            s = 0.0;
1054 
1055   startkm = numpop;
1056   startp  = startkm + numpop;
1057   startl  = startp + numpop;
1058 
1059   if (tthis < startkm)
1060     {
1061       i = tthis;
1062       for (j = 0; j < T; j++)
1063 	{
1064 	  xx = atl->tl[j].kt[i] - tc[i];
1065 	  s += xx * xx;
1066 	}
1067       if(s>0.)
1068 	s /= T - 1.;
1069       return s;
1070     }
1071     else
1072     {
1073         if (tthis < startp)
1074         {
1075             i = tthis - startkm;
1076 	    for (j = 0; j < T; j++)
1077 	      {
1078 		xx = atl->tl[j].km[i] - tc[i];
1079 		s += xx * xx;
1080 	      }
1081 	    s /= T - 1.;
1082 	    return s;
1083         }
1084         else
1085         {
1086             if (tthis < startl)
1087 	      {
1088                 i = tthis - startp;
1089 		for (j = 0; j < T; j++)
1090 		  {
1091 		    xx = atl->tl[j].p[i] - tc[i];
1092 		    s += xx * xx;
1093 		  }
1094 		s /= T - 1.;
1095 		return s;
1096 	      }
1097             else
1098 	      {
1099                 i = tthis - startl;
1100 		for (j = 0; j < T; j++)
1101 		  {
1102 		    xx = atl->tl[j].mindex[i] - tc[i];
1103 		    s += xx * xx;
1104 		  }
1105 		s /= T - 1.;
1106 		return s;
1107 	      }
1108         }
1109     }
1110 }
1111 
1112 ///
1113 /// calculate the variance of a chain tc is the parameter average
1114 MYREAL
calc_s_bayes(long tthis,MYREAL * tc,world_fmt * world)1115 calc_s_bayes (long tthis, MYREAL *tc, world_fmt * world)
1116 {
1117   //long T            = world->convergence->chain_counts[world->rep];
1118   long nn           = 2+world->numpop2 + (world->bayes->mu) ;
1119   long pnum         = world->bayes->numparams;
1120 
1121   MYREAL  * params     = world->bayes->params;
1122   long              i;
1123   long              j;
1124   MYREAL            xx;
1125   MYREAL            s = 0.0;
1126 
1127   i = tthis;
1128   for (j = 0; j < pnum /*T*/; j++)
1129     {
1130       xx = params[j * nn + i] - tc[i];
1131       s += xx * xx;
1132     }
1133   if(s>0.)
1134     s /= pnum - 1.;
1135   return s;
1136 }
1137 
1138 
1139 ///
1140 /// calculates means for the different events on a tree for the Gelman-Rubin statistic
1141 /// this version of the statistics operates on the compressed treedata and not the parameters
1142 ///
1143 void
chain_means_ml(MYREAL * thischainmeans,world_fmt * world)1144 chain_means_ml (MYREAL *thischainmeans, world_fmt * world)
1145 {
1146   timearchive_fmt * atl     = &world->atl[world->rep][world->locus];
1147   long              T       = atl->T;
1148   long              numpop  = world->numpop;
1149   long              numpop2 = numpop * numpop;
1150   long              startp;
1151   long              startl;
1152   long              startkm;
1153   long              i;
1154   long              j;
1155 
1156   startkm = numpop;
1157   startp  = startkm + numpop;
1158   startl  = startp + numpop;
1159 
1160   for (j = 0; j < T; j++)
1161     {
1162       for (i = 0; i < numpop; i++)
1163         {
1164 	  thischainmeans[i]           += atl->tl[j].kt[i];
1165 	  thischainmeans[i + startkm] += atl->tl[j].km[i];
1166 	  thischainmeans[i + startp]  += atl->tl[j].p[i];
1167         }
1168       for (i = 0; i < numpop2; i++)
1169 	{
1170 	  thischainmeans[i + startl]  += atl->tl[j].mindex[i];
1171 	}
1172     }
1173   for (i = 0; i < numpop; i++)
1174     {
1175       thischainmeans[i]           /= T;
1176       thischainmeans[i + startkm] /= T;
1177       thischainmeans[i + startp]  /= T;
1178     }
1179   for (i = startl; i < numpop2 + startl; i++)
1180     {
1181       thischainmeans[i] /= T;
1182     }
1183 }
1184 ///
1185 /// calculates means for the different parameters for the Gelman-Rubin statistic
1186 /// for bayesian inference
1187 void
chain_means_bayes(MYREAL * thischainmeans,world_fmt * world)1188 chain_means_bayes (MYREAL *thischainmeans, world_fmt * world)
1189 {
1190   //  long              T       = world->convergence->chain_counts[world->rep];
1191   long              T       = world->bayes->numparams;
1192   long              i;
1193   long              j;
1194   MYREAL           *params  = world->bayes->params;
1195   long              nn      = 2+world->numpop2 + (world->bayes->mu) ;
1196 
1197   for (j = 0; j < T; j++)
1198     {
1199       for (i = 2; i < nn; i++)
1200         {
1201 	  thischainmeans[i-2]           += params[j * nn + i];
1202         }
1203     }
1204   for (i = 2; i < nn; i++)
1205     {
1206       thischainmeans[i-2]           /= T;
1207     }
1208 }
1209 
1210 ///
1211 /// chain_means
1212 void
chain_means(MYREAL * thischainmeans,world_fmt * world)1213 chain_means (MYREAL *thischainmeans, world_fmt * world)
1214 {
1215   float temp;
1216 
1217   if(world->options->bayes_infer)
1218     {
1219       temp = (float) (world->convergence->chain_counts[world->rep]!=0) ? world->convergence->chain_counts[world->rep] : 1;
1220       world->convergence->chain_counts[world->rep] = temp;
1221       chain_means_bayes (thischainmeans, world);
1222     }
1223   else
1224     {
1225 #ifdef DEBUG
1226       printf("%i> world->rep=%li\n",myID, world->rep);
1227 #endif
1228       world->convergence->chain_counts[world->rep] = world->atl[world->rep][world->locus].T;
1229       chain_means_ml (thischainmeans, world);
1230     }
1231 }
1232 
1233 ///
1234 /// calculates the average over
1235 void
calc_chain_s(MYREAL * cs,MYREAL * cm,world_fmt * world,long replicate)1236 calc_chain_s(MYREAL *cs, MYREAL *cm, world_fmt *world, long replicate)
1237 {
1238   long len;
1239   long start;
1240   long stop;
1241   long i;
1242   if(world->options->bayes_infer)
1243     {
1244       len = world->numpop2+1;
1245       start = replicate * len;
1246       stop = start + len;
1247       for(i = start; i < stop; i++)
1248 	{
1249 	  cs[i] = calc_s_bayes (i-start, &cm[start], world);
1250 	}
1251     }
1252   else
1253     {
1254       len = world->numpop2 + 3*world->numpop;
1255       start = replicate * len;
1256       stop = start + len;
1257       for(i = start; i < stop; i++)
1258 	{
1259 	  cs[i] = calc_s (i-start, &cm[start], world);
1260 	}
1261     }
1262 }
1263 
1264 ///
1265 /// report convergence statistic to screen or file
convergence_progress(FILE * file,world_fmt * world)1266 void convergence_progress(FILE *file, world_fmt *world)
1267 {
1268   long i;
1269   long j;
1270   long maxreplicate = (world->options->replicate
1271 		       && world->options->replicatenum >
1272 		       0) ? world->options->replicatenum : 1;
1273 
1274   MYREAL *gelmanmeanR = world->convergence->gelmanmeanmaxR; //upper half of matrix
1275   MYREAL *gelmanmaxR = world->convergence->gelmanmeanmaxR;  //lower half of matrix
1276 
1277   fprintf(file,"\n\nGelman-Rubin convergence statistic\n");
1278   fprintf(file,"----------------------------------\n\n");
1279   fprintf(file,"Values close to 1.0, especially values < 1.2 are a sign of convergence of the\n");
1280   fprintf(file,"chains. On very short chains this statistic does not work well\n\n");
1281   if(world->options->gelmanpairs)
1282     {
1283       fprintf(file,"Pairwise evaluation of replicates\n[above diagonal: mean; below diagonal: maximum value]\n");
1284       fprintf(file,"%4li ", 1L);
1285       for(i=1; i < maxreplicate; i++)
1286 	{
1287 	  fprintf(file,"%6li ", i+1);
1288 	}
1289       fprintf(file,"\n");
1290       for(i=0; i < maxreplicate; i++)
1291 	{
1292 	  for(j=0; j < maxreplicate; j++)
1293 	    {
1294 	      if(i==j)
1295 		fprintf(file,"------ ");
1296 	      else
1297 		{
1298 		  if(i < j)
1299 		    {
1300 		      if(gelmanmeanR[i * maxreplicate + j] <=0.)
1301 			{
1302 			  fprintf(file,"  -N-  ");
1303 			}
1304 		      else
1305 			{
1306 			  fprintf(file," %1.3f ",gelmanmeanR[i * maxreplicate + j]);
1307 			}
1308 		    }
1309 		  else
1310 		    {
1311 		      if(gelmanmaxR[i * maxreplicate + j] <=0.)
1312 			{
1313 			  fprintf(file," -N-   ");
1314 			}
1315 		      else
1316 			{
1317 			  fprintf(file,"*%1.3f ",gelmanmaxR[i * maxreplicate + j]);
1318 			}
1319 		    }
1320 		}
1321 	    }
1322 	  fprintf(file,"\n");
1323 	}
1324       fprintf(file,"\n\n");
1325     }
1326   fprintf(file,"Overall convergence:\n");
1327   fprintf(file,"Mean sqrt(R)    = %f\n", world->convergence->gelmanmeanRall);
1328   fprintf(file,"Maximum sqrt(R) = %f\n\n\n", world->convergence->gelmanmaxRall);
1329 }
1330 
methods(world_fmt * world)1331 void methods(world_fmt *world)
1332 {
1333     char title[] = "Method description suggestion:";
1334     fprintf(world->outfile,"%s\n",title);
1335     fprintf(world->outfile,"Migrate was run in %s with the following run time settings:",
1336 	    world->options->bayes_infer ? "Bayesian mode (Beerli and Felsenstein 1999, 2001; Beerli 1998,2006)" :
1337 	    "Maximum Likelihood mode (Beerli 1998, Beerli and Felsenstein 1999, 2001)");
1338     if (world->options->bayes_infer)
1339       {
1340 	fprintf(world->outfile,"A total of x replicates were run. Each replicate represents a full\n");
1341 	fprintf(world->outfile,"MCMC run of one cold chain with x-1 heated connected chains. Each replicate\n");
1342 	fprintf(world->outfile,"visited a total of x states (Genealogy were visited with frequency of x and\n");
1343 	fprintf(world->outfile,"each parameter was visited with this average frequency x. The run parameters\n");
1344 	fprintf(world->outfile,"were set so that each locus sampled x observations from the visited states using\n");
1345 	fprintf(world->outfile,"an increment of y. We used uniform prior distributions with ranges of a to b\n" );
1346 	fprintf(world->outfile,"and c to d for Theta and M, respectively");
1347       }
1348 }
1349 
citations(world_fmt * world)1350 void citations(world_fmt *world)
1351 {
1352   char title[] = "Citation suggestions:";
1353   char texttitle[5][50]  = {"General method:", "Maximum likelihood:", "Bayesian inference:", "Likelihood ratio test:", "Bayes factors:"};
1354   char citations[9][320]  = {"[1] P. Beerli, 1998. Estimation of migration rates and population sizes in geographically structured populations., in Advances in Molecular Ecology, G. R. Carvalho, ed., vol. 306 of NATO sciences series, Series A: Life sciences, ISO Press, Amsterdam, pp. 39–53.",\
1355     "[2] P. Beerli and J. Felsenstein, 1999. Maximum-likelihood estimation of migration rates and effective popu- lation numbers in two populations using a coalescent approach, Genetics, 152:763–773.",\
1356     "[3] P. Beerli and J. Felsenstein, 2001. Maximum likelihood estimation of a migration matrix and effective population sizes in n subpopulations by using a coalescent approach, Proceedings of the National Academy of Sciences of the United States of America, 98: p. 4563–4568.",\
1357     "[4] P. Beerli, 2004. Effect of unsampled populations on the estimation of population sizes and migration rates between sampled populations, Molecular Ecology, 13: 827–836.",\
1358       "[5] P. Beerli, 2006. Comparison of Bayesian and maximum-likelihood inference of population genetic parameters. Bioinformatics 22:341-345",\
1359     "[6] P. Beerli, 2007. Estimation of the population scaled mutation rate from microsatellite data, Genetics, 177:1967–1968.",\
1360     "[7] P. Beerli, 2009. How to use migrate or why are Markov chain Monte Carlo programs difficult to use?, in Population Genetics for Animal Conservation, G. Bertorelle, M. W. Bruford, H. C. Hauffe, A. Rizzoli, and C. Vernesi, eds., vol. 17 of Conservation Biology, Cambridge University Press, Cambridge UK, pp. 42–79.",\
1361     "[8] P. Beerli and M. Palczewski, 2010. Unified framework to evaluate panmixia and migration direction among multiple sampling locations, Genetics, 185: 313–326."};
1362 
1363     fprintf(world->outfile,"%s\n",title);
1364 //general
1365     fprintf(world->outfile,"%s\n",texttitle[0]);
1366     fprintf(world->outfile,"%s\n%s\n%s\n",citations[0],citations[6],citations[5]);
1367 //likelihood
1368     fprintf(world->outfile,"%s\n",texttitle[1]);
1369     fprintf(world->outfile,"%s\n%s\n",citations[1],citations[2]);
1370 //Bayesian inference
1371     fprintf(world->outfile,"%s\n",texttitle[2]);
1372     fprintf(world->outfile,"%s\n%s\n",citations[4],citations[8]);
1373 //likelihood
1374     fprintf(world->outfile,"%s\n",texttitle[4]);
1375     fprintf(world->outfile,"%s\n",citations[7]);
1376 
1377 }
1378