1 /*! \file main.c */
2 /*! \mainpage M I G R A T E
3 *  \section intro Introduction
4 *  Migrate is a program to estimate population-genetic parameters from genetic data such as
5 *  electrophoretic marker data, microsatellite data, DNA or RNA sequence data, and single
6 *  nucleotide polymorphisms. Migrate uses the concepts of Maximum likelihood and Bayesian
7 *  inference to infer parameters based on coalescence theory. For more information related
8 *  to the use of the program or the theory behind check out
9 *  http://popgen.scs.fsu.edu/migrate.html
10 *
11 *  \section install Installation
12 *  to install the program simply do
13 *  \subsection step1 configure
14 *  \subsection step2 make
15 *  \subsection step3 sudo make install
16 *
17 *  \section maintainer Maintainer and Copyrights
18 *   Peter Beerli,
19 *   beerli@fsu.edu
20 *
21 *   Copyright 1997-2002 Peter Beerli and Joseph Felsenstein, Seattle WA\n
22 *   Copyright 2003-2013 Peter Beerli, Tallahassee FL\n
23 *
24 *  Permission is hereby granted, free of charge, to any person obtaining a copy
25 *  of this software and associated documentation files (the "Software"), to deal
26 *  in the Software without restriction, including without limitation the rights
27 *  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
28 *  of the Software, and to permit persons to whom the Software is furnished to do
29 *  so, subject to the following conditions:
30 *
31 *  The above copyright notice and this permission notice shall be included in all copies
32 *  or substantial portions of the Software.
33 *
34 *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
35 *  INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
36 *  PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
37 *  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
38 *  CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
39 *  OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 *
41 */
42 /* $Id: main.c 2169 2013-08-24 19:02:04Z beerli $*/
43 
44 #include "migration.h"
45 #include "sighandler.h"
46 #include "heating.h"
47 #include "histogram.h"
48 #include "world.h"
49 #include "data.h"
50 #include "options.h"
51 #include "mcmc.h"
52 #include "bayes.h"
53 #include "broyden.h"
54 #include "combroyden.h"
55 #include "menu.h"
56 #include "random.h"
57 #include "sequence.h"
58 #include "tree.h"
59 #include "tools.h"
60 #include "profile.h"
61 #include "aic.h"
62 #include "lrt.h"
63 #include "migrate_mpi.h"
64 #include "migevents.h"
65 #include "skyline.h"
66 #include "reporter.h"
67 #include "autotune.h"
68 #ifdef PRETTY
69 #include "pretty.h"
70 #endif /*PRETTY*/
71 #include <stdio.h>
72 #ifdef BEAGLE
73 #include "calculator.h"
74 #endif
75 #include "mutationmodel.h"
76 #ifdef UEP
77 #include "uep.h"
78 #endif
79 #ifdef DMALLOC_FUNC_CHECK
80 #include <dmalloc.h>
81 #endif
82 
83 #ifdef GRANDCENTRAL
84 #include <dispatch/dispatch.h>
85 #endif
86 
87 //use this if you have nice() and need to be nice with others
88 #ifdef HAVE_NICE
89 #include <unistd.h>
90 #endif
91 
92 #ifdef PRETTY
93 #ifdef WIN32
94 #include "pretty-win32.h"
95 #endif
96 #endif
97 
98 
99 /* Definitions for MCMCMC -- heated chains
100  * definitions of the first four heated chains, they are ordered from cold to hot */
101 #define EARTH  universe[0]	/*!< cold chain has always a temperature=1            */
102 #define VENUS  universe[1]	/*!< second coldest chain                             */
103 #define MERKUR universe[2]	/*!< thrid coldest chain                              */
104 #define SUN    universe[3]	/*!< fourth coldest chain                             */
105 
106 /* GLOBAL VARIABLES +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
107 int myID;	  /*!< myID=0 for single-cpu program and master in MPI program, worker nodes myID > 0 */
108 int myRepID;	  /*!< myID=0 for single-cpu program and master in MPI program, worker nodes myID > 0 */
109 int color;	  /*!< either 1 or MPI_UNDEFINED, in anticipation of more complicated MPI schemes     */
110 int numcpu;	  /*!< used if MPI is running otherwise == 1                                          */
111 int locidone;	  /*!< used in MPI workers                                                            */
112 long unique_id_global;
113 #ifdef MPI
114 #ifdef PARALIO
115 boolean my_file_open_error;
116 #endif
117 #ifdef  USE_MYREAL_FLOAT
118 #ifdef WINDOWS
119 #define mpisizeof MPI_FLOAT ;
120 #else
121 const MPI_Datatype mpisizeof = MPI_FLOAT ;
122 #endif
123 #else
124 #ifdef WINDOWS
125 //#define mpisizeof MPI_DOUBLE ;
126  MPI_Datatype mpisizeof ;
127 #else
128 const  MPI_Datatype mpisizeof = MPI_DOUBLE ;
129 #endif
130 #endif
131 #endif
132 
133 
134 #ifdef CAUTIOUS
135 boolean cautious; /*!< forces a check whether a file exists and will ask before overwriting           */
136 #endif
137 #ifdef MEMDEBUG
138 #include <sys/time.h>
139 FILE *memfile;
140 struct timeval memt_start, memt_finish;
141 double memelapsed;
142 long totalsize;
143 #endif
144 #ifdef MPI
145 filedb_fmt filedb[30];
146 long filenum;
147 
148 MPI_Comm comm_world;		/*!< parallel environment that contains knowledge about all workers and master */
149 //MPI_Comm comm_workers;		/*!< parallel environment that contains knowledge about all workers */
150 MPI_Group worker_group;
151 MPI_Group world_group;
152 #endif
153 #ifdef SLOWNET
154 int profiledone;		/*!< used in MPI workers in profiles on slow networks    */
155 #endif
156 #ifdef PTHREADS
157 tpool_t heating_pool;		/*!< when compiled with PTHREADS then holds all threads */
158 #else
159 #ifndef tpool_t
160 #define tpool_t char
161 #endif
162 tpool_t heating_pool;
163 #endif
164 
165 // random generator related global variables
166 long *seed;			/*!< contains the seed of the random number */
167 long *newseed;			/*!< contains the new random seed */
168 char *generator;		/*!< string that shows what random number generator is used */
169 
170 // for bayesian output counter, initialized in bayes_init()
171 long *mdimfilecount;
172 
173 //#ifdef PRETTY
174 // for pretty printing
175 pdf_doc doc;
176 pdf_page page;
177 pdf_contents canvas;
178 int page_counter;
179 char pdf_pagetitle[LINESIZE+1];
180 char pdf_time[LINESIZE+1];
181 float page_height;
182 float left_margin;
183 float page_width;
184 //#endif
185 
186 
187 int
188 setup_locus (long locus, world_fmt * world, option_fmt * options,
189              data_fmt * data);
190 
191 void
192 condense_time (world_fmt * world, long *step, long *j,
193                MYREAL * accepted, long *G, long *steps, long oldsteps,
194                long rep);
195 
196 MYREAL sumbezier(long intervals, MYREAL x0, MYREAL y0, MYREAL x1, MYREAL y1, MYREAL x2, MYREAL y2);
197 void calculate_BF(world_fmt **universe, option_fmt *options);
198 
199 long set_repkind (option_fmt * options);
200 
201 void
202 heating_init (world_fmt ** universe, int usize, data_fmt * data,
203               option_fmt * options);
204 
205 void
206 heating_prepare (world_fmt ** universe, int usize,
207                  option_fmt * options, data_fmt * data, long rep);
208 
209 void heating_prepare2 (world_fmt ** universe, int usize);
210 
211 long
212 replicate_number (option_fmt * options, long chain, char type,
213                   long replicate);
214 
215 void
216 combine_loci_standard (char type, option_fmt * options, world_fmt * world,
217                        long *Gmax);
218 void
219 combine_loci_mpi (char type, option_fmt * options, world_fmt * world,
220                   long *Gmax);
221 
222 void set_penalizer (long chain, long chains, char type,
223                     world_fmt ** universe);
224 
225 void
226 run_sampler (option_fmt * options, data_fmt * data,
227              world_fmt ** universe, int usize, long *outfilepos, long *Gmax);
228 
229 void run_replicate (long locus,
230                     long replicate,
231                     world_fmt ** universe,
232                     option_fmt * options,
233                     data_fmt * data,
234                     tpool_t * localheating_pool,
235                     int usize, long *treefilepos, long *Gmax);
236 
237 
238 void
239 run_locus (world_fmt ** universe, int usize, option_fmt * options,
240            data_fmt * data, tpool_t * localheating_pool, long maxreplicate,
241            long locus, long *treefilepos, long *Gmax);
242 
243 void
244 run_loci (world_fmt ** universe, int usize, option_fmt * options,
245           data_fmt * data, tpool_t * localheating_pool, long maxreplicate,
246           long *treefilepos, long *Gmax);
247 
248 void run_one_update (world_fmt * world);
249 
250 void run_updates (world_fmt ** universe,
251                   int usize, option_fmt * options,
252                   tpool_t * localheating_pool, long inc, long increment,
253                   long step, long steps);
254 
255 void heated_swap (world_fmt ** universe, worldoption_fmt * options);
256 
257 void
258 change_chaintype (long locus, char *type, world_fmt * world,
259                   long *increment, long *oldsteps, long *chains,
260                   option_fmt * options);
261 
262 void
263 prepare_next_chain (world_fmt ** universe, worldoption_fmt * options,
264                     char type, long chain, long *chains,
265                     long *pluschain, long locus, long replicate);
266 
267 void print_bayesfactor(FILE *file, world_fmt **universe, option_fmt * options);
268 #if defined(MPI) && !defined(PARALIO)
269 //void fix_bayesfactor(world_fmt *world, option_fmt * options);
270 void      print_marginal_like(float *temp, long *z, world_fmt * world);
271 #else /*not MPI*/
272 void      print_marginal_like(char *temp, long *c, world_fmt * world);
273 #endif
274 
275 void print_burnin_stop(FILE *file, world_fmt **universe, option_fmt * options);
276 void
277 print_heating_progress (world_fmt ** universe,
278                         worldoption_fmt * options, long stepinc);
279 
280 long analyze_olddata (world_fmt * world, option_fmt * options, data_fmt * data, long *outfilepos);
281 boolean analyze_oldbayesdata(world_fmt *world, option_fmt *options, data_fmt *data, long *outfilepos);
282 void print_theta0(FILE *file, world_fmt *world, long maxreplicate);
283 void profile_tables (option_fmt * options, world_fmt * world, long *gmaxptr);
284 
285 void finish_mac (option_fmt * options, data_fmt * data);
286 
287 int
288 setup_locus (long locus, world_fmt * world, option_fmt * options,
289              data_fmt * data);
290 
291 long set_repkind (option_fmt * options);
292 
293 void heating_prepare2 (world_fmt ** universe, int usize);
294 
295 long replicate_number (option_fmt * options, long chain, char type,
296                        long replicate);
297 
298 void print_heating_progress2 (FILE * file, worldoption_fmt * options,
299                               world_fmt ** universe);
300 
301 
302 void get_bayeshist (world_fmt * world, option_fmt * options);
303 void get_treedata (world_fmt * world, option_fmt * options);
304 void get_mighistdata (world_fmt * world, option_fmt * options);
305 
306 void change_longsum_times (world_fmt * world);
307 
308 boolean  check_parmfile(long argcount, char **arguments, char *parmfilename);
309 boolean set_usemenu(boolean usemenu, boolean fromparmfile);
310 void check_bayes_options(option_fmt *options);
311 void reset_bayesmdimfile(world_fmt *world, option_fmt *options);
312 long  calculate_newpop_numpop(option_fmt *options, data_fmt *data);
313 
314 extern void unset_penalizer_function (boolean inprofiles);
315 
316 ///
317 /// specifies the classes of override the menu as a parameter to the program
318 enum override_enum
319   {
320     OVERRIDE_NO, OVERRIDE_MENU, OVERRIDE_NOMENU
321   };
322 
323 
324 
325 ///
326 /// the program migrate calculates migration rates and population sizes
327 /// from genetic data, it allows for a wide variety of data types and many options
328 /// \callgraph
329 int
main(int argc,char ** argv)330 main (int argc, char **argv)
331 {
332   char      type       = 's';
333   int       usize      = 1;
334   int       usemenu    = OVERRIDE_NO;
335   long      locus;
336   long      Gmax       = 0;
337   long      outfilepos = 0;
338   data_fmt  *data;
339   boolean   restarted_bayes_bool=FALSE;
340 #ifdef INTEGRATEDLIKE
341   long      maxreplicate;
342 #endif
343   option_fmt *options;
344   world_fmt **universe;
345 #ifdef MPI
346   int       server;
347 #ifdef WINDOWS
348  mpisizeof = MPI_DOUBLE ;
349 #endif
350 #endif
351 #ifdef HAVE_NICE
352   nice (10); //nice value arbitrarily set to 10, 5 is still nice, 0 is standard
353 #endif
354   // puts(argv[0]);
355 
356   //---------------------------------------------------------------------------------------------
357   // windows specific code for pretty printing
358 #ifdef PRETTY
359 #ifndef MPI
360 #ifdef WIN32
361     set_haru_handler();
362 #endif
363 #endif
364 #endif
365 
366     //---------------------------------------------------------------------------------------------
367     // MPI initialisation
368 #ifdef MPI
369     // parallel version
370     filenum = 0; //setting up the filename database so that worker node can write back to master
371 
372     MPI_Init (&argc, &argv);
373     comm_world = MPI_COMM_WORLD;
374     MPI_Comm_size (comm_world, &numcpu);
375     MPI_Comm_rank (comm_world, &myID);
376     MPI_Comm_group (comm_world, &world_group);
377     server = MASTER;		//server ID
378     MPI_Group_excl (world_group, 1, &server, &worker_group);
379     locidone = 0;
380 
381 #ifdef SLOWNET
382     // slow network parallel version -- this will turn into the standard version
383     profiledone = 0;
384     which_calc_like (SINGLELOCUS);
385 #endif
386 
387 #else /*MPI*/
388     //scalar version of migrate
389     myID = MASTER;
390     numcpu = 1;
391 #endif /*MPI*/
392 
393     // debug code for memory problems
394 #ifdef MEMDEBUG
395     totalsize = 0 ;
396     memfile = fopen("memoryfile","w+");
397     gettimeofday(&memt_start, NULL);
398 #endif
399 
400     //---------------------------------------------------------------------------------------------
401     // initialization of main container and random number parts
402     seed = (long *) mymalloc (sizeof (long) * 3);
403     newseed = (long *) mymalloc (sizeof (long) * 3);
404     generator = (char *) mycalloc (1,sizeof(char) * 80);
405     universe = (world_fmt **) mycalloc (HEATED_CHAIN_NUM, sizeof (world_fmt *));
406 
407     // set flag to decide whether we use caution when writing files
408 #ifdef CAUTIOUS
409     cautious = FALSE;
410 #endif
411     // try to catch and beautify some error messages
412     signalhandling (ON);
413     unique_id_global=0;
414     // create main data structures
415     create_data (&data);
416     create_world (&(EARTH), 1L);
417     create_options (&options);
418 
419     // parmfile and menu
420     init_options (options);
421 
422     //---------------------------------------------------------------------------------------------
423     // master initializations, this works for both parallel and single-cpu version
424     if (myID == MASTER)
425       {
426 	usemenu = check_parmfile((long) argc,argv,options->parmfilename);
427 	read_options_master (options);
428 	options->menu = set_usemenu(usemenu, options->menu);
429 	print_menu_title (stdout, options);
430 	get_menu (options, EARTH, data);
431 	check_bayes_options(options);
432 #ifdef MPI
433 	//	MYMPIBARRIER (comm_world);
434         broadcast_options_master (options, data);
435 #endif
436       }
437     else
438       {
439 #ifdef MPI
440 	// MYMPIBARRIER (comm_world);
441 	broadcast_options_worker (options);
442 #endif
443 	options->menu = FALSE;
444       }
445 
446     //---------------------------------------------------------------------------------------------
447     // data initialization and data reading phase
448     usize = (options->heating ? ( MAX (1, options->heated_chains)) : 1 );
449     universe = (world_fmt **) myrealloc (universe, usize * sizeof (world_fmt *));
450     // opens files on all nodes
451 #ifdef DEBUG
452     char nowstr[LINESIZE];
453     get_time (nowstr, "%c");
454     printf("%i> before opening files for master and 'pointers' for workers  at %s\n",myID,nowstr);
455 #endif
456     init_files (EARTH, data, options);
457 #ifdef DEBUG
458     get_time (nowstr, "%c");
459     printf("%i> files are now open and ready at %s\n", myID, nowstr);
460 #endif
461 
462     if (options->writelog && myID == MASTER)
463       {
464         print_menu_title (options->logfile, options);
465       }
466 
467     EARTH->repkind = SINGLECHAIN;
468     //fprintf(stderr,"myID=%i myRepID=%i\n",myID, myRepID);
469     unset_penalizer_function (FALSE);	// install penalizer for far jumps in MCMC
470 
471     //---------------------------------------------------------------------------------------------
472     // sampling phase
473     if (!options->readsum) // all go here except when reading old runs
474       {
475         run_sampler (options, data, universe, usize, &outfilepos, &Gmax);
476       }
477     else
478       {
479 	if(options->bayes_infer)
480 	  {
481 	    // reanalyze old or broken bayesrun [not ready yet]
482 	    restarted_bayes_bool = analyze_oldbayesdata(EARTH, options, data, &outfilepos);
483 	  }
484 	else
485 	  {
486 	    //reanalyze sumfile
487 	    Gmax = analyze_olddata (EARTH, options, data, &outfilepos);
488 	  }
489       }
490     unset_penalizer_function (TRUE);
491 
492     //---------------------------------------------------------------------------------------------
493     // combining phase for multiple loci and replicates
494 #ifdef MPI
495     //with MPI:the workers stay here much longer than the Master
496     combine_loci_mpi (type, options, EARTH, &Gmax);
497 #else
498     combine_loci_standard (type, options, EARTH, &Gmax);
499 #endif
500     // bayes inference
501     if (options->bayes_infer)
502       {
503 	// if bayes intermediate data recording is ON then reset the file
504 	// for reading for printing and combining, else use the material still in RAM
505 	if(!restarted_bayes_bool)
506 	  reset_bayesmdimfile(EARTH, options);
507       }
508     else
509       {
510         // gather the treedata(1) into sumfile or from worker nodes
511         get_treedata (EARTH, options);
512       }
513 
514     //---------------------------------------------------------------------------------------------
515     // printing main results
516     if (myID == MASTER)
517       {
518 	if (options->bayes_infer)
519 	  {
520 	    // printe bayes inference main table
521 	    bayes_stat (EARTH);
522 	  }
523 	else
524 	  {
525 	    // print ML table
526 	    print_list (universe, options, data);
527 	    print_alpha_curve (EARTH, EARTH->atl, &Gmax);
528 	    if(options->printcov)
529 	      print_cov(EARTH, EARTH->numpop, EARTH->loci, EARTH->cov);
530 	    if (EARTH->options->lratio->counter > 0)
531 	      print_lratio_test (EARTH, &Gmax);
532 	    if (options->aic)
533 	      akaike_information (EARTH, &Gmax);
534 	  }
535 	fflush (EARTH->outfile);
536 	//---------------------------------------------------------------------------------------------
537 	// printing additional results
538 #ifdef MPI
539 	get_mighistdata (EARTH, options);
540 #endif
541 	if(options->skyline && options->bayes_infer)
542 	  {
543 	    print_expected_values(EARTH, options);
544 	  }
545 	if(options->mighist || options->skyline)
546 	  {
547 	    print_event_values(EARTH);
548 #ifdef PRETTY
549 	    pdf_histogram_legend();
550 #endif
551 	  }
552 #ifdef UEP
553 	// print UEP probabilities
554 	if (options->uep)
555 	  analyze_uep (EARTH);
556 #endif
557         // profile tables        unset_penalizer_function (TRUE);	//now we calculate profile and shut down
558 	// the penalizing of far jumps in the maximizer function
559       } // end of printing main tables and histograms
560 
561 #ifdef SLOWNET
562     which_calc_like (PROFILE);
563     if (myID == MASTER)
564       {
565 	// release the workers from the mpi_maximize_worker() function.
566 	// the workers will stop working on locus-likelihoods and advance to profiles
567 	mpi_send_stop (EARTH);
568       }
569     if (!options->bayes_infer)
570       {
571 	profile_tables (options, EARTH, &Gmax);
572       }
573 #else  /* SLOWNET*/
574     if (myID == MASTER)
575       {
576 #ifdef INTEGRATEDLIKE
577 	if(options->integrated_like)
578 	  {
579 	    maxreplicate = (options->replicate
580 			    && options->replicatenum >
581 			    0) ? options->replicatenum : 1;
582 	    print_theta0(stdout, EARTH, maxreplicate);
583 
584 	    if (EARTH->options->lratio->counter > 0)
585 	      print_lratio_test (EARTH, &Gmax);
586 	    if (options->aic)
587 	      akaike_information (EARTH, &Gmax);
588 
589 	    profile_tables (options, EARTH, &Gmax);
590 	  }
591 #else
592 	if (!options->bayes_infer)
593 	  {
594 	    profile_tables (options, EARTH, &Gmax);
595 	  }
596 #endif /*INTEGRATEDLIKE*/
597       }
598 #endif /* SLOWNET */
599 
600 #ifdef MPI
601     if (myID == MASTER)
602       {
603 #endif
604 	if(EARTH->options->bayes_infer)
605 	  {
606 	// print marginal likelihoods
607 #ifdef MPI
608 	    //    fix_bayesfactor(EARTH,options);
609 #endif
610 	    print_bayesfactor(EARTH->outfile, universe,options);
611 	// printing of MCMC run characteristics
612 	    fprintf(EARTH->outfile,"MCMC run characteristics\n");
613 	    fprintf(EARTH->outfile,"========================\n\n");
614 	    bayes_print_accept(EARTH->outfile,EARTH);
615 #ifdef PRETTY
616 	    pdf_bayes_print_accept(EARTH);
617 #endif
618 	    fprintf(EARTH->outfile,"Autocorrelation and Effective sample size\n");
619 	    fprintf(EARTH->outfile,"-------------------------------------------------------------------\n\n");
620 	    print_bayes_ess(EARTH->outfile,EARTH,EARTH->numpop2 + EARTH->bayes->mu * EARTH->loci + 1,2,EARTH->auto_archive, EARTH->ess_archive);
621 #ifdef PRETTY
622 	    pdf_bayes_print_ess(EARTH);
623 #endif
624 	  }
625 
626 	if(strchr("aet",EARTH->options->burnin_autostop))
627 	  {
628 	    print_burnin_stop(EARTH->outfile, universe, options);
629 	  }
630 
631 	if(EARTH->options->progress)
632 	  {
633 	    if(EARTH->options->bayes_infer)
634 	      //	    if(EARTH->options->bayes_infer && options->datatype!='g')
635 	      {
636 		fprintf(stdout,"\nMCMC run characteristics: Autocorrelation and Effective sample size\n");
637 		fprintf(stdout,"-------------------------------------------------------------------\n\n");
638 		print_bayes_ess(stdout,EARTH,EARTH->numpop2+ EARTH->bayes->mu * EARTH->loci + 1,2,
639 				     EARTH->auto_archive, EARTH->ess_archive);
640 
641 		print_bayesfactor(stdout, universe,options);
642 
643 	      }
644 	  }
645 	if(options->treeprint)
646 	  {
647 	    if(EARTH->options->treeinmemory)
648 	      {
649 		for(locus=0;locus<EARTH->loci; locus++)
650 		  {
651 		    fprintf(EARTH->treefile,"%s", EARTH->treespace[locus]);
652 		  }
653 	      }
654 #ifdef NEXUSTREE
655 	    FPRINTF(EARTH->treefile,"\nend;\n");
656 #endif
657 	  }
658 
659 	// if adaptive heating print a table with the average temperatures
660 	//
661 	if(options->heating && options->adaptiveheat!=NOTADAPTIVE)
662 	  {
663 	    fprintf(EARTH->outfile,"\n\n\nAverage temperatures during the run using %s\n",(options->adaptiveheat!=NOTADAPTIVE)
664 		    ==STANDARD ? "standard adaptive heating scheme" : "bounded adaptive heating scheme" );
665 	    fprintf(EARTH->outfile,"===========================================================================\n\n");
666 	    fprintf(EARTH->outfile,"Chain Temperature\n");
667 	    // locus means indicator for chain
668 	    for(locus = 0 ; locus < options->heated_chains; locus++)
669 	      {
670 		fprintf(EARTH->outfile,"%5li %10.5f\n",locus+1,universe[locus]->averageheat);
671 	      }
672 	    fprintf(EARTH->outfile,"Adaptive heating often fails, if the average temperatures are very close together\n");
673 	    fprintf(EARTH->outfile,"try to rerun using static heating! If you want to compare models using marginal\n");
674 	    fprintf(EARTH->outfile,"likelihoods then you MUST use static heating\n");
675 	    pdf_print_averageheat(universe,options);
676 	  }
677 	//
678 	// print warnings into the PDF and the outfile
679 	//
680 	if(EARTH->options->bayes_infer)
681 	  {
682 	    print_stored_warnings(EARTH);
683 	    pdf_print_stored_warnings(EARTH);
684 	  }
685 	//
686 	//
687 	//
688 	pdf_print_end_time(&page_height);
689 	// write out to PDF file
690 	pdf_write_file(options);
691 	print_finish (EARTH, outfilepos);
692 	// closing all files
693 #ifdef MAC
694 	finish_mac (options, data);
695 #endif
696 
697 	exit_files (EARTH, data, options);
698 #ifdef MPI
699 #    ifndef SLOWNET
700 	mpi_send_stop (EARTH); //stop workers
701 	// printf("%i> at barrier in slownet-main before finalize\n", myID);
702 #    endif
703       }
704     MYMPIBARRIER (comm_world);
705     MPI_Finalize ();
706 #endif /*MPI*/
707     //	myfree(seed);
708     //myfree(newseed);
709     //myfree(generator);
710     //free_universe(universe, usize, options);
711     //destroy_data(data);
712     //destroy_options(options);
713     // for debugging with MallocDebug on macosx
714 #ifdef MACMALLOCDEBUG
715     while(TRUE)
716       {
717 	sleep(1);
718       } //end for debugging
719 #endif
720     return 0;
721 }				/* main end */
722 
723 ///
724 /// Calculates the maximum likelihood estimates from the trees that were gathered for each locus
725 /// \callgraph
726 void
combine_loci_standard(char type,option_fmt * options,world_fmt * world,long * Gmax)727 combine_loci_standard (char type, option_fmt * options, world_fmt * world,
728                        long *Gmax)
729 {
730     long kind = MULTILOCUS;
731     if (options->readsum)
732     {
733         if (world->loci == 1)
734             kind = SINGLELOCUS;
735     }
736 #ifndef INTEGRATEDLIKE
737     if ((world->loci - world->skipped > 1) || (options->readsum))
738 #else
739     if ((world->loci - world->skipped > 1) || (options->readsum))
740 #endif
741     {
742         if (options->gamma)
743             world->param0[world->numpop2] = world->options->alphavalue;
744         world->repkind = set_repkind (options);
745         decide_plot (world->options, world->options->lchains,
746                      world->options->lchains, 'l');
747 #ifdef LONGSUM
748         change_longsum_times (world);	//multilocus
749 #endif /*LONGSUM*/
750 #ifdef INTEGRATEDLIKE
751 	    (void) estimateParameter (options->replicatenum, *Gmax, world, options,\
752 				      world->cov[world->loci], 0 /* chain */ ,\
753 				      type, kind, world->repkind);
754 #else
755 	if(!options->bayes_infer)
756 	  {
757 	    (void) estimateParameter (options->replicatenum, *Gmax, world, options,
758 				      world->cov[world->loci], 0L /* chain */ ,
759 				      type, kind, world->repkind);
760 	  }
761 #endif
762     }
763 }
764 
765 ///
766 /// Calculates the maximum likelihood estimates from the trees that were gathered for each locus
767 /// \callgraph
768 void
combine_loci_mpi(char type,option_fmt * options,world_fmt * world,long * Gmax)769 combine_loci_mpi (char type, option_fmt * options, world_fmt * world,
770                   long *Gmax)
771 {
772 #ifdef MPI
773     long kind = MULTILOCUS;
774     if (options->readsum)
775     {
776         if (world->loci == 1)
777             kind = SINGLELOCUS;
778     }
779     if ((world->loci - world->skipped > 0) || (options->readsum))
780     {
781         if (options->gamma)
782             world->param0[world->numpop2] = world->options->alphavalue;
783         world->repkind = set_repkind (options);
784         if (myID == MASTER)
785         {
786 #ifdef INTEGRATEDLIKE
787 	      mpi_gmax_master (world, Gmax);
788 	      mpi_startparam_master (world);
789 	      decide_plot (world->options, world->options->lchains,
790 			   world->options->lchains, 'l');
791 #else
792 	  if(!options->bayes_infer)
793 	    {
794 	      mpi_gmax_master (world, Gmax);
795 	      mpi_startparam_master (world);
796 	      decide_plot (world->options, world->options->lchains,
797 			   world->options->lchains, 'l');
798 	    }
799 #endif
800 #ifdef LONGSUM
801             change_longsum_times (world);	//multilocus
802 #endif	   /*LONGSUM*/
803             // broadcast Gmax to all workers, the worker pendant is in mpi_gmax_worker
804 #ifndef INTEGRATEDLIKE
805 	    if (!options->bayes_infer)
806 	      {
807 		MYMPIBCAST (Gmax, 1, MPI_LONG, MASTER, comm_world);
808 		(void) estimateParameter (options->replicatenum, *Gmax, world,
809 					  options, world->cov[world->loci],
810 					  0 /* chain */ ,
811 					  type, kind, world->repkind);
812 	      }
813 #else
814 		MYMPIBCAST (Gmax, 1, MPI_LONG, MASTER, comm_world);
815 		(void) estimateParameter (options->replicatenum, *Gmax, world,
816 					  options, world->cov[world->loci],
817 					  0 /* chain */ ,
818 					  type, kind, world->repkind);
819 #endif
820 	}
821         else
822         {
823 #ifndef INTEGRATEDLIKE
824             //            printf("%i> before gmax worker\n", myID);
825 	  if(!options->bayes_infer)
826 	    {
827 	      mpi_gmax_worker (world);
828 	      mpi_startparam_worker (world);
829 	    }
830             //            printf("%i> start worker postlike calculations\n", myID);
831 	  mpi_maximize_worker (world, options, MULTILOCUS, options->replicatenum);	// receive broadcast for Gmax
832 #else
833 	    mpi_gmax_worker (world);
834 	    mpi_startparam_worker (world);
835             mpi_maximize_worker (world, options, MULTILOCUS, options->replicatenum);	// receive broadcast for Gmax
836 #endif
837         }
838     }
839     //   else
840     //   {
841     //       if (myID != MASTER)
842     //       {
843     //           maxreplicate = (options->replicate
844     //                           && options->replicatenum >
845     //                           0) ? options->replicatenum : 1;
846     //           fprintf(stdout,"about to pack result buffer and send too");
847     //           mpi_results_worker(MIGMPI_SUMFILE, world, maxreplicate, pack_result_buffer);
848     //           //mpi_maximize_worker(world, options->replicatenum);
849     //       }
850     //   }
851 #endif
852 }
853 
854 ///
855 /// calculate the number populations to analyze and resize the newpop array if necessary
856 /// this function is essential only for cases where the user changes the population number
857 /// using the relabel option in the parmfile
858 /// in almost all cases options->newpops_numalloc and data->numpop are the same size.
859 /// Fix May 2015: relabel > numpop gets adjust to numpop and a warning is issued, because the
860 ///               datafile and the parmfile are mismatched; relabel can be smaller than numpop
861 ///               but not larger
calculate_newpop_numpop(option_fmt * options,data_fmt * data)862 long  calculate_newpop_numpop(option_fmt *options, data_fmt *data)
863 {
864   long pop;
865   long i;
866   long newnumpop=0;
867   long numpop = data->numpop;
868 #ifdef WIN32
869   // (old?) windows compilers have difficult to assign memory based on variables
870   long temp[5000];//this is __way__ more than migrate ever will be able to handle
871 #else
872   long temp[data->numpop < options->newpops_numalloc ? options->newpops_numalloc : data->numpop];
873 #endif
874   //use order of populations {1,2,3,4,5,....} , do not start with zero!!
875   //reset to options->newpops={1,1,2,1,2,3,4,...}
876   if (options->newpops_numalloc != numpop)
877     {
878       // newpops_numalloc > numpop ===> warn and resize
879       // newpops_numalloc < numpop ===> resize and fill
880       if(options->newpops_numalloc > numpop)
881 	{
882 	  warning("Population number in datafile is smaller than in the relabel option!");
883 	  warning("Relabel option will be adjust to number of populations in datafile");
884 	  options->newpops_numalloc = numpop;
885 	}
886       options->newpops = (long *) myrealloc(options->newpops,sizeof(long)*numpop);
887       for(i=options->newpops_numalloc;i<numpop;i++)
888 	{
889 	  options->newpops[i] = i+1;
890 	}
891       options->newpops_numalloc = numpop;
892     }
893   memcpy(temp,options->newpops,sizeof(long)*numpop);
894   qsort((void *) temp, (size_t) numpop, sizeof(long), longcmp);
895   pop = temp[0];
896   newnumpop = 1;
897   for(i=1;i<numpop;i++)
898     {
899       if(temp[i] != pop)
900 	{
901 	  newnumpop++;
902 	  pop = temp[i];
903 	}
904     }
905   options->newpops_numpop = newnumpop;
906   return newnumpop;
907 }
908 
909 
910 ///
911 /// the slice sampling stick size is allocated here for the options, this is
912 /// done in three different places for: standard, genealogy, MPI
alloc_sticksize(option_fmt * options,data_fmt * data)913 void alloc_sticksize(option_fmt *options, data_fmt *data)
914 {
915   if( options->slice_sticksizes==NULL)
916     {
917       options->slice_sticksizes = (MYREAL *) mycalloc(data->numpop * data->numpop + 1, sizeof(MYREAL));
918     }
919   else
920     {
921       options->slice_sticksizes = (MYREAL *) myrealloc(options->slice_sticksizes, (data->numpop * data->numpop + 1)* sizeof(MYREAL));
922     }
923 }
924 
set_skiploci(option_fmt * options,data_fmt * data,world_fmt ** universe)925 void set_skiploci(option_fmt *options, data_fmt *data, world_fmt **universe)
926 {
927   long locus;
928   long i;
929   for(locus=0;locus<data->loci;locus++)
930     {
931       for (i = 0; i < options->heated_chains; i++)
932 	universe[i]->data->skiploci[locus] = data->skiploci[locus];
933     }
934 }
935 
936 ///
937 /// runs the MCMC sampling
938 void
run_sampler(option_fmt * options,data_fmt * data,world_fmt ** universe,int usize,long * outfilepos,long * Gmax)939 run_sampler (option_fmt * options, data_fmt * data, world_fmt ** universe,
940              int usize, long *outfilepos, long *Gmax)
941 {
942   MYREAL var;
943   long i;
944   long maxreplicate;
945   long treefilepos;
946 #ifdef MPI
947   MPI_Request *irequests;	// contains requests generated in MPI_Isend
948   MPI_Status *istatus;		// conatins stata from MPI_Wait_any
949   long minnodes;
950   long *twolongs;		// message sent to workers contains locus and replicate number
951 
952   if (myID < data->loci + 1)
953     color = 1;
954   else
955     color = MPI_UNDEFINED;
956 
957   twolongs = (long *) mycalloc (TWO, sizeof (long));
958 #endif
959   getseed(options);
960 #ifdef MPI
961   if (myID == MASTER)
962     {
963 #endif
964 #ifdef DEBUG
965       char nowstr[LINESIZE];
966       get_time (nowstr, "%c");
967       printf("\n\nMaster start reading data at %s\n",nowstr);
968 #endif
969       get_data (data->infile, data, options, EARTH);
970 #ifdef MPI
971 #ifdef DEBUG
972       get_time (nowstr, "%c");
973       printf("Master finished reading data at %s\n",nowstr);
974 #endif
975       if (numcpu == 1)
976         {
977 	  error
978             ("This program was compiled to use a parallel computer\n and you tried to run it on only a single node.\nThis will not work because it uses a \n\"single_master-many_worker\" architecture \nand needs at least TWO nodes\n");
979         }
980       broadcast_data_master (data, options);
981     } /*end of myID==MASTER loop for MPI*/
982   else
983     {
984       broadcast_data_worker (data, options);
985     }
986 #endif
987   set_plot (options);
988   EARTH->cold = TRUE; // this is the cold chain when we use heating, the hotter chains have FALSE
989   // sticksizes are filled in init_world but initialized here so make sure that
990   // the heated chains do not produce a memory leak by mutiply initializing the sticksizes
991   alloc_sticksize(options,data);
992   // filling of options->slice_sticksizes
993   // deferred after world->bayes initialization in init_world()
994   calculate_newpop_numpop(options,data);
995 
996   init_world (EARTH, data, options);
997   //  set_meanmu(EARTH,options);
998   if(EARTH->cold)
999     single_chain_var (EARTH, 0L, &var, NULL, NULL);
1000 
1001 #ifdef NEXUSTREE
1002   if (EARTH->options->treeprint == LASTCHAIN)
1003     {
1004       FPRINTF(EARTH->treefile,"#NEXUS\n\nbegin trees;");
1005     }
1006 #endif
1007 
1008 #ifdef PTHREADS
1009   tpool_init (&heating_pool, usize, usize, 0);
1010 #endif
1011 
1012   if (options->heating)
1013     {
1014       //first in universe is the cold chain[EARTH]
1015       heating_init (universe, usize, data, options);
1016     }
1017     if(options->printfst || options->thetaguess==FST || options->migrguess==FST)
1018       {
1019 	calc_simple_param (EARTH, data);
1020       }
1021 
1022   /* report to screen */
1023   if (myID == MASTER)
1024     {
1025       print_menu_options (EARTH, options, data);
1026       if (options->progress)
1027 	print_data_summary (stdout, EARTH, options, data);
1028       /* print to outfile */
1029 #ifdef PRETTY
1030       pdf_master_init(EARTH, options, data);
1031 #endif
1032       *outfilepos = print_title (EARTH, options);
1033       print_options (EARTH->outfile, EARTH, options, data);
1034       print_data_summary (EARTH->outfile, EARTH, options, data);
1035       print_data (EARTH, options, data);
1036       print_spectra (EARTH, options, data);
1037       if(options->murates_fromdata)
1038 	{
1039 	  if (options->progress)
1040 	    print_mutationrate_weights(stdout, options->mu_rates, options->segregs,options->wattersons, data->loci);
1041 	  print_mutationrate_weights(EARTH->outfile, options->mu_rates, options->segregs,options->wattersons, data->loci);
1042 	}
1043 
1044       if (options->printfst)
1045 	print_fst (EARTH, options, data, EARTH->fstparam);
1046 
1047       set_skiploci(options, data, universe);
1048       if(options->bayes_infer && myID == MASTER)
1049 	{
1050 	  if(options->has_bayesmdimfile)
1051 	    print_bayes_mdimfileheader(EARTH->bayesmdimfile,
1052 				       options->bayesmdiminterval, EARTH, data);
1053 	}
1054     }
1055   if(!options->bayes_infer) // only needed for ML
1056     {
1057       if (options->lchains < 2 && options->replicatenum == 0)
1058 	{
1059 	  options->replicate = 0;
1060 	}
1061     }
1062   maxreplicate = universe[0]->maxreplicate;
1063     //(options->replicate
1064     //		  && options->replicatenum > 0) ? options->replicatenum : 1;
1065 #ifdef MPI
1066     minnodes = MAX (0, numcpu - data->loci - 1);
1067     irequests = (MPI_Request *) mycalloc (minnodes + 1, sizeof (MPI_Request));
1068     istatus = (MPI_Status *) mycalloc (minnodes + 1, sizeof (MPI_Status));
1069     if (myID == MASTER)
1070       {
1071 	mpi_runloci_master (data->loci, EARTH->who, EARTH, options->readsum, options->menu);
1072     }
1073     else
1074     {
1075         mpi_runloci_worker (universe, usize, options, data,
1076                             &heating_pool, maxreplicate, &treefilepos, Gmax);
1077     }
1078     myfree(istatus);
1079     myfree(irequests);
1080 #else /*MPI*/
1081     run_loci (universe, usize, options, data,
1082               &heating_pool, maxreplicate, &treefilepos, Gmax);
1083 
1084 #endif /*MPI*/
1085 
1086 #ifdef BEAGLE
1087     beagle_stop(universe, usize);
1088 #endif
1089 
1090 #ifdef PTHREADS
1091     tpool_destroy (heating_pool, 1);
1092 #endif
1093 #ifdef MPI
1094 
1095     if (myID != MASTER)
1096     {
1097 #endif
1098       if (options->heating)
1099 	{
1100 	  for (i = 0; i < options->heated_chains; i++)
1101 	    {
1102 	      //free_tree(universe[i]->root, universe[i]);
1103 	      free_timevector (universe[i]->treetimes);
1104 	    }
1105 	}
1106       else
1107 	{
1108 	  //free_tree(EARTH->root, EARTH);
1109 	  free_timevector (EARTH->treetimes);
1110 	}
1111 #ifdef MPI
1112     }
1113     myfree(twolongs);
1114 #endif
1115 }
1116 
1117 /// generate samples of genealogies for all loci
1118 void
run_loci(world_fmt ** universe,int usize,option_fmt * options,data_fmt * data,tpool_t * localheating_pool,long maxreplicate,long * treefilepos,long * Gmax)1119 run_loci (world_fmt ** universe, int usize, option_fmt * options,
1120           data_fmt * data, tpool_t * localheating_pool, long maxreplicate,
1121           long *treefilepos, long *Gmax)
1122 {
1123     long locus;
1124     for (locus = 0; locus < data->loci; locus++)
1125     {
1126       if(!data->skiploci[locus])
1127 	{
1128 	  run_locus (universe, usize, options, data,
1129 		     localheating_pool, maxreplicate, locus, treefilepos, Gmax);
1130 	}
1131       free_datapart(data,options,locus);
1132     }
1133 }
1134 
1135 
1136 /// save all genealogy summaries
1137 void
get_treedata(world_fmt * world,option_fmt * options)1138 get_treedata (world_fmt * world, option_fmt * options)
1139 {
1140 #ifdef MPI
1141   long maxreplicate = world->maxreplicate;
1142 #endif
1143 
1144     if (myID == MASTER)
1145     {
1146         if (options->writesum)
1147         {
1148 #ifdef MPI
1149             //get all sumfile data from the workers using unpack_sumfile_buffer()
1150             mpi_results_master (MIGMPI_SUMFILE, world, maxreplicate,
1151                                 unpack_sumfile_buffer);
1152 #endif
1153             write_savesum (world);
1154         }
1155 #ifdef SLOWNET
1156         else
1157         {
1158             //      printf("%i> about to receive the sumfile data\n", myID);
1159             mpi_results_master (MIGMPI_SUMFILE, world, maxreplicate,
1160                                 unpack_sumfile_buffer);
1161             //      printf("%i> all sumfile data received and unpacked\n", myID);
1162         }
1163 #endif
1164     }
1165 }
1166 
1167 ///
1168 /// save all genealogy summaries
1169 void
get_bayeshist(world_fmt * world,option_fmt * options)1170 get_bayeshist (world_fmt * world, option_fmt * options)
1171 {
1172 #ifdef MPI
1173   //long i;
1174   long maxreplicate = world->maxreplicate;
1175   if (myID == MASTER)
1176     {
1177       //for(i=0;i<world->loci;i++)
1178       //	  printf("%i> before moving stuff  %f %f\n",myID, world->hm[i],world->am[i]);
1179       mpi_results_master (MIGMPI_BAYESHIST, world, maxreplicate,
1180                             unpack_bayes_buffer);
1181       //for(i=0;i<world->loci;i++)
1182       //  printf("%i> after moving stuff  %f %f\n",myID, world->hm[i],world->am[i]);
1183     }
1184 #endif
1185 }
1186 
recalc_skyline_values(world_fmt * world,option_fmt * options,long maxreplicate)1187 void 	recalc_skyline_values(world_fmt *world, option_fmt * options, long maxreplicate)
1188 {
1189   long i, j, locus;
1190   mighistloci_fmt *aa;
1191   long * eventnum;
1192   const float invmax = (float) (1./maxreplicate);
1193   if(world->options->mighist && world->options->skyline)
1194     {
1195       for(locus=0; locus < world->loci; locus++)
1196 	{
1197 	  aa = &world->mighistloci[locus];
1198 	  eventnum = world->mighistloci[locus].eventbinnum;
1199 	  for(j=0; j< world->numpop2; j++)
1200 	    {
1201 	      for (i = 0; i < eventnum[j]; i++)
1202 		{
1203 		  aa->eventbins[j][i][0] *= invmax;
1204 		  aa->eventbins[j][i][1] *= invmax;
1205 		  aa->eventbins[j][i][2] *= invmax;
1206 		  aa->eventbins[j][i][3] *= invmax;
1207 		  aa->eventbins[j][i][4] *= invmax;
1208 		  aa->eventbins[j][i][5] *= invmax;
1209 		}
1210 	    }
1211 	}
1212     }
1213 }
1214 
1215 
1216 /// get migration event time data
1217 /// and skyline data when present
1218 void
get_mighistdata(world_fmt * world,option_fmt * options)1219 get_mighistdata (world_fmt * world, option_fmt * options)
1220 {
1221 #ifdef MPI
1222   long maxreplicate = world->maxreplicate;
1223   if (myID == MASTER)
1224     {
1225         if (options->mighist)
1226 	  {
1227             //get all mighist data from the workers using unpack_mighist_buffer()
1228             mpi_results_master (MIGMPI_MIGHIST, world, maxreplicate,
1229                                 unpack_mighist_buffer);
1230 	    if(options->skyline)
1231 	      {
1232 		//get all mighist data from the workers using unpack_mighist_buffer()
1233 		fprintf(stdout,"%i> before unpack skyline\n",myID);
1234 		mpi_results_master (MIGMPI_SKYLINE, world, maxreplicate,
1235 				    unpack_skyline_buffer);
1236 	      }
1237 	  }
1238 	if(/*world->options->treeprint ==BEST &&*/ world->options->treeinmemory==TRUE)
1239 	  {
1240             mpi_results_master (MIGMPI_TREESPACE, world, maxreplicate,
1241                                 unpack_treespace_buffer);
1242 	  }
1243 	recalc_skyline_values(world, options, maxreplicate);
1244     }
1245 #endif
1246 
1247 }
1248 
1249 /// swap the tree pointer between chains with different temperatures
1250 void
heated_swap(world_fmt ** universe,worldoption_fmt * options)1251 heated_swap (world_fmt ** universe, worldoption_fmt * options)
1252 {
1253     long sm = 0, mv = 0, vw = 0;
1254     long r;
1255     //debug for heating traverse issues
1256     if(options->heatedswap_off || (options->heating_count++ % options->heating_interval) != 0)
1257       return;
1258     switch (r = RANDINT (0L, options->heated_chains - 2L))
1259     {
1260         case 2:
1261             sm = chance_swap_tree (SUN, MERKUR);
1262             MERKUR->swapped += sm;
1263             break;
1264         case 1:
1265             mv = chance_swap_tree (MERKUR, VENUS);
1266             VENUS->swapped += mv;
1267             break;
1268         case 0:
1269             vw = chance_swap_tree (VENUS, EARTH);
1270             EARTH->swapped += vw;
1271             break;
1272         default:
1273             universe[r]->swapped += chance_swap_tree (universe[r + 1], universe[r]);
1274     }
1275 }
1276 
1277 
1278 /// change the type of the chain form short to long and reset the treelist
1279 void
change_chaintype(long locus,char * type,world_fmt * world,long * increment,long * oldsteps,long * chains,option_fmt * options)1280 change_chaintype (long locus, char *type, world_fmt * world, long *increment,
1281                   long *oldsteps, long *chains, option_fmt * options)
1282 {
1283     if (*type == 's')
1284     {
1285         *type = 'l';
1286         create_treetimelist (world, &(world->treetimes), locus);
1287         set_bounds (increment, oldsteps, chains, options, *type);
1288         world->increment = *increment;
1289     }
1290 }
1291 
1292 /// prepare the next chain
1293 void
prepare_next_chain(world_fmt ** universe,worldoption_fmt * options,char type,long chain,long * chains,long * pluschain,long locus,long replicate)1294 prepare_next_chain (world_fmt ** universe, worldoption_fmt * options,
1295                     char type, long chain, long *chains, long *pluschain,
1296                     long locus, long replicate)
1297 {
1298     long i;
1299     EARTH->likelihood[0] = EARTH->likelihood[EARTH->G];
1300     if (options->heating)
1301     {
1302         for (i = 1; i < options->heated_chains; i++)
1303             universe[i]->likelihood[0] = universe[i]->likelihood[universe[i]->G];
1304     }
1305     EARTH->treetimes[0].copies = 0;
1306     if (type == 'l')
1307     {
1308         if (options->replicate && options->replicatenum > 0)
1309             EARTH->chainlikes[locus][replicate] = EARTH->param_like;
1310         else
1311             EARTH->chainlikes[locus][chain] = EARTH->param_like;
1312     }
1313     EARTH->start = FALSE;
1314     if (type == 'l')
1315     {
1316         if (chain < *chains + *pluschain)
1317         {
1318             if (((EARTH->param_like > options->lcepsilon)
1319 		 //|| (options->gelman && EARTH->convergence->gelmanmaxRall > GELMAN_MYSTIC_VALUE)
1320 		 )
1321 		&& !(options->replicate && options->replicatenum == 0))
1322             {
1323                 (*chains)++;
1324                 (*pluschain)--;
1325             }
1326         }
1327     }
1328 }
1329 
1330 ///
1331 /// resetting of accept and accept_freq for heated chains
reset_heated_accept(world_fmt ** universe,long unum)1332 void reset_heated_accept(world_fmt **universe, long unum)
1333 {
1334   long i;
1335   for(i=0; i < unum; i++)
1336     {
1337       universe[i]->swapped = 0;
1338       universe[i]->accept = 0L;
1339       universe[i]->accept_freq = 0.;
1340     }
1341 }
1342 
1343 
1344 /// print progress about heated chains
1345 void
print_heating_progress(world_fmt ** universe,worldoption_fmt * options,long stepinc)1346 print_heating_progress (world_fmt ** universe, worldoption_fmt * options,
1347                         long stepinc)
1348 {
1349   MYREAL fstepinc = (MYREAL) stepinc;
1350     if (options->heating)
1351     {
1352 #ifdef DEBUG_MPI
1353       printf("%i> stepinc=%li =========\n",myID, stepinc);
1354 #endif
1355         universe[0]->accept_freq /= fstepinc;
1356         universe[1]->accept_freq /= fstepinc;
1357         universe[2]->accept_freq /= fstepinc;
1358         universe[3]->accept_freq /= fstepinc;
1359         if (options->progress)
1360             print_heating_progress2 (stdout, options, universe);
1361         if (options->writelog)
1362             print_heating_progress2 (options->logfile, options, universe);
1363     }
1364 }
1365 
1366 ///
1367 /// sub-function of print_heating, in MPI mode, the string printing will minimize data transfer.
1368 void
print_heating_progress2(FILE * file,worldoption_fmt * options,world_fmt ** universe)1369 print_heating_progress2 (FILE * file, worldoption_fmt * options,
1370                          world_fmt ** universe)
1371 {
1372   char *plog;
1373   long plogsize = 0L;
1374   char nowstr[STRSIZE];
1375   char dots[STRSIZE];
1376   get_time (nowstr, "%H:%M:%S");
1377   plog = (char *) mycalloc(LONGLINESIZE,sizeof(char));
1378 #ifdef MPI
1379   plogsize = sprintf(plog, "[%3i] %s   %li chains: ", myID, nowstr, options->heated_chains);
1380 #else
1381   plogsize = sprintf(plog, "%s   %li chains: ", nowstr, options->heated_chains);
1382 #endif
1383   if (options->heated_chains > 4)
1384     strcpy(dots,",...");
1385   else
1386     dots[0]='\0';
1387   plogsize += sprintf (plog + plogsize, "Temp(%.4g,%.4g,%.4g,%.4g%s) ",universe[0]->averageheat,
1388 		       universe[1]->averageheat,universe[2]->averageheat,universe[3]->averageheat,dots);
1389   plogsize += sprintf (plog + plogsize, "Acc(%.2f,%.2f,%.2f,%.2f%s) ",universe[0]->accept_freq,
1390 		       universe[1]->accept_freq,universe[2]->accept_freq,universe[3]->accept_freq,dots);
1391   plogsize += sprintf (plog + plogsize, "Swap(%li,%li,%li,%li%s)\n",universe[0]->swapped,
1392 		       universe[1]->swapped,universe[2]->swapped,universe[3]->swapped,dots);
1393   FPRINTF(file,"%s",plog);
1394   myfree(plog);
1395 }
1396 
1397 
1398 /// analyze old sumfile data
1399 long
analyze_olddata(world_fmt * world,option_fmt * options,data_fmt * data,long * outfilepos)1400 analyze_olddata (world_fmt * world,
1401                  option_fmt * options, data_fmt * data, long *outfilepos)
1402 {
1403     long locus;
1404 #ifdef MPI
1405     long listofthings[6];
1406 #endif
1407     long Gmax = 0;
1408     long pop;
1409     options->heating = FALSE;
1410     options->gelman = FALSE;
1411     if (myID == MASTER)
1412     {
1413         unset_penalizer_function (TRUE);
1414         Gmax = read_savesum (world, options, data);
1415 
1416         data->popnames = (char **) mymalloc (sizeof (char *) * world->numpop);
1417 	data->locusweight =
1418 	  (MYREAL *) mycalloc (1, sizeof (MYREAL) * (data->loci + 1));
1419 	for (locus=0;locus < data->loci; locus++)
1420 	  {
1421 	    data->locusweight[locus]=1.0;
1422 	  }
1423         for (pop = 0; pop < world->numpop; pop++)
1424         {
1425             data->popnames[pop] =
1426             (char *) mycalloc (1, sizeof (char) * LINESIZE);
1427             MYSNPRINTF (data->popnames[pop], LINESIZE, "Pop_%li ", pop + 1);
1428         }
1429 
1430 #ifdef MPI
1431 #ifdef WINDOWS
1432         MYMPIBCAST (data->geo, world->numpop2, MPI_DOUBLE, MASTER, comm_world );
1433         MYMPIBCAST (data->lgeo, world->numpop2, MPI_DOUBLE, MASTER, comm_world );
1434 #else
1435         MYMPIBCAST (data->geo, world->numpop2, mpisizeof, MASTER, comm_world );
1436         MYMPIBCAST (data->lgeo, world->numpop2, mpisizeof, MASTER, comm_world );
1437 #endif
1438     }
1439     else
1440     {
1441         MYMPIBCAST (listofthings, 6, MPI_LONG, MASTER, comm_world );
1442         //      printf("listofthings received\n");
1443         //    fflush(stdout);
1444         world->loci = listofthings[0];
1445         world->numpop = listofthings[1];
1446         world->numpop2 = listofthings[2];
1447         options->replicate = (boolean) listofthings[3];
1448         options->replicatenum = listofthings[4];
1449         Gmax = listofthings[5];
1450         data->numpop = world->numpop;
1451 	options->newpops_numalloc = world->numpop;
1452 	options->newpops = (long*) mycalloc(options->newpops_numalloc, sizeof(long));
1453 	calculate_newpop_numpop(options,data);
1454         data->geo = (MYREAL *) mycalloc (1, sizeof (MYREAL) * world->numpop2);
1455         data->lgeo = (MYREAL *) mycalloc (1, sizeof (MYREAL) * world->numpop2);
1456 	data->locusweight =
1457 	  (MYREAL *) mycalloc (1, sizeof (MYREAL) * (data->loci + 1));
1458 	for (locus=0;locus < data->loci; locus++)
1459 	  {
1460 	    data->locusweight[locus]=1.0;
1461 	  }
1462 #ifdef WINDOWS
1463         MYMPIBCAST (data->geo, world->numpop2, MPI_DOUBLE, MASTER, comm_world);
1464         MYMPIBCAST (data->lgeo, world->numpop2, MPI_DOUBLE, MASTER, comm_world);
1465 #else
1466         MYMPIBCAST (data->geo, world->numpop2, mpisizeof, MASTER, comm_world);
1467         MYMPIBCAST (data->lgeo, world->numpop2, mpisizeof, MASTER, comm_world);
1468 #endif
1469         options->muloci = data->loci = world->loci;
1470         init_world (world, data, options);
1471 #endif
1472 
1473     }
1474 #ifdef MPI
1475 #ifdef SLOWNET
1476     mpi_broadcast_results (world, world->loci,
1477                            pack_sumfile_buffer, unpack_sumfile_buffer);
1478 #endif
1479 #endif
1480 
1481     synchronize_param (world, options);
1482     if (options->plot)
1483     {
1484         options->plotnow = TRUE;
1485         world->options->plotnow = TRUE;
1486     }
1487     world->locus = 0;
1488     if (myID == MASTER)
1489     {
1490         *outfilepos = print_title (world, options);
1491         print_options (stdout, world, options, data);
1492         print_options (world->outfile, world, options, data);
1493 #ifdef PRETTY
1494 	pdf_master_init(world, options, data);
1495 #endif
1496 
1497 #ifdef MPI
1498 	MYMPIBARRIER(comm_world);
1499 	printf("%i> before mpi_runlocimaster\n",myID);fflush(stdout);
1500 	mpi_runloci_master (world->loci, world->who, world, options->readsum, options->menu);
1501 
1502 #else
1503 
1504         for (locus = 0; locus < world->loci; locus++)
1505         {
1506             if (options->replicate && options->replicatenum > 0)
1507             {
1508                 world->locus = locus;
1509                 world->repkind = MULTIPLERUN;
1510 #ifdef LONGSUM
1511                 change_longsum_times (world);	//multi run replicates
1512 #endif	       /*LONGSUM*/
1513 		if (!options->bayes_infer)
1514 		  {
1515 		    (void) estimateParameter (options->replicatenum, Gmax, world,
1516 					      options, world->cov[locus], 1L, 'l',
1517 					      SINGLELOCUS, world->repkind);
1518 		  }
1519 	    }
1520         }
1521 #endif
1522 
1523     }
1524 #ifdef MPI
1525     else
1526     {
1527       MYMPIBARRIER(comm_world);
1528       printf("%i> before assignloci_worker\n",myID);fflush(stdout);
1529       assignloci_worker (world, options, &Gmax);
1530       // some workers might still wait in assignloci_worker(), will be stopped through the master
1531     }
1532 #endif
1533     return Gmax;
1534 } /*analyze_olddata()*/
1535 
1536 
analyze_oldbayesdata(world_fmt * world,option_fmt * options,data_fmt * data,long * outfilepos)1537 boolean analyze_oldbayesdata(world_fmt *world, option_fmt *options, data_fmt *data, long *outfilepos)
1538 {
1539 #ifndef MPI
1540   long pop;
1541 #endif
1542   long locus;
1543   char **files = NULL;
1544   long numfiles=1;
1545   charvec2d(&files,numfiles,LINESIZE);
1546   // insert code for number of files and allocation and naming
1547   strcpy(files[0],options->bayesmdimfilename);
1548   // read data from bayesfile
1549   world->cold=TRUE;
1550   // reads some minimal information form the header of the bayesallfile
1551   // this only works with files written with migrate 2.5+
1552 #ifdef MPI
1553   if(myID==MASTER)
1554     {
1555       warning("does not work correctly yet");
1556       read_from_bayesmdim_minimal_info(world->bayesmdimfile, world, options, data);
1557       read_geofile (data, options, world->numpop);
1558       alloc_sticksize(options,data);
1559       options->newpops_numalloc = world->numpop;
1560       options->newpops = (long*) mycalloc(options->newpops_numalloc, sizeof(long));
1561       calculate_newpop_numpop(options,data);
1562       data->locusweight =
1563 	(MYREAL *) mycalloc (1, sizeof (MYREAL) * (data->loci + 1));
1564       for (locus=0;locus < data->loci; locus++)
1565 	{
1566 	  data->locusweight[locus]=1.0;
1567 	}
1568       /*options->newpops_numalloc = world->numpop;
1569       options->newpops = (long*) mycalloc(options->newpops_numalloc, sizeof(long));
1570       for(pop=0;pop<world->numpop;pop++)
1571       options->newpops[pop]=pop+1;*/
1572       init_world (world, data, options);
1573       read_bayes_fromfile(world->bayesmdimfile, world, options,files, numfiles);
1574     }
1575 #else
1576   read_from_bayesmdim_minimal_info(world->bayesmdimfile, world, options, data);
1577   read_geofile (data, options, world->numpop);
1578   alloc_sticksize(options,data);
1579   options->newpops_numalloc = world->numpop;
1580   options->newpops = (long*) mycalloc(options->newpops_numalloc, sizeof(long));
1581   for(pop=0;pop<world->numpop;pop++)
1582     options->newpops[pop]=pop+1;
1583   data->locusweight =
1584     (MYREAL *) mycalloc (1, sizeof (MYREAL) * (data->loci + 1));
1585   for (locus=0;locus < data->loci; locus++)
1586     {
1587       data->locusweight[locus]=1.0;
1588     }
1589   init_world (world, data, options);
1590   read_bayes_fromfile(world->bayesmdimfile, world, options, files, numfiles);
1591 #endif
1592   //set_meanmu(world,options);
1593 
1594 #ifdef PRETTY
1595   pdf_master_init(world, options, data);
1596 #endif
1597   for(locus=0;locus<world->loci;locus++)
1598     {
1599       if(world->data->skiploci[locus])
1600 	continue;
1601       if(world->bayes->histogram[locus].results == NULL)
1602 	{
1603 	  world->data->skiploci[locus] = TRUE;
1604 	  continue;
1605 	}
1606       calc_hpd_credibility(world->bayes, locus, world->numpop2, world->numpop2 + world->bayes->mu);
1607     }
1608   myfree(files);
1609   return TRUE;
1610 }
1611 
1612 /// generate profile likelihood tables
1613 void
profile_tables(option_fmt * options,world_fmt * world,long * gmaxptr)1614 profile_tables (option_fmt * options, world_fmt * world, long *gmaxptr)
1615 {
1616     long len;
1617 #ifndef SLOWNET
1618 #ifdef PRETTY
1619     long location = 0;
1620 #endif
1621     long i;
1622 #endif
1623 
1624     if (options->profile != _NONE)
1625     {
1626       //        world->buffer =
1627       //  (char *) myrealloc (world->buffer, allocbufsize * sizeof (char));
1628         if (options->printprofsummary)
1629             allocate_profile_percentiles (world);
1630 #ifdef HAVE_STRFTIME
1631         time (&world->starttime);
1632 #endif
1633         len = world->numpop2 + (world->options->gamma ? 1 : 0);
1634 #ifdef LONGSUM
1635         len += world->numpop * 3;
1636 #endif /*LONGSUM*/
1637 #ifdef SLOWNET
1638         if (!options->readsum)
1639 	  {
1640 	    mpi_broadcast_results (world, world->loci,
1641                                    pack_sumfile_buffer, unpack_sumfile_buffer);
1642 	  }
1643         mpi_broadcast_results (world,
1644                                world->loci + ((world->loci == 1) ? 0 : 1),
1645                                pack_result_buffer, unpack_result_buffer);
1646         //fprintf(stdout,"%i >>>>>>>>> wait for all at barrier in profiles_tables [main.c:1053]",myID);
1647         MYMPIBARRIER (comm_world);
1648         if (myID == MASTER)
1649         {
1650             print_profile_title (world);
1651 #ifdef PRETTY
1652 	    pdf_print_profile_title(world);
1653 #endif
1654             mpi_profiles_master (world, len, world->profilewho);
1655         }
1656         else
1657             mpi_profiles_worker (world, gmaxptr);
1658 #else /*SLOWNET -not*/
1659 	if(myID==MASTER && (options->profile == TABLES || options->profile == ALL))
1660 	  {
1661 	    print_profile_title (world);
1662 #ifdef PRETTY
1663 	    pdf_print_profile_title(world);
1664 #endif
1665 	  }
1666         for (i = 0; i < len; i++)
1667 	  {
1668             boolean success =  print_profile_likelihood_driver (i, world, gmaxptr);
1669 	    if(options->profile == TABLES || options->profile == ALL)
1670 	      {
1671 		LARGEFPRINTF (world->outfile, world->allocbufsize+2, "%s\n\n", world->buffer);
1672 		//ascii_print_profile_table(options->profilemethod, world);
1673 #ifdef PRETTY
1674 		location = strlen(world->buffer);
1675 		if(success)
1676 		  pdf_print_profile_table(55.0F, &page_height, options->profilemethod, world->buffer + location + 1, world);
1677 #endif
1678 	      }
1679             world->buffer[0] = '\0';
1680 	  }
1681 #endif /*SLOWNET end of not*/
1682         if (myID == MASTER && options->printprofsummary)
1683         {
1684             world->allocbufsize = print_profile_percentile (world);
1685             // print profile summary
1686             LARGEFPRINTF (world->outfile, world->allocbufsize, "%s\n\n", world->buffer);
1687 #ifdef PRETTY
1688 	    pdf_print_profile_percentile (world);
1689 #endif
1690             world->buffer[0] = '\0';
1691             destroy_profile_percentiles (world);
1692         }
1693     }
1694     //myfree(world->buffer);
1695 }
1696 
1697 /// finish up mac files \todo remove this function
1698 void
finish_mac(option_fmt * options,data_fmt * data)1699 finish_mac (option_fmt * options, data_fmt * data)
1700 {
1701 #ifdef MAC
1702 #if 0
1703     fixmacfile (options->outfilename);
1704     if (options->plotmethod == PLOTALL)
1705         fixmacfile (options->mathfilename);
1706     if (options->treeprint)
1707         fixmacfile (options->treefilename);
1708     if (options->parmfile)
1709         fixmacfile ("parmfile");
1710     if (data->sumfile)
1711         fixmacfile (options->sumfilename);
1712 #endif
1713 #endif
1714 }
1715 
1716 /// \brief setup a locus
1717 ///
1718 /// Set up a the structures for a locus. Run-Parameters are copied into the
1719 /// major structure WORLD from options and data. a first genealogy is build
1720 /// adjusted to the starting parameters (changing times of nodes) and a first
1721 /// conditional likelihood is calculated for all nodes, also the tree-parallele
1722 /// time structure is generated.
1723 /// Several run controllers are checked: replicate, datatype
1724 /// \retvalue 0 { failed to setup structure for locus because there is no data }
1725 /// \retvalue 1 { succeeded to setup locus-structure }
1726 int
setup_locus(long locus,world_fmt * world,option_fmt * options,data_fmt * data)1727 setup_locus (long locus, world_fmt * world, option_fmt * options,
1728              data_fmt * data)
1729 {
1730   world->locus = locus;
1731   set_param (world, data, options, locus);
1732   if (world->options->progress)
1733     print_menu_locus (stdout, world, locus);
1734   if (world->options->writelog)
1735     print_menu_locus (world->options->logfile, world, locus);
1736   world->start = TRUE;
1737   buildtree (world, world, options, data, locus);
1738 #ifdef BEAGLE
1739   finish_mutationmodel(world, data, options);
1740   init_beagle(world,locus);
1741   set_beagle_instances(world,TRUE); // sets two instances for beagle
1742   //  set_beagle_instances(world,FALSE); // so that wee can easily MCMC
1743   fill_beagle_instances(world);
1744 #endif
1745   if (data->skiploci[locus])	/* skip loci with no tips */
1746     return 0;
1747 
1748   create_treetimelist (world, &world->treetimes, locus);
1749   fix_times (world, options);
1750 #ifdef DEBUG
1751   //  printf(" fixed times \n");
1752 #endif
1753   first_smooth (world, locus);
1754   world->likelihood[0] = treelikelihood (world);
1755   world->allikemax = world->likelihood[0]; //-MYREAL_MAX;	/* for best tree option */
1756   world->treelen = 0.0;
1757   calc_treelength (world->root->next->back, &world->treelen);
1758 
1759 #ifdef UEP
1760   if (options->uep)
1761     {
1762         world->treelen = 0.0;
1763         if (options->ueprate > 0.0)
1764 	  calc_treelength (world->root->next->back, &world->treelen);
1765         //world->ueplikelihood = ueplikelihood(world);
1766         //world->likelihood[0] += world->ueplikelihood;
1767     }
1768 #endif
1769 
1770   world->has_proposal_details=FALSE;
1771   return 1;
1772 }
1773 
1774 /// condense a single genealogy into a sufficient statistic for the maximization phase
1775 void
condense_time(world_fmt * world,long * step,long * j,MYREAL * accepted,long * G,long * steps,long oldsteps,long rep)1776 condense_time (world_fmt * world, long *step, long *j, MYREAL * accepted,
1777                long *G, long *steps, long oldsteps, long rep)
1778 {
1779   static long n = 0;
1780 #ifdef INTEGRATEDLIKE
1781   long i;
1782   long z;
1783   long nn = world->numpop2 + world->bayes->mu * world->loci;
1784   MYREAL x;
1785   MYREAL delta;
1786 #endif
1787   world->rep = rep;
1788   if(world->options->bayes_infer)
1789     {
1790       if (world->in_last_chain)
1791 	{
1792 	  if (*step == 0)
1793 	    return;
1794 
1795 	  // syncs treeupdate and paramupdate
1796 	  if(world->bayesaccept == -1)
1797 	    {
1798 	      world->bayes->oldval = probg_treetimes(world);
1799 	    }
1800 	  bayes_save (world, *step * world->options->lincr);
1801 	  store_events(world, world->treetimes, world->numpop, rep);
1802 	  *accepted += *j;
1803 #ifdef INTEGRATEDLIKE
1804 	  n += 1;
1805 	  //	  fprintf(stdout,"\naverage MP-values: %li ", n);
1806 	  for(i=0; i < nn; i++)
1807 	    {
1808 	      if(world->bayes->map[i][1] == INVALID)
1809 		continue;
1810 	      else
1811 		{
1812 		  z  = world->bayes->map[i][1];
1813 		}
1814 	      x = world->param0[z];
1815 	      delta = x - world->atl[rep][world->locus].param0[z];
1816 	      world->atl[rep][world->locus].param0[z] += delta/n;
1817 	      //  fprintf(stdout,"%f ",world->atl[rep][world->locus].param0[z]);
1818 	    }
1819 	  //	  fprintf(stdout,"\n");
1820 #else
1821 	  return;
1822 #endif
1823 	}
1824       else
1825 	{
1826 	  warning("Bayesian analysis: do not run multiple long chains, run 1 long chain! [nothing will be wrong but it is a waste of time]");
1827 	  return;
1828 	}
1829     }
1830   if (*step == 0)
1831     {
1832       copy_time (world, world->treetimes, FIRSTSTEP, 0L, world->numpop, rep);
1833       *accepted += *j;
1834       //for INTEGRATEDLIKE
1835       n = 0;
1836     }
1837   else
1838     {
1839       copy_time (world, world->treetimes, *G, *G + (long) (*j > 0),
1840 		 world->numpop, rep);
1841       if (*step < *steps)
1842         {
1843 	  //handled in advance_world * G += (long) (*j > 0);
1844 	  *accepted += *j;
1845         }
1846     }
1847   if (*step >= oldsteps - 1 && world->options->movingsteps)
1848     {
1849       if (((MYREAL) (*G + 1)) < world->options->acceptfreq * oldsteps)
1850         {
1851 	  (*steps)++;
1852         }
1853     }
1854 }
1855 
print_theta0(FILE * file,world_fmt * world,long maxreplicate)1856 void print_theta0(FILE *file, world_fmt *world, long maxreplicate)
1857 {
1858   long i;
1859   long z;
1860   long locus;
1861   long rep;
1862   long nn = world->numpop2;
1863   fprintf(file,"Locus Replicate Parameters\n");
1864   fprintf(file,"-----------------------------------------------------------\n");
1865   for(locus=0; locus < world->loci; locus++)
1866     {
1867       for(rep = 0; rep < maxreplicate; rep++)
1868 	{
1869 	  fprintf(file,"%5li  %5li ",locus, rep+1);
1870 	  for(i=0; i < nn; i++)
1871 	    {
1872 	      if(world->bayes->map[i][1] == INVALID)
1873 		continue;
1874 	      else
1875 		{
1876 		  z  = world->bayes->map[i][1];
1877 		}
1878 	      fprintf(file,"%f ", world->atl[rep][world->locus].param0[z]);
1879 	    }
1880 	  fprintf(file,"\n");
1881 	}
1882     }
1883   fprintf(file,"\n\n");
1884 }
1885 
1886 /// set type of replication scheme
1887 long
set_repkind(option_fmt * options)1888 set_repkind (option_fmt * options)
1889 {
1890     if (options->replicate)
1891     {
1892         if (options->replicatenum == 0)
1893             return MULTIPLECHAIN;
1894         else
1895            return MULTIPLERUN;
1896     }
1897     return SINGLECHAIN;
1898 }
1899 
1900 /// intialize heating scheme
1901 void
heating_init(world_fmt ** universe,int usize,data_fmt * data,option_fmt * options)1902 heating_init (world_fmt ** universe, int usize, data_fmt * data,
1903               option_fmt * options)
1904 {
1905     long chain;
1906     char tmp[20];
1907     for (chain = 1; chain < usize; ++chain)
1908     {
1909         MYSNPRINTF (tmp, 20L, "%10.5f", options->heat[chain]);
1910         create_world (&universe[chain], data->loci);
1911 	universe[chain]->cold = FALSE;
1912         init_world (universe[chain], data, options);
1913 	universe[chain]->heatid = chain;
1914     }
1915 }
1916 
1917 /// prepare for heating scheme: Step I
1918 void
heating_prepare(world_fmt ** universe,int usize,option_fmt * options,data_fmt * data,long rep)1919 heating_prepare (world_fmt ** universe, int usize,
1920                  option_fmt * options, data_fmt * data, long rep)
1921 {
1922     long chain;
1923     EARTH->heat = options->heat[0];
1924     for (chain = 1; chain < usize; ++chain)
1925       {
1926 	buildtree (universe[0], universe[chain], options, data, EARTH->locus);//DEBUG
1927         klone (EARTH, universe[chain], options, data, options->heat[chain]);
1928       }
1929 }
1930 
1931 /// prepare for heating scheme: Step I [this function failed in the parallel run, unused for now, replaced by above]
1932 void
heating_prepare_old(world_fmt ** universe,int usize,option_fmt * options,data_fmt * data,long rep)1933 heating_prepare_old (world_fmt ** universe, int usize,
1934                      option_fmt * options, data_fmt * data, long rep)
1935 {
1936     long chain;
1937     EARTH->heat = options->heat[0];
1938     if (rep == 0)
1939     {
1940         for (chain = 1; chain < usize; ++chain)
1941             klone (EARTH, universe[chain], options, data, options->heat[chain]);
1942     }
1943     else
1944     {
1945         for (chain = 1; chain < usize; ++chain)
1946             klone_part (EARTH, universe[chain], options, data,
1947                         options->heat[chain]);
1948     }
1949 }
1950 
1951 /// prepare for heating scheme: Step II
1952 void
heating_prepare2(world_fmt ** universe,int usize)1953 heating_prepare2 (world_fmt ** universe, int usize)
1954 {
1955     long chain;
1956     universe[0]->averageheat = 1.0;
1957     for (chain = 1; chain < usize; ++chain)
1958     {
1959         universe[chain]->G = 0;
1960         universe[chain]->averageheat = universe[chain]->options->adaptiveheat!=NOTADAPTIVE ?
1961             0.0 : 1. / universe[chain]->heat;
1962     }
1963     for (chain = 1; chain < usize; ++chain)
1964     {
1965         clone_polish (EARTH, universe[chain]);
1966     }
1967 }
1968 
1969 /// \brief generates replicate number
1970 ///
1971 /// generates replicate number
1972 /// \rtval 0 no replicate
1973 /// \rtval rep number of replicates
1974 long
replicate_number(option_fmt * options,long chain,char type,long replicate)1975 replicate_number (option_fmt * options, long chain, char type, long replicate)
1976 {
1977     long rep = 0;
1978     if (options->replicate && options->replicatenum > 0)
1979         rep = replicate;
1980     else
1981     {
1982         if (type == 'l' && options->replicate)
1983             rep = chain;
1984         else
1985             rep = 0;
1986     }
1987     return rep;
1988 }
1989 
1990 /// generate genealogies for a single locus
1991 /// \callgraph
1992 void
run_locus(world_fmt ** universe,int usize,option_fmt * options,data_fmt * data,tpool_t * localheating_pool,long maxreplicate,long locus,long * treefilepos,long * Gmax)1993 run_locus (world_fmt ** universe, int usize, option_fmt * options,
1994            data_fmt * data, tpool_t * localheating_pool, long maxreplicate,
1995            long locus, long *treefilepos, long *Gmax)
1996 {
1997   long i;
1998   long convergence_len = universe[0]->numpop2 + 3 * universe[0]->numpop;
1999   long replicate;
2000   // DEBUG Hapmap stuff
2001   if(EARTH->data->skiploci[locus])
2002     return;
2003   if(options->bayes_infer)
2004     convergence_len = universe[0]->numpop2 + 1;
2005   memset(EARTH->convergence->chain_means,0,maxreplicate * convergence_len * sizeof(MYREAL));
2006   memset(EARTH->convergence->chain_s,0,maxreplicate * convergence_len * sizeof(MYREAL));
2007   memset(EARTH->convergence->gelmanmeanmaxR,0,maxreplicate * maxreplicate * sizeof(MYREAL));
2008   for (replicate = 0; replicate < maxreplicate; replicate++)
2009     {
2010       run_replicate (locus, replicate, universe, options, data,
2011 		     localheating_pool, usize, treefilepos, Gmax);
2012       if(EARTH->cold && EARTH->options->replicatenum > 0)
2013 	{
2014 	  if(options->gelman)
2015 	    {
2016 	      chain_means(&EARTH->convergence->chain_means[replicate * convergence_len], EARTH);
2017 	      calc_chain_s(EARTH->convergence->chain_s, EARTH->convergence->chain_means, EARTH,
2018 			   replicate);
2019 	      if (replicate > 0)
2020 		{
2021 		  convergence_check_bayes(EARTH, maxreplicate);
2022 		  convergence_progress(stdout, EARTH);
2023 		}
2024 	    }
2025 	}
2026     }
2027 #ifdef UEP
2028   if (options->uep)
2029     show_uep_store (EARTH);
2030 #endif
2031   if (options->replicate && options->replicatenum > 0)
2032     {
2033       EARTH->repkind = MULTIPLERUN;
2034       if (options->bayes_infer)
2035         {
2036 	  if(!options->has_bayesmdimfile)
2037 	    calculate_credibility_interval (EARTH, locus);
2038 	  //	  return;
2039         }
2040 #ifdef LONGSUM
2041       change_longsum_times (EARTH);
2042 #endif /*LONGSUM*/
2043       // over multiple replicates if present or single locus
2044 #ifdef INTEGRATEDLIKE
2045 	  (void) estimateParameter (options->replicatenum, *Gmax, EARTH,
2046 				    options, EARTH->cov[locus],
2047 				    options->lchains, /*type */ 'l',
2048 				    SINGLELOCUS, EARTH->repkind);
2049 #else
2050       if (!options->bayes_infer)
2051 	{
2052 	  (void) estimateParameter (options->replicatenum, *Gmax, EARTH,
2053 				    options, EARTH->cov[locus],
2054 				    options->lchains, /*type */ 'l',
2055 				    SINGLELOCUS, EARTH->repkind);
2056 	}
2057 #endif
2058     }
2059   else
2060     {
2061       if (options->bayes_infer)
2062         {
2063 	  if(!options->has_bayesmdimfile)
2064 	    {
2065 	      //adjust_bayes_bins(EARTH,locus);
2066 	      calculate_credibility_interval (EARTH, locus);
2067 	    }
2068 	}
2069     }
2070   if (options->heating)
2071     {
2072       for (i = 0; i < options->heated_chains; i++)
2073         {
2074 	  free_tree (universe[i]->root, universe[i]);
2075 	  //   free_timevector(universe[i]->treetimes);
2076 	  myfree(universe[i]->nodep);
2077         }
2078     }
2079   else
2080     {
2081       free_tree (EARTH->root, EARTH);
2082       myfree(EARTH->nodep);
2083       //  free_timevector(EARTH->treetimes);
2084     }
2085   if(options->bayes_infer)
2086     bayes_reset (universe[0]);
2087 }
2088 
2089 /// \brief updates the tree and records acceptance
2090 ///
2091 /// updates the tree and records acceptance
2092 /// when in bayesian mode change between changing the tree and the parameters
2093 void
run_one_update(world_fmt * world)2094 run_one_update (world_fmt * world)
2095 {
2096   long count = 0;
2097   long np = world->numpop2;
2098   //fprintf(stderr,"%i> locus=%li heat=%f\n",myID, world->locus, world->heat);
2099   //traverse_check(crawlback (world->root->next));
2100   if (world->options->bayes_infer)
2101     {
2102       if(world->bayes->mu)
2103 	{
2104 	  np += 1;
2105 	}
2106       world->bayesaccept = bayes_update (world); // trials is update for parameter update
2107       if (world->bayesaccept == -1)	//either do tree updates or parameter updates
2108         {
2109 	  world->accept += (count = tree_update (world, world->G));
2110 	  world->bayes->accept[np] += count;	//permanent recorder for the tree acceptance,
2111 	  // for some obscure reason the world->accept gets lost, most likely every cycle;
2112 	  world->bayes->trials[np] += 1;	// counter for trees tried
2113         }
2114       world->param_like = world->bayes->oldval;
2115     }
2116   else
2117     {
2118       world->accept += tree_update (world, world->G);
2119     }
2120 }
2121 
2122 /// \brief updates all trees, controls updates and heating
2123 ///
2124 /// controls updates and temperatures (threaded or unthreaded)
2125 /// updates the tree and/or parameters
2126 /// \callgraph
2127 void
run_updates(world_fmt ** universe,int usize,option_fmt * options,tpool_t * localheating_pool,long inc,long increment,long step,long steps)2128 run_updates (world_fmt ** universe,
2129              int usize,
2130              option_fmt * options,
2131              tpool_t * localheating_pool,
2132              long inc, long increment, long step, long steps)
2133 {
2134 #ifndef  PTHREADS
2135 #ifndef GRANDCENTRAL
2136     long ii;
2137 #endif
2138 #endif
2139     if (options->heating)
2140     {
2141 #ifdef PTHREADS			/*using threads and running on an SMP machine */
2142         fill_tpool (*localheating_pool, universe, usize);
2143         tpool_synchronize (*localheating_pool, usize);
2144 #else /*heating but not using threads */
2145 #ifdef GRANDCENTRAL
2146 	//	dispatch_apply(count, dispatch_get_global_queue(0, 0), ^(size_t i){
2147 	//    results[i] = do_work(data, i);
2148 	//  });
2149 	//
2150 	//
2151 	//        dispatch_queue_t queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
2152         dispatch_queue_t queue = dispatch_get_global_queue(0, 0);
2153         dispatch_apply(options->heated_chains, queue,
2154                        ^(unsigned long ii) {
2155                          run_one_update (universe[ii]);
2156                        }
2157                        );
2158 #else
2159         for (ii = 0; ii < options->heated_chains; ii++)
2160         {
2161 	  run_one_update (universe[ii]);
2162         }
2163 #endif /*GRANDCENTRAL*/
2164 #endif /* end of not-using threads */
2165 	if(options->bayes_infer && RANDUM() > options->updateratio)
2166 	  {
2167 #ifdef UEP
2168 	    if (options->uep && EARTH->in_last_chain)
2169 	      update_uepanc (EARTH);
2170 #endif
2171 	    return;
2172 	  }
2173 	else
2174 	  {
2175 	    heated_swap (universe, EARTH->options);
2176 	    switch (options->adaptiveheat)
2177 	      {
2178 	      case STANDARD:
2179 		adjust_temperatures (universe, options->heated_chains,
2180 				     inc /*rement */  + step * increment,
2181 				     steps * increment);
2182 		break;
2183 	      case BOUNDED:
2184 		adjust_temperatures_bounded (universe, options->heated_chains,
2185 				     inc /*rement */  + step * increment,
2186 				     steps * increment);
2187 		break;
2188 	      case NOTADAPTIVE:
2189 	      default:
2190 		break;
2191 	      }
2192 	  }
2193     }
2194     else
2195       {		/* no heating */
2196 	run_one_update (EARTH);
2197       }
2198 #ifdef UEP
2199     if (options->uep && EARTH->in_last_chain)
2200       update_uepanc (EARTH);
2201 #endif
2202 }
2203 
2204 /// generates trees over the interval increments
2205 void
run_increments(world_fmt ** universe,long usize,option_fmt * options,tpool_t * localheating_pool,long increment,long step,long steps)2206 run_increments (world_fmt ** universe,
2207                 long usize,
2208                 option_fmt * options,
2209                 tpool_t * localheating_pool,
2210                 long increment, long step, long steps)
2211 {
2212   static long treefilepos; /* write position in the treefile */
2213     long i;
2214     for (i = 0; i < increment; i++)
2215     {
2216         EARTH->actualinc = i;
2217         run_updates (universe, usize, options,
2218                      localheating_pool, i, increment, step, steps);
2219 
2220         if (EARTH->likelihood[EARTH->G] > EARTH->maxdatallike)
2221             EARTH->maxdatallike = EARTH->likelihood[EARTH->G];
2222     }
2223     if (EARTH->options->treeprint != _NONE)
2224       print_tree (EARTH, EARTH->G, &treefilepos);
2225 }
2226 
2227 /// \brief run a chain
2228 /// run a chain
2229 void
run_steps(world_fmt ** universe,long usize,option_fmt * options,tpool_t * localheating_pool,long increment,long steps,long rep)2230 run_steps (world_fmt ** universe,
2231            long usize,
2232            option_fmt * options,
2233            tpool_t * localheating_pool, long increment, long steps, long rep)
2234 {
2235     long step=0;
2236     long oldsteps = steps;
2237     long ii;
2238     MYREAL var=0.0;
2239     if(EARTH->cold && EARTH->options->bayes_infer)
2240       single_chain_var (NULL, step, &var, NULL, NULL);
2241 #ifdef MPI
2242     // does not work? check_memory_limit();
2243 #endif
2244     for (step = 0; step < steps; step++)// Cesky 2013
2245     {
2246 #ifdef MPI
2247       // does not work?? check_memory_limit();
2248 #endif
2249         EARTH->increment = increment;
2250         run_increments (universe, usize, options, localheating_pool,
2251                         increment, step, steps);
2252 #ifdef UEP
2253         store_uep (EARTH);
2254 #endif
2255 	//	printf("step=%li inc=%li locus=%li\n",step,increment,EARTH->locus);
2256 
2257         condense_time (EARTH, &step, &EARTH->accept,
2258                        &EARTH->accept_freq, &EARTH->G, &steps, oldsteps, rep);
2259 	if(EARTH->options->bayes_infer)
2260 	  {
2261 	    calculate_BF(universe,options);
2262 	    if(EARTH->cold)
2263 	      {
2264 		single_chain_var (EARTH, step, &var, EARTH->autocorrelation, EARTH->effective_sample);
2265 	      }
2266 	  }
2267         if (step > 0)
2268         {
2269             advance_clone_like (EARTH, EARTH->accept, &EARTH->G);
2270             EARTH->accept = 0L;
2271         }
2272         // if we do heating
2273         if (EARTH->options->heating)
2274         {
2275             for (ii = 1; ii < EARTH->options->heated_chains; ++ii)
2276             {
2277                 universe[ii]->accept_freq += universe[ii]->accept;
2278                 advance_clone_like (universe[ii],
2279                                     universe[ii]->accept, &(universe[ii])->G);
2280                 universe[ii]->accept = 0L;
2281             }
2282         }
2283     }
2284 }
2285 
2286 /// run all chains for short and then for long
2287 void
run_chains(world_fmt ** universe,long usize,option_fmt * options,tpool_t * localheating_pool,long replicate,long chains,char type,long increment,long oldsteps,long * treefilepos,long * Gmax)2288 run_chains (world_fmt ** universe,
2289             long usize,
2290             option_fmt * options,
2291             tpool_t * localheating_pool,
2292             long replicate,
2293             long chains,
2294             char type,
2295             long increment, long oldsteps, long *treefilepos, long *Gmax)
2296 {
2297   long i;
2298   long steps;
2299   long chain;
2300   long rep;
2301   const long locus = EARTH->locus;
2302   long kind;
2303   long pluschain = 0;
2304   const MYREAL treeupdateratio = (options->bayes_infer ? options->updateratio : 1.0);
2305 
2306   for (chain = 0;
2307        chain < chains || (type == 'l' && chain >= chains
2308 			  && EARTH->param_like > options->lcepsilon); chain++)
2309     {
2310       if (options->heating)
2311 	MERKUR->swapped = VENUS->swapped = EARTH->swapped = 0;
2312       rep = replicate_number (options, chain, type, replicate);
2313       EARTH->rep = rep;
2314       precalc_world (EARTH);
2315       polish_world (EARTH);
2316       burnin_chain (EARTH);
2317       EARTH->G = 0;
2318       EARTH->accept_freq = 0.;
2319       EARTH->accept = 0;
2320       steps = oldsteps;
2321       EARTH->maxdatallike = EARTH->likelihood[0];
2322       if (options->heating)
2323 	{
2324 	  heating_prepare2 (universe, usize);
2325 
2326 	  for(i=1; i<EARTH->options->heated_chains; i++)
2327 	    {
2328 	      burnin_chain (universe[i]);
2329 	    }
2330 	  reset_heated_accept(universe,EARTH->options->heated_chains);
2331 	}
2332       set_penalizer (chain, chains, type, universe);
2333       // run steps, run increments, run updaters
2334       run_steps (universe, usize, options, localheating_pool,
2335 		 increment, steps, rep);
2336 
2337       decide_plot (EARTH->options, chain, chains, type);
2338       // prepare for parameter estimation, precompute and copy
2339       memcpy (EARTH->param00, EARTH->param0,
2340 	      sizeof (MYREAL) * EARTH->numpop2);
2341 #ifndef INTEGRATELIKE
2342 	if(!options->bayes_infer)
2343 	  {
2344 	    memcpy (EARTH->atl[rep][locus].param0, EARTH->param00,
2345 		    sizeof (MYREAL) * EARTH->numpop2);
2346 	    log_param0 (EARTH->atl[rep][locus].param0,
2347 			EARTH->atl[rep][locus].lparam0, EARTH->numpop2);
2348 	    copies2lcopies (&EARTH->atl[rep][locus]);
2349 	  }
2350 #else
2351 	//	    memcpy (EARTH->atl[rep][locus].param0, EARTH->param00,
2352 	//	    sizeof (MYREAL) * EARTH->numpop2);
2353 	log_param0 (EARTH->atl[rep][locus].param0,
2354 		    EARTH->atl[rep][locus].lparam0, EARTH->numpop2);
2355 	copies2lcopies (&EARTH->atl[rep][locus]);
2356 #endif
2357         // start parameter estimation for
2358         // single chain, single replicate
2359         EARTH->repkind = SINGLECHAIN;
2360         kind = SINGLELOCUS;
2361         *Gmax = (EARTH->G > *Gmax) ? EARTH->G + 1 : (*Gmax);
2362 #ifdef LONGSUM
2363         change_longsum_times (EARTH);
2364 #endif /*LONGSUM*/
2365 	if (!options->bayes_infer)
2366 	  {
2367 	    (void) estimateParameter (rep, *Gmax, EARTH, options,
2368 				      EARTH->cov[locus], chain, type, kind,
2369 				      EARTH->repkind);
2370 	  }
2371         if (EARTH->options->heating)
2372 	  {
2373             print_heating_progress (universe, EARTH->options, (long) (increment * steps * treeupdateratio));
2374 	    reset_heated_accept(universe,EARTH->options->heated_chains);
2375 	  }
2376         else
2377 	  {
2378             print_progress (EARTH->options, EARTH, rep,
2379                             (long) (increment * steps * treeupdateratio), (long) EARTH->accept_freq);
2380 	    EARTH->accept_freq = 0. ;
2381 	    EARTH->accept = 0L ;
2382 	  }
2383         // cleanup and prepare next
2384         prepare_next_chain (universe, EARTH->options, type, chain,
2385                             &chains, &pluschain, locus, replicate);
2386     }				//end chains
2387 }
2388 
2389 /// set penalizing distance for jumps in the maximizer on or off
2390 void
set_penalizer(long chain,long chains,char type,world_fmt ** universe)2391 set_penalizer (long chain, long chains, char type, world_fmt ** universe)
2392 {
2393     if (type == 'l')
2394     {
2395         if (chain >= chains - 1)
2396         {
2397             EARTH->in_last_chain = TRUE;
2398             unset_penalizer_function (TRUE);
2399         }
2400     }
2401     else
2402     {
2403         EARTH->in_last_chain = FALSE;
2404         unset_penalizer_function (FALSE);
2405     }
2406 }
2407 
2408 
2409 /// \brief run a replicate
2410 ///
2411 /// run a replicate
2412 /// \callgraph
2413 void
run_replicate(long locus,long replicate,world_fmt ** universe,option_fmt * options,data_fmt * data,tpool_t * localheating_pool,int usize,long * treefilepos,long * Gmax)2414 run_replicate (long locus,
2415                long replicate,
2416                world_fmt ** universe,
2417                option_fmt * options,
2418                data_fmt * data,
2419                tpool_t * localheating_pool,
2420                int usize, long *treefilepos, long *Gmax)
2421 {
2422     long chains = 0;
2423     char type = 's';
2424     long runs;
2425     long increment = 1;
2426     long oldsteps = 0;
2427     /* loop over independent replicates----------------------- */
2428     /* but make sure that we get single chain estimates, too   */
2429     EARTH->locus = locus;
2430     EARTH->repkind = SINGLECHAIN;
2431     EARTH->replicate = replicate;
2432     if (setup_locus (locus, EARTH, options, data) == 0)
2433         return;
2434     if (options->heating)
2435         heating_prepare (universe, usize, options, data, replicate);
2436     type = 's';
2437     runs = 1;
2438     /* short and long chains ----------------------------- */
2439     set_bounds (&increment, &oldsteps, &chains, options, type);
2440     EARTH->increment = increment;
2441     while (runs-- >= 0)
2442     {
2443         if (myID == MASTER && type == 's')
2444         {
2445             print_menu_chain (type, FIRSTCHAIN, oldsteps, EARTH,
2446                               options, replicate);
2447             if (EARTH->options->treeprint == ALL)
2448                 print_tree (EARTH, 0, treefilepos);
2449         }
2450         EARTH->chains = chains;
2451         /* loop over chains--------------------------- */
2452         run_chains (universe, usize, options, localheating_pool, replicate, chains, type, increment, oldsteps, treefilepos, Gmax);	//PB 020203
2453         change_chaintype (locus, &type, EARTH, &increment, &oldsteps,
2454                           &chains, options);
2455 
2456         /* evaluate multiple long chain estimates */
2457         if (runs < 0 && options->replicate && options->replicatenum == 0)
2458         {
2459             EARTH->repkind = MULTIPLECHAIN;
2460 #ifdef LONGSUM
2461             change_longsum_times (EARTH);
2462 #endif	   /*LONGSUM*/
2463 	if (!options->bayes_infer)
2464 	  {
2465             (void) estimateParameter (replicate, *Gmax, EARTH, options,
2466                                       EARTH->cov[locus], chains, type,
2467                                       SINGLELOCUS, EARTH->repkind);
2468 	  }
2469         }
2470     }				//end runs
2471     //#ifndef MPI
2472     // collect correlation information of a locus and stick it into the archive,
2473     // the autocorrelation is averaged but the ess is summed up assuming that ll loci are independent
2474     if(EARTH->cold && EARTH->options->bayes_infer)
2475       {
2476 	  collect_ess_values(EARTH);
2477 	  collect_acceptance(EARTH);
2478       }
2479     //#endif
2480 }
2481 
2482 
print_burnin_stop(FILE * file,world_fmt ** universe,option_fmt * options)2483 void print_burnin_stop(FILE *file, world_fmt **universe, option_fmt * options)
2484 {
2485   world_fmt *world = universe[0];
2486   long z;
2487   long maxreplicate = (options->replicate
2488 		       && options->replicatenum >
2489 		       0) ? options->replicatenum : 1;
2490   fprintf(file,"\n\n\nStop of burn-in phase due to convergence\n");
2491   fprintf(file,"----------------------------------------\n");
2492   switch(options->burnin_autostop)
2493     {
2494     case 'a':
2495       fprintf(file,"[Stopping criteria was: Variance ratio between the last two groups of 1000 steps < %f]\n\n",world->varheat);
2496       break;
2497     case 't':
2498       fprintf(file,"[Stopping criteria was: reached prescribed acceptance ratio of %f]\n\n",world->options->autotune);
2499       break;
2500     case 'e':
2501       fprintf(file,"[Stopping criteria was: All effective MCMC sample sizes > %f]\n\n",world->essminimum);
2502       break;
2503     case ' ':
2504       fprintf(file,"\n\n");
2505       break;
2506     }
2507 
2508   fprintf(file,"Locus  Replicate  Steps  ESS*    Accept* Variance ratio (new/old variance)\n");
2509   fprintf(file,"-----  ---------  ------ ------- ------- ---------------------------------\n");
2510  for(z=0; z < world->loci * maxreplicate; z++)
2511     {
2512       if(world->burnin_stops[z].oldvariance > 0.0)
2513 	{
2514 	  fprintf(file,"%5li  %5li  %10li   %6.1f %6.4f %10.4f (%f/%f)\n",1 + world->burnin_stops[z].locus,
2515 		  world->burnin_stops[z].replicate,
2516 		  world->burnin_stops[z].stopstep,
2517 		  world->burnin_stops[z].ess,
2518 		  world->burnin_stops[z].accept,
2519 		  world->burnin_stops[z].variance/world->burnin_stops[z].oldvariance,
2520 		  world->burnin_stops[z].variance,
2521 		  world->burnin_stops[z].oldvariance);
2522 	}
2523       //			    world->burnin_stops[z].worker = myID;
2524     }
2525   fprintf(file,"(*=worst)\n\n");
2526 #ifdef PRETTY
2527   if(file==EARTH->outfile)
2528     pdf_burnin_stops(EARTH, maxreplicate);
2529 #endif
2530 }
2531 
combine_scaling_factor(world_fmt * world)2532 MYREAL combine_scaling_factor(world_fmt *world)
2533 {
2534   const long np = world->numpop2;
2535   long pop;
2536   long i;
2537   MYREAL scaling_factor=0.0;
2538   bayes_fmt * bayes = world->bayes;
2539   boolean *visited;
2540   visited = (boolean *) mycalloc(np,sizeof(boolean));
2541   for(i=0;i<np;i++)
2542     {
2543       if(bayes->map[i][1] == INVALID)
2544 	continue;
2545       else
2546 	{
2547 	  pop  = bayes->map[i][1];
2548 	}
2549       if(visited[pop]==TRUE)
2550 	continue;
2551       visited[pop] = TRUE;
2552       //      scaling_factor += exp(bayes->scaling_factors[pop] - bayes->maxmaxvala);
2553       scaling_factor += bayes->scaling_factors[pop]; //PRODUCT_parameters(scalinfactorcalculation_see_bayes.c)
2554 #ifdef DEBUG
2555       printf("%i> scaling factor test: %li  %li k=%f k_pop=%f %f\n",myID, i, pop, scaling_factor, bayes->scaling_factors[pop], bayes->maxmaxvala);
2556 #endif
2557     }
2558   //  scaling_factor = log(scaling_factor) + bayes->maxmaxvala;
2559   if(world->options->has_bayesfile)
2560     {
2561 #ifdef DEBUG
2562       printf("# Scaling factor %20.20f\n",scaling_factor);
2563 #endif
2564       fprintf(world->bayesfile, "# Scaling factor %20.20f\n",scaling_factor);
2565     }
2566   myfree(visited);
2567   return scaling_factor;
2568 }
2569 
print_bf_values(world_fmt * world)2570 void print_bf_values(world_fmt * world)
2571 {
2572   long locus;
2573   long t;
2574   const long hc = world->options->heated_chains;
2575   for(locus=0;locus<world->loci;locus++)
2576     {
2577       if(world->data->skiploci[locus])
2578 	continue;
2579       printf("=====> locus=%li ",locus );
2580       for(t=0; t < hc; t++)
2581 	{
2582 	  printf("%f ",world->bf[locus*hc+t]);
2583 	}
2584       printf("\n");
2585     }
2586 }
2587 
2588 #ifdef MPI_do_not_use
fix_bayesfactor(world_fmt * world,option_fmt * options)2589 void fix_bayesfactor(world_fmt *world, option_fmt * options)
2590 {
2591   return;
2592   long locus;
2593   long t;
2594   const long hc = world->options->heated_chains;
2595   const long maxreplicate = world->maxreplicate; //(options->replicate
2596   //&& options->replicatenum >
2597   //	       0) ? options->replicatenum : 1;
2598   if(options->heating)//-----------------------heating
2599     {
2600       for(locus=0;locus<world->loci;locus++)
2601 	{
2602 	  if(world->data->skiploci[locus])
2603 	    continue;
2604 	  // the thermodynamic integration is reporting maxreplicate times too high values
2605 	  // warning("this should fix the thermodynamic/mpi/replicate  problem, but needs to be checked for multilocus data");
2606 	  printf("@@@@@@@ locus=%li %li (%f) ",locus,maxreplicate, world->bf[locus * hc + 0] );
2607 	  for(t=0; t < hc; t++)
2608 	    {
2609 	      world->bf[locus * hc + t] /= maxreplicate;
2610 	      printf("%f ",world->bf[locus*hc+t]);
2611 	    }
2612 	  printf("\n");
2613 	}
2614     }
2615 }
2616 #endif
2617 
2618 
print_bayesfactor(FILE * file,world_fmt ** universe,option_fmt * options)2619 void print_bayesfactor(FILE *file, world_fmt **universe, option_fmt * options)
2620 {
2621   //#ifdef MPI
2622   //static boolean done=FALSE;
2623   //#endif
2624   long t;
2625   const long hc = EARTH->options->heated_chains;
2626   long locus;
2627   const long maxreplicate = universe[0]->maxreplicate; //(options->replicate
2628     //&& options->replicatenum >
2629     //	       0) ? options->replicatenum : 1;
2630   //  long lsteps = options->lsteps;
2631   //  MYREAL sum = 0.;
2632   MYREAL heat0 = 1.;
2633   MYREAL heat1 = 1.;
2634   MYREAL heat2 = 1.0;
2635   MYREAL val0  = 0.;
2636   MYREAL val1  = 0.;
2637   MYREAL val2  = 0.;
2638   MYREAL bfsum = 0.;
2639   MYREAL bfsum2 = 0.;
2640   MYREAL approxlsum = 0.;
2641   MYREAL hsum = 0.;
2642   MYREAL asum = 0.;
2643   MYREAL lsum;
2644   MYREAL lsum0;
2645   MYREAL scaling_factor = 0.0;
2646   MYREAL *locusweight = EARTH->data->locusweight;//invariant loci treatment
2647   // calculate the harmonic mean score
2648   for(locus=0;locus < EARTH->loci; locus++)
2649     {
2650       if(EARTH->data->skiploci[locus])
2651 	continue;
2652       hsum +=  locusweight[locus] * (EARTH->hmscale[locus] - log (EARTH->hm[locus]));//invariant loci treatment
2653     }
2654 
2655   fprintf(file,"\n\n\nLog-Probability of the data given the model (marginal likelihood = log(P(D|thisModel))\n");
2656   fprintf(file,"--------------------------------------------------------------------\n[Use this value for Bayes factor calculations:\nBF = Exp[log(P(D|thisModel) - log(P(D|otherModel)]\nshows the support for thisModel]\n\n");
2657 #ifdef PRETTY
2658   if(file==EARTH->outfile)
2659     pdf_bayes_factor_header(EARTH,options);
2660 #endif
2661   if(EARTH->loci>1)
2662     {
2663       fprintf(file,"\n\nLocus      Raw Thermodynamic score(1a)  Bezier approximated score(1b)     Harmonic mean(2)\n");
2664       fprintf(file,"------------------------------------------------------------------------------------------\n");
2665       if(file==EARTH->outfile)
2666 	pdf_bayes_factor_rawscores_header(EARTH,options);
2667     }
2668   if(options->heating)//-----------------------heating
2669     {
2670       for(locus=0;locus<EARTH->loci;locus++)
2671 	{
2672 	  if(EARTH->data->skiploci[locus])
2673 	    continue;
2674 
2675 	  lsum = 0.;
2676 	  lsum0 = 0.;
2677 	  // from very hot to cold, the
2678 	  for(t=1; t < hc; t++)
2679 	    {
2680 	      heat2 = heat0;
2681 	      val2 = val0;
2682 #ifdef MPI
2683 	      heat0 = 1./options->heat[t-1] ;
2684 	      heat1 = 1./options->heat[t];
2685 #else
2686 	      if(options->datatype!='g')
2687 		{
2688 		  if(options->adaptiveheat!=NOTADAPTIVE)
2689 		    {
2690 		      heat0 = 1./ universe[t-1]->averageheat ;
2691 		      heat1 = 1./ universe[t]->averageheat;
2692 		    }
2693 		  else
2694 		    {
2695 		      heat0 = universe[t-1]->heat ;
2696 		      heat1 = universe[t]->heat;
2697 		    }
2698 		}
2699 	      else
2700 		{
2701 		  heat0 = 1./options->heat[t-1];
2702 		  heat1 = 1./options->heat[t];
2703 		}
2704 #endif
2705 	      //Simpson's rule and trapezoidal are the same when I do only have function values at a and b
2706 	      val0 = locusweight[locus] * EARTH->bf[locus * hc + t-1];
2707 	      val1 = locusweight[locus] * EARTH->bf[locus * hc + t];
2708 	      //we keep last element to adjust for Bezier approximation
2709 	      lsum0 = (heat0 - heat1) * (val0 + val1) * 0.5;
2710 	      lsum += lsum0;
2711 #ifdef DEBUG
2712 	      //	      printf("%i> sum[%li]=%f temp=(%f %f) values=(%f %f)\n",myID,t,lsum,heat0,heat1,EARTH->bf[locus * hc + t],EARTH->bf[locus * hc + t]);
2713 #endif
2714 	    }
2715 	  //	  (x2 y1 - x1 y2)/(x1 - x2)
2716 	  // this last addition to the lsum calculates the chunk between the last temperature and
2717 	  // the infinitely hot temperature as a linear approximation of the the second hottest temperature
2718 	  // this is certainly rough, but in simulations with 3 populations one can see that with large number
2719 	  // of temperatures this looks reasonable, and one can approximate the integral more accurately with
2720 	  // with only a few columns.
2721 	  // using Bezier to approximate nice curve between the last two points to mimick the curve that
2722 	  // can be found with 16 or 32 heated chains, handle points are calculated using adhoc decisions
2723 	  // (comparison with 32 heated chains) using 0.8 of the interval for handle_y1 and the intercept
2724 	  // between the first and the second last point to calculate the handle_y2 see sumbezier()
2725 	  // for implementation. Currently this is not tunable.
2726 	  //	  approxlsum = sumbezier(100, heat1, EARTH->bf[locus * hc + t-1],
2727 	  //			 heat0, EARTH->bf[locus * hc + t-2],
2728 	  //			 1.0, EARTH->bf[locus * hc]);
2729 	  approxlsum = sumbezier(100L, heat1, val1,
2730 				 heat0, val0,
2731 				 heat2, val2);
2732 	  //#ifdef DEBUG
2733 	  //	  fprintf(stdout,"Thermo[%li]=(%f) - (%f) + (%f) = (%f)\n",locus, lsum, lsum0, approxlsum, approxlsum + lsum - lsum0);
2734 	  //	  #endif
2735 	  if(EARTH->loci>1)
2736 	    {
2737 	      fprintf(file,"  %5li%22.2f        %22.2f           %12.2f\n", locus + 1, lsum, lsum-lsum0+approxlsum, EARTH->hmscale[locus] - log (EARTH->hm[locus]));
2738 #ifdef PRETTY
2739 	      if(file==EARTH->outfile)
2740 		pdf_bayes_factor_rawscores(locus, lsum, lsum-lsum0+approxlsum, EARTH->hmscale[locus] - log (EARTH->hm[locus]));
2741 #endif
2742 	    }
2743 	  bfsum2 += approxlsum + lsum - lsum0;
2744 	  bfsum += lsum; //+ EARTH->bfscale[locus];
2745 	}
2746       if(EARTH->loci>1)
2747 	{
2748 	  scaling_factor = combine_scaling_factor(EARTH);
2749 	  bfsum  += scaling_factor;
2750 	  bfsum2 += scaling_factor;
2751 	  hsum   += scaling_factor;
2752 	  fprintf(file,"---------------------------------------------------------------------------------------\n");
2753 	  fprintf(file,"  All  %22.2f        %22.2f           %12.2f\n[Scaling factor = %f]\n", bfsum, bfsum2, hsum, scaling_factor);
2754 #ifdef PRETTY
2755 	  if(file==EARTH->outfile)
2756 	    pdf_bayes_factor_rawscores(-1L, bfsum, bfsum2, hsum);
2757 #endif
2758 	}
2759       else
2760 	{
2761 	  fprintf(file,"\n\n(1a) Thermodynamic integration: log(Prob(D|Model))=%f (%f with Bezier-approximation[1b]) %s\n", bfsum, bfsum2,
2762 		  options->adaptiveheat!=NOTADAPTIVE ? "(adaptive heating -> value may most likely be wrong)" : "");
2763 	  fprintf(file,"(2) Harmonic mean:             log(Prob(D|Model))=%f\n",hsum);
2764 	  fprintf(file,"(1) and (2) should give a similar result, in principle.\nBut (2) is overestimating the likelihood, it and is presented for historical reasons and should not be used. But (1) needs heating with chains that span a temperature range of 1.0 to at least 100,000.\n\n");
2765 	}
2766     }
2767   else   // -----------------------------------------not heating
2768     {
2769       for(locus=0;locus<EARTH->loci;locus++)
2770 	{
2771 	  if(EARTH->data->skiploci[locus])
2772 	    continue;
2773 	  if(EARTH->loci>1)
2774 	    {
2775 	      fprintf(file,"    %5li           ------                           ------            %12.2f\n", locus+1, EARTH->hmscale[locus] - log (EARTH->hm[locus]));
2776 #ifdef PRETTY
2777 	      if(file==EARTH->outfile)
2778 		pdf_bayes_factor_rawscores_harmo(locus, EARTH->hmscale[locus] - log (EARTH->hm[locus]));
2779 #endif
2780 	    }
2781 	}
2782       if(EARTH->loci>1)
2783 	{
2784 	  scaling_factor = combine_scaling_factor(EARTH);
2785 	  hsum += scaling_factor;
2786 	  fprintf(file,"---------------------------------------------------------------------------------------\n");
2787 	  fprintf(file,"      All           ------                           ------            %12.2f\n", hsum);
2788 #ifdef PRETTY
2789 	  if(file==EARTH->outfile)
2790 	    pdf_bayes_factor_rawscores_harmo(-1L, hsum);
2791 #endif
2792 	}
2793       else
2794 	{
2795 	  fprintf(file,"\n\n(1) Thermodynamic integration: log(Prob(D|Model))= UNAVAILABLE because no heating was used\n");
2796 	  fprintf(file,"(2) Harmonic mean:             log(Prob(D|Model))=%f\n",hsum);
2797 	  fprintf(file,"(1) and (2) should give a similar result, in principle.\nBut (2) is overestimating the likelihood, it \
2798 and is presented for historical reasons and should not be used. But (1) needs heating with chains that span a temperature rang\
2799 e of 1.0 to at least 100,000.\n\n");
2800 	}
2801     }
2802 #ifdef PRETTY
2803   if(file==EARTH->outfile)
2804     {
2805       pdf_bayes_factor(EARTH, bfsum, bfsum2, hsum, asum, maxreplicate,scaling_factor);
2806     }
2807 #endif
2808 }
2809 
2810 ///
2811 /// integrates over a Bezier curve between two points
2812 /// calculates two handle points that are set to adhoc values
2813 /// so that the x values of the handle are the the x value of the lowest point
2814 /// and the y values are set to about 80% of the min to max interval for the left point
2815 /// and a value that is the the y value from ax + b where a is calculated from a
2816 /// third point to the right and the second point, the third point is not used for the
2817 /// the Bezier curve otherwise
sumbezier(long intervals,MYREAL x0,MYREAL y0,MYREAL x1,MYREAL y1,MYREAL x2,MYREAL y2)2818 MYREAL sumbezier(long intervals, MYREAL x0, MYREAL y0, MYREAL x1, MYREAL y1, MYREAL x2, MYREAL y2)
2819 {
2820   const MYREAL inv_interval = 1./intervals;
2821   const MYREAL sx0 = x0;
2822   const MYREAL sx1 = x0;
2823   const MYREAL sy0 = 0.2 * y0 + 0.8 * y2;
2824   const MYREAL sy1 = (-x2 * y1 + x1 * y2)/(x1 - x2);
2825   MYREAL t     = 0.0;
2826   MYREAL t2    = 0.0;
2827   MYREAL t3    = 0.0;
2828   MYREAL onet  = 1.0;
2829   MYREAL onet2 = 1.0;
2830   MYREAL onet3 = 1.0;
2831   MYREAL newx;
2832   MYREAL newy;
2833   MYREAL oldx;
2834   MYREAL oldy;
2835   MYREAL sum = 0.0;
2836   // integrate over intervals between x0 and x1 and return sum
2837   // intialize with t=0.0
2838   oldx = x0;
2839   oldy = y0;
2840   //fprintf(stdout,"\n\n%f %f %f %f %f %f\n",x2,y2,sx0,sy0, x0,y0);
2841   for(t=inv_interval; t <= 1.0; t += inv_interval)
2842     {
2843       onet  = 1.0 - t;
2844       onet2 = onet * onet;
2845       onet3 = onet2 * onet;
2846       onet2 *= 3.0 * t;
2847       t2 = t * t;
2848       t3 = t2 * t;
2849       t2 *=  3.0 * onet;
2850       //      newx = 3sx0 (1-t)^2 t + 3 sx1 (1-t) t^2 + (1-t)^3 x0 + t^3 x1
2851       newx = sx0 * onet2 + sx1 * t2 + onet3 * x0 + t3 * x1;
2852       newy = sy0 * onet2 + sy1 * t2 + onet3 * y0 + t3 * y1;
2853       //      fprintf(stdout,"%f %f\n",newx,newy);
2854       sum += (newx - oldx) * (newy + oldy)/2.;
2855       oldx = newx;
2856       oldy = newy;
2857     }
2858   //  fprintf(stdout,"%f %f %f %f\n\n\n",x1,y1,sx1,sy1);
2859   return sum;
2860 }
2861 
2862 
2863 /// calculate values for the marginal likelihood using thermodynamic integration
2864 /// based on a method by Friel and Pettitt 2005
2865 /// (http://www.stats.gla.ac.uk/research/TechRep2005/05.10.pdf)
2866 /// this is the same method described in Lartillot and Phillippe 2006 Syst Bio
2867 /// integrate over all temperature using a simple trapezoidal rule
2868 /// prob(D|model) is only accurate with intervals for temperatures from 1/0 to 1/1.
2869 /// reports also the harmonic mean
calculate_BF(world_fmt ** universe,option_fmt * options)2870 void calculate_BF(world_fmt **universe, option_fmt *options)
2871 {
2872   long i;
2873   MYREAL xx, xx2;
2874   //  MYREAL logprior;
2875 #ifdef THERMOCHECK
2876   static long counter=0;
2877 #endif
2878   long locus = universe[0]->locus;
2879   long hc = options->heated_chains;
2880   if(universe[0]->data->skiploci[locus])
2881     return;
2882   if(EARTH->likelihood[EARTH->G] <= -HUGE)
2883     return;
2884   EARTH->am[locus] += 1;
2885   locus = EARTH->locus;
2886   xx = EARTH->likelihood[EARTH->G];
2887 
2888 #ifdef THERMOCHECK
2889   counter++;
2890 #endif
2891 
2892   if(xx <= -HUGE)
2893     return;
2894 
2895   if(xx > EARTH->hmscale[locus])
2896     {
2897       //      EARTH->hm[locus] += EXP(EARTH->hmscale[locus] - xx);
2898       xx2 = EXP(EARTH->hmscale[locus] - xx);
2899       EARTH->hm[locus] += (xx2 - EARTH->hm[locus])/ (EARTH->am[locus]);
2900     }
2901   else
2902     {
2903       EARTH->hm[locus] *= EXP(xx - EARTH->hmscale[locus]);
2904       //      EARTH->hm[locus] *= xx - EARTH->hmscale[locus];
2905       EARTH->hmscale[locus] = xx;
2906       //      EARTH->hm[locus] += 1.;
2907       EARTH->hm[locus] += (1. - EARTH->hm[locus])/ (EARTH->am[locus]);
2908     }
2909 
2910 #ifdef THERMOCHECK
2911   if(options->mixplot)
2912     fprintf(universe[0]->options->mixfile,"@BF %li %li %10.10f %10.10f",counter, locus, EARTH->heat, xx);
2913   //, EARTH->hmscale[locus]);
2914 #endif
2915 
2916   if(options->heating)
2917     {
2918       for (i = 0; i < hc; i++)
2919 	{
2920 	  xx = universe[i]->likelihood[universe[i]->G];
2921 #ifdef THERMOCHECK
2922 	  if(options->mixplot)
2923 	    fprintf(universe[0]->options->mixfile," @T %10.10f %10.10f", universe[i]->heat, xx);
2924 #endif
2925 	  EARTH->bf[locus * hc + i] += (xx - EARTH->bf[locus * hc + i])/ (EARTH->am[locus]);
2926 	  //EARTH->bf[locus * hc + i] += (xx - EARTH->bf[locus * hc + i]);
2927 	  //#ifdef THERMOCHECK
2928 	  //fprintf(universe[0]->options->mixfile,"%f\n", 	  EARTH->bf[locus * hc + i]);
2929 	  //#endif
2930 	}
2931     }
2932 #ifdef THERMOCHECK
2933   if(options->mixplot)
2934   fprintf(universe[0]->options->mixfile,"\n");
2935 #endif
2936 }
2937 
print_marginal_order(char * buf,long * bufsize,world_fmt * world)2938 void  print_marginal_order(char *buf, long *bufsize, world_fmt *world)
2939 {
2940   long i;
2941 
2942   for(i=0;i<world->options->heated_chains;i++)
2943     *bufsize += sprintf(buf+ *bufsize,"# --  %s = %f\n", "Thermodynamic temperature", world->options->heat[i]);
2944   *bufsize += sprintf(buf+ *bufsize,"# --  %s\n", "Marginal log(likelihood) [Thermodynamic integration]");
2945   *bufsize += sprintf(buf+ *bufsize,"# --  %s\n", "Marginal log(likelihood) [Harmonic mean]");
2946 }
2947 
2948 #if defined(MPI) && !defined(PARALIO) /* */
print_marginal_like(float * temp,long * z,world_fmt * world)2949 void      print_marginal_like(float *temp, long *z, world_fmt * world)
2950 {
2951   long locus = world->locus;
2952   long t;
2953   long hc = world->options->heated_chains;
2954   MYREAL lsum;
2955   MYREAL heat0, heat1;
2956 
2957   if(world->options->heating)
2958     {
2959       lsum = 0.;
2960       for(t=1; t < hc; t++)
2961 	{
2962 	  heat0 = 1./world->options->heat[t-1] ;
2963 	  heat1 = 1./world->options->heat[t];
2964 	  // this ignores adaptive heating for MPI!!!!
2965 	  temp[*z] = (float) world->bf[locus * hc + t-1];
2966 	  *z += 1;
2967 	  lsum += (heat0 - heat1) * ((world->bf[locus * hc + t-1] + world->bf[locus * hc + t]) * 0.5);
2968 	}
2969       temp[(*z)++] =  (float) world->bf[locus * hc + t-1];
2970       temp[(*z)++] =  (float) lsum;
2971     }
2972   temp[(*z)++] =  (float) world->hmscale[locus] - log(world->hm[locus]);
2973 }
2974 #else /*not MPI or MPI & PARALIO*/
print_marginal_like(char * temp,long * c,world_fmt * world)2975 void      print_marginal_like(char *temp, long *c, world_fmt * world)
2976 {
2977   long locus = world->locus;
2978   long t;
2979   long hc = world->options->heated_chains;
2980   MYREAL lsum;
2981   MYREAL heat0, heat1;
2982   if(world->options->heating)
2983     {
2984       lsum = 0.;
2985       for(t=1; t < hc; t++)
2986 	{
2987 	  if(world->options->adaptiveheat!=NOTADAPTIVE)
2988 	    {
2989 	      heat0 = world->options->averageheat[t-1] ;
2990 	      heat1 = world->options->averageheat[t];
2991 	    }
2992 	  else
2993 	    {
2994 	      heat0 = 1./ world->options->heat[t-1] ;
2995 	      heat1 = 1./ world->options->heat[t];
2996 	    }
2997 	  *c += sprintf(temp+ *c,"\t%f", world->bf[locus * hc + t-1]);
2998 	  lsum += (heat0 - heat1) * ((world->bf[locus * hc + t-1] + world->bf[locus * hc + t]) * 0.5);
2999 	}
3000       *c += sprintf(temp + *c,"\t%f", world->bf[locus * hc + t-1]);
3001       *c += sprintf(temp + *c,"\t%f", lsum);
3002     }
3003   *c += sprintf(temp + *c,"\t%f", world->hmscale[locus] - log(world->hm[locus]));
3004 }
3005 #endif /*not MPI*/
3006 
3007 
3008 #ifdef LONGSUM
3009 /// put timepoints into the total tree to allow for bottlenecks and growth
3010 long
find_chaincutoffs(MYREAL * treetime,world_fmt * world,long locus,long r)3011 find_chaincutoffs (MYREAL * treetime, world_fmt * world, long locus, long r)
3012 {
3013 
3014     long j;
3015     long T;
3016     MYREAL totaltime = 0.;
3017     long copies = 0;
3018     tarchive_fmt *atl;
3019     atl = world->atl[r][locus].tl;
3020     for (j = 0; j < world->atl[r][locus].T; j++)
3021     {
3022         T = atl[j].longsumlen - 1;
3023         copies += atl[j].copies;
3024         totaltime = atl[j].longsum[T].eventtime / 3.;
3025         treetime[0] += totaltime;
3026         treetime[1] += totaltime + totaltime;
3027         treetime[2] += totaltime + totaltime + totaltime;
3028     }
3029     return copies;
3030 }
3031 
3032 /// find the cutoffs for the locus, averaging over multiple replicates
3033 void
find_loci_cutoffs(MYREAL * treetime,world_fmt * world)3034 find_loci_cutoffs (MYREAL * treetime, world_fmt * world)
3035 {
3036     long r;
3037     long locus;
3038     long count = 0;
3039     long repstart = 0;
3040     long repstop = 1;
3041     memset (treetime, 0, sizeof (MYREAL) * 3);
3042     set_replicates (world, world->repkind, world->rep, &repstart, &repstop);
3043 
3044     for (locus = 0; locus < world->loci; locus++)
3045     {
3046         for (r = repstart; r < repstop; r++)
3047         {
3048             count += find_chaincutoffs (treetime, world, locus, r);
3049         }
3050     }
3051     treetime[0] /= (MYREAL) count;
3052     treetime[1] /= (MYREAL) count;
3053     treetime[2] /= (MYREAL) count;
3054 }
3055 
3056 
3057 /// find the cutoffs avaergin over all replicates
3058 void
find_replicate_cutoffs(MYREAL * treetime,world_fmt * world)3059 find_replicate_cutoffs (MYREAL * treetime, world_fmt * world)
3060 {
3061     long r;
3062     long count = 0;
3063     long repstart = 0;
3064     long repstop = 1;
3065     memset (treetime, 0, sizeof (MYREAL) * 3);
3066     set_replicates (world, world->repkind, world->rep, &repstart, &repstop);
3067 
3068     for (r = repstart; r < repstop; r++)
3069     {
3070         count += find_chaincutoffs (treetime, world, world->locus, r);
3071     }
3072     treetime[0] /= (MYREAL) count;
3073     treetime[1] /= (MYREAL) count;
3074     treetime[2] /= (MYREAL) count;
3075 }
3076 
3077 
3078 /// change the cutoff times
3079 void
change_longsum_times(world_fmt * world)3080 change_longsum_times (world_fmt * world)
3081 {
3082     long i;
3083     long numpop3 = 3 * world->numpop;
3084     long numpop6 = 2 * numpop3;
3085     MYREAL *treetime;
3086     treetime = (MYREAL *) mycalloc (3, sizeof (MYREAL));	// we have 3 classes
3087     if (world->locus < world->loci)
3088         find_replicate_cutoffs (treetime, world);
3089     else
3090         find_loci_cutoffs (treetime, world);
3091     for (i = numpop3; i < numpop6; i += 3)
3092     {
3093         world->flucrates[i] = treetime[0];
3094         world->flucrates[i + 1] = treetime[1];
3095         world->flucrates[i + 2] = treetime[2];
3096     }
3097     myfree(treetime);
3098 }
3099 
3100 #endif /*LONGSUM*/
3101 
3102 ///
3103 /// check_parmfile checks what is called and also whether we should honor the menu=YES in the file
3104 /// user uses another parmfile than the default when there is a parameter associated with the program call
3105 /// this function checks the first two line of the file to find out whether this is a parmfile or potentially
3106 /// a datafile/
3107 /// The addition of an option to override the menu command in the parmfile
3108 /// migrate-n -menu ==> use menu independent of the parmfile setting
3109 /// migrate-n -nomenu ==> does use menu even if parmfile says so
3110 /// migrate-n -version ==> prints the version and quits
3111 /// migrate-n -help   ==> prints a quick help of commandline options
check_parmfile(long argcount,char ** arguments,char * parmfilename)3112 boolean  check_parmfile(long argcount, char **arguments, char *parmfilename)
3113 {
3114   int argument=0;
3115   int len;
3116   boolean  usemenu = OVERRIDE_NO;
3117   if (argcount > 1)
3118     {
3119       argument = 1;
3120       while(argument < argcount)
3121 	{
3122 	  //fprintf(stderr,"<|%s|>\n",argv[argument]);
3123 	  //fflush(stderr);
3124 	  if(arguments[argument][0]!='-')
3125 	    {
3126 	      len = (long) strlen (arguments[argument]) + 1;
3127 	      sprintf (parmfilename, "%-.*s", len, arguments[argument]);
3128 	    }
3129 	  else
3130 	    {
3131 
3132 	      switch(arguments[argument][1])
3133 		{
3134 		case 'm':
3135 		  usemenu = OVERRIDE_MENU;
3136 		  break;
3137 		case 'n':
3138 		  usemenu = OVERRIDE_NOMENU;
3139 		  break;
3140 		case 'v':
3141 		  printf("Migrate-n version: %s, subversion: %s\n", MIGRATEVERSION, MIGRATESUBVERSION);
3142 		  printf("Operating System:  %s\n", MYSYSTEM );
3143 #ifdef MPI
3144 #ifdef OMPI_MAJOR_VERSION
3145 		  printf("OPENMPI version: %i.%i\n", OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION);
3146 #endif
3147 #endif
3148 		  exit(-1);
3149 		  break;
3150 		case 'h':
3151 		  printf("Migrate-n version: %s, subversion: %s\n", MIGRATEVERSION, MIGRATESUBVERSION);
3152 		  printf("(beerli@fsu.edu)\n\n");
3153 		  printf("commandline options:\n");
3154 		  printf("-version  # shows the current version and exits\n");
3155 		  printf("-help     # show this menu and exit\n");
3156 		  printf("\n");
3157 		  printf("-nomenu   # does not display menu, use this for batch jobs\n");
3158 		  printf("-menu     # forces the display of the menu\n");
3159 		  printf("\n");
3160 		  exit(-1);
3161 		  break;
3162 		}
3163 	    }
3164 	  argument++;
3165 	}
3166     }
3167   return usemenu;
3168 }
3169 
3170 /// set the menu overrider
set_usemenu(boolean usemenu,boolean fromparmfile)3171 boolean set_usemenu(boolean usemenu, boolean fromparmfile)
3172 {
3173   switch(usemenu)
3174     {
3175     case OVERRIDE_MENU:
3176       return TRUE;
3177       break;
3178     case OVERRIDE_NOMENU:
3179       return FALSE;
3180       break;
3181     default:
3182       break;
3183     }
3184   return fromparmfile;
3185 }
3186 
3187 /// checks the settings of the number of long an short chain for bayes options and resets useless settings
check_bayes_options(option_fmt * options)3188 void check_bayes_options(option_fmt *options)
3189 {
3190   if (options->bayes_infer)
3191     {
3192       if(options->schains>0)
3193 	{
3194 	  //		fprintf(stdout,"NOTICE: with Bayesian inference no short chains are allowed, setting them to ZERO\n");
3195 	  options->schains = 0;
3196 	}
3197       if(options->lchains>1 || options->lchains < 1)
3198 	{
3199 	  fprintf(stdout,"NOTICE: with Bayesian inference, most efficient runs are with 1 long chain,\nsetting the long chain to 1\n");
3200 	  options->lchains = 1;
3201 	};
3202     }
3203 }
3204 
reset_bayesmdimfile(world_fmt * world,option_fmt * options)3205 void reset_bayesmdimfile(world_fmt *world, option_fmt *options)
3206 {
3207   long locus=0;
3208   char ** files = NULL;
3209   long numfiles=0;
3210   if(world->options->has_bayesmdimfile)
3211   {
3212       charvec2d(&files,numcpu,STRSIZE);
3213       get_bayeshist (world, options); // this is transferring some material but
3214       // the parameter file is not filled in anymore [-> small data block]
3215       // save parameters in file and reload
3216       if(myID==MASTER)
3217       {
3218           // first file is alwasy present
3219           strcpy(files[numfiles++],options->bayesmdimfilename);
3220 #ifdef MPI
3221 #ifdef PARALIO
3222           fprintf(stdout,"%i> bayesmdimfile opening for analysis\n", myID);
3223           //this will need to open a whole array of files
3224           long numfiles=0;
3225           charvec2d(&files,numcpu,STRSIZE);
3226           long worker;
3227           char tmp[STRSIZE];
3228           MPI_Status status;
3229           strcpy(files[numfiles++],options->bayesmdimfilename);
3230           long numelem  = world->numpop2 + (world->options->gamma ? 1 : 0);
3231           long numelem2 = numelem * 2;
3232           MYREAL * temp = (MYREAL *) mycalloc(numelem2+2,sizeof(MYREAL));
3233           temp[0]=MIGMPI_PARALIO;
3234           for(worker=1;worker<numcpu;worker++)
3235           {
3236               MYMPISEND (temp, numelem2 + 2, mpisizeof, worker, worker, comm_world);
3237           }
3238           worker=1;
3239           while(worker<numcpu)
3240           {
3241               MYMPIRECV (tmp,STRSIZE, MPI_CHAR, MPI_ANY_SOURCE,
3242                          MPI_ANY_TAG, comm_world, &status);
3243               strcpy(files[numfiles++],tmp);
3244               worker++;
3245           }
3246           myfree(temp);
3247           //	  world->bayesmdimfile = NULL;//fopen(options->bayesmdimfilename,"r+");
3248 #endif /*paralio*/
3249 #endif /*mpi*/
3250       } /*for the master to get the filenames for bayesallfile */
3251 #ifdef ZNZ
3252 #ifdef PARALIO
3253       // closing the MPI worker's files and the master
3254       MPI_File_close(&world->mpi_bayesmdimfile);
3255 #else
3256       //closing the one and only file (single cpu mode or MPI master alone)
3257       if(myID==MASTER)
3258 	znzclose(world->bayesmdimfile);
3259 #endif /*paralio*/
3260       // open the bayesallfile assuming no paralio setting and using the zipped stuff ZNZ
3261       if(myID==MASTER)
3262 	world->bayesmdimfile = znzopen(options->bayesmdimfilename,"r", options->use_compressed);
3263 #else
3264       // not ZNZ
3265 #ifdef PARALIO
3266       // closing the MPI worker's files and the master
3267       MPI_File_close(&world->mpi_bayesmdimfile);
3268 #else
3269       //closing the one and only file (single cpu mode or MPI master alone)
3270       if(myID==MASTER)
3271 	fclose(world->bayesmdimfile);
3272 #endif /*paralio*/
3273       world->bayesmdimfile = NULL;
3274       //world->bayesmdimfile = fopen(options->bayesmdimfilename,"r");
3275       //if(world->bayesmdimfile==NULL)
3276       //  {
3277       //    perror("Failed to open file: ");
3278       //    printf("errno = %d.\n", errno);
3279       //    exit(1);
3280       //  }
3281 #endif /*znz*/
3282       if (myID==MASTER)
3283       {
3284 	read_bayes_fromfile(world->bayesmdimfile,world, options, files,numfiles);
3285           for(locus=0;locus<world->loci;locus++)
3286           {
3287               if(world->data->skiploci[locus])
3288                   continue;
3289               if(world->bayes->histogram[locus].results == NULL)
3290               {
3291                   world->data->skiploci[locus] = TRUE;
3292                   continue;
3293               }
3294               calc_hpd_credibility(world->bayes, locus, world->numpop2, world->numpop2 + world->bayes->mu);
3295           }
3296       }
3297   }
3298   else
3299     {
3300       // keep everything in RAM
3301       get_bayeshist (world, options);
3302     }
3303 }
3304