1 /*! \file main.c */
2 /*! \mainpage M I G R A T E
3 * \section intro Introduction
4 * Migrate is a program to estimate population-genetic parameters from genetic data such as
5 * electrophoretic marker data, microsatellite data, DNA or RNA sequence data, and single
6 * nucleotide polymorphisms. Migrate uses the concepts of Maximum likelihood and Bayesian
7 * inference to infer parameters based on coalescence theory. For more information related
8 * to the use of the program or the theory behind check out
9 * http://popgen.scs.fsu.edu/migrate.html
10 *
11 * \section install Installation
12 * to install the program simply do
13 * \subsection step1 configure
14 * \subsection step2 make
15 * \subsection step3 sudo make install
16 *
17 * \section maintainer Maintainer and Copyrights
18 * Peter Beerli,
19 * beerli@fsu.edu
20 *
21 * Copyright 1997-2002 Peter Beerli and Joseph Felsenstein, Seattle WA\n
22 * Copyright 2003-2013 Peter Beerli, Tallahassee FL\n
23 *
24 * Permission is hereby granted, free of charge, to any person obtaining a copy
25 * of this software and associated documentation files (the "Software"), to deal
26 * in the Software without restriction, including without limitation the rights
27 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
28 * of the Software, and to permit persons to whom the Software is furnished to do
29 * so, subject to the following conditions:
30 *
31 * The above copyright notice and this permission notice shall be included in all copies
32 * or substantial portions of the Software.
33 *
34 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
35 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
36 * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
37 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
38 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
39 * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 *
41 */
42 /* $Id: main.c 2169 2013-08-24 19:02:04Z beerli $*/
43
44 #include "migration.h"
45 #include "sighandler.h"
46 #include "heating.h"
47 #include "histogram.h"
48 #include "world.h"
49 #include "data.h"
50 #include "options.h"
51 #include "mcmc.h"
52 #include "bayes.h"
53 #include "broyden.h"
54 #include "combroyden.h"
55 #include "menu.h"
56 #include "random.h"
57 #include "sequence.h"
58 #include "tree.h"
59 #include "tools.h"
60 #include "profile.h"
61 #include "aic.h"
62 #include "lrt.h"
63 #include "migrate_mpi.h"
64 #include "migevents.h"
65 #include "skyline.h"
66 #include "reporter.h"
67 #include "autotune.h"
68 #ifdef PRETTY
69 #include "pretty.h"
70 #endif /*PRETTY*/
71 #include <stdio.h>
72 #ifdef BEAGLE
73 #include "calculator.h"
74 #endif
75 #include "mutationmodel.h"
76 #ifdef UEP
77 #include "uep.h"
78 #endif
79 #ifdef DMALLOC_FUNC_CHECK
80 #include <dmalloc.h>
81 #endif
82
83 #ifdef GRANDCENTRAL
84 #include <dispatch/dispatch.h>
85 #endif
86
87 //use this if you have nice() and need to be nice with others
88 #ifdef HAVE_NICE
89 #include <unistd.h>
90 #endif
91
92 #ifdef PRETTY
93 #ifdef WIN32
94 #include "pretty-win32.h"
95 #endif
96 #endif
97
98
99 /* Definitions for MCMCMC -- heated chains
100 * definitions of the first four heated chains, they are ordered from cold to hot */
101 #define EARTH universe[0] /*!< cold chain has always a temperature=1 */
102 #define VENUS universe[1] /*!< second coldest chain */
103 #define MERKUR universe[2] /*!< thrid coldest chain */
104 #define SUN universe[3] /*!< fourth coldest chain */
105
106 /* GLOBAL VARIABLES +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
107 int myID; /*!< myID=0 for single-cpu program and master in MPI program, worker nodes myID > 0 */
108 int myRepID; /*!< myID=0 for single-cpu program and master in MPI program, worker nodes myID > 0 */
109 int color; /*!< either 1 or MPI_UNDEFINED, in anticipation of more complicated MPI schemes */
110 int numcpu; /*!< used if MPI is running otherwise == 1 */
111 int locidone; /*!< used in MPI workers */
112 long unique_id_global;
113 #ifdef MPI
114 #ifdef PARALIO
115 boolean my_file_open_error;
116 #endif
117 #ifdef USE_MYREAL_FLOAT
118 #ifdef WINDOWS
119 #define mpisizeof MPI_FLOAT ;
120 #else
121 const MPI_Datatype mpisizeof = MPI_FLOAT ;
122 #endif
123 #else
124 #ifdef WINDOWS
125 //#define mpisizeof MPI_DOUBLE ;
126 MPI_Datatype mpisizeof ;
127 #else
128 const MPI_Datatype mpisizeof = MPI_DOUBLE ;
129 #endif
130 #endif
131 #endif
132
133
134 #ifdef CAUTIOUS
135 boolean cautious; /*!< forces a check whether a file exists and will ask before overwriting */
136 #endif
137 #ifdef MEMDEBUG
138 #include <sys/time.h>
139 FILE *memfile;
140 struct timeval memt_start, memt_finish;
141 double memelapsed;
142 long totalsize;
143 #endif
144 #ifdef MPI
145 filedb_fmt filedb[30];
146 long filenum;
147
148 MPI_Comm comm_world; /*!< parallel environment that contains knowledge about all workers and master */
149 //MPI_Comm comm_workers; /*!< parallel environment that contains knowledge about all workers */
150 MPI_Group worker_group;
151 MPI_Group world_group;
152 #endif
153 #ifdef SLOWNET
154 int profiledone; /*!< used in MPI workers in profiles on slow networks */
155 #endif
156 #ifdef PTHREADS
157 tpool_t heating_pool; /*!< when compiled with PTHREADS then holds all threads */
158 #else
159 #ifndef tpool_t
160 #define tpool_t char
161 #endif
162 tpool_t heating_pool;
163 #endif
164
165 // random generator related global variables
166 long *seed; /*!< contains the seed of the random number */
167 long *newseed; /*!< contains the new random seed */
168 char *generator; /*!< string that shows what random number generator is used */
169
170 // for bayesian output counter, initialized in bayes_init()
171 long *mdimfilecount;
172
173 //#ifdef PRETTY
174 // for pretty printing
175 pdf_doc doc;
176 pdf_page page;
177 pdf_contents canvas;
178 int page_counter;
179 char pdf_pagetitle[LINESIZE+1];
180 char pdf_time[LINESIZE+1];
181 float page_height;
182 float left_margin;
183 float page_width;
184 //#endif
185
186
187 int
188 setup_locus (long locus, world_fmt * world, option_fmt * options,
189 data_fmt * data);
190
191 void
192 condense_time (world_fmt * world, long *step, long *j,
193 MYREAL * accepted, long *G, long *steps, long oldsteps,
194 long rep);
195
196 MYREAL sumbezier(long intervals, MYREAL x0, MYREAL y0, MYREAL x1, MYREAL y1, MYREAL x2, MYREAL y2);
197 void calculate_BF(world_fmt **universe, option_fmt *options);
198
199 long set_repkind (option_fmt * options);
200
201 void
202 heating_init (world_fmt ** universe, int usize, data_fmt * data,
203 option_fmt * options);
204
205 void
206 heating_prepare (world_fmt ** universe, int usize,
207 option_fmt * options, data_fmt * data, long rep);
208
209 void heating_prepare2 (world_fmt ** universe, int usize);
210
211 long
212 replicate_number (option_fmt * options, long chain, char type,
213 long replicate);
214
215 void
216 combine_loci_standard (char type, option_fmt * options, world_fmt * world,
217 long *Gmax);
218 void
219 combine_loci_mpi (char type, option_fmt * options, world_fmt * world,
220 long *Gmax);
221
222 void set_penalizer (long chain, long chains, char type,
223 world_fmt ** universe);
224
225 void
226 run_sampler (option_fmt * options, data_fmt * data,
227 world_fmt ** universe, int usize, long *outfilepos, long *Gmax);
228
229 void run_replicate (long locus,
230 long replicate,
231 world_fmt ** universe,
232 option_fmt * options,
233 data_fmt * data,
234 tpool_t * localheating_pool,
235 int usize, long *treefilepos, long *Gmax);
236
237
238 void
239 run_locus (world_fmt ** universe, int usize, option_fmt * options,
240 data_fmt * data, tpool_t * localheating_pool, long maxreplicate,
241 long locus, long *treefilepos, long *Gmax);
242
243 void
244 run_loci (world_fmt ** universe, int usize, option_fmt * options,
245 data_fmt * data, tpool_t * localheating_pool, long maxreplicate,
246 long *treefilepos, long *Gmax);
247
248 void run_one_update (world_fmt * world);
249
250 void run_updates (world_fmt ** universe,
251 int usize, option_fmt * options,
252 tpool_t * localheating_pool, long inc, long increment,
253 long step, long steps);
254
255 void heated_swap (world_fmt ** universe, worldoption_fmt * options);
256
257 void
258 change_chaintype (long locus, char *type, world_fmt * world,
259 long *increment, long *oldsteps, long *chains,
260 option_fmt * options);
261
262 void
263 prepare_next_chain (world_fmt ** universe, worldoption_fmt * options,
264 char type, long chain, long *chains,
265 long *pluschain, long locus, long replicate);
266
267 void print_bayesfactor(FILE *file, world_fmt **universe, option_fmt * options);
268 #if defined(MPI) && !defined(PARALIO)
269 //void fix_bayesfactor(world_fmt *world, option_fmt * options);
270 void print_marginal_like(float *temp, long *z, world_fmt * world);
271 #else /*not MPI*/
272 void print_marginal_like(char *temp, long *c, world_fmt * world);
273 #endif
274
275 void print_burnin_stop(FILE *file, world_fmt **universe, option_fmt * options);
276 void
277 print_heating_progress (world_fmt ** universe,
278 worldoption_fmt * options, long stepinc);
279
280 long analyze_olddata (world_fmt * world, option_fmt * options, data_fmt * data, long *outfilepos);
281 boolean analyze_oldbayesdata(world_fmt *world, option_fmt *options, data_fmt *data, long *outfilepos);
282 void print_theta0(FILE *file, world_fmt *world, long maxreplicate);
283 void profile_tables (option_fmt * options, world_fmt * world, long *gmaxptr);
284
285 void finish_mac (option_fmt * options, data_fmt * data);
286
287 int
288 setup_locus (long locus, world_fmt * world, option_fmt * options,
289 data_fmt * data);
290
291 long set_repkind (option_fmt * options);
292
293 void heating_prepare2 (world_fmt ** universe, int usize);
294
295 long replicate_number (option_fmt * options, long chain, char type,
296 long replicate);
297
298 void print_heating_progress2 (FILE * file, worldoption_fmt * options,
299 world_fmt ** universe);
300
301
302 void get_bayeshist (world_fmt * world, option_fmt * options);
303 void get_treedata (world_fmt * world, option_fmt * options);
304 void get_mighistdata (world_fmt * world, option_fmt * options);
305
306 void change_longsum_times (world_fmt * world);
307
308 boolean check_parmfile(long argcount, char **arguments, char *parmfilename);
309 boolean set_usemenu(boolean usemenu, boolean fromparmfile);
310 void check_bayes_options(option_fmt *options);
311 void reset_bayesmdimfile(world_fmt *world, option_fmt *options);
312 long calculate_newpop_numpop(option_fmt *options, data_fmt *data);
313
314 extern void unset_penalizer_function (boolean inprofiles);
315
316 ///
317 /// specifies the classes of override the menu as a parameter to the program
318 enum override_enum
319 {
320 OVERRIDE_NO, OVERRIDE_MENU, OVERRIDE_NOMENU
321 };
322
323
324
325 ///
326 /// the program migrate calculates migration rates and population sizes
327 /// from genetic data, it allows for a wide variety of data types and many options
328 /// \callgraph
329 int
main(int argc,char ** argv)330 main (int argc, char **argv)
331 {
332 char type = 's';
333 int usize = 1;
334 int usemenu = OVERRIDE_NO;
335 long locus;
336 long Gmax = 0;
337 long outfilepos = 0;
338 data_fmt *data;
339 boolean restarted_bayes_bool=FALSE;
340 #ifdef INTEGRATEDLIKE
341 long maxreplicate;
342 #endif
343 option_fmt *options;
344 world_fmt **universe;
345 #ifdef MPI
346 int server;
347 #ifdef WINDOWS
348 mpisizeof = MPI_DOUBLE ;
349 #endif
350 #endif
351 #ifdef HAVE_NICE
352 nice (10); //nice value arbitrarily set to 10, 5 is still nice, 0 is standard
353 #endif
354 // puts(argv[0]);
355
356 //---------------------------------------------------------------------------------------------
357 // windows specific code for pretty printing
358 #ifdef PRETTY
359 #ifndef MPI
360 #ifdef WIN32
361 set_haru_handler();
362 #endif
363 #endif
364 #endif
365
366 //---------------------------------------------------------------------------------------------
367 // MPI initialisation
368 #ifdef MPI
369 // parallel version
370 filenum = 0; //setting up the filename database so that worker node can write back to master
371
372 MPI_Init (&argc, &argv);
373 comm_world = MPI_COMM_WORLD;
374 MPI_Comm_size (comm_world, &numcpu);
375 MPI_Comm_rank (comm_world, &myID);
376 MPI_Comm_group (comm_world, &world_group);
377 server = MASTER; //server ID
378 MPI_Group_excl (world_group, 1, &server, &worker_group);
379 locidone = 0;
380
381 #ifdef SLOWNET
382 // slow network parallel version -- this will turn into the standard version
383 profiledone = 0;
384 which_calc_like (SINGLELOCUS);
385 #endif
386
387 #else /*MPI*/
388 //scalar version of migrate
389 myID = MASTER;
390 numcpu = 1;
391 #endif /*MPI*/
392
393 // debug code for memory problems
394 #ifdef MEMDEBUG
395 totalsize = 0 ;
396 memfile = fopen("memoryfile","w+");
397 gettimeofday(&memt_start, NULL);
398 #endif
399
400 //---------------------------------------------------------------------------------------------
401 // initialization of main container and random number parts
402 seed = (long *) mymalloc (sizeof (long) * 3);
403 newseed = (long *) mymalloc (sizeof (long) * 3);
404 generator = (char *) mycalloc (1,sizeof(char) * 80);
405 universe = (world_fmt **) mycalloc (HEATED_CHAIN_NUM, sizeof (world_fmt *));
406
407 // set flag to decide whether we use caution when writing files
408 #ifdef CAUTIOUS
409 cautious = FALSE;
410 #endif
411 // try to catch and beautify some error messages
412 signalhandling (ON);
413 unique_id_global=0;
414 // create main data structures
415 create_data (&data);
416 create_world (&(EARTH), 1L);
417 create_options (&options);
418
419 // parmfile and menu
420 init_options (options);
421
422 //---------------------------------------------------------------------------------------------
423 // master initializations, this works for both parallel and single-cpu version
424 if (myID == MASTER)
425 {
426 usemenu = check_parmfile((long) argc,argv,options->parmfilename);
427 read_options_master (options);
428 options->menu = set_usemenu(usemenu, options->menu);
429 print_menu_title (stdout, options);
430 get_menu (options, EARTH, data);
431 check_bayes_options(options);
432 #ifdef MPI
433 // MYMPIBARRIER (comm_world);
434 broadcast_options_master (options, data);
435 #endif
436 }
437 else
438 {
439 #ifdef MPI
440 // MYMPIBARRIER (comm_world);
441 broadcast_options_worker (options);
442 #endif
443 options->menu = FALSE;
444 }
445
446 //---------------------------------------------------------------------------------------------
447 // data initialization and data reading phase
448 usize = (options->heating ? ( MAX (1, options->heated_chains)) : 1 );
449 universe = (world_fmt **) myrealloc (universe, usize * sizeof (world_fmt *));
450 // opens files on all nodes
451 #ifdef DEBUG
452 char nowstr[LINESIZE];
453 get_time (nowstr, "%c");
454 printf("%i> before opening files for master and 'pointers' for workers at %s\n",myID,nowstr);
455 #endif
456 init_files (EARTH, data, options);
457 #ifdef DEBUG
458 get_time (nowstr, "%c");
459 printf("%i> files are now open and ready at %s\n", myID, nowstr);
460 #endif
461
462 if (options->writelog && myID == MASTER)
463 {
464 print_menu_title (options->logfile, options);
465 }
466
467 EARTH->repkind = SINGLECHAIN;
468 //fprintf(stderr,"myID=%i myRepID=%i\n",myID, myRepID);
469 unset_penalizer_function (FALSE); // install penalizer for far jumps in MCMC
470
471 //---------------------------------------------------------------------------------------------
472 // sampling phase
473 if (!options->readsum) // all go here except when reading old runs
474 {
475 run_sampler (options, data, universe, usize, &outfilepos, &Gmax);
476 }
477 else
478 {
479 if(options->bayes_infer)
480 {
481 // reanalyze old or broken bayesrun [not ready yet]
482 restarted_bayes_bool = analyze_oldbayesdata(EARTH, options, data, &outfilepos);
483 }
484 else
485 {
486 //reanalyze sumfile
487 Gmax = analyze_olddata (EARTH, options, data, &outfilepos);
488 }
489 }
490 unset_penalizer_function (TRUE);
491
492 //---------------------------------------------------------------------------------------------
493 // combining phase for multiple loci and replicates
494 #ifdef MPI
495 //with MPI:the workers stay here much longer than the Master
496 combine_loci_mpi (type, options, EARTH, &Gmax);
497 #else
498 combine_loci_standard (type, options, EARTH, &Gmax);
499 #endif
500 // bayes inference
501 if (options->bayes_infer)
502 {
503 // if bayes intermediate data recording is ON then reset the file
504 // for reading for printing and combining, else use the material still in RAM
505 if(!restarted_bayes_bool)
506 reset_bayesmdimfile(EARTH, options);
507 }
508 else
509 {
510 // gather the treedata(1) into sumfile or from worker nodes
511 get_treedata (EARTH, options);
512 }
513
514 //---------------------------------------------------------------------------------------------
515 // printing main results
516 if (myID == MASTER)
517 {
518 if (options->bayes_infer)
519 {
520 // printe bayes inference main table
521 bayes_stat (EARTH);
522 }
523 else
524 {
525 // print ML table
526 print_list (universe, options, data);
527 print_alpha_curve (EARTH, EARTH->atl, &Gmax);
528 if(options->printcov)
529 print_cov(EARTH, EARTH->numpop, EARTH->loci, EARTH->cov);
530 if (EARTH->options->lratio->counter > 0)
531 print_lratio_test (EARTH, &Gmax);
532 if (options->aic)
533 akaike_information (EARTH, &Gmax);
534 }
535 fflush (EARTH->outfile);
536 //---------------------------------------------------------------------------------------------
537 // printing additional results
538 #ifdef MPI
539 get_mighistdata (EARTH, options);
540 #endif
541 if(options->skyline && options->bayes_infer)
542 {
543 print_expected_values(EARTH, options);
544 }
545 if(options->mighist || options->skyline)
546 {
547 print_event_values(EARTH);
548 #ifdef PRETTY
549 pdf_histogram_legend();
550 #endif
551 }
552 #ifdef UEP
553 // print UEP probabilities
554 if (options->uep)
555 analyze_uep (EARTH);
556 #endif
557 // profile tables unset_penalizer_function (TRUE); //now we calculate profile and shut down
558 // the penalizing of far jumps in the maximizer function
559 } // end of printing main tables and histograms
560
561 #ifdef SLOWNET
562 which_calc_like (PROFILE);
563 if (myID == MASTER)
564 {
565 // release the workers from the mpi_maximize_worker() function.
566 // the workers will stop working on locus-likelihoods and advance to profiles
567 mpi_send_stop (EARTH);
568 }
569 if (!options->bayes_infer)
570 {
571 profile_tables (options, EARTH, &Gmax);
572 }
573 #else /* SLOWNET*/
574 if (myID == MASTER)
575 {
576 #ifdef INTEGRATEDLIKE
577 if(options->integrated_like)
578 {
579 maxreplicate = (options->replicate
580 && options->replicatenum >
581 0) ? options->replicatenum : 1;
582 print_theta0(stdout, EARTH, maxreplicate);
583
584 if (EARTH->options->lratio->counter > 0)
585 print_lratio_test (EARTH, &Gmax);
586 if (options->aic)
587 akaike_information (EARTH, &Gmax);
588
589 profile_tables (options, EARTH, &Gmax);
590 }
591 #else
592 if (!options->bayes_infer)
593 {
594 profile_tables (options, EARTH, &Gmax);
595 }
596 #endif /*INTEGRATEDLIKE*/
597 }
598 #endif /* SLOWNET */
599
600 #ifdef MPI
601 if (myID == MASTER)
602 {
603 #endif
604 if(EARTH->options->bayes_infer)
605 {
606 // print marginal likelihoods
607 #ifdef MPI
608 // fix_bayesfactor(EARTH,options);
609 #endif
610 print_bayesfactor(EARTH->outfile, universe,options);
611 // printing of MCMC run characteristics
612 fprintf(EARTH->outfile,"MCMC run characteristics\n");
613 fprintf(EARTH->outfile,"========================\n\n");
614 bayes_print_accept(EARTH->outfile,EARTH);
615 #ifdef PRETTY
616 pdf_bayes_print_accept(EARTH);
617 #endif
618 fprintf(EARTH->outfile,"Autocorrelation and Effective sample size\n");
619 fprintf(EARTH->outfile,"-------------------------------------------------------------------\n\n");
620 print_bayes_ess(EARTH->outfile,EARTH,EARTH->numpop2 + EARTH->bayes->mu * EARTH->loci + 1,2,EARTH->auto_archive, EARTH->ess_archive);
621 #ifdef PRETTY
622 pdf_bayes_print_ess(EARTH);
623 #endif
624 }
625
626 if(strchr("aet",EARTH->options->burnin_autostop))
627 {
628 print_burnin_stop(EARTH->outfile, universe, options);
629 }
630
631 if(EARTH->options->progress)
632 {
633 if(EARTH->options->bayes_infer)
634 // if(EARTH->options->bayes_infer && options->datatype!='g')
635 {
636 fprintf(stdout,"\nMCMC run characteristics: Autocorrelation and Effective sample size\n");
637 fprintf(stdout,"-------------------------------------------------------------------\n\n");
638 print_bayes_ess(stdout,EARTH,EARTH->numpop2+ EARTH->bayes->mu * EARTH->loci + 1,2,
639 EARTH->auto_archive, EARTH->ess_archive);
640
641 print_bayesfactor(stdout, universe,options);
642
643 }
644 }
645 if(options->treeprint)
646 {
647 if(EARTH->options->treeinmemory)
648 {
649 for(locus=0;locus<EARTH->loci; locus++)
650 {
651 fprintf(EARTH->treefile,"%s", EARTH->treespace[locus]);
652 }
653 }
654 #ifdef NEXUSTREE
655 FPRINTF(EARTH->treefile,"\nend;\n");
656 #endif
657 }
658
659 // if adaptive heating print a table with the average temperatures
660 //
661 if(options->heating && options->adaptiveheat!=NOTADAPTIVE)
662 {
663 fprintf(EARTH->outfile,"\n\n\nAverage temperatures during the run using %s\n",(options->adaptiveheat!=NOTADAPTIVE)
664 ==STANDARD ? "standard adaptive heating scheme" : "bounded adaptive heating scheme" );
665 fprintf(EARTH->outfile,"===========================================================================\n\n");
666 fprintf(EARTH->outfile,"Chain Temperature\n");
667 // locus means indicator for chain
668 for(locus = 0 ; locus < options->heated_chains; locus++)
669 {
670 fprintf(EARTH->outfile,"%5li %10.5f\n",locus+1,universe[locus]->averageheat);
671 }
672 fprintf(EARTH->outfile,"Adaptive heating often fails, if the average temperatures are very close together\n");
673 fprintf(EARTH->outfile,"try to rerun using static heating! If you want to compare models using marginal\n");
674 fprintf(EARTH->outfile,"likelihoods then you MUST use static heating\n");
675 pdf_print_averageheat(universe,options);
676 }
677 //
678 // print warnings into the PDF and the outfile
679 //
680 if(EARTH->options->bayes_infer)
681 {
682 print_stored_warnings(EARTH);
683 pdf_print_stored_warnings(EARTH);
684 }
685 //
686 //
687 //
688 pdf_print_end_time(&page_height);
689 // write out to PDF file
690 pdf_write_file(options);
691 print_finish (EARTH, outfilepos);
692 // closing all files
693 #ifdef MAC
694 finish_mac (options, data);
695 #endif
696
697 exit_files (EARTH, data, options);
698 #ifdef MPI
699 # ifndef SLOWNET
700 mpi_send_stop (EARTH); //stop workers
701 // printf("%i> at barrier in slownet-main before finalize\n", myID);
702 # endif
703 }
704 MYMPIBARRIER (comm_world);
705 MPI_Finalize ();
706 #endif /*MPI*/
707 // myfree(seed);
708 //myfree(newseed);
709 //myfree(generator);
710 //free_universe(universe, usize, options);
711 //destroy_data(data);
712 //destroy_options(options);
713 // for debugging with MallocDebug on macosx
714 #ifdef MACMALLOCDEBUG
715 while(TRUE)
716 {
717 sleep(1);
718 } //end for debugging
719 #endif
720 return 0;
721 } /* main end */
722
723 ///
724 /// Calculates the maximum likelihood estimates from the trees that were gathered for each locus
725 /// \callgraph
726 void
combine_loci_standard(char type,option_fmt * options,world_fmt * world,long * Gmax)727 combine_loci_standard (char type, option_fmt * options, world_fmt * world,
728 long *Gmax)
729 {
730 long kind = MULTILOCUS;
731 if (options->readsum)
732 {
733 if (world->loci == 1)
734 kind = SINGLELOCUS;
735 }
736 #ifndef INTEGRATEDLIKE
737 if ((world->loci - world->skipped > 1) || (options->readsum))
738 #else
739 if ((world->loci - world->skipped > 1) || (options->readsum))
740 #endif
741 {
742 if (options->gamma)
743 world->param0[world->numpop2] = world->options->alphavalue;
744 world->repkind = set_repkind (options);
745 decide_plot (world->options, world->options->lchains,
746 world->options->lchains, 'l');
747 #ifdef LONGSUM
748 change_longsum_times (world); //multilocus
749 #endif /*LONGSUM*/
750 #ifdef INTEGRATEDLIKE
751 (void) estimateParameter (options->replicatenum, *Gmax, world, options,\
752 world->cov[world->loci], 0 /* chain */ ,\
753 type, kind, world->repkind);
754 #else
755 if(!options->bayes_infer)
756 {
757 (void) estimateParameter (options->replicatenum, *Gmax, world, options,
758 world->cov[world->loci], 0L /* chain */ ,
759 type, kind, world->repkind);
760 }
761 #endif
762 }
763 }
764
765 ///
766 /// Calculates the maximum likelihood estimates from the trees that were gathered for each locus
767 /// \callgraph
768 void
combine_loci_mpi(char type,option_fmt * options,world_fmt * world,long * Gmax)769 combine_loci_mpi (char type, option_fmt * options, world_fmt * world,
770 long *Gmax)
771 {
772 #ifdef MPI
773 long kind = MULTILOCUS;
774 if (options->readsum)
775 {
776 if (world->loci == 1)
777 kind = SINGLELOCUS;
778 }
779 if ((world->loci - world->skipped > 0) || (options->readsum))
780 {
781 if (options->gamma)
782 world->param0[world->numpop2] = world->options->alphavalue;
783 world->repkind = set_repkind (options);
784 if (myID == MASTER)
785 {
786 #ifdef INTEGRATEDLIKE
787 mpi_gmax_master (world, Gmax);
788 mpi_startparam_master (world);
789 decide_plot (world->options, world->options->lchains,
790 world->options->lchains, 'l');
791 #else
792 if(!options->bayes_infer)
793 {
794 mpi_gmax_master (world, Gmax);
795 mpi_startparam_master (world);
796 decide_plot (world->options, world->options->lchains,
797 world->options->lchains, 'l');
798 }
799 #endif
800 #ifdef LONGSUM
801 change_longsum_times (world); //multilocus
802 #endif /*LONGSUM*/
803 // broadcast Gmax to all workers, the worker pendant is in mpi_gmax_worker
804 #ifndef INTEGRATEDLIKE
805 if (!options->bayes_infer)
806 {
807 MYMPIBCAST (Gmax, 1, MPI_LONG, MASTER, comm_world);
808 (void) estimateParameter (options->replicatenum, *Gmax, world,
809 options, world->cov[world->loci],
810 0 /* chain */ ,
811 type, kind, world->repkind);
812 }
813 #else
814 MYMPIBCAST (Gmax, 1, MPI_LONG, MASTER, comm_world);
815 (void) estimateParameter (options->replicatenum, *Gmax, world,
816 options, world->cov[world->loci],
817 0 /* chain */ ,
818 type, kind, world->repkind);
819 #endif
820 }
821 else
822 {
823 #ifndef INTEGRATEDLIKE
824 // printf("%i> before gmax worker\n", myID);
825 if(!options->bayes_infer)
826 {
827 mpi_gmax_worker (world);
828 mpi_startparam_worker (world);
829 }
830 // printf("%i> start worker postlike calculations\n", myID);
831 mpi_maximize_worker (world, options, MULTILOCUS, options->replicatenum); // receive broadcast for Gmax
832 #else
833 mpi_gmax_worker (world);
834 mpi_startparam_worker (world);
835 mpi_maximize_worker (world, options, MULTILOCUS, options->replicatenum); // receive broadcast for Gmax
836 #endif
837 }
838 }
839 // else
840 // {
841 // if (myID != MASTER)
842 // {
843 // maxreplicate = (options->replicate
844 // && options->replicatenum >
845 // 0) ? options->replicatenum : 1;
846 // fprintf(stdout,"about to pack result buffer and send too");
847 // mpi_results_worker(MIGMPI_SUMFILE, world, maxreplicate, pack_result_buffer);
848 // //mpi_maximize_worker(world, options->replicatenum);
849 // }
850 // }
851 #endif
852 }
853
854 ///
855 /// calculate the number populations to analyze and resize the newpop array if necessary
856 /// this function is essential only for cases where the user changes the population number
857 /// using the relabel option in the parmfile
858 /// in almost all cases options->newpops_numalloc and data->numpop are the same size.
859 /// Fix May 2015: relabel > numpop gets adjust to numpop and a warning is issued, because the
860 /// datafile and the parmfile are mismatched; relabel can be smaller than numpop
861 /// but not larger
calculate_newpop_numpop(option_fmt * options,data_fmt * data)862 long calculate_newpop_numpop(option_fmt *options, data_fmt *data)
863 {
864 long pop;
865 long i;
866 long newnumpop=0;
867 long numpop = data->numpop;
868 #ifdef WIN32
869 // (old?) windows compilers have difficult to assign memory based on variables
870 long temp[5000];//this is __way__ more than migrate ever will be able to handle
871 #else
872 long temp[data->numpop < options->newpops_numalloc ? options->newpops_numalloc : data->numpop];
873 #endif
874 //use order of populations {1,2,3,4,5,....} , do not start with zero!!
875 //reset to options->newpops={1,1,2,1,2,3,4,...}
876 if (options->newpops_numalloc != numpop)
877 {
878 // newpops_numalloc > numpop ===> warn and resize
879 // newpops_numalloc < numpop ===> resize and fill
880 if(options->newpops_numalloc > numpop)
881 {
882 warning("Population number in datafile is smaller than in the relabel option!");
883 warning("Relabel option will be adjust to number of populations in datafile");
884 options->newpops_numalloc = numpop;
885 }
886 options->newpops = (long *) myrealloc(options->newpops,sizeof(long)*numpop);
887 for(i=options->newpops_numalloc;i<numpop;i++)
888 {
889 options->newpops[i] = i+1;
890 }
891 options->newpops_numalloc = numpop;
892 }
893 memcpy(temp,options->newpops,sizeof(long)*numpop);
894 qsort((void *) temp, (size_t) numpop, sizeof(long), longcmp);
895 pop = temp[0];
896 newnumpop = 1;
897 for(i=1;i<numpop;i++)
898 {
899 if(temp[i] != pop)
900 {
901 newnumpop++;
902 pop = temp[i];
903 }
904 }
905 options->newpops_numpop = newnumpop;
906 return newnumpop;
907 }
908
909
910 ///
911 /// the slice sampling stick size is allocated here for the options, this is
912 /// done in three different places for: standard, genealogy, MPI
alloc_sticksize(option_fmt * options,data_fmt * data)913 void alloc_sticksize(option_fmt *options, data_fmt *data)
914 {
915 if( options->slice_sticksizes==NULL)
916 {
917 options->slice_sticksizes = (MYREAL *) mycalloc(data->numpop * data->numpop + 1, sizeof(MYREAL));
918 }
919 else
920 {
921 options->slice_sticksizes = (MYREAL *) myrealloc(options->slice_sticksizes, (data->numpop * data->numpop + 1)* sizeof(MYREAL));
922 }
923 }
924
set_skiploci(option_fmt * options,data_fmt * data,world_fmt ** universe)925 void set_skiploci(option_fmt *options, data_fmt *data, world_fmt **universe)
926 {
927 long locus;
928 long i;
929 for(locus=0;locus<data->loci;locus++)
930 {
931 for (i = 0; i < options->heated_chains; i++)
932 universe[i]->data->skiploci[locus] = data->skiploci[locus];
933 }
934 }
935
936 ///
937 /// runs the MCMC sampling
938 void
run_sampler(option_fmt * options,data_fmt * data,world_fmt ** universe,int usize,long * outfilepos,long * Gmax)939 run_sampler (option_fmt * options, data_fmt * data, world_fmt ** universe,
940 int usize, long *outfilepos, long *Gmax)
941 {
942 MYREAL var;
943 long i;
944 long maxreplicate;
945 long treefilepos;
946 #ifdef MPI
947 MPI_Request *irequests; // contains requests generated in MPI_Isend
948 MPI_Status *istatus; // conatins stata from MPI_Wait_any
949 long minnodes;
950 long *twolongs; // message sent to workers contains locus and replicate number
951
952 if (myID < data->loci + 1)
953 color = 1;
954 else
955 color = MPI_UNDEFINED;
956
957 twolongs = (long *) mycalloc (TWO, sizeof (long));
958 #endif
959 getseed(options);
960 #ifdef MPI
961 if (myID == MASTER)
962 {
963 #endif
964 #ifdef DEBUG
965 char nowstr[LINESIZE];
966 get_time (nowstr, "%c");
967 printf("\n\nMaster start reading data at %s\n",nowstr);
968 #endif
969 get_data (data->infile, data, options, EARTH);
970 #ifdef MPI
971 #ifdef DEBUG
972 get_time (nowstr, "%c");
973 printf("Master finished reading data at %s\n",nowstr);
974 #endif
975 if (numcpu == 1)
976 {
977 error
978 ("This program was compiled to use a parallel computer\n and you tried to run it on only a single node.\nThis will not work because it uses a \n\"single_master-many_worker\" architecture \nand needs at least TWO nodes\n");
979 }
980 broadcast_data_master (data, options);
981 } /*end of myID==MASTER loop for MPI*/
982 else
983 {
984 broadcast_data_worker (data, options);
985 }
986 #endif
987 set_plot (options);
988 EARTH->cold = TRUE; // this is the cold chain when we use heating, the hotter chains have FALSE
989 // sticksizes are filled in init_world but initialized here so make sure that
990 // the heated chains do not produce a memory leak by mutiply initializing the sticksizes
991 alloc_sticksize(options,data);
992 // filling of options->slice_sticksizes
993 // deferred after world->bayes initialization in init_world()
994 calculate_newpop_numpop(options,data);
995
996 init_world (EARTH, data, options);
997 // set_meanmu(EARTH,options);
998 if(EARTH->cold)
999 single_chain_var (EARTH, 0L, &var, NULL, NULL);
1000
1001 #ifdef NEXUSTREE
1002 if (EARTH->options->treeprint == LASTCHAIN)
1003 {
1004 FPRINTF(EARTH->treefile,"#NEXUS\n\nbegin trees;");
1005 }
1006 #endif
1007
1008 #ifdef PTHREADS
1009 tpool_init (&heating_pool, usize, usize, 0);
1010 #endif
1011
1012 if (options->heating)
1013 {
1014 //first in universe is the cold chain[EARTH]
1015 heating_init (universe, usize, data, options);
1016 }
1017 if(options->printfst || options->thetaguess==FST || options->migrguess==FST)
1018 {
1019 calc_simple_param (EARTH, data);
1020 }
1021
1022 /* report to screen */
1023 if (myID == MASTER)
1024 {
1025 print_menu_options (EARTH, options, data);
1026 if (options->progress)
1027 print_data_summary (stdout, EARTH, options, data);
1028 /* print to outfile */
1029 #ifdef PRETTY
1030 pdf_master_init(EARTH, options, data);
1031 #endif
1032 *outfilepos = print_title (EARTH, options);
1033 print_options (EARTH->outfile, EARTH, options, data);
1034 print_data_summary (EARTH->outfile, EARTH, options, data);
1035 print_data (EARTH, options, data);
1036 print_spectra (EARTH, options, data);
1037 if(options->murates_fromdata)
1038 {
1039 if (options->progress)
1040 print_mutationrate_weights(stdout, options->mu_rates, options->segregs,options->wattersons, data->loci);
1041 print_mutationrate_weights(EARTH->outfile, options->mu_rates, options->segregs,options->wattersons, data->loci);
1042 }
1043
1044 if (options->printfst)
1045 print_fst (EARTH, options, data, EARTH->fstparam);
1046
1047 set_skiploci(options, data, universe);
1048 if(options->bayes_infer && myID == MASTER)
1049 {
1050 if(options->has_bayesmdimfile)
1051 print_bayes_mdimfileheader(EARTH->bayesmdimfile,
1052 options->bayesmdiminterval, EARTH, data);
1053 }
1054 }
1055 if(!options->bayes_infer) // only needed for ML
1056 {
1057 if (options->lchains < 2 && options->replicatenum == 0)
1058 {
1059 options->replicate = 0;
1060 }
1061 }
1062 maxreplicate = universe[0]->maxreplicate;
1063 //(options->replicate
1064 // && options->replicatenum > 0) ? options->replicatenum : 1;
1065 #ifdef MPI
1066 minnodes = MAX (0, numcpu - data->loci - 1);
1067 irequests = (MPI_Request *) mycalloc (minnodes + 1, sizeof (MPI_Request));
1068 istatus = (MPI_Status *) mycalloc (minnodes + 1, sizeof (MPI_Status));
1069 if (myID == MASTER)
1070 {
1071 mpi_runloci_master (data->loci, EARTH->who, EARTH, options->readsum, options->menu);
1072 }
1073 else
1074 {
1075 mpi_runloci_worker (universe, usize, options, data,
1076 &heating_pool, maxreplicate, &treefilepos, Gmax);
1077 }
1078 myfree(istatus);
1079 myfree(irequests);
1080 #else /*MPI*/
1081 run_loci (universe, usize, options, data,
1082 &heating_pool, maxreplicate, &treefilepos, Gmax);
1083
1084 #endif /*MPI*/
1085
1086 #ifdef BEAGLE
1087 beagle_stop(universe, usize);
1088 #endif
1089
1090 #ifdef PTHREADS
1091 tpool_destroy (heating_pool, 1);
1092 #endif
1093 #ifdef MPI
1094
1095 if (myID != MASTER)
1096 {
1097 #endif
1098 if (options->heating)
1099 {
1100 for (i = 0; i < options->heated_chains; i++)
1101 {
1102 //free_tree(universe[i]->root, universe[i]);
1103 free_timevector (universe[i]->treetimes);
1104 }
1105 }
1106 else
1107 {
1108 //free_tree(EARTH->root, EARTH);
1109 free_timevector (EARTH->treetimes);
1110 }
1111 #ifdef MPI
1112 }
1113 myfree(twolongs);
1114 #endif
1115 }
1116
1117 /// generate samples of genealogies for all loci
1118 void
run_loci(world_fmt ** universe,int usize,option_fmt * options,data_fmt * data,tpool_t * localheating_pool,long maxreplicate,long * treefilepos,long * Gmax)1119 run_loci (world_fmt ** universe, int usize, option_fmt * options,
1120 data_fmt * data, tpool_t * localheating_pool, long maxreplicate,
1121 long *treefilepos, long *Gmax)
1122 {
1123 long locus;
1124 for (locus = 0; locus < data->loci; locus++)
1125 {
1126 if(!data->skiploci[locus])
1127 {
1128 run_locus (universe, usize, options, data,
1129 localheating_pool, maxreplicate, locus, treefilepos, Gmax);
1130 }
1131 free_datapart(data,options,locus);
1132 }
1133 }
1134
1135
1136 /// save all genealogy summaries
1137 void
get_treedata(world_fmt * world,option_fmt * options)1138 get_treedata (world_fmt * world, option_fmt * options)
1139 {
1140 #ifdef MPI
1141 long maxreplicate = world->maxreplicate;
1142 #endif
1143
1144 if (myID == MASTER)
1145 {
1146 if (options->writesum)
1147 {
1148 #ifdef MPI
1149 //get all sumfile data from the workers using unpack_sumfile_buffer()
1150 mpi_results_master (MIGMPI_SUMFILE, world, maxreplicate,
1151 unpack_sumfile_buffer);
1152 #endif
1153 write_savesum (world);
1154 }
1155 #ifdef SLOWNET
1156 else
1157 {
1158 // printf("%i> about to receive the sumfile data\n", myID);
1159 mpi_results_master (MIGMPI_SUMFILE, world, maxreplicate,
1160 unpack_sumfile_buffer);
1161 // printf("%i> all sumfile data received and unpacked\n", myID);
1162 }
1163 #endif
1164 }
1165 }
1166
1167 ///
1168 /// save all genealogy summaries
1169 void
get_bayeshist(world_fmt * world,option_fmt * options)1170 get_bayeshist (world_fmt * world, option_fmt * options)
1171 {
1172 #ifdef MPI
1173 //long i;
1174 long maxreplicate = world->maxreplicate;
1175 if (myID == MASTER)
1176 {
1177 //for(i=0;i<world->loci;i++)
1178 // printf("%i> before moving stuff %f %f\n",myID, world->hm[i],world->am[i]);
1179 mpi_results_master (MIGMPI_BAYESHIST, world, maxreplicate,
1180 unpack_bayes_buffer);
1181 //for(i=0;i<world->loci;i++)
1182 // printf("%i> after moving stuff %f %f\n",myID, world->hm[i],world->am[i]);
1183 }
1184 #endif
1185 }
1186
recalc_skyline_values(world_fmt * world,option_fmt * options,long maxreplicate)1187 void recalc_skyline_values(world_fmt *world, option_fmt * options, long maxreplicate)
1188 {
1189 long i, j, locus;
1190 mighistloci_fmt *aa;
1191 long * eventnum;
1192 const float invmax = (float) (1./maxreplicate);
1193 if(world->options->mighist && world->options->skyline)
1194 {
1195 for(locus=0; locus < world->loci; locus++)
1196 {
1197 aa = &world->mighistloci[locus];
1198 eventnum = world->mighistloci[locus].eventbinnum;
1199 for(j=0; j< world->numpop2; j++)
1200 {
1201 for (i = 0; i < eventnum[j]; i++)
1202 {
1203 aa->eventbins[j][i][0] *= invmax;
1204 aa->eventbins[j][i][1] *= invmax;
1205 aa->eventbins[j][i][2] *= invmax;
1206 aa->eventbins[j][i][3] *= invmax;
1207 aa->eventbins[j][i][4] *= invmax;
1208 aa->eventbins[j][i][5] *= invmax;
1209 }
1210 }
1211 }
1212 }
1213 }
1214
1215
1216 /// get migration event time data
1217 /// and skyline data when present
1218 void
get_mighistdata(world_fmt * world,option_fmt * options)1219 get_mighistdata (world_fmt * world, option_fmt * options)
1220 {
1221 #ifdef MPI
1222 long maxreplicate = world->maxreplicate;
1223 if (myID == MASTER)
1224 {
1225 if (options->mighist)
1226 {
1227 //get all mighist data from the workers using unpack_mighist_buffer()
1228 mpi_results_master (MIGMPI_MIGHIST, world, maxreplicate,
1229 unpack_mighist_buffer);
1230 if(options->skyline)
1231 {
1232 //get all mighist data from the workers using unpack_mighist_buffer()
1233 fprintf(stdout,"%i> before unpack skyline\n",myID);
1234 mpi_results_master (MIGMPI_SKYLINE, world, maxreplicate,
1235 unpack_skyline_buffer);
1236 }
1237 }
1238 if(/*world->options->treeprint ==BEST &&*/ world->options->treeinmemory==TRUE)
1239 {
1240 mpi_results_master (MIGMPI_TREESPACE, world, maxreplicate,
1241 unpack_treespace_buffer);
1242 }
1243 recalc_skyline_values(world, options, maxreplicate);
1244 }
1245 #endif
1246
1247 }
1248
1249 /// swap the tree pointer between chains with different temperatures
1250 void
heated_swap(world_fmt ** universe,worldoption_fmt * options)1251 heated_swap (world_fmt ** universe, worldoption_fmt * options)
1252 {
1253 long sm = 0, mv = 0, vw = 0;
1254 long r;
1255 //debug for heating traverse issues
1256 if(options->heatedswap_off || (options->heating_count++ % options->heating_interval) != 0)
1257 return;
1258 switch (r = RANDINT (0L, options->heated_chains - 2L))
1259 {
1260 case 2:
1261 sm = chance_swap_tree (SUN, MERKUR);
1262 MERKUR->swapped += sm;
1263 break;
1264 case 1:
1265 mv = chance_swap_tree (MERKUR, VENUS);
1266 VENUS->swapped += mv;
1267 break;
1268 case 0:
1269 vw = chance_swap_tree (VENUS, EARTH);
1270 EARTH->swapped += vw;
1271 break;
1272 default:
1273 universe[r]->swapped += chance_swap_tree (universe[r + 1], universe[r]);
1274 }
1275 }
1276
1277
1278 /// change the type of the chain form short to long and reset the treelist
1279 void
change_chaintype(long locus,char * type,world_fmt * world,long * increment,long * oldsteps,long * chains,option_fmt * options)1280 change_chaintype (long locus, char *type, world_fmt * world, long *increment,
1281 long *oldsteps, long *chains, option_fmt * options)
1282 {
1283 if (*type == 's')
1284 {
1285 *type = 'l';
1286 create_treetimelist (world, &(world->treetimes), locus);
1287 set_bounds (increment, oldsteps, chains, options, *type);
1288 world->increment = *increment;
1289 }
1290 }
1291
1292 /// prepare the next chain
1293 void
prepare_next_chain(world_fmt ** universe,worldoption_fmt * options,char type,long chain,long * chains,long * pluschain,long locus,long replicate)1294 prepare_next_chain (world_fmt ** universe, worldoption_fmt * options,
1295 char type, long chain, long *chains, long *pluschain,
1296 long locus, long replicate)
1297 {
1298 long i;
1299 EARTH->likelihood[0] = EARTH->likelihood[EARTH->G];
1300 if (options->heating)
1301 {
1302 for (i = 1; i < options->heated_chains; i++)
1303 universe[i]->likelihood[0] = universe[i]->likelihood[universe[i]->G];
1304 }
1305 EARTH->treetimes[0].copies = 0;
1306 if (type == 'l')
1307 {
1308 if (options->replicate && options->replicatenum > 0)
1309 EARTH->chainlikes[locus][replicate] = EARTH->param_like;
1310 else
1311 EARTH->chainlikes[locus][chain] = EARTH->param_like;
1312 }
1313 EARTH->start = FALSE;
1314 if (type == 'l')
1315 {
1316 if (chain < *chains + *pluschain)
1317 {
1318 if (((EARTH->param_like > options->lcepsilon)
1319 //|| (options->gelman && EARTH->convergence->gelmanmaxRall > GELMAN_MYSTIC_VALUE)
1320 )
1321 && !(options->replicate && options->replicatenum == 0))
1322 {
1323 (*chains)++;
1324 (*pluschain)--;
1325 }
1326 }
1327 }
1328 }
1329
1330 ///
1331 /// resetting of accept and accept_freq for heated chains
reset_heated_accept(world_fmt ** universe,long unum)1332 void reset_heated_accept(world_fmt **universe, long unum)
1333 {
1334 long i;
1335 for(i=0; i < unum; i++)
1336 {
1337 universe[i]->swapped = 0;
1338 universe[i]->accept = 0L;
1339 universe[i]->accept_freq = 0.;
1340 }
1341 }
1342
1343
1344 /// print progress about heated chains
1345 void
print_heating_progress(world_fmt ** universe,worldoption_fmt * options,long stepinc)1346 print_heating_progress (world_fmt ** universe, worldoption_fmt * options,
1347 long stepinc)
1348 {
1349 MYREAL fstepinc = (MYREAL) stepinc;
1350 if (options->heating)
1351 {
1352 #ifdef DEBUG_MPI
1353 printf("%i> stepinc=%li =========\n",myID, stepinc);
1354 #endif
1355 universe[0]->accept_freq /= fstepinc;
1356 universe[1]->accept_freq /= fstepinc;
1357 universe[2]->accept_freq /= fstepinc;
1358 universe[3]->accept_freq /= fstepinc;
1359 if (options->progress)
1360 print_heating_progress2 (stdout, options, universe);
1361 if (options->writelog)
1362 print_heating_progress2 (options->logfile, options, universe);
1363 }
1364 }
1365
1366 ///
1367 /// sub-function of print_heating, in MPI mode, the string printing will minimize data transfer.
1368 void
print_heating_progress2(FILE * file,worldoption_fmt * options,world_fmt ** universe)1369 print_heating_progress2 (FILE * file, worldoption_fmt * options,
1370 world_fmt ** universe)
1371 {
1372 char *plog;
1373 long plogsize = 0L;
1374 char nowstr[STRSIZE];
1375 char dots[STRSIZE];
1376 get_time (nowstr, "%H:%M:%S");
1377 plog = (char *) mycalloc(LONGLINESIZE,sizeof(char));
1378 #ifdef MPI
1379 plogsize = sprintf(plog, "[%3i] %s %li chains: ", myID, nowstr, options->heated_chains);
1380 #else
1381 plogsize = sprintf(plog, "%s %li chains: ", nowstr, options->heated_chains);
1382 #endif
1383 if (options->heated_chains > 4)
1384 strcpy(dots,",...");
1385 else
1386 dots[0]='\0';
1387 plogsize += sprintf (plog + plogsize, "Temp(%.4g,%.4g,%.4g,%.4g%s) ",universe[0]->averageheat,
1388 universe[1]->averageheat,universe[2]->averageheat,universe[3]->averageheat,dots);
1389 plogsize += sprintf (plog + plogsize, "Acc(%.2f,%.2f,%.2f,%.2f%s) ",universe[0]->accept_freq,
1390 universe[1]->accept_freq,universe[2]->accept_freq,universe[3]->accept_freq,dots);
1391 plogsize += sprintf (plog + plogsize, "Swap(%li,%li,%li,%li%s)\n",universe[0]->swapped,
1392 universe[1]->swapped,universe[2]->swapped,universe[3]->swapped,dots);
1393 FPRINTF(file,"%s",plog);
1394 myfree(plog);
1395 }
1396
1397
1398 /// analyze old sumfile data
1399 long
analyze_olddata(world_fmt * world,option_fmt * options,data_fmt * data,long * outfilepos)1400 analyze_olddata (world_fmt * world,
1401 option_fmt * options, data_fmt * data, long *outfilepos)
1402 {
1403 long locus;
1404 #ifdef MPI
1405 long listofthings[6];
1406 #endif
1407 long Gmax = 0;
1408 long pop;
1409 options->heating = FALSE;
1410 options->gelman = FALSE;
1411 if (myID == MASTER)
1412 {
1413 unset_penalizer_function (TRUE);
1414 Gmax = read_savesum (world, options, data);
1415
1416 data->popnames = (char **) mymalloc (sizeof (char *) * world->numpop);
1417 data->locusweight =
1418 (MYREAL *) mycalloc (1, sizeof (MYREAL) * (data->loci + 1));
1419 for (locus=0;locus < data->loci; locus++)
1420 {
1421 data->locusweight[locus]=1.0;
1422 }
1423 for (pop = 0; pop < world->numpop; pop++)
1424 {
1425 data->popnames[pop] =
1426 (char *) mycalloc (1, sizeof (char) * LINESIZE);
1427 MYSNPRINTF (data->popnames[pop], LINESIZE, "Pop_%li ", pop + 1);
1428 }
1429
1430 #ifdef MPI
1431 #ifdef WINDOWS
1432 MYMPIBCAST (data->geo, world->numpop2, MPI_DOUBLE, MASTER, comm_world );
1433 MYMPIBCAST (data->lgeo, world->numpop2, MPI_DOUBLE, MASTER, comm_world );
1434 #else
1435 MYMPIBCAST (data->geo, world->numpop2, mpisizeof, MASTER, comm_world );
1436 MYMPIBCAST (data->lgeo, world->numpop2, mpisizeof, MASTER, comm_world );
1437 #endif
1438 }
1439 else
1440 {
1441 MYMPIBCAST (listofthings, 6, MPI_LONG, MASTER, comm_world );
1442 // printf("listofthings received\n");
1443 // fflush(stdout);
1444 world->loci = listofthings[0];
1445 world->numpop = listofthings[1];
1446 world->numpop2 = listofthings[2];
1447 options->replicate = (boolean) listofthings[3];
1448 options->replicatenum = listofthings[4];
1449 Gmax = listofthings[5];
1450 data->numpop = world->numpop;
1451 options->newpops_numalloc = world->numpop;
1452 options->newpops = (long*) mycalloc(options->newpops_numalloc, sizeof(long));
1453 calculate_newpop_numpop(options,data);
1454 data->geo = (MYREAL *) mycalloc (1, sizeof (MYREAL) * world->numpop2);
1455 data->lgeo = (MYREAL *) mycalloc (1, sizeof (MYREAL) * world->numpop2);
1456 data->locusweight =
1457 (MYREAL *) mycalloc (1, sizeof (MYREAL) * (data->loci + 1));
1458 for (locus=0;locus < data->loci; locus++)
1459 {
1460 data->locusweight[locus]=1.0;
1461 }
1462 #ifdef WINDOWS
1463 MYMPIBCAST (data->geo, world->numpop2, MPI_DOUBLE, MASTER, comm_world);
1464 MYMPIBCAST (data->lgeo, world->numpop2, MPI_DOUBLE, MASTER, comm_world);
1465 #else
1466 MYMPIBCAST (data->geo, world->numpop2, mpisizeof, MASTER, comm_world);
1467 MYMPIBCAST (data->lgeo, world->numpop2, mpisizeof, MASTER, comm_world);
1468 #endif
1469 options->muloci = data->loci = world->loci;
1470 init_world (world, data, options);
1471 #endif
1472
1473 }
1474 #ifdef MPI
1475 #ifdef SLOWNET
1476 mpi_broadcast_results (world, world->loci,
1477 pack_sumfile_buffer, unpack_sumfile_buffer);
1478 #endif
1479 #endif
1480
1481 synchronize_param (world, options);
1482 if (options->plot)
1483 {
1484 options->plotnow = TRUE;
1485 world->options->plotnow = TRUE;
1486 }
1487 world->locus = 0;
1488 if (myID == MASTER)
1489 {
1490 *outfilepos = print_title (world, options);
1491 print_options (stdout, world, options, data);
1492 print_options (world->outfile, world, options, data);
1493 #ifdef PRETTY
1494 pdf_master_init(world, options, data);
1495 #endif
1496
1497 #ifdef MPI
1498 MYMPIBARRIER(comm_world);
1499 printf("%i> before mpi_runlocimaster\n",myID);fflush(stdout);
1500 mpi_runloci_master (world->loci, world->who, world, options->readsum, options->menu);
1501
1502 #else
1503
1504 for (locus = 0; locus < world->loci; locus++)
1505 {
1506 if (options->replicate && options->replicatenum > 0)
1507 {
1508 world->locus = locus;
1509 world->repkind = MULTIPLERUN;
1510 #ifdef LONGSUM
1511 change_longsum_times (world); //multi run replicates
1512 #endif /*LONGSUM*/
1513 if (!options->bayes_infer)
1514 {
1515 (void) estimateParameter (options->replicatenum, Gmax, world,
1516 options, world->cov[locus], 1L, 'l',
1517 SINGLELOCUS, world->repkind);
1518 }
1519 }
1520 }
1521 #endif
1522
1523 }
1524 #ifdef MPI
1525 else
1526 {
1527 MYMPIBARRIER(comm_world);
1528 printf("%i> before assignloci_worker\n",myID);fflush(stdout);
1529 assignloci_worker (world, options, &Gmax);
1530 // some workers might still wait in assignloci_worker(), will be stopped through the master
1531 }
1532 #endif
1533 return Gmax;
1534 } /*analyze_olddata()*/
1535
1536
analyze_oldbayesdata(world_fmt * world,option_fmt * options,data_fmt * data,long * outfilepos)1537 boolean analyze_oldbayesdata(world_fmt *world, option_fmt *options, data_fmt *data, long *outfilepos)
1538 {
1539 #ifndef MPI
1540 long pop;
1541 #endif
1542 long locus;
1543 char **files = NULL;
1544 long numfiles=1;
1545 charvec2d(&files,numfiles,LINESIZE);
1546 // insert code for number of files and allocation and naming
1547 strcpy(files[0],options->bayesmdimfilename);
1548 // read data from bayesfile
1549 world->cold=TRUE;
1550 // reads some minimal information form the header of the bayesallfile
1551 // this only works with files written with migrate 2.5+
1552 #ifdef MPI
1553 if(myID==MASTER)
1554 {
1555 warning("does not work correctly yet");
1556 read_from_bayesmdim_minimal_info(world->bayesmdimfile, world, options, data);
1557 read_geofile (data, options, world->numpop);
1558 alloc_sticksize(options,data);
1559 options->newpops_numalloc = world->numpop;
1560 options->newpops = (long*) mycalloc(options->newpops_numalloc, sizeof(long));
1561 calculate_newpop_numpop(options,data);
1562 data->locusweight =
1563 (MYREAL *) mycalloc (1, sizeof (MYREAL) * (data->loci + 1));
1564 for (locus=0;locus < data->loci; locus++)
1565 {
1566 data->locusweight[locus]=1.0;
1567 }
1568 /*options->newpops_numalloc = world->numpop;
1569 options->newpops = (long*) mycalloc(options->newpops_numalloc, sizeof(long));
1570 for(pop=0;pop<world->numpop;pop++)
1571 options->newpops[pop]=pop+1;*/
1572 init_world (world, data, options);
1573 read_bayes_fromfile(world->bayesmdimfile, world, options,files, numfiles);
1574 }
1575 #else
1576 read_from_bayesmdim_minimal_info(world->bayesmdimfile, world, options, data);
1577 read_geofile (data, options, world->numpop);
1578 alloc_sticksize(options,data);
1579 options->newpops_numalloc = world->numpop;
1580 options->newpops = (long*) mycalloc(options->newpops_numalloc, sizeof(long));
1581 for(pop=0;pop<world->numpop;pop++)
1582 options->newpops[pop]=pop+1;
1583 data->locusweight =
1584 (MYREAL *) mycalloc (1, sizeof (MYREAL) * (data->loci + 1));
1585 for (locus=0;locus < data->loci; locus++)
1586 {
1587 data->locusweight[locus]=1.0;
1588 }
1589 init_world (world, data, options);
1590 read_bayes_fromfile(world->bayesmdimfile, world, options, files, numfiles);
1591 #endif
1592 //set_meanmu(world,options);
1593
1594 #ifdef PRETTY
1595 pdf_master_init(world, options, data);
1596 #endif
1597 for(locus=0;locus<world->loci;locus++)
1598 {
1599 if(world->data->skiploci[locus])
1600 continue;
1601 if(world->bayes->histogram[locus].results == NULL)
1602 {
1603 world->data->skiploci[locus] = TRUE;
1604 continue;
1605 }
1606 calc_hpd_credibility(world->bayes, locus, world->numpop2, world->numpop2 + world->bayes->mu);
1607 }
1608 myfree(files);
1609 return TRUE;
1610 }
1611
1612 /// generate profile likelihood tables
1613 void
profile_tables(option_fmt * options,world_fmt * world,long * gmaxptr)1614 profile_tables (option_fmt * options, world_fmt * world, long *gmaxptr)
1615 {
1616 long len;
1617 #ifndef SLOWNET
1618 #ifdef PRETTY
1619 long location = 0;
1620 #endif
1621 long i;
1622 #endif
1623
1624 if (options->profile != _NONE)
1625 {
1626 // world->buffer =
1627 // (char *) myrealloc (world->buffer, allocbufsize * sizeof (char));
1628 if (options->printprofsummary)
1629 allocate_profile_percentiles (world);
1630 #ifdef HAVE_STRFTIME
1631 time (&world->starttime);
1632 #endif
1633 len = world->numpop2 + (world->options->gamma ? 1 : 0);
1634 #ifdef LONGSUM
1635 len += world->numpop * 3;
1636 #endif /*LONGSUM*/
1637 #ifdef SLOWNET
1638 if (!options->readsum)
1639 {
1640 mpi_broadcast_results (world, world->loci,
1641 pack_sumfile_buffer, unpack_sumfile_buffer);
1642 }
1643 mpi_broadcast_results (world,
1644 world->loci + ((world->loci == 1) ? 0 : 1),
1645 pack_result_buffer, unpack_result_buffer);
1646 //fprintf(stdout,"%i >>>>>>>>> wait for all at barrier in profiles_tables [main.c:1053]",myID);
1647 MYMPIBARRIER (comm_world);
1648 if (myID == MASTER)
1649 {
1650 print_profile_title (world);
1651 #ifdef PRETTY
1652 pdf_print_profile_title(world);
1653 #endif
1654 mpi_profiles_master (world, len, world->profilewho);
1655 }
1656 else
1657 mpi_profiles_worker (world, gmaxptr);
1658 #else /*SLOWNET -not*/
1659 if(myID==MASTER && (options->profile == TABLES || options->profile == ALL))
1660 {
1661 print_profile_title (world);
1662 #ifdef PRETTY
1663 pdf_print_profile_title(world);
1664 #endif
1665 }
1666 for (i = 0; i < len; i++)
1667 {
1668 boolean success = print_profile_likelihood_driver (i, world, gmaxptr);
1669 if(options->profile == TABLES || options->profile == ALL)
1670 {
1671 LARGEFPRINTF (world->outfile, world->allocbufsize+2, "%s\n\n", world->buffer);
1672 //ascii_print_profile_table(options->profilemethod, world);
1673 #ifdef PRETTY
1674 location = strlen(world->buffer);
1675 if(success)
1676 pdf_print_profile_table(55.0F, &page_height, options->profilemethod, world->buffer + location + 1, world);
1677 #endif
1678 }
1679 world->buffer[0] = '\0';
1680 }
1681 #endif /*SLOWNET end of not*/
1682 if (myID == MASTER && options->printprofsummary)
1683 {
1684 world->allocbufsize = print_profile_percentile (world);
1685 // print profile summary
1686 LARGEFPRINTF (world->outfile, world->allocbufsize, "%s\n\n", world->buffer);
1687 #ifdef PRETTY
1688 pdf_print_profile_percentile (world);
1689 #endif
1690 world->buffer[0] = '\0';
1691 destroy_profile_percentiles (world);
1692 }
1693 }
1694 //myfree(world->buffer);
1695 }
1696
1697 /// finish up mac files \todo remove this function
1698 void
finish_mac(option_fmt * options,data_fmt * data)1699 finish_mac (option_fmt * options, data_fmt * data)
1700 {
1701 #ifdef MAC
1702 #if 0
1703 fixmacfile (options->outfilename);
1704 if (options->plotmethod == PLOTALL)
1705 fixmacfile (options->mathfilename);
1706 if (options->treeprint)
1707 fixmacfile (options->treefilename);
1708 if (options->parmfile)
1709 fixmacfile ("parmfile");
1710 if (data->sumfile)
1711 fixmacfile (options->sumfilename);
1712 #endif
1713 #endif
1714 }
1715
1716 /// \brief setup a locus
1717 ///
1718 /// Set up a the structures for a locus. Run-Parameters are copied into the
1719 /// major structure WORLD from options and data. a first genealogy is build
1720 /// adjusted to the starting parameters (changing times of nodes) and a first
1721 /// conditional likelihood is calculated for all nodes, also the tree-parallele
1722 /// time structure is generated.
1723 /// Several run controllers are checked: replicate, datatype
1724 /// \retvalue 0 { failed to setup structure for locus because there is no data }
1725 /// \retvalue 1 { succeeded to setup locus-structure }
1726 int
setup_locus(long locus,world_fmt * world,option_fmt * options,data_fmt * data)1727 setup_locus (long locus, world_fmt * world, option_fmt * options,
1728 data_fmt * data)
1729 {
1730 world->locus = locus;
1731 set_param (world, data, options, locus);
1732 if (world->options->progress)
1733 print_menu_locus (stdout, world, locus);
1734 if (world->options->writelog)
1735 print_menu_locus (world->options->logfile, world, locus);
1736 world->start = TRUE;
1737 buildtree (world, world, options, data, locus);
1738 #ifdef BEAGLE
1739 finish_mutationmodel(world, data, options);
1740 init_beagle(world,locus);
1741 set_beagle_instances(world,TRUE); // sets two instances for beagle
1742 // set_beagle_instances(world,FALSE); // so that wee can easily MCMC
1743 fill_beagle_instances(world);
1744 #endif
1745 if (data->skiploci[locus]) /* skip loci with no tips */
1746 return 0;
1747
1748 create_treetimelist (world, &world->treetimes, locus);
1749 fix_times (world, options);
1750 #ifdef DEBUG
1751 // printf(" fixed times \n");
1752 #endif
1753 first_smooth (world, locus);
1754 world->likelihood[0] = treelikelihood (world);
1755 world->allikemax = world->likelihood[0]; //-MYREAL_MAX; /* for best tree option */
1756 world->treelen = 0.0;
1757 calc_treelength (world->root->next->back, &world->treelen);
1758
1759 #ifdef UEP
1760 if (options->uep)
1761 {
1762 world->treelen = 0.0;
1763 if (options->ueprate > 0.0)
1764 calc_treelength (world->root->next->back, &world->treelen);
1765 //world->ueplikelihood = ueplikelihood(world);
1766 //world->likelihood[0] += world->ueplikelihood;
1767 }
1768 #endif
1769
1770 world->has_proposal_details=FALSE;
1771 return 1;
1772 }
1773
1774 /// condense a single genealogy into a sufficient statistic for the maximization phase
1775 void
condense_time(world_fmt * world,long * step,long * j,MYREAL * accepted,long * G,long * steps,long oldsteps,long rep)1776 condense_time (world_fmt * world, long *step, long *j, MYREAL * accepted,
1777 long *G, long *steps, long oldsteps, long rep)
1778 {
1779 static long n = 0;
1780 #ifdef INTEGRATEDLIKE
1781 long i;
1782 long z;
1783 long nn = world->numpop2 + world->bayes->mu * world->loci;
1784 MYREAL x;
1785 MYREAL delta;
1786 #endif
1787 world->rep = rep;
1788 if(world->options->bayes_infer)
1789 {
1790 if (world->in_last_chain)
1791 {
1792 if (*step == 0)
1793 return;
1794
1795 // syncs treeupdate and paramupdate
1796 if(world->bayesaccept == -1)
1797 {
1798 world->bayes->oldval = probg_treetimes(world);
1799 }
1800 bayes_save (world, *step * world->options->lincr);
1801 store_events(world, world->treetimes, world->numpop, rep);
1802 *accepted += *j;
1803 #ifdef INTEGRATEDLIKE
1804 n += 1;
1805 // fprintf(stdout,"\naverage MP-values: %li ", n);
1806 for(i=0; i < nn; i++)
1807 {
1808 if(world->bayes->map[i][1] == INVALID)
1809 continue;
1810 else
1811 {
1812 z = world->bayes->map[i][1];
1813 }
1814 x = world->param0[z];
1815 delta = x - world->atl[rep][world->locus].param0[z];
1816 world->atl[rep][world->locus].param0[z] += delta/n;
1817 // fprintf(stdout,"%f ",world->atl[rep][world->locus].param0[z]);
1818 }
1819 // fprintf(stdout,"\n");
1820 #else
1821 return;
1822 #endif
1823 }
1824 else
1825 {
1826 warning("Bayesian analysis: do not run multiple long chains, run 1 long chain! [nothing will be wrong but it is a waste of time]");
1827 return;
1828 }
1829 }
1830 if (*step == 0)
1831 {
1832 copy_time (world, world->treetimes, FIRSTSTEP, 0L, world->numpop, rep);
1833 *accepted += *j;
1834 //for INTEGRATEDLIKE
1835 n = 0;
1836 }
1837 else
1838 {
1839 copy_time (world, world->treetimes, *G, *G + (long) (*j > 0),
1840 world->numpop, rep);
1841 if (*step < *steps)
1842 {
1843 //handled in advance_world * G += (long) (*j > 0);
1844 *accepted += *j;
1845 }
1846 }
1847 if (*step >= oldsteps - 1 && world->options->movingsteps)
1848 {
1849 if (((MYREAL) (*G + 1)) < world->options->acceptfreq * oldsteps)
1850 {
1851 (*steps)++;
1852 }
1853 }
1854 }
1855
print_theta0(FILE * file,world_fmt * world,long maxreplicate)1856 void print_theta0(FILE *file, world_fmt *world, long maxreplicate)
1857 {
1858 long i;
1859 long z;
1860 long locus;
1861 long rep;
1862 long nn = world->numpop2;
1863 fprintf(file,"Locus Replicate Parameters\n");
1864 fprintf(file,"-----------------------------------------------------------\n");
1865 for(locus=0; locus < world->loci; locus++)
1866 {
1867 for(rep = 0; rep < maxreplicate; rep++)
1868 {
1869 fprintf(file,"%5li %5li ",locus, rep+1);
1870 for(i=0; i < nn; i++)
1871 {
1872 if(world->bayes->map[i][1] == INVALID)
1873 continue;
1874 else
1875 {
1876 z = world->bayes->map[i][1];
1877 }
1878 fprintf(file,"%f ", world->atl[rep][world->locus].param0[z]);
1879 }
1880 fprintf(file,"\n");
1881 }
1882 }
1883 fprintf(file,"\n\n");
1884 }
1885
1886 /// set type of replication scheme
1887 long
set_repkind(option_fmt * options)1888 set_repkind (option_fmt * options)
1889 {
1890 if (options->replicate)
1891 {
1892 if (options->replicatenum == 0)
1893 return MULTIPLECHAIN;
1894 else
1895 return MULTIPLERUN;
1896 }
1897 return SINGLECHAIN;
1898 }
1899
1900 /// intialize heating scheme
1901 void
heating_init(world_fmt ** universe,int usize,data_fmt * data,option_fmt * options)1902 heating_init (world_fmt ** universe, int usize, data_fmt * data,
1903 option_fmt * options)
1904 {
1905 long chain;
1906 char tmp[20];
1907 for (chain = 1; chain < usize; ++chain)
1908 {
1909 MYSNPRINTF (tmp, 20L, "%10.5f", options->heat[chain]);
1910 create_world (&universe[chain], data->loci);
1911 universe[chain]->cold = FALSE;
1912 init_world (universe[chain], data, options);
1913 universe[chain]->heatid = chain;
1914 }
1915 }
1916
1917 /// prepare for heating scheme: Step I
1918 void
heating_prepare(world_fmt ** universe,int usize,option_fmt * options,data_fmt * data,long rep)1919 heating_prepare (world_fmt ** universe, int usize,
1920 option_fmt * options, data_fmt * data, long rep)
1921 {
1922 long chain;
1923 EARTH->heat = options->heat[0];
1924 for (chain = 1; chain < usize; ++chain)
1925 {
1926 buildtree (universe[0], universe[chain], options, data, EARTH->locus);//DEBUG
1927 klone (EARTH, universe[chain], options, data, options->heat[chain]);
1928 }
1929 }
1930
1931 /// prepare for heating scheme: Step I [this function failed in the parallel run, unused for now, replaced by above]
1932 void
heating_prepare_old(world_fmt ** universe,int usize,option_fmt * options,data_fmt * data,long rep)1933 heating_prepare_old (world_fmt ** universe, int usize,
1934 option_fmt * options, data_fmt * data, long rep)
1935 {
1936 long chain;
1937 EARTH->heat = options->heat[0];
1938 if (rep == 0)
1939 {
1940 for (chain = 1; chain < usize; ++chain)
1941 klone (EARTH, universe[chain], options, data, options->heat[chain]);
1942 }
1943 else
1944 {
1945 for (chain = 1; chain < usize; ++chain)
1946 klone_part (EARTH, universe[chain], options, data,
1947 options->heat[chain]);
1948 }
1949 }
1950
1951 /// prepare for heating scheme: Step II
1952 void
heating_prepare2(world_fmt ** universe,int usize)1953 heating_prepare2 (world_fmt ** universe, int usize)
1954 {
1955 long chain;
1956 universe[0]->averageheat = 1.0;
1957 for (chain = 1; chain < usize; ++chain)
1958 {
1959 universe[chain]->G = 0;
1960 universe[chain]->averageheat = universe[chain]->options->adaptiveheat!=NOTADAPTIVE ?
1961 0.0 : 1. / universe[chain]->heat;
1962 }
1963 for (chain = 1; chain < usize; ++chain)
1964 {
1965 clone_polish (EARTH, universe[chain]);
1966 }
1967 }
1968
1969 /// \brief generates replicate number
1970 ///
1971 /// generates replicate number
1972 /// \rtval 0 no replicate
1973 /// \rtval rep number of replicates
1974 long
replicate_number(option_fmt * options,long chain,char type,long replicate)1975 replicate_number (option_fmt * options, long chain, char type, long replicate)
1976 {
1977 long rep = 0;
1978 if (options->replicate && options->replicatenum > 0)
1979 rep = replicate;
1980 else
1981 {
1982 if (type == 'l' && options->replicate)
1983 rep = chain;
1984 else
1985 rep = 0;
1986 }
1987 return rep;
1988 }
1989
1990 /// generate genealogies for a single locus
1991 /// \callgraph
1992 void
run_locus(world_fmt ** universe,int usize,option_fmt * options,data_fmt * data,tpool_t * localheating_pool,long maxreplicate,long locus,long * treefilepos,long * Gmax)1993 run_locus (world_fmt ** universe, int usize, option_fmt * options,
1994 data_fmt * data, tpool_t * localheating_pool, long maxreplicate,
1995 long locus, long *treefilepos, long *Gmax)
1996 {
1997 long i;
1998 long convergence_len = universe[0]->numpop2 + 3 * universe[0]->numpop;
1999 long replicate;
2000 // DEBUG Hapmap stuff
2001 if(EARTH->data->skiploci[locus])
2002 return;
2003 if(options->bayes_infer)
2004 convergence_len = universe[0]->numpop2 + 1;
2005 memset(EARTH->convergence->chain_means,0,maxreplicate * convergence_len * sizeof(MYREAL));
2006 memset(EARTH->convergence->chain_s,0,maxreplicate * convergence_len * sizeof(MYREAL));
2007 memset(EARTH->convergence->gelmanmeanmaxR,0,maxreplicate * maxreplicate * sizeof(MYREAL));
2008 for (replicate = 0; replicate < maxreplicate; replicate++)
2009 {
2010 run_replicate (locus, replicate, universe, options, data,
2011 localheating_pool, usize, treefilepos, Gmax);
2012 if(EARTH->cold && EARTH->options->replicatenum > 0)
2013 {
2014 if(options->gelman)
2015 {
2016 chain_means(&EARTH->convergence->chain_means[replicate * convergence_len], EARTH);
2017 calc_chain_s(EARTH->convergence->chain_s, EARTH->convergence->chain_means, EARTH,
2018 replicate);
2019 if (replicate > 0)
2020 {
2021 convergence_check_bayes(EARTH, maxreplicate);
2022 convergence_progress(stdout, EARTH);
2023 }
2024 }
2025 }
2026 }
2027 #ifdef UEP
2028 if (options->uep)
2029 show_uep_store (EARTH);
2030 #endif
2031 if (options->replicate && options->replicatenum > 0)
2032 {
2033 EARTH->repkind = MULTIPLERUN;
2034 if (options->bayes_infer)
2035 {
2036 if(!options->has_bayesmdimfile)
2037 calculate_credibility_interval (EARTH, locus);
2038 // return;
2039 }
2040 #ifdef LONGSUM
2041 change_longsum_times (EARTH);
2042 #endif /*LONGSUM*/
2043 // over multiple replicates if present or single locus
2044 #ifdef INTEGRATEDLIKE
2045 (void) estimateParameter (options->replicatenum, *Gmax, EARTH,
2046 options, EARTH->cov[locus],
2047 options->lchains, /*type */ 'l',
2048 SINGLELOCUS, EARTH->repkind);
2049 #else
2050 if (!options->bayes_infer)
2051 {
2052 (void) estimateParameter (options->replicatenum, *Gmax, EARTH,
2053 options, EARTH->cov[locus],
2054 options->lchains, /*type */ 'l',
2055 SINGLELOCUS, EARTH->repkind);
2056 }
2057 #endif
2058 }
2059 else
2060 {
2061 if (options->bayes_infer)
2062 {
2063 if(!options->has_bayesmdimfile)
2064 {
2065 //adjust_bayes_bins(EARTH,locus);
2066 calculate_credibility_interval (EARTH, locus);
2067 }
2068 }
2069 }
2070 if (options->heating)
2071 {
2072 for (i = 0; i < options->heated_chains; i++)
2073 {
2074 free_tree (universe[i]->root, universe[i]);
2075 // free_timevector(universe[i]->treetimes);
2076 myfree(universe[i]->nodep);
2077 }
2078 }
2079 else
2080 {
2081 free_tree (EARTH->root, EARTH);
2082 myfree(EARTH->nodep);
2083 // free_timevector(EARTH->treetimes);
2084 }
2085 if(options->bayes_infer)
2086 bayes_reset (universe[0]);
2087 }
2088
2089 /// \brief updates the tree and records acceptance
2090 ///
2091 /// updates the tree and records acceptance
2092 /// when in bayesian mode change between changing the tree and the parameters
2093 void
run_one_update(world_fmt * world)2094 run_one_update (world_fmt * world)
2095 {
2096 long count = 0;
2097 long np = world->numpop2;
2098 //fprintf(stderr,"%i> locus=%li heat=%f\n",myID, world->locus, world->heat);
2099 //traverse_check(crawlback (world->root->next));
2100 if (world->options->bayes_infer)
2101 {
2102 if(world->bayes->mu)
2103 {
2104 np += 1;
2105 }
2106 world->bayesaccept = bayes_update (world); // trials is update for parameter update
2107 if (world->bayesaccept == -1) //either do tree updates or parameter updates
2108 {
2109 world->accept += (count = tree_update (world, world->G));
2110 world->bayes->accept[np] += count; //permanent recorder for the tree acceptance,
2111 // for some obscure reason the world->accept gets lost, most likely every cycle;
2112 world->bayes->trials[np] += 1; // counter for trees tried
2113 }
2114 world->param_like = world->bayes->oldval;
2115 }
2116 else
2117 {
2118 world->accept += tree_update (world, world->G);
2119 }
2120 }
2121
2122 /// \brief updates all trees, controls updates and heating
2123 ///
2124 /// controls updates and temperatures (threaded or unthreaded)
2125 /// updates the tree and/or parameters
2126 /// \callgraph
2127 void
run_updates(world_fmt ** universe,int usize,option_fmt * options,tpool_t * localheating_pool,long inc,long increment,long step,long steps)2128 run_updates (world_fmt ** universe,
2129 int usize,
2130 option_fmt * options,
2131 tpool_t * localheating_pool,
2132 long inc, long increment, long step, long steps)
2133 {
2134 #ifndef PTHREADS
2135 #ifndef GRANDCENTRAL
2136 long ii;
2137 #endif
2138 #endif
2139 if (options->heating)
2140 {
2141 #ifdef PTHREADS /*using threads and running on an SMP machine */
2142 fill_tpool (*localheating_pool, universe, usize);
2143 tpool_synchronize (*localheating_pool, usize);
2144 #else /*heating but not using threads */
2145 #ifdef GRANDCENTRAL
2146 // dispatch_apply(count, dispatch_get_global_queue(0, 0), ^(size_t i){
2147 // results[i] = do_work(data, i);
2148 // });
2149 //
2150 //
2151 // dispatch_queue_t queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
2152 dispatch_queue_t queue = dispatch_get_global_queue(0, 0);
2153 dispatch_apply(options->heated_chains, queue,
2154 ^(unsigned long ii) {
2155 run_one_update (universe[ii]);
2156 }
2157 );
2158 #else
2159 for (ii = 0; ii < options->heated_chains; ii++)
2160 {
2161 run_one_update (universe[ii]);
2162 }
2163 #endif /*GRANDCENTRAL*/
2164 #endif /* end of not-using threads */
2165 if(options->bayes_infer && RANDUM() > options->updateratio)
2166 {
2167 #ifdef UEP
2168 if (options->uep && EARTH->in_last_chain)
2169 update_uepanc (EARTH);
2170 #endif
2171 return;
2172 }
2173 else
2174 {
2175 heated_swap (universe, EARTH->options);
2176 switch (options->adaptiveheat)
2177 {
2178 case STANDARD:
2179 adjust_temperatures (universe, options->heated_chains,
2180 inc /*rement */ + step * increment,
2181 steps * increment);
2182 break;
2183 case BOUNDED:
2184 adjust_temperatures_bounded (universe, options->heated_chains,
2185 inc /*rement */ + step * increment,
2186 steps * increment);
2187 break;
2188 case NOTADAPTIVE:
2189 default:
2190 break;
2191 }
2192 }
2193 }
2194 else
2195 { /* no heating */
2196 run_one_update (EARTH);
2197 }
2198 #ifdef UEP
2199 if (options->uep && EARTH->in_last_chain)
2200 update_uepanc (EARTH);
2201 #endif
2202 }
2203
2204 /// generates trees over the interval increments
2205 void
run_increments(world_fmt ** universe,long usize,option_fmt * options,tpool_t * localheating_pool,long increment,long step,long steps)2206 run_increments (world_fmt ** universe,
2207 long usize,
2208 option_fmt * options,
2209 tpool_t * localheating_pool,
2210 long increment, long step, long steps)
2211 {
2212 static long treefilepos; /* write position in the treefile */
2213 long i;
2214 for (i = 0; i < increment; i++)
2215 {
2216 EARTH->actualinc = i;
2217 run_updates (universe, usize, options,
2218 localheating_pool, i, increment, step, steps);
2219
2220 if (EARTH->likelihood[EARTH->G] > EARTH->maxdatallike)
2221 EARTH->maxdatallike = EARTH->likelihood[EARTH->G];
2222 }
2223 if (EARTH->options->treeprint != _NONE)
2224 print_tree (EARTH, EARTH->G, &treefilepos);
2225 }
2226
2227 /// \brief run a chain
2228 /// run a chain
2229 void
run_steps(world_fmt ** universe,long usize,option_fmt * options,tpool_t * localheating_pool,long increment,long steps,long rep)2230 run_steps (world_fmt ** universe,
2231 long usize,
2232 option_fmt * options,
2233 tpool_t * localheating_pool, long increment, long steps, long rep)
2234 {
2235 long step=0;
2236 long oldsteps = steps;
2237 long ii;
2238 MYREAL var=0.0;
2239 if(EARTH->cold && EARTH->options->bayes_infer)
2240 single_chain_var (NULL, step, &var, NULL, NULL);
2241 #ifdef MPI
2242 // does not work? check_memory_limit();
2243 #endif
2244 for (step = 0; step < steps; step++)// Cesky 2013
2245 {
2246 #ifdef MPI
2247 // does not work?? check_memory_limit();
2248 #endif
2249 EARTH->increment = increment;
2250 run_increments (universe, usize, options, localheating_pool,
2251 increment, step, steps);
2252 #ifdef UEP
2253 store_uep (EARTH);
2254 #endif
2255 // printf("step=%li inc=%li locus=%li\n",step,increment,EARTH->locus);
2256
2257 condense_time (EARTH, &step, &EARTH->accept,
2258 &EARTH->accept_freq, &EARTH->G, &steps, oldsteps, rep);
2259 if(EARTH->options->bayes_infer)
2260 {
2261 calculate_BF(universe,options);
2262 if(EARTH->cold)
2263 {
2264 single_chain_var (EARTH, step, &var, EARTH->autocorrelation, EARTH->effective_sample);
2265 }
2266 }
2267 if (step > 0)
2268 {
2269 advance_clone_like (EARTH, EARTH->accept, &EARTH->G);
2270 EARTH->accept = 0L;
2271 }
2272 // if we do heating
2273 if (EARTH->options->heating)
2274 {
2275 for (ii = 1; ii < EARTH->options->heated_chains; ++ii)
2276 {
2277 universe[ii]->accept_freq += universe[ii]->accept;
2278 advance_clone_like (universe[ii],
2279 universe[ii]->accept, &(universe[ii])->G);
2280 universe[ii]->accept = 0L;
2281 }
2282 }
2283 }
2284 }
2285
2286 /// run all chains for short and then for long
2287 void
run_chains(world_fmt ** universe,long usize,option_fmt * options,tpool_t * localheating_pool,long replicate,long chains,char type,long increment,long oldsteps,long * treefilepos,long * Gmax)2288 run_chains (world_fmt ** universe,
2289 long usize,
2290 option_fmt * options,
2291 tpool_t * localheating_pool,
2292 long replicate,
2293 long chains,
2294 char type,
2295 long increment, long oldsteps, long *treefilepos, long *Gmax)
2296 {
2297 long i;
2298 long steps;
2299 long chain;
2300 long rep;
2301 const long locus = EARTH->locus;
2302 long kind;
2303 long pluschain = 0;
2304 const MYREAL treeupdateratio = (options->bayes_infer ? options->updateratio : 1.0);
2305
2306 for (chain = 0;
2307 chain < chains || (type == 'l' && chain >= chains
2308 && EARTH->param_like > options->lcepsilon); chain++)
2309 {
2310 if (options->heating)
2311 MERKUR->swapped = VENUS->swapped = EARTH->swapped = 0;
2312 rep = replicate_number (options, chain, type, replicate);
2313 EARTH->rep = rep;
2314 precalc_world (EARTH);
2315 polish_world (EARTH);
2316 burnin_chain (EARTH);
2317 EARTH->G = 0;
2318 EARTH->accept_freq = 0.;
2319 EARTH->accept = 0;
2320 steps = oldsteps;
2321 EARTH->maxdatallike = EARTH->likelihood[0];
2322 if (options->heating)
2323 {
2324 heating_prepare2 (universe, usize);
2325
2326 for(i=1; i<EARTH->options->heated_chains; i++)
2327 {
2328 burnin_chain (universe[i]);
2329 }
2330 reset_heated_accept(universe,EARTH->options->heated_chains);
2331 }
2332 set_penalizer (chain, chains, type, universe);
2333 // run steps, run increments, run updaters
2334 run_steps (universe, usize, options, localheating_pool,
2335 increment, steps, rep);
2336
2337 decide_plot (EARTH->options, chain, chains, type);
2338 // prepare for parameter estimation, precompute and copy
2339 memcpy (EARTH->param00, EARTH->param0,
2340 sizeof (MYREAL) * EARTH->numpop2);
2341 #ifndef INTEGRATELIKE
2342 if(!options->bayes_infer)
2343 {
2344 memcpy (EARTH->atl[rep][locus].param0, EARTH->param00,
2345 sizeof (MYREAL) * EARTH->numpop2);
2346 log_param0 (EARTH->atl[rep][locus].param0,
2347 EARTH->atl[rep][locus].lparam0, EARTH->numpop2);
2348 copies2lcopies (&EARTH->atl[rep][locus]);
2349 }
2350 #else
2351 // memcpy (EARTH->atl[rep][locus].param0, EARTH->param00,
2352 // sizeof (MYREAL) * EARTH->numpop2);
2353 log_param0 (EARTH->atl[rep][locus].param0,
2354 EARTH->atl[rep][locus].lparam0, EARTH->numpop2);
2355 copies2lcopies (&EARTH->atl[rep][locus]);
2356 #endif
2357 // start parameter estimation for
2358 // single chain, single replicate
2359 EARTH->repkind = SINGLECHAIN;
2360 kind = SINGLELOCUS;
2361 *Gmax = (EARTH->G > *Gmax) ? EARTH->G + 1 : (*Gmax);
2362 #ifdef LONGSUM
2363 change_longsum_times (EARTH);
2364 #endif /*LONGSUM*/
2365 if (!options->bayes_infer)
2366 {
2367 (void) estimateParameter (rep, *Gmax, EARTH, options,
2368 EARTH->cov[locus], chain, type, kind,
2369 EARTH->repkind);
2370 }
2371 if (EARTH->options->heating)
2372 {
2373 print_heating_progress (universe, EARTH->options, (long) (increment * steps * treeupdateratio));
2374 reset_heated_accept(universe,EARTH->options->heated_chains);
2375 }
2376 else
2377 {
2378 print_progress (EARTH->options, EARTH, rep,
2379 (long) (increment * steps * treeupdateratio), (long) EARTH->accept_freq);
2380 EARTH->accept_freq = 0. ;
2381 EARTH->accept = 0L ;
2382 }
2383 // cleanup and prepare next
2384 prepare_next_chain (universe, EARTH->options, type, chain,
2385 &chains, &pluschain, locus, replicate);
2386 } //end chains
2387 }
2388
2389 /// set penalizing distance for jumps in the maximizer on or off
2390 void
set_penalizer(long chain,long chains,char type,world_fmt ** universe)2391 set_penalizer (long chain, long chains, char type, world_fmt ** universe)
2392 {
2393 if (type == 'l')
2394 {
2395 if (chain >= chains - 1)
2396 {
2397 EARTH->in_last_chain = TRUE;
2398 unset_penalizer_function (TRUE);
2399 }
2400 }
2401 else
2402 {
2403 EARTH->in_last_chain = FALSE;
2404 unset_penalizer_function (FALSE);
2405 }
2406 }
2407
2408
2409 /// \brief run a replicate
2410 ///
2411 /// run a replicate
2412 /// \callgraph
2413 void
run_replicate(long locus,long replicate,world_fmt ** universe,option_fmt * options,data_fmt * data,tpool_t * localheating_pool,int usize,long * treefilepos,long * Gmax)2414 run_replicate (long locus,
2415 long replicate,
2416 world_fmt ** universe,
2417 option_fmt * options,
2418 data_fmt * data,
2419 tpool_t * localheating_pool,
2420 int usize, long *treefilepos, long *Gmax)
2421 {
2422 long chains = 0;
2423 char type = 's';
2424 long runs;
2425 long increment = 1;
2426 long oldsteps = 0;
2427 /* loop over independent replicates----------------------- */
2428 /* but make sure that we get single chain estimates, too */
2429 EARTH->locus = locus;
2430 EARTH->repkind = SINGLECHAIN;
2431 EARTH->replicate = replicate;
2432 if (setup_locus (locus, EARTH, options, data) == 0)
2433 return;
2434 if (options->heating)
2435 heating_prepare (universe, usize, options, data, replicate);
2436 type = 's';
2437 runs = 1;
2438 /* short and long chains ----------------------------- */
2439 set_bounds (&increment, &oldsteps, &chains, options, type);
2440 EARTH->increment = increment;
2441 while (runs-- >= 0)
2442 {
2443 if (myID == MASTER && type == 's')
2444 {
2445 print_menu_chain (type, FIRSTCHAIN, oldsteps, EARTH,
2446 options, replicate);
2447 if (EARTH->options->treeprint == ALL)
2448 print_tree (EARTH, 0, treefilepos);
2449 }
2450 EARTH->chains = chains;
2451 /* loop over chains--------------------------- */
2452 run_chains (universe, usize, options, localheating_pool, replicate, chains, type, increment, oldsteps, treefilepos, Gmax); //PB 020203
2453 change_chaintype (locus, &type, EARTH, &increment, &oldsteps,
2454 &chains, options);
2455
2456 /* evaluate multiple long chain estimates */
2457 if (runs < 0 && options->replicate && options->replicatenum == 0)
2458 {
2459 EARTH->repkind = MULTIPLECHAIN;
2460 #ifdef LONGSUM
2461 change_longsum_times (EARTH);
2462 #endif /*LONGSUM*/
2463 if (!options->bayes_infer)
2464 {
2465 (void) estimateParameter (replicate, *Gmax, EARTH, options,
2466 EARTH->cov[locus], chains, type,
2467 SINGLELOCUS, EARTH->repkind);
2468 }
2469 }
2470 } //end runs
2471 //#ifndef MPI
2472 // collect correlation information of a locus and stick it into the archive,
2473 // the autocorrelation is averaged but the ess is summed up assuming that ll loci are independent
2474 if(EARTH->cold && EARTH->options->bayes_infer)
2475 {
2476 collect_ess_values(EARTH);
2477 collect_acceptance(EARTH);
2478 }
2479 //#endif
2480 }
2481
2482
print_burnin_stop(FILE * file,world_fmt ** universe,option_fmt * options)2483 void print_burnin_stop(FILE *file, world_fmt **universe, option_fmt * options)
2484 {
2485 world_fmt *world = universe[0];
2486 long z;
2487 long maxreplicate = (options->replicate
2488 && options->replicatenum >
2489 0) ? options->replicatenum : 1;
2490 fprintf(file,"\n\n\nStop of burn-in phase due to convergence\n");
2491 fprintf(file,"----------------------------------------\n");
2492 switch(options->burnin_autostop)
2493 {
2494 case 'a':
2495 fprintf(file,"[Stopping criteria was: Variance ratio between the last two groups of 1000 steps < %f]\n\n",world->varheat);
2496 break;
2497 case 't':
2498 fprintf(file,"[Stopping criteria was: reached prescribed acceptance ratio of %f]\n\n",world->options->autotune);
2499 break;
2500 case 'e':
2501 fprintf(file,"[Stopping criteria was: All effective MCMC sample sizes > %f]\n\n",world->essminimum);
2502 break;
2503 case ' ':
2504 fprintf(file,"\n\n");
2505 break;
2506 }
2507
2508 fprintf(file,"Locus Replicate Steps ESS* Accept* Variance ratio (new/old variance)\n");
2509 fprintf(file,"----- --------- ------ ------- ------- ---------------------------------\n");
2510 for(z=0; z < world->loci * maxreplicate; z++)
2511 {
2512 if(world->burnin_stops[z].oldvariance > 0.0)
2513 {
2514 fprintf(file,"%5li %5li %10li %6.1f %6.4f %10.4f (%f/%f)\n",1 + world->burnin_stops[z].locus,
2515 world->burnin_stops[z].replicate,
2516 world->burnin_stops[z].stopstep,
2517 world->burnin_stops[z].ess,
2518 world->burnin_stops[z].accept,
2519 world->burnin_stops[z].variance/world->burnin_stops[z].oldvariance,
2520 world->burnin_stops[z].variance,
2521 world->burnin_stops[z].oldvariance);
2522 }
2523 // world->burnin_stops[z].worker = myID;
2524 }
2525 fprintf(file,"(*=worst)\n\n");
2526 #ifdef PRETTY
2527 if(file==EARTH->outfile)
2528 pdf_burnin_stops(EARTH, maxreplicate);
2529 #endif
2530 }
2531
combine_scaling_factor(world_fmt * world)2532 MYREAL combine_scaling_factor(world_fmt *world)
2533 {
2534 const long np = world->numpop2;
2535 long pop;
2536 long i;
2537 MYREAL scaling_factor=0.0;
2538 bayes_fmt * bayes = world->bayes;
2539 boolean *visited;
2540 visited = (boolean *) mycalloc(np,sizeof(boolean));
2541 for(i=0;i<np;i++)
2542 {
2543 if(bayes->map[i][1] == INVALID)
2544 continue;
2545 else
2546 {
2547 pop = bayes->map[i][1];
2548 }
2549 if(visited[pop]==TRUE)
2550 continue;
2551 visited[pop] = TRUE;
2552 // scaling_factor += exp(bayes->scaling_factors[pop] - bayes->maxmaxvala);
2553 scaling_factor += bayes->scaling_factors[pop]; //PRODUCT_parameters(scalinfactorcalculation_see_bayes.c)
2554 #ifdef DEBUG
2555 printf("%i> scaling factor test: %li %li k=%f k_pop=%f %f\n",myID, i, pop, scaling_factor, bayes->scaling_factors[pop], bayes->maxmaxvala);
2556 #endif
2557 }
2558 // scaling_factor = log(scaling_factor) + bayes->maxmaxvala;
2559 if(world->options->has_bayesfile)
2560 {
2561 #ifdef DEBUG
2562 printf("# Scaling factor %20.20f\n",scaling_factor);
2563 #endif
2564 fprintf(world->bayesfile, "# Scaling factor %20.20f\n",scaling_factor);
2565 }
2566 myfree(visited);
2567 return scaling_factor;
2568 }
2569
print_bf_values(world_fmt * world)2570 void print_bf_values(world_fmt * world)
2571 {
2572 long locus;
2573 long t;
2574 const long hc = world->options->heated_chains;
2575 for(locus=0;locus<world->loci;locus++)
2576 {
2577 if(world->data->skiploci[locus])
2578 continue;
2579 printf("=====> locus=%li ",locus );
2580 for(t=0; t < hc; t++)
2581 {
2582 printf("%f ",world->bf[locus*hc+t]);
2583 }
2584 printf("\n");
2585 }
2586 }
2587
2588 #ifdef MPI_do_not_use
fix_bayesfactor(world_fmt * world,option_fmt * options)2589 void fix_bayesfactor(world_fmt *world, option_fmt * options)
2590 {
2591 return;
2592 long locus;
2593 long t;
2594 const long hc = world->options->heated_chains;
2595 const long maxreplicate = world->maxreplicate; //(options->replicate
2596 //&& options->replicatenum >
2597 // 0) ? options->replicatenum : 1;
2598 if(options->heating)//-----------------------heating
2599 {
2600 for(locus=0;locus<world->loci;locus++)
2601 {
2602 if(world->data->skiploci[locus])
2603 continue;
2604 // the thermodynamic integration is reporting maxreplicate times too high values
2605 // warning("this should fix the thermodynamic/mpi/replicate problem, but needs to be checked for multilocus data");
2606 printf("@@@@@@@ locus=%li %li (%f) ",locus,maxreplicate, world->bf[locus * hc + 0] );
2607 for(t=0; t < hc; t++)
2608 {
2609 world->bf[locus * hc + t] /= maxreplicate;
2610 printf("%f ",world->bf[locus*hc+t]);
2611 }
2612 printf("\n");
2613 }
2614 }
2615 }
2616 #endif
2617
2618
print_bayesfactor(FILE * file,world_fmt ** universe,option_fmt * options)2619 void print_bayesfactor(FILE *file, world_fmt **universe, option_fmt * options)
2620 {
2621 //#ifdef MPI
2622 //static boolean done=FALSE;
2623 //#endif
2624 long t;
2625 const long hc = EARTH->options->heated_chains;
2626 long locus;
2627 const long maxreplicate = universe[0]->maxreplicate; //(options->replicate
2628 //&& options->replicatenum >
2629 // 0) ? options->replicatenum : 1;
2630 // long lsteps = options->lsteps;
2631 // MYREAL sum = 0.;
2632 MYREAL heat0 = 1.;
2633 MYREAL heat1 = 1.;
2634 MYREAL heat2 = 1.0;
2635 MYREAL val0 = 0.;
2636 MYREAL val1 = 0.;
2637 MYREAL val2 = 0.;
2638 MYREAL bfsum = 0.;
2639 MYREAL bfsum2 = 0.;
2640 MYREAL approxlsum = 0.;
2641 MYREAL hsum = 0.;
2642 MYREAL asum = 0.;
2643 MYREAL lsum;
2644 MYREAL lsum0;
2645 MYREAL scaling_factor = 0.0;
2646 MYREAL *locusweight = EARTH->data->locusweight;//invariant loci treatment
2647 // calculate the harmonic mean score
2648 for(locus=0;locus < EARTH->loci; locus++)
2649 {
2650 if(EARTH->data->skiploci[locus])
2651 continue;
2652 hsum += locusweight[locus] * (EARTH->hmscale[locus] - log (EARTH->hm[locus]));//invariant loci treatment
2653 }
2654
2655 fprintf(file,"\n\n\nLog-Probability of the data given the model (marginal likelihood = log(P(D|thisModel))\n");
2656 fprintf(file,"--------------------------------------------------------------------\n[Use this value for Bayes factor calculations:\nBF = Exp[log(P(D|thisModel) - log(P(D|otherModel)]\nshows the support for thisModel]\n\n");
2657 #ifdef PRETTY
2658 if(file==EARTH->outfile)
2659 pdf_bayes_factor_header(EARTH,options);
2660 #endif
2661 if(EARTH->loci>1)
2662 {
2663 fprintf(file,"\n\nLocus Raw Thermodynamic score(1a) Bezier approximated score(1b) Harmonic mean(2)\n");
2664 fprintf(file,"------------------------------------------------------------------------------------------\n");
2665 if(file==EARTH->outfile)
2666 pdf_bayes_factor_rawscores_header(EARTH,options);
2667 }
2668 if(options->heating)//-----------------------heating
2669 {
2670 for(locus=0;locus<EARTH->loci;locus++)
2671 {
2672 if(EARTH->data->skiploci[locus])
2673 continue;
2674
2675 lsum = 0.;
2676 lsum0 = 0.;
2677 // from very hot to cold, the
2678 for(t=1; t < hc; t++)
2679 {
2680 heat2 = heat0;
2681 val2 = val0;
2682 #ifdef MPI
2683 heat0 = 1./options->heat[t-1] ;
2684 heat1 = 1./options->heat[t];
2685 #else
2686 if(options->datatype!='g')
2687 {
2688 if(options->adaptiveheat!=NOTADAPTIVE)
2689 {
2690 heat0 = 1./ universe[t-1]->averageheat ;
2691 heat1 = 1./ universe[t]->averageheat;
2692 }
2693 else
2694 {
2695 heat0 = universe[t-1]->heat ;
2696 heat1 = universe[t]->heat;
2697 }
2698 }
2699 else
2700 {
2701 heat0 = 1./options->heat[t-1];
2702 heat1 = 1./options->heat[t];
2703 }
2704 #endif
2705 //Simpson's rule and trapezoidal are the same when I do only have function values at a and b
2706 val0 = locusweight[locus] * EARTH->bf[locus * hc + t-1];
2707 val1 = locusweight[locus] * EARTH->bf[locus * hc + t];
2708 //we keep last element to adjust for Bezier approximation
2709 lsum0 = (heat0 - heat1) * (val0 + val1) * 0.5;
2710 lsum += lsum0;
2711 #ifdef DEBUG
2712 // printf("%i> sum[%li]=%f temp=(%f %f) values=(%f %f)\n",myID,t,lsum,heat0,heat1,EARTH->bf[locus * hc + t],EARTH->bf[locus * hc + t]);
2713 #endif
2714 }
2715 // (x2 y1 - x1 y2)/(x1 - x2)
2716 // this last addition to the lsum calculates the chunk between the last temperature and
2717 // the infinitely hot temperature as a linear approximation of the the second hottest temperature
2718 // this is certainly rough, but in simulations with 3 populations one can see that with large number
2719 // of temperatures this looks reasonable, and one can approximate the integral more accurately with
2720 // with only a few columns.
2721 // using Bezier to approximate nice curve between the last two points to mimick the curve that
2722 // can be found with 16 or 32 heated chains, handle points are calculated using adhoc decisions
2723 // (comparison with 32 heated chains) using 0.8 of the interval for handle_y1 and the intercept
2724 // between the first and the second last point to calculate the handle_y2 see sumbezier()
2725 // for implementation. Currently this is not tunable.
2726 // approxlsum = sumbezier(100, heat1, EARTH->bf[locus * hc + t-1],
2727 // heat0, EARTH->bf[locus * hc + t-2],
2728 // 1.0, EARTH->bf[locus * hc]);
2729 approxlsum = sumbezier(100L, heat1, val1,
2730 heat0, val0,
2731 heat2, val2);
2732 //#ifdef DEBUG
2733 // fprintf(stdout,"Thermo[%li]=(%f) - (%f) + (%f) = (%f)\n",locus, lsum, lsum0, approxlsum, approxlsum + lsum - lsum0);
2734 // #endif
2735 if(EARTH->loci>1)
2736 {
2737 fprintf(file," %5li%22.2f %22.2f %12.2f\n", locus + 1, lsum, lsum-lsum0+approxlsum, EARTH->hmscale[locus] - log (EARTH->hm[locus]));
2738 #ifdef PRETTY
2739 if(file==EARTH->outfile)
2740 pdf_bayes_factor_rawscores(locus, lsum, lsum-lsum0+approxlsum, EARTH->hmscale[locus] - log (EARTH->hm[locus]));
2741 #endif
2742 }
2743 bfsum2 += approxlsum + lsum - lsum0;
2744 bfsum += lsum; //+ EARTH->bfscale[locus];
2745 }
2746 if(EARTH->loci>1)
2747 {
2748 scaling_factor = combine_scaling_factor(EARTH);
2749 bfsum += scaling_factor;
2750 bfsum2 += scaling_factor;
2751 hsum += scaling_factor;
2752 fprintf(file,"---------------------------------------------------------------------------------------\n");
2753 fprintf(file," All %22.2f %22.2f %12.2f\n[Scaling factor = %f]\n", bfsum, bfsum2, hsum, scaling_factor);
2754 #ifdef PRETTY
2755 if(file==EARTH->outfile)
2756 pdf_bayes_factor_rawscores(-1L, bfsum, bfsum2, hsum);
2757 #endif
2758 }
2759 else
2760 {
2761 fprintf(file,"\n\n(1a) Thermodynamic integration: log(Prob(D|Model))=%f (%f with Bezier-approximation[1b]) %s\n", bfsum, bfsum2,
2762 options->adaptiveheat!=NOTADAPTIVE ? "(adaptive heating -> value may most likely be wrong)" : "");
2763 fprintf(file,"(2) Harmonic mean: log(Prob(D|Model))=%f\n",hsum);
2764 fprintf(file,"(1) and (2) should give a similar result, in principle.\nBut (2) is overestimating the likelihood, it and is presented for historical reasons and should not be used. But (1) needs heating with chains that span a temperature range of 1.0 to at least 100,000.\n\n");
2765 }
2766 }
2767 else // -----------------------------------------not heating
2768 {
2769 for(locus=0;locus<EARTH->loci;locus++)
2770 {
2771 if(EARTH->data->skiploci[locus])
2772 continue;
2773 if(EARTH->loci>1)
2774 {
2775 fprintf(file," %5li ------ ------ %12.2f\n", locus+1, EARTH->hmscale[locus] - log (EARTH->hm[locus]));
2776 #ifdef PRETTY
2777 if(file==EARTH->outfile)
2778 pdf_bayes_factor_rawscores_harmo(locus, EARTH->hmscale[locus] - log (EARTH->hm[locus]));
2779 #endif
2780 }
2781 }
2782 if(EARTH->loci>1)
2783 {
2784 scaling_factor = combine_scaling_factor(EARTH);
2785 hsum += scaling_factor;
2786 fprintf(file,"---------------------------------------------------------------------------------------\n");
2787 fprintf(file," All ------ ------ %12.2f\n", hsum);
2788 #ifdef PRETTY
2789 if(file==EARTH->outfile)
2790 pdf_bayes_factor_rawscores_harmo(-1L, hsum);
2791 #endif
2792 }
2793 else
2794 {
2795 fprintf(file,"\n\n(1) Thermodynamic integration: log(Prob(D|Model))= UNAVAILABLE because no heating was used\n");
2796 fprintf(file,"(2) Harmonic mean: log(Prob(D|Model))=%f\n",hsum);
2797 fprintf(file,"(1) and (2) should give a similar result, in principle.\nBut (2) is overestimating the likelihood, it \
2798 and is presented for historical reasons and should not be used. But (1) needs heating with chains that span a temperature rang\
2799 e of 1.0 to at least 100,000.\n\n");
2800 }
2801 }
2802 #ifdef PRETTY
2803 if(file==EARTH->outfile)
2804 {
2805 pdf_bayes_factor(EARTH, bfsum, bfsum2, hsum, asum, maxreplicate,scaling_factor);
2806 }
2807 #endif
2808 }
2809
2810 ///
2811 /// integrates over a Bezier curve between two points
2812 /// calculates two handle points that are set to adhoc values
2813 /// so that the x values of the handle are the the x value of the lowest point
2814 /// and the y values are set to about 80% of the min to max interval for the left point
2815 /// and a value that is the the y value from ax + b where a is calculated from a
2816 /// third point to the right and the second point, the third point is not used for the
2817 /// the Bezier curve otherwise
sumbezier(long intervals,MYREAL x0,MYREAL y0,MYREAL x1,MYREAL y1,MYREAL x2,MYREAL y2)2818 MYREAL sumbezier(long intervals, MYREAL x0, MYREAL y0, MYREAL x1, MYREAL y1, MYREAL x2, MYREAL y2)
2819 {
2820 const MYREAL inv_interval = 1./intervals;
2821 const MYREAL sx0 = x0;
2822 const MYREAL sx1 = x0;
2823 const MYREAL sy0 = 0.2 * y0 + 0.8 * y2;
2824 const MYREAL sy1 = (-x2 * y1 + x1 * y2)/(x1 - x2);
2825 MYREAL t = 0.0;
2826 MYREAL t2 = 0.0;
2827 MYREAL t3 = 0.0;
2828 MYREAL onet = 1.0;
2829 MYREAL onet2 = 1.0;
2830 MYREAL onet3 = 1.0;
2831 MYREAL newx;
2832 MYREAL newy;
2833 MYREAL oldx;
2834 MYREAL oldy;
2835 MYREAL sum = 0.0;
2836 // integrate over intervals between x0 and x1 and return sum
2837 // intialize with t=0.0
2838 oldx = x0;
2839 oldy = y0;
2840 //fprintf(stdout,"\n\n%f %f %f %f %f %f\n",x2,y2,sx0,sy0, x0,y0);
2841 for(t=inv_interval; t <= 1.0; t += inv_interval)
2842 {
2843 onet = 1.0 - t;
2844 onet2 = onet * onet;
2845 onet3 = onet2 * onet;
2846 onet2 *= 3.0 * t;
2847 t2 = t * t;
2848 t3 = t2 * t;
2849 t2 *= 3.0 * onet;
2850 // newx = 3sx0 (1-t)^2 t + 3 sx1 (1-t) t^2 + (1-t)^3 x0 + t^3 x1
2851 newx = sx0 * onet2 + sx1 * t2 + onet3 * x0 + t3 * x1;
2852 newy = sy0 * onet2 + sy1 * t2 + onet3 * y0 + t3 * y1;
2853 // fprintf(stdout,"%f %f\n",newx,newy);
2854 sum += (newx - oldx) * (newy + oldy)/2.;
2855 oldx = newx;
2856 oldy = newy;
2857 }
2858 // fprintf(stdout,"%f %f %f %f\n\n\n",x1,y1,sx1,sy1);
2859 return sum;
2860 }
2861
2862
2863 /// calculate values for the marginal likelihood using thermodynamic integration
2864 /// based on a method by Friel and Pettitt 2005
2865 /// (http://www.stats.gla.ac.uk/research/TechRep2005/05.10.pdf)
2866 /// this is the same method described in Lartillot and Phillippe 2006 Syst Bio
2867 /// integrate over all temperature using a simple trapezoidal rule
2868 /// prob(D|model) is only accurate with intervals for temperatures from 1/0 to 1/1.
2869 /// reports also the harmonic mean
calculate_BF(world_fmt ** universe,option_fmt * options)2870 void calculate_BF(world_fmt **universe, option_fmt *options)
2871 {
2872 long i;
2873 MYREAL xx, xx2;
2874 // MYREAL logprior;
2875 #ifdef THERMOCHECK
2876 static long counter=0;
2877 #endif
2878 long locus = universe[0]->locus;
2879 long hc = options->heated_chains;
2880 if(universe[0]->data->skiploci[locus])
2881 return;
2882 if(EARTH->likelihood[EARTH->G] <= -HUGE)
2883 return;
2884 EARTH->am[locus] += 1;
2885 locus = EARTH->locus;
2886 xx = EARTH->likelihood[EARTH->G];
2887
2888 #ifdef THERMOCHECK
2889 counter++;
2890 #endif
2891
2892 if(xx <= -HUGE)
2893 return;
2894
2895 if(xx > EARTH->hmscale[locus])
2896 {
2897 // EARTH->hm[locus] += EXP(EARTH->hmscale[locus] - xx);
2898 xx2 = EXP(EARTH->hmscale[locus] - xx);
2899 EARTH->hm[locus] += (xx2 - EARTH->hm[locus])/ (EARTH->am[locus]);
2900 }
2901 else
2902 {
2903 EARTH->hm[locus] *= EXP(xx - EARTH->hmscale[locus]);
2904 // EARTH->hm[locus] *= xx - EARTH->hmscale[locus];
2905 EARTH->hmscale[locus] = xx;
2906 // EARTH->hm[locus] += 1.;
2907 EARTH->hm[locus] += (1. - EARTH->hm[locus])/ (EARTH->am[locus]);
2908 }
2909
2910 #ifdef THERMOCHECK
2911 if(options->mixplot)
2912 fprintf(universe[0]->options->mixfile,"@BF %li %li %10.10f %10.10f",counter, locus, EARTH->heat, xx);
2913 //, EARTH->hmscale[locus]);
2914 #endif
2915
2916 if(options->heating)
2917 {
2918 for (i = 0; i < hc; i++)
2919 {
2920 xx = universe[i]->likelihood[universe[i]->G];
2921 #ifdef THERMOCHECK
2922 if(options->mixplot)
2923 fprintf(universe[0]->options->mixfile," @T %10.10f %10.10f", universe[i]->heat, xx);
2924 #endif
2925 EARTH->bf[locus * hc + i] += (xx - EARTH->bf[locus * hc + i])/ (EARTH->am[locus]);
2926 //EARTH->bf[locus * hc + i] += (xx - EARTH->bf[locus * hc + i]);
2927 //#ifdef THERMOCHECK
2928 //fprintf(universe[0]->options->mixfile,"%f\n", EARTH->bf[locus * hc + i]);
2929 //#endif
2930 }
2931 }
2932 #ifdef THERMOCHECK
2933 if(options->mixplot)
2934 fprintf(universe[0]->options->mixfile,"\n");
2935 #endif
2936 }
2937
print_marginal_order(char * buf,long * bufsize,world_fmt * world)2938 void print_marginal_order(char *buf, long *bufsize, world_fmt *world)
2939 {
2940 long i;
2941
2942 for(i=0;i<world->options->heated_chains;i++)
2943 *bufsize += sprintf(buf+ *bufsize,"# -- %s = %f\n", "Thermodynamic temperature", world->options->heat[i]);
2944 *bufsize += sprintf(buf+ *bufsize,"# -- %s\n", "Marginal log(likelihood) [Thermodynamic integration]");
2945 *bufsize += sprintf(buf+ *bufsize,"# -- %s\n", "Marginal log(likelihood) [Harmonic mean]");
2946 }
2947
2948 #if defined(MPI) && !defined(PARALIO) /* */
print_marginal_like(float * temp,long * z,world_fmt * world)2949 void print_marginal_like(float *temp, long *z, world_fmt * world)
2950 {
2951 long locus = world->locus;
2952 long t;
2953 long hc = world->options->heated_chains;
2954 MYREAL lsum;
2955 MYREAL heat0, heat1;
2956
2957 if(world->options->heating)
2958 {
2959 lsum = 0.;
2960 for(t=1; t < hc; t++)
2961 {
2962 heat0 = 1./world->options->heat[t-1] ;
2963 heat1 = 1./world->options->heat[t];
2964 // this ignores adaptive heating for MPI!!!!
2965 temp[*z] = (float) world->bf[locus * hc + t-1];
2966 *z += 1;
2967 lsum += (heat0 - heat1) * ((world->bf[locus * hc + t-1] + world->bf[locus * hc + t]) * 0.5);
2968 }
2969 temp[(*z)++] = (float) world->bf[locus * hc + t-1];
2970 temp[(*z)++] = (float) lsum;
2971 }
2972 temp[(*z)++] = (float) world->hmscale[locus] - log(world->hm[locus]);
2973 }
2974 #else /*not MPI or MPI & PARALIO*/
print_marginal_like(char * temp,long * c,world_fmt * world)2975 void print_marginal_like(char *temp, long *c, world_fmt * world)
2976 {
2977 long locus = world->locus;
2978 long t;
2979 long hc = world->options->heated_chains;
2980 MYREAL lsum;
2981 MYREAL heat0, heat1;
2982 if(world->options->heating)
2983 {
2984 lsum = 0.;
2985 for(t=1; t < hc; t++)
2986 {
2987 if(world->options->adaptiveheat!=NOTADAPTIVE)
2988 {
2989 heat0 = world->options->averageheat[t-1] ;
2990 heat1 = world->options->averageheat[t];
2991 }
2992 else
2993 {
2994 heat0 = 1./ world->options->heat[t-1] ;
2995 heat1 = 1./ world->options->heat[t];
2996 }
2997 *c += sprintf(temp+ *c,"\t%f", world->bf[locus * hc + t-1]);
2998 lsum += (heat0 - heat1) * ((world->bf[locus * hc + t-1] + world->bf[locus * hc + t]) * 0.5);
2999 }
3000 *c += sprintf(temp + *c,"\t%f", world->bf[locus * hc + t-1]);
3001 *c += sprintf(temp + *c,"\t%f", lsum);
3002 }
3003 *c += sprintf(temp + *c,"\t%f", world->hmscale[locus] - log(world->hm[locus]));
3004 }
3005 #endif /*not MPI*/
3006
3007
3008 #ifdef LONGSUM
3009 /// put timepoints into the total tree to allow for bottlenecks and growth
3010 long
find_chaincutoffs(MYREAL * treetime,world_fmt * world,long locus,long r)3011 find_chaincutoffs (MYREAL * treetime, world_fmt * world, long locus, long r)
3012 {
3013
3014 long j;
3015 long T;
3016 MYREAL totaltime = 0.;
3017 long copies = 0;
3018 tarchive_fmt *atl;
3019 atl = world->atl[r][locus].tl;
3020 for (j = 0; j < world->atl[r][locus].T; j++)
3021 {
3022 T = atl[j].longsumlen - 1;
3023 copies += atl[j].copies;
3024 totaltime = atl[j].longsum[T].eventtime / 3.;
3025 treetime[0] += totaltime;
3026 treetime[1] += totaltime + totaltime;
3027 treetime[2] += totaltime + totaltime + totaltime;
3028 }
3029 return copies;
3030 }
3031
3032 /// find the cutoffs for the locus, averaging over multiple replicates
3033 void
find_loci_cutoffs(MYREAL * treetime,world_fmt * world)3034 find_loci_cutoffs (MYREAL * treetime, world_fmt * world)
3035 {
3036 long r;
3037 long locus;
3038 long count = 0;
3039 long repstart = 0;
3040 long repstop = 1;
3041 memset (treetime, 0, sizeof (MYREAL) * 3);
3042 set_replicates (world, world->repkind, world->rep, &repstart, &repstop);
3043
3044 for (locus = 0; locus < world->loci; locus++)
3045 {
3046 for (r = repstart; r < repstop; r++)
3047 {
3048 count += find_chaincutoffs (treetime, world, locus, r);
3049 }
3050 }
3051 treetime[0] /= (MYREAL) count;
3052 treetime[1] /= (MYREAL) count;
3053 treetime[2] /= (MYREAL) count;
3054 }
3055
3056
3057 /// find the cutoffs avaergin over all replicates
3058 void
find_replicate_cutoffs(MYREAL * treetime,world_fmt * world)3059 find_replicate_cutoffs (MYREAL * treetime, world_fmt * world)
3060 {
3061 long r;
3062 long count = 0;
3063 long repstart = 0;
3064 long repstop = 1;
3065 memset (treetime, 0, sizeof (MYREAL) * 3);
3066 set_replicates (world, world->repkind, world->rep, &repstart, &repstop);
3067
3068 for (r = repstart; r < repstop; r++)
3069 {
3070 count += find_chaincutoffs (treetime, world, world->locus, r);
3071 }
3072 treetime[0] /= (MYREAL) count;
3073 treetime[1] /= (MYREAL) count;
3074 treetime[2] /= (MYREAL) count;
3075 }
3076
3077
3078 /// change the cutoff times
3079 void
change_longsum_times(world_fmt * world)3080 change_longsum_times (world_fmt * world)
3081 {
3082 long i;
3083 long numpop3 = 3 * world->numpop;
3084 long numpop6 = 2 * numpop3;
3085 MYREAL *treetime;
3086 treetime = (MYREAL *) mycalloc (3, sizeof (MYREAL)); // we have 3 classes
3087 if (world->locus < world->loci)
3088 find_replicate_cutoffs (treetime, world);
3089 else
3090 find_loci_cutoffs (treetime, world);
3091 for (i = numpop3; i < numpop6; i += 3)
3092 {
3093 world->flucrates[i] = treetime[0];
3094 world->flucrates[i + 1] = treetime[1];
3095 world->flucrates[i + 2] = treetime[2];
3096 }
3097 myfree(treetime);
3098 }
3099
3100 #endif /*LONGSUM*/
3101
3102 ///
3103 /// check_parmfile checks what is called and also whether we should honor the menu=YES in the file
3104 /// user uses another parmfile than the default when there is a parameter associated with the program call
3105 /// this function checks the first two line of the file to find out whether this is a parmfile or potentially
3106 /// a datafile/
3107 /// The addition of an option to override the menu command in the parmfile
3108 /// migrate-n -menu ==> use menu independent of the parmfile setting
3109 /// migrate-n -nomenu ==> does use menu even if parmfile says so
3110 /// migrate-n -version ==> prints the version and quits
3111 /// migrate-n -help ==> prints a quick help of commandline options
check_parmfile(long argcount,char ** arguments,char * parmfilename)3112 boolean check_parmfile(long argcount, char **arguments, char *parmfilename)
3113 {
3114 int argument=0;
3115 int len;
3116 boolean usemenu = OVERRIDE_NO;
3117 if (argcount > 1)
3118 {
3119 argument = 1;
3120 while(argument < argcount)
3121 {
3122 //fprintf(stderr,"<|%s|>\n",argv[argument]);
3123 //fflush(stderr);
3124 if(arguments[argument][0]!='-')
3125 {
3126 len = (long) strlen (arguments[argument]) + 1;
3127 sprintf (parmfilename, "%-.*s", len, arguments[argument]);
3128 }
3129 else
3130 {
3131
3132 switch(arguments[argument][1])
3133 {
3134 case 'm':
3135 usemenu = OVERRIDE_MENU;
3136 break;
3137 case 'n':
3138 usemenu = OVERRIDE_NOMENU;
3139 break;
3140 case 'v':
3141 printf("Migrate-n version: %s, subversion: %s\n", MIGRATEVERSION, MIGRATESUBVERSION);
3142 printf("Operating System: %s\n", MYSYSTEM );
3143 #ifdef MPI
3144 #ifdef OMPI_MAJOR_VERSION
3145 printf("OPENMPI version: %i.%i\n", OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION);
3146 #endif
3147 #endif
3148 exit(-1);
3149 break;
3150 case 'h':
3151 printf("Migrate-n version: %s, subversion: %s\n", MIGRATEVERSION, MIGRATESUBVERSION);
3152 printf("(beerli@fsu.edu)\n\n");
3153 printf("commandline options:\n");
3154 printf("-version # shows the current version and exits\n");
3155 printf("-help # show this menu and exit\n");
3156 printf("\n");
3157 printf("-nomenu # does not display menu, use this for batch jobs\n");
3158 printf("-menu # forces the display of the menu\n");
3159 printf("\n");
3160 exit(-1);
3161 break;
3162 }
3163 }
3164 argument++;
3165 }
3166 }
3167 return usemenu;
3168 }
3169
3170 /// set the menu overrider
set_usemenu(boolean usemenu,boolean fromparmfile)3171 boolean set_usemenu(boolean usemenu, boolean fromparmfile)
3172 {
3173 switch(usemenu)
3174 {
3175 case OVERRIDE_MENU:
3176 return TRUE;
3177 break;
3178 case OVERRIDE_NOMENU:
3179 return FALSE;
3180 break;
3181 default:
3182 break;
3183 }
3184 return fromparmfile;
3185 }
3186
3187 /// checks the settings of the number of long an short chain for bayes options and resets useless settings
check_bayes_options(option_fmt * options)3188 void check_bayes_options(option_fmt *options)
3189 {
3190 if (options->bayes_infer)
3191 {
3192 if(options->schains>0)
3193 {
3194 // fprintf(stdout,"NOTICE: with Bayesian inference no short chains are allowed, setting them to ZERO\n");
3195 options->schains = 0;
3196 }
3197 if(options->lchains>1 || options->lchains < 1)
3198 {
3199 fprintf(stdout,"NOTICE: with Bayesian inference, most efficient runs are with 1 long chain,\nsetting the long chain to 1\n");
3200 options->lchains = 1;
3201 };
3202 }
3203 }
3204
reset_bayesmdimfile(world_fmt * world,option_fmt * options)3205 void reset_bayesmdimfile(world_fmt *world, option_fmt *options)
3206 {
3207 long locus=0;
3208 char ** files = NULL;
3209 long numfiles=0;
3210 if(world->options->has_bayesmdimfile)
3211 {
3212 charvec2d(&files,numcpu,STRSIZE);
3213 get_bayeshist (world, options); // this is transferring some material but
3214 // the parameter file is not filled in anymore [-> small data block]
3215 // save parameters in file and reload
3216 if(myID==MASTER)
3217 {
3218 // first file is alwasy present
3219 strcpy(files[numfiles++],options->bayesmdimfilename);
3220 #ifdef MPI
3221 #ifdef PARALIO
3222 fprintf(stdout,"%i> bayesmdimfile opening for analysis\n", myID);
3223 //this will need to open a whole array of files
3224 long numfiles=0;
3225 charvec2d(&files,numcpu,STRSIZE);
3226 long worker;
3227 char tmp[STRSIZE];
3228 MPI_Status status;
3229 strcpy(files[numfiles++],options->bayesmdimfilename);
3230 long numelem = world->numpop2 + (world->options->gamma ? 1 : 0);
3231 long numelem2 = numelem * 2;
3232 MYREAL * temp = (MYREAL *) mycalloc(numelem2+2,sizeof(MYREAL));
3233 temp[0]=MIGMPI_PARALIO;
3234 for(worker=1;worker<numcpu;worker++)
3235 {
3236 MYMPISEND (temp, numelem2 + 2, mpisizeof, worker, worker, comm_world);
3237 }
3238 worker=1;
3239 while(worker<numcpu)
3240 {
3241 MYMPIRECV (tmp,STRSIZE, MPI_CHAR, MPI_ANY_SOURCE,
3242 MPI_ANY_TAG, comm_world, &status);
3243 strcpy(files[numfiles++],tmp);
3244 worker++;
3245 }
3246 myfree(temp);
3247 // world->bayesmdimfile = NULL;//fopen(options->bayesmdimfilename,"r+");
3248 #endif /*paralio*/
3249 #endif /*mpi*/
3250 } /*for the master to get the filenames for bayesallfile */
3251 #ifdef ZNZ
3252 #ifdef PARALIO
3253 // closing the MPI worker's files and the master
3254 MPI_File_close(&world->mpi_bayesmdimfile);
3255 #else
3256 //closing the one and only file (single cpu mode or MPI master alone)
3257 if(myID==MASTER)
3258 znzclose(world->bayesmdimfile);
3259 #endif /*paralio*/
3260 // open the bayesallfile assuming no paralio setting and using the zipped stuff ZNZ
3261 if(myID==MASTER)
3262 world->bayesmdimfile = znzopen(options->bayesmdimfilename,"r", options->use_compressed);
3263 #else
3264 // not ZNZ
3265 #ifdef PARALIO
3266 // closing the MPI worker's files and the master
3267 MPI_File_close(&world->mpi_bayesmdimfile);
3268 #else
3269 //closing the one and only file (single cpu mode or MPI master alone)
3270 if(myID==MASTER)
3271 fclose(world->bayesmdimfile);
3272 #endif /*paralio*/
3273 world->bayesmdimfile = NULL;
3274 //world->bayesmdimfile = fopen(options->bayesmdimfilename,"r");
3275 //if(world->bayesmdimfile==NULL)
3276 // {
3277 // perror("Failed to open file: ");
3278 // printf("errno = %d.\n", errno);
3279 // exit(1);
3280 // }
3281 #endif /*znz*/
3282 if (myID==MASTER)
3283 {
3284 read_bayes_fromfile(world->bayesmdimfile,world, options, files,numfiles);
3285 for(locus=0;locus<world->loci;locus++)
3286 {
3287 if(world->data->skiploci[locus])
3288 continue;
3289 if(world->bayes->histogram[locus].results == NULL)
3290 {
3291 world->data->skiploci[locus] = TRUE;
3292 continue;
3293 }
3294 calc_hpd_credibility(world->bayes, locus, world->numpop2, world->numpop2 + world->bayes->mu);
3295 }
3296 }
3297 }
3298 else
3299 {
3300 // keep everything in RAM
3301 get_bayeshist (world, options);
3302 }
3303 }
3304