1 /*------------------------------------------------------
2 Maximum likelihood estimation
3 of migration rate and effectice population size
4 using a Metropolis-Hastings Monte Carlo algorithm
5 -------------------------------------------------------
6 Bayesian R O U T I N E S
7
8 send questions concerning this software to:
9 Peter Beerli
10 beerli@fsu.edu
11
12 Copyright 1997-2002 Peter Beerli and Joseph Felsenstein
13 Copyright 2003-2008 Peter Beerli
14 Copyright 2009-2012 Peter Beerli and Michal Palczewski
15
16 Permission is hereby granted, free of charge, to any person obtaining a copy
17 of this software and associated documentation files (the "Software"), to deal
18 in the Software without restriction, including without limitation the rights
19 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
20 of the Software, and to permit persons to whom the Software is furnished to do
21 so, subject to the following conditions:
22
23 The above copyright notice and this permission notice shall be included in all copies
24 or substantial portions of the Software.
25
26 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
27 INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
28 PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
29 HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
30 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
31 OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
32
33 $Id: bayes.c 2176 2013-12-16 15:45:24Z beerli $
34 */
35 /*! \file bayes.c
36
37 bayes.c contains functions to handle the Bayesian implementation of migrate
38 it is triggered through the options->bayes_infer=TRUE.
39
40 */
41 #include <stdlib.h>
42 #include "definitions.h"
43 #include "migration.h"
44 #include "bayes.h"
45 #include "random.h"
46 #include "tools.h"
47 #include "sighandler.h"
48 #include "world.h"
49 #include "tree.h"
50 #include "slice.h"
51 #include "reporter.h"
52 #include "correlation.h"
53 #include "autotune.h"
54 #include "priors.h"
55
56 #ifdef PRETTY
57 #include "pretty.h"
58 #endif
59 #ifdef MPI
60 #include "migrate_mpi.h"
61 #else
62 extern int myID;
63 #endif
64
65 // counter for bayesallfile on a per locus basis
66 //extern long * mdimfilecount;
67 extern float page_height;
68
69 extern void reprecalc_world(world_fmt *world, long that);
70 #if defined(MPI) && !defined(PARALIO)
71 extern void print_marginal_like(float *temp, long *z, world_fmt * world);
72 #else
73 extern void print_marginal_like(char *temp, long *z, world_fmt * world);
74 #endif
75 extern void print_marginal_order(char *buf, long *bufsize, world_fmt *world);
76
77 //boolean bayes_accept (MYREAL newval, MYREAL oldval, MYREAL heat, MYREAL hastingsratio);
78 void bayes_print_accept(FILE * file, world_fmt *world);
79 //long bayes_update (world_fmt * world);
80
81 // function pointers for the bayes machinery
82 MYREAL (*log_prior_ratiotheta) (MYREAL, MYREAL, bayes_fmt *, long);
83 MYREAL (*log_prior_ratiomig) (MYREAL, MYREAL, bayes_fmt *, long);
84 MYREAL (*log_prior_ratiorate) (MYREAL, MYREAL, bayes_fmt *, long);
85 MYREAL (*log_prior_ratioall) (MYREAL, MYREAL, bayes_fmt *, long);
86 MYREAL (*log_prior_theta) (world_fmt *, long);//for heating
87 MYREAL (*log_prior_mig) (world_fmt *, long);//for heating
88 MYREAL (*log_prior_rate) (world_fmt *, long);//for heating
89 // for scaling multiple loci correctly
90 //
91 MYREAL (*log_prior_theta1) (world_fmt *, long, MYREAL);
92 MYREAL (*log_prior_mig1) (world_fmt *, long, MYREAL);
93 MYREAL (*log_prior_rate1) (world_fmt *, long, MYREAL);
94 //
95 MYREAL (*propose_newtheta) (MYREAL, long, bayes_fmt *, MYREAL *);
96 MYREAL (*propose_newmig) (MYREAL, long, bayes_fmt *, MYREAL *);
97 MYREAL (*propose_newrate) (MYREAL, long, bayes_fmt *, MYREAL *);
98
99 MYREAL (*hastings_ratiotheta) (MYREAL , MYREAL, MYREAL, MYREAL, bayes_fmt *, long );
100 MYREAL (*hastings_ratiomig) (MYREAL , MYREAL, MYREAL, MYREAL, bayes_fmt *, long );
101 MYREAL (*hastings_ratiorate) (MYREAL , MYREAL, MYREAL, MYREAL, bayes_fmt *, long );
102
103 MYREAL propose_uni_newparam (MYREAL param, long which, bayes_fmt * bayes, MYREAL *r);
104 MYREAL propose_exp_newparam (MYREAL param, long which, bayes_fmt * bayes, MYREAL *r);
105 MYREAL propose_expb_newparam (MYREAL param,long which, bayes_fmt * bayes, MYREAL *r);
106 MYREAL propose_mult_newparam (MYREAL param,long which, bayes_fmt * bayes, MYREAL *r);
107
108
109 MYREAL log_prior_ratio_uni (MYREAL newparam, MYREAL oldparam, bayes_fmt *bayes, long which);
110 MYREAL log_prior_ratio_exp (MYREAL newparam, MYREAL oldparam, bayes_fmt *bayes, long which);
111 MYREAL log_prior_ratio_wexp (MYREAL newparam, MYREAL oldparam, bayes_fmt *bayes, long which);
112 MYREAL log_prior_ratio_mult (MYREAL newparam, MYREAL oldparam, bayes_fmt *bayes, long which);
113
114
115 MYREAL log_prior_uni (world_fmt * world, long numparam);//for heating
116 MYREAL log_prior_exp (world_fmt * world, long numparam);//for heating
117 MYREAL log_prior_wexp (world_fmt * world, long numparam);//for heating
118 MYREAL log_prior_mult (world_fmt * world, long numparam);//for heating
119
120
121 MYREAL log_prior_uni1 (world_fmt * world, long numparam, MYREAL);
122 MYREAL log_prior_exp1 (world_fmt * world, long numparam, MYREAL);
123 MYREAL log_prior_wexp1 (world_fmt * world, long numparam, MYREAL);
124 MYREAL log_prior_mult1 (world_fmt * world, long numparam, MYREAL);
125
126
127 MYREAL hastings_ratio_uni(MYREAL newparam, MYREAL oldparam, MYREAL delta, MYREAL r, bayes_fmt * bayes, long whichparam);
128 MYREAL hastings_ratio_exp(MYREAL newparam, MYREAL oldparam, MYREAL delta, MYREAL r, bayes_fmt * bayes, long whichparam);
129 MYREAL hastings_ratio_expb(MYREAL newparam, MYREAL oldparam, MYREAL delta, MYREAL r, bayes_fmt * bayes, long whichparam);
130 MYREAL hastings_ratio_mult(MYREAL newparam, MYREAL oldparam, MYREAL delta, MYREAL r, bayes_fmt * bayes, long whichparam);
131
132 MYINLINE MYREAL probWait(long *lines,MYREAL *locallparam, long numpop, long numpop2);
133
134 void calculate_credibility_interval(world_fmt * world, long locus);
135
136 void calc_hpd_credibility(bayes_fmt *bayes,long locus, long numpop2, long numparam);
137
138 void bayes_combine_loci(world_fmt * world);
139 void print_locus_histogram_header(FILE *bayesfile, MYREAL *deltas, char *custm2, long numpop, long numparam, boolean usem, boolean mu, world_fmt *world);
140 void print_locus_histogram(FILE *bayesfile, world_fmt * world, long locus, long numparam);
141 void print_loci_histogram(FILE *bayesfile, world_fmt * world, long locus, long numparam);
142
143 void bayes_set_param(MYREAL *param, MYREAL newparam, long which, char *custm2, long numpop);
144
145 void adjust_bayes_min_max(world_fmt* world, MYREAL **mini, MYREAL **maxi, MYREAL **adjmini, MYREAL **adjmaxi);
146 void bayes_progress(world_fmt *world, long ten);
147 #ifdef ZNZ
148 void print_bayes_tofile(znzFile mdimfile, MYREAL *params, bayes_fmt *bayes, world_fmt *world, long numpop2, long mdiminterval, long locus, long replicate, long step);
149 #else
150 void print_bayes_tofile(FILE *mdimfile, MYREAL *params, bayes_fmt *bayes, world_fmt *world, long numpop2, long mdiminterval, long locus, long replicate, long step);
151 #endif
152 #ifdef DEBUG
153 extern void print_bf_values(world_fmt * world);
154 #endif
155
156 void recalc_timelist (world_fmt * world, MYREAL new_ratio , MYREAL old_ratio);
157
158
159
160 /// \brief set param within min max
161 ///
check_min_max_param(MYREAL * param0,MYREAL minparam,MYREAL maxparam)162 void check_min_max_param(MYREAL *param0, MYREAL minparam, MYREAL maxparam)
163 {
164 if(*param0 > maxparam)
165 *param0 = maxparam;
166 else
167 {
168 if(*param0 < minparam)
169 *param0 = minparam;
170 }
171 }
172
173
174 /// \brief Decide which prior distribution for the THETA parameter to use
175 /// Decide which prior distribution to use for THETA: the functionpointers propose_newparam_x will hold
176 /// either the Exponential prior distribution or a Uniform prior distribution or .. other priors ..
177 /// each prior distribution has its own specific hastings ratio that will be calculated in the
178 /// function ptr hastings_ratio
179 void
which_theta_prior(int kind)180 which_theta_prior (int kind)
181 {
182 switch (kind)
183 {
184 case UNIFORMPRIOR:
185 log_prior_ratiotheta = (MYREAL (*) (MYREAL, MYREAL, bayes_fmt *, long)) log_prior_ratio_uni;
186 log_prior_theta = (MYREAL (*) (world_fmt *, long)) log_prior_uni;
187 log_prior_theta1 = (MYREAL (*) (world_fmt *, long, MYREAL)) log_prior_uni1;
188 propose_newtheta = (MYREAL (*) (MYREAL, long, bayes_fmt *, MYREAL *)) propose_uni_newparam;
189 hastings_ratiotheta = (MYREAL (*) (MYREAL, MYREAL, MYREAL, MYREAL, bayes_fmt *, long)) hastings_ratio_uni;
190 break;
191 case EXPPRIOR:
192 log_prior_ratiotheta = (MYREAL (*) (MYREAL, MYREAL, bayes_fmt *, long)) log_prior_ratio_exp;
193 log_prior_theta = (MYREAL (*) (world_fmt *, long)) log_prior_exp;
194 log_prior_theta1 = (MYREAL (*) (world_fmt *, long, MYREAL)) log_prior_exp1;
195 propose_newtheta = (MYREAL (*) (MYREAL, long, bayes_fmt *, MYREAL * )) propose_exp_newparam;
196 hastings_ratiotheta = (MYREAL (*) (MYREAL, MYREAL, MYREAL, MYREAL, bayes_fmt *, long )) hastings_ratio_exp;
197 break;
198 case WEXPPRIOR:
199 log_prior_ratiotheta = (MYREAL (*) (MYREAL, MYREAL, bayes_fmt *, long)) log_prior_ratio_wexp;
200 log_prior_theta = (MYREAL (*) (world_fmt *, long)) log_prior_wexp;
201 log_prior_theta1 = (MYREAL (*) (world_fmt *, long, MYREAL)) log_prior_wexp1;
202 propose_newtheta = (MYREAL (*) (MYREAL, long, bayes_fmt *, MYREAL * )) propose_expb_newparam;
203 hastings_ratiotheta = (MYREAL (*) (MYREAL, MYREAL, MYREAL, MYREAL, bayes_fmt *, long)) hastings_ratio_expb;
204 break;
205 case MULTPRIOR:
206 log_prior_ratiotheta = (MYREAL (*) (MYREAL, MYREAL,bayes_fmt *, long)) log_prior_ratio_mult;
207 log_prior_theta = (MYREAL (*) (world_fmt * , long)) log_prior_mult;
208 log_prior_theta1 = (MYREAL (*) (world_fmt * , long, MYREAL)) log_prior_mult1;
209 propose_newtheta = (MYREAL (*) (MYREAL, long, bayes_fmt *, MYREAL * )) propose_mult_newparam;
210 hastings_ratiotheta = (MYREAL (*) (MYREAL, MYREAL, MYREAL, MYREAL, bayes_fmt *, long )) hastings_ratio_mult;
211 break;
212 case GAMMAPRIOR:
213 log_prior_ratiotheta = (MYREAL (*) (MYREAL, MYREAL, bayes_fmt *, long)) log_prior_ratio_gamma;
214 log_prior_theta = (MYREAL (*) (world_fmt *, long)) log_prior_gamma;
215 log_prior_theta1 = (MYREAL (*) (world_fmt *, long, MYREAL)) log_prior_gamma1;
216 propose_newtheta = (MYREAL (*) (MYREAL, long, bayes_fmt *, MYREAL * )) propose_gamma_newparam;
217 hastings_ratiotheta = (MYREAL (*) (MYREAL, MYREAL, MYREAL, MYREAL, bayes_fmt *, long)) hastings_ratio_gamma;
218 break;
219 default:
220 error("Prior distribution hookup for THETA failed");
221 break;
222 }
223 }
224 /// \brief Decide which prior distribution for the MIG parameter to use
225 /// Decide which prior distribution to use for MIG: the functionpointers propose_newparam_x will hold
226 /// either the Exponential prior distribution or a Uniform prior distribution or .. other priors ..
227 /// each prior distribution has its own specific hastings ratio that will be calculated in the
228 /// function ptr hastings_ratio
229 void
which_mig_prior(int kind)230 which_mig_prior (int kind)
231 {
232 switch (kind)
233 {
234 case UNIFORMPRIOR:
235 log_prior_ratiomig = (MYREAL (*) (MYREAL, MYREAL, bayes_fmt *, long)) log_prior_ratio_uni;
236 log_prior_mig = (MYREAL (*) (world_fmt * , long)) log_prior_uni;
237 log_prior_mig1 = (MYREAL (*) (world_fmt * , long, MYREAL)) log_prior_uni1;
238 propose_newmig = (MYREAL (*) (MYREAL, long, bayes_fmt *, MYREAL *)) propose_uni_newparam;
239 hastings_ratiomig = (MYREAL (*) (MYREAL, MYREAL, MYREAL, MYREAL, bayes_fmt *, long)) hastings_ratio_uni;
240 break;
241 case EXPPRIOR:
242 log_prior_ratiomig = (MYREAL (*) (MYREAL, MYREAL, bayes_fmt *, long)) log_prior_ratio_exp;
243 log_prior_mig = (MYREAL (*) (world_fmt * , long)) log_prior_exp;
244 log_prior_mig1 = (MYREAL (*) (world_fmt * , long, MYREAL)) log_prior_exp1;
245 propose_newmig = (MYREAL (*) (MYREAL, long, bayes_fmt *, MYREAL * )) propose_exp_newparam;
246 hastings_ratiomig = (MYREAL (*) (MYREAL, MYREAL, MYREAL, MYREAL, bayes_fmt *, long )) hastings_ratio_exp;
247 break;
248 case WEXPPRIOR:
249 log_prior_ratiomig = (MYREAL (*) (MYREAL, MYREAL, bayes_fmt *, long)) log_prior_ratio_wexp;
250 log_prior_mig = (MYREAL (*) (world_fmt * , long)) log_prior_wexp;
251 log_prior_mig1 = (MYREAL (*) (world_fmt * , long, MYREAL)) log_prior_wexp1;
252 propose_newmig = (MYREAL (*) (MYREAL, long, bayes_fmt *, MYREAL * )) propose_expb_newparam;
253 hastings_ratiomig = (MYREAL (*) (MYREAL, MYREAL, MYREAL, MYREAL, bayes_fmt *, long)) hastings_ratio_expb;
254 break;
255 case MULTPRIOR:
256 log_prior_ratiomig = (MYREAL (*) (MYREAL, MYREAL,bayes_fmt *, long)) log_prior_ratio_mult;
257 log_prior_mig = (MYREAL (*) (world_fmt * , long)) log_prior_mult;
258 log_prior_mig1 = (MYREAL (*) (world_fmt * , long, MYREAL)) log_prior_mult1;
259 propose_newmig = (MYREAL (*) (MYREAL, long, bayes_fmt *, MYREAL * )) propose_mult_newparam;
260 hastings_ratiomig = (MYREAL (*) (MYREAL, MYREAL, MYREAL, MYREAL, bayes_fmt *, long )) hastings_ratio_mult;
261 break;
262 case GAMMAPRIOR:
263 log_prior_ratiomig = (MYREAL (*) (MYREAL, MYREAL,bayes_fmt *, long)) log_prior_ratio_gamma;
264 log_prior_mig = (MYREAL (*) (world_fmt * , long)) log_prior_gamma;
265 log_prior_mig1 = (MYREAL (*) (world_fmt * , long, MYREAL)) log_prior_gamma1;
266 propose_newmig = (MYREAL (*) (MYREAL, long, bayes_fmt *, MYREAL * )) propose_gamma_newparam;
267 hastings_ratiomig = (MYREAL (*) (MYREAL, MYREAL, MYREAL, MYREAL, bayes_fmt *, long )) hastings_ratio_gamma;
268 break;
269 default:
270 error("Prior distribution hookup for Migration parameters failed");
271 break;
272 }
273 }
274 /// \brief Decide which prior distribution for the mutation rate modifier parameter to use
275 /// Decide which prior distribution to use for mutation rate modifier:
276 /// the functionpointers propose_newparam_x will hold
277 /// either the Exponential prior distribution or a Uniform prior distribution or .. other priors ..
278 /// each prior distribution has its own specific hastings ratio that will be calculated in the
279 /// function ptr hastings_ratio
280 void
which_rate_prior(int kind)281 which_rate_prior (int kind)
282 {
283 switch (kind)
284 {
285 case UNIFORMPRIOR:
286 log_prior_ratiorate = (MYREAL (*) (MYREAL, MYREAL, bayes_fmt *, long)) log_prior_ratio_uni;
287 log_prior_rate = (MYREAL (*) (world_fmt * , long)) log_prior_uni;
288 log_prior_rate1 = (MYREAL (*) (world_fmt * , long, MYREAL)) log_prior_uni1;
289 propose_newrate = (MYREAL (*) (MYREAL, long, bayes_fmt *, MYREAL *)) propose_uni_newparam;
290 hastings_ratiorate = (MYREAL (*) (MYREAL, MYREAL, MYREAL, MYREAL, bayes_fmt *, long)) hastings_ratio_uni;
291 break;
292 case EXPPRIOR:
293 log_prior_ratiorate = (MYREAL (*) (MYREAL, MYREAL, bayes_fmt *, long)) log_prior_ratio_exp;
294 log_prior_rate = (MYREAL (*) (world_fmt * , long)) log_prior_exp;
295 log_prior_rate1 = (MYREAL (*) (world_fmt * , long, MYREAL)) log_prior_exp1;
296 propose_newrate = (MYREAL (*) (MYREAL, long, bayes_fmt *, MYREAL * )) propose_exp_newparam;
297 hastings_ratiorate = (MYREAL (*) (MYREAL, MYREAL, MYREAL, MYREAL, bayes_fmt *, long )) hastings_ratio_exp;
298 break;
299 case WEXPPRIOR:
300 log_prior_ratiorate = (MYREAL (*) (MYREAL, MYREAL, bayes_fmt *, long)) log_prior_ratio_wexp;
301 log_prior_rate = (MYREAL (*) (world_fmt * , long)) log_prior_wexp;
302 log_prior_rate1 = (MYREAL (*) (world_fmt * , long, MYREAL)) log_prior_wexp1;
303 propose_newrate = (MYREAL (*) (MYREAL, long, bayes_fmt *, MYREAL * )) propose_expb_newparam;
304 hastings_ratiorate = (MYREAL (*) (MYREAL, MYREAL, MYREAL, MYREAL, bayes_fmt *, long)) hastings_ratio_expb;
305 break;
306 case MULTPRIOR:
307 log_prior_ratiorate = (MYREAL (*) (MYREAL, MYREAL,bayes_fmt *, long)) log_prior_ratio_mult;
308 log_prior_rate = (MYREAL (*) (world_fmt * , long)) log_prior_mult;
309 log_prior_rate1 = (MYREAL (*) (world_fmt * , long, MYREAL)) log_prior_mult1;
310 propose_newrate = (MYREAL (*) (MYREAL, long, bayes_fmt *, MYREAL * )) propose_mult_newparam;
311 hastings_ratiorate = (MYREAL (*) (MYREAL, MYREAL, MYREAL, MYREAL, bayes_fmt *, long )) hastings_ratio_mult;
312 break;
313 case GAMMAPRIOR:
314 log_prior_ratiorate = (MYREAL (*) (MYREAL, MYREAL,bayes_fmt *, long)) log_prior_ratio_gamma;
315 log_prior_rate = (MYREAL (*) (world_fmt * , long)) log_prior_gamma;
316 log_prior_rate1 = (MYREAL (*) (world_fmt * , long, MYREAL)) log_prior_gamma1;
317 propose_newrate = (MYREAL (*) (MYREAL, long, bayes_fmt *, MYREAL * )) propose_gamma_newparam;
318 hastings_ratiorate = (MYREAL (*) (MYREAL, MYREAL, MYREAL, MYREAL, bayes_fmt *, long )) hastings_ratio_gamma;
319 break;
320 default:
321 error("Prior distribution hookup for mutation rate parameter failed");
322 break;
323 }
324 }
325
326
327
328 /// \brief fixes start parameter that are outside of the bayesian mini/maxi values
329 ///
330 /// Fixes parameter start values that are outside of the Bayesian minimum and maximum
331 /// values. Only called in a Bayesian analysis
bayes_check_and_fix_param(world_fmt * world,option_fmt * options)332 void bayes_check_and_fix_param(world_fmt *world, option_fmt *options)
333 {
334 long i;
335 // long frompop, topop;
336 //MYREAL theta;
337 MYREAL *maxparam = world->bayes->maxparam;
338 MYREAL *minparam = world->bayes->minparam;
339
340 for (i=0; i< world->numpop; i++)
341 {
342 if(world->param0[i] > maxparam[i])
343 world->param0[i] = maxparam[i];
344 else
345 if(world->param0[i] < minparam[i])
346 world->param0[i] = minparam[i];
347 }
348
349 for (i=world->numpop; i< world->numpop2; i++)
350 {
351 if(options->custm2[i]!='0')
352 {
353 //@@if(world->options->usem)
354 //@@ {
355 check_min_max_param(&world->param0[i],minparam[i],maxparam[i]);
356 //@@ }
357 //@@else
358 //@@ {
359 //@@ m2mm (i, world->numpop, &frompop, &topop);
360 //@@ theta = world->param0[topop];
361 //@@ if(world->param0[i]*theta > maxparam[i])
362 //@@ world->param0[i] = maxparam[i]/theta;
363 //@@ else
364 //@@ if(world->param0[i]*theta < minparam[i])
365 //@@ world->param0[i] = minparam[i]/theta;
366 //@@ }
367 }
368 }
369 }
370
371
372
373 /// \brief Calculate Prob(g|param)Prob(D|G) from world->treetimes.
374 ///
375 /// Calculate Prob(g|param)Prob(D|G) from world->treetimes
376 // uses as input vtlist *tl = world->treetimes->tl;
377 // uses as input world->treetimes->T
378 // to allow for custom p(g*|theta) calculation, but currently not needed
379 //MYREAL probg_treetimesX(world_fmt *world, vtlist *tl, long T);
probg_treetimes(world_fmt * world)380 MYREAL probg_treetimes(world_fmt *world)
381 //{
382 // return probg_treetimesX(world,world->treetimes->tl,world->treetimes->T);
383 //}
384 //MYREAL probg_treetimesX(world_fmt *world, vtlist *tl, long T)
385 {
386 const long numpop = world->numpop;
387 const long numpop2 = world->numpop2;
388 const long locus = world->locus;
389 const MYREAL like = world->likelihood[world->G];
390 // each locus is run with its native theta we do not need to adjust for inheritance
391 // before the combining over loci.
392 const MYREAL inheritance = 1.0;
393 const MYREAL linheritance = 0.0;
394 //
395 MYREAL pk;
396 boolean usem = world->options->usem;
397 //
398 long T = world->treetimes->T;
399 vtlist *tl = world->treetimes->tl; // comment this out to
400 // making probgtreetimes more general
401 // so that it can be used in the proposal structure in MCMC1.c
402 vtlist *tli = &(tl[0]);
403 vtlist *tli1;
404
405 long i;
406 long r, j;
407 long tlif = tli->from;
408 long tlit = tli->to;
409 int msta, msto;
410
411 MYREAL deltatime = tl[0].age;
412 MYREAL sumprob = 0.;
413 MYREAL eventprob=0.;
414 //xcode MYREAL k;
415
416 const MYREAL *geo = world->data->geo;
417 const MYREAL *lgeo = world->data->lgeo;
418 MYREAL *param0 = world->param0;
419 MYREAL *locallparam;
420 MYREAL *localparam;
421 MYREAL *sm;
422 const MYREAL mu_rate = world->options->mu_rates[locus];
423 const MYREAL lmu_rate = world->options->lmu_rates[locus];
424
425 #ifndef LONGSUM
426 MYREAL pw;
427 locallparam = (MYREAL *) mycalloc ((numpop2 + numpop + numpop), sizeof (MYREAL));
428 #else /*LONGSUM*/
429 MYREAL *rates;
430 MYREAL *lrates;
431 MYREAL *rtimes;
432 long partsize = numpop2 + world->bayes->mu + numpop * 3;
433 MYREAL
434 locallparam = (MYREAL *) mycalloc ((numpop2 + numpop + numpop + 9 * numpop), sizeof (MYREAL));
435 rates = locallparam + numpop2 + numpop + numpop;
436 lrates = rates + 3 * numpop;
437 rtimes = rates + 6 * numpop;
438 // rates and lrates=log(rates) hold the multipliers for Theta in the the timeslices
439 memcpy (rates, localparam+partsize - 3 * numpop, sizeof(MYREAL) * 3 * numpop);
440 memcpy (lrates, locallparam+partsize - 3 * numpop, sizeof(MYREAL) * 3 * numpop);
441 // rtimes hold the ages at the bottom of a rate timeslice
442 memcpy (rtimes, world->flucrates + 3 * numpop, sizeof(MYREAL) * 3 * numpop);
443 #endif /*LONGSUM*/
444
445 // pointers into the localparam vector
446 localparam = locallparam + numpop2;
447 sm = localparam + numpop;
448
449 // fill localparam vector with data
450 memcpy (localparam, param0, sizeof (MYREAL) * numpop);
451 for (r = 0; r < numpop; r++)
452 {
453 locallparam[r] = LOG2 - (log(param0[r]) + linheritance + lmu_rate);
454 localparam[r] = -1. / (localparam[r] * mu_rate * inheritance); // minus, so that we can loop over all in probG4
455 msta = world->mstart[r];
456 msto = world->mend[r];
457 for (j = msta; j < msto; j++)
458 {
459 if (param0[j] > 0.0)
460 {
461 //@@
462 pk = usem ? param0[j] : param0[j] / param0[r];
463 sm[r] -= geo[j] * pk / mu_rate; //minus, so that we can loop over all in probG4
464 locallparam[j] = LOG(pk) + lgeo[j] - lmu_rate;
465 }
466 }
467 }
468
469 if(tl[0].eventnode->type == 't')
470 {
471 eventprob = 0.0;
472 deltatime = 0.0; //the first tip might be in the past,
473 // this makes sure that if all sample have dual date > 0 that
474 // we do not calculate strange probabilities, this is a problem only for the
475 // first entry.
476 }
477 else
478 {
479 // k = tli->lineages[tlit];
480 eventprob = ((tlif==tlit) ? (locallparam[tlif]) : locallparam[mm2m(tlif,tlit, numpop)]);
481 }
482 sumprob = deltatime * probWait(tli->lineages, locallparam, numpop, numpop2) + eventprob;
483
484 for(i=1; i<T-1;i++)
485 {
486 tli = &tl[i];
487 tli1 = &tl[i-1];
488 tlif = tli->from;
489 tlit = tli->to;
490
491 if(tli->eventnode->type == 't')
492 {
493 eventprob = 0.0;
494 }
495 else
496 {
497 // k = tli->lineages[tlit];
498 eventprob = ((tlif==tlit) ? (locallparam[tlif]) : locallparam[mm2m(tlif,tlit, numpop)]);
499 }
500 deltatime = (tli->age - tli1->age);
501 //#ifndef LONGSUM
502 pw = deltatime;
503 pw *= probWait(tli->lineages, locallparam, numpop, numpop2);
504 pw += eventprob;
505 sumprob += pw;
506 //#else
507 //error("Bayesian method of multiple timeslices is not yet implemented");
508 //#endif
509 }
510 myfree(locallparam);
511 //printf("ln p(d|m)=%f+%f=%f\n",sumprob, like ,sumprob + like);
512 // each point probability adds a log(1/mu) to the sumprob
513 // assuming mu is constant this will be log(treetimes) + log(1/mu) equivalents
514 // assuming mu is 1 we simply divide the result by treetimes
515 // as a result the values will be still too high posterior estimates
516 // will be off by the same quantity log(mu)
517 return /*-LOG(world->treetimes->T-1) +*/ (sumprob + like);
518 }
519
520
521 ///
522 /// calculates the waiting time part for prob(g|param)
523 MYINLINE
probWait(long * lines,MYREAL * locallparam,long numpop,long numpop2)524 MYREAL probWait(long *lines, MYREAL *locallparam, long numpop, long numpop2)
525 {
526 MYREAL *invtheta = locallparam + numpop2;
527 MYREAL *msum = invtheta + numpop;
528 long j;
529 const long line0 = lines[0];
530 const long line01 = (line0 * line0) - line0;
531 long line1;
532 long line11;
533 long line2;
534 long line21;
535 long line3;
536 long line31;
537 MYREAL probm = 0.;
538 MYREAL probth = 0.;
539 switch(numpop)
540 {
541 case 1:
542 probth = line01 * invtheta[0];
543 //should be zero! probm = msum[0] * line0;
544 return probth; // + probm;
545 case 2:
546 line1 = lines[1];
547 line11 = (line1 * line1) - line1;
548 probm = msum[0] * line0 + msum[1] * line1;
549 probth = invtheta[0] * line01 + invtheta[1] * line11;
550 return probth + probm;
551 case 3:
552 line1 = lines[1];
553 line11 = (long) (line1 * (line1 - 1.0));
554 line2 = lines[2];
555 line21 = (long) (line2 * (line2 - 1.0));
556 probm = msum[0] * line0 + msum[1] * line1 + msum[2] * line2;
557 probth = invtheta[0] * line01 + invtheta[1] * line11 + invtheta[2] * line21;
558 return probth + probm;
559 case 4:
560 line1 = lines[1];
561 line11 = (line1 * line1) - line1;
562 line2 = lines[2];
563 line21 = (line2 * line2) - line2;
564 line3 = lines[3];
565 line31 = (line3 * line3) - line3;
566 probm = msum[0] * line0 + msum[1] * line1 + msum[2] * line2 + msum[3] * line3;
567 probth = invtheta[0] * line01 + invtheta[1] * line11 + invtheta[2] * line21 + invtheta[3] * line31;
568 return probth + probm;
569 default:
570 line1 = lines[1];
571 line11 = (line1 * line1) - line1;
572 line2 = lines[2];
573 line21 = (line2 * line2) - line2;
574 line3 = lines[3];
575 line31 = (line3 * line3) - line3;
576 probm = msum[0] * line0 + msum[1] * line1 + msum[2] * line2 + msum[3] * line3;
577 probth = invtheta[0] * line01 + invtheta[1] * line11 + invtheta[2] * line21 + invtheta[3] * line31;
578 for(j=4; j < numpop; j++)
579 {
580 const long linex = lines[j];
581 probth += ((linex * linex) - linex) * invtheta[j];
582 probm += linex * msum[j];
583 }
584 return probth + probm;
585 }
586 }
587 ///
588 /// do we accept parameter update
589 boolean
bayes_accept(MYREAL newval,MYREAL oldval,MYREAL heat,MYREAL hastingsratio)590 bayes_accept (MYREAL newval, MYREAL oldval, MYREAL heat, MYREAL hastingsratio)
591 {
592 MYREAL diff = newval - oldval + hastingsratio;
593 // for thermodynamic integration we should not heat this
594 // for the standard heating scheme we perhaps should but there is no visible improvement in the runs
595 // when we heat this portion for swapping, if we will find some differences I will need to add a flag
596 // that differentiates between thermodynamic integration and standard heating.
597 if (diff >= 0.0)
598 return TRUE;
599 if (LOG (RANDUM ()) < diff)
600 return TRUE;
601 else
602 return FALSE;
603 }
604
605 ///
606 /// log prior ratios for all parameters
log_prior_ratio_all(world_fmt * world,MYREAL * newvals)607 MYREAL log_prior_ratio_all(world_fmt *world, MYREAL *newvals)
608 {
609 long np = world->numpop;
610 long np2 = world->numpop2;
611 long npp = np2 + (world->bayes->mu * world->loci);
612 MYREAL logmax = -MYREAL_MAX;
613 long pop;
614 MYREAL ratio = 0;
615 MYREAL *vals;
616 vals = (MYREAL *) mycalloc(npp,sizeof(MYREAL));
617 for(pop = 0; pop < np; pop++)
618 {
619 vals[pop] = log_prior_ratiotheta(newvals[pop], -1., world->bayes,0L);
620 if(logmax < vals[pop])
621 logmax = vals[pop];
622 }
623 for(pop = np; pop < np2; pop++)
624 {
625 vals[pop] = log_prior_ratiomig(newvals[pop], -1., world->bayes,0L);
626 if(logmax < vals[pop])
627 logmax = vals[pop];
628 }
629 if(world->bayes->mu)
630 {
631 vals[pop] = log_prior_ratiorate(newvals[pop], -1., world->bayes,0L);
632 if(logmax < vals[pop])
633 logmax = vals[pop];
634 }
635 ratio = 0;
636 for(pop = 0; pop < npp; pop++)
637 ratio += EXP(vals[pop]-logmax);
638 myfree(vals);
639 return log(ratio)+logmax;
640 }
641
642 ///
643 /// Log Prior distribution ratios between old and new parameter:
644 /// UNIFORM
645 /// $$p[x] = \frac{1}{b-a} $$
646 /// in reality I use a window to propose new parameters
647 /// but the distribution should be still the same $p[x]$
648 /// the prior distribution for the new parameter and the old are the same
log_prior_ratio_uni(MYREAL newparam,MYREAL oldparam,bayes_fmt * bayes,long which)649 MYREAL log_prior_ratio_uni(MYREAL newparam,
650 MYREAL oldparam,
651 bayes_fmt * bayes,
652 long which)
653 {
654 if((newparam > bayes->maxparam[which]) || (newparam < bayes->minparam[which]))
655 return -HUGE;
656 else
657 return 0.0 ;
658 }
659
660 ///
661 /// Uniform prior distribution for theta or migration rate used in heating acceptance
log_prior_uni(world_fmt * world,long numparam)662 MYREAL log_prior_uni(world_fmt *world, long numparam)
663 {
664 long numpop = world->numpop;
665 long start = ((numparam <= numpop || numpop==1) ? 0 : numpop);
666 long stop = ((start == numpop) ? world->numpop2 : numpop);
667 long i;
668 // long frompop;
669 //long topop;
670 MYREAL * param0 = world->param0;
671 bayes_fmt * bayes = world->bayes;
672 MYREAL value=0.0;
673 MYREAL mu;
674 MYREAL p0;
675
676 if(numparam>stop) // rate change
677 {
678 mu = world->options->mu_rates[world->locus];
679 i = world->numpop2 + world->locus;
680 if((mu > bayes->maxparam[i]) || (mu < bayes->minparam[i]))
681 return -HUGE;
682 else
683 return -LOG(bayes->maxparam[i] - bayes->minparam[i]);
684 }
685 // migration or theta parameters
686 for(i = start; i < stop; i++)
687 {
688 if(!strchr("0c", world->options->custm2[i]))
689 {
690 //@@if(i>=numpop && !world->options->usem)
691 //@@{
692 //@@ m2mm(i,numpop,&frompop,&topop);
693 //@@ p0 = param0[i] * param0[topop];
694 //@@}
695 //@@else
696 //@@ {
697 p0 = param0[i];
698 //@@ }
699 if((p0 > bayes->maxparam[i]) || (p0 < bayes->minparam[i]))
700 return -HUGE;
701 else
702 value += -LOG(bayes->maxparam[i] - bayes->minparam[i]);
703 }
704 }
705 return value ;
706 }
707
708 ///
709 /// uniform prior distribution, returns log(1/(a-b)
log_prior_uni1(world_fmt * world,long numparam,MYREAL val)710 MYREAL log_prior_uni1(world_fmt *world, long numparam, MYREAL val)
711 {
712 bayes_fmt * bayes = world->bayes;
713 long i = numparam;
714 // long frompop;
715 //long topop;
716 MYREAL retval = -HUGE;
717 MYREAL p0;
718 //@@if(i>=world->numpop && !world->options->usem)
719 //@@ {
720 //@@ m2mm(i,world->numpop,&frompop,&topop);
721 //@@ p0 = val * world->param0[topop];
722 //@@ }
723 //@@else
724 //@@ {
725 p0 = val;
726 //@@ }
727
728 if((p0 <= bayes->maxparam[i]) && (p0 >= bayes->minparam[i]))
729 {
730 retval = bayes->maxparam[i] - bayes->minparam[i];
731 if (retval > 0.0)
732 return -LOG(retval);
733 }
734 return retval;
735 }
736
737 ///
738 /// Log Prior distribution ratios between old and new parameter:
739 /// EXPONENTIAL
740 /// $$p[x] = Integrate[Exp[-u/mean]/mean, {u, a, x}]/-Exp[-b/mean] + Exp[-a/mean] $$
741 /// the ratio between old and new parameter prior will be then
742 /// $$
743 ///\frac{e^{-\frac{a}{\text{mean}}}-e^{-\frac{x}{\text{mean}}
744 ///}}{e^{-\frac{a}{\text{mean}}}-e^{-\frac{\text{x0}}{\tex
745 /// t{mean}}}}
746 /// $$
747 ///
748 /// The log(hastingsratio) for this move is not zero but cancels with the log_prior_ratio_exp
749 /// so instead of calculating stuff that goes away we set in both places the value to zero
log_prior_ratio_exp(MYREAL newparam,MYREAL oldparam,bayes_fmt * bayes,long which)750 MYREAL log_prior_ratio_exp(MYREAL newparam, MYREAL oldparam, bayes_fmt * bayes, long which)
751 {
752 if((newparam > bayes->maxparam[which]) || (newparam < bayes->minparam[which]))
753 return -HUGE;
754 else
755 return 0.;
756 // MYREAL imean = 1./bayes->priormean[which];
757 //MYREAL precalc = exp(-bayes->minparam[which] *imean);
758 //MYREAL xo = EXP(-oldparam * imean);
759 //MYREAL xn = EXP(-newparam * imean);
760 //MYREAL idenom = 1./(precalc - xo );
761 //MYREAL nom = (precalc - xn );
762 //return log(nom * idenom);
763 }
764
765 ///
766 /// Exponential prior distribution for theta or migration rate used in heating acceptance
767 /// Out[12]//TextForm=
768 /// b/mean (a - c)/mean
769 /// E (-a + c + (-1 + E ) mean)
770 /// --------------------------------------------
771 /// a/mean b/mean
772 /// -E + E
773 /// a: lower bound
774 /// b: upper bound
775 /// c: parameter value
776 /// (Power(E,b/m)*(-1 + Power(E,(a - c)/m)))/(Power(E,a/m) - Power(E,b/m))
log_prior_exp(world_fmt * world,long numparam)777 MYREAL log_prior_exp(world_fmt *world, long numparam)
778 {
779 return log_prior_gamma(world,numparam);
780 /*long numpop = world->numpop;
781 long start = ((numparam <= numpop || numpop==1) ? 0 : numpop);
782 long stop = ((start == numpop) ? world->numpop2 : numpop);
783 MYREAL * param0 = world->param0;
784 MYREAL minp;
785 MYREAL maxp;
786 MYREAL mean;
787 MYREAL imean;
788 bayes_fmt *bayes = world->bayes;
789 MYREAL value = 0.0;
790 MYREAL mu;
791 long i;
792 MYREAL retval;
793 if(numparam>stop) // rate change
794 {
795 i = world->numpop2 + world->locus;
796 mu = world->options->mu_rates[world->locus];
797 if((mu > bayes->maxparam[i]) || (mu < bayes->minparam[i]))
798 return -HUGE;
799 else
800 {
801 maxp = bayes->maxparam[i];
802 minp = bayes->minparam[i];
803 mean = bayes->meanparam[i];
804 imean = 1.0 / mean;
805 //(Power(E,b/m)*(-1 + Power(E,(a - c)/m)))/(Power(E,a/m) - Power(E,b/m))
806 //return log((EXP(maxp*imean)*(-1.0 + EXP((minp - param0[i])*imean)))/(EXP(minp*imean) - EXP(maxp*imean)));
807 // (Power(E,a/m)*(1 - Power(E,(b - c)/m)))/(Power(E,a/m) - Power(E,b/m))
808 retval = 1.0-(EXP(minp*imean)*(1.0 - EXP((maxp - param0[i])*imean)))/(EXP(minp*imean) - EXP(maxp*imean));
809 if (retval>0.0)
810 return log(retval);
811 else
812 return -HUGE;
813 // return log(EXP(maxp * imean) * (-minp + param0[i] + EXP((minp-param0[i]) * imean))/
814 // (EXP(maxp * imean) - EXP(minp * imean)));
815 }
816 }
817 // migration and coalescence rate
818 for(i = start; i < stop; i++)
819 {
820 if(!strchr("0c", world->options->custm2[i]))
821 {
822 maxp = bayes->maxparam[i];
823 minp = bayes->minparam[i];
824 mean = bayes->meanparam[i];
825 imean = 1.0 / mean;
826 if((param0[i] > bayes->maxparam[i]) || (param0[i] < bayes->minparam[i]))
827 return -HUGE;
828 else
829 {
830 //(Power(E,b/m)*(-1 + Power(E,(a - c)/m)))/(Power(E,a/m) - Power(E,b/m))
831 //value += log((EXP(maxp*imean)*(-1.0 + EXP((minp - param0[i])*imean)))/(EXP(minp*imean) - EXP(maxp*imean)));
832 retval = 1.0-(EXP(minp*imean)*(1.0 - EXP((maxp - param0[i])*imean)))/(EXP(minp*imean) - EXP(maxp*imean));
833 if (retval>0.0)
834 value += log(retval);
835 else
836 value += -HUGE;
837 //log(EXP(maxp * imean) * (-minp + param0[i] + EXP((minp-param0[i]) * imean))/
838 // (EXP(maxp * imean) - EXP(minp * imean)));
839 }
840 }
841 }
842 return value;*/
843 }
844
845 ///
846 /// return probability of prior_exp with value
log_prior_exp1(world_fmt * world,long numparam,MYREAL value)847 MYREAL log_prior_exp1(world_fmt *world, long numparam, MYREAL value)
848 {
849 return log_prior_gamma1(world,numparam,value);
850 /* MYREAL val = value;
851 bayes_fmt * bayes = world->bayes;
852 long i = numparam;
853 MYREAL retval = -HUGE;
854 MYREAL minp;
855 MYREAL maxp;
856 MYREAL mean;
857 MYREAL imean;
858 if((val <= bayes->maxparam[i]) && (val >= bayes->minparam[i]))
859 {
860 maxp = bayes->maxparam[i];
861 minp = bayes->minparam[i];
862 mean = bayes->meanparam[i];
863 imean = 1.0 / mean;
864 // retval = 1.0-(EXP(minp*imean)*(1.0 - EXP((maxp - value)*imean)))/(EXP(minp*imean) - EXP(maxp*imean));
865 retval = (1.0-EXP(imean*(minp - value)))/(1.0 - EXP(imean*(-maxp+minp)));
866 if (retval >0.0)
867 retval = log(retval);
868 else
869 retval = -HUGE;
870 }
871 return retval;*/
872 }
873
874
875
876 ///
877 /// Log Prior distribution ratios between old and new parameter:
878 /// EXPONENTIAL
879 /// $$p[x] = Integrate[Exp[-u/mean]/mean, {u, a, x}]/-Exp[-b/mean] + Exp[-a/mean] $$
880 /// the ratio between old and new parameter prior will be then
881 /// $$
882 ///\frac{e^{-\frac{a}{\text{mean}}}-e^{-\frac{x}{\text{mean}}
883 ///}}{e^{-\frac{a}{\text{mean}}}-e^{-\frac{\text{x0}}{\tex
884 /// t{mean}}}}
885 /// $$
886 /// see under log_prior_ratio_exp(), this needs more careful checking as here we
887 /// depend on current theta and almost certainly the hastings ratio is different from the
888 /// one with the exp proposal.
log_prior_ratio_wexp(MYREAL newparam,MYREAL oldparam,bayes_fmt * bayes,long which)889 MYREAL log_prior_ratio_wexp(MYREAL newparam, MYREAL oldparam, bayes_fmt * bayes, long which)
890 {
891 if((newparam > bayes->maxparam[which]) || (newparam < bayes->minparam[which]))
892 return -HUGE;
893 else
894 return 0.;
895 }
896
log_prior_wexp(world_fmt * world,long numparam)897 MYREAL log_prior_wexp(world_fmt *world, long numparam)
898 {
899 return log_prior_exp(world, numparam);
900 }
901
log_prior_wexp1(world_fmt * world,long numparam,MYREAL value)902 MYREAL log_prior_wexp1(world_fmt *world, long numparam, MYREAL value)
903 {
904 return log_prior_exp1(world, numparam, value);
905 }
906 ///
907 /// Log Prior distribution ratios between old and new parameter:
908 /// MULTIPLIER
909 /// $$p[x] = 1/(lambda m)
910 /// the ratio between old and new parameter prior will be then
911 /// $$
912 ///
913 /// $$
log_prior_ratio_mult(MYREAL newparam,MYREAL oldparam,bayes_fmt * bayes,long which)914 MYREAL log_prior_ratio_mult(MYREAL newparam, MYREAL oldparam, bayes_fmt * bayes, long which)
915 {
916 if((newparam > bayes->maxparam[which]) || (newparam < bayes->minparam[which]))
917 return -HUGE;
918 else
919 {
920 if(newparam > 0.0)
921 return log(newparam/oldparam);
922 else
923 return -MYREAL_MAX;
924 }
925 }
926
927
928 // incorrect
log_prior_mult(world_fmt * world,long numparam)929 MYREAL log_prior_mult(world_fmt *world, long numparam)
930 {
931 long numpop = world->numpop;
932 long start = ((numparam <= numpop || numpop==1) ? 0 : numpop);
933 long stop = ((start == numpop) ? world->numpop2 : numpop);
934 MYREAL * param0 = world->param0;
935 MYREAL minp;
936 MYREAL maxp;
937 MYREAL mean;
938 MYREAL imean;
939 bayes_fmt *bayes = world->bayes;
940 MYREAL value = 0.0;
941 MYREAL mu;
942 long i;
943
944 if(numparam>stop) // rate change
945 {
946 i = world->numpop2 + world->locus;
947 mu = world->options->mu_rates[world->locus];
948 if((mu > bayes->maxparam[i]) || (mu < bayes->minparam[i]))
949 return -HUGE;
950 else
951 {
952 maxp = bayes->maxparam[i];
953 minp = bayes->minparam[i];
954 mean = bayes->meanparam[i];
955 imean = 1.0 / mean;
956 value += log(EXP(maxp * imean) * (-minp + param0[i] + EXP((minp-param0[i]) * imean))/
957 (EXP(maxp * imean) - EXP(minp * imean)));
958 }
959
960 }
961 for(i = start; i < stop; i++)
962 {
963 if(!strchr("0c", world->options->custm2[i]))
964 {
965 maxp = bayes->maxparam[i];
966 minp = bayes->minparam[i];
967 mean = bayes->meanparam[i];
968 imean = 1.0 / mean;
969 if((param0[i] > bayes->maxparam[i]) || (param0[i] < bayes->minparam[i]))
970 return -HUGE;
971 else
972 {
973 value += log(EXP(maxp * imean) * (-minp + param0[i] + EXP((minp-param0[i]) * imean))/
974 (EXP(maxp * imean) - EXP(minp * imean)));
975 }
976 }
977 }
978 return value;
979 }
980
981 ///
982 ///
log_prior_mult1(world_fmt * world,long numparam,MYREAL val)983 MYREAL log_prior_mult1(world_fmt *world, long numparam, MYREAL val)
984 {
985 bayes_fmt * bayes = world->bayes;
986 long i = numparam;
987 MYREAL retval = -HUGE;
988 MYREAL minp;
989 MYREAL maxp;
990 MYREAL mean;
991 MYREAL imean;
992 if((val <= bayes->maxparam[i]) && (val >= bayes->minparam[i]))
993 {
994 maxp = bayes->maxparam[i];
995 minp = bayes->minparam[i];
996 mean = bayes->meanparam[i];
997 imean = 1.0 / mean;
998 retval = log(EXP(maxp * imean) * (-minp + val + EXP((minp-val) * imean))/(EXP(maxp * imean) - EXP(minp * imean)));
999 }
1000 return retval;
1001 }
1002
1003
1004 ///
1005 /// scaling prior used for multiple loci posterior
scaling_prior(world_fmt * world,long numparam,MYREAL val)1006 MYREAL scaling_prior(world_fmt *world, long numparam, MYREAL val)
1007 {
1008 if(numparam < world->numpop2)
1009 {
1010 if(numparam<world->numpop)
1011 return (*log_prior_theta1)(world,numparam,val);
1012 else
1013 return (*log_prior_mig1)(world,numparam,val);
1014 }
1015 else
1016 {
1017 return (*log_prior_rate1)(world, numparam,val);
1018 }
1019 return -HUGE;
1020 }
1021
1022
calculate_prior(world_fmt * world)1023 MYREAL calculate_prior(world_fmt *world)
1024 {
1025 const long numpop = world->numpop;
1026 MYREAL nom1 = (*log_prior_theta)(world, world->numpop);
1027 MYREAL nom2 = (*log_prior_mig)(world, world->numpop2);
1028 MYREAL retval = nom1;
1029 //printf("%i> numpop=%li retval=%f ",myID, numpop, retval);
1030 if (numpop>1)
1031 {
1032 retval += nom2;
1033 }
1034 if(world->bayes->mu)
1035 {
1036 retval += (*log_prior_rate)(world, world->numpop2);
1037 }
1038 //printf("%f\n",retval);
1039 return retval;
1040 }
1041
calculate_plotter_priors(world_fmt * world)1042 void calculate_plotter_priors(world_fmt *world)
1043 {
1044 bayes_fmt *bayes = world->bayes;
1045 long np = world->numpop2 + world->bayes->mu;
1046 MYREAL *priors;
1047 MYREAL *maxpriors;
1048 MYREAL *totalspriors;
1049 boolean *visited;
1050 long *bins = world->bayes->histogram[0].bins;
1051 MYREAL allpriortotal;
1052 long targetsumbin;
1053 long pa0, pa, rpa, bin;
1054 MYREAL h,v,w;
1055 visited = (boolean *) mycalloc(np,sizeof(boolean));
1056 #ifdef DEBUG
1057 float * sums = (float *) mycalloc(np,sizeof(float));
1058 #endif
1059 priors = (MYREAL *) mycalloc(bayes->histogram[0].binsum + (world->loci*2)+1, sizeof(MYREAL));
1060 doublevec1d(&maxpriors, np);
1061 doublevec1d(&totalspriors, np);
1062 targetsumbin = 0;
1063 memset(visited,0,sizeof(boolean)*np);
1064 for(pa0 = 0; pa0 < np; pa0++)
1065 {
1066 if(world->bayes->map[pa0][1] == INVALID)
1067 continue;
1068 else
1069 pa = world->bayes->map[pa0][1];
1070
1071 if(pa0 >= world->numpop2)
1072 rpa=world->numpop2;
1073 else
1074 rpa = pa;
1075
1076 if(visited[rpa]==TRUE)
1077 continue;
1078 visited[rpa] = TRUE;
1079 w = world->bayes->deltahist[rpa];
1080 //logw = log(w);
1081 maxpriors[rpa] = -HUGE;
1082 for(bin=targetsumbin; bin < targetsumbin + bins[rpa]; bin++)
1083 {
1084 h = world->bayes->deltahist[rpa];
1085 v = world->bayes->histogram[0].minima[rpa] + (bin-targetsumbin) * h + h/2;
1086 if (v-h/2. == 0.0)
1087 priors[bin]= 0.0;
1088 else
1089 priors[bin] = scaling_prior(world,rpa,v);// + logw;
1090 if(priors[bin] > maxpriors[rpa])
1091 maxpriors[rpa] = (MYREAL) priors[bin];
1092 }
1093 totalspriors[rpa]=0.0;
1094 for(bin=targetsumbin; bin < targetsumbin + bins[rpa]; bin++)
1095 {
1096 totalspriors[rpa] += exp(priors[bin] - maxpriors[rpa]);
1097 }
1098 totalspriors[rpa] = log(totalspriors[rpa]);
1099 allpriortotal=0.0;
1100 for(bin=targetsumbin; bin < targetsumbin + bins[rpa]; bin++)
1101 {
1102 priors[bin] += -maxpriors[rpa] - totalspriors[rpa];
1103 priors[bin] = exp(priors[bin]);
1104 #ifdef DEBUG
1105 sums[rpa] += priors[bin];
1106 #endif
1107 }
1108 targetsumbin += bins[rpa];
1109 }
1110 #ifdef DEBUG
1111 for(pa0 = 0; pa0 < np; pa0++)
1112 printf("sums[%li]=%f ==?== 1.0??\n",pa0,sums[pa0]);
1113 myfree(sums);
1114 #endif
1115 world->bayes->priors = priors;
1116 myfree(maxpriors);
1117 myfree(totalspriors);
1118 myfree(visited);
1119 }
1120
1121
1122 ///
1123 /// Uniform flat prior, coding based on Rasmus Nielsen, veryfied with mathematica
1124 /// the correlation among values is dependent on delta
1125 MYREAL
propose_uni_newparam(MYREAL param,long which,bayes_fmt * bayes,MYREAL * r)1126 propose_uni_newparam (MYREAL param, long which, bayes_fmt * bayes, MYREAL *r)
1127 {
1128 MYREAL np;
1129 MYREAL rr = (*r) - 0.5;
1130 MYREAL delta = bayes->delta[which];
1131 MYREAL minparam = bayes->minparam[which];
1132 MYREAL maxparam = bayes->maxparam[which];
1133 MYREAL thisdelta = rr * delta;
1134 np = param + thisdelta;
1135 if (np > maxparam)
1136 {
1137 np = 2. * maxparam - np;
1138 }
1139
1140 else
1141 {
1142 if (np < minparam)
1143 {
1144 np = 2. * minparam - np;
1145 }
1146 }
1147 return np;
1148 }
1149
1150
1151
1152 ///
1153 /// Hastings ratio calculator for uniform distribution
hastings_ratio_uni(MYREAL newparam,MYREAL oldparam,MYREAL delta,MYREAL r,bayes_fmt * bayes,long whichparam)1154 MYREAL hastings_ratio_uni(MYREAL newparam, MYREAL oldparam, MYREAL delta, MYREAL r, bayes_fmt * bayes, long whichparam)
1155 {
1156 // return (newparam - oldparam)/bayes->priormean[whichparam];
1157 return 0.;
1158 }
1159
1160
1161
1162 /// \brief Exponential prior distribution
1163 /// exponential prior distribution using the gamma prior with alpha=1
1164 ///
1165 MYREAL
propose_exp_newparam(MYREAL param,long which,bayes_fmt * bayes,MYREAL * r)1166 propose_exp_newparam (MYREAL param, long which, bayes_fmt *bayes, MYREAL * r)
1167 {
1168 return propose_gamma_newparam (param, which, bayes, r);
1169 /* MYREAL np = 0.;
1170 MYREAL rr = *r;
1171 const MYREAL delta = bayes->delta[which];
1172 const MYREAL minparam = bayes->minparam[which];
1173 const MYREAL maxparam = bayes->maxparam[which];
1174 const MYREAL mean = bayes->meanparam[which];
1175 np = - mean * log(-(EXP(-minparam/mean)*(rr-1)) + (EXP(-maxparam/mean)*rr));
1176 return np;*/
1177 }
1178
1179 ///
1180 /// Hastings ratio calculator for exponential distribution
hastings_ratio_exp(MYREAL newparam,MYREAL oldparam,MYREAL delta,MYREAL r,bayes_fmt * bayes,long whichparam)1181 MYREAL hastings_ratio_exp(MYREAL newparam, MYREAL oldparam, MYREAL delta, MYREAL r, bayes_fmt * bayes, long whichparam)
1182 {
1183 return 0.;
1184 }
1185
1186
1187 ///
1188 /// uses an exponential prior with boundaries minparm and maxparm and uses also a window around the old parameter
1189 /// the window is of arbitrary size
1190 /// (see propose_exp_newparam() for details)
1191 MYREAL
propose_expb_newparam(MYREAL param,long which,bayes_fmt * bayes,MYREAL * r)1192 propose_expb_newparam (MYREAL param, long which, bayes_fmt *bayes, MYREAL *r)
1193 {
1194 return propose_gamma_newparam (param, which, bayes, r);
1195 /* const MYREAL delta = bayes->delta[which];
1196 const MYREAL minparam = bayes->minparam[which];
1197 const MYREAL maxparam = bayes->maxparam[which];
1198 const MYREAL mean = bayes->meanparam[which];
1199 MYREAL a = MAX(param - delta, 0.0);
1200 MYREAL b = param + delta;
1201 MYREAL rr = (*r);
1202 MYREAL np = - mean * log(-(EXP(-a/mean)*(rr-1)) + (EXP(-b/mean)* rr));
1203 while(np < minparam || np > maxparam)
1204 {
1205 if(np < minparam)
1206 {
1207 np = 2*minparam - np;
1208 }
1209 else
1210 {
1211 np = 2*maxparam - np;
1212 }
1213 }
1214 return np;*/
1215 }
1216
1217 ///
1218 /// Hastings ratio calculator for exponential distribution
hastings_ratio_expb(MYREAL newparam,MYREAL oldparam,MYREAL delta,MYREAL r,bayes_fmt * bayes,long whichparam)1219 MYREAL hastings_ratio_expb(MYREAL newparam, MYREAL oldparam, MYREAL delta, MYREAL r, bayes_fmt * bayes, long whichparam)
1220 {
1221 return 0. ;
1222 }
1223
1224 ///
1225 /// Multiplier move, the parameter is multiplied with the rate 1/(lambda*m)
1226 ///
1227 /// Requirements:
1228 /// a < m < b
1229 /// a = 1/b
1230 ///
1231 ///
1232 MYREAL
propose_mult_newparam(MYREAL param,long which,bayes_fmt * bayes,MYREAL * r)1233 propose_mult_newparam (MYREAL param, long which, bayes_fmt *bayes, MYREAL *r)
1234 {
1235 const MYREAL delta = bayes->delta[which];
1236 //const MYREAL minparam = bayes->minparam[which];
1237 const MYREAL maxparam = bayes->maxparam[which];
1238 MYREAL multiplier; // minmult < multiplier < maxmult
1239 MYREAL maxmult = delta;
1240 //MYREAL minmult = 1/maxmult;
1241 MYREAL lambda = 2. * log(maxmult); // tuning parameter \lambda = 2 ln(b)
1242 MYREAL np;
1243
1244 multiplier = EXP(lambda * ((*r) - 0.5));
1245
1246 np = multiplier * param;
1247 (*r) = multiplier;
1248
1249 while(np > maxparam)
1250 {
1251 np = maxparam * maxparam / np;
1252 }
1253 return np;
1254 }
1255
1256 ///
1257 /// Hastings ratio calculator for exponential distribution with multiplicator
1258 /// using the old parameter as mean value
1259 /// simply the jacobian from x->param, matrix if derivatives [should be seocnd right?]
1260 /// [check this with the green paper]
1261 /// first derivatives
hastings_ratio_mult(MYREAL newparam,MYREAL oldparam,MYREAL delta,MYREAL r,bayes_fmt * bayes,long whichparam)1262 MYREAL hastings_ratio_mult(MYREAL newparam, MYREAL oldparam, MYREAL delta, MYREAL r, bayes_fmt * bayes, long whichparam)
1263 {
1264 // q(x|x') 1/(lambda(1/m)) | dx'/dx dx'/du |
1265 // ------------ ----------------------------- | | =
1266 // q(x'|x) 1/(lambda m) | du'/dx du'/du |
1267 //
1268 // | dmx/dx dmx/dm |
1269 // = m^2 | | = m^2 |(m (-1/m^2) - x zero| = m^2 m/m^2 = m
1270 // | d(1/m)/dx d(1/m)/dm |
1271
1272 return log(r); // rate multiplier
1273 }
1274
1275
1276
1277 ///
1278 /// verbose log to stdout: report all changes [too much output, this will slow down run a lot]
print_bayes_verbose(long which,world_fmt * world,MYREAL newparam,boolean success)1279 void print_bayes_verbose(long which, world_fmt *world, MYREAL newparam, boolean success)
1280 {
1281 long i;
1282 // long frompop;
1283 //long topop;
1284 //MYREAL theta;
1285
1286 if(world->options->verbose)
1287 {
1288 fprintf(stdout,"%i> <%li> ", myID, which);
1289 for(i=0;i<world->numpop;i++)
1290 fprintf(stdout,"%f ", which==i ? newparam : world->param0[i]);
1291 //@@if(world->options->usem)
1292 //@@{
1293 for(i=world->numpop;i<world->numpop2;i++)
1294 fprintf(stdout,"%f ", which==i ? newparam : world->param0[i]);
1295 //@@}
1296 //@@else
1297 //@@ {
1298 //@@ for(i=world->numpop;i<world->numpop2;i++)
1299 //@@ {
1300 //@@m2mm(i,world->numpop,&frompop,&topop);
1301 //@@theta = world->param0[topop];
1302 //@@fprintf(stdout,"%f ", which==i ? newparam * theta :
1303 //@@ world->param0[i] * theta);
1304 //@@ }
1305 //@@}
1306 if(world->bayes->mu)
1307 fprintf(stdout,"%f ", which==world->numpop2 ? newparam : world->options->mu_rates[world->locus]);
1308 fprintf(stdout,"%f %c\n",world->param_like,success ? '*': ' ');
1309 }
1310 }
1311
1312 ///
1313 /// standard metropolis proposal
1314 MYREAL
uniform_proposal(long which,world_fmt * world,MYREAL * oldparam,boolean * success)1315 uniform_proposal(long which, world_fmt * world, MYREAL *oldparam, boolean *success)
1316 {
1317 #ifdef RANGEDEBUG
1318 int rangedebug=0;
1319 #endif
1320 //long topop;
1321 //long frompop;
1322
1323 const long numpop2 = world->numpop2;
1324 const MYREAL themin = world->bayes->minparam[which];
1325 const MYREAL themax = world->bayes->maxparam[which];
1326 // MYREAL mean;
1327 MYREAL delta;
1328 MYREAL newparam;
1329 MYREAL oldval;
1330 MYREAL r;
1331 MYREAL newval;
1332 MYREAL hastingsratio;
1333 //MYREAL theta;
1334 //MYREAL nm;
1335 const MYREAL murate = world->options->mu_rates[world->locus];
1336 MYREAL * param0 = world->param0;
1337 bayes_fmt * bayes = world->bayes;
1338 //MYREAL inheritance_scalar = world->options->inheritance_scalars[world->locus];
1339 const boolean verbose = (world->heat > (1.0 - SMALLEPSILON)) && myID==MASTER && world->options->verbose;
1340
1341 // calculate the probability for the old parameter set
1342 // this is done from scratch because the tree might have changed in the last step
1343 delta = bayes->delta[which];
1344 oldval = probg_treetimes(world);
1345 r = UNIF_RANDUM();
1346 // draw a new parameter from the prior distribution
1347 // for migration parameters we need to distinguish whether the
1348 // prior is in terms of M or xNm
1349 if(which < world->numpop)
1350 {
1351 newparam = -1000.0;
1352 //while(newparam < themin || newparam > themax)
1353 newparam = (*propose_newtheta) (param0[which],which, bayes, &r);
1354 check_min_max_param(&newparam,themin,themax);
1355 hastingsratio = (*hastings_ratiotheta)(newparam, oldparam[which], delta, r, bayes, which);
1356 // add the prior ratio to the log of the hastings ratios
1357 hastingsratio += (*log_prior_ratiotheta)(newparam,oldparam[which], bayes, which);
1358 bayes_set_param(world->param0,newparam,which,world->options->custm2, world->numpop);
1359 reprecalc_world(world, which);
1360 // calculate prob(G|params)prob(D|G)
1361 newval = probg_treetimes(world);
1362 }
1363 else
1364 {
1365 if(which < numpop2)
1366 {
1367 //@@if(world->options->usem)
1368 //@@ {
1369 newparam = -1000.0;
1370 // while(newparam < themin || newparam > themax)
1371 newparam = (*propose_newmig) (param0[which], which, bayes, &r);
1372 check_min_max_param(&newparam,themin,themax);
1373 hastingsratio = (*hastings_ratiomig)(newparam, oldparam[which], delta, r, bayes, which);
1374 // add the prior ratio to the log of the hastings ratios
1375 hastingsratio += (*log_prior_ratiomig)(newparam,oldparam[which], bayes, which);
1376 //@@ }
1377 //@@else
1378 //@@ {
1379 //@@ m2mm(which,world->numpop,&frompop,&topop);
1380 //@@ theta = param0[topop]*inheritance_scalar;
1381 //@@ delta = delta/theta; // delta is in terms of xNm! but needs to be in terms of M
1382 //@@ nm = param0[which] * theta;
1383 //@@ if(nm > themax)
1384 //@@ nm = themax;
1385 //@@ if(nm < themin)
1386 //@@ nm = themin;
1387 //@@ newparam = -1000.0;
1388 //@@ while(newparam < themin || newparam > themax)
1389 //@@ newparam = (*propose_newmig) (nm, which, bayes, &r);
1390 //@@ hastingsratio = (*hastings_ratiomig)(newparam, oldparam[which], delta, r, bayes, which);
1391 // add the prior ratio to the log of the hastings ratios
1392 //@@ hastingsratio += (*log_prior_ratiomig)(newparam, theta*oldparam[which], bayes, which);
1393 //@@ newparam /= theta;
1394 //@@ }
1395 bayes_set_param(world->param0,newparam,which,world->options->custm2, world->numpop);
1396 reprecalc_world(world, which);
1397 // calculate prob(G|params)
1398 newval = probg_treetimes(world);
1399 }
1400 else
1401 {
1402 newparam = -1000.0;
1403 //while(newparam < themin || newparam > themax)
1404 newparam = (*propose_newrate) (murate, which, bayes, &r);
1405 check_min_max_param(&newparam,themin,themax);
1406 hastingsratio = (*hastings_ratiorate)(newparam, murate, delta, r, bayes, which);
1407 // add the prior ratio to the log of the hastings ratios
1408 hastingsratio += (*log_prior_ratiorate)(newparam, murate, bayes, which);
1409 world->options->mu_rates[world->locus] = newparam;
1410 world->options->lmu_rates[world->locus] = log(newparam);
1411 // change the tree length because of the rate
1412 reprecalc_world(world, which);
1413 recalc_timelist(world, world->options->mu_rates[world->locus] , oldparam[which]);
1414 // calculate prob(G|params)
1415 newval = probg_treetimes(world);
1416 }
1417 }
1418 //Acceptance or rejection of the new value
1419 *success = bayes_accept(newval, oldval,world->heat, hastingsratio);
1420 if(*success)
1421 {
1422 if(verbose)
1423 print_bayes_verbose(which,world, newparam, *success);
1424 return newval;
1425 }
1426 else
1427 {
1428 // if(world->cold && which==numpop2)
1429 // printf("\n");
1430 if(world->options->prioralone)
1431 {
1432 *success = TRUE;
1433 if(verbose)
1434 print_bayes_verbose(which,world, newparam, *success);
1435 return newval;
1436 }
1437 if(verbose)
1438 print_bayes_verbose(which,world, newparam, *success);
1439 bayes_set_param(world->param0,oldparam[which],which,world->options->custm2, world->numpop);
1440 reprecalc_world(world, which);
1441 return oldval;
1442 }
1443 }
1444
traverse_adjust(node * theNode,MYREAL new_old_ratio)1445 void traverse_adjust(node *theNode, MYREAL new_old_ratio)
1446 {
1447 MYREAL age = 0.;
1448
1449 if(theNode == NULL)
1450 return;
1451
1452 if(theNode->type != 't')
1453 {
1454 // fprintf(stdout,"(%i%c-%f/%f)",myID,theNode->type,theNode->tyme,theNode->tyme*new_old_ratio);
1455 traverse_adjust(theNode->next->back, new_old_ratio);
1456 if(theNode->type != 'm')
1457 traverse_adjust(theNode->next->next->back, new_old_ratio);
1458 }
1459 else
1460 {
1461 //fprintf(stdout,"(%i%c-%f/%f)\n",myID,theNode->type,theNode->tyme,theNode->tyme*new_old_ratio);
1462 }
1463 age = theNode->tyme * new_old_ratio;
1464 adjust_time_all (theNode, age);
1465 }
1466
1467
1468 ///
1469 /// changes branchlength of all branches and the likelihood needs to be calculated completely
1470 void
recalc_timelist(world_fmt * world,MYREAL new_ratio,MYREAL old_ratio)1471 recalc_timelist (world_fmt * world, MYREAL new_ratio, MYREAL old_ratio)
1472 {
1473 MYREAL new_old_ratio = new_ratio / old_ratio;
1474 if (fabs(new_old_ratio - 1.0) < SMALLEPSILON)
1475 return;
1476 // printf("\n\n+++++++++++++\n%f\n++++++++++++++++++++++++++++++++++++++++++++++++\n",new_old_ratio);
1477 traverse_adjust(world->root, new_old_ratio);
1478 set_v (world->root->next->back);
1479 first_smooth(world,world->locus);
1480 construct_tymelist (world, &world->treetimes[0]);
1481 #ifdef RATE_DEBUG
1482 //if(world->cold)
1483 printf("%i> HEAT: %f, RATE: newrate=%f oldrate=%f oldlike=%f ",myID, world->heat, new_ratio, old_ratio,world->likelihood[world->G]);
1484 #endif
1485 world->likelihood[world->G] = treelikelihood(world);
1486 #ifdef RATE_DEBUG
1487 // if(world->cold)
1488 printf("newlike=%f\n",world->likelihood[world->G]);
1489 #endif
1490 }
1491
1492 void
recalc_timelist_1(world_fmt * world,MYREAL new_old_ratio)1493 recalc_timelist_1 (world_fmt * world, MYREAL new_old_ratio)
1494 {
1495 long k;
1496 MYREAL age;
1497
1498 node *theNode;
1499 if(new_old_ratio < SMALLEPSILON)
1500 {
1501 new_old_ratio = SMALLEPSILON;
1502 }
1503 printf("\n\n+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n");
1504 for (k = 0; k < world->treetimes[0].T; k++)
1505 {
1506 world->treetimes[0].tl[k].age *= new_old_ratio;
1507 age = world->treetimes[0].tl[k].age;
1508 theNode = world->treetimes[0].tl[k].eventnode;
1509 adjust_time_all (theNode, age);
1510 }
1511 for (k = 0; k < world->treetimes[0].T; k++)
1512 {
1513 theNode = world->treetimes[0].tl[k].eventnode;
1514 if (theNode->top == 0 )
1515 error("Node is not top\n");
1516 if(theNode->type=='i')
1517 printf("**p(%li):%f l(%li):%f r(%li):%f |||| %li %li\n",theNode->id, theNode->tyme, theNode->next->back->id, theNode->next->back->tyme, theNode->next->next->back->id, theNode->next->next->back->tyme, world->treetimes[0].tl[k].lineages[0],world->treetimes[0].tl[k].lineages[1]);
1518 if(theNode->type=='m')
1519 printf("**p(%li):%f l(%li):%f r: ---- |||| %li %li\n",theNode->id, theNode->tyme, theNode->next->back->id, theNode->next->back->tyme, world->treetimes[0].tl[k].lineages[0],world->treetimes[0].tl[k].lineages[1]);
1520 if(theNode->type=='t')
1521 printf("**p(%li):%f l:---- r: ---- |||| %li %li\n",theNode->id, theNode->tyme, world->treetimes[0].tl[k].lineages[0],world->treetimes[0].tl[k].lineages[1]);
1522 if(theNode->type !='t' && (theNode->next->back->tyme > theNode->tyme))
1523 {
1524 warning("***********************Time mess: above=%f this=%f\n,",theNode->next->back->tyme , theNode->tyme);
1525 }
1526 }
1527 treelikelihood(world);
1528 // first_smooth(world,world->locus);
1529 //construct_tymelist (world, &world->treetimes[0]);
1530 }
1531
1532
1533 ///
1534 /// Update for parameters using Bayesian inference
1535 long
bayes_update(world_fmt * world)1536 bayes_update (world_fmt * world)
1537 {
1538 //static unsigned long count=1;
1539 //long counttemp = world->options->lsteps * world->maxreplicate;
1540 const boolean mu = world->bayes->mu;
1541 boolean success=FALSE;
1542 long ba=0;
1543 long which;
1544 long w; // w is a function of bayes->map[which]
1545 const long numpop = world->numpop;
1546 const long numpop2 = world->numpop2;
1547 const long numpop2rate = world->numpop2 + (mu ? 1 : 0) ;
1548 // preparation for parameter selection using RANDINT(0..npx)
1549 const long npx = numpop2rate - 1;
1550 long type;
1551 // MYREAL dummy=0;//for expallslice
1552 MYREAL *oldparam=NULL;
1553 MYREAL newval = -HUGE;
1554 const MYREAL murate = world->options->mu_rates[world->locus];
1555 bayes_fmt *bayes = world->bayes;
1556 //MYREAL inheritance_scalar = world->options->inheritance_scalars[world->locus];
1557 // long frompop;
1558 //long topop;
1559 //MYREAL theta;
1560 //MYREAL nm;
1561 const boolean writelog = (myID==MASTER &&
1562 world->options->progress &&
1563 world->cold);
1564 const MYREAL updateratio = world->options->updateratio;
1565 const long tt = world->options->lsteps * world->increment;
1566 const long ten = ((tt > 100000) ? (tt/10) : (tt/3));
1567
1568
1569 // progress reporter nested here so that we do need to ask whether we run in Bayes mode or not
1570 if(!world->in_burnin && writelog)
1571 {
1572 world->treesdone += 1;
1573 if(writelog && (world->treesdone % ten == 0))
1574 {
1575 bayes_progress(world, ten);
1576 }
1577 }
1578 // choose genealogy or parameter update
1579 if(RANDUM() < updateratio)
1580 {
1581 return -1L; // update the tree
1582 }
1583
1584 // select parameter
1585 which /*= world->bayes->paramnum*/ = RANDINT(0L,npx);
1586 while(bayes->map[which][1] == INVALID)
1587 {
1588 which /*= world->bayes->paramnum*/ = RANDINT(0L,npx);
1589 }
1590 // savecopy the old parameters
1591 // we use a memcopy because the custom migration matrix can be filled with m and M and s and S
1592 // this will change multiple parameters at the same time
1593 doublevec1d(&oldparam,numpop2rate);
1594 memcpy(oldparam, world->param0,sizeof(MYREAL) * world->numpop2);
1595 if(mu)
1596 oldparam[numpop2] = murate;
1597 if(which < numpop)
1598 {
1599 w = world->bayes->map[which][1];
1600 type = THETAPRIOR;
1601 if(world->options->slice_sampling[THETAPRIOR])
1602 {
1603 newval = expslice(&world->param0[w], which, world, log_prior_ratiotheta);
1604 //expallslice(world->param0, &dummy, which, world);
1605 // printf("posterior=%f like=%f prior=%f\n",newval,world->likelihood[world->locus],newval - world->likelihood[world->locus]);
1606 //newval = world->param0[which];
1607 success = TRUE;
1608 }
1609 else
1610 {
1611 newval = uniform_proposal(w, world, oldparam, &success);
1612 }
1613 }
1614 else
1615 {
1616 if(which < numpop2)
1617 {
1618 w = world->bayes->map[which][1];
1619 type = MIGPRIOR;
1620 if(world->options->slice_sampling[MIGPRIOR])
1621 {
1622 //@@if (world->options->usem)
1623 //@@ {
1624 newval = expslice(&world->param0[w], which, world, log_prior_ratiomig);
1625 //@@ }
1626 //@@else
1627 //@@{
1628 //@@m2mm(w, numpop,&frompop,&topop);
1629 //@@theta = world->param0[topop] * inheritance_scalar;
1630 //@@nm=world->param0[w] * theta;
1631 //@@newval = expslice(&nm, which, world, log_prior_ratiomig);
1632 //@@world->param0[w] = nm / theta;
1633 //@@ }
1634 //expallslice(world->param0, &dummy, which, world);
1635 //newval = world->param0[which];
1636 success = TRUE;
1637 }
1638 else
1639 {
1640 newval = uniform_proposal(which, world, oldparam, &success);
1641 }
1642 }
1643 else
1644 {
1645 type = RATEPRIOR;
1646 if(world->options->slice_sampling[RATEPRIOR])
1647 {
1648 newval = slice(&world->options->mu_rates[world->locus], which, world, log_prior_ratiorate);
1649 //expallslice(world->param0, &world->options->mu_rates[world->locus], which, world);
1650 //newval = world->options->mu_rates[which];
1651 success = TRUE;
1652 }
1653 else
1654 {
1655 newval = uniform_proposal(which, world, oldparam, &success);
1656 }
1657 }
1658 }
1659 world->logprior = calculate_prior(world);
1660 if(success)
1661 {
1662 bayes->oldval = newval;
1663 world->param_like = newval;
1664 ba = 1;
1665 bayes->accept[which] += ba;
1666 autotune_proposal(world,which);
1667 }
1668 else
1669 {
1670 autotune_proposal(world,which);
1671 memcpy(world->param0, oldparam, sizeof(MYREAL) * world->numpop2);
1672 if(type==RATEPRIOR)
1673 {
1674 recalc_timelist(world, oldparam[which], world->options->mu_rates[world->locus]);
1675 }
1676 else
1677 {
1678 reprecalc_world(world, which);
1679 }
1680 if(mu)
1681 {
1682 recalc_timelist(world, oldparam[which], world->options->mu_rates[world->locus]);
1683 world->options->mu_rates[world->locus] = oldparam[numpop2];
1684 world->options->lmu_rates[world->locus] = log(oldparam[numpop2]);
1685 world->param0[which] = oldparam[which];
1686 reprecalc_world(world, which);
1687 }
1688 bayes->oldval = newval; // uniform_proposal delivers either old or new value see success
1689 ba = 0;
1690 }
1691 bayes->trials[which] += 1;
1692 #ifdef DEBUG
1693 //printf("%i> trial=%li of %li\n", myID, bayes->trials[which], world->options->lsteps);
1694 #endif
1695 // put in here the prior distribution recorder
1696 // record_prior(world->bayes->priorhist, world->locus, which,
1697 //
1698 myfree(oldparam);
1699 return ba;
1700 }
1701
1702
1703
1704 ///
1705 /// fill bayes record in array for histograms and bayesfile
1706 /// set parameter meaning according to option settings
bayes_save_parameter(world_fmt * world,long pnum,long step)1707 void bayes_save_parameter(world_fmt *world, long pnum, long step)
1708 {
1709 long i;//,i0;
1710 //long frompop = 0;
1711 //long topop = 0;
1712 long n = world->numpop2 + (world->bayes->mu) ;
1713 long nn = 2 + n;
1714 long numpop = world->numpop;
1715 long numpop2 = world->numpop2;
1716 long nnpnum = nn * pnum;
1717 boolean mu = world->bayes->mu;
1718 // MYREAL murate = world->options->meanmu[world->locus] * world->options->mu_rates[world->locus];
1719 MYREAL murate = world->options->mu_rates[world->locus];
1720 MYREAL inheritance_scalar = world->options->inheritance_scalars[world->locus];
1721 MYREAL * param0 = world->param0;
1722 worldoption_fmt *wopt = world->options;
1723 bayes_fmt * bayes = world->bayes;
1724 //MYREAL nm;
1725 (bayes->params+(nnpnum))[0] = bayes->oldval;
1726 (bayes->params+(nnpnum))[1] = world->likelihood[world->G];
1727
1728 //@@ if(wopt->usem)
1729 //@@ {
1730 memcpy(bayes->params+(nnpnum+2), param0,sizeof(MYREAL)* numpop2);
1731 if(inheritance_scalar != 1.0)
1732 {
1733 for(i = 0; i < numpop; i++)
1734 {
1735 (bayes->params+(nnpnum + 2))[i] = param0[i] * inheritance_scalar;
1736 }
1737 }
1738 //@@ }
1739 //@@ else
1740 //@@ {
1741 //@@ if(inheritance_scalar != 1.0)
1742 //@@ {
1743 //@@ for(i = 0; i < numpop; i++)
1744 //@@ {
1745 //@@ (bayes->params+(nnpnum + 2))[i] = param0[i] * inheritance_scalar;
1746 //@@ }
1747 //@@ for(i0 = numpop; i0 < numpop2; i0++)
1748 //@@ {
1749 //@@ if(!shortcut(i0,bayes, &i))
1750 //@@ {
1751 //@@ m2mm(i, numpop,&frompop,&topop);
1752 //@@ nm=param0[i] * param0[topop] * inheritance_scalar;
1753 //@@ if (nm > bayes->maxparam[i])
1754 //@@ nm = bayes->maxparam[i];
1755 //@@ if (nm < bayes->minparam[i])
1756 //@@ nm = bayes->minparam[i];
1757 //@@ (bayes->params+(nnpnum + 2))[i] = nm;
1758 //@@ }
1759 //@@ }
1760 //@@ }
1761 //@@ else
1762 //@@ {
1763 //@@ memcpy(bayes->params+(nnpnum+2), param0,sizeof(MYREAL)* numpop);
1764 //@@ for(i0 = numpop; i0 < numpop2; i0++)
1765 //@@ {
1766 //@@ if(!shortcut(i0,bayes, &i))
1767 //@@ {
1768 //@@ m2mm(i, numpop,&frompop,&topop);
1769 //@@ nm=param0[i] * param0[topop];
1770 //@@ if (nm > bayes->maxparam[i])
1771 //@@ nm = bayes->maxparam[i];
1772 //@@ if (nm < bayes->minparam[i])
1773 //@@ nm = bayes->minparam[i];
1774 //@@ (bayes->params+(nnpnum + 2))[i] = nm;
1775 //@@ }
1776 //@@ }
1777 //@@ }
1778 //@@ }
1779 if(mu)
1780 {
1781 // we only write one rate per record because the locus is known
1782 (bayes->params+(nnpnum+2))[numpop2] = murate;
1783 }
1784
1785 if(wopt->has_bayesmdimfile)
1786 {
1787 print_bayes_tofile(world->bayesmdimfile, bayes->params+(nn*pnum), bayes, world, numpop2, bayes->mdiminterval, world->locus, world->replicate, step);
1788 }
1789 }
1790
1791 ///
1792 /// print all parameters continously to a file. This may produce HUGE files
1793 /// and overrun your disk quota
1794 #ifdef ZNZ
print_bayes_tofile(znzFile mdimfile,MYREAL * params,bayes_fmt * bayes,world_fmt * world,long numpop2,long mdiminterval,long locus,long replicate,long step)1795 void print_bayes_tofile(znzFile mdimfile, MYREAL *params, bayes_fmt *bayes, world_fmt *world, long numpop2, long mdiminterval, long locus, long replicate, long step)
1796 #else
1797 void print_bayes_tofile(FILE *mdimfile, MYREAL *params, bayes_fmt *bayes, world_fmt *world, long numpop2, long mdiminterval, long locus, long replicate, long step)
1798 #endif
1799 {
1800 // long i;
1801 long j, j0;
1802 long interval = mdiminterval > 0 ? mdiminterval : 1;
1803 boolean *visited;
1804 long nn = world->numpop2 + (world->bayes->mu);
1805 //#ifdef MPI
1806 #if defined(MPI) && !defined(PARALIO)
1807 float *temp;
1808 long z=0;
1809 #else
1810 #ifdef PARALIO
1811 MPI_Status status;
1812 #endif
1813 int fmt = 5;
1814 long digits;
1815 long c=0;
1816 char *temp;
1817 #endif
1818 bayes->mdimfilecount[locus] += 1;
1819 if (bayes->mdimfilecount[locus] % interval == 0)
1820 {
1821 visited = (boolean*) mycalloc(nn+2,sizeof(boolean));
1822 //#ifdef MPI
1823 #if defined(MPI) && !defined(PARALIO)
1824 temp = (float*) mycalloc(nn+9+world->options->heated_chains+2, sizeof(float));
1825 temp[0] = (float) step;
1826 temp[1] = (float) locus;
1827 temp[2] = (float) replicate;
1828 temp[3] = (float) params[0];
1829 temp[4] = (float) params[1];
1830 temp[5] = (float) params[0] - params[1];
1831 temp[6] = (float) world->logprior;
1832 temp[7] = (float) world->treetimes->T-1;
1833 temp[8] = (float) world->treelen;
1834 z=9;
1835 #else
1836 temp = (char*) mycalloc(bayes->linesize, sizeof(char));
1837 c = sprintf(temp,"%li\t%li\t%li\t%f\t%f\t%f\t%f\t%li\t%f",step, locus+1, replicate+1, params[0], params[1],params[0] - params[1], world->logprior, world->treetimes->T-1, world->treelen);
1838 #endif
1839 for (j0=0; j0 < nn ; j0++)// the first value is the posterior Log(P(D|G)P(G|p)
1840 // second is log(P(D|G)
1841 {
1842 if(bayes->map[j0][1] == INVALID)
1843 continue;
1844 else
1845 {
1846 j = bayes->map[j0][1] + 2;
1847 if(visited[j]==TRUE)
1848 continue;
1849 else
1850 visited[j]=TRUE;
1851 }
1852 //#ifdef MPI
1853 #if defined(MPI) && !defined(PARALIO)
1854 temp[z] = params[j];
1855 z++;
1856 // fprintf (stdout, "%f (%li) ",temp[j+6],z);
1857 #else
1858 if(params[j] < SMALLEPSILON)
1859 {
1860 c += sprintf(temp+c,"\t0");
1861 }
1862 else
1863 {
1864 digits = (long) log10(params[j]);
1865 switch(digits)
1866 {
1867 case -8:
1868 case -6:
1869 case -5:
1870 fmt = 10;
1871 break;
1872 case -4:
1873 case -3:
1874 fmt = 8;
1875 break;
1876 case -2:
1877 case -1:
1878 fmt= 5;
1879 break;
1880 case 0:
1881 case 1:
1882 fmt = 4;
1883 break;
1884 case 2:
1885 fmt = 2;
1886 break;
1887 case 3:
1888 fmt = 1;
1889 break;
1890 case 4:
1891 case 5:
1892 case 6:
1893 case 7:
1894 case 8:
1895 fmt = 0;
1896 break;
1897 default:
1898 if(digits<-8)
1899 fmt=20;
1900 else
1901 fmt = 5;
1902 break;
1903 }
1904 c += sprintf(temp+c,"\t%.*f",fmt, params[j]);
1905 }
1906 #endif
1907 }
1908 #if defined(MPI) && !defined(PARALIO)
1909 //#ifdef MPI
1910 print_marginal_like(temp, &z, world);
1911 #else
1912 print_marginal_like(temp, &c, world);
1913 #endif
1914 #if defined(MPI) && !defined(PARALIO)
1915 //#ifdef MPI
1916 mpi_mdim_send(temp,z);
1917 #else
1918 c += sprintf(temp+c,"\n");
1919 if(!bayes->has_linesize)
1920 {
1921 bayes->linesize = TWO * c;
1922 bayes->has_linesize = TRUE;
1923 #ifdef ZNZ
1924 unsigned long bytes = bayes->linesize > ONEMEGABYTE ? bayes->linesize : ONEMEGABYTE;
1925 znzbuffer(mdimfile,bytes);
1926 #endif
1927 }
1928 #ifdef PARALIO
1929 //wrapper function,
1930 // fprintf(stdout,"%i>%li %li\n@@%s@@\n\n",myID,c, strlen(temp),temp);
1931 if(c!=strlen(temp))
1932 {
1933 printf("%i> %li %li\n%s\n",myID,c,strlen(temp),temp);
1934 error("failed in bayes:1959");
1935 }
1936 mpi_mdim_send(&world->mpi_bayesmdimfile,temp,c);
1937 #else
1938 #ifdef ZNZ
1939 //znzprintf(mdimfile,"%s", temp);
1940 znzwrite(temp, (size_t) c, (size_t) sizeof(char), mdimfile);
1941 #else
1942 /* this should do the trick, windows was printing an empty line*/
1943 FPRINTF(mdimfile,"%s", temp);
1944 #endif /*znz*/
1945 #endif /*paralio*/
1946 #endif /*defined(mpi) and !defined(paralio)*/
1947 myfree(temp);
1948 myfree(visited);
1949 }
1950 }
1951
1952
1953
1954 ///
1955 /// Save the Bayesian results for printout into bayesfile
bayes_save(world_fmt * world,long step)1956 void bayes_save(world_fmt *world, long step)
1957 {
1958 long np = 2 + world->numpop2 + (world->bayes->mu);
1959 long pnum = world->bayes->numparams;
1960 long allocparams = world->bayes->allocparams;
1961
1962 if(world->options->has_bayesmdimfile)
1963 {
1964 bayes_save_parameter(world, 0L, step);
1965 }
1966 else
1967 {
1968 bayes_save_parameter(world, pnum, step);
1969 pnum++;
1970 if(pnum >= allocparams)
1971 {
1972 allocparams += 1000;
1973 world->bayes->params = (MYREAL *) myrealloc(world->bayes->params,sizeof(MYREAL)*allocparams*np);
1974 }
1975 world->bayes->numparams = pnum;
1976 world->bayes->allocparams = allocparams;
1977 }
1978 }
1979
setup_bayes_map(longpair * map,char * custm2,long numpop,long numpop2,long size)1980 long setup_bayes_map(longpair *map, char *custm2, long numpop, long numpop2, long size)
1981 {
1982 long invalid_count = 0 ;
1983 long j1, j2;
1984 long n = strlen(custm2);
1985 long s = MIN(n,numpop2);
1986 long t = MIN(s,size);
1987 long i;
1988 long frompop;
1989 long topop;
1990 long first = 0;
1991
1992 for (i=0; i < t; i++)
1993 {
1994 map[i][0] = i;
1995 switch(custm2[i])
1996 {
1997 case '*':
1998 map[i][1] = i;
1999 break;
2000 case '0':
2001 case 'c':
2002 map[i][1] = INVALID;
2003 invalid_count++;
2004 break;
2005 case 's':
2006 case 'S':
2007 m2mm(i,numpop,&frompop,&topop);
2008 if(frompop != topop)
2009 {
2010 j1 = numpop + topop * (numpop-1) + ((frompop < topop) ? frompop : frompop-1);
2011 j2 = numpop + frompop * (numpop-1) + ((topop < frompop) ? topop : topop-1);
2012 //printf("j1=%li, j2=%li\n",j1,j2);
2013 map[i][1] = j1 < j2 ? j1 : j2;
2014 }
2015 else
2016 map[i][1] = -2; //solves miscoding of sizes with "symmetric" to "mean"
2017 break;
2018 case 'm':
2019 case 'M':
2020 map[i][1] = -2;
2021 break;
2022 default:
2023 error("bayes_map needs to know custom migration matrix setting");
2024 }
2025 }
2026 for (i=0; i < numpop; i++)
2027 {
2028 if(map[i][1] == -2)
2029 {
2030 first = map[i][0];
2031 break;
2032 }
2033 }
2034 for (i=0; i < numpop; i++)
2035 {
2036 if(map[i][1] == -2)
2037 {
2038 map[i][1] = first;
2039 }
2040 }
2041 for (i=numpop; i < t; i++)
2042 {
2043 if(map[i][1] == -2)
2044 {
2045 first = map[i][0];
2046 break;
2047 }
2048 }
2049 for (i=numpop; i < t; i++)
2050 {
2051 if(map[i][1] == -2)
2052 {
2053 map[i][1] = first;
2054 }
2055 }
2056
2057 for (i=t; i < size; i++)
2058 {
2059 map[i][0] = i;
2060 map[i][1] = i;
2061 }
2062 return invalid_count;
2063 }
2064
2065 ///
2066 /// Initialize the Bayesian framwork
bayes_init(bayes_fmt * bayes,world_fmt * world,option_fmt * options)2067 void bayes_init(bayes_fmt *bayes, world_fmt *world, option_fmt *options)
2068 {
2069 long size = world->numpop2;
2070 long invalids;
2071 bayes->numpop2 = size;
2072 bayes->mu = options->bayesmurates;
2073 bayes->linesize = MAXBUFSIZE;
2074 bayes->has_linesize=FALSE;
2075 bayes->progresslinesize = world->numpop2 * HUNDRED + 5 * HUNDRED;
2076 if(bayes->mu)
2077 {
2078 size += 1; // this is used with bayes->histograms[locus].params so we only need one rate and not loci# rates.
2079 }
2080 bayes->map = (longpair *) mycalloc(size, sizeof(longpair));
2081 invalids = setup_bayes_map(bayes->map, world->options->custm2, world->numpop, world->numpop2, size);
2082 if(invalids == size)
2083 {
2084 options->updateratio = 1.0;
2085 world->options->updateratio = 1.0;
2086 }
2087 bayes->oldval = -MYREAL_MAX;
2088 bayes->allocparams = 1;
2089 bayes->numparams = 0;
2090 bayes->paramnum = 0;
2091 // datastore for several variables
2092 bayes->datastore = (MYREAL *) mycalloc(8 * size + 2,sizeof(MYREAL));
2093 // pointers into datastore
2094 bayes->priormean = bayes->datastore;
2095 bayes->delta = bayes->priormean + size;
2096 bayes->minparam = bayes->delta + size;
2097 bayes->maxparam = bayes->minparam + size;
2098 bayes->meanparam = bayes->maxparam + size;
2099 bayes->deltahist = bayes->meanparam + size;
2100 bayes->alphaparam = (MYREAL *) mycalloc(size,sizeof(MYREAL));
2101 bayes->betaparam = (MYREAL *) mycalloc(size,sizeof(MYREAL));
2102 //old bayes->deltahist = (MYREAL *) mycalloc(npp, sizeof(MYREAL));
2103 bayes->datastore2 = (long *) mycalloc(2 * (size + 2),sizeof(long));
2104 bayes->accept = bayes->datastore2;
2105 bayes->trials = bayes->accept + size + 1;
2106
2107 // records for all bayes derived values
2108 bayes->params = (MYREAL *) mycalloc(bayes->allocparams * (size+2),sizeof(MYREAL));
2109 // bayes->params[0] = (MYREAL *) mycalloc(size+1,sizeof(MYREAL));
2110
2111 // set counter for mdim file (typically called bayesallfile)
2112 if(world->cold && options->has_bayesmdimfile)
2113 {
2114 bayes->mdimfilecount = (long *) mycalloc(world->loci,sizeof(long));
2115 }
2116 }
2117
2118
2119 /// initialize the Bayes histogram structure, adds an additional element for the
2120 /// summary over loci.
bayes_init_histogram(world_fmt * world,option_fmt * options)2121 void bayes_init_histogram(world_fmt * world, option_fmt * options)
2122 {
2123 long sumloc = world->loci > 1 ? 1 : 0;
2124 bayes_fmt *bayes = world->bayes;
2125 long loc;
2126 long np = world->numpop2;
2127 long npp = np + (bayes->mu); // the params are like...params...murate_locus
2128 long nppt = np + (bayes->mu);// * world->loci); // this includes all different locus-rates
2129 long pa;
2130
2131 bayeshistogram_fmt *hist;
2132 bayes->scaling_factors = (MYREAL *) mycalloc(npp,sizeof(MYREAL));
2133 bayes->histtotal = (MYREAL *) mycalloc(world->loci * nppt, sizeof(MYREAL));
2134 bayes->maxmaxvala = -HUGE;
2135 bayes->prettyhist = options->bayespretty;
2136 bayes->mdiminterval = options->bayesmdiminterval;
2137 bayes->histogram = (bayeshistogram_fmt *) mycalloc(world->loci + 1,sizeof(bayeshistogram_fmt));
2138
2139 for(loc=0; loc < world->loci + sumloc; loc++)
2140 {
2141 hist = &(bayes->histogram[loc]);
2142 hist->bins = (long *) mycalloc(npp, sizeof(long));
2143
2144 hist->results = NULL; //calloc(hist->binsum, sizeof(MYREAL)); // contains histogram, size is bins*numparam
2145 // on a per parameter basis
2146 // structure has a data storage vectors and the following are all pointers into it
2147 hist->numparam = npp; // number of parameters: thetas + migrates + murate
2148 hist->datastore = (MYREAL *) mycalloc(3 * npp + 8 * nppt, sizeof(MYREAL)); // data storage, size is numparam*11
2149 // pointers into data storage
2150 hist->minima = hist->datastore; // contains minimal values for each parameter
2151 hist->maxima = hist->datastore + npp; // contains maximal values for each parameter
2152 hist->adjmaxima = hist->datastore + 2*npp;// holds maxima values used in histogram [are smaller than maxima]
2153 hist->cred50l = hist->datastore + 3*npp; // holds 50%-credibility margins (<all lower values>,
2154 hist->cred50u = hist->datastore + 3*npp+nppt; //<all high values>)
2155 hist->cred95l = hist->datastore + 3*npp+2*nppt; // holds 95%-credibility margins (<all lower values>)
2156 hist->cred95u = hist->datastore + 3*npp+3*nppt; //<all high values>)
2157 hist->modes = hist->datastore + 3*npp+4*nppt; // holds 95%-credibility margins (<all lower values>, <all high values>)
2158 hist->medians = hist->datastore + 3*npp+5*nppt;
2159 hist->means = hist->datastore + 3*npp+6*nppt;
2160 hist->stds = hist->datastore + 3*npp+7*nppt;
2161
2162 for(pa=0; pa < world->numpop; pa++)
2163 {
2164 if(!strchr("c", world->options->custm2[pa]))
2165 {
2166 hist->bins[pa] = options->bayespriortheta->bins;
2167 hist->binsum += options->bayespriortheta->bins;
2168 hist->minima[pa] = HUGE;
2169 }
2170 }
2171 for(pa=world->numpop; pa < world->numpop2; pa++)
2172 {
2173 if(!strchr("0c", world->options->custm2[pa]))
2174 {
2175 hist->bins[pa] = options->bayespriorm->bins;
2176 hist->binsum += options->bayespriorm->bins;
2177 hist->minima[pa] = HUGE;
2178 }
2179 else
2180 {
2181 hist->bins[pa] = 0;
2182 hist->minima[pa] = HUGE;
2183 }
2184 }
2185 if(world->bayes->mu)
2186 {
2187 hist->bins[pa] = options->bayespriorrate->bins;
2188 hist->binsum += options->bayespriorrate->bins;
2189 hist->minima[pa] = HUGE;
2190 }
2191 }
2192 }
2193
2194 ///
2195 /// selects the specific set of prior parameters according to the prior options setting
2196 /// for each parameter with array_count i
select_prior_param(int selector,long i,bayes_fmt * bayes,const prior_fmt * prior)2197 MYINLINE void select_prior_param(int selector, long i, bayes_fmt *bayes, const prior_fmt *prior)
2198 {
2199 bayes->minparam[i] = prior->min;
2200 bayes->maxparam[i] = prior->max;
2201 bayes->meanparam[i] = prior->mean;
2202 bayes->deltahist[i] = (prior->max - prior->min)/ prior->bins;
2203 switch(selector)
2204 {
2205 case MULTPRIOR:
2206 case WEXPPRIOR:
2207 case EXPPRIOR:
2208 bayes->priormean[i] = prior->mean; // fill mean for the call to bayes_epxb_newparam
2209 bayes->delta[i] = prior->delta ; //(prior->min + prior->max)/(20.); // 1/10 of the max span
2210 bayes->alphaparam[i] = 1.0;
2211 bayes->betaparam[i] = find_beta_truncgamma(prior->mean, 1.0, bayes->minparam[i],bayes->maxparam[i]);
2212 break;
2213 case UNIFORMPRIOR:
2214 bayes->priormean[i] = prior->mean; // fill mean for the call to bayes_epxb_newparam
2215 bayes->delta[i] = prior->delta; //(prior->max - prior->min)/(10.); // 1/10 of the max span
2216 break;
2217 case GAMMAPRIOR:
2218 bayes->priormean[i] = prior->mean;
2219 bayes->alphaparam[i] = prior->alpha;
2220 bayes->delta[i] = prior->delta ; //(prior->min + prior->max)/(20.); // 1/10 of the max span
2221 bayes->betaparam[i] = find_beta_truncgamma(prior->mean, prior->alpha, bayes->minparam[i],bayes->maxparam[i]);
2222 break;
2223 default:
2224 error("Problems with the specification of the prior distribution");
2225 break;
2226 }
2227 }
2228
2229 /// fill the Bayesian framework with values
bayes_fill(world_fmt * world,option_fmt * options)2230 void bayes_fill(world_fmt *world, option_fmt *options)
2231 {
2232 long i;
2233 long locus;
2234
2235 bayes_fmt * bayes = world->bayes;
2236 which_theta_prior(options->bayesprior[THETAPRIOR]);
2237 which_mig_prior(options->bayesprior[MIGPRIOR]);
2238 which_rate_prior(options->bayesprior[RATEPRIOR]);
2239
2240 for(i=0; i< world->numpop;i++)
2241 {
2242 select_prior_param(options->bayesprior[THETAPRIOR], i, bayes, options->bayespriortheta);
2243 }
2244
2245 for(i=world->numpop; i< world->numpop2;i++)
2246 {
2247 select_prior_param(options->bayesprior[MIGPRIOR], i, bayes, options->bayespriorm);
2248 }
2249 // the rate parameter is set as an additional parameter as the numpop^2 element out of 0..numpop-1 for Theta
2250 // numpop .. numpop^2-1 for migration.
2251 if(bayes->mu)
2252 {
2253 // set start mu_rate that is compatible with min/max of prior
2254 select_prior_param(options->bayesprior[RATEPRIOR], world->numpop2, bayes, options->bayespriorrate);
2255 for(locus=0; locus < options->muloci;locus++)
2256 {
2257 world->options->mu_rates[locus] = options->bayespriorrate->mean;
2258 world->options->lmu_rates[locus] = log(options->bayespriorrate->mean);
2259 }
2260 for(locus=options->muloci;locus < world->loci; locus++)
2261 {
2262 world->options->mu_rates[locus] = world->options->mu_rates[options->muloci-1];
2263 world->options->lmu_rates[locus] = world->options->lmu_rates[options->muloci-1];
2264 }
2265
2266 }
2267 // attach to the custm migration matrix
2268 bayes->custm2 = world->options->custm2;
2269 }
2270
2271 /// resetting the bayes storage machinery
bayes_reset(world_fmt * world)2272 void bayes_reset(world_fmt * world)
2273 {
2274 bayes_fmt *bayes = world->bayes;
2275 //long size = world->numpop2 + 2 + (bayes->mu);
2276 if(world->options->bayes_infer)
2277 {
2278 //bayes->allocparams = 1;
2279 //bayes->params = (MYREAL *) myrealloc(bayes->params, bayes->allocparams * size * sizeof(MYREAL));
2280 bayes->numparams = 0; // each locus start a new set overwriting the old, allocparam is not reset
2281 }
2282 }
2283
2284 /// free the Bayesian framework
bayes_free(world_fmt * world)2285 void bayes_free(world_fmt *world)
2286 {
2287 long i;
2288 long sumloc = world->loci > 1 ? 1 : 0;
2289 if(world->bayes->histogram != NULL)
2290 {
2291 for(i=0;i < world->loci + sumloc; i++)
2292 {
2293 myfree(world->bayes->histogram[i].bins);
2294 myfree(world->bayes->histogram[i].datastore);
2295
2296 if(myID==MASTER && !world->data->skiploci[i])
2297 {
2298 myfree(world->bayes->histogram[i].results);
2299 // printf("bayes results freed for locus %li\n",i);
2300 myfree(world->bayes->histogram[i].set95);
2301 //printf("bayes set95 freed for locus %li\n",i);
2302
2303 }
2304 }
2305 myfree(world->bayes->histogram);
2306 }
2307 myfree(world->bayes->datastore);
2308 myfree(world->bayes->datastore2);
2309 myfree(world->bayes->alphaparam);
2310 myfree(world->bayes->betaparam);
2311 myfree(world->bayes->params);
2312 myfree(world->bayes->map);
2313 myfree(world->bayes);
2314 }
2315
2316
2317 /// calculate the Bayesian statistics and prints the statistics
bayes_stat(world_fmt * world)2318 void bayes_stat(world_fmt *world)
2319 {
2320 MYREAL threshold;
2321 MYREAL mutot=0;
2322 long n=0;
2323 double mu;
2324 long lmu;
2325 double meanmu;
2326 bayes_fmt * bayes = world->bayes;
2327 bayeshistogram_fmt *hist;
2328 long locus;
2329 long l;
2330 long frompop=0;
2331 long topop=0;
2332 long j=0, j0;
2333 long size;
2334 long numpop2 = world->numpop2;
2335 #ifdef DEBUG
2336 print_bf_values(world);
2337 #endif
2338 // for single locus data one is not calculating the overall values
2339 long lozi = world->loci > 1 ? world->loci : 0;
2340 #ifdef LONGSUM
2341 long addon = (world->options->fluctuate ? (world->numpop * 3) : 0);
2342 #else
2343 //xcode long addon = 0;
2344 #endif
2345 char st[7];
2346 char *stemp;
2347
2348 //MYREAL ***cov;
2349 long nsamp=0;
2350 long n1=0;
2351
2352 stemp = (char *) mycalloc(LINESIZE,sizeof(char));
2353
2354 //xcode addon += (bayes->mu * world->loci);
2355 //CHECK THIS! size = numpop2 + addon;
2356 size = numpop2 + bayes->mu;
2357 if(world->loci>1)
2358 {
2359 bayes_combine_loci(world);
2360 }
2361 else
2362 {
2363 calculate_plotter_priors(world);
2364 }
2365 // print raw histogram data into the bayesfile
2366 if(world->options->has_bayesfile)
2367 {
2368 print_locus_histogram_header(world->bayesfile, bayes->deltahist, bayes->custm2, world->numpop, size, world->options->usem, bayes->mu, world);
2369 for(locus=0; locus < world->loci; locus++)
2370 {
2371 if(world->data->skiploci[locus])
2372 continue;
2373
2374 print_locus_histogram(world->bayesfile, world, locus, size);
2375 }
2376 if(world->loci>1)
2377 {
2378 print_loci_histogram(world->bayesfile, world, world->loci, size);
2379 }
2380 }
2381 #ifdef PRETTY
2382 pdf_print_bayestable(world);
2383 page_height = pdf_loci_histogram(world);
2384 #endif
2385 FPRINTF(world->outfile,"\n\n\nBayesian estimates\n");
2386 FPRINTF(world->outfile,"==================\n\n");
2387 FPRINTF(world->outfile,"Locus Parameter 2.5%% 25.0%% mode 75.0%% 97.5%% median mean\n");
2388 FPRINTF(world->outfile,"-----------------------------------------------------------------------------------\n");
2389 for(locus=0; locus <= lozi; locus++)
2390 {
2391 if(locus<world->loci)
2392 {
2393 if(world->data->skiploci[locus])
2394 continue;
2395 }
2396 hist = &bayes->histogram[locus];
2397 if(locus == world->loci)
2398 strcpy(st," All ");
2399 else
2400 sprintf(st,"%5li ",locus + 1);
2401
2402 for(j0=0; j0< world->numpop2; j0++)
2403 {
2404
2405 // if(strchr("0c", world->options->custm2[j]))
2406 // continue;
2407 if(bayes->map[j0][1] == INVALID)
2408 continue;
2409 else
2410 j = bayes->map[j0][1];
2411
2412 if(j < world->numpop)
2413 {
2414 FPRINTF(world->outfile,"%5s ", st);
2415 FPRINTF(world->outfile,"Theta_%-3li ",j0+1);
2416 FPRINTF(world->outfile, "%8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f\n",
2417 hist->cred95l[j], hist->cred50l[j], hist->modes[j],
2418 hist->cred50u[j], hist->cred95u[j],hist->medians[j], hist->means[j]);
2419 }
2420 else
2421 {
2422 m2mm(j0,world->numpop,&frompop, &topop);
2423 if(world->options->usem)
2424 sprintf(stemp,"M_%li->%li", frompop+1, topop+1);
2425 else
2426 sprintf(stemp,"Theta_%li*M_%li->%li", topop+1, frompop+1, topop+1);
2427
2428 FPRINTF(world->outfile,"%5s ", st);
2429 FPRINTF(world->outfile, "%-15.15s",stemp);
2430 FPRINTF(world->outfile,"%8.2f %8.2f %8.2f %8.2f %8.2f %8.2f %8.2f\n",
2431 hist->cred95l[j], hist->cred50l[j], hist->modes[j],
2432 hist->cred50u[j], hist->cred95u[j],hist->medians[j], hist->means[j]);
2433 }
2434 // warning block: issuing a warning when the 75% and the95% percentiles are within 10% of the
2435 // upper prior boundary
2436 threshold = 0.9 * bayes->maxparam[j];
2437 if(hist->cred95u[j] > threshold && hist->cred50u[j] > threshold)
2438 {
2439 if(locus == lozi && locus>0)
2440 record_warnings(world,"Param %li (all loci): Upper prior boundary seems too low! ",j+1);
2441 else
2442 record_warnings(world,"Param %li (Locus %i): Upper prior boundary seems too low! ", j+1,locus+1);
2443 }
2444 }
2445 if(bayes->mu)
2446 {
2447 FPRINTF(world->outfile,"%5.5s ", st);
2448 if(locus==lozi && lozi>1)
2449 {
2450 meanmu = 0.;
2451 for(l=0;l<world->loci;l++)
2452 {
2453 meanmu += world->options->meanmu[l];
2454 }
2455 meanmu /= world->loci;
2456 lmu = (long) floor( log10(hist->modes[j0] * meanmu));
2457 mu = meanmu * pow(10. , -lmu);
2458 }
2459 else
2460 {
2461 lmu = (long) floor( log10(hist->modes[j0] * world->options->meanmu[locus]));
2462 mu = world->options->meanmu[locus] * pow(10. , -lmu);
2463 }
2464 FPRINTF(world->outfile," mu [10^%li] ",lmu);
2465 FPRINTF(world->outfile, "%8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f\n",
2466 mu*hist->cred95l[j0], mu*hist->cred50l[j0] , mu*hist->modes[j0],
2467 mu*hist->cred50u[j0], mu*hist->cred95u[j0] , mu*hist->medians[j0],
2468 mu*hist->means[j0]);
2469 }
2470 } // over all +1 loci
2471 FPRINTF(world->outfile,"-----------------------------------------------------------------------------------\n");
2472 if(bayes->mu)
2473 {
2474 mutot = 0;
2475 n = 1;
2476 for(locus=0; locus < world->loci; locus++)
2477 {
2478 if(world->data->skiploci[locus])
2479 continue; //invariantloci: not resolved yet
2480 mu = (double) world->options->meanmu[locus];
2481 mutot += (mu - mutot) / n;
2482 n++;
2483 FPRINTF(world->outfile,"(*) Mutation rate for locus %li was set to %8.5e", locus, mu);
2484 }
2485 if(world->loci>1)
2486 {
2487 FPRINTF(world->outfile,"(*) Average mutation rate for all loci is %8.5e", mutot);
2488 }
2489 }
2490 #if 0 //DEBUG cov table printing removed
2491 cov = (MYREAL ***) mycalloc(world->loci+1,sizeof(MYREAL **));
2492 for (locus=0;locus<world->loci;locus++)
2493 {
2494 cov[locus] = world->bayes->histogram[locus].covariance;
2495 n1 = world->bayes->histogram[locus].n;
2496 nsamp += n1/world->loci;
2497 //adjust_covariance(cov[locus],world->numpop2, n1);
2498 }
2499 if(world->loci>1)
2500 {
2501 cov[locus] = world->bayes->histogram[locus].covariance;
2502 //adjust_covariance(cov[locus],world->numpop2, nsamp);
2503 }
2504 print_cov2 (world, world->numpop, world->loci, cov);
2505 myfree(cov);
2506 #endif
2507 // print out the acceptance ratios for every parameter and the tree
2508 if(world->options->progress)
2509 {
2510 // final acceptance ratios for the Bayesian run
2511 bayes_print_accept(stdout,world);
2512 }
2513 myfree(stemp);
2514 }
2515
2516
2517 ///
2518 /// print out the acceptance ratios for all the different Bayesian updates
2519 void
bayes_print_accept(FILE * file,world_fmt * world)2520 bayes_print_accept(FILE * file, world_fmt *world)
2521 {
2522 long j; //used to loop over all parameters
2523 long topop =0; // indicator into the parameter vector, specifying originating population
2524 long frompop =0; // receiving population
2525 char *stemp; // string variable holding print-string
2526 long trials =0; //
2527 long tc = world->numpop2 + (world->bayes->mu * world->loci); //position of genealogy accept rates
2528 bayes_fmt *bayes = world->bayes;
2529 long estimated_trials=0;
2530 FPRINTF(file,"\n\n\nAcceptance ratios for all parameters and the genealogies\n");
2531 FPRINTF(file,"---------------------------------------------------------------------\n\n");
2532 // This needs more attention but will need more stuff to safe
2533 if(world->options->datatype == 'g')
2534 {
2535 FPRINTF(file,"\n\n\nnot available with datatype=Genealogy\n\n\n");
2536 return;
2537 }
2538
2539 FPRINTF(file,"Parameter Accepted changes Ratio\n");
2540 // population sizes
2541 for(j=0; j < world->numpop; j++)
2542 {
2543 if(!strchr("0c", bayes->custm2[j]))
2544 {
2545 trials=world->trials_archive[j];
2546 if(trials>0)
2547 {
2548 FPRINTF(file,"Theta_%-3li %8li/%-8li %8.5f\n", j+1, world->accept_archive[j],
2549 trials, (MYREAL) world->accept_archive[j]/trials);
2550 estimated_trials += trials;
2551 }
2552 }
2553 }
2554 // migration rates
2555 stemp = (char *) mycalloc(LINESIZE,sizeof(char));
2556 for(j=world->numpop; j < world->numpop2; j++)
2557 {
2558 if(!strchr("0c", bayes->custm2[j]))
2559 {
2560 trials=world->trials_archive[j];
2561 if(trials>0)
2562 {
2563 m2mm (j, world->numpop, &frompop, &topop);
2564 if(world->options->usem)
2565 {
2566 sprintf(stemp, "M_%li->%li", frompop+1, topop+1);
2567 }
2568 else
2569 {
2570 sprintf(stemp, "xN_%lim_%li->%li", topop+1, frompop+1, topop+1);
2571 }
2572 FPRINTF(file, "%-12.12s %8li/%-8li %8.5f\n", stemp, world->accept_archive[j],
2573 trials, (MYREAL) world->accept_archive[j]/trials);
2574 estimated_trials += trials;
2575 }
2576 }
2577 }
2578 // accepted rate of mutation rate changes for each locus mutation rate
2579 if(bayes->mu)
2580 {
2581 for(j=world->numpop2;j<world->numpop2+world->loci;j++)
2582 {
2583 FPRINTF(file, "Rate of mutation rate (%li) %8li/%-8li %8.5f\n", j+1,
2584 world->accept_archive[j],
2585 world->trials_archive[j], (MYREAL) world->accept_archive[j]/world->trials_archive[j]);
2586 estimated_trials += world->trials_archive[j];
2587 }
2588 }
2589 // accepted trees
2590 trials=world->trials_archive[tc];
2591 if(trials>0)
2592 {
2593 FPRINTF(file, "Genealogies %8li/%-8li %8.5f\n", world->accept_archive[tc], (long)
2594 trials, (MYREAL) world->accept_archive[tc]/trials);
2595 estimated_trials += trials;
2596 }
2597 // FPRINTF(file, "Sum of all acceptances %8li/%-8li\n", estimated_trials, world->options->lsteps*world->maxreplicate*world->loci*world->options->lincr);
2598
2599 myfree(stemp);
2600 }
2601
2602
2603 ///
2604 /// print out the acceptance ratios for all the different Bayesian updates
2605 void
bayes_progress(world_fmt * world,long ten)2606 bayes_progress(world_fmt *world, long ten)
2607 {
2608 const boolean writelog = world->options->writelog;
2609 const boolean progress = world->options->progress;
2610 char *buffer;
2611 long bufsize=0;
2612 char spacer[]="";
2613 //
2614 char nowstr[STRSIZE]; // will hold time of day
2615 // char temp[LINESIZE]; // for locus printing with rates
2616 long j0; // used to loop over all parameters
2617 long j;
2618 //long topop=0; // indicator into the parameter vector, specifying originating population
2619 //long frompop=0; // receiving population
2620 char *paramstr; // string variable holding print-string
2621 //long tc = world->numpop2; //this ignores alpha among multiple loci
2622 bayes_fmt *bayes = world->bayes;
2623 MYREAL * autocorr = world->autocorrelation;
2624 MYREAL * effsample = world->effective_sample;
2625 //boolean notusem = !world->options->usem;
2626 MYREAL param0;
2627 MYREAL accrat=0.0;
2628 buffer = (char *) mycalloc(bayes->progresslinesize,sizeof(char));
2629 paramstr = (char *) mycalloc(LINESIZE,sizeof(char));
2630
2631 get_time (nowstr, "%H:%M:%S");
2632 if(world->options->heating)
2633 {
2634 bufsize += sprintf(buffer+bufsize,
2635 "\n%s [NODE:%i, Locus: %li, Heating:ON (Swaps: %li)] ",
2636 nowstr,myID, world->locus + 1, world->swapped);
2637 }
2638 else
2639 {
2640 bufsize += sprintf(buffer+bufsize,"\n%s [NODE:%i, Locus: %li] ", nowstr,myID, world->locus + 1);
2641 }
2642 prognose_time (nowstr, world, 1, world->treesdone, spacer, TRUE);
2643 bufsize += sprintf(buffer+bufsize,"\n (prognosed end of run is %s [%f done])\n\n", nowstr,
2644 ((MYREAL) world->treesdone / (MYREAL) world->treestotal));
2645
2646 if(world->options->has_autotune)
2647 {
2648 bufsize += sprintf(buffer+bufsize," Parameter Acceptance Current PropWindow AutoCorr ESS\n");
2649 bufsize += sprintf(buffer+bufsize," ------------ ---------- ------------ ---------- -------- --------\n");
2650 // 123456789012 1234567890 1234567890 1234567890 12345678 12345678
2651 }
2652 else
2653 {
2654 bufsize += sprintf(buffer+bufsize," Parameter Acceptance Current AutoCorr ESS\n");
2655 bufsize += sprintf(buffer+bufsize," ------------ ---------- ------------ -------- --------\n");
2656 // 123456789012 1234567890 1234567890 12345678 12345678
2657 }
2658 for(j0=0; j0 < world->numpop2 + world->bayes->mu; j0++)
2659 {
2660 if(shortcut(j0,bayes,&j))
2661 {
2662 continue;
2663 }
2664 else
2665 {
2666 set_paramstr(paramstr, j,world->numpop,world->options->usem);
2667 //@@ if (j>=world->numpop && j< world->numpop2)
2668 //@@ {
2669 //@@ if (notusem)
2670 //@@ {
2671 //@@ m2mm(j,world->numpop,&frompop,&topop);
2672 //@@ param0 = world->param0[j] * world->param0[topop];
2673 //@@ }
2674 //@@ else
2675 //@@ param0 = world->param0[j];
2676 //@@ }
2677 //@@ else
2678 param0 = world->param0[j];
2679
2680 if(world->options->has_autotune)
2681 {
2682 if(bayes->trials[j]>0)
2683 accrat = (MYREAL) bayes->accept[j]/bayes->trials[j];
2684 else
2685 accrat = 0.0;
2686 bufsize += sprintf(buffer+bufsize, " %-12.12s % 5.3f % 10.5f % 10.5f % 8.3f % 8.2f\n",
2687 paramstr, accrat,param0,world->bayes->delta[j], autocorr[j],effsample[j]);
2688 }
2689 else
2690 {
2691 bufsize += sprintf(buffer+bufsize, " %-12.12s % 5.3f % 10.5f % 8.3f % 8.2f\n",
2692 paramstr, accrat,param0, autocorr[j],effsample[j]);
2693 }
2694 }
2695 }
2696 if(world->options->has_autotune)
2697 {
2698 sprintf(buffer+bufsize, " Genealogies % 5.3f % 13.5f --- % 8.3f % 8.2f\n",
2699 (MYREAL) bayes->accept[j]/bayes->trials[j],world->likelihood[world->numlike-1], autocorr[j0],effsample[j0]);
2700 }
2701 else
2702 {
2703 sprintf(buffer+bufsize, " Genealogies % 5.3f% 13.5f % 8.3f % 8.2f\n",
2704 (MYREAL) bayes->accept[j]/bayes->trials[j],world->likelihood[world->numlike-1], autocorr[j0],effsample[j0]);
2705 }
2706 if(progress)
2707 {
2708 FPRINTF(stdout,"%s",buffer);
2709 }
2710
2711 if(writelog)
2712 {
2713 FPRINTF(world->options->logfile,"%s",buffer);
2714 }
2715 myfree(buffer);
2716 myfree(paramstr);
2717 // print_bayes_ess(stdout, world, world->numpop2 + world->bayes->mu * world->loci + 1 , 11, world->autocorrelation, world->effective_sample);
2718 }
2719
2720
2721
2722
2723 /// adjusts the allocations for the histogram bins for the current locus
adjust_bayes_bins(world_fmt * world,long locus)2724 void adjust_bayes_bins(world_fmt * world, long locus)
2725 {
2726 //MYREAL dif = 0.;
2727 //long pa;
2728 //long np = world->numpop2 + (world->bayes->mu * world->loci);
2729 bayeshistogram_fmt *hist = &(world->bayes->histogram[locus]);
2730 #ifdef USE_ADJUST_BAYES_BIN
2731 hist->binsum = 0;
2732 for(pa=0; pa < np; pa++)
2733 {
2734 // if custom migration matrix is set to zero, zero the bins else set it
2735 if(pa < world->numpop2)
2736 {
2737 if(strchr("0c",world->options->custm2[pa]))
2738 {
2739 hist->bins[pa] = 0;
2740 }
2741 else
2742 {
2743 dif = (hist->maxima[pa] - hist->minima[pa]);
2744 if(dif <= 0)
2745 hist->bins[pa] = 0;
2746 else
2747 hist->bins[pa] = (long) (dif / world->bayes->deltahist[pa]) ;
2748 #ifdef DEBUG_MPI
2749 fprintf(stdout,"%i> LOCUS %li: bins[%li]=%li = (%f - %f) /%f \n",myID,locus, pa,hist->bins[pa],hist->maxima[pa], hist->minima[pa], world->bayes->deltahist[pa]);
2750 #endif
2751 hist->binsum += hist->bins[pa];
2752 }
2753 }
2754 else
2755 {
2756 dif = (hist->maxima[pa] - hist->minima[pa]);
2757 if(dif <= 0.0)
2758 hist->bins[pa] = 0;
2759 else
2760 hist->bins[pa] = (long) (dif / world->bayes->deltahist[pa]) ;
2761 hist->binsum += hist->bins[pa];
2762 }
2763 }
2764 // allocate the found number of bins for the histogram
2765 #endif
2766 // if(locus == world->loci && world->options->has_bayesmdimfile)
2767 // {
2768 if(world->bayes->histogram[locus].results == NULL)
2769 {
2770 world->bayes->histogram[locus].results = (float *) mycalloc(hist->binsum + world->loci * 2, sizeof(float));//tracing a valgrind issue dec 20 2007
2771 world->bayes->histogram[locus].set95 = (char *) mycalloc((hist->binsum+world->loci*2)* 2 + 2, sizeof(char));
2772 world->bayes->histogram[locus].set50 = world->bayes->histogram[locus].set95 + hist->binsum + (world->loci * 2) + 1;
2773 memset(hist->results, 0 , sizeof(float) * (hist->binsum));
2774 }
2775 // }
2776 }
2777
2778
2779 // assembles the histogram from the sampled parameter values in params
2780 // there is a similar function that reads from the bayesallfile mdimfile
2781 // synonym: construct_locus_histogram
construct_param_hist(world_fmt * world,long locus,long numpop2,long pa,long numbin,MYREAL * mini,MYREAL * maxi,float ** results,long * total,MYREAL * themean,MYREAL * thestd)2782 void construct_param_hist(world_fmt *world, long locus, long numpop2, long pa, long numbin, MYREAL *mini,
2783 MYREAL *maxi, float **results,
2784 long *total, MYREAL *themean, MYREAL *thestd)
2785 {
2786 bayes_fmt *bayes = world->bayes;
2787 long j, j0;
2788 long i;
2789 long np = numpop2 + (bayes->mu);
2790 long npx = np + 2;
2791 long bin;
2792 long nb;
2793
2794 MYREAL delta;
2795 MYREAL value;
2796 MYREAL *params = bayes->params;
2797 long floorindex;
2798 float *p = (float *) mycalloc(bayes->numparams,sizeof(float));
2799 float *q = (float *) mycalloc(bayes->numparams,sizeof(float));
2800 for(i=0;i < bayes->numparams; i++)
2801 {
2802 floorindex = npx * i + 2;
2803 delta = bayes->deltahist[pa];
2804 //value = (params+(npx*i))[pa+2]; // 2/11/13
2805 value = params[floorindex+pa];
2806 p[i] = (float) value;
2807 }
2808 qsort(p, bayes->numparams, sizeof(float), floatcmp);
2809 //memcpy(q,p,sizeof(float)*bayes->numparams);
2810 //bayes_smooth(p, bayes->numparams,bayes->histogram[0].bins[pa]/50,TRUE);
2811 for(i=0;i < bayes->numparams; i++)
2812 {
2813 floorindex = npx * i + 2;
2814 delta = bayes->deltahist[pa];
2815 value = p[i]; //params[floorindex+pa];
2816 //printf("@@@ %li %li %f %f\n",pa, i, value,q[i]);
2817 *themean += value;
2818 *thestd += value * value;
2819 if(value < mini[pa])
2820 {
2821 // warning("params is smaller than minimum in function construct_locus_histogram()\n");
2822 //MPIPROBLEM warning("%i> TOO SMALL value=%f, mini[%li]=%f\n",myID,value,pa,mini[pa]);
2823 }
2824 if(value > maxi[pa])
2825 {
2826 // warning("params is larger than maximum in function construct_locus_histogram()\n");
2827 // MPI PROBLEM warning("%i> TOO LARGE value=%f, maxi[%li]=%f\n",myID,value,pa,maxi[pa]);
2828 bin = bayes->histogram[locus].bins[pa] - 1;
2829 }
2830 else
2831 {
2832 bin = (long) ((value - mini[pa])/delta);
2833 if(bin<0)
2834 bin=0;
2835 }
2836 // printf("%i> bin=%li reconstituted=%f value=%f, mini=%f, maxi=%f, delta=%f, locus=%li\n",myID,bin, bin*delta + mini[pa], value,mini[pa],maxi[pa],delta,locus);
2837 if((bin) > bayes->histogram[locus].bins[pa])
2838 {
2839 warning("%i> value not counted for histogram: bin=%li > histbins=%li\n", myID,bin, bayes->histogram[locus].bins[pa]);
2840 *total +=1;
2841 return;
2842 }
2843 nb = 0;
2844 for(j0=0;j0<pa;j0++)
2845 {
2846 if(shortcut(j0,bayes,&j))
2847 {
2848 continue;
2849 }
2850 nb += bayes->histogram[locus].bins[j];
2851 }
2852 (*results)[nb + bin] += 1.;
2853 *total += 1;
2854 }
2855 //
2856 // adjusting the integral =1
2857 for(bin=0;bin < bayes->histogram[locus].bins[pa]; bin++)
2858 {
2859 nb=0;
2860 for(j0=0;j0<pa;j0++)
2861 {
2862 if(shortcut(j0,bayes,&j))
2863 {
2864 continue;
2865 }
2866 nb += bayes->histogram[locus].bins[j];
2867 }
2868 (*results)[nb + bin] /= (float)(*total * delta);//12-15-13 revision posterior printing
2869 }
2870 //for(j=0;j<maxi[pa]/delta;j++)
2871 // printf("%i> %f ",myID,(*results)[j]);
2872 //printf("\n");
2873 myfree(p);
2874 myfree(q);
2875 }
2876
2877 /// construct bayes histogram: make a at least BAYESNUMBIN slices through the min-max range for each locus
2878 /// while calculating the histogram calculate also the mean and standard deviation.
2879 /// This is done in the same loop, but is somehwat opaque. This is used from bayesallfile!
2880 /// Synonym: construct_param_hist() for ram based parameters
construct_locus_histogram(world_fmt * world,long locus,MYREAL * mini,MYREAL * maxi,float ** results)2881 void construct_locus_histogram(world_fmt *world, long locus, MYREAL *mini, MYREAL *maxi, float **results)
2882 {
2883 long pa;
2884 long pa0;
2885 long rpa;
2886 long total=0;
2887 long numbin=0 ;
2888 boolean *visited;
2889 long numpop2 = world->numpop2;
2890 long np = world->numpop2 + (world->bayes->mu);
2891 MYREAL themean=0.0;
2892 MYREAL thestd=0.0;
2893 visited = (boolean *) mycalloc(np, sizeof(boolean));
2894 for(pa0=0; pa0 < np ; pa0++)
2895 {
2896
2897 if(world->bayes->map[pa0][1] == INVALID)
2898 continue;
2899 else
2900 pa = world->bayes->map[pa0][1];
2901 if(pa>=numpop2)
2902 {
2903 pa = numpop2;
2904 rpa = numpop2;
2905 }
2906 else
2907 {
2908 rpa = pa;
2909 }
2910 // handles cases with symmetric migration rates and mean of migration rates
2911 // TODO needs revision for new scheme that allows more freedom in setting migration rate groups
2912 if(!visited[rpa])
2913 {
2914 themean = 0.0;
2915 thestd = 0.0;
2916 total = 0;
2917 construct_param_hist(world,locus,world->numpop2, rpa,numbin, mini, maxi, results, &total,&themean,&thestd);
2918 world->bayes->histogram[locus].means[pa] = themean/total;
2919 world->bayes->histogram[locus].stds[pa] = thestd / total;
2920 world->bayes->histtotal[locus*np+pa] = total;
2921 // printf("histtotal=%li locus=%li pa=%li\n", total, locus, pa);
2922 visited[rpa] = TRUE;
2923 }
2924 }
2925 covariance_bayes(world,locus);
2926 myfree(visited);
2927 }
2928
2929 ///
2930 /// find the largest interval for all loci looking at the stored minima and maxima of the histogram
2931 /// and reports these into adjmini and adjmaxi
2932 /// this allows combination of different histograms without storing all raw data
adjust_bayes_min_max(world_fmt * world,MYREAL ** mini,MYREAL ** maxi,MYREAL ** adjmini,MYREAL ** adjmaxi)2933 void adjust_bayes_min_max(world_fmt* world, MYREAL **mini, MYREAL **maxi, MYREAL **adjmini, MYREAL **adjmaxi)
2934 {
2935 //MYREAL delta;
2936 long pa0, pa;
2937 long np = world->numpop2 + (world->bayes->mu);
2938
2939 for(pa0=0; pa0 < np; pa0++)
2940 {
2941 // if custom migration matrix is set to zero
2942 // continue
2943 if(world->bayes->map[pa0][1] == INVALID)
2944 continue;
2945 else
2946 pa = world->bayes->map[pa0][1];
2947
2948 // if(pa < world->numpop2)
2949 //{
2950 //if(strchr("0c", world->options->custm2[pa]))
2951 // continue;
2952 //}
2953 //delta = world->bayes->deltahist[pa];
2954 if((*mini)[pa] < (*adjmini)[pa0])
2955 {
2956 (*adjmini)[pa0] = (*mini)[pa];
2957 }
2958 if((*maxi)[pa] > (*adjmaxi)[pa0])
2959 {
2960 (*adjmaxi)[pa0] = (*maxi)[pa];
2961 }
2962 }
2963 }
2964
2965
2966 /// find minimum and maximum values for each parameters
find_bayes_min_max(world_fmt * world,MYREAL ** mini,MYREAL ** maxi,MYREAL ** adjmaxi)2967 void find_bayes_min_max(world_fmt* world, MYREAL **mini, MYREAL **maxi, MYREAL **adjmaxi)
2968 {
2969 long pa0, pa;
2970 // long i;
2971 //long changed_min;
2972 //long changed_max;
2973 long np = world->numpop2 + (world->bayes->mu);
2974 //xcode long npx = np + 2;
2975 //xcode MYREAL value;
2976
2977 //xcode MYREAL delta;
2978 MYREAL minivalue;
2979 MYREAL maxivalue;
2980
2981 for(pa0=0; pa0 < np; pa0++)
2982 {
2983 // if custom migration matrix is set to zero
2984 // continue
2985 //if(pa < world->numpop2)
2986 //{
2987 // if(strchr("0c", world->bayes->custm2[pa]))
2988 // continue;
2989 //}
2990 if(world->bayes->map[pa0][1] == INVALID)
2991 continue;
2992 else
2993 pa = world->bayes->map[pa0][1];
2994
2995
2996 //xcode minivalue = HUGE;
2997 //xcode maxivalue = 0.;
2998 //xcode changed_min = 0;
2999 //xcode changed_max = 0;
3000 //#ifdef MPI
3001 // delta = world->bayes->deltahist[pa];
3002 //#endif
3003 // fprintf(stdout,"%i> npx i pa param value minivalue maxivalue\n",myID);
3004 // for(i=0; i < world->bayes->numparams; i++)
3005 // {
3006 /*------------------
3007 value = (world->bayes->params+(npx*i))[pa+2];
3008 if( minivalue > value)
3009 {
3010 minivalue = value;
3011 changed_min++;
3012 }
3013 if(maxivalue < value)
3014 {
3015 maxivalue = value;
3016 changed_max++;
3017 }
3018 fprintf(stdout,"%i> %li %li %li %f %f %f %f\n", myID, npx, i, pa, (world->bayes->params+(npx*i))[pa+1], value, minivalue, maxivalue);
3019 }
3020 if(maxivalue-minivalue<EPSILON)
3021 {
3022 #ifdef MPI
3023 maxivalue += BAYESNUMBIN * delta;
3024 minivalue = 0.0; //only good for parameter 0..inf
3025 #else
3026 maxivalue += BAYESNUMBIN * EPSILON;
3027 #endif
3028 #endif
3029 }
3030 --------------*/
3031 // if(changed_max==0)
3032 maxivalue = world->bayes->maxparam[pa];
3033
3034 // if(changed_min==0)
3035 minivalue = world->bayes->minparam[pa];
3036
3037
3038 // adjust the mini and maxi so that deltahist will fit in
3039 // (*mini)[pa0] = delta * (long) (minivalue / delta);
3040 //(*maxi)[pa0] = delta * (long)(1. + maxivalue / delta);
3041 (*mini)[pa0] = minivalue;
3042 (*maxi)[pa0] = maxivalue;
3043 // fprintf(stdout,"find_bayes_min_max() @@@@@ %f - %f @@@@@ [global: %f - %f]\n",(*mini)[pa],(*maxi)[pa],
3044 // world->bayes->histogram[world->loci].minima[pa],world->bayes->histogram[world->loci].maxima[pa]);
3045 }
3046 }
3047
3048 ///
3049 /// prints order of parameters for header files in bayesfile and bayesallfile
print_param_order(char ** buf,long * bufsize,long * allocbufsize,world_fmt * world,long numparam)3050 void print_param_order(char **buf, long *bufsize, long *allocbufsize, world_fmt *world, long numparam)
3051 {
3052 long pa;
3053 long pa0;
3054 long numpop = world->numpop;
3055 long numpop2 = world->numpop2;
3056 long frompop, topop;
3057 long frompop2, topop2;
3058 char *custm2 = world->options->custm2;
3059 boolean usem = world->options->usem;
3060 bayes_fmt *bayes = world->bayes;
3061 *bufsize += sprintf(*buf + *bufsize,"# Order of the parameters:\n");
3062 *bufsize += sprintf(*buf + *bufsize,"# Parameter-number Parameter\n");
3063 for(pa0=0;pa0<numparam;pa0++)
3064 {
3065 if(*bufsize > (*allocbufsize - 150))
3066 {
3067 *allocbufsize += LINESIZE;
3068 *buf = (char *) realloc(*buf, sizeof(char) * *allocbufsize);
3069 }
3070 if(bayes->map[pa0][1] != INVALID)
3071 {
3072 if(pa0 == bayes->map[pa0][1])
3073 pa = pa0;
3074 else
3075 pa = bayes->map[pa0][1];
3076
3077 if(pa0 < numpop)
3078 {
3079 if((pa0 == pa) && (custm2[pa0] == '*'))
3080 *bufsize += sprintf(*buf + *bufsize,"#@ %6li %s_%li\n", pa0+1, "Theta",pa0+1);
3081 else
3082 *bufsize += sprintf(*buf + *bufsize,"#@ %6li %s_%li = %s_%li [%c]\n",
3083 pa0+1, "Theta",pa0+1, "Theta",pa+1, custm2[pa0]);
3084 }
3085 else
3086 {
3087 // do we estimate mutation rate changes?
3088 if(pa0 == numpop2)
3089 {
3090 *bufsize += sprintf( *buf + *bufsize,"#@ %6li %s\n", pa0+1, "Rate");
3091 }
3092 else
3093 {
3094 m2mm(pa0,numpop,&frompop,&topop);
3095 if((pa0==pa) && (custm2[pa0]=='*'))
3096 {
3097 if(usem)
3098 *bufsize += sprintf( *buf + *bufsize,"#@ %6li %s_(%li,%li)\n", pa+1, "M", frompop+1, topop+1);
3099 else
3100 *bufsize += sprintf( *buf + *bufsize,"#@ %6li %s_(%li,%li) = %s_(%li,%li)*%s_%li\n", pa+1, "xNm", frompop+1, topop+1, "M", frompop+1, topop+1, "Theta", topop+1);
3101 }
3102 else
3103 {
3104 m2mm(pa,numpop,&frompop2,&topop2);
3105 if(usem)
3106 *bufsize += sprintf(*buf + *bufsize,"#@ %6li %s_(%li,%li) = %s_(%li,%li) [%c]\n", pa+1, "M", frompop+1, topop+1, "M", frompop2+1, topop2+1, custm2[pa0]);
3107 else
3108 *bufsize += sprintf(*buf + *bufsize,"#@ %6li %s_(%li,%li) = %s_(%li,%li)*%s_%li = %s_(%li,%li) = %s_(%li,%li)*%s_%li [%c]\n",
3109 pa+1, "xNm", frompop+1, topop+1, "M", frompop+1, topop+1, "Theta", topop+1,
3110 "xNm", frompop2+1, topop2+1, "M", frompop2+1, topop2+1, "Theta", topop2+1,
3111 custm2[pa0]);
3112 }
3113 }
3114 }
3115 }
3116 }
3117 }
3118
3119 ///
3120 /// prints a comment header, using shell script comments for the output of the raw histogram data for bayesdata
3121 ///
3122 /// # Raw data for the histogram of the posterior probabilities for all parameters and loci\n
3123 /// # produced by the program migrate-n VERSIONNUMBER (popgen.csit.fsu.edu/migrate.hml)\n
3124 /// # written by Peter Beerli 2004, Tallahassee, if you have problems email to beerli@fsu.edu\n
3125 /// #
3126 /// # The HPC values are indicators whether the parametervalue is in the highest-posterior credibility set,\n
3127 /// # a 0 means it is outside and a 1 means the value is inside the credibility set.\n
3128 /// #
3129 /// # --------------------------------------------------------------\n
3130 /// # Locus Parameter 50%HPC 95%HPC parameter-value count frequency\n
3131 /// # --------------------------------------------------------------\n
3132 /// #
print_locus_histogram_header(FILE * bayesfile,MYREAL * deltas,char * custm2,long numpop,long numparam,boolean usem,boolean mu,world_fmt * world)3133 void print_locus_histogram_header(FILE *bayesfile, MYREAL *deltas, char *custm2, long numpop, long numparam, boolean usem, boolean mu, world_fmt *world)
3134 {
3135 long pa;
3136 long bufsize = 0;
3137 long allocbufsize = LONGLINESIZE;
3138 char *buf = (char *) mycalloc(allocbufsize, sizeof(char));
3139 fprintf(bayesfile, "# Raw data for the histogram of the posterior probabilities for all parameters and loci\n");
3140 fprintf(bayesfile, "# produced by the program migrate-n %s (http://popgen.sc.fsu.edu/migrate.hml)\n",MIGRATEVERSION);
3141 fprintf(bayesfile, "# written by Peter Beerli 2004-2013, Tallahassee, if you have problems email to beerli@fsu.edu\n");
3142 fprintf(bayesfile, "#\n");
3143 fprintf(bayesfile, "# The HPC values are indicators whether the parametervalue is in the highest-posterior credibility set,\n");
3144 fprintf(bayesfile, "# a 0 means it is outside and a 1 means the value is inside the credibility set.\n");
3145 fprintf(bayesfile, "#\n");
3146 print_param_order(&buf, &bufsize,&allocbufsize, world,numparam);
3147 fprintf(bayesfile, "#%s\n",buf);
3148 fprintf(bayesfile, "# Delta for Theta and M ");
3149 for(pa=0;pa<numparam-1; pa++)
3150 {
3151 fprintf(bayesfile,"%f ", custm2[pa]!='0' ? deltas[pa] : -99);
3152 }
3153 if(!mu)
3154 fprintf(bayesfile,"%f ", custm2[pa]!='0' ? deltas[pa] : -99);
3155
3156 fprintf(bayesfile, "\n# -----------------------------------------------------------------------------------------\n");
3157 fprintf(bayesfile, "# Locus Parameter 50%%HPC 95%%HPC parameter-value count frequency cummulative_freq priorfreq\n");
3158 fprintf(bayesfile, "# -------------------------------------------------------------------------------------------\n");
3159 myfree(buf);
3160 }
3161
3162 ///
3163 /// print the locus data for a histogram to the file bayesfile the data has a header starting with # so that
3164 /// other programs easiliy can remove ot for processing. the function calculates the total of all bins and
3165 /// then is printing locusnumber parameternumber 95%HPC 50%HPC bincounts frequency for all bins and parameters
3166 /// the HPC columns are 0 and 1, where 0 means no in the credibiliity set.
3167 /// The header is printed in print_locus_histogram_header(bayesfile)
print_locus_histogram(FILE * bayesfile,world_fmt * world,long locus,long numparam)3168 void print_locus_histogram(FILE *bayesfile, world_fmt *world, long locus, long numparam)
3169 {
3170 // char indicator;
3171 bayes_fmt *bayes = world->bayes;
3172 long bin;
3173 long pa0, pa;
3174 long numbins = 0;
3175 long numbinsall = 0;
3176 long counterup;
3177 long counterlow=0;
3178 //long j;
3179 MYREAL delta;
3180 MYREAL value;
3181 MYREAL total=0. ;
3182 MYREAL freq=0.;
3183 MYREAL sumfreq=0.;
3184
3185 long *bins = bayes->histogram[locus].bins;
3186 float *results = bayes->histogram[locus].results;
3187 // MYREAL *modes = bayes->histogram[locus].modes;
3188 // MYREAL *medians = bayes->histogram[locus].medians;
3189 // MYREAL *means = bayes->histogram[locus].means;
3190 MYREAL *mini = bayes->histogram[locus].minima;
3191 MYREAL *maxi = bayes->histogram[locus].maxima;
3192 char *set50 = bayes->histogram[locus].set50;
3193 char *set95 = bayes->histogram[locus].set95;
3194 if(results == NULL)
3195 return;
3196 for(pa0=0; pa0 < numparam; pa0++)
3197 {
3198 if(bayes->map[pa0][1] == INVALID)
3199 continue;
3200 else
3201 pa = bayes->map[pa0][1];
3202 //fprintf(stdout,"########### (* locus=%li, parameter %li: *) bins=%li; min=%f; max=%f\n",locus+1, pa+1, bins[pa], mini[pa], maxi[pa]);
3203 delta = (maxi[pa] - mini[pa])/bins[pa];
3204 value = mini[pa] + delta/2.;
3205 total = 0. ;
3206 counterup=0;
3207 counterlow = 0;
3208 // numbins = 0;
3209 //for(j=0; j < pa; j++)
3210 // numbins += bins[j];
3211 if(pa<pa0)
3212 continue;
3213 numbinsall += bins[pa];
3214 numbins = numbinsall - bins[pa];
3215 for(bin=0;bin<bins[pa];bin++)
3216 {
3217 total += results[numbins + bin];
3218
3219 if(set95[numbins+bin]=='1')
3220 {
3221 counterlow++;
3222 break;
3223 }
3224 }
3225 for(bin=counterlow;bin<bins[pa];bin++)
3226 {
3227 total += results[numbins + bin];
3228 if(set95[numbins+bin]=='1')
3229 counterup++ ; //upper 97.5
3230 }
3231 sumfreq = 0.;
3232 for(bin=0;bin<bins[pa];bin++)
3233 {
3234 freq = results[numbins + bin] * delta;//12-15-13 revision posterior printing
3235 sumfreq += freq;
3236 fprintf(bayesfile,"%li %li %c %c %f %f %f %f %f\n", locus+1, pa+1, set50[numbins+bin], set95[numbins+bin], value,
3237 results[numbins + bin],freq,sumfreq, exp(scaling_prior(world,pa,value)));
3238 value += delta;
3239 }
3240 fprintf(bayesfile,"\n");
3241 }
3242 }
3243
3244
3245 ///
3246 /// print the loci data for a histogram to the file bayesfile the data has a header starting with # so that
3247 /// other programs easiliy can remove ot for processing. the function calculates the total of all bins and
3248 /// then is printing locusnumber parameternumber 95%HPC 50%HPC bincounts frequency for all bins and parameters
3249 /// the HPC columns are 0 and 1, where 0 means no in the credibiliity set.
3250 /// The header is printed in print_locus_histogram_header(bayesfile)
print_loci_histogram(FILE * bayesfile,world_fmt * world,long locus,long numparam)3251 void print_loci_histogram(FILE *bayesfile, world_fmt * world, long locus, long numparam)
3252 {
3253 bayes_fmt *bayes=world->bayes;
3254 long bin;
3255 //long j;
3256 long pa0, pa;
3257 long numbins = 0;
3258 long numbinsall = 0;
3259 MYREAL delta;
3260 MYREAL value;
3261 MYREAL total=0. ;
3262 MYREAL freq=0. ;
3263 MYREAL sumfreq=0. ;
3264 long *bins = bayes->histogram[locus].bins;
3265 float *results = bayes->histogram[locus].results;
3266 MYREAL *mini = bayes->histogram[locus].minima;
3267 MYREAL *maxi = bayes->histogram[locus].maxima;
3268 char *set50 = bayes->histogram[locus].set50;
3269 char *set95 = bayes->histogram[locus].set95;
3270
3271 for(pa0=0; pa0 < numparam; pa0++)
3272 {
3273 if(bayes->map[pa0][1] == INVALID)
3274 continue;
3275 else
3276 pa = bayes->map[pa0][1];
3277
3278
3279 delta = (maxi[pa] - mini[pa])/bins[pa];
3280 value = mini[pa] + delta/2.;
3281 sumfreq =0.;
3282
3283 //numbins = 0;
3284 //for(j=0; j < pa; j++)
3285 // numbins += bins[j];
3286 if(pa<pa0)
3287 continue;
3288 numbinsall += bins[pa];
3289 numbins = numbinsall - bins[pa];
3290
3291 for(bin=0;bin<bins[pa];bin++)
3292 {
3293 freq = (MYREAL) results[numbins + bin] * delta;
3294 sumfreq += freq;
3295 total = (MYREAL) bayes->trials[pa];
3296 fprintf(bayesfile,"%li %li %c %c %f %f %f %f %f\n", locus+1, pa+1, set50[numbins+bin], set95[numbins+bin], value,
3297 results[numbins + bin] , freq, sumfreq, bayes->priors[numbins+bin]);
3298 value += delta;
3299 }
3300 fprintf(bayesfile,"\n");
3301 }
3302 }
3303
3304
print_bayes_credibility(FILE * file,MYREAL * cred,float * results,long numpop)3305 void print_bayes_credibility(FILE *file, MYREAL *cred, float *results, long numpop)
3306 {
3307 long pa;
3308 long numpop2 = numpop * numpop;
3309 for(pa=0; pa < numpop; pa++)
3310 fprintf(file,"#Theta lower=%f upper=%f\n",cred[pa], cred[pa+ numpop2]);
3311 for(pa=numpop; pa < numpop2; pa++)
3312 fprintf(file,"#Migration lower=%f upper=%f\n",cred[pa], cred[pa+ numpop2]);
3313 }
3314
3315 ///
3316 /// print header for complete (raw) parameter outfile (options->bayesmdimfilename)
3317 /// Header uses shell script escapes
3318 /// # Migrate MIGRATEVERSION (Peter Beerli, (c) 2008)
3319 /// # Raw results from Bayesian inference: these values can be used to generate
3320 /// # joint posterior distribution of any parameter combination
3321 /// # and also used for TRACER results [splitting utility may be needed]
3322 #ifdef ZNZ
print_bayes_mdimfileheader(znzFile file,long interval,world_fmt * world,data_fmt * data)3323 void print_bayes_mdimfileheader(znzFile file, long interval, world_fmt* world, data_fmt *data)
3324 #else
3325 void print_bayes_mdimfileheader(FILE *file, long interval, world_fmt* world, data_fmt *data)
3326 #endif
3327 {
3328 #ifdef MPI
3329 #ifdef PARALIO
3330 MPI_Status status;
3331 #endif
3332 #endif
3333 long i;
3334 long pa;
3335 long pa0;
3336 long pop;
3337 long frompop;
3338 long topop;
3339 long repmax;
3340 boolean *visited;
3341 bayes_fmt *bayes = world->bayes;
3342 long bufsize = 0;
3343 long allocbufsize = LONGLINESIZE;
3344 char *buf = (char *) mycalloc(allocbufsize,sizeof(char)); //this should be plenty for the header (currently 10^4 characters)
3345 bufsize = sprintf(buf,"# Migrate %s (Peter Beerli, (c) 2010)\n", MIGRATEVERSION);
3346 bufsize += sprintf(buf+bufsize,"# Raw results from Bayesian inference: these values can be used to generate\n");
3347 bufsize += sprintf(buf+bufsize,"# joint posterior distribution of any parameter combination\n");
3348 bufsize += sprintf(buf+bufsize,"# Writing information on parameters (Thetas, M or xNm)\n");
3349 bufsize += sprintf(buf+bufsize,"# every %li parameter-steps\n", interval+1);
3350 bufsize += sprintf(buf+bufsize,"# \n");
3351 bufsize += sprintf(buf+bufsize,"# -- %s\n", "Steps");
3352 bufsize += sprintf(buf+bufsize,"# -- %s\n", "Locus");
3353 bufsize += sprintf(buf+bufsize,"# -- %s\n", "Replicates");
3354 bufsize += sprintf(buf+bufsize,"# -- %s\n", "log(Posterior)");
3355 bufsize += sprintf(buf+bufsize,"# -- %s\n", "log(prob(D|G))");
3356 bufsize += sprintf(buf+bufsize,"# -- %s\n", "log(prob(G|Model))");
3357 bufsize += sprintf(buf+bufsize,"# -- %s\n", "log(prob(Model))");
3358 bufsize += sprintf(buf+bufsize,"# -- %s\n", "Sum of time intervals on G");
3359 bufsize += sprintf(buf+bufsize,"# -- %s\n", "Total tree length of G");
3360 print_param_order(&buf, &bufsize, &allocbufsize, world, world->numpop2);
3361 if(world->bayes->mu)
3362 bufsize += sprintf(buf+bufsize,"# %li %s\n", world->numpop2+1, "Rate");
3363 bufsize += sprintf(buf+bufsize,"# \n");
3364 print_marginal_order(buf, &bufsize, world);
3365 if (world->options->replicate)
3366 {
3367 if (world->options->replicatenum == 0)
3368 repmax = world->options->lchains;
3369 else
3370 repmax = world->options->replicatenum;
3371 }
3372 else
3373 repmax = 1;
3374 bufsize += sprintf(buf+bufsize,"# \n");
3375 bufsize += sprintf(buf+bufsize,"#$ ------------------------------------------------------------------ \n");
3376 bufsize += sprintf(buf+bufsize,"#$ begin [do not change this content]\n");
3377 bufsize += sprintf(buf+bufsize,"#$ Model=%s\n",world->options->custm);
3378 bufsize += sprintf(buf+bufsize,"#$ Mode2=%s\n",world->options->custm2);
3379 bufsize += sprintf(buf+bufsize,"#$ %li %li %li %li %li %i\n",world->loci, world->numpop,
3380 world->numpop2, (long) world->options->replicate, repmax, (int) world->options->usem);
3381 for (pop = 0; pop < world->numpop; pop++)
3382 {
3383 bufsize += sprintf(buf+bufsize,"#$ %s\n",data->popnames[pop]);
3384 }
3385 bufsize += sprintf(buf+bufsize,"#$ end\n");
3386 bufsize += sprintf(buf+bufsize,"#$ ------------------------------------------------------------------ \n");
3387 bufsize += sprintf(buf+bufsize,"# \n");
3388 bufsize += sprintf(buf+bufsize,"# remove the lines above and including @@@@@, this allows to use\n");
3389 bufsize += sprintf(buf+bufsize,"# Tracer (http://tree.bio.ed.ac.uk/software/tracer/) to inspect\n");
3390 bufsize += sprintf(buf+bufsize,"# this file. But be aware that the current Tracer program (October 2006)\n");
3391 bufsize += sprintf(buf+bufsize,"# only works with single-locus, single-replicate files\n");
3392 bufsize += sprintf(buf+bufsize,"# The migrate contribution folder contains a command line utility written\n");
3393 bufsize += sprintf(buf+bufsize,"# in PERL to split the file for Tracer, it's name is mba\n");
3394 bufsize += sprintf(buf+bufsize,"# @@@@@@@@\n");
3395 bufsize += sprintf(buf+bufsize,"Steps\tLocus\tReplicate\tlnPost\tlnDataL\tlnPrbGParam\tlnPrior\ttreeintervals\ttreelength");
3396
3397 visited = (boolean*) mycalloc(world->numpop2,sizeof(boolean));
3398 for(pa0=0;pa0<world->numpop2;pa0++)
3399 {
3400 if(bayes->map[pa0][1] == INVALID)
3401 continue;
3402 else
3403 {
3404 pa = bayes->map[pa0][1];
3405 if(visited[pa]==TRUE)
3406 continue;
3407 else
3408 visited[pa]=TRUE;
3409 }
3410 if(pa < world->numpop)
3411 {
3412 bufsize += sprintf(buf+bufsize,"\t%s_%li", "Theta",pa+1);
3413 }
3414 else
3415 {
3416 m2mm(pa,world->numpop,&frompop,&topop);
3417 bufsize += sprintf(buf+bufsize,"\t%s_%li_%li",
3418 (world->options->usem ? "M" : "xNm"), frompop+1, topop+1);
3419 }
3420 }
3421 if(world->bayes->mu)
3422 {
3423 bufsize += sprintf(buf+bufsize,"\t%s", "murate");
3424 }
3425 for(i=0;i<world->options->heated_chains;i++)
3426 bufsize += sprintf(buf+bufsize,"\tmL_%f", world->options->heat[i]);
3427 if(world->options->heated_chains>1)
3428 bufsize += sprintf(buf+bufsize,"\tmL_thermo");
3429 bufsize += sprintf(buf+bufsize,"\tmL_harmonic");
3430 bufsize += sprintf(buf+bufsize,"\n");
3431
3432 #ifdef MPI
3433 #ifdef PARALIO
3434 //wrapper function [see migrate_mpi.c]
3435 //fprintf(stdout,"%i>%li %li\n@@%s@@\n\n",myID,bufsize, strlen(buf),buf);
3436 mpi_mdim_send(&world->mpi_bayesmdimfile,buf,bufsize);
3437 #else
3438 #ifdef ZNZ
3439 znzprintf(file,"%s",buf);
3440 #else
3441 fprintf(file,"%s",buf);
3442 #endif
3443 #endif
3444 #else
3445 #ifdef ZNZ
3446 znzprintf(file,"%s",buf);
3447 #else
3448 fprintf(file,"%s",buf);
3449 #endif
3450 #endif
3451
3452 myfree(visited);
3453 myfree(buf);
3454 }
3455
3456 ///
3457 /// finds the mode of the results histogram
3458 /// fills the bayes->histogram[locus].modes
find_bayes_mode(bayes_fmt * bayes,long locus,long numparam)3459 void find_bayes_mode(bayes_fmt *bayes, long locus, long numparam)
3460 {
3461
3462 long bin;
3463 long j;
3464 long numbin=0;
3465 long pa0, pa;
3466 long *bins = bayes->histogram[locus].bins;
3467
3468 MYREAL tmp;
3469 MYREAL *modes = bayes->histogram[locus].modes;
3470 MYREAL *mini = bayes->histogram[locus].minima;
3471 float *results = bayes->histogram[locus].results;
3472
3473 for(pa0=0; pa0 < numparam; pa0++)
3474 {
3475 // if custom migration matrix is set to zero
3476 // continue but ignore rate
3477 //if(pa < bayes->numpop2)
3478 // {
3479 // if(strchr("0c",bayes->custm2[pa]))
3480 // continue;
3481 // }
3482 if(bayes->map[pa0][1] == INVALID)
3483 continue;
3484 else
3485 pa = bayes->map[pa0][1];
3486
3487 for(j=0; j < pa; j++)
3488 numbin += bins[j];
3489
3490 tmp = 0. ;
3491 for(bin=0;bin<bins[pa]; bin++)
3492 {
3493 if (tmp < (double) results[numbin + bin])
3494 {
3495 tmp = (double) results[numbin+bin];
3496 modes[pa] = (MYREAL) bin * bayes->deltahist[pa] + mini[pa];
3497 }
3498 }
3499 // numbin += bins[pa];
3500 }
3501 }
3502
3503 ///
3504 /// kernel smoother uses params not histogram cells
3505 /// a window of size 2*el + 1 is averaged and replaces the central value
3506 /// the array with the values is prepended with el zeros and also appended with el zeros,
3507 /// and the output replaces the input array,
3508 /// this methods uses an Epanechnikov kernel [k(x) = 3/4 (1-x^2) for -1<=x<=1
kernel_smooth(float * x,long xelem,float * fx,long fxelem,long el,MYREAL mini,MYREAL maxi)3509 void kernel_smooth(float *x, long xelem, float *fx, long fxelem, long el, MYREAL mini, MYREAL maxi)
3510 {
3511 float d;
3512 float dd;
3513 float xsum;
3514 float *xx;
3515 float *weight;
3516 float total;
3517 float totalx=0.;
3518 float totalxsum = 0.;
3519 long i, j, jj, w;
3520 long el2 = 2 * el + 1;
3521 weight = (float *) mycalloc(el2, sizeof(float));
3522 if(xelem==0)
3523 return;
3524 xx = (float *) mycalloc(el2 + xelem,sizeof(float));
3525
3526 memcpy(xx+el, x, sizeof(float) * xelem);
3527 weight[el] = 0.75;
3528 total = 1.;
3529 d = (float) 2./(el2-1.);
3530 for(j = el+1; j < el2; j++)
3531 {
3532 dd = (-1. + d*j);
3533
3534 dd *= dd;
3535 weight[j] = (float) 0.75 * (1. - dd);
3536 total += 2 * weight[j];
3537 }
3538 // printf("weight=%f\n",total);
3539 weight[el] /= total;
3540
3541 for(j=el+1, jj = el-1; j < el2; j++, jj--)
3542 {
3543 weight[j] /= total;
3544 weight[jj] = weight[j];
3545 }
3546
3547 for(i=el; i < xelem + el; i++)
3548 {
3549 xsum = 0.;
3550 for(j=i - el, w=0; j < i + el + 1; j++, w++)
3551 xsum += (xx[i]-xx[j]) * weight[w];
3552 totalxsum += xsum;
3553 totalx += x[i-el];
3554 //fprintf(stdout,"%20.20f %20.20f\n",x[i-el], xsum);
3555 fx[i-el] = xsum;
3556 //x[i-el] = xsum/el2;
3557 }
3558 // fprintf(stdout,"%20.20f %20.20f\n",totalx, totalxsum);
3559 if(totalxsum>0.0)
3560 totalx /= totalxsum;
3561 else
3562 {
3563 myfree(xx);
3564 myfree(weight);
3565 return;
3566 }
3567 for(i=0; i < xelem; i++)
3568 {
3569 fx[i] *= totalx;
3570 //fprintf(stdout,"%20.20f\n",x[i]);
3571 }
3572 myfree(xx);
3573 myfree(weight);
3574 }
3575
3576
3577 ///
3578 /// moving average smoothing
3579 /// a window of size 2*el + 1 is averaged and replaces the central value
3580 /// the array with the values is prepended with el zeros and also appended with el zeros,
3581 /// and the output replaces the input array,
3582 /// this methods uses an Epanechnikov kernel [k(x) = 3/4 (1-x^2) for -1<=x<=1
bayes_smooth(float * x,long xelem,long el,boolean lastfirst)3583 void bayes_smooth(float *x, long xelem, long el, boolean lastfirst)
3584 {
3585 float d;
3586 float dd;
3587 float xsum;
3588 float *xx;
3589 float *weight;
3590 float total;
3591 float totalx=0.;
3592 float totalxsum = 0.;
3593 long i, j, jj, w;
3594 long el2 = 2 * el + 1;
3595 weight = (float *) mycalloc(el2, sizeof(float));
3596 if(xelem==0)
3597 return;
3598 xx = (float *) mycalloc(el2 + xelem,sizeof(float));
3599
3600 memcpy(xx+el, x, sizeof(float) * xelem);
3601 if(lastfirst)
3602 {
3603 for(i=0; i < el; i++)
3604 {
3605 xx[i] = xx[el];
3606 xx[i+xelem] = xx[xelem-1];
3607 }
3608 }
3609 weight[el] = 0.75;
3610 total = 1.;
3611 d = (float) 2./(el2-1.);
3612 for(j = el+1; j < el2; j++)
3613 {
3614 dd = (-1. + d*j);
3615 dd *= dd;
3616 weight[j] = (float) 0.75 * (1. - dd);
3617 total += 2 * weight[j];
3618 }
3619 // printf("weight=%f\n",total);
3620 weight[el] /= total;
3621
3622 for(j=el+1, jj = el-1; j < el2; j++, jj--)
3623 {
3624 weight[j] /= total;
3625 weight[jj] = weight[j];
3626 }
3627
3628 for(i=el; i < xelem + el; i++)
3629 {
3630 xsum = 0.;
3631 for(j=i - el, w=0; j < i + el + 1; j++, w++)
3632 xsum += xx[j] * weight[w];
3633 totalxsum += xsum;
3634 totalx += x[i-el];
3635 //fprintf(stdout,"%20.20f %20.20f\n",x[i-el], xsum);
3636 x[i-el] = xsum;
3637 //x[i-el] = xsum/el2;
3638 }
3639 // fprintf(stdout,"%20.20f %20.20f\n",totalx, totalxsum);
3640 if(totalxsum>0.0)
3641 totalx /= totalxsum;
3642 else
3643 {
3644 myfree(xx);
3645 myfree(weight);
3646 return;
3647 }
3648 for(i=0; i < xelem; i++)
3649 {
3650 x[i] *= totalx;
3651 //fprintf(stdout,"%20.20f\n",x[i]);
3652 }
3653 myfree(xx);
3654 myfree(weight);
3655 }
3656
3657 ///
3658 /// find the credibility set by using the Highest Posterior Probability Density(HPD) and the standard point
3659 /// descriptors. the statistics are filled into the statistics part of the bayes structure (datastore)
calc_hpd_credibility(bayes_fmt * bayes,long locus,long numpop2,long numparam)3660 void calc_hpd_credibility(bayes_fmt *bayes,long locus, long numpop2, long numparam)
3661 {
3662 const MYREAL alpha95 = 0.95;
3663 const MYREAL alpha50 = 0.50;
3664
3665 //long j;
3666 long li;
3667 long pa0, pa;
3668 long rpa; // the many locus-rates are using only a single rate prior and recording (per locus)
3669 // and we need to distinguish here
3670 long numbins=0;
3671 long numbinsall = 0;
3672 long locmedian;
3673
3674 pair *parts; // pairs of doubles
3675 long csum = 0;
3676 boolean *smoothy=NULL;
3677
3678 MYREAL biggestcolumn;
3679 MYREAL total;
3680 MYREAL total1;
3681 MYREAL cdf;
3682 MYREAL cutoff95;
3683 MYREAL cutoff50;
3684 MYREAL delta;
3685 MYREAL tmp;
3686
3687 MYREAL * mini;
3688 MYREAL * maxi;
3689 float *results;
3690 long *bins;
3691 MYREAL *modes;
3692 MYREAL *medians;
3693 MYREAL *cred50;
3694 MYREAL *cred95;
3695 char *set50;
3696 char *set95;
3697
3698 bins = bayes->histogram[locus].bins;
3699 modes = bayes->histogram[locus].modes;
3700 medians = bayes->histogram[locus].medians;
3701 mini = bayes->histogram[locus].minima;
3702 maxi = bayes->histogram[locus].maxima;
3703 cred50 = bayes->histogram[locus].cred50l;
3704 cred95 = bayes->histogram[locus].cred95l;
3705 set50 = bayes->histogram[locus].set50;
3706 set95 = bayes->histogram[locus].set95;
3707 results = bayes->histogram[locus].results;
3708
3709 smoothy = (boolean *) mycalloc(numparam, sizeof(boolean));
3710
3711 for(pa0=0; pa0 < numpop2 + ((numparam-numpop2)>=1); pa0++)
3712 {
3713 if(bayes->map[pa0][1] == INVALID)
3714 continue;
3715 else
3716 pa = bayes->map[pa0][1];
3717
3718 // so that we can check whether the bins got smoothed already or not
3719 smoothy[pa0] = FALSE;
3720
3721 if(pa >= numpop2)
3722 rpa = numpop2;
3723 else
3724 rpa = pa;
3725
3726 if(csum<bins[rpa])
3727 csum = bins[rpa];
3728 }
3729 parts = (pair *) mycalloc(csum, sizeof(pair));
3730
3731 for(pa0=0; pa0 < numpop2 + ((numparam-numpop2)>=1); pa0++)
3732 {
3733 // if custom migration matrix is set to zero
3734 // continue
3735 // if(pa < bayes->numpop2)
3736 // {
3737 // if(strchr("0c",bayes->custm2[pa]))
3738 // continue;
3739 // }
3740 if(bayes->map[pa0][1] == INVALID)
3741 continue;
3742 else
3743 pa = bayes->map[pa0][1];
3744
3745 // numbins=0;
3746 //for(j=0; j < pa; j++)
3747 // numbins += bins[j];
3748 if(pa<pa0)
3749 continue;
3750
3751
3752 if(pa >= numpop2)
3753 rpa = numpop2;
3754 else
3755 rpa = pa;
3756
3757
3758 numbinsall += bins[rpa];
3759 numbins = numbinsall - bins[rpa];
3760
3761 delta = (maxi[rpa] - mini[rpa]) / bins[rpa];
3762
3763 parts = (pair *) memset(parts,0,csum*sizeof(pair));
3764
3765 //
3766 // calculate the total and then the average
3767 total = 0.;
3768 biggestcolumn = 0.;
3769 locmedian = 0;
3770 //
3771 // smooth the results before we calculate means etc
3772 if(!smoothy[pa])
3773 {
3774 // bayes_smooth(results+numbins,bins[pa], bins[pa]/50);
3775 bayes_smooth(results+numbins,bins[rpa], MAX(3,bins[rpa]/50),TRUE);
3776 smoothy[pa] = TRUE;
3777 }
3778 total1 = 0.0;
3779 for(li=0;li<bins[rpa]; li++)
3780 {
3781 tmp = (double) results[numbins + li];
3782 total1 += tmp;
3783 }
3784 total=0.0;
3785 for(li=0;li<bins[rpa]; li++)
3786 {
3787 //12-15-13 revision posterior printing: results[numbins + li] /= (float) total1;
3788 tmp = (double) results[numbins + li];
3789 parts[li][1] = tmp;
3790 total += tmp;
3791 parts[li][0] = delta/2 + mini[rpa] + li * delta;
3792 if(tmp > biggestcolumn)
3793 {
3794 biggestcolumn = tmp;
3795 locmedian = li;
3796 }
3797 }
3798 //
3799 // mode is the value with largest column
3800 modes[pa] = parts[locmedian][0];
3801 //
3802 // median is at 0.5 of the cumulative probability distribution
3803 li = 0;
3804 tmp = 0;
3805 while(tmp < total/2 && li < bins[rpa]-1)
3806 {
3807 tmp += parts[li][1];
3808 li++;
3809 }
3810 medians[pa] = parts[li][0];
3811 //
3812 // sort parts using the histogram y-axis for easy finding of quantiles
3813 paired_qsort2(parts, bins[rpa]);
3814
3815 // find HPD crediblity intervals for 50% and 95% credibility intervals
3816 // for 50% intervals, starting from the highest value (mode) and moving down
3817 // until the cumulative sum / total is >=0.5
3818 cdf = 0.;
3819 li = bins[rpa];
3820 while(cdf < alpha50 && li>0)
3821 {
3822 li--;
3823 cdf += parts[li][1] /total;
3824 }
3825 cutoff50 = parts[li][1]; // anything lower than this will be in the 50% credibility set
3826 // or 95% interval
3827 while(cdf < alpha95 && li>0)
3828 {
3829 li--;
3830 cdf += parts[li][1] /total;
3831 }
3832 cutoff95 = parts[li][1]; // anything lower than this will be in the 95% credibility set
3833
3834 // fill the credibility sets (are printed into Bayesfile
3835 for(li=numbins;li<numbins + bins[rpa]; li++)
3836 {
3837 if(results[li] < cutoff95)
3838 {
3839 set50[li] = '0';
3840 set95[li] = '0';
3841 }
3842 else
3843 {
3844 set95[li] = '1';
3845 if(results[li] < cutoff50)
3846 set50[li] = '0';
3847 else
3848 set50[li] = '1';
3849 }
3850 }
3851 // fill the innermost 50% levels, smooth over adjacent bins
3852 //
3853 // start at the modus and go left
3854 li = numbins + locmedian;
3855 while (set50[li] == '1' && li > numbins)
3856 --li ;
3857 cred50[pa] = mini[rpa] + (li-numbins) * delta;// + delta/2.;
3858 while (set95[li] == '1' && li > numbins)
3859 --li ;
3860 cred95[pa] = mini[rpa] + (li-numbins) * delta;// + delta/2.;
3861
3862 // start at the modus and go right
3863 li = numbins + locmedian;
3864 while (set50[li] == '1' && li < numbins + bins[rpa])
3865 ++li ;
3866 cred50[pa + numparam] = maxi[rpa] - (bins[rpa] - li + numbins) * delta;// - delta/2.;
3867 while (set95[li] == '1' && li < numbins + bins[rpa])
3868 ++li ;
3869 cred95[pa + numparam] = maxi[rpa] - (bins[rpa] - li + numbins) * delta;// - delta/2.;
3870
3871 //numbins += bins[rpa];
3872 }
3873 myfree(parts);
3874 myfree(smoothy);
3875 }
3876
3877 ///
3878 /// combines over loci
bayes_combine_loci(world_fmt * world)3879 void bayes_combine_loci(world_fmt * world)
3880 {
3881 long targetbin;
3882 long locus;
3883 long loci = world->loci;
3884 long pa0;
3885 long pa=0;
3886 long rpa;
3887 long sourcebin;
3888 long sourcesumbin;
3889 long targetsumbin;
3890 long np = world->numpop2 + world->bayes->mu;// *world->loci
3891 long bin;
3892 long *bins;
3893
3894 MYREAL count;
3895 MYREAL sumprob;
3896 //MYREAL maxval;
3897 MYREAL *maxvala;
3898 MYREAL *maxpriors;
3899 MYREAL total;
3900 // MYREAL value;
3901 MYREAL *totals;
3902 MYREAL *totalspriors;
3903 bayes_fmt * bayes = world->bayes;
3904 //
3905 // records whether a cell in the all-loci histogram was filled or not
3906 char *touched;
3907 MYREAL *priors;
3908 //MYREAL *s_priors_minval;
3909 //MYREAL *elements;
3910 //
3911 // source is the locus histogram, already filled in by the locus workers or then by the locus loop
3912 bayeshistogram_fmt * source;
3913 //
3914 //MYREAL s_prior;
3915 float inverse_loci_minus_1 = 1;
3916 long real_loci = 0;//tlh 2/10/13 world->loci;
3917 MYREAL halfdelta;
3918 // target is the summary over all loci
3919 bayeshistogram_fmt *target = &bayes->histogram[world->loci];
3920 float *results;
3921 MYREAL *minima;
3922 MYREAL *maxima;
3923 boolean *visited;
3924 MYREAL *mini;
3925 MYREAL *maxi;
3926 MYREAL *adjmaxi;
3927 MYREAL *sumlocus;
3928 MYREAL sum =0.0;
3929 MYREAL w;// width of bin
3930 MYREAL logw; //log width of bins
3931 long tts;
3932 MYREAL countsum;
3933 long *counts;
3934 MYREAL *locusweight = world->data->locusweight;
3935 MYREAL allpriortotal;
3936 // FILE *targetfile = fopen("targettemp","w"); // DEBUG 2/11/13
3937 //
3938 // allocation of space for maximal values for parameters and set to some large negative number (logs!)
3939 doublevec1d(&maxvala, np);
3940 doublevec1d(&maxpriors, np);
3941 doublevec1d(&totalspriors, np);
3942 doublevec1d(&totals, np);
3943 doublevec1d(&sumlocus, world->loci);
3944 visited = (boolean *) mycalloc(np,sizeof(boolean));
3945
3946
3947 // scaling factor counter used to average for bins that are missing
3948 // if not all elements are filled for every locus than the scaling factor
3949 // for that element should be zero
3950 counts = (long *) mycalloc(bayes->histogram[world->loci].binsum,sizeof(long));
3951
3952 for(pa0=0;pa0<np; pa0++)
3953 {
3954 //maxvala[pa0] = -MYREAL_MAX;
3955 if(bayes->map[pa0][1] == INVALID)
3956 continue;
3957 else
3958 {
3959 pa = bayes->map[pa0][1];
3960 if(pa0 >= world->numpop2)
3961 rpa=world->numpop2;
3962 else
3963 rpa = pa;
3964 bayes->histogram[loci].minima[rpa] = MYREAL_MAX;
3965 bayes->histogram[loci].maxima[rpa] = -MYREAL_MAX;
3966 }
3967 }
3968 //
3969 // set the all-loci minima and maxima
3970 for(locus=0;locus<world->loci; locus++)
3971 {
3972 if(world->data->skiploci[locus])
3973 {
3974 // real_loci -= 1;
3975 continue;
3976 }
3977 real_loci += (long) locusweight[locus];
3978 #ifdef DEBUG
3979 //invariant loci treatment
3980 printf("%i> locus=%li, realoci=%li, weight=%f\n",myID,locus,real_loci,locusweight[locus]);
3981 #endif
3982 mini = world->bayes->histogram[locus].minima;
3983 maxi = world->bayes->histogram[locus].maxima;
3984 adjmaxi = world->bayes->histogram[locus].adjmaxima;
3985
3986 find_bayes_min_max(world, &mini, &maxi, &adjmaxi);
3987 adjust_bayes_min_max(world,&mini,
3988 &maxi,
3989 &bayes->histogram[world->loci].minima,
3990 &bayes->histogram[world->loci].maxima);
3991 //
3992 // printf("%i> BAYES A LOCI: locus=%li, binsum=%li\n",myID, locus, bayes->histogram[locus].binsum);
3993 }
3994 // allocation of all-loci-histogram table into results, and for the 50% and 95% credibility sets
3995 adjust_bayes_bins(world, world->loci);
3996 printf("%i> BAYES OVER LOCI: locus=%li, binsum=%li\n",myID, world->loci, bayes->histogram[world->loci].binsum);
3997 bins = target->bins;
3998 results = target->results;
3999 minima = target->minima;
4000 maxima = target->maxima;
4001 //
4002 // records whether a cell in the all-loci histogram was filled or not
4003 touched = (char *) mycalloc(bayes->histogram[loci].binsum + (world->loci*2)+1, sizeof(char));
4004 printf("len(touched)=%li\n",bayes->histogram[loci].binsum + (world->loci*2)+1);
4005 // record prior to correct for: P(theta)^(1/n-1)
4006 priors = (MYREAL *) mycalloc(bayes->histogram[loci].binsum + (world->loci*2)+1, sizeof(MYREAL));
4007 //s_priors_minval = (MYREAL *) mycalloc(bayes->histogram[loci].binsum + (world->loci*2)+1, sizeof(MYREAL));
4008 //elements = (MYREAL *) mycalloc(bayes->histogram[loci].binsum + (world->loci*2)+1, sizeof(MYREAL));
4009 //
4010 // this will remove the excess priors
4011 inverse_loci_minus_1 = 1.0-real_loci;
4012 // calculate the locus weights
4013 for(locus=0;locus<world->loci; locus++)
4014 {
4015 if(world->data->skiploci[locus])
4016 continue;
4017 for(pa0=0; pa0 < np; pa0++)
4018 {
4019 if(bayes->map[pa0][1] == INVALID)
4020 continue;
4021 else
4022 pa = bayes->map[pa0][1];
4023
4024 if(pa0 >= world->numpop2)
4025 pa=world->numpop2;
4026 if(visited[pa]==TRUE)
4027 continue;
4028 visited[pa] = TRUE;
4029 sumlocus[locus] += (MYREAL) world->bayes->histtotal[locus*np+pa];
4030 }
4031 sum += sumlocus[locus];
4032 // reset visited
4033 memset(visited,0,sizeof(boolean)*np);
4034 }
4035 // set all bins in target->results to a low log(value)
4036 for(bin=0;bin<bayes->histogram[world->loci].binsum; bin++)
4037 {
4038 results[bin] = 0.;
4039 touched[bin] = ' ';
4040 //s_priors_minval[bin] = -HUGE;
4041 }
4042 //
4043 // combine previous results into target->results
4044 // over all loci
4045 // ln mL = ln K * Sum ln mL_i
4046 // ln K = Prod_param Sum_bins Exp[ln(dx) + ln p(param,bin)*(1-loci) + Sum_loci ln posterior(param,bin, locus)]
4047 // 1. 2. 3. 7. 4. 5. 6.
4048 for(locus=0;locus<world->loci; locus++)
4049 {
4050 if(world->data->skiploci[locus])
4051 continue;
4052 sourcesumbin = 0;
4053 targetsumbin = 0;
4054 source = &bayes->histogram[locus];
4055 memset(visited,0,sizeof(boolean)*np);
4056 // over all parameters
4057 for(pa0=0; pa0 < np; pa0++)
4058 {
4059 if(bayes->map[pa0][1] == INVALID)
4060 continue;
4061 else
4062 pa = bayes->map[pa0][1];
4063
4064 if(pa0 >= world->numpop2)
4065 rpa=world->numpop2;
4066 else
4067 rpa = pa;
4068
4069 w = world->bayes->deltahist[rpa];
4070 //logw = log(w); //7. ln(dx)
4071
4072 if(visited[rpa]==TRUE)
4073 continue;
4074 visited[rpa] = TRUE;
4075 halfdelta = world->bayes->deltahist[rpa]*0.5;
4076 targetbin = (long) ((source->minima[rpa] - minima[rpa]) / w);
4077 if(source->results != NULL)
4078 {
4079 countsum=0.0;
4080 for(sourcebin=0; sourcebin < source->bins[rpa]; sourcebin++)
4081 {
4082 tts = targetsumbin + targetbin + sourcebin;
4083 if(source->results[sourcesumbin + sourcebin] > 0.)
4084 {
4085 // frequencies are normalized per locus and need adjustment for all loci
4086 //invariant loci treatment
4087 count = (double) source->results[sourcesumbin + sourcebin];// * locusweight[locus] ;
4088 touched[tts] = '1';
4089 results[tts] += (float) log(count); //5.//invariant loci treatment
4090 counts[tts] += locusweight[locus];//1;//invariant loci treatment
4091 countsum += count;
4092 } /**if sourcebin is not zero**/
4093 }/**over all sourcebins**/
4094 sourcesumbin += source->bins[rpa];
4095 targetsumbin += target->bins[rpa];
4096 }/**source!=null**/
4097 }/**parameter**/
4098 }/**loci**/
4099 //
4100 // adjust the total and use the overflow safeguard
4101 targetsumbin = 0;
4102 memset(visited,0,sizeof(boolean)*np);
4103 for(pa0 = 0; pa0 < np; pa0++)
4104 {
4105 if(world->bayes->map[pa0][1] == INVALID)
4106 continue;
4107 else
4108 pa = world->bayes->map[pa0][1];
4109
4110 if(pa0 >= world->numpop2)
4111 rpa=world->numpop2;
4112 else
4113 rpa = pa;
4114
4115 w = world->bayes->deltahist[rpa];
4116 logw = log(w); //7. ln(dx)
4117
4118 if(visited[rpa]==TRUE)
4119 continue;
4120 visited[rpa] = TRUE;
4121
4122 maxvala[rpa] = -HUGE;
4123 maxpriors[rpa] = -HUGE;
4124 // 2/11/13 check by makeing sure results are scaled correctly
4125 for(bin=targetsumbin; bin < targetsumbin + bins[rpa]; bin++)
4126 {
4127 MYREAL h = world->bayes->deltahist[rpa];
4128 MYREAL v = source->minima[rpa] + (bin-targetsumbin) * h + h/2;
4129 if (v-h/2. == 0.0)
4130 priors[bin]= 0.0;
4131 else
4132 priors[bin] = scaling_prior(world,rpa,v) ;//+ logw;
4133 if(priors[bin] > maxpriors[rpa])
4134 maxpriors[rpa] = (MYREAL) priors[bin];
4135 if(touched[bin] == '1')
4136 {
4137 if(counts[bin]>0)
4138 {
4139 // results[bin] = (float) real_loci*results[bin]/counts[bin];
4140 }
4141 else
4142 {
4143 results[bin] = (float) -__FLT_MAX__;
4144 }
4145 if(results[bin] > maxvala[rpa])
4146 maxvala[rpa] = (MYREAL) results[bin];
4147 }
4148 }
4149 //now find total
4150 totals[rpa]=0.0;
4151 totalspriors[rpa]=0.0;
4152 for(bin=targetsumbin; bin < targetsumbin + bins[rpa]; bin++)
4153 {
4154 totalspriors[rpa] += exp(priors[bin] - maxpriors[rpa]);
4155 if(touched[bin] == '1')
4156 {
4157 totals[rpa] += exp(results[bin]-maxvala[rpa]);
4158 }
4159 }
4160 totals[rpa] = log(totals[rpa]) + maxvala[rpa];
4161 totalspriors[rpa] = log(totalspriors[rpa]) + maxpriors[rpa] + logw;
4162 allpriortotal=0.0;
4163 maxvala[rpa]= -HUGE;
4164 for(bin=targetsumbin; bin < targetsumbin + bins[rpa]; bin++)
4165 {
4166 if(touched[bin] == '1')
4167 {
4168 results[bin] += priors[bin] * (1.0-counts[bin]);
4169 if(results[bin] > maxvala[rpa])
4170 maxvala[rpa] = (MYREAL) results[bin];
4171 }
4172 priors[bin] = exp(priors[bin]);
4173 allpriortotal += priors[bin] * w;
4174 }
4175 target->means[rpa] = 0.;
4176 sumprob = 0.;
4177 for(bin=targetsumbin; bin < targetsumbin + bins[rpa]; bin++)
4178 {
4179 if(touched[bin]=='1')
4180 {
4181 double x = exp(results[bin] - maxvala[rpa]);
4182 results[bin] = (float) x;
4183 sumprob += x * w;
4184 // calculate the integral as a sum of exp(RESULTS) scaled my the largest value found
4185 }
4186 else
4187 {
4188 results[bin] = 0.;
4189 }
4190 }
4191 if(bayes->maxmaxvala < maxvala[rpa])
4192 bayes->maxmaxvala = maxvala[rpa];
4193 if(sumprob > 0.0)
4194 {
4195 //log the integral(SUMPROB) and adjust with the biggest value
4196 bayes->scaling_factors[rpa] = log(sumprob) + maxvala[rpa];
4197 #ifdef DEBUG
4198 printf("%i> scalingfactor[%li]=%f (sumprob=%f maxvala=%f)\n",myID, rpa,bayes->scaling_factors[rpa],sumprob,maxvala[rpa]);
4199 #endif
4200 }
4201 else
4202 bayes->scaling_factors[rpa] = -HUGE;
4203 total = 0.0;
4204 for(bin=targetsumbin; bin < targetsumbin + target->bins[rpa]; bin++)
4205 {
4206 if(touched[bin]=='1')
4207 {
4208 results[bin] /= (float) sumprob;//this takes care of the maxvala scaling
4209 total += results[bin];
4210 target->means[pa] += results[bin] * (minima[rpa] + world->bayes->deltahist[rpa]/2.
4211 + world->bayes->deltahist[rpa] * (bin-targetsumbin));
4212 }
4213 }
4214 target->means[pa] /= total;
4215 #ifdef DEBUG
4216 fprintf(stdout,"allpriortotal=%f total=%f sumprob=%f \n== end summary \n",allpriortotal,total, sumprob);
4217 #endif
4218 targetsumbin += bins[rpa];
4219 }
4220 covariance_summary(world);
4221 myfree(touched);
4222 bayes->priors = priors; // 2/10/13
4223 //myfree(s_priors_minval);
4224 //myfree(elements);
4225 myfree(maxvala);
4226 myfree(maxpriors);
4227 myfree(totals);
4228 myfree(totalspriors);
4229
4230 // calculate the credibility intervals, histograms, means etc
4231 calc_hpd_credibility(bayes, loci, world->numpop2, world->numpop2+(world->bayes->mu));
4232 myfree(visited);
4233 myfree(sumlocus);
4234 myfree(counts);
4235 }
4236
4237
4238 /// calculates the HPC credibility set and also calculates the posterior singlelocus-histogram
calculate_credibility_interval(world_fmt * world,long locus)4239 void calculate_credibility_interval(world_fmt * world, long locus)
4240 {
4241 MYREAL *mini;
4242 MYREAL *maxi;
4243 MYREAL *adjmaxi;
4244 // long i;
4245 long np = world->numpop2 + (world->bayes->mu * world->loci);
4246
4247 mini = world->bayes->histogram[locus].minima;
4248 maxi = world->bayes->histogram[locus].maxima;
4249 adjmaxi = world->bayes->histogram[locus].adjmaxima;
4250
4251 find_bayes_min_max(world, &mini, &maxi, &adjmaxi);
4252 adjust_bayes_bins(world, locus);
4253
4254 // construct histogram
4255 construct_locus_histogram(world, locus, mini, maxi, &world->bayes->histogram[locus].results);
4256
4257 // calc_credibility
4258 calc_hpd_credibility(world->bayes, locus, world->numpop2, np);
4259 }
4260
4261 ///
4262 /// adjust the parameters so that the new set is consistent with the custom migration matrix
4263 void
bayes_set_param(MYREAL * param,MYREAL newparam,long which,char * custm2,long numpop)4264 bayes_set_param (MYREAL *param, MYREAL newparam, long which, char *custm2, long numpop)
4265 {
4266 char migtype = custm2[which];
4267
4268 long frompop = 0;
4269 long topop = 0;
4270 long i;
4271
4272 MYREAL nmig;
4273
4274 // check custm matrix and then decide
4275 switch(migtype)
4276 {
4277 case 'C':
4278 case 'c':
4279 break;
4280 case 's':
4281 m2mm (which, numpop, &frompop, &topop);
4282 param[mm2m(frompop,topop,numpop)] = newparam; // makes the
4283 param[mm2m(topop,frompop,numpop)] = newparam; // two parameter equal
4284 break;
4285 case 'S':
4286 m2mm (which, numpop, &frompop, &topop);
4287 param[mm2m(frompop,topop,numpop)] = newparam; // makes the
4288 param[mm2m(topop,frompop,numpop)] = param[mm2m(frompop,topop,numpop)] * param[topop] / param[frompop]; // two parameter equal
4289 break;
4290 case 'm':
4291 if(which<numpop)
4292 {
4293 for(i=0;i<numpop; ++i)
4294 {
4295 if(custm2[i]=='m')
4296 param[i] = newparam;
4297 }
4298 }
4299 else
4300 {
4301 for(i=numpop;i<numpop*numpop; ++i)
4302 {
4303 if(custm2[i]=='m')
4304 param[i] = newparam;
4305 }
4306 }
4307 break;
4308 case 'M':
4309 if(which<numpop)
4310 {
4311 for(i=0;i<numpop; ++i)
4312 {
4313 if(strchr("Mm",custm2[i]))
4314 param[i] = newparam;
4315 }
4316 }
4317 else
4318 {
4319 m2mm (which, numpop, &frompop, &topop);
4320 nmig = param[topop] * newparam;
4321 for(i=numpop;i<numpop*numpop; ++i)
4322 {
4323 if(custm2[i]=='M')
4324 {
4325 m2mm (i, numpop, &frompop, &topop);
4326 param[i] = nmig / param[topop]; // makes the
4327 }
4328 }
4329 }
4330 break;
4331 case '*':
4332 default:
4333 param[which] = newparam;
4334 break;
4335 }
4336 }
4337