1 /*------------------------------------------------------
2 Maximum likelihood estimation
3 of migration rate and effectice population size
4 using a Metropolis-Hastings Monte Carlo algorithm
5 -------------------------------------------------------
6 P A R A M E T E R E S T I M A T I O N R O U T I N E S
7
8 estimates parameter for each locus
9 using a Broyden minimization
10
11
12 send questions concerning this software to:
13 Peter Beerli
14 beerli@fsu.edu
15
16 Copyright 1997-2002 Peter Beerli and Joseph Felsenstein
17 Copyright 2003-2008 Peter Beerli
18 Copyright 2009-2012 Peter Beerli and Michal Palczewski
19
20 Permission is hereby granted, free of charge, to any person obtaining a copy
21 of this software and associated documentation files (the "Software"), to deal
22 in the Software without restriction, including without limitation the rights
23 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
24 of the Software, and to permit persons to whom the Software is furnished to do
25 so, subject to the following conditions:
26
27 The above copyright notice and this permission notice shall be included in all copies
28 or substantial portions of the Software.
29
30 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
31 INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
32 PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
33 HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
34 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
35 OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
36
37 $Id: broyden.c 2169 2013-08-24 19:02:04Z beerli $
38 started 1996
39 -------------------------------------------------------*/
40 /*! \file broyden.c
41 Basic routines for maximum likelihood parameter estimation.
42
43 */
44
45 #include "migration.h"
46 #include "tools.h"
47 #include "world.h"
48 #include "random.h"
49 #include "combroyden.h"
50 #include "joint-chains.h"
51 #include "sighandler.h"
52 #include "migrate_mpi.h"
53
54 #ifdef DMALLOC_FUNC_CHECK
55 #include <dmalloc.h>
56 #endif
57
58 extern MYREAL norm (MYREAL *d, long size);
59
60
61 /* prototypes ----------------------------------------- */
62 MYREAL absmaxvec (MYREAL *v, long n);
63 void create_nr (nr_fmt * nr, world_fmt * world, long G, long profilenum,
64 long thislocus, long repkind, long rep);
65 void reset_hess (MYREAL **hess, long n);
66 void destroy_nr (nr_fmt * nr, world_fmt * world);
67 MYREAL calc_locus_like (nr_fmt * nr, MYREAL *param, MYREAL *lparam,
68 long locus);
69 void param_all_adjust (MYREAL *xv, nr_fmt *nr); //worldoption_fmt * wopt, long numpop);
70 #ifdef LONGSUM
71 void gradient_longsum (MYREAL *d, nr_fmt * nr, long locus);
72 #else /*LONGSUM*/
73 void gradient (MYREAL *d, nr_fmt * nr, long locus);
74 #endif /*LONGSUM*/
75 MYREAL probG (MYREAL *param, MYREAL *lparam, tarchive_fmt * tl, nr_fmt * nr,
76 long locus);
77 MYREAL probG2 (MYREAL *param, MYREAL *lparam, MYREAL *sm, MYREAL *kt,
78 MYREAL *km, MYREAL *p, MYREAL *mindex, int *msta, int *me,
79 long numpop);
80 void probG3 (MYREAL *apg, MYREAL *apg0r, timearchive_fmt * tyme, long numpop,
81 MYREAL *apgmax, MYREAL *param, MYREAL *lparam, MYREAL *sm);
82 MYINLINE MYREAL probG4 (MYREAL *fullparam, MYREAL *data, int stop);
83 MYREAL sum_mig (MYREAL *param, long msta, long msto);
84 void calc_cov (MYREAL **dd, MYREAL *d, MYREAL *param, long n);
85 boolean is_singular (MYREAL **dd, long n);
86 void print_contribution (nr_fmt * nr, timearchive_fmt ** atl, long G);
87
88 void calc_dv (MYREAL *dv, MYREAL **hess, MYREAL *gxv, long n);
89 MYREAL calc_line (helper_fmt * helper, MYREAL a, MYREAL b, MYREAL c,
90 MYREAL (*psi) (MYREAL lamda, helper_fmt * helper));
91 void calc_hessian (MYREAL **hess, long n, MYREAL *delta, MYREAL *gama);
92 MYREAL psi (MYREAL lamda, helper_fmt * helper, MYREAL *param, MYREAL *lparam);
93 void grad2loggrad (MYREAL *param, long *indeks, MYREAL *d, long nn,
94 long profilenum);
95 void log_param0 (MYREAL *param, MYREAL *lparam, long nn);
96 void copies2lcopies (timearchive_fmt * atl);
97 void create_apg0 (MYREAL *apg0, nr_fmt * nr, timearchive_fmt * tyme,
98 long locus);
99
100
101 void force_sametheta (MYREAL *param, worldoption_fmt * wopt, long numpop);
102 void force_samemigration (MYREAL *param, worldoption_fmt * wopt, long numpop);
103 void calc_same_d (MYREAL *grad, nr_fmt * nr, long nn, long start, long stop);
104 void calc_symmetric_d (MYREAL *grad, nr_fmt * nr, long nn, long start,
105 long stop);
106 void force_symmetric_d (MYREAL *gxv, long model, nr_fmt * nr, long nn);
107 void check_symmetric_d (MYREAL *gxv, nr_fmt * nr, long nn);
108 void check_matrix_arbitrary (MYREAL *param, worldoption_fmt * wopt,
109 long numpop, long which_profile);
110 void print_menu_contribution (FILE * file, long contribution[]);
111 void alloc_apg (MYREAL ****apg, long repstop, long loci, long G);
112
113
114 void quadratic_constants (MYREAL *xguess,
115 MYREAL low, MYREAL mid, MYREAL high,
116 MYREAL xlow, MYREAL xmid, MYREAL xhigh);
117 void symmetric_other (long i, long numpop, long *other, long *otherpop);
118 MYREAL ln_copies (long n);
119
120 MYREAL normal_func_ok(MYREAL *param, MYREAL *param0, long numpop2);
121 MYREAL normal_func_no(MYREAL *param, MYREAL *param0, long numpop2);
122 MYREAL normal_func_gradient_ok(MYREAL p1, MYREAL p0);
123 MYREAL normal_func_gradient_no(MYREAL p1, MYREAL p0);
124
125 void unset_penalizer_function(boolean inprofiles);
126
127 MYREAL (*normal_func)(MYREAL *, MYREAL *, long);
128 MYREAL (*normal_func_gradient)(MYREAL, MYREAL);
129
130 #ifdef LONGSUM
131 MYREAL probG_longsum (MYREAL *param, MYREAL *lparam, tarchive_fmt * tl, nr_fmt * nr, long locus);
132 MYREAL probG4_longsum (MYREAL *fullparam, longsum_fmt *data, long longsumlen, long numpop, long numpop2);
133 #endif /*LONGSUM*/
134
135 /* Functions ++++++++++++++++++++++++++++++++++++++++++++++++*/
136
137 ///
138 /// Calculates part of the BFSG maximizer routine.
139 /// Multiplies the approximate hessian with the first derivative vector
140 /// and returns the product
141 void
calc_dv(MYREAL * dv,MYREAL ** hess,MYREAL * gxv,long n)142 calc_dv (MYREAL *dv, MYREAL **hess, MYREAL *gxv, long n)
143 {
144 long i, j;
145 memset (dv, 0, sizeof (MYREAL) * n);
146 for (i = 0; i < n; i++)
147 {
148 for (j = 0; j < n; j++)
149 {
150 dv[i] += hess[i][j] * gxv[j];
151 }
152 }
153 }
154
155 // line searcher
156 // finds the maximum in a direction
157 // this should be replaced with something more efficient.
158 #define PP 0.61803399
159 #define QQ 0.38196601
160 #define MOVE3(a,b,c,d) (a)=(b);(b)=(c);(c)=(d)
161
162
163
164
165 MYREAL
calc_line_new(helper_fmt * helper,MYREAL a,MYREAL b,MYREAL c,MYREAL (* fpsi)(MYREAL lamda,helper_fmt * helper))166 calc_line_new (helper_fmt * helper, MYREAL a, MYREAL b, MYREAL c,
167 MYREAL (*fpsi) (MYREAL lamda, helper_fmt * helper))
168 {
169 MYREAL xhigh = c;
170 MYREAL xlow = a;
171 MYREAL low, high, mid, xmid;
172 MYREAL xguess=0.;
173 int panic = 0;
174 low = (*fpsi) (xlow, helper);
175 high = (*fpsi) (xhigh, helper);
176 xmid = (xlow + xhigh) * HALF;
177 while (panic++ < 10000 && fabs (xhigh - xlow) > EPSILON)
178 {
179 mid = (*fpsi) (xmid, helper);
180 quadratic_constants (&xguess, low, mid, high, xlow, xmid, xhigh);
181 if (MYISNAN (xguess))
182 return 1.;
183 if (xguess < xmid)
184 {
185 xhigh = xmid;
186 high = mid;
187 }
188 else
189 {
190 xlow = xmid;
191 low = mid;
192 }
193 xmid = xguess;
194 }
195 return xlow;
196 }
197
198
199 void
quadratic_constants(MYREAL * xguess,MYREAL low,MYREAL mid,MYREAL high,MYREAL xlow,MYREAL xmid,MYREAL xhigh)200 quadratic_constants (MYREAL *xguess,
201 MYREAL low, MYREAL mid, MYREAL high,
202 MYREAL xlow, MYREAL xmid, MYREAL xhigh)
203 {
204 MYREAL xhigh2 = xhigh * xhigh;
205 MYREAL xlow2 = xlow * xlow;
206 MYREAL xmid2 = xmid * xmid;
207 MYREAL divisor = ((xlow - xmid) * (xlow * xmid - xlow * xhigh - xmid * xhigh + xhigh2));
208 MYREAL a = -((-(xmid * low) + xhigh * low + xlow * mid - xhigh * mid -
209 xlow * high + xmid * high) /divisor);
210 MYREAL b =
211 -((xmid2 * low - xhigh2 * low - xlow2 * mid + xhigh2 * mid +
212 xlow2 * high - xmid2 * high) / divisor);
213 // MYREAL c = -((-(xmid2*xhigh*low) + xmid*xhigh2*low +
214 // xlow2*xhigh*mid - xlow*xhigh2*mid -
215 // xlow2*xmid*high + xlow*xmid2*high)/
216 // (xlow2*xmid - xlow*xmid2 - xlow2*xhigh +
217 // xmid2*xhigh + xlow*xhigh2 - xmid*xhigh2));
218 *xguess = -b / (2. * a);
219 // printf("quadr:{%f %f} {%f %f} {%f %f}\n", xlow, low, *xguess, a * (*xguess * *xguess) + b * *xguess + c, xhigh, high);
220 }
221
222
223
224 MYREAL
calc_line(helper_fmt * helper,MYREAL a,MYREAL b,MYREAL c,MYREAL (* fpsi)(MYREAL lamda,helper_fmt * helper))225 calc_line (helper_fmt * helper, MYREAL a, MYREAL b, MYREAL c,
226 MYREAL (*fpsi) (MYREAL lamda, helper_fmt * helper))
227 {
228 /* a < b < c AND psia > psib < psic */
229 MYREAL d, psib, psic;
230 d = c;
231 if ((fabs (c - b)) > (fabs (b - a)))
232 {
233 c = b + QQ * (c - b);
234 }
235 else
236 {
237 c = b;
238 b = b - QQ * (b - a);
239 }
240 psib = (*fpsi) (b, helper);
241 psic = (*fpsi) (c, helper);
242 while ((fabs (d - a)) > (EPSILON * (fabs (b) + fabs (c))))
243 {
244 if (psic < psib)
245 {
246 MOVE3 (a, b, c, PP * b + QQ * d);
247 psib = psic;
248 psic = (*fpsi) (c, helper);
249 }
250 else
251 {
252 MOVE3 (d, c, b, PP * c + QQ * a);
253 psic = psib;
254 psib = (*fpsi) (b, helper);
255 }
256 // printf("b=%f(%f
257 //), c=%f(%f) {%f, %f}\n",b,psib,c,psic, helper->nr->param[0],
258 // helper->nr->param[helper->nr->partsize-1]);
259 }
260 if (psib < psic)
261 {
262 return b;
263 }
264 else
265 {
266 return c;
267 }
268 }
269
270 MYREAL
psi(MYREAL lamda,helper_fmt * helper,MYREAL * param,MYREAL * lparam)271 psi (MYREAL lamda, helper_fmt * helper, MYREAL *param, MYREAL *lparam)
272 {
273 MYREAL like;
274 calc_loci_param (helper->nr, helper->nr->lparam, helper->xv,
275 helper->dv, lamda, helper->nr->partsize);
276 set_expparam (helper->nr->param, helper->nr->lparam, helper->nr->partsize);
277 fill_helper (helper, helper->nr->param, helper->nr->lparam,
278 helper->nr->world, helper->nr);
279 like = CALCLIKE (helper, helper->nr->param, helper->nr->lparam);
280 return -like;
281 }
282
283
284 void
calc_hessian(MYREAL ** hess,long n,MYREAL * delta,MYREAL * gama)285 calc_hessian (MYREAL **hess, long n, MYREAL *delta, MYREAL *gama)
286 {
287 MYREAL **dd, *temp, t;
288 long i, j, k;
289 MYREAL dtg;
290 temp = (MYREAL *) mycalloc (n, sizeof (MYREAL));
291 dd = (MYREAL **) mycalloc (n, sizeof (MYREAL *));
292 dd[0] = (MYREAL *) mycalloc (n * n, sizeof (MYREAL));
293 dtg = delta[0] * gama[0];
294 for (i = 1; i < n; i++)
295 {
296 dd[i] = dd[0] + n * i;
297 dtg += delta[i] * gama[i];
298 }
299 if (dtg > 0.0 || dtg < 0.0)
300 dtg = 1. / dtg;
301 else
302 {
303 reset_hess (hess, n);
304 myfree(temp);
305 myfree(dd[0]);
306 myfree(dd);
307 return;
308 }
309 for (i = 0; i < n; i++)
310 {
311 for (j = 0; j < n; j++)
312 {
313 temp[i] += gama[j] * hess[j][i];
314 }
315 }
316 t = 0.0;
317 for (i = 0; i < n; i++)
318 t += temp[i] * gama[i];
319 t = (1.0 + t * dtg) * dtg;
320 for (i = 0; i < n; i++)
321 {
322 for (j = 0; j < n; j++)
323 {
324 for (k = 0; k < n; k++)
325 {
326 dd[i][j] += delta[i] * gama[k] * hess[k][j];
327 }
328 }
329 }
330 for (i = 0; i < n; i++)
331 {
332 temp[i] = 0.0;
333 for (j = 0; j < n; j++)
334 {
335 temp[i] += hess[i][j] * gama[j];
336 }
337 }
338 for (i = 0; i < n; i++)
339 {
340 for (j = 0; j < n; j++)
341 {
342 dd[i][j] += temp[i] * delta[j];
343 dd[i][j] *= dtg;
344 hess[i][j] += delta[i] * delta[j] * t - dd[i][j];
345 }
346 }
347 myfree(temp);
348 myfree(dd[0]);
349 myfree(dd);
350 }
351
352 MYREAL
absmaxvec(MYREAL * v,long n)353 absmaxvec (MYREAL *v, long n)
354 {
355 long i;
356 MYREAL max = fabs (v[0]);
357 for (i = 1; i < n; i++)
358 {
359 if (max < fabs (v[i]))
360 max = fabs (v[i]);
361 }
362 return max;
363 }
364
365 ///
366 /// creates the scratchpad for the maximum likelihood calculation, used during the MLE calculations
367 /// and also used during the profiles, the structure nr holds many parameters for further use in other functions
368 ///
369 void
create_nr(nr_fmt * nr,world_fmt * world,long G,long profilenum,long thislocus,long repkind,long rep)370 create_nr (nr_fmt * nr, world_fmt * world, long G,
371 long profilenum, long thislocus, long repkind, long rep)
372 {
373 long i;
374 long partsize;
375 // setting up local variables
376 nr->numpop = world->numpop;
377 nr->numpop2 = world->numpop2;
378 nr->skiploci = world->data->skiploci;
379 nr->world = world;
380 nr->mstart = world->mstart;
381 nr->mend = world->mend;
382 nr->repkind = repkind;
383 nr->repstart = (repkind == SINGLECHAIN) ? rep : 0;
384 nr->repstop = (repkind == SINGLECHAIN) ? rep + 1 : world->repstop;
385 nr->profilenum = profilenum;
386 // number of trees stored
387 nr->apg_max = (MYREAL *) mycalloc (2 * (world->loci + 1), sizeof (MYREAL));
388 nr->PGC = nr->apg_max + world->loci + 1; //memory alocated in apg_max
389 if (world->options->gamma)
390 {
391 nr->categs = GAMMA_INTERVALS;
392 nr->rate = (MYREAL *) mycalloc (2 * nr->categs, sizeof (MYREAL));
393 nr->probcat = nr->rate + nr->categs; // memory allocated in nr->rate
394 nr->partsize = world->numpop2 + 1;
395 }
396 else
397 {
398 if (!world->options->gamma && world->locus >= world->loci)
399 {
400 nr->categs = 0;
401 nr->rate = NULL;
402 nr->partsize = world->numpop2;
403 }
404 else
405 {
406 nr->categs = 0;
407 nr->rate = NULL;
408 nr->partsize = world->numpop2;
409 }
410 }
411 #ifdef LONGSUM
412 nr->partsize += world->numpop * 3;
413 #endif
414 partsize = nr->partsize;
415
416 nr->atl = world->atl;
417
418 nr->datalike = (MYREAL *) mymalloc ((G+1) * sizeof (MYREAL));
419
420 // parts is a long vector containing all standard parts that have a fixed length
421 // this excludes the datalikes because those use G (the number of trees), they might get reallocated during the
422 // lifetime of nr and so have their own memory block.
423 nr->parts = (MYREAL *) mycalloc (9 * partsize + (world->loci + 1), sizeof (MYREAL));
424 nr->d = nr->parts + partsize; // memory allocated in nr->parts
425 nr->od = nr->d + partsize; // memory allocated in nr->parts
426 nr->dv = nr->od + partsize; // memory allocated in nr->parts
427 nr->delta = nr->dv + partsize; // memory allocated in nr->parts
428 nr->gdelta = nr->delta + partsize; // memory allocated in nr->parts
429 nr->param = nr->gdelta + partsize; // memory allocated in nr->parts
430 nr->lparam = nr->param + partsize ; // memory allocated in nr->parts
431 nr->values = nr->lparam + partsize; // memory allocated in nr->parts
432 nr->locilikes = nr->values + partsize; // memory allocated in nr->parts
433
434 nr->profiles = (long *) mycalloc (2 * partsize, sizeof (long));//holds vector for profiles and indeks
435 nr->indeks = nr->profiles + partsize; //memory allocated in nr->profiles
436
437 // assignment of parameters into scratch pad
438 memcpy (nr->param, world->param0, partsize * sizeof (MYREAL));
439 for (i = 0; i < nr->partsize; ++i)
440 {
441 if (nr->param[i] > 0.0)
442 nr->lparam[i] = LOG (nr->param[i]);
443 else
444 nr->lparam[i] = -MYREAL_MAX;
445 }
446 // allocate earlier in world: alloc_apg (&nr->apg0, world->repstop, world->loci, G);
447 // allicate earlier in world: alloc_apg (&nr->apg, world->repstop, world->loci, G);
448 nr->apg = world->apg;
449 nr->apg0 = world->apg0;
450 }
451
452 void
alloc_apg(MYREAL **** apg,long repstop,long loci,long G)453 alloc_apg (MYREAL ****apg, long repstop, long loci, long G)
454 {
455 long j, i;
456 (*apg) = (MYREAL ***) mycalloc (repstop, sizeof (MYREAL **));
457 for (j = 0; j < repstop; j++)
458 {
459 (*apg)[j] = (MYREAL **) mycalloc (loci + 1, sizeof (MYREAL *));
460 (*apg)[j][0] =
461 (MYREAL *) mycalloc ((1 + G) * (loci + 1), sizeof (MYREAL));
462 for (i = 1; i < loci + 1; i++)
463 {
464 (*apg)[j][i] = (*apg)[j][0] + i * (G + 1);
465 }
466 }
467 }
468
469 void
reuse_nr(nr_fmt * nr,world_fmt * world,long G,long profilenum,long thislocus,long repkind,long rep)470 reuse_nr (nr_fmt * nr, world_fmt * world, long G, long profilenum,
471 long thislocus, long repkind, long rep)
472 {
473 long i;
474 nr->repkind = repkind;
475 nr->repstart = (repkind == SINGLECHAIN) ? rep : 0;
476 nr->repstop = (repkind == SINGLECHAIN) ? rep + 1 : world->repstop;
477 nr->profilenum = profilenum;
478 if (world->options->gamma && world->locus >= world->loci)
479 {
480 nr->categs = GAMMA_INTERVALS;
481 nr->rate = (MYREAL *) myrealloc (nr->rate, 2 * nr->categs * sizeof (MYREAL));
482 nr->probcat = nr->rate + nr->categs;
483 nr->partsize = world->numpop2 + 1;
484 }
485 memset (nr->parts, 0, nr->partsize * sizeof (MYREAL));
486 memset (nr->d, 0, nr->partsize * sizeof (MYREAL));
487 memset (nr->od, 0, nr->partsize * sizeof (MYREAL));
488 memset (nr->dv, 0, nr->partsize * sizeof (MYREAL));
489 memset (nr->delta, 0, nr->partsize * sizeof (MYREAL));
490 memset (nr->gdelta, 0, nr->partsize * sizeof (MYREAL));
491 memcpy (nr->param, world->param0, nr->partsize * sizeof (MYREAL));
492 for (i = 0; i < nr->partsize; ++i)
493 {
494 if (nr->param[i] > 0.0)
495 nr->lparam[i] = LOG (nr->param[i]);
496 else
497 nr->lparam[i] = -MYREAL_MAX;
498 }
499 nr->datalike = (MYREAL *) myrealloc (nr->datalike, (G+1) * sizeof (MYREAL));
500 memset (nr->datalike, 0, G * sizeof (MYREAL));
501 memset (nr->locilikes, 0, (world->loci + 1) * sizeof (MYREAL));
502 /*
503 for (j = world->repstop - 1; j >= 0; j--)
504 {
505 myfree(nr->apg0[j][0]);
506 myfree(nr->apg[j][0]);
507 myfree(nr->apg0[j]);
508 myfree(nr->apg[j]);
509 }
510 myfree(nr->apg0);
511 myfree(nr->apg);
512 alloc_apg (&nr->apg0, world->repstop, world->loci, G);
513 alloc_apg (&nr->apg, world->repstop, world->loci, G);*/
514 }
515
516 void
reset_hess(MYREAL ** hess,long n)517 reset_hess (MYREAL **hess, long n)
518 {
519 long pop;
520 // printf("resetting hessian\n");
521 memset (hess[0], 0, sizeof (MYREAL) * n * n);
522 for (pop = 1; pop < n; pop++)
523 {
524 hess[pop] = hess[0] + pop * n;
525 hess[pop][pop] = 1.0;
526 }
527 hess[0][0] = 1.0;
528 }
529
530 void
destroy_nr(nr_fmt * nr,world_fmt * world)531 destroy_nr (nr_fmt * nr, world_fmt * world)
532 {
533 // the large holding vectors are freed and all related points will go away, too.
534 myfree(nr->parts);
535 myfree(nr->datalike);
536 myfree(nr->apg_max);
537 myfree(nr->profiles);
538
539 if (nr->categs != 0)
540 {
541 myfree(nr->rate); // probcat is also freed, was part of rate-array
542 }
543 myfree(nr);
544 }
545
546 ///
547 /// calculate the likelihood of the parameter estimates
548 MYREAL
calc_locus_like(nr_fmt * nr,MYREAL * param,MYREAL * lparam,long locus)549 calc_locus_like (nr_fmt * nr, MYREAL *param, MYREAL *lparam, long locus)
550 {
551 long g, r, j, copies;
552 const long numpop = nr->world->numpop;
553 const long numpop2 = nr->world->numpop2;
554 int msta, msto;
555 int stop = (int) (numpop2 + numpop + numpop);
556 MYREAL gsum = 0;
557 MYREAL ***apg0;
558 MYREAL apgmax = -MYREAL_MAX;
559 // MYREAL ***apgall = nr->apg;
560 MYREAL *apg;
561 MYREAL *apg0r;
562 timearchive_fmt tyme;
563 tarchive_fmt *tl;
564 MYREAL *geo = nr->world->data->geo;
565 MYREAL *lgeo = nr->world->data->lgeo;
566 MYREAL *locallparam;
567 MYREAL *localparam;
568 MYREAL *sm;
569 const MYREAL mu_rate = nr->world->options->mu_rates[locus];
570 const MYREAL inv_mu_rate = 1./ mu_rate;
571 const MYREAL lmu_rate = nr->world->options->lmu_rates[locus];
572 // we calculate the maximum likelihood estiamte of the parameters
573 // so that all loci have the same Theta*, therefor the inheritance scalar has
574 // to be taken into account,
575 // for nuc, mt, x set to scalars 1.0, 0.25, 0.75,
576 // we need to maximize 1Theta*, 4Theta*, 1.333Theta*
577 const MYREAL inv_inheritance = nr->world->options->inheritance_scalars[locus];
578 const MYREAL inheritance = 1./ inv_inheritance;
579 const MYREAL tlmu_rate = lmu_rate + log(inheritance);
580 const MYREAL tmu_rate = mu_rate * inheritance;
581 //const MYREAL tinv_mu_rate = inv_mu_rate * inheritance;
582 MYREAL normaldev;
583 #ifdef LONGSUM
584
585 MYREAL *rates;
586 MYREAL *lrates;
587 MYREAL *rtimes;
588 #endif /*LONGSUM*/
589 #ifndef LONGSUM
590 // experiment with unrolling the loop in probG4 by 3, the 3 added here guarantees
591 // that enough elements are present so that the loop can gracefully finish
592 #ifdef FAST_TRY
593 stop += 3 - (stop % 3);
594 #endif
595 locallparam = (MYREAL *) mycalloc (stop, sizeof (MYREAL));
596 #else /*LONGSUM*/
597
598 locallparam = (MYREAL *) mycalloc ((numpop2 + numpop + numpop + 9 * numpop),
599 sizeof (MYREAL));
600 rates = locallparam + numpop2 + numpop + numpop;
601 lrates = rates + 3 * numpop;
602 rtimes = rates + 6 * numpop;
603 memcpy (rates, param+nr->partsize-3*numpop,sizeof(MYREAL)*3*numpop);
604 memcpy (lrates, lparam+nr->partsize-3*numpop,sizeof(MYREAL)*3*numpop);
605 memcpy (rtimes, nr->world->flucrates+ 3 * numpop ,sizeof(MYREAL)*3*numpop);
606
607 #endif /*LONGSUM*/
608
609 localparam = locallparam + numpop2;
610
611 sm = localparam + numpop;
612 memcpy (localparam, param, sizeof (MYREAL) * numpop);
613 memcpy (locallparam, lparam, sizeof (MYREAL) * numpop2);
614
615 nr->PGC[locus] = 0.0;
616 apg0 = nr->apg0;
617
618 for (r = 0; r < nr->numpop; r++)
619 {
620 locallparam[r] = LOG2 - (locallparam[r] + tlmu_rate);
621 localparam[r] = -1. / (localparam[r] * tmu_rate); // minus, so that we can loop over all in probG4
622 msta = nr->mstart[r];
623 msto = nr->mend[r];
624 for (j = msta; j < msto; j++)
625 {
626 sm[r] -= geo[j] * param[j] * inv_mu_rate; //minus, so that we can loop over all in probG4
627 locallparam[j] += lgeo[j] - lmu_rate;
628 }
629 }
630 gsum = 0;
631 for (r = nr->repstart; r < nr->repstop; r++)
632 {
633 apg = nr->apg[r][locus];
634 apg0r = apg0[r][locus];
635 tyme = nr->atl[r][locus];
636 normaldev = (*normal_func)(param, tyme.param0, numpop2);
637 //first element
638 tl = &(tyme.tl[0]);
639 copies = tl->copies - 1;
640 gsum += copies;
641 #ifdef INTEGRATEDLIKE
642 tl->lcopies = ln_copies(copies);
643 #endif
644 if (copies > 0)
645 {
646 #ifndef LONGSUM
647 apg[0] = tl->lcopies + probG4 (locallparam, tl->data, stop) - apg0r[0] + normaldev;
648 #else /*LONGSUM*/
649
650 apg[0] = tl->lcopies + probG4_longsum(locallparam,tl->longsum,tl->longsumlen, numpop,numpop2)
651 -apg0r[0] + normaldev;
652 #endif /*LONGSUM*/
653
654 }
655 else
656 apg[0] = -MYREAL_MAX;
657 if (apgmax < apg[0])
658 apgmax = apg[0];
659 //other elements
660 for (g = 1; g < tyme.T; g++)
661 {
662 tl = &(tyme.tl[g]);
663 gsum += tl->copies;
664 #ifdef INTEGRATEDLIKE
665 tl->lcopies = ln_copies(tl->copies);
666 #endif
667
668 #ifndef LONGSUM
669
670 apg[g] = tl->lcopies + probG4 (locallparam, tl->data, stop) - apg0r[g]
671 +normaldev;
672 #else /*LONGSUM*/
673
674 apg[g] = tl->lcopies + probG4_longsum(locallparam,tl->longsum,tl->longsumlen, numpop,numpop2)
675 -apg0r[g] + normaldev;
676 #endif /*LONGSUM*/
677
678 if (apg[g] > apgmax)
679 apgmax = apg[g];
680 }
681 } // end replicates
682 for (r = nr->repstart; r < nr->repstop; r++)
683 {
684 apg = nr->apg[r][locus];
685 //apg0r = apg0[r][locus];
686 tyme = nr->atl[r][locus];
687 // first element
688 //xcode tl = &(tyme.tl[0]);
689 // first element
690 apg[0] -= apgmax;
691 nr->PGC[locus] += EXP (apg[0]);
692 // all other elements
693 for (g = 1; g < tyme.T; g++)
694 {
695 apg[g] -= apgmax;
696 if (apg[g] > -40.)
697 nr->PGC[locus] += EXP (apg[g]);
698 }
699 } // replicates
700 nr->apg_max[locus] = apgmax;
701 nr->llike = apgmax + LOG (nr->PGC[locus]) - LOG (gsum);
702 myfree(locallparam);
703 return nr->llike;
704 }
705
706
707 ///
708 /// Lookup table for the calculation of log(copy_number_of_trees)
709 /// Most often there are only a few copies of a specific tree.
710 MYREAL
ln_copies(long n)711 ln_copies (long n)
712 {
713 switch (n)
714 {
715 case 0:
716 return -MYREAL_MAX;
717 case 1:
718 return 0.;
719 case 2:
720 return 0.69314718055994530942;
721 case 3:
722 return 1.0986122886681096914;
723 case 4:
724 return 1.3862943611198906188;
725 case 5:
726 return 1.6094379124341003746;
727 case 6:
728 return 1.7917594692280550008;
729 case 7:
730 return 1.9459101490553133051;
731 case 8:
732 return 2.0794415416798359283;
733 case 9:
734 return 2.1972245773362193828;
735 case 10:
736 return 2.3025850929940456840;
737 case 11:
738 return 2.3978952727983705441;
739 case 12:
740 return 2.4849066497880003102;
741 case 13:
742 return 2.5649493574615367361;
743 case 14:
744 return 2.6390573296152586145;
745 case 15:
746 return 2.7080502011022100660;
747 case 16:
748 return 2.7725887222397812377;
749 case 17:
750 return 2.8332133440562160802;
751 case 18:
752 return 2.8903717578961646922;
753 case 19:
754 return 2.9444389791664404600;
755 case 20:
756 return 2.9957322735539909934;
757 default:
758 return LOG ((MYREAL) n);
759 }
760 }
761
762 /// make sure that all 'm' marked population sizes are all the same
763 void
force_sametheta(MYREAL * param,worldoption_fmt * wopt,long numpop)764 force_sametheta (MYREAL *param, worldoption_fmt * wopt, long numpop)
765 {
766 long i, n = 0;
767 MYREAL summ = 0, logsumm;
768 char *custm2 = wopt->custm2;
769 for (i = 0; i < numpop; i++)
770 {
771 if (custm2[i] == 'm')
772 {
773 summ += EXP (param[i]);
774 n++;
775 }
776 }
777 summ /= n;
778 logsumm = LOG (summ);
779 for (i = 0; i < numpop; i++)
780 {
781 if (custm2[i] == 'm')
782 param[i] = logsumm;
783 }
784 }
785
786 /// make sure that all 'm' marked migration rates are the same
787 void
force_samemigration(MYREAL * param,worldoption_fmt * wopt,long numpop)788 force_samemigration (MYREAL *param, worldoption_fmt * wopt, long numpop)
789 {
790 long i;
791 MYREAL summ = 0, logsumm;
792 long numpop2 = numpop * numpop;
793 long n = 0;
794 char *custm2 = wopt->custm2;
795
796 for (i = numpop; i < numpop2; i++)
797 {
798 if (custm2[i] == 'm')
799 {
800 summ += EXP (param[i]);
801 n++;
802 }
803 }
804 summ /= n;
805 logsumm = LOG (summ);
806 for (i = numpop; i < numpop2; i++)
807 {
808 if (custm2[i] == 'm')
809 param[i] = logsumm;
810 }
811 }
812
813 /// adjust all parameters according to their settings in custom migration matrix
814 void
param_all_adjust(MYREAL * xv,nr_fmt * nr)815 param_all_adjust (MYREAL *xv, nr_fmt *nr)
816 {
817 MYREAL *param = xv;
818 worldoption_fmt * wopt= nr->world->options;
819 long numpop = nr->world->numpop;
820 MYREAL which_profile= -1;
821 char *custm = NULL;
822 long zeron = 0;
823 long constn = 0;
824 long symn = 0;
825 long sym2n = 0;
826 long from, to;
827 twin_fmt *syms = NULL;
828 quad_fmt *sym2s = NULL;
829 long *zeros = NULL;
830 long *consts = NULL;
831 long z = 0, i;
832 char *p;
833 MYREAL mm, lmm;
834
835 custm = wopt->custm2;
836 zeron = wopt->zeron;
837 constn = wopt->constn;
838 symn = wopt->symn;
839 sym2n = wopt->sym2n;
840
841 if (symn > 0)
842 syms = wopt->symparam;
843 if (sym2n > 0)
844 sym2s = wopt->sym2param;
845 if (zeron > 0)
846 zeros = wopt->zeroparam;
847 if (constn > 0)
848 consts = wopt->constparam;
849
850 p = custm;
851 z = (long) strcspn(p, "m");
852 if (z < numpop)
853 force_sametheta (param, wopt, numpop);
854 p = custm + numpop;
855 z = (long) strcspn(p, "m");
856
857 if (z < numpop*(numpop-1))
858 force_samemigration (param, wopt, numpop);
859
860 for (i = 0; i < zeron; i++)
861 param[zeros[i]] = -30.; //-MYREAL_MAX;
862
863 for (i = 0; i < constn; i++)
864 {
865 if (consts[i] < numpop)
866 param[consts[i]] = LOG (wopt->thetag[consts[i]]);
867 if (consts[i] >= numpop)
868 {
869 if (consts[i] >= numpop * numpop)
870 param[consts[i]] = LOG (wopt->alphavalue);
871 else
872 {
873 m2mm (consts[i], numpop, &from, &to);
874 param[consts[i]] = LOG (wopt->mg[to*(numpop-1)+from]);
875 }
876 }
877 }
878
879 // this is so weird because of the profiled parameters which should not change of course
880 for (i = 0; i < symn; i++)
881 {
882 if(nr->profilenum>0)
883 {
884 for(z=0;z<nr->profilenum;z++)
885 {
886 which_profile = nr->profiles[z];
887 if(which_profile == (long) syms[i][0])
888 {
889 param[syms[i][1]] = param[syms[i][0]];
890 break;
891 }
892 else
893 {
894 if(which_profile == (long) syms[i][1])
895 {
896 param[syms[i][0]] = param[syms[i][1]];
897 break;
898 }
899 else
900 {
901 mm = (EXP (param[syms[i][0]]) + EXP (param[syms[i][1]])) * HALF;
902 param[syms[i][0]] = param[syms[i][1]] = LOG (mm);
903 break;
904 }
905 }
906 }
907 }
908 else
909 {
910 mm = (EXP (param[syms[i][0]]) + EXP (param[syms[i][1]])) * HALF;
911 param[syms[i][0]] = param[syms[i][1]] = LOG (mm);
912 }
913 }
914 for (i = 0; i < sym2n; i++)
915 {
916 if(nr->profilenum>0)
917 {
918 for(z=0;z<nr->profilenum;z++)
919 {
920 which_profile = nr->profiles[z];
921 if(which_profile == (long) sym2s[i][0])
922 {
923 //printf(".1.\n");
924 mm = param[sym2s[i][0]] + param[sym2s[i][2]];
925 param[sym2s[i][1]] = mm - param[sym2s[i][3]];
926
927 break;
928 }
929 else
930 {
931 if(which_profile == (long) sym2s[i][1])
932 {
933 //printf(".2.\n");
934 mm = param[sym2s[i][1]] + param[sym2s[i][3]];
935 param[sym2s[i][0]] = mm - param[sym2s[i][2]];
936 break;
937 }
938 else
939 {
940 mm = (EXP (param[sym2s[i][0]] + param[sym2s[i][2]]) +
941 EXP (param[sym2s[i][1]] + param[sym2s[i][3]])) * HALF;
942 param[sym2s[i][0]] = (lmm = LOG (mm)) - param[sym2s[i][2]];
943 param[sym2s[i][1]] = lmm - param[sym2s[i][3]];
944 break;
945 }
946 }
947 }
948 }
949 else
950 {
951 mm = (EXP (param[sym2s[i][0]] + param[sym2s[i][2]]) +
952 EXP (param[sym2s[i][1]] + param[sym2s[i][3]])) * HALF;
953 param[sym2s[i][0]] = (lmm = LOG (mm)) - param[sym2s[i][2]];
954 param[sym2s[i][1]] = lmm - param[sym2s[i][3]];
955 }
956 }
957 }
958
959
960
961 //======================
962 /* switch (wopt->migration_model)
963 {
964 case MATRIX:
965 break;
966 case MATRIX_ARBITRARY:
967 check_matrix_arbitrary (xv, wopt, numpop, which_profile);
968 break;
969 case MATRIX_SAMETHETA:
970 force_sametheta (xv, wopt, numpop);
971 break;
972 case ISLAND:
973 force_sametheta (xv, wopt, numpop);
974 force_samemigration (xv, wopt, numpop);
975 break;
976 case ISLAND_VARTHETA:
977 force_samemigration (xv, wopt, numpop);
978 break;
979 }
980 }
981 */
982
983 ///
984 /// checks and corrects the migration matrix according to custom migration matrix
985 /// which_profile is necessary to avoid accidental changes in the variables that need profiling
986 void
check_matrix_arbitrary(MYREAL * param,worldoption_fmt * wopt,long numpop,long which_profile)987 check_matrix_arbitrary (MYREAL *param, worldoption_fmt * wopt, long numpop, long which_profile)
988 {
989 char *custm = NULL;
990 long zeron = 0;
991 long constn = 0;
992 long symn = 0;
993 long sym2n = 0;
994 long from, to;
995 // boolean done = FALSE;
996 twin_fmt *syms = NULL;
997 quad_fmt *sym2s = NULL;
998 long *zeros = NULL;
999 long *consts = NULL;
1000 long z = 0, i;
1001 long count;
1002 char *p;
1003 MYREAL mm, lmm;
1004 // long locus = 0;
1005 custm = wopt->custm2;
1006 zeron = wopt->zeron;
1007 constn = wopt->constn;
1008 symn = wopt->symn;
1009 sym2n = wopt->sym2n;
1010 if (symn > 0)
1011 syms = wopt->symparam;
1012 if (sym2n > 0)
1013 sym2s = wopt->sym2param;
1014 if (zeron > 0)
1015 zeros = wopt->zeroparam;
1016 if (constn > 0)
1017 consts = wopt->constparam;
1018 p = custm;
1019 z = 0;
1020 for (count = 0; count < numpop; count++)
1021 {
1022 if (*p == 'm')
1023 z++;
1024 p++;
1025 }
1026 if (z > 0)
1027 force_sametheta (param, wopt, numpop);
1028 p = custm + numpop;
1029 z = 0;
1030 for (count = numpop; count < numpop * numpop; count++)
1031 {
1032 if (*p == 'm')
1033 z++;
1034 p++;
1035 }
1036 if (z > 0)
1037 force_samemigration (param, wopt, numpop);
1038 for (i = 0; i < zeron; i++)
1039 {
1040 param[zeros[i]] = -30.; //-MYREAL_MAX;
1041 }
1042
1043 for (i = 0; i < constn; i++)
1044 {
1045 if (consts[i] < numpop)
1046 param[consts[i]] = LOG (wopt->thetag[consts[i]]);
1047 if (consts[i] >= numpop)
1048 {
1049 if (consts[i] >= numpop * numpop)
1050 param[consts[i]] = LOG (wopt->alphavalue);
1051 else
1052 {
1053 m2mm (consts[i], numpop, &from, &to);
1054 param[consts[i]] = LOG (wopt->mg[to*(numpop-1)+from]);
1055 }
1056 }
1057 }
1058
1059 for (i = 0; i < symn; i++)
1060 {
1061 if(which_profile == syms[i][0])
1062 param[syms[i][1]] = param[syms[i][0]];
1063 else
1064 {
1065 if(which_profile == syms[i][1])
1066 param[syms[i][0]] = param[syms[i][1]];
1067 else
1068 {
1069 mm = (EXP (param[syms[i][0]]) + EXP (param[syms[i][1]])) * HALF;
1070 param[syms[i][0]] = param[syms[i][1]] = LOG (mm);
1071 }
1072 }
1073 }
1074 for (i = 0; i < sym2n; i++)
1075 {
1076 if(which_profile == sym2s[i][0])
1077 mm = EXP (param[sym2s[i][0]] + param[sym2s[i][2]]);
1078 else
1079 {
1080 if(which_profile == sym2s[i][1])
1081 mm = EXP (param[sym2s[i][0]] + param[sym2s[i][2]]);
1082 else
1083 {
1084 mm = (EXP (param[sym2s[i][0]] + param[sym2s[i][2]]) +
1085 EXP (param[sym2s[i][1]] + param[sym2s[i][3]])) * HALF;
1086 }
1087 }
1088 param[sym2s[i][0]] = (lmm = LOG (mm)) - param[sym2s[i][2]];
1089 param[sym2s[i][1]] = lmm - param[sym2s[i][3]];
1090 }
1091 }
1092
1093
1094 /// calculates the gradient of the likelihood function
1095 void
gradient(MYREAL * d,nr_fmt * nr,long locus)1096 gradient (MYREAL *d, nr_fmt * nr, long locus)
1097 {
1098 boolean found=FALSE;
1099 MYREAL tk1,m1,th1,l1;
1100 //MYREAL nm1,th2;
1101 //MYREAL nm2, l2, m2, tk2;
1102 long z, r;
1103 long other , otherpop;
1104 long g, i, ii, pop, msta, msto;
1105 long numpop = nr->numpop;
1106 long nn = nr->partsize - nr->profilenum;
1107 MYREAL expapg, *thetas;
1108 //MYREAL *m;
1109 MYREAL *geo = nr->world->data->geo;
1110 tarchive_fmt *tl;
1111 MYREAL tet;
1112 MYREAL rtet;
1113 MYREAL *normalgrad;
1114 MYREAL mu_rate = nr->world->options->mu_rates[locus];
1115 MYREAL inv_mu_rate = 1. / mu_rate;
1116 // takes care of the inheritance scalars for theta
1117 const MYREAL inheritance=nr->world->options->inheritance_scalars[locus];
1118
1119 normalgrad = (MYREAL *) mymalloc(nr->numpop2 * sizeof(MYREAL));
1120 memset (d, 0, sizeof (MYREAL) * nr->numpop2);
1121 thetas = nr->param;
1122 //xcode m = nr->param + numpop;
1123 for (r = nr->repstart; r < nr->repstop; r++)
1124 {
1125 for (pop = 0; pop < nr->numpop2; pop++)
1126 {
1127 normalgrad[pop] = (*normal_func_gradient)(thetas[pop], nr->world->atl[r][locus].param0[pop]);
1128 }
1129 for (g = 0; g < nr->world->atl[r][locus].T; g++)
1130 {
1131 if (nr->apg[r][locus][g] < -40.)
1132 continue;
1133 tl = nr->world->atl[r][locus].tl;
1134 expapg = EXP (nr->apg[r][locus][g]);
1135 for (pop = 0; pop < numpop; pop++)
1136 {
1137 tet = thetas[pop]*inheritance; //INHERIT
1138 // the following lines are doing this derivative, but have reduced the division operation (speed)
1139 // nr->parts[pop] = -tl[g].p[pop] / tet + tl[g].kt[pop] / (tet * tet * mu_rate) - normalgrad[pop];
1140 rtet = 1./(tet * tet * mu_rate);
1141 nr->parts[pop] = (- mu_rate * tet * (tl[g].p[pop] + normalgrad[pop] * tet) + tl[g].kt[pop]) * rtet ;
1142 msta = nr->mstart[pop];
1143 msto = nr->mend[pop];
1144 //xcode z = 0;
1145 //xcode found=FALSE;
1146 for (i = msta; i < msto; i++)
1147 {
1148 if (nr->param[i] > 0.)
1149 {
1150 if(nr->world->options->custm2[i]=='S')
1151 {
1152 tk1 = geo[i]* tl[g].km[pop] * inv_mu_rate;
1153 l1 = tl[g].mindex[i];
1154 m1 = nr->param[i];
1155 th1 = nr->param[pop];
1156 found =find (i, nr->profiles, nr->profilenum);
1157 if (!found)
1158 {
1159 nr->parts[i] = ((l1 / m1) - tk1) - normalgrad[i];
1160 }
1161 symmetric_other(i, numpop, &other, &otherpop);
1162 //xcode th2 = nr->param[otherpop];
1163 //xcode nm1 = m1 * th1;
1164 // speed opt: nr->parts[pop] += tk1 * m1/th1 - l1/th1;
1165 nr->parts[pop] += tk1 * (m1 - l1)/th1;
1166 }
1167 else
1168 nr->parts[i] = ((tl[g].mindex[i] / (nr->param[i]))
1169 - geo[i] * tl[g].km[pop] * inv_mu_rate) - normalgrad[i];
1170 }
1171 }
1172 }
1173 z = 0;
1174 for (i = 0; i < nn; i++)
1175 {
1176 ii = (nr->profilenum > 0) ? nr->indeks[z++] : i;
1177 d[i] += expapg * nr->parts[ii];
1178 }
1179 }
1180 }
1181 myfree(normalgrad);
1182 }
1183
1184 #ifdef LONGSUM
1185
which_rate(MYREAL * rawtimes,long pop,MYREAL time_end)1186 long which_rate(MYREAL *rawtimes, long pop, MYREAL time_end)
1187 {
1188 long i;
1189 long end = pop * 3 + 3 -1;
1190 i = pop * 3;
1191 while(i<end && time_end > rawtimes[i+1])
1192 {
1193 i++;
1194 }
1195 return i;
1196 }
1197
1198 void
gradient_longsum(MYREAL * d,nr_fmt * nr,long locus)1199 gradient_longsum (MYREAL *d, nr_fmt * nr, long locus)
1200 {
1201 long z, r;
1202 long g, i, j, ii, pop;
1203 long numpop = nr->numpop;
1204 MYREAL expapg, *thetas, *m;
1205 MYREAL *geo = nr->world->data->geo;
1206 tarchive_fmt *tl;
1207 longsum_fmt *longsum;
1208 longsum_fmt *tint;
1209 MYREAL *treeparts;
1210 MYREAL tet;
1211 MYREAL time_end;
1212 MYREAL rawt;
1213 // long intervals[3];
1214 long numpop3 = nr->numpop * 3;
1215 long nn = nr->partsize - nr->profilenum - numpop3;
1216 MYREAL *rawtimes = nr->world->flucrates+numpop3;
1217 MYREAL *rawrates = nr->param+nn+nr->profilenum;
1218 // MYREAL *rawlrates = nr->lparam+nn;
1219 MYREAL mu_rate = nr->world->options->mu_rates[locus];
1220 // MYREAL * rates = mycalloc(numpop3 * 3,sizeof(MYREAL));
1221 // MYREAL *lrates = rates + numpop3;
1222 long ptime, ptimeold;
1223 MYREAL timeinterval;
1224 //interval[0]=intervals[1]=intervals[2]=0;
1225 FPRINTF(stdout,"CHECK FUNCTION LONGSUM: rawrates=%f %f %f @ %f %f %f \n",rawrates[0],rawrates[1],rawrates[2],rawrates[3],rawrates[4],rawrates[5]);
1226 treeparts = mycalloc(nr->numpop2+numpop3,sizeof(MYREAL));
1227 memset (d, 0, sizeof (MYREAL) * nr->partsize);
1228 thetas = nr->param;
1229 m = nr->param + numpop;
1230 for (r = nr->repstart; r < nr->repstop; r++)
1231 {
1232 memset(nr->parts,0,sizeof(MYREAL)*nr->partsize);
1233 for (g = 0; g < nr->world->atl[r][locus].T; g++)
1234 {
1235 if (nr->apg[r][locus][g] < -40.)
1236 continue;
1237 memset(treeparts,0,sizeof(MYREAL)*nr->partsize);
1238 tl = nr->world->atl[r][locus].tl;
1239 expapg = EXP (nr->apg[r][locus][g]);
1240 longsum = nr->world->atl[r][locus].tl[g].longsum;
1241 time_end=0.;
1242 // for(pop=0; pop < numpop; pop++)
1243 // rates[pop]=1.;
1244 for(j=0; j< nr->world->atl[r][locus].tl[g].longsumlen; j++)
1245 {
1246 tint = &longsum[j];
1247 time_end += tint->interval;
1248 switch(tint->eventtype)
1249 {
1250 case 'c':
1251 pop = tint->to;
1252 tet = thetas[pop];
1253 treeparts[pop] -= 1. / tet ;
1254
1255 ptime = which_rate(rawtimes,pop, time_end);
1256 treeparts[ptime+nn] -= 1./rawrates[ptime];
1257 break;
1258 case 'm':
1259 treeparts[tint->fromto] += 1./(nr->param[tint->fromto]);
1260 break;
1261 }
1262 for (pop = 0; pop < nr->numpop; pop++)
1263 {
1264 ptime = which_rate(rawtimes,pop, time_end);
1265 // intervals[ptime] += 1;
1266 timeinterval = tint->interval;
1267 tet = thetas[pop];
1268 rawt = rawtimes[ptime];
1269 if(((time_end - tint->interval) < rawt) && (time_end > rawt))
1270 {
1271 ptimeold = which_rate(rawtimes, pop, time_end - tint->interval);
1272 // intervals[ptimeold] += 1;
1273 if(ptime-ptimeold>1)
1274 {
1275 timeinterval =rawtimes[ptimeold+1]-(time_end - tint->interval);
1276 treeparts[ptimeold+nn] += timeinterval * tint->lineages2[pop] / (tet * mu_rate * rawrates[ptimeold] * rawrates[ptimeold]);
1277 treeparts[pop] += timeinterval * tint->lineages2[pop] / (tet * tet * mu_rate * rawrates[ptimeold]);
1278
1279 timeinterval = rawtimes[ptime] - rawtimes[ptimeold+1];
1280 treeparts[ptimeold+1+nn] += timeinterval * tint->lineages2[pop] / (tet * mu_rate * rawrates[ptimeold+1] * rawrates[ptimeold+1]);
1281 treeparts[pop] += timeinterval * tint->lineages2[pop] / (tet * tet * mu_rate * rawrates[ptimeold+1]);
1282
1283 timeinterval = time_end - rawtimes[ptime];
1284 treeparts[ptime+nn] += timeinterval * tint->lineages2[pop] / (tet * mu_rate * rawrates[ptime] * rawrates[ptime]);
1285 treeparts[pop] += timeinterval * tint->lineages2[pop] / (tet * tet * mu_rate * rawrates[ptime]);
1286 }
1287 else
1288 {
1289 timeinterval =rawtimes[ptime]-(time_end - tint->interval);
1290 treeparts[ptimeold+nn] += timeinterval * tint->lineages2[pop] / (tet * mu_rate * rawrates[ptimeold] * rawrates[ptimeold]);
1291 treeparts[pop] += timeinterval * tint->lineages2[pop] / (tet * tet * mu_rate * rawrates[ptimeold]);
1292 timeinterval = time_end - rawtimes[ptime];
1293 treeparts[ptime+nn] += timeinterval * tint->lineages2[pop] / (tet * mu_rate * rawrates[ptime] * rawrates[ptime]);
1294 treeparts[pop] += timeinterval * tint->lineages2[pop] / (tet * tet * mu_rate * rawrates[ptime]);
1295 }
1296 }
1297 else
1298 {
1299 treeparts[ptime+nn] += timeinterval * tint->lineages2[pop] / (tet * mu_rate * rawrates[ptime] * rawrates[ptime]);
1300 treeparts[pop] += timeinterval * tint->lineages2[pop] / (tet * tet * mu_rate * rawrates[ptime]);
1301 }
1302 }
1303 for (pop = nr->numpop; pop < nr->numpop2; pop++)
1304 {
1305 treeparts[pop] -= tint->interval * geo[pop] * tint->lineages[(long)((pop - numpop)/(numpop-1))] / mu_rate;
1306 }
1307 }
1308 for (pop = 0; pop < nr->numpop2; pop++)
1309 {
1310 nr->parts[pop] = treeparts[pop] - (*normal_func_gradient)(nr->param[pop],nr->world->atl[r][locus].param0[pop]);
1311 }
1312 for (pop = nr->numpop2; pop < nr->partsize; pop++)
1313 {
1314 nr->parts[pop] = treeparts[pop];
1315 }
1316 // for (pop = 0; pop < 3; pop++)
1317 // {
1318 // FPRINTF(stdout,"%li ", intervals[pop]);
1319 // }
1320 // FPRINTF(stdout,"\n");
1321
1322 z = 0;
1323 for (i = 0; i < nn; i++)
1324 {
1325 ii = (nr->profilenum > 0) ? nr->indeks[z++] : i;
1326 d[i] += expapg * nr->parts[ii];
1327 }
1328 for (i = nn; i < nr->partsize; i++)
1329 {
1330 if((i-nn) % 3 == 0) // each population has 3 time values and the first one is always 1
1331 d[i] = 0. ;
1332 else
1333 d[i] += expapg * nr->parts[i];//da rates
1334 }
1335 }
1336 }
1337 myfree(treeparts);
1338 }
1339 #endif /*LONGSUM*/
1340 /* calculates the derivatives for the different
1341 migration models
1342 */
1343 void
force_symmetric_d(MYREAL * gxv,long model,nr_fmt * nr,long nn)1344 force_symmetric_d (MYREAL *gxv, long model, nr_fmt * nr, long nn)
1345 {
1346 switch (model)
1347 {
1348 case MATRIX:
1349 break;
1350 case MATRIX_ARBITRARY:
1351 check_symmetric_d (gxv, nr, nn);
1352 break;
1353 case MATRIX_SAMETHETA:
1354 calc_symmetric_d (gxv, nr, nn, 0L, nr->numpop);
1355 break;
1356 case ISLAND:
1357 calc_symmetric_d (gxv, nr, nn, 0L, nr->numpop);
1358 calc_symmetric_d (gxv, nr, nn, nr->numpop, nr->numpop2);
1359 break;
1360 case ISLAND_VARTHETA:
1361 calc_symmetric_d (gxv, nr, nn, nr->numpop, nr->numpop2);
1362 break;
1363 }
1364 }
1365
1366 void
calc_same_d(MYREAL * grad,nr_fmt * nr,long nn,long start,long stop)1367 calc_same_d (MYREAL *grad, nr_fmt * nr, long nn, long start, long stop)
1368 {
1369 long i, ii;
1370 long z = 0;
1371 MYREAL dt = 0;
1372 long nsum = 0;
1373 char *custm = nr->world->options->custm2;
1374 if (nr->profilenum == 0)
1375 {
1376 for (i = start; i < stop; i++)
1377 {
1378 if (custm[i] == 'm')
1379 {
1380 dt += grad[i];
1381 nsum++;
1382 }
1383 }
1384 dt /= nsum;
1385 //xcode z = 0;
1386 for (i = start; i < stop; i++)
1387 {
1388 if (custm[i] == 'm')
1389 grad[i] = dt;
1390 }
1391 }
1392 else
1393 {
1394 for (i = 0; i < nn; i++)
1395 {
1396 ii = (nr->profilenum > 0) ? nr->indeks[z++] : i;
1397 if (ii >= start && ii < stop && custm[ii] == 'm')
1398 {
1399 dt += grad[i];
1400 nsum++;
1401 }
1402 }
1403 dt /= nsum;
1404 z = 0;
1405 for (i = 0; i < nn; i++)
1406 {
1407 ii = (nr->profilenum > 0) ? nr->indeks[z++] : i;
1408 if (ii >= start && ii < stop && custm[ii] == 'm')
1409 grad[i] = dt;
1410 }
1411 }
1412 }
1413
1414 void
calc_symmetric_d(MYREAL * grad,nr_fmt * nr,long nn,long start,long stop)1415 calc_symmetric_d (MYREAL *grad, nr_fmt * nr, long nn, long start, long stop)
1416 {
1417 long i, ii;
1418 long z = 0;
1419 MYREAL dt = 0;
1420 if (nr->profilenum == 0)
1421 {
1422 for (i = start; i < stop; i++)
1423 {
1424 dt += grad[i];
1425 }
1426 dt /= (stop - start);
1427 //xcode z = 0;
1428 for (i = start; i < stop; i++)
1429 {
1430 grad[i] = dt;
1431 }
1432 }
1433 else
1434 {
1435 for (i = 0; i < nn; i++)
1436 {
1437 ii = (nr->profilenum > 0) ? nr->indeks[z++] : i;
1438 if (ii >= start && ii < stop)
1439 dt += grad[i];
1440 }
1441 dt /= (stop - start);
1442 z = 0;
1443 for (i = 0; i < nn; i++)
1444 {
1445 ii = (nr->profilenum > 0) ? nr->indeks[z++] : i;
1446 if (ii >= start && ii < stop)
1447 grad[i] = dt;
1448 }
1449 }
1450 }
1451
1452 void
check_symmetric_d(MYREAL * grad,nr_fmt * nr,long nn)1453 check_symmetric_d (MYREAL *grad, nr_fmt * nr, long nn)
1454 {
1455 char *custm = NULL;
1456 //long zeron = 0;
1457 //long constn = 0;
1458 long symn = 0;
1459 long sym2n = 0;
1460 //long mmn = 0;
1461 long numpop = 0;
1462 /*static boolean done = FALSE; */
1463 twin_fmt *syms = NULL;
1464 quad_fmt *sym2s = NULL;
1465 //xcode long *zeros = NULL;
1466 //xcode long *consts = NULL;
1467 //xcode long *mms = NULL;
1468 long count;
1469 long z = 0, zz = 0, i;
1470 char *p;
1471 MYREAL mm, sq1, sq2;
1472
1473
1474 //xcode custm = nr->world->options->custm2;
1475 //xcode zeron = nr->world->options->zeron;
1476 //xcode constn = nr->world->options->constn;
1477 symn = nr->world->options->symn;
1478 sym2n = nr->world->options->sym2n;
1479 //xcode mmn = nr->world->options->mmn;
1480 if (symn > 0)
1481 syms = nr->world->options->symparam;
1482 if (sym2n > 0)
1483 sym2s = nr->world->options->sym2param;
1484 //xcode if (zeron > 0)
1485 //xcode zeros = nr->world->options->zeroparam;
1486 //xcode if (constn > 0)
1487 //xcode consts = nr->world->options->constparam;
1488 //xcode if (mmn > 0)
1489 //xcode mms = nr->world->options->mmparam;
1490 //xcode p = custm;
1491 custm = nr->world->options->custm2;
1492 numpop = nr->numpop;
1493 p = custm;
1494 z = 0;
1495 for (count = 0; count < numpop; count++)
1496 {
1497 if (*p == 'm')
1498 z++;
1499 p++;
1500 }
1501 if (z > 0)
1502 calc_same_d (grad, nr, nn, 0L, nr->numpop);
1503 p = custm + nr->numpop;
1504 z = 0;
1505 for (count = numpop; count < numpop * numpop; count++)
1506 {
1507 if (*p == 'm')
1508 z++;
1509 p++;
1510 }
1511 if (z > 0)
1512 calc_same_d (grad, nr, nn, nr->numpop, nr->numpop2);
1513 /* if there are symmetric migration rates M go here*/
1514 for (i = 0; i < symn; i++)
1515 {
1516 if (nr->profilenum > 0)
1517 {
1518 z = 0;
1519 while (z < nr->partsize && nr->indeks[z] != syms[i][0])
1520 z++;
1521 zz = 0;
1522 while (zz < nr->partsize && nr->indeks[zz] != syms[i][1])
1523 zz++;
1524 if (z < nr->partsize)
1525 mm = grad[z];
1526 else
1527 mm = 0.0;
1528 if (zz < nr->partsize)
1529 mm += grad[zz];
1530 //else
1531 // mm += 0;
1532 //fprintf(stderr,"@@@@@@@@@@@@@@@ %li %li %f %li\n",z,zz,mm,nr->partsize);
1533 if(z<nr->partsize)
1534 grad[z] = mm;
1535 if(zz<nr->partsize)
1536 grad[zz] = mm;
1537 }
1538 else
1539 {
1540 mm = grad[syms[i][0]] + grad[syms[i][1]];
1541 grad[syms[i][0]] = grad[syms[i][1]] = mm;
1542 }
1543 }
1544 /* if there are symmetric migration rates 4Nm */
1545 if (nr->profilenum > 0)
1546 {
1547 for (i = 0; i < sym2n; i++)
1548 {
1549 sq1 = nr->param[sym2s[i][2]] ;// * nr->param[sym2s[i][2]]);
1550 sq2 = nr->param[sym2s[i][3]];// * nr->param[sym2s[i][3]]);
1551 z = 0;
1552 while (nr->indeks[z] != sym2s[i][0] && z++ < nr->partsize)
1553 ;
1554 zz = 0;
1555 while (nr->indeks[zz] != sym2s[i][1] && zz++ < nr->partsize)
1556 ;
1557 if (z < nr->partsize)
1558 mm = grad[z] / sq1;
1559 else
1560 mm = 0;
1561 if (zz < nr->partsize)
1562 mm += grad[zz] / sq2;
1563 //else
1564 // mm += 0;
1565 // mm /= 2.;
1566 grad[z] = mm * sq1;
1567 grad[zz] = mm * sq2;
1568 }
1569 }
1570 else
1571 {
1572 for (i = 0; i < sym2n; i++)
1573 {
1574 sq1 = nr->param[sym2s[i][2]];// * nr->param[sym2s[i][2]];
1575 sq2 = nr->param[sym2s[i][3]];// * nr->param[sym2s[i][3]];
1576 mm = (grad[sym2s[i][0]] / sq1 + grad[sym2s[i][1]] / sq2);
1577 grad[sym2s[i][0]] = mm * sq1;
1578 grad[sym2s[i][1]] = mm * sq2;
1579 }
1580 }
1581 }
1582
1583 void
symmetric_other(long i,long numpop,long * other,long * otherpop)1584 symmetric_other (long i, long numpop, long *other, long *otherpop)
1585 {
1586 long pop;
1587 m2mm (i, numpop, otherpop, &pop);
1588 *other = mm2m (pop, *otherpop, numpop);
1589 }
1590
1591
1592 /* derivatives to log derivatives */
1593 void
grad2loggrad(MYREAL * param,long * indeks,MYREAL * d,long nn,long profilenum)1594 grad2loggrad (MYREAL *param, long *indeks, MYREAL *d, long nn,
1595 long profilenum)
1596 {
1597 long i, ii, z = 0;
1598 for (i = 0; i < nn; i++)
1599 {
1600 ii = (profilenum > 0) ? indeks[z++] : i;
1601 d[i] = -param[ii] * d[i];
1602 /*the minus is for minimizing the function -L
1603 instead of maximizing L
1604 */
1605 }
1606 }
1607
1608 MYINLINE MYREAL
probG(MYREAL * param,MYREAL * lparam,tarchive_fmt * tl,nr_fmt * nr,long locus)1609 probG (MYREAL *param, MYREAL *lparam, tarchive_fmt * tl, nr_fmt * nr,
1610 long locus)
1611 {
1612 const int numpop = (int) nr->numpop;
1613 int i, j, msta, msto;
1614 MYREAL result = 0., sm;
1615 MYREAL *geo = nr->world->data->geo;
1616 MYREAL *lgeo = nr->world->data->lgeo;
1617 MYREAL mu_rate = nr->world->options->mu_rates[locus];
1618 MYREAL inv_mu_rate = 1./ mu_rate;
1619 MYREAL lmu_rate = nr->world->options->lmu_rates[locus];
1620 for (i = 0; i < numpop; i++)
1621 {
1622 if (lparam[i] <= -MYREAL_MAX)
1623 return MYREAL_MAX;
1624 result += tl->p[i] * (LOG2 - (lparam[i] + lmu_rate));
1625 msta = nr->mstart[i];
1626 msto = nr->mend[i];
1627 sm = 0.0;
1628 for (j = msta; j < msto; j++)
1629 {
1630 if (param[j] > 0.0)
1631 {
1632 result += tl->mindex[j] * (lgeo[j] + lparam[j] - lmu_rate);
1633 sm += geo[j] * param[j] * inv_mu_rate;
1634 }
1635 }
1636 result += -tl->km[i] * sm - tl->kt[i] * (1./param[i]) * inv_mu_rate;
1637 }
1638 return result;
1639 }
1640
1641 MYINLINE MYREAL
probG4(MYREAL * fullparam,MYREAL * data,int stop)1642 probG4 (MYREAL *fullparam, MYREAL *data, int stop)
1643 {
1644 int i;
1645 // int i1;
1646 // int i2;
1647 //int stop = (int) (numpop2 + numpop + numpop);
1648 //MYREAL r1;
1649 //MYREAL r2;
1650 //MYREAL r3;
1651 MYREAL result = 0.;
1652 // fullparam is a linearized
1653 // log(param) [size: numpop * numpop] [Log(2/theta), Log(M)] for example 3pop: 3 log(2/theta), 6 log(M)
1654 // param [thetas are 1/param] [size: numpop * numpop] 3
1655 // sum(migparam) [size: numpop] 3
1656 // total of 12
1657 // data is linearized
1658 // p [size: numpop] 3
1659 // mindex [sizes: numpop * (numpop -1)] 6
1660 // kt [size: numpop] 3
1661 // km [size: numpop] 3
1662 // total 12
1663 //
1664 // calculation: p * log(2/theta) + mindex * log(M)
1665 // - kt * 1/theta - km * sm
1666 for (i = 0; i < stop; i++)
1667 {
1668 result += fullparam[i] * data[i];
1669 }
1670 // stop is at least 3:
1671 // population in dataset stop
1672 // 1 3
1673 // 2 8
1674 // 3 15
1675 // 4 24
1676 //for (i = 0, i1= 1, i2 = 2; i2 < stop ; i++, i1++, i2++)
1677 //{
1678 // r1 = fullparam[i] * data[i];
1679 // r2 = fullparam[i1] * data[i1];
1680 // r3 = fullparam[i2] * data[i2];
1681 // result += r1 + r2 + r3;
1682 //}
1683
1684 return result;
1685 }
1686
1687 #ifdef LONGSUM
1688 MYREAL
probG_longsum(MYREAL * param,MYREAL * lparam,tarchive_fmt * tl,nr_fmt * nr,long locus)1689 probG_longsum (MYREAL *param, MYREAL *lparam, tarchive_fmt * tl, nr_fmt * nr,
1690 long locus)
1691 {
1692 const int numpop = (int) nr->numpop;
1693 long interval, i, pop, fromto;
1694 MYREAL result = 0.;
1695 MYREAL *geo = nr->world->data->geo;
1696 MYREAL *lgeo = nr->world->data->lgeo;
1697 MYREAL mu_rate = nr->world->options->mu_rates[locus];
1698 MYREAL lmu_rate = nr->world->options->lmu_rates[locus];
1699 longsum_fmt *tint;
1700 MYREAL *sm;
1701
1702 sm = (MYREAL *) mycalloc(nr->numpop, sizeof(MYREAL));
1703 for(pop=0;pop<nr->numpop; pop++)
1704 {
1705 for (i = nr->world->mstart[pop]; i < nr->world->mend[pop]; i++)
1706 sm[pop] += geo[i] * param[i] / mu_rate;
1707 }
1708
1709 for(interval=0; interval < tl->longsumlen; interval++)
1710 {
1711 tint = &tl->longsum[interval];
1712 switch(tint->eventtype)
1713 {
1714 case 'c':
1715 result += (LOG2 - (lparam[tint->to] + lmu_rate));
1716 break;
1717 case 'm':
1718 if (param[tint->fromto] > 0.0)
1719 {
1720 fromto = tint->fromto;
1721 result += (lgeo[fromto] + lparam[fromto] - lmu_rate);
1722 }
1723 break;
1724 }
1725 for (i = 0; i < numpop; i++)
1726 {
1727 pop = tint->to;
1728 result -= tint->interval*tint->lineages2[pop] / (param[pop] * mu_rate);
1729 result -= tint->interval * tint->lineages[tint->to] * sm[pop];
1730 }
1731 }
1732 myfree(sm);
1733 return result;
1734 }
1735
1736
1737 MYREAL
probG4_longsum(MYREAL * fullparam,longsum_fmt * data,long longsumlen,long numpop,long numpop2)1738 probG4_longsum (MYREAL *fullparam, longsum_fmt *data, long longsumlen, long numpop, long numpop2)
1739 {
1740 int interval, i;
1741 MYREAL result = 0.;
1742 MYREAL timeinterval;
1743 MYREAL rawt;
1744 long treelen=longsumlen;
1745 longsum_fmt *tint;
1746 MYREAL time_end;
1747 MYREAL *sm = fullparam + numpop2 + numpop;
1748 // we allow three rates per population [uhh too many parameters]
1749 MYREAL *rawrates = fullparam + numpop2 + numpop + numpop;
1750 MYREAL *rawlrates = rawrates + 3*numpop;
1751 MYREAL *rawtimes = rawrates + 6*numpop;
1752 //MYREAL *rates;
1753 //MYREAL *lrates;
1754 long ptime, ptimeold;
1755 // MYREAL *rtimes;
1756 // long numpop3 = 3 * numpop;
1757 //rates = mycalloc( numpop * 6, sizeof(MYREAL));
1758 // for(pop=0; pop < numpop3; pop++)
1759 // rates[pop] = 1.0;
1760 // lrates = rates + numpop3;
1761 // rtimes = mycalloc( numpop3, sizeof(MYREAL));
1762 // fullparam is a linearized
1763 // log(param) [size: numpop * numpop] [Log(2/theta), Log(M)]
1764 // param [thetas are 1/param] [size: numpop ]
1765 // sum(migparam) [size: numpop]
1766 // data is linearized
1767 // p [size: numpop]
1768 // mindex [sizes: numpop * (numpop -1)]
1769 // kt [size: numpop]
1770 // km [size: numpop]
1771 //
1772 // calculation: p * log(2/theta) + mindex * log(M)
1773 // for(pop=0; pop < numpop3; pop += 3)
1774 // rates[pop] = rawrates[pop];
1775 time_end = 0.;
1776 for(interval=0;interval<treelen;interval++)
1777 {
1778 tint = &data[interval];
1779 time_end += tint->interval;
1780 ptime = which_rate(rawtimes, tint->to, time_end);
1781 switch(tint->eventtype)
1782 {
1783 case 'c':
1784 result += fullparam[tint->to] - rawlrates[ptime] ;
1785 break;
1786 case 'm':
1787 if (fullparam[tint->fromto] > -MYREAL_MAX)
1788 result += fullparam[tint->fromto];
1789 break;
1790 }
1791 // - kt * 1/theta - km * sm
1792 for (i = 0; i < numpop; i++)
1793 {
1794 timeinterval = tint->interval;
1795 ptime = which_rate(rawtimes, i, time_end);
1796 rawt = rawtimes[ptime];
1797 if(((time_end - tint->interval) < rawt) && (time_end > rawt))
1798 {
1799 ptimeold = which_rate(rawtimes, i, time_end - tint->interval);
1800 if(ptime-ptimeold>1)
1801 {
1802 timeinterval = rawtimes[ptimeold+1] - (time_end - tint->interval);
1803 result += timeinterval * (tint->lineages2[i] / rawrates[ptimeold] * fullparam[i+numpop2] + tint->lineages[i] * sm[i]);
1804 timeinterval = rawtimes[ptime] - rawtimes[ptimeold+1];
1805 result += timeinterval * (tint->lineages2[i] / rawrates[ptimeold+1] * fullparam[i+numpop2] + tint->lineages[i] * sm[i]);
1806 timeinterval = time_end - rawtimes[ptime];
1807 result += timeinterval * (tint->lineages2[i] / rawrates[ptime] * fullparam[i+numpop2] + tint->lineages[i] * sm[i]);
1808 }
1809 else
1810 {
1811 timeinterval = rawtimes[ptime] - (time_end - tint->interval);
1812 result += timeinterval * (tint->lineages2[i] / rawrates[ptimeold] * fullparam[i+numpop2] + tint->lineages[i] * sm[i]);
1813 timeinterval = time_end - rawtimes[ptime];
1814 result += timeinterval * (tint->lineages2[i] / rawrates[ptime] * fullparam[i+numpop2] + tint->lineages[i] * sm[i]);
1815 // FPRINTF(stderr,"on border timeend %f, interval %f, rawrate %f\n",time_end,tint->interval, rawrates[ptime]);
1816 }
1817 }
1818 else
1819 {
1820 result += timeinterval * (tint->lineages2[i] / rawrates[ptime] * fullparam[i+numpop2] + tint->lineages[i] * sm[i]);
1821 }
1822 // FPRINTF(stdout,"p=%i t=%f, u=%f ptime=%li rt=%f res=%f\n", i, time_end, timeinterval, ptime, rawtimes[ptime], result);
1823 }
1824 }
1825 // FPRINTF(stdout,"\n");
1826 //myfree(rates);
1827 // myfree(rtimes);
1828 return result;
1829 }
1830 #endif /*LONGSUM*/
1831
1832 void
probG3(MYREAL * apg,MYREAL * apg0r,timearchive_fmt * tyme,long numpop,MYREAL * apgmax,MYREAL * param,MYREAL * lparam,MYREAL * sm)1833 probG3 (MYREAL *apg, MYREAL *apg0r, timearchive_fmt * tyme,
1834 long numpop, MYREAL *apgmax,
1835 MYREAL *param, MYREAL *lparam, MYREAL *sm)
1836 {
1837 long g, g1, g2, g3, i, j;
1838 long msta, me;
1839 MYREAL temp, temp1, temp2;
1840 MYREAL result1, result2, result3, result4;
1841 MYREAL tparam;
1842 tarchive_fmt *tl1;
1843 tarchive_fmt *tl2;
1844 tarchive_fmt *tl3;
1845 tarchive_fmt *tl4;
1846 long remaind = tyme->T % 4;
1847
1848 if (remaind > 0)
1849 {
1850 for (g = 0; g < remaind; g++)
1851 {
1852 tl1 = &(tyme->tl[g]);
1853 result1 = 0.;
1854 for (i = 0; i < numpop; i++)
1855 {
1856 msta = numpop + i * (numpop - 1);
1857 me = msta + numpop - 1;
1858 temp = (LOG2 - lparam[i]);
1859 result1 += tl1->p[i] * temp;
1860 for (j = msta; j < me; j++)
1861 {
1862 if (param[j] > 0.0)
1863 {
1864 result1 += tl1->mindex[j] * lparam[j];
1865 }
1866 }
1867 result1 += -tl1->km[i] * sm[i] - tl1->kt[i] / param[i];
1868 }
1869 apg[g] = result1 - apg0r[g];
1870 if (apg[g] > *apgmax)
1871 *apgmax = apg[g];
1872 }
1873 }
1874 for (g = remaind; g < tyme->T; g += 4)
1875 {
1876 g1 = g + 1;
1877 g2 = g1 + 1;
1878 g3 = g2 + 1;
1879 tl1 = &(tyme->tl[g]);
1880 tl2 = &(tyme->tl[g1]);
1881 tl3 = &(tyme->tl[g2]);
1882 tl4 = &(tyme->tl[g3]);
1883 result1 = 0.;
1884 result2 = 0.;
1885 result3 = 0.;
1886 result4 = 0.;
1887 for (i = 0; i < numpop; i++)
1888 {
1889 tparam = 1. / param[i];
1890 msta = numpop + i * (numpop - 1);
1891 me = msta + numpop - 1;
1892 temp = (LOG2 - lparam[i]);
1893 result1 += tl1->p[i] * temp;
1894 result2 += tl2->p[i] * temp;
1895 result3 += tl3->p[i] * temp;
1896 result4 += tl4->p[i] * temp;
1897 for (j = msta; j < me; j++)
1898 {
1899 if (param[j] > 0.0)
1900 {
1901 result1 += tl1->mindex[j] * lparam[j];
1902 result2 += tl2->mindex[j] * lparam[j];
1903 result3 += tl3->mindex[j] * lparam[j];
1904 result4 += tl4->mindex[j] * lparam[j];
1905 }
1906 }
1907 result1 += -tl1->km[i] * sm[i] - tl1->kt[i] * tparam;
1908 result2 += -tl2->km[i] * sm[i] - tl2->kt[i] * tparam;
1909 result3 += -tl3->km[i] * sm[i] - tl3->kt[i] * tparam;
1910 result4 += -tl4->km[i] * sm[i] - tl4->kt[i] * tparam;
1911 }
1912 apg[g] = result1 - apg0r[g];
1913 apg[g1] = result2 - apg0r[g1];
1914 apg[g2] = result3 - apg0r[g2];
1915 apg[g3] = result4 - apg0r[g3];
1916 temp1 = MAX (apg[g], apg[g1]);
1917 temp2 = MAX (apg[g2], apg[g3]);
1918 if ((temp = MAX (temp1, temp2)) > *apgmax)
1919 *apgmax = temp;
1920 }
1921 }
1922
1923 MYREAL
probG2(MYREAL * param,MYREAL * lparam,MYREAL * sm,MYREAL * kt,MYREAL * km,MYREAL * p,MYREAL * mindex,int * msta,int * me,long numpop)1924 probG2 (MYREAL *param, MYREAL *lparam, MYREAL *sm,
1925 MYREAL *kt, MYREAL *km, MYREAL *p, MYREAL *mindex,
1926 int *msta, int *me, long numpop)
1927 {
1928 int i, j;
1929 MYREAL result = 0.;
1930 for (i = 0; i < numpop; i++)
1931 {
1932 if (lparam[i] <= -MYREAL_MAX)
1933 return MYREAL_MAX;
1934 // result += p[i] * (LOG2 - lparam[i]);
1935 for (j = msta[i]; j < me[i]; j++)
1936 {
1937 if (param[j] > 0.0)
1938 result += mindex[j] * lparam[j];
1939 }
1940 result += p[i] * lparam[i] - km[i] * sm[i] - kt[i] * param[i];
1941 // result += -km[i] * sm[i] - kt[i] / param[i];
1942 }
1943 return result;
1944 }
1945
1946
1947 boolean
is_singular(MYREAL ** dd,long n)1948 is_singular (MYREAL **dd, long n)
1949 {
1950 long i, j;
1951 MYREAL temp;
1952 boolean singular = FALSE;
1953 for (i = 0; i < n; i++)
1954 {
1955 temp = 0.0;
1956 for (j = 0; j < n; j++)
1957 {
1958 temp += dd[i][j];
1959 }
1960 if (!(temp > 0.0 || temp < 0.0))
1961 {
1962 singular = TRUE;
1963 break;
1964 }
1965 }
1966 for (i = 0; i < n; i++)
1967 {
1968 temp = 0.0;
1969 for (j = 0; j < n; j++)
1970 {
1971 temp += dd[i][j];
1972 }
1973 if (!(temp < 0.0 || temp > 0.0))
1974 {
1975 singular = TRUE;
1976 break;
1977 }
1978 }
1979 return singular;
1980 }
1981
1982
1983 void
calc_cov(MYREAL ** dd,MYREAL * d,MYREAL * param,long n)1984 calc_cov (MYREAL **dd, MYREAL *d, MYREAL *param, long n)
1985 {
1986 long i, j;
1987 if (!is_singular (dd, n))
1988 invert_matrix (dd, n);
1989 else
1990 {
1991 reset_hess (dd, n);
1992 return;
1993 }
1994 for (i = 0; i < n; i++)
1995 {
1996 for (j = 0; j < i; j++)
1997 {
1998 dd[i][j] /= (param[i] * param[j]);
1999 dd[j][i] = dd[i][j];
2000 }
2001 dd[i][i] = (dd[i][i] - param[i] * d[i]) / (param[i] * param[i]);
2002 }
2003 if (!is_singular (dd, n))
2004 invert_matrix (dd, n);
2005 else
2006 reset_hess (dd, n);
2007 }
2008
2009
2010 void
print_contribution(nr_fmt * nr,timearchive_fmt ** atl,long G)2011 print_contribution (nr_fmt * nr, timearchive_fmt ** atl, long G)
2012 {
2013 long g, i, r;
2014 MYREAL temp = 0, temp2 = 0, maxtemp;
2015 FILE *mixfile = nr->world->options->mixfile;
2016 long events = 0;
2017 long contribution[11];
2018 long locus = nr->world->locus;
2019 tarchive_fmt *tyme;
2020 for (g = 0; g < 11; g++)
2021 contribution[g] = 0;
2022 if (nr->world->options->mixplot)
2023 FPRINTF (mixfile, "\n\n");
2024 maxtemp = -MYREAL_MAX;
2025 for (r = nr->repstart; r < nr->repstop; r++)
2026 {
2027 for (g = 0; g < G; g++)
2028 {
2029 tyme = atl[r][nr->world->locus].tl;
2030 temp = nr->apg[r][locus][g] + nr->apg0[r][locus][g];
2031 temp2 = temp + nr->world->likelihood[g];
2032 if (temp2 > maxtemp)
2033 maxtemp = temp2;
2034 if (nr->world->options->mixplot)
2035 {
2036 events = 0;
2037 for (i = nr->world->numpop; i < nr->world->numpop2; i++)
2038 events += (long) tyme[g].mindex[i];
2039 for (i = 0; i < tyme[g].copies; i++)
2040 FPRINTF (mixfile, "%li %f %f ", events, temp,
2041 nr->world->likelihood[g]);
2042 }
2043 temp2 -= maxtemp;
2044 if (temp2 > -20)
2045 {
2046 contribution[9 - (long) (fabs (temp2) / 2)] +=
2047 (g > 0) ? tyme[g].copies : tyme[g].copies - 1;
2048 }
2049 contribution[10] += (g > 0) ? tyme[g].copies : tyme[g].copies - 1;
2050 }
2051 }
2052 if (nr->world->options->progress)
2053 {
2054 print_menu_contribution (stdout, contribution);
2055 if (nr->world->options->writelog)
2056 print_menu_contribution (nr->world->options->logfile, contribution);
2057 }
2058 }
2059
2060 void
print_menu_contribution(FILE * file,long contribution[])2061 print_menu_contribution (FILE * file, long contribution[])
2062 {
2063 long g;
2064 FPRINTF (file, " log(P(g|Param) * P(data|g)\n");
2065 FPRINTF (file, " -20 to ");
2066 for (g = -18; g <= 0; g += 2)
2067 {
2068 FPRINTF (file, "%4li ", g);
2069 }
2070 FPRINTF (file, " All\n");
2071 FPRINTF (file, " Counts ");
2072 for (g = 0; g < 10; g++)
2073 {
2074 FPRINTF (file, "%4li ", contribution[g]);
2075 }
2076 FPRINTF (file, "%5li\n", contribution[10]);
2077 }
2078
2079 /* calculates log(parameters)*/
2080 void
log_param0(MYREAL * param,MYREAL * lparam,long nn)2081 log_param0 (MYREAL *param, MYREAL *lparam, long nn)
2082 {
2083 long i;
2084 for (i = 0; i < nn; i++)
2085 {
2086 if (param[i] > 0)
2087 lparam[i] = LOG (param[i]);
2088 else
2089 lparam[i] = -30; //-MYREAL_MAX;
2090 }
2091 }
2092
2093
2094 void
copies2lcopies(timearchive_fmt * atl)2095 copies2lcopies (timearchive_fmt * atl)
2096 {
2097 long g;
2098 if (atl->tl[0].copies > 1)
2099 atl->tl[0].lcopies = ln_copies (atl->tl[0].copies - 1);
2100 else
2101 atl->tl[0].lcopies = -MYREAL_MAX;
2102 for (g = 1; g < atl->T; g++)
2103 {
2104 atl->tl[g].lcopies = ln_copies (atl->tl[g].copies);
2105 }
2106 }
2107
2108 void
create_apg0(MYREAL * apg0,nr_fmt * nr,timearchive_fmt * tyme,long locus)2109 create_apg0 (MYREAL *apg0, nr_fmt * nr, timearchive_fmt * tyme, long locus)
2110 {
2111 long g;
2112 long copies;
2113 /* Prob(G|Param0) */
2114 // FPRINTF(stdout,"first 4 theta0: %f %f %f %f \n",tyme->param0[0], tyme->param0[1], tyme->param0[2], tyme->param0[3]);
2115 for (g = 0; g < tyme->T; g++)
2116 {
2117 if (g > 0)
2118 copies = tyme->tl[g].copies;
2119 else
2120 copies = tyme->tl[g].copies - 1;
2121 if (copies == 0)
2122 apg0[g] = -MYREAL_MAX;
2123 else
2124 {
2125 #ifdef LONGSUM
2126 apg0[g] =probG_longsum (tyme->param0, tyme->lparam0, &tyme->tl[g], nr, locus);
2127 #else /*LONGSUM*/
2128 //#ifdef INTEGRATEDLIKE
2129 //apg0[g] = 0;
2130 //#else
2131 apg0[g] = probG (tyme->param0, tyme->lparam0, &tyme->tl[g], nr, locus);
2132 //#endif /*INTEGRATEDLIKE*/
2133 #endif /*LONGSUM*/
2134
2135 }
2136 }
2137 // FPRINTF(stdout,"first 4 apg0: %f %f %f %f \n",apg0[0], apg0[1], apg0[2], apg0[3]);
2138
2139 }
2140
2141
2142
2143 MYREAL
sum_mig(MYREAL * param,long msta,long msto)2144 sum_mig (MYREAL *param, long msta, long msto)
2145 {
2146 long i;
2147 MYREAL result = 0.0;
2148 for (i = msta; i < msto; i++)
2149 {
2150 result += param[i];
2151 }
2152 return result;
2153 }
2154
2155 // penalizes if theta is too far from theta0
2156 // std22 = std*std*2
normal_func_ok(MYREAL * param,MYREAL * param0,long numpop2)2157 MYREAL normal_func_ok(MYREAL *param, MYREAL *param0, long numpop2)
2158 {
2159 long i;
2160 MYREAL p0;
2161 MYREAL result=0.0;
2162 MYREAL diff;
2163 for(i=0; i<numpop2;i++)
2164 {
2165 if(param0[i]>0.0)
2166 {
2167 p0=param0[i];
2168 diff = param[i]-p0;
2169 result += -(diff*diff/(2. * p0 *p0)) - 0.91893853320467274178 - log(p0);
2170 }
2171 }
2172 return result;
2173 }
2174
normal_func_no(MYREAL * param,MYREAL * param0,long numpop2)2175 MYREAL normal_func_no(MYREAL *param, MYREAL *param0, long numpop2)
2176 {
2177 return 0.0;
2178 }
2179
normal_func_gradient_ok(MYREAL p1,MYREAL p0)2180 MYREAL normal_func_gradient_ok(MYREAL p1, MYREAL p0)
2181 {
2182
2183 return (p1-p0)/(p0*p0);//STD2;
2184 }
2185
normal_func_gradient_no(MYREAL p1,MYREAL p0)2186 MYREAL normal_func_gradient_no(MYREAL p1, MYREAL p0)
2187 {
2188 return 0.0;
2189 }
2190
unset_penalizer_function(boolean inprofiles)2191 void unset_penalizer_function(boolean inprofiles)
2192 {
2193 if(inprofiles)
2194 {
2195 normal_func = (MYREAL (*)(MYREAL *, MYREAL *, long)) normal_func_no;
2196 normal_func_gradient = (MYREAL (*)(MYREAL , MYREAL)) normal_func_gradient_no;
2197 }
2198 else
2199 { ///debug was originaly OK and not NO
2200 normal_func = (MYREAL (*)(MYREAL *, MYREAL *, long)) normal_func_no;
2201 normal_func_gradient = (MYREAL (*)(MYREAL, MYREAL)) normal_func_gradient_no;
2202 }
2203 }
2204
2205 /*
2206 ALTIVEC garbage dump
2207
2208 MYREAL
2209 calc_locus_like (nr_fmt * nr, MYREAL *param, MYREAL *lparam, long locus)
2210 {
2211 long g, r, j, copies, r4;
2212 const long numpop = nr->world->numpop;
2213 const long numpop2 = nr->world->numpop2;
2214 int msta, msto;
2215 MYREAL gsum = 0;
2216 MYREAL ***apg0;
2217 MYREAL apgmax = -MYREAL_MAX;
2218 // MYREAL ***apgall = nr->apg;
2219 MYREAL *apg;
2220 MYREAL *apg0r;
2221 timearchive_fmt tyme;
2222 tarchive_fmt *tl;
2223 MYREAL *geo = nr->world->data->geo;
2224 MYREAL *lgeo = nr->world->data->lgeo;
2225 FloatVec *floatparam;
2226 FloatVec *floatlparam;
2227 FloatVec *floatgeo;
2228 FloatVec *floatlgeo;
2229 FloatVec *locallparam;
2230 FloatVec *localparam;
2231 FloatVec *sm;
2232 float mu_rate = (float) nr->world->options->mu_rates[locus];
2233 float invmu_rate = 1./mu_rate;
2234 float lmu_rate = (float) nr->world->options->lmu_rates[locus];
2235 MYREAL normaldev;
2236 long sizepops = numpop - (numpop % 4) + 4;
2237 long sizeall = numpop2 - (numpop2 % 4) + 4;
2238 float log2 = LOG2;
2239 vector float vlog2;
2240 vector float vlmu;
2241 vector float vmu;
2242 vector float vimu;
2243 vector float vtmp1, vtmp2, vtmp3;
2244 vector float minuszero = vec_zero();
2245 long size = sizeall + sizepops + sizepops;
2246 floatparam = (FloatVec *) mycalloc (size, sizeof (FloatVec));
2247 floatlparam = (FloatVec *) mycalloc (size, sizeof (FloatVec));
2248 load_double_floatvec(floatparam, param,numpop2);
2249 load_double_floatvec(floatlparam, lparam,numpop2);
2250 floatgeo = (FloatVec *) mycalloc (size, sizeof (FloatVec));
2251 floatlgeo = (FloatVec *) mycalloc (size, sizeof (FloatVec));
2252 load_double_floatvec(floatgeo, geo,numpop2);
2253 load_double_floatvec(floatlgeo, lgeo,numpop2);
2254 locallparam = (FloatVec *) mycalloc (size, sizeof (FloatVec));
2255 localparam = locallparam + sizeall;
2256 sm = localparam + sizepops;
2257 memcpy (localparam, floatparam, sizeof (FloatVec) * numpop);
2258 memcpy (locallparam, floatlparam, sizeof (FloatVec) * numpop2);
2259
2260 // generate vlog2={LOG2, LOG2, LOG2, LOG2}
2261 vlog2 = load_float_splat(&log2);
2262 // generate vlmu={lmu_rate, ....}
2263 vlmu = load_float_splat(&lmu_rate);
2264 // generate vmu={mu_rate, ....}
2265 vmu = load_float_splat(&mu_rate);
2266 // generate inverse of vmu vimu={1/mu_rate, ....}
2267 vimu = load_float_splat(&invmu_rate);
2268
2269 nr->PGC[locus] = 0.0;
2270 apg0 = nr->apg0;
2271
2272 for (r = 0; r < sizepops/4; r++)
2273 {
2274 // y = log2 - logparam + logmurate
2275 locallparam[r].v = vec_sub(vlog2, vec_add(locallparam[r].v,vlmu));
2276 // y = 1/(logparam * murate);
2277 vtmp1 = vec_madd(localparam[r].v,vmu, minuszero);
2278 vtmp2 = vec_re(vtmp1);
2279 // Newton-Raphson improvement
2280 vtmp3 = vec_madd(vtmp2, vec_nmsub(vtmp2, vtmp1, vec_float_one()),vtmp2);
2281 // changing sign for later streamlining
2282 // is there a better way to do that?
2283 localparam[r].v = vec_sub(minuszero,vtmp3);
2284 msta = nr->mstart[r];
2285 msto = nr->mend[r];
2286 zz = 0;
2287 for (j = msta; j < msto; j++)
2288 {
2289 sm[zz].f[ -= geo[j] * param[j] / mu_rate; //minus, so that we can loop over all in probG4
2290 locallparam[j] += lgeo[j] - lmu_rate;
2291 }
2292
2293 }
2294 // for(j=0; j< sizeall /4 ; j++)
2295 // {
2296 //sm[r] -= geo[j] * param[j] / mu_rate; //minus, so that we can loop over all in probG4
2297 //locallparam[j] += lgeo[j] - lmu_rate;
2298 // not correctly done yet
2299 // floatparam[j].v = vec_madd(floatgeo[j].v, vec_madd(floatparam[j].v,vimu,minuszero),minuszero);
2300 // floatlparam[j].v = vec_sub(floatlgeo[j].v, vlmu);
2301 //}
2302 //memcpy(locallparam+numpop, floatlparam+numpop,sizeof(float)*numpop*(numpop-1));
2303
2304 gsum = 0;
2305 for (r = nr->repstart; r < nr->repstop; r++)
2306 {
2307 apg = nr->apg[r][locus];
2308 apg0r = apg0[r][locus];
2309 tyme = nr->atl[r][locus];
2310 normaldev = (*normal_func)(param, tyme.param0, numpop2);
2311 //first element
2312 tl = &(tyme.tl[0]);
2313 copies = tl->copies - 1;
2314 gsum += copies;
2315
2316 if (copies > 0)
2317 {
2318 apg[0] = tl->lcopies + probG4 (locallparam, tl->data, numpop,
2319 numpop2) - apg0r[0]
2320 + normaldev;
2321 ;
2322 }
2323 else
2324 apg[0] = -MYREAL_MAX;
2325 if (apgmax < apg[0])
2326 apgmax = apg[0];
2327 //other elements
2328 for (g = 1; g < tyme.T - tyme.T % 4; g++)
2329 {
2330 // tl = &(tyme.tl[g]);
2331 gsum += tyme.tl[g]->copies+tyme.tl[g+1]->copies+tyme.tl[g+2]->copies+tyme.tl[g+3]->copies;
2332 // apg[g] = tl->lcopies + probG4 (locallparam, tl->data, numpop, numpop2) - apg0r[g]
2333 // +normaldev;
2334 //
2335 temp.v = fourdot_products(locallparam, tyme.tl[g]->data,
2336 tyme.tl[g+1]->data,,tyme.tl[g+2]->data,,tyme.tl[g+3]->data, minuszero);
2337 maxtemp.v = vec_max(temp.v, maxtemp.v);
2338 //
2339 //if (apg[g] > apgmax)
2340 // apgmax = apg[g];
2341 apg[g] = temp.f[0];
2342 apg[g+1] = temp.f[1];
2343 apg[g+2] = temp.f[2];
2344 apg[g+3] = temp.f[3];
2345 apgmax = maxtemp.f[3];
2346 }
2347 } // end replicates
2348 for (r = nr->repstart; r < nr->repstop; r++)
2349 {
2350 apg = nr->apg[r][locus];
2351 apg0r = apg0[r][locus];
2352 tyme = nr->atl[r][locus];
2353 // first element
2354 tl = &(tyme.tl[0]);
2355 // first element
2356 apg[0] -= apgmax;
2357 nr->PGC[locus] += EXP (apg[0]);
2358 // all other elements
2359 for (g = 1; g < tyme.T; g++)
2360 {
2361 apg[g] -= apgmax;
2362 if (apg[g] > -40.)
2363 nr->PGC[locus] += EXP (apg[g]);
2364 }
2365 } // replicates
2366 nr->apg_max[locus] = apgmax;
2367 nr->llike = apgmax + LOG (nr->PGC[locus]) - LOG (gsum);
2368 myfree(locallparam);
2369 return nr->llike;
2370 }*/
2371