1 /* \file tree.c */
2 /*------------------------------------------------------
3 Maximum likelihood estimation
4 of migration rate and effectice population size
5 using a Metropolis-Hastings Monte Carlo algorithm
6 -------------------------------------------------------
7 T R E E B U I L D I N G R O U T I N E S
8
9 Peter Beerli 1996, Seattle
10 beerli@fsu.edu
11
12 Copyright 1997-2002 Peter Beerli and Joseph Felsenstein, Seattle WA
13 Copyright 2003-2004 Peter Beerli, Tallahassee FL
14
15 some code in this file are successors of code in dnaml in the PHYLIP
16 package of Joseph Felsenstein. Several changes were made to the conditionl
17 likelihood methods to improve speed, but the original design idea is Joe's.
18
19 A new model that includes treatment of gaps in the alignment is in the works
20 and may superceede all other models because it seems that this model
21 is capable to handle all other sequence models. <THIS IS NOY WORKING YET>
22
23 Permission is hereby granted, free of charge, to any person obtaining
24 a copy of this software and associated documentation files (the
25 "Software"), to deal in the Software without restriction, including
26 without limitation the rights to use, copy, modify, merge, publish,
27 distribute, sublicense, and/or sell copies of the Software, and to
28 permit persons to whom the Software is furnished to do so, subject
29 to the following conditions:
30
31 The above copyright notice and this permission notice shall be
32 included in all copies or substantial portions of the Software.
33
34 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
35 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
36 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
37 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
38 ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
39 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
40 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
41
42
43 $Id: tree.c 2171 2013-09-22 22:05:02Z beerli $
44
45 -------------------------------------------------------*/
46 #include <stdlib.h>
47 #include "migration.h"
48 #include "sighandler.h"
49 #include "random.h"
50 #include "options.h"
51 #include "data.h"
52 #include "sequence.h"
53 #include "world.h"
54 #include "tools.h"
55 #include "migrate_mpi.h"
56 #include "mcmc.h"
57 #ifdef UEP
58 #include "uep.h"
59 #endif
60 #include "tree.h"
61 #ifdef BEAGLE
62 #include "calculator.h"
63 #endif
64 #ifdef DMALLOC_FUNC_CHECK
65 #include <dmalloc.h>
66 #endif
67
68 #define NOTIPS 0
69 #define WITHTIPS 1
70
71 extern long unique_id_global;
72
73 //// prototypes -------------------------------------------
74 ////
75 ////
76 //// creating trees
77 //void buildtree (world_fmt * world, option_fmt * options, data_fmt * data, long locus);
78 //void create_treetimelist (world_fmt * world, timelist_fmt ** ltl, long locus);
79 //void fix_times (world_fmt * world, option_fmt * options);
80 //void first_smooth (world_fmt * world, long locus);
81 //void set_dirty (node * p);
82 //void construct_tymelist (world_fmt * world, timelist_fmt * timevector);
83 //void timeslices (timelist_fmt ** timevector);
84 //void add_partlineages (long numpop, timelist_fmt ** timevector);
85 //// likelihood calculation
86 //MYREAL treelikelihood (world_fmt * world);
87 //MYREAL pseudotreelikelihood (world_fmt * world, proposal_fmt * proposal);
88 MYREAL treelike_anc (world_fmt * world, long locus);
89 //void set_pop (node * theNode, long pop, long actualpop);
90 //void pseudonuview (proposal_fmt * proposal, xarray_fmt xx1, MYREAL *lx1,
91 // MYREAL v1, xarray_fmt xx2, MYREAL *lx2, MYREAL v2);
92 //void ltov (node * p);
93 //void treeout (FILE * treefile, node * joint, node * p, long s);
94 //void print_tree (world_fmt * world, long g, long *filepos);
95 //MYREAL find_tipdate(char * id, long pop, world_fmt *world);
96 //
97 /* private functions------------------------------------- */
98
99 void allocate_tree (world_fmt * world, option_fmt * options, data_fmt * data,
100 long locus);
101 /* allocations of nodes */
102 void allocatetips (world_fmt * world, option_fmt * options, data_fmt * data,
103 long locus);
104 void allocateinterior (world_fmt * world, data_fmt * data, long locus);
105 void allocatepoproot (world_fmt * world, data_fmt * data, long locus);
106 void allocate_tip (world_fmt * world, option_fmt * options, node ** p,
107 long pop, long locus, long a, long ind, char **tipnames);
108 void alloc_seqx (world_fmt * world, node * theNode);
109 void allocate_xseq(xarray_fmt *x, long sites, long categs);
110
111 /* first tree material (upgma, distance) */
112 void set_tree (world_fmt * world, option_fmt * options, data_fmt * data,
113 long locus);
114 // sets migration events into a tree read from the user (dna only)
115 void set_migrations (world_fmt * world, option_fmt * options, data_fmt * data,
116 long locus);
117 void distance_EP (char **data, long tips, MYREAL **m);
118 void distance_micro (char **data, long tips, MYREAL **m);
119 void distance_sequence (data_fmt * data, long locus, long tips, long sites,
120 long nmlength, MYREAL **m);
121 void distance_allele (world_fmt * world, option_fmt * options, long locus,
122 long tips, MYREAL **distm);
123 void randomize_distm(MYREAL **distm, data_fmt * data, long locus,
124 long tips, char *custm);
125 void constrain_distance_zeromig (MYREAL **m, option_fmt *options, data_fmt * data, long locus,
126 long tips, char *custm);
127
128 void makevalues (world_fmt * world, option_fmt * options, data_fmt * data,
129 long locus);
130 void make_alleles (world_fmt * world, option_fmt * options, data_fmt * data, long locus);
131 void make_microsatellites (world_fmt * world, option_fmt * options, data_fmt * data, long locus);
132 void make_microbrownian (world_fmt * world, option_fmt *options, data_fmt * data, long locus);
133 void upgma (world_fmt * world, MYREAL **x, long tips, node ** nodep);
134 void set_top (world_fmt * world, node * p, long locus);
135 void set_v (node * p);
136 void calc_sancost (MYREAL **cost, long numpop);
137
138
139 void free_treetimes (world_fmt * world, long size);
140 void traverseNodes (node * theNode, timelist_fmt ** timevector, long *slice, world_fmt *world, long *tips);
141 void increase_timelist (timelist_fmt ** timevector);
142 void allocate_lineages (timelist_fmt **timevector, const long offset, const long numpop);
143 void smooth (const node * root, node * p, world_fmt * world,
144 const long locus);
145 void which_nuview (char datatype, boolean fastlike, boolean use_gaps, int watkins);
146 void which_pseudonuview (char datatype, boolean fastlike, boolean use_gaps, int watkins);
147 void nuview_allele (node * mother, world_fmt * world, const long locus);
148 void nuview_micro (node * mother, world_fmt * world, const long locus);
149 void nuview_brownian (node * mother, world_fmt * world, const long locus);
150 void nuview_sequence (node * mother, world_fmt * world, const long locus);
151 void nuview_sequence_slow (node * mother, world_fmt * world,
152 const long locus);
153 void nuview_ancestral (node * mother, world_fmt * world, const long locus);
154 void adjustroot (node * r);
155 MYINLINE MYREAL pseudo_tl_seq (phenotype xx1, phenotype xx2, MYREAL v1, MYREAL v2,
156 proposal_fmt * proposal, world_fmt * world);
157 MYINLINE MYREAL pseudo_tl_snp (phenotype xx1, phenotype xx2, MYREAL v1, MYREAL v2,
158 proposal_fmt * proposal, world_fmt * world);
159 MYINLINE MYREAL pseudo_tl_snp_unlinked (phenotype xx1, phenotype xx2, MYREAL v1,
160 MYREAL v2, proposal_fmt * proposal,
161 world_fmt * world);
162 MYINLINE MYREAL pseudo_tl_anc (phenotype xx1, phenotype xx2, MYREAL v1, MYREAL v2,
163 proposal_fmt * proposal, world_fmt * world);
164 MYINLINE void pseudonu_allele (proposal_fmt * proposal, MYREAL **xx1, MYREAL *lx1,
165 MYREAL vv1, MYREAL *xx2, MYREAL lx2, MYREAL vv2);
166 MYINLINE void pseudonu_micro (proposal_fmt * proposal, MYREAL **xx1, MYREAL *lx1,
167 MYREAL v1, MYREAL *xx2, MYREAL lx2, MYREAL v2);
168 MYINLINE void pseudonu_brownian (proposal_fmt * proposal, MYREAL **xx1, MYREAL *lx1,
169 MYREAL v1, MYREAL *xx2, MYREAL lx2, MYREAL v2);
170 MYINLINE void pseudonu_seq (proposal_fmt * proposal, phenotype xxx1, MYREAL *sxx1, MYREAL v1,
171 phenotype xxx2, MYREAL *sxx2, MYREAL v2);
172 MYINLINE void pseudonu_seq_slow (proposal_fmt * proposal, phenotype xxx1, MYREAL *sxx1,
173 MYREAL v1, phenotype xxx2, MYREAL *sxx2, MYREAL v2);
174 MYINLINE void pseudonu_anc (proposal_fmt * proposal, phenotype xxx1, MYREAL v1,
175 phenotype xxx2, MYREAL v2);
176 void calculate_steps (world_fmt * world);
177 MYREAL logfac (long n);
178
179
180 boolean treereader (world_fmt * world, option_fmt *options, data_fmt * data);
181 void length_to_times (node * p);
182 boolean treeread (FILE * file, world_fmt * world, option_fmt *options, node ** pp, node * q);
183 char processlength (FILE * file, node ** p, option_fmt * options);
184 node *allocate_nodelet (world_fmt *world, long num, char type);
185 void find_tips (node * p, node ** nodelist, long *z);
186 node *add_migration (world_fmt *world, node * p, long from, long to, MYREAL utime);
187 node *create_interior_node (world_fmt * world, node ** q);
188 node *create_root_node (world_fmt *world, node ** q);
189 node *create_tip_node (FILE * file, world_fmt * world, option_fmt *options, node ** q, char *ch);
190 boolean processbracket (FILE * file, world_fmt *world, node ** p, char *ch);
191 void set_tree_pop (node * p, long *pop);
192 void allocate_x (node * p, world_fmt * world, char datatype,
193 boolean withtips);
194 long find_firstpop (node * p);
195
196 void sankoff (world_fmt * world);
197 MYREAL minimum (MYREAL *vec1, MYREAL *vec2, long n);
198 void santraverse (world_fmt *world, node * theNode, MYREAL **cost, long numpop);
199 long ranbest (MYREAL *array, long tie, MYREAL best, long n);
200 void jumble (long *s, long n);
201 void jumble_ownseed (long *s, long n);
202 long number_genomes (int datatype);
203
204 /* copy whole tree */
205 void copy_tree (world_fmt * original, world_fmt * kopie);
206 node *copy_node (world_fmt * original, node * o, world_fmt * kopie,
207 node * last);
208 void copy_node_content (world_fmt * original, world_fmt * kopie, node * o,
209 node * t);
210 void swap_tree (world_fmt * tthis, world_fmt * tthat);
211 void swap (void *a, void *b);
212
213 void free_tree (node * p, world_fmt * world);
214 void free_tipnodelet (node * p, world_fmt * world);
215 void free_mignodelet (node * p, world_fmt * world);
216 void free_nodelet (node * p, long num, world_fmt * world);
217 void free_nodedata (node * p, world_fmt * world);
218
219 MYREAL inverse_logprob_noevent (world_fmt * world, long interval);
220 MYREAL sum_migprob (world_fmt * world, long pop, long interval);
221
222 MYREAL prob_micro_watkins (MYREAL t, long diff, world_fmt * world, pair *helper);
223 MYREAL prob_micro_singlestep (MYREAL t, long diff, world_fmt * world, pair *helper);
224
225 void
226 debugtreeout (FILE * file, node * joint, node * p, long s);
227
228
229 /** global variable NUVIEW points to function nuview_datatype() */
230 static void (*nuview) (node *, world_fmt *, long);
231 static double (*prob_micro) (MYREAL , long , world_fmt *, pair *);
232 static void (*ppseudonuview)(proposal_fmt *, xarray_fmt , MYREAL *, MYREAL , xarray_fmt , MYREAL *, MYREAL);
233 /* ======================================================= */
234 /*
235 * Creates a start-genealogy using a coalescence approach
236 *
237 * - set NUVIEW according to datatype
238 * initializes tree structure - fills tree with data - set_tree():
239 * upgma-tree, adjust for times, sankoff() for migration events
240 */
241 /// \brief builds the starting tree
242 ///
243 /// Builds the starting tree.
244 /// \callgraph
245 void
buildtree(world_fmt * earth,world_fmt * world,option_fmt * options,data_fmt * data,long locus)246 buildtree (world_fmt *earth, world_fmt * world, option_fmt * options, data_fmt * data,
247 long locus)
248 {
249 long pop;
250 long genomes = number_genomes (options->datatype);
251 free_tree(world->root, world);
252 world->sumtips = 0;
253 world->migration_counts = 0;
254 for (pop = 0; pop < data->numpop; pop++)
255 {
256 if(options->randomsubset > 0 && options->randomsubset < data->numind[pop][locus])
257 world->sumtips += options->randomsubset * genomes;
258 else
259 world->sumtips += data->numalleles[pop][locus];
260 }
261 which_nuview (options->datatype, options->fastlike, FALSE, options->msat_option);
262 //future which_pseudonuview (options->datatype, options->fastlike, FALSE, options->msat_option);
263 switch (options->datatype)
264 {
265 case 's':
266 case 'n':
267 case 'h':
268 case 'u':
269 case 'f':
270 init_sequences (world, options, data, locus);
271 init_sequences_aliases (earth, world, options, data, locus);
272 break;
273 case 'b':
274 world->data->seq[0]->endsite = 1;
275 data->freq = -10000000000000.;
276 break;
277 case 'm':
278 world->data->seq[0]->endsite = 1;
279 /* world->data->freq = 1. / world->options->micro_stepnum; */
280 break;
281 case 'a':
282 world->data->seq[0]->endsite = 1;
283 world->data->freq = 1. / (data->maxalleles[locus]);
284 world->data->freqlast = 1. - world->data->freq;
285 }
286
287 if (options->usertree)
288 {
289 options->usertreewithmig = treereader (world, options, data);
290 makevalues (world, options, data, locus);//insert values into tips
291 }
292 else
293 {
294 allocate_tree (world, options, data, locus); //allocate nodep
295 makevalues (world, options, data, locus); //allocate/insert data into tips
296 allocateinterior(world,data,locus); //allocate nodep guts
297 allocatepoproot (world, data, locus); //allocate bottom parts
298 if (world->data->skiploci[locus])
299 return;
300 }
301 // reorder_populations(world, options, data);
302
303 if (strchr (SEQUENCETYPES, world->options->datatype))
304 {
305 init_tbl (world, locus);
306 if (world->cold && world->replicate == 0)
307 {
308 print_seqfreqs (world->outfile, world, options);
309 print_tbl (world->outfile, world, options, locus);
310 print_weights (world->outfile, world, options, locus);
311 }
312 if (world->options->progress)
313 {
314 if (world->cold && world->replicate == 0)
315 {
316 #ifndef MPI
317 print_seqfreqs (stdout, world, options);
318 print_tbl (stdout, world, options, locus);
319 print_weights (stdout, world, options, locus);
320 #endif
321 if (world->options->writelog)
322 {
323 print_seqfreqs (world->options->logfile, world, options);
324 print_tbl (world->options->logfile, world, options, locus);
325 print_weights (world->options->logfile, world, options,
326 locus);
327 }
328 }
329 }
330 }
331 if(!options->usertree)
332 set_tree (world, options, data, locus);
333 else
334 {
335 if(!options->usertreewithmig)
336 set_migrations (world, options, data, locus);
337 }
338 if (world->options->datatype == 'b')
339 world->data->maxalleles[locus] = XBROWN_SIZE;
340
341 // calculate the migration counts for the first tree
342 world->migration_counts = 0;
343 count_migrations (world->root->next->back, &world->migration_counts);
344 //traverse_check(crawlback (world->root->next));printf("@");
345
346 }
347
348 /*
349 * creates the timelist which represents all time intervals on a tree. The
350 * timelist is an array of pointers and not a linked list.
351 *
352 * - allocates memory using an arbitrary value this will be later adjusted if a
353 * longer list is needed - construct
354 */
355 void
create_treetimelist(world_fmt * world,timelist_fmt ** ltl,long locus)356 create_treetimelist (world_fmt * world, timelist_fmt ** ltl, long locus)
357 {
358 if ((*ltl)->tl == NULL)
359 {
360 (*ltl)->allocT = TIMELIST_GUESS;
361 (*ltl)->tl = (vtlist *) mycalloc ((*ltl)->allocT, sizeof (vtlist));
362 allocate_lineages (ltl, 0, world->numpop);
363 }
364 (*ltl)->copies = 0;
365 construct_tymelist (world, (*ltl));
366 }
367
368
369 void
allocate_lineages(timelist_fmt ** timevector,const long offset,const long numpop)370 allocate_lineages (timelist_fmt **timevector, const long offset, const long numpop)
371 {
372 long i;
373 const long allocT = (*timevector)->allocT;
374 vtlist * tl;
375 long *lineages;
376
377 // speedup and simplification for allocating and freeing
378 // accessing is still done the old way through tl[i].lineages
379 // but instead having memory scattered the allocation is one long string
380 // freeing time for lineages should be reduced this way [I hope]
381 if((*timevector)->lineages != NULL)
382 {
383 (*timevector)->lineages = (long *) myrealloc((*timevector)->lineages,sizeof(long)*numpop*allocT);
384 if(offset != allocT)
385 {
386 memset((*timevector)->lineages+offset,0,sizeof(long)*numpop*(allocT-offset));
387 }
388 }
389 else
390 (*timevector)->lineages = (long *) mycalloc(numpop*allocT,sizeof(long));
391
392 tl = (*timevector)->tl;
393 lineages = (*timevector)->lineages;
394
395 for (i = offset; i < allocT; i++)
396 {
397 tl[i].lineages = lineages + i * numpop;
398 }
399 }
400
401 /*
402 * start first pass trhough the tree to calculate the tree-likleihood
403 */
404 void
first_smooth(world_fmt * world,long locus)405 first_smooth (world_fmt * world, long locus)
406 {
407 smooth (world->root->next, crawlback (world->root->next), world, locus);
408 }
409
410 /*
411 * Marks a node, so that TREELIKELIHOOD() will recalulated values in node
412 */
413 void
set_dirty(node * p)414 set_dirty (node * p)
415 {
416 p->dirty = TRUE;
417 }
418 ///
419 /// inserts the from and to into the timelist from the tree
420 void
timeslices(timelist_fmt ** timevector)421 timeslices (timelist_fmt ** timevector)
422 {
423 long z;
424 vtlist * tl = (*timevector)->tl;
425 vtlist * tlz;
426 node * eventnode;
427 const long T = (*timevector)->T;
428
429 for (z = 0; z < T; z++)
430 {
431 tlz = &(tl[z]);
432 eventnode = tlz->eventnode;
433 tlz->from = eventnode->pop;
434 tlz->to = eventnode->actualpop;
435 tlz->slice = z;
436 }
437 }
438
439 void
add_partlineages(long numpop,timelist_fmt ** timevector)440 add_partlineages (long numpop, timelist_fmt ** timevector)
441 {
442 // this points to the MRCA
443 const long T = (*timevector)->T - 2;
444
445 long i, pop;
446 vtlist *tl = (*timevector)->tl;
447 vtlist *tli;
448 vtlist *tli1;
449 long from;
450 long to;
451 long *lineages;
452 char type;
453 // this should add a lineages for the MRCA
454 memset(tl[T+1].lineages, 0, numpop * sizeof(long));
455 tl[T+1].lineages[tl[T].eventnode->pop] = 1;
456 for (i = T; i >= 0; i--)
457 {
458 tli1 = &tl[i+1];
459 tli = &tl[i];
460 lineages = tli->lineages;
461 from = tli->from;
462 to = tli->to;
463 type = tli->eventnode->type;
464 memset(lineages, 0, numpop * sizeof(long));
465 // if the node is an internode (a coalescent node) then add an additional line
466 // if it is a tip reduce one
467 if(type == 't')
468 {
469 lineages[to] -= 1;
470 }
471 else
472 {
473 if(type != 'r')
474 {
475 if (from == to)
476 {
477 lineages[to] += 1;
478 }
479 else
480 {
481 lineages[to] += 1;
482 lineages[from] -= 1;
483 }
484 }
485 }
486 // this copies the content from the last timeinterval (tli1) to the next (tli)
487 // printf("%i> lineages %5li:", myID, i);
488 for (pop = 0; pop < numpop; pop++)
489 {
490 lineages[pop] += tli1->lineages[pop];
491 // printf(" %li",lineages[pop]);
492 }
493 // printf(" %f %c %li-->%li %s\n",tli->eventnode->tyme, tli->eventnode->type, tli->from, tli->to, tli->eventnode->nayme);
494 }
495 }
496
497 void
add_partlineages_313(long numpop,timelist_fmt ** timevector)498 add_partlineages_313 (long numpop, timelist_fmt ** timevector)
499 {
500 // this points to the MRCA
501 const long T = (*timevector)->T - 2;
502
503 long i, pop;
504 vtlist *tl = (*timevector)->tl;
505 vtlist *tli;
506 vtlist *tli1;
507 long from;
508 long to;
509 long *lineages;
510 char type;
511 // this should add a lineages for the MRCA
512 memset(tl[T+1].lineages, 0, numpop * sizeof(long));
513 tl[T+1].lineages[tl[T].eventnode->pop] = 1;
514 for (i = T; i >= 0; i--)
515 {
516 tli1 = &tl[i+1];
517 tli = &tl[i];
518 lineages = tli->lineages;
519 from = tli1->from;
520 to = tli1->to;
521 type = tli1->eventnode->type;
522 memset(lineages, 0, numpop * sizeof(long));
523 // if the node is an internode (a coalescent node) then add an additional line
524 // if it is a tip reduce one
525 if(type == 't')
526 {
527 lineages[to] -= 1;
528 }
529 else
530 {
531 if(type != 'r')
532 {
533 if (from == to)
534 {
535 lineages[to] += 1;
536 }
537 else
538 {
539 lineages[to] += 1;
540 lineages[from] -= 1;
541 }
542 }
543 }
544 // this copies the content from the last timeinterval (tli1) to the next (tli)
545 // printf("%i> lineages %5li:", myID, i);
546 for (pop = 0; pop < numpop; pop++)
547 {
548 lineages[pop] += tli1->lineages[pop];
549 // printf(" %li",lineages[pop]);
550 }
551 // printf(" %f %c %li-->%li %s\n",tli->eventnode->tyme, tli->eventnode->type, tli->from, tli->to, tli->eventnode->nayme);
552 }
553 }
554
555 void
add_partlineages_312(long numpop,timelist_fmt ** timevector)556 add_partlineages_312 (long numpop, timelist_fmt ** timevector)
557 {
558 // this points to the MRCA
559 const long T = (*timevector)->T - 2;
560
561 long i, pop;
562 vtlist *tl = (*timevector)->tl;
563 vtlist *tli;
564 vtlist *tli1;
565 long from;
566 long to;
567 long *lineages;
568 char type;
569 // this should add a lineages for the MRCA
570 memset(tl[T+1].lineages, 0, numpop * sizeof(long));
571 tl[T].lineages[tl[T].eventnode->pop] = 1;// the tMRCA has then 1 lineages looking to the root
572 tl[T+1].lineages[tl[T].eventnode->pop] = 0;// the tMRCA has then 1 lineages looking to the root
573 for (i = T; i > 0; i--)
574 {
575 tli1 = &tl[i-1];
576 tli = &tl[i];
577 lineages = tli1->lineages;
578 from = tli->from;
579 to = tli->to;
580 type = tli->eventnode->type;
581 memset(lineages, 0, numpop * sizeof(long));
582 // if the node is an internode (a coalescent node) then add an additional line
583 // if it is a tip reduce one
584 if(type == 't')
585 {
586 lineages[to] -= 1;
587 }
588 else
589 {
590 if(type != 'r')
591 {
592 if (from == to)
593 {
594 lineages[to] += 1;
595 }
596 else
597 {
598 lineages[to] += 1;
599 lineages[from] -= 1;
600 }
601 }
602 }
603 // this copies the content from the last timeinterval (tli1) to the next (tli)
604 //printf("%i> lineages %5li:", myID, i);
605 for (pop = 0; pop < numpop; pop++)
606 {
607 lineages[pop] += tli->lineages[pop];
608 //printf(" %li",lineages[pop]);
609 }
610 // printf(" %f %c %li-->%li %s lin=%li\n",tli->eventnode->tyme, tli->eventnode->type, tli->from, tli->to, tli->eventnode->nayme, lineages[0]);
611 }
612 }
613
614
615 /*
616 calculates the tree-likelihood according to datatype a, m, b, s,u
617 */
618 MYREAL
treelikelihood(world_fmt * world)619 treelikelihood (world_fmt * world)
620 {
621 long a;
622 MYREAL term = 0.0;
623 //MYREAL term1 = 0.0;
624 node *nn = crawlback (world->root->next);
625 //#ifdef BEAGLE
626 //reset_beagle(world->beagle);
627 //#endif
628 #ifdef BEAGLE
629 printf("TreeLike:");
630 term = calcLnL(world,FALSE);
631 // printf("Start LnL from Beagle: %f\n",term);
632 return term;
633 #endif
634 set_dirty (nn);
635 smooth (world->root->next, crawlback(world->root->next), world, world->locus);
636 adjustroot (world->root);
637 switch (world->options->datatype)
638 {
639 case 's':
640 term = treelike_seq (world, world->locus);
641 break;
642 case 'n':
643 case 'h':
644 term = treelike_snp (world, world->locus);
645 break;
646 case 'u':
647 term = treelike_snp_unlinked (world, world->locus);
648 break;
649 case 'a':
650 // printf("@@@ ");
651 for (a = 0; a < world->data->maxalleles[world->locus] - 1; a++)
652 {
653 term += (world->data->freq * nn->x.a[a]);
654 // printf("%f ",term);
655 }
656 term += (world->data->freqlast * nn->x.a[a]);
657 // printf("%f ",term);
658 term = (term != 0.0) ? (LOG (term) + nn->scale[0]) : -MYREAL_MAX;
659 // printf("\n@@treelike=%f scale=%f\n",term, nn->scale[0]);
660 break;
661 case 'm':
662 for (a = 0; a < world->data->maxalleles[world->locus]; a++)
663 term += nn->x.a[a];
664 term = (term != 0.0) ? (LOG (term) + nn->scale[0]) : -MYREAL_MAX;
665 break;
666 case 'b':
667 term = nn->x.a[2];
668 break;
669 case 'f':
670 term = treelike_anc (world, world->locus);
671 break;
672 }
673 #ifdef UEP
674 if (world->options->uep)
675 ueplikelihood (world);
676 #endif
677 #ifdef DEBUG
678 // printf("@L(D|G)=%f\n",term);
679 #endif
680 return term;
681 }
682
683 /*
684 * calculates tree-likelihood using only arrays DOES NOT CHANGE ARRAYS IN THE
685 * TREE
686 */
687 MYREAL
pseudotreelikelihood(world_fmt * world,proposal_fmt * proposal)688 pseudotreelikelihood (world_fmt * world, proposal_fmt * proposal)
689 {
690 long a, locus = world->locus;
691 /* freq is not different between pop */
692 MYREAL term = 0.0;
693 switch (world->options->datatype)
694 {
695 case 's':
696 #ifdef BEAGLE
697 printf("PseudoLike: ");
698 return calcLnL(world,world->beagle->instance);
699 #endif
700 term = pseudo_tl_seq (proposal->xf.s, proposal->xt.s, proposal->v,
701 proposal->vs, proposal, world);
702
703 break;
704 case 'n':
705 case 'h':
706 term = pseudo_tl_snp (proposal->xf.s, proposal->xt.s, proposal->v,
707 proposal->vs, proposal, world);
708 break;
709 case 'u':
710 term = pseudo_tl_snp_unlinked (proposal->xf.s, proposal->xt.s,
711 proposal->v, proposal->vs, proposal,
712 world);
713 break;
714 case 'a':
715 // printf("@@@");
716 for (a = 0; a < world->data->maxalleles[locus] - 1; a++)
717 {
718 term += (world->data->freq * proposal->xf.a[a]);
719 //printf("%f ",term);
720 }
721 term += (world->data->freqlast * proposal->xf.a[a]);
722 //printf("%f ",term);
723 if (term == 0.0)
724 term = -MYREAL_MAX;
725 else
726 term = (LOG (term) + proposal->mf[0]);
727 // printf("\n@@pseudolike=%f scale=%f\n",term, proposal->mf[0]);
728 break;
729 case 'b':
730 term = proposal->xf.a[2];
731 break;
732 case 'm':
733 for (a = 0; a < world->data->maxalleles[locus]; a++)
734 {
735 term += proposal->xf.a[a];
736 }
737 if (term == 0.0)
738 term = -MYREAL_MAX;
739 else
740 term = (LOG (term) + proposal->mf[0]);
741 break;
742 case 'f':
743 term = pseudo_tl_anc (proposal->xf.s, proposal->xt.s, proposal->v,
744 proposal->vs, proposal, world);
745 break;
746 default:
747 warning("no datatype found for treelikelihood() calculation!");
748 term = -MYREAL_MAX;
749 break;
750 }
751 #ifdef UEP
752 if (proposal->world->options->uep)
753 term += pseudo_tl_uep (&(proposal->uf), &proposal->ut, proposal->v,
754 proposal->vs, proposal, world);
755 #endif
756 if(MYFINITE(((double) term)) == 0)
757 term = -MYREAL_MAX;
758 #ifdef DEBUG
759 // printf(" proposed L(D|G)=%f\n",term);
760 #endif
761 return term;
762 }
763
764
765 /*
766 * Calculates the sub-likelihoods but does not change the arrays in the tree,
767 * it uses the passed arrays and overwrites the xx1 array DOES NOT CHANGE THE
768 * TREE
769 */
770 void
pseudonuview(proposal_fmt * proposal,xarray_fmt xx1,MYREAL * lx1,MYREAL v1,xarray_fmt xx2,MYREAL * lx2,MYREAL v2)771 pseudonuview (proposal_fmt * proposal, xarray_fmt xx1, MYREAL *lx1, MYREAL v1,
772 xarray_fmt xx2, MYREAL *lx2, MYREAL v2)
773 {
774 /* (*ppseudonuview)(proposal, xx1, lx1, v1, xx2, lx2, v2);
775 return;
776 }
777 */
778 switch (proposal->datatype)
779 {
780 case 'a':
781 pseudonu_allele (proposal, &xx1.a, &(lx1[0]), v1, xx2.a, lx2[0], v2);
782 break;
783 case 'b':
784 pseudonu_brownian (proposal, &xx1.a, &(lx1[0]), v1, xx2.a, lx2[0], v2);
785 break;
786 case 'm':
787 pseudonu_micro (proposal, &xx1.a, &(lx1[0]), v1, xx2.a, lx2[0], v2);
788 break;
789 case 'u':
790 case 'n':
791 case 'h':
792 // pseudonu_seq (proposal, xx1.s, v1, xx2.s, v2);
793 // break;
794 case 's':
795 //#ifdef BEAGLE
796 //prepare_beagle_instances_proposal(proposal,xx1.s, v1,xx2.s, v2, world->beagle);
797 //#else
798 if (proposal->world->options->fastlike)
799 pseudonu_seq (proposal, xx1.s, NULL, v1, xx2.s, NULL, v2);
800 else
801 pseudonu_seq_slow (proposal, xx1.s, lx1, v1, xx2.s, lx2, v2);
802 //#endif
803 break;
804 case 'f':
805 pseudonu_anc (proposal, xx1.s, v1, xx2.s, v2);
806 break;
807 }
808 #ifdef UEP
809 if (proposal->world->options->uep)
810 {
811 pseudonu_twostate (proposal, &proposal->uf, proposal->umf, v1,
812 &proposal->ut, proposal->umt, v2);
813 }
814 #endif
815 }
816
817
818 MYINLINE void
pseudonu_allele(proposal_fmt * proposal,MYREAL ** xx1,MYREAL * lx1,MYREAL vv1,MYREAL * xx2,MYREAL lx2,MYREAL vv2)819 pseudonu_allele (proposal_fmt * proposal, MYREAL **xx1, MYREAL *lx1,
820 MYREAL vv1, MYREAL *xx2, MYREAL lx2, MYREAL vv2)
821 {
822
823 long a;
824 long aa;
825 long locus = proposal->world->locus; /* allele counters */
826 long mal = proposal->world->data->maxalleles[locus]; /* maxalleles */
827 MYREAL freq = proposal->world->data->freq;
828 //MYREAL freqlast = proposal->world->data->freqlast; // same as 1 - k/(k+1) = 1/(k+1) = freq
829 MYREAL w1 = 0.0; /* time variables */
830 MYREAL w2 = 0.0;
831 MYREAL v1;
832 MYREAL v2;
833 MYREAL v1freq;
834 MYREAL v2freq;
835 MYREAL pija1;/* summary of probabilities */
836 MYREAL pija2;
837 MYREAL x3m = -MYREAL_MAX;
838 MYREAL inv_x3m;
839 MYREAL *xx3;
840 xx3 = (MYREAL *) mymalloc(sizeof(MYREAL) * mal);
841 v1 = 1.0 - EXP (-vv1);
842 v2 = 1.0 - EXP (-vv2);
843 if (v1 >= 1.)
844 {
845 w1 = 0.0;
846 v1 = 1.0;
847 }
848 else
849 {
850 w1 = 1.0 - v1;
851 }
852 if (v2 >= 1.)
853 {
854 w2 = 0.0;
855 v2 = 1.0;
856 }
857 else
858 {
859 w2 = 1.0 - v2;
860 }
861 // printf("@@pseudonu 1:{%f,%f,%f,%f} 2:(%f,%f,%f,%f}\n",(*xx1)[0],(*xx1)[1],*lx1,v1,
862 // xx2[0], xx2[1],lx2,v2);
863
864 v1freq = v1 * freq;
865 v2freq = v2 * freq;
866
867 for (aa = 0; aa < mal; aa++)
868 {
869 pija1 = pija2 = 0.0;
870 for (a = 0; a < mal; a++)
871 {
872 if(aa==a)
873 {
874 pija1 += (w1 + v1freq) * (*xx1)[a];
875 pija2 += (w2 + v2freq) * xx2[a];
876 }
877 else
878 {
879 pija1 += v1freq * (*xx1)[a];
880 pija2 += v2freq * xx2[a];
881 }
882 }
883 xx3[aa] = pija1 * pija2;
884 if (xx3[aa] > x3m)
885 x3m = xx3[aa];
886 }
887 inv_x3m = 1./ x3m;
888 for (aa = 0; aa < mal; aa++)
889 {
890 (*xx1)[aa] = xx3[aa] * inv_x3m;
891 }
892 // printf("@@finish: %f %f\n",(*xx1)[0],(*xx1)[1]);
893 *lx1 = LOG (x3m) + lx2 + *lx1;
894 myfree(xx3);
895 }
896
897 MYINLINE
898 void
pseudonu_micro(proposal_fmt * proposal,MYREAL ** xx1,MYREAL * lx1,MYREAL v1,MYREAL * xx2,MYREAL lx2,MYREAL v2)899 pseudonu_micro (proposal_fmt * proposal, MYREAL **xx1, MYREAL *lx1, MYREAL v1,
900 MYREAL *xx2, MYREAL lx2, MYREAL v2)
901 {
902 long a, s, diff, locus = proposal->world->locus; /* allele counters */
903 long aa1, aa2;
904 long smax = proposal->world->data->maxalleles[locus];
905 long margin = proposal->world->options->micro_threshold[locus];
906 MYREAL pija1s, pija2s, vv1, vv2;
907 MYREAL x3m = -MYREAL_MAX;
908 world_fmt *world = proposal->world;
909 MYREAL inv_x3m;
910 MYREAL *xx3;
911 MYREAL *pm1;
912 MYREAL *pm2;
913 pair *helper = &world->options->msat_tuning;
914 xx3 = (MYREAL *) mymalloc(sizeof(MYREAL) * smax);
915 vv1 = v1;
916 vv2 = v2;
917
918 pm1 = (MYREAL *) mymalloc(sizeof(MYREAL) * 2 * margin);
919 pm2 = pm1 + margin;
920
921 for (diff = 0; diff < margin; diff++)
922 {
923 pm1[diff] = prob_micro (vv1, diff, world, helper);
924 pm2[diff] = prob_micro (vv2, diff, world, helper);
925 }
926
927 for (s = 0; s < smax; s++)
928 {
929 pija1s = pija2s = 0.0;
930 aa1 = MAX (0, s - margin);
931 aa2 = MIN(s + margin,smax);
932 for (a = aa1; a < aa2; a++)
933 //for (a = 0; a < smax; a++)
934 {
935 diff = labs (s - a);
936 if(diff >= margin)
937 continue;
938
939 if ((*xx1)[a] > 0)
940 {
941 pija1s += pm1[diff] * (*xx1)[a];
942 }
943 if (xx2[a] > 0)
944 {
945 pija2s += pm2[diff] * xx2[a];
946 }
947 }
948 xx3[s] = pija1s * pija2s;
949 if (xx3[s] > x3m)
950 x3m = xx3[s];
951 }
952 inv_x3m = 1./ x3m;
953 for (s = 0; s < smax; s++)
954 {
955 (*xx1)[s]= xx3[s] * inv_x3m;
956 }
957 *lx1 += LOG (x3m) + lx2;
958 myfree(xx3);
959 myfree(pm1);
960 }
961
962 //================================================
963 // brownian motion calculation for data likelihood: ghost-version
964 // changed divisions to multiply inverse
965 // simplified check and include of boundary for vtot
966 MYINLINE
967 void
pseudonu_brownian(proposal_fmt * proposal,MYREAL ** xx1,MYREAL * lx1,MYREAL v1,MYREAL * xx2,MYREAL lx2,MYREAL v2)968 pseudonu_brownian (proposal_fmt * proposal, MYREAL **xx1, MYREAL *lx1,
969 MYREAL v1, MYREAL *xx2, MYREAL lx2, MYREAL v2)
970 {
971 MYREAL vtot, rvtot, c12;
972 MYREAL mean1, mean2, mean, vv1, vv2, f1, f2, diff;
973 mean1 = (*xx1)[0];
974 mean2 = xx2[0];
975
976 vv1 = v1 + (*xx1)[1];
977 vv2 = v2 + xx2[1];
978 vtot = vv1 + vv2;
979 if (vtot > 0.0)
980 {
981 rvtot = 1./vtot;
982 f1 = vv2 * rvtot;
983 f2 = 1.0 - f1;
984 mean = f1 * mean1 + f2 * mean2;
985 diff = mean1 - mean2;
986 c12 = diff * diff * rvtot;
987 (*xx1)[2] = (*xx1)[2] + xx2[2] + MIN (0, -0.5 * (LOG (vtot) + c12) + LOG2PIHALF);
988 (*xx1)[1] = vv1 * f1;
989 (*xx1)[0] = mean;
990 }
991 else
992 {
993 //xcode rvtot = HUGE;
994 //xcode vtot = SMALL_VALUE;
995 f1 = 0.5;
996 (*xx1)[2] = HUGE;
997 (*xx1)[1] = vv1 * f1;
998 (*xx1)[0] = f1 * (mean1 + mean2);
999 }
1000 }
1001
1002 /*
1003 adjust the variables POP and ACTUALPOP in interior nodes
1004 */
1005 void
set_pop(node * theNode,long pop,long actualpop)1006 set_pop (node * theNode, long pop, long actualpop)
1007 {
1008 switch (theNode->type)
1009 {
1010 case 'm':
1011 theNode->pop = theNode->next->pop = pop;
1012 theNode->actualpop = theNode->next->actualpop = actualpop;
1013 break;
1014 case 'i':
1015 case 'r':
1016 theNode->pop = theNode->next->pop = theNode->next->next->pop = pop;
1017 theNode->actualpop = theNode->next->actualpop = actualpop;
1018 theNode->next->next->actualpop = actualpop;
1019 break;
1020 case 't':
1021 if (theNode->pop != pop)
1022 error ("Population designation scrambled");
1023 break;
1024 default:
1025 error ("Undefined node type?!");
1026 break;
1027 }
1028 }
1029
1030
1031
1032 /*
1033 * ======================================================= local functions
1034 */
1035
1036 void
allocate_tree(world_fmt * world,option_fmt * options,data_fmt * data,long locus)1037 allocate_tree(world_fmt * world, option_fmt * options, data_fmt * data,
1038 long locus)
1039 {
1040 long nodenum = 0, pop, numpop = data->numpop;
1041 for (pop = 0; pop < numpop; pop++)
1042 {
1043 nodenum += data->numalleles[pop][locus] * 2;
1044 }
1045 world->nodep = (node **) mycalloc (nodenum+1, sizeof (node *));
1046 }
1047
1048 void
allocatetips(world_fmt * world,option_fmt * options,data_fmt * data,long locus)1049 allocatetips (world_fmt * world, option_fmt * options,
1050 data_fmt * data, long locus)
1051 {
1052 long a, pop;
1053 long zz = 0;
1054 for (pop=0; pop < data->numpop; pop++)
1055 {
1056 for(a=0; a < data->numalleles[pop][locus]; a++)
1057 {
1058 allocate_tip (world, options, &world->nodep[zz], pop, locus, zz, a,
1059 data->indnames[pop][a]);
1060 zz++;
1061 }
1062 }
1063 }
1064
1065 void
allocateinterior(world_fmt * world,data_fmt * data,long locus)1066 allocateinterior (world_fmt * world, data_fmt * data, long locus)
1067 {
1068 long sites = world->data->seq[0]->endsite + world->data->seq[0]->addon;
1069 node *p;
1070 long i;
1071 long temp=0;
1072 long mini = 0, maxi = 0;
1073 long numpop = data->numpop;
1074 for (i = 0; i < numpop; i++)
1075 {
1076 temp += data->numalleles[i][locus];
1077 }
1078 mini = temp;
1079 maxi = temp * 2 - 1;
1080 for (i = mini; i < maxi; i++)
1081 {
1082 p = allocate_nodelet (world, 3, 'i');
1083 p->top = TRUE;
1084 p->scale =
1085 (MYREAL *) mycalloc (sites, sizeof (MYREAL));
1086 p->s = (MYREAL *) mycalloc (1, sizeof (MYREAL) * world->numpop);
1087 p->id = i;
1088 world->nodep[i] = p;
1089 }
1090 }
1091
1092 node *
allocate_nodelet(world_fmt * world,long num,char type)1093 allocate_nodelet (world_fmt *world, long num, char type)
1094 {
1095 boolean isfirst = TRUE;
1096 long j;
1097 node *p, *q = NULL, *pfirst = NULL;
1098 for (j = 0; j < num; j++)
1099 {
1100 p = dispense_nodelet(world);
1101 if(p==NULL)
1102 {
1103 warning( "%i> heat=%f Nodelet dispenser failed to supply nodelet\n",myID,1./world->heat);
1104 p = (node *) mymalloc (sizeof (node));
1105 }
1106 p->tip = FALSE;
1107 p->visited = FALSE;
1108 p->number = unique_id_global;
1109 p->pop = -1;
1110 p->actualpop = -1;
1111 p->type = type;
1112 p->id = -1;
1113 p->top = FALSE;
1114 p->dirty = TRUE;
1115 p->next = q;
1116 p->scale = NULL;
1117 p->s = NULL;
1118 p->x.s = NULL;
1119 p->x.a = NULL;
1120 #ifdef UEP
1121
1122 p->uep = NULL;
1123 p->ux.s = NULL;
1124 p->ux.a = NULL;
1125 #endif
1126 p->back = NULL;
1127 p->nayme = NULL;
1128 p->v = 0.0;
1129 p->tyme = 0.0;
1130 p->length = 0.0;
1131 if (isfirst)
1132 {
1133 isfirst = FALSE;
1134 pfirst = p;
1135 }
1136 q = p;
1137 }
1138 if(pfirst !=NULL)
1139 pfirst->next = q;
1140 return q;
1141 }
1142
1143 void
allocatepoproot(world_fmt * world,data_fmt * data,long locus)1144 allocatepoproot (world_fmt * world, data_fmt * data, long locus)
1145 {
1146 long sites = world->data->seq[0]->endsite + world->data->seq[0]->addon;
1147 //long i;
1148 node *p, *q;//, *qq;
1149 //long nodenum = 0; /* arbitrarily to the first */
1150 q = NULL;
1151 //p = allocate_nodelet (world, 3, 'i');
1152 //p->top = TRUE;
1153 //for (i = 0; i < world->numpop; i++)
1154 // nodenum += data->numalleles[i][locus];
1155 //p->x.a = (MYREAL *) mycalloc (nodenum + 1, sizeof (MYREAL));
1156 //p->top = TRUE;
1157 //qq = p;
1158 //q = NULL;
1159 p = allocate_nodelet (world, 3, 'r');
1160 p->top = TRUE;
1161 p->scale = (MYREAL *) mycalloc (sites, sizeof (MYREAL));
1162 //p->next->back = qq;
1163 //qq->back = p->next;
1164 world->root = p;
1165 }
1166
1167
1168 void
allocate_tip(world_fmt * world,option_fmt * options,node ** p,long pop,long locus,long a,long ind,char ** tipnames)1169 allocate_tip (world_fmt * world, option_fmt * options, node ** p, long pop,
1170 long locus, long a, long ind, char **tipnames)
1171 {
1172 const long sites = world->data->seq[0]->endsite + world->data->seq[0]->addon;
1173 long i;
1174 char *tipname;
1175 if(options->usertree)
1176 return; //do nothing because the tip is alread allocated
1177 if(tipnames[locus][0]!='\0')
1178 tipname = tipnames[locus];
1179 else
1180 tipname = tipnames[0];
1181 (*p) = allocate_nodelet (world, 1, 't');
1182 (*p)->tip = TRUE;
1183 (*p)->top = TRUE;
1184 (*p)->id = a;
1185 if(options->has_datefile)
1186 {
1187 (*p)->tyme = find_tipdate(tipname, pop, world);
1188 }
1189 else
1190 (*p)->tyme = 0.0;
1191 (*p)->scale = (MYREAL *) mycalloc (sites, sizeof (MYREAL));
1192 (*p)->pop = (*p)->actualpop = options->newpops[pop]-1;
1193 (*p)->s = (MYREAL *) mycalloc (1, sizeof (MYREAL) * world->numpop);
1194 for (i = 0; i < world->numpop; i++)
1195 (*p)->s[i] = MYREAL_MAX;
1196 (*p)->s[options->newpops[pop]-1] = 0;
1197 if (strchr (SEQUENCETYPES, world->options->datatype))
1198 {
1199 (*p)->nayme = (char * ) strdup(tipname);
1200 unpad((*p)->nayme," ");
1201 translate((*p)->nayme,' ', '_');
1202 alloc_seqx (world, (*p));
1203 }
1204 else
1205 {
1206 (*p)->x.a =
1207 (MYREAL *) mycalloc (1,
1208 world->data->maxalleles[locus] * sizeof (MYREAL));
1209 (*p)->nayme = (char *) strdup(tipname);
1210 unpad((*p)->nayme," ");
1211 translate((*p)->nayme,' ', '_');
1212 }
1213 #ifdef UEP
1214 if (world->options->uep)
1215 {
1216 (*p)->uep = (int *) mycalloc (world->data->uepsites, sizeof (int));
1217 (*p)->ux.s = (pair *) mycalloc (world->data->uepsites, sizeof (pair));
1218 }
1219 #endif
1220 }
1221
1222
1223 void
makevalues(world_fmt * world,option_fmt * options,data_fmt * data,long locus)1224 makevalues (world_fmt * world, option_fmt * options, data_fmt * data,
1225 long locus)
1226 {
1227 long invi;
1228 long invsites;
1229 seqmodel_fmt * seq = world->data->seq[0];
1230 // switching accorind to option and not world->option
1231 // this would allow to run snips with known number of invariant sites
1232 //
1233 world->options->datatype=options->datatype;
1234 switch (world->options->datatype)
1235 {
1236 case 'a':
1237 make_alleles (world, options, data, locus);
1238 break;
1239 case 'b':
1240 make_microbrownian (world, options, data, locus);
1241 break;
1242 case 'm':
1243 make_microsatellites (world, options, data, locus);
1244 break;
1245 case 's':
1246 case 'f':
1247 // allocatetips (world, options, data, locus);
1248 make_sequences (world, options, data, locus);
1249 seq->oldsite = seq->endsite;
1250 break;
1251 case 'n':
1252 case 'h':
1253 //allocatetips (world, options, data, locus);
1254 make_sequences (world, options, data, locus);
1255 make_invarsites (world, data, locus);
1256 seq->oldsite = seq->endsite;
1257 invi = seq->endsite;
1258 seq->endsite += 4;
1259 if (options->totalsites>0)
1260 {
1261 invsites = options->totalsites - seq->oldsite;
1262 seq->aliasweight[invi++] = (long) invsites*seq->basefrequencies[NUC_A];
1263 seq->aliasweight[invi++] = (long) invsites*seq->basefrequencies[NUC_C];
1264 seq->aliasweight[invi++] = (long) invsites*seq->basefrequencies[NUC_G];
1265 seq->aliasweight[invi] = (long) invsites*seq->basefrequencies[NUC_T];
1266 world->options->datatype = 's';
1267 }
1268 break;
1269 case 'u':
1270 allocatetips (world, options, data, locus);
1271 make_snp (world, options, data, locus);
1272 make_invarsites_unlinked (world, data, locus);
1273 seq->oldsite = seq->endsite;
1274 seq->endsite *= (seq->addon + 1);
1275 break;
1276 default:
1277 usererror ("Oh yes, it would be nice if there were more\n \
1278 possible datatypes than just an\n \
1279 allele model, microsatellite model,\n sequence model, or \
1280 single nucleotide polymorphism model.\n \
1281 But there are currently no others, so the programs stops\n\n");
1282 break;
1283 }
1284 #ifdef UEP
1285 if (world->options->uep)
1286 {
1287 make_uep_values (world, data, locus);
1288 }
1289 #endif
1290 }
1291
1292 ///
1293 ///
1294 /// creates the branchlength and adds migration nodes to the
1295 /// start tree
1296 /// - creates rough genetic distance for upgma
1297 /// - upgma
1298 /// - find branches where we need to insert migrations
1299 /// - insert migrations
1300 /// - adjust time of all nodes using the coalescent with migration
1301 void
set_tree(world_fmt * world,option_fmt * options,data_fmt * data,long locus)1302 set_tree (world_fmt * world, option_fmt * options, data_fmt * data,
1303 long locus)
1304 {
1305 long tips = world->sumtips;
1306 MYREAL **distm;
1307 node **topnodes;
1308
1309 topnodes = (node **) mycalloc (1, sizeof (node *) * tips);
1310 doublevec2d(&distm,tips, tips);
1311
1312 if (!options->randomtree)
1313 {
1314 /* create a crude distance matrix according to the datatype */
1315 switch (world->options->datatype)
1316 {
1317 case 'a':
1318 case 'b':
1319 case 'm':
1320 distance_allele (world, options, locus, tips, distm);
1321 break;
1322 case 's':
1323 case 'n':
1324 case 'h':
1325 case 'u':
1326 case 'f':
1327 distance_sequence (data, locus, tips,
1328 world->data->seq[0]->sites[locus],
1329 options->nmlength, distm);
1330 break;
1331 }
1332 }
1333 else
1334 {
1335 randomize_distm (distm, data, locus, tips,
1336 world->options->custm);
1337 //printf("\n\nRANDOM TREE START\n\n");
1338 }
1339 constrain_distance_zeromig (distm, options, data, locus, tips,
1340 world->options->custm);
1341 // printf("distm[0][1]=%f distm[1][2]=%f\n",distm[0][1],distm[1][2]);
1342 //fflush(stdout);
1343
1344 #ifdef UEP
1345 if (options->uep)
1346 constrain_distance_uep (data->uep, world->data->uepsites, distm,
1347 tips);
1348 #endif
1349 if(!options->usertree)
1350 upgma (world, distm, tips, world->nodep);
1351 myfree(distm[0]);
1352 myfree(distm);
1353 //}
1354 world->root->tyme = world->root->next->tyme =
1355 world->root->next->next->tyme = world->root->next->back->tyme + 10000.;
1356 /* orient the tree up-down, set the length and v */
1357 set_top (world, world->root->next->back, locus);
1358 set_v (world->root->next->back);
1359 #ifdef BEAGLE
1360 long bid=0;
1361 bid = set_branch_index (world->root->next->back, &bid);
1362 #endif
1363 /*
1364 * insert migration nodes into the tree using the Slatkin and
1365 * Maddison approach (Fitch parsimony)
1366 */
1367 memcpy (topnodes, world->nodep, sizeof (node *) * tips);
1368 //zzz = 0;
1369 if(!options->usertree) //TRIAL
1370 allocate_x (world->root, world, world->options->datatype, NOTIPS);
1371 #ifdef UEP
1372
1373 if (world->options->uep)
1374 {
1375 // allocate_uep (world->root, world, world->options->datatype, NOTIPS);
1376 update_uep (world->root->next->back, world);
1377 check_uep_root (world->root->next->back, world);
1378 }
1379 #endif
1380 // debugtreeout (stdout, crawlback (world->root->next), crawlback (world->root->next),0);
1381 sankoff (world);
1382 // debugtreeout (stdout, crawlback (world->root->next), crawlback (world->root->next),0);
1383 myfree(topnodes);
1384 } /* set_tree */
1385
1386 /*
1387 creates the branchlength and adds migration nodes to the
1388 start tree
1389 - creates rough genetic distance for upgma
1390 - upgma
1391 - find branches where we need to insert migrations
1392 - insert migrations
1393 - adjust time of all nodes using the coalescent with migration
1394 */
1395 void
set_migrations(world_fmt * world,option_fmt * options,data_fmt * data,long locus)1396 set_migrations (world_fmt * world, option_fmt * options, data_fmt * data,
1397 long locus)
1398 {
1399 long tips = world->sumtips;
1400
1401 node **topnodes;
1402 topnodes = (node **) mycalloc (1, sizeof (node *) * tips);
1403 // can we do this here ???#ifdef UEP
1404 // if (options->uep)
1405 // constrain_distance_uep (data->uep, world->data->uepsites, distm, tips);
1406 //#endif
1407 world->root->tyme = world->root->next->tyme =
1408 world->root->next->next->tyme = world->root->next->back->tyme + 10000.;
1409 /* orient the tree up-down, set the length and v */
1410 set_top (world, world->root->next->back, locus);
1411 set_v (world->root->next->back);
1412 /*
1413 * insert migration nodes into the tree using the Slatkin and
1414 * Maddison approach (Fitch parsimony)
1415 */
1416 memcpy (topnodes, world->nodep, sizeof (node *) * tips);
1417 #ifdef UEP
1418 if (world->options->uep)
1419 {
1420 // allocate_uep (world->root, world, world->options->datatype, NOTIPS);
1421 update_uep (world->root->next->back, world);
1422 check_uep_root (world->root->next->back, world);
1423 }
1424 #endif
1425 sankoff (world);
1426 myfree(topnodes);
1427 } /* set_migrations */
1428
1429
randomize_distm(MYREAL ** distm,data_fmt * data,long locus,long tips,char * custm)1430 void randomize_distm(MYREAL **distm, data_fmt * data, long locus,
1431 long tips, char *custm)
1432 {
1433 long i;
1434 long j;
1435 for(i=0;i<tips; i++)
1436 {
1437 distm[i][i] = 0.0;
1438 for(j=0;j<i;j++)
1439 {
1440 distm[i][j] = distm[j][i] = RANDUM();
1441 //printf("%f ",distm[i][j]);
1442 }
1443 //printf("\n");
1444 }
1445 }
1446
1447
1448 void
constrain_distance_zeromig(MYREAL ** m,option_fmt * options,data_fmt * data,long locus,long tips,char * custm)1449 constrain_distance_zeromig (MYREAL **m, option_fmt *options, data_fmt * data, long locus,
1450 long tips, char *custm)
1451 {
1452 long pop, ind, j, i = 0;
1453 long *pops;
1454 pops = (long *) mycalloc (tips, sizeof (long));
1455 for (pop = 0; pop < data->numpop; pop++)
1456 {
1457 for (ind = 0; ind < data->numalleles[pop][locus]; ind++)
1458 {
1459 pops[i++] = options->newpops[pop];
1460 //OLD pops[i++] = pop;
1461 }
1462 }
1463 for (i = 0; i < tips; i++)
1464 {
1465 for (j = 0; j < i; j++)
1466 {
1467 if (custm[pops[i] + pops[i] * pops[j]] == '0')
1468 m[i][j] = m[j][i] = 1000;
1469 //printf("%f ",m[i][j]);
1470 }
1471 //printf("\n");
1472 }
1473 myfree(pops);
1474 }
1475
1476 void
distance_EP(char ** data,long tips,MYREAL ** m)1477 distance_EP (char **data, long tips, MYREAL **m)
1478 {
1479 long i, j;
1480 for (i = 0; i < tips; i++)
1481 {
1482 for (j = 0; j < i; j++)
1483 {
1484 if (!strcmp (data[i], data[j]))
1485 m[i][j] = m[j][i] = fabs (rannor (1., 0.1));
1486 else
1487 m[i][j] = m[j][i] = fabs (rannor (0., 0.1));
1488 }
1489 }
1490 }
1491
1492 void
distance_micro(char ** data,long tips,MYREAL ** m)1493 distance_micro (char **data, long tips, MYREAL **m)
1494 {
1495 long i, j;
1496 for (i = 0; i < tips; i++)
1497 {
1498 for (j = 0; j < i; j++)
1499 {
1500 m[i][j] = m[j][i] = pow (atof (data[i]) - atof (data[j]), 2.);
1501 m[i][j] = m[j][i] = fabs (rannor (m[i][j], 0.1));
1502 }
1503 }
1504 }
1505
1506 /* calculate pairwise distances using allele disimilarity */
1507 void
distance_allele(world_fmt * world,option_fmt * options,long locus,long tips,MYREAL ** distm)1508 distance_allele (world_fmt * world, option_fmt * options, long locus,
1509 long tips, MYREAL **distm)
1510 {
1511 char **mdata;
1512 long pop;
1513
1514 mdata = (char **) mycalloc (1, sizeof (char *) * (tips + 1));
1515 for (pop = 0; pop < tips; pop++)
1516 {
1517 mdata[pop] =
1518 (char *) mycalloc (1, sizeof (char) * options->allelenmlength);
1519 strcpy (mdata[pop], strrchr(world->nodep[pop]->nayme,'!')+1);
1520 }
1521 if (world->options->datatype == 'a')
1522 distance_EP (mdata, tips, distm);
1523 else
1524 distance_micro (mdata, tips, distm);
1525 for (pop = 0; pop < tips; pop++)
1526 {
1527 myfree(mdata[pop]);
1528 }
1529 myfree(mdata);
1530 }
1531
1532 /* calculate pairwise distances using sequence similarity */
1533 void
distance_sequence(data_fmt * data,long locus,long tips,long sites,long nmlength,MYREAL ** m)1534 distance_sequence (data_fmt * data, long locus, long tips, long sites,
1535 long nmlength, MYREAL **m)
1536 {
1537 long i = 0, j, z, pop, ind;
1538 char **dat;
1539 if (data->distfile != NULL)
1540 {
1541 read_distance_fromfile (data->distfile, tips, nmlength, m);
1542 }
1543 else
1544 {
1545 dat = (char **) mymalloc (sizeof (char *) * tips);
1546 for (pop = 0; pop < data->numpop; pop++)
1547 {
1548 for (ind = 0; ind < data->numalleles[pop][locus]; ind++)
1549 {
1550 dat[i++] = data->yy[pop][ind][locus][0];
1551 }
1552 }
1553 if (i != tips)
1554 {
1555 error ("Mistake in distance_sequence() tips is not equal sum(i)\n");
1556 }
1557 for (i = 0; i < tips; i++)
1558 {
1559
1560 for (j = i + 1; j < tips; j++)
1561 {
1562 //to come m[i][j] = m[j][i] = make_ml_distance(dat[i], dat[j], i, j);
1563
1564 for (z = 0; z < sites; z++)
1565 {
1566
1567 if (dat[i][z] != dat[j][z])
1568 {
1569 //m[i][j] = m[j][i] += fabs(rannor(1.0, 0.1));
1570 m[i][j] = m[j][i] += 1.0;
1571 }
1572 }
1573 }
1574 }
1575 myfree(dat);
1576 }
1577 }
1578
1579
1580
1581 void
fix_times(world_fmt * world,option_fmt * options)1582 fix_times (world_fmt * world, option_fmt * options)
1583 {
1584 long k;
1585 MYREAL tipdate=0.0;
1586 node *theNode;
1587 MYREAL age = world->data->maxsampledate
1588 * world->options->meanmu[world->locus]
1589 * world->options->generation_year * world->options->mu_rates[world->locus];
1590 if (!options->usertree)
1591 {
1592 #ifdef TREEDEBUG
1593 printf("%i> %s\nage= %f\nmaxsamp=%f\nmut=%f\ngen=%f\nmu_rate=%g\n-----------------------------\n",myID, "start timelist ---------------------", age,world->data->maxsampledate,world->options->meanmu[world->locus],1./world->options->generation_year,world->options->mu_rates[world->locus]);
1594 #endif
1595 for (k = 0; k < world->treetimes[0].T - 1; k++)
1596 {
1597 theNode = world->treetimes[0].tl[k].eventnode;
1598 if(theNode->type == 't')
1599 {
1600 //tipdate = find_tipdate(theNode->nayme, theNode->pop, world);
1601 tipdate = theNode->tyme;
1602 #ifdef TREEDEBUG
1603 printf("DEBUG TIPDATE %i> %s %g\n",myID, theNode->nayme, tipdate);
1604 #endif
1605 //theNode->tyme = tipdate;
1606 world->treetimes[0].tl[k].age = tipdate;
1607 }
1608 else
1609 {
1610 //k here is never 0, so this k-1 seems correct
1611 age += inverse_logprob_noevent (world, k-1);
1612 world->treetimes[0].tl[k].age = age;
1613 adjust_time (theNode, age);
1614 #ifdef TREEDEBUG
1615 printf("%i> %10.10s %g\n",myID, " ", theNode->tyme);
1616 #endif
1617 }
1618 }
1619 world->treetimes[0].tl[k].age = age + 10000.; /* this is the root */
1620 adjust_time (world->treetimes[0].tl[k].eventnode, age+10000);
1621 #ifdef TREEDEBUG
1622 printf("%i> %s %g\n",myID, "end timelist ---------------------", age+10000 );
1623 #endif
1624 }
1625 set_v (world->root->next->back);
1626 }
1627
1628 /*
1629 * creates a UPGMA tree: x = distance matrix which will be destroyed
1630 * through the process, tips = # of sequences/alleles, nodep = treenodes
1631 * have to be allocated for ALL nodes
1632 *
1633 * This code is stripped neighbor-joining code out of phylip v3.6. Only the
1634 * upgma option is present.
1635 */
1636 void
upgma(world_fmt * world,MYREAL ** x,long tips,node ** nodep)1637 upgma (world_fmt * world, MYREAL **x, long tips, node ** nodep)
1638 {
1639 long nc, nextnode, mini = -900, minj = -900, i, j, jj, ia, iaa, ja, jaa;
1640 MYREAL zz = (world->data->maxsampledate
1641 * world->options->meanmu[world->locus]
1642 * world->options->generation_year * world->options->mu_rates[world->locus]+ 1);
1643 MYREAL total, tmin, bi, bj, /* ti, tj, */ da;
1644 MYREAL *av;
1645 long *oc;
1646 node **cluster;
1647 long *enterorder;
1648 MYREAL denom=1.;
1649 /* First initialization */
1650 enterorder = (long *) mycalloc (tips, sizeof (long));
1651 for (ia = 0; ia < tips; ia++)
1652 enterorder[ia] = ia;
1653 jumble (enterorder, tips);
1654 nextnode = tips;
1655 av = (MYREAL *) mycalloc (tips, sizeof (MYREAL));
1656 oc = (long *) mymalloc (tips * sizeof (long));
1657 cluster = (node **) mycalloc (tips, sizeof (node *));
1658 for (i = 0; i < tips; i++)
1659 oc[i] = 1;
1660 for (i = 0; i < tips; i++)
1661 cluster[i] = nodep[i];
1662 /* Enter the main cycle */
1663 for (nc = 0; nc < tips - 1; nc++)
1664 {
1665 tmin = 99999.0;
1666 /* Compute sij and minimize */
1667 for (jaa = 1; jaa < tips; jaa++)
1668 {
1669 ja = enterorder[jaa];
1670 if (cluster[ja] != NULL)
1671 {
1672 for (iaa = 0; iaa < jaa; iaa++)
1673 {
1674 ia = enterorder[iaa];
1675 if (cluster[ia] != NULL)
1676 {
1677 total = x[ia][ja];
1678 //printf("ia=%li ja=%li x[ia][ja]=%f\n",ia,ja,x[ia][ja]);
1679 if (total < tmin)
1680 {
1681 tmin = total;
1682 mini = ia;
1683 minj = ja;
1684 }
1685 }
1686 }
1687 }
1688 } /* compute lengths and print */
1689 bi = x[mini][minj] / 2.0 - av[mini];
1690 bj = x[mini][minj] / 2.0 - av[minj];
1691 av[mini] += bi;
1692 nodep[nextnode]->next->back = cluster[mini];
1693 //xcode
1694 if(cluster[mini]!=NULL)
1695 {
1696 if(nodep[nextnode]->next!=NULL)
1697 {
1698 cluster[mini]->back = nodep[nextnode]->next;
1699 }
1700 nodep[nextnode]->next->next->back = cluster[minj];
1701 cluster[minj]->back = nodep[nextnode]->next->next;
1702 cluster[mini]->back->v = cluster[mini]->v = bi;
1703 cluster[minj]->back->v = cluster[minj]->v = bj;
1704 cluster[mini] = nodep[nextnode];
1705 }
1706 adjust_time (nodep[nextnode], (MYREAL) zz++);
1707 cluster[minj] = NULL;
1708 nextnode++;
1709 /* re-initialization */
1710 denom = (oc[mini] + oc[minj]);
1711 for (jj = 0; jj < tips; jj++)
1712 {
1713 if (cluster[jj] != NULL)
1714 {
1715 da = (x[mini][jj] * oc[mini] + x[minj][jj] * oc[minj])/denom ;
1716 x[mini][jj] = da;
1717 x[jj][mini] = da;
1718 }
1719 }
1720 for (j = 0; j < tips; j++)
1721 {
1722 x[minj][j] = x[j][minj] = 0.0;
1723 }
1724 oc[mini] += oc[minj];
1725 }
1726 /* the last cycle */
1727 for (i = 0; i < tips; i++)
1728 {
1729 if (cluster[i] != NULL)
1730 break;
1731 }
1732 world->root->next->back = cluster[i];
1733 cluster[i]->back = world->root->next;
1734 myfree(av);
1735 myfree(oc);
1736 myfree(cluster);
1737 myfree(enterorder);
1738 }
1739
1740 void
set_top(world_fmt * world,node * p,long locus)1741 set_top (world_fmt * world, node * p, long locus)
1742 {
1743
1744
1745 if (p->type == 't')
1746 {
1747 p->top = TRUE;
1748 //p->tyme = 0.0;
1749 return;
1750 }
1751 p->top = TRUE;
1752 p->next->top = FALSE;
1753 if (p->type != 'm')
1754 {
1755 p->next->next->top = FALSE;
1756 }
1757 set_top (world, p->next->back, locus);
1758 if (p->type != 'm')
1759 {
1760 set_top (world, p->next->next->back, locus);
1761 }
1762 if (p == crawlback (world->root->next))
1763 {
1764 p->back->top = FALSE;
1765 p->back->tyme = ROOTLENGTH;
1766 }
1767 } /* set_top */
1768
1769
1770 void
set_v(node * p)1771 set_v (node * p)
1772 {
1773 if (p->type == 't')
1774 {
1775 p->v = p->length = lengthof (p);
1776 return;
1777 }
1778 ltov (p);
1779 set_v (crawlback (p->next));
1780 set_v (crawlback (p->next->next));
1781 } /* set_v */
1782
1783 void
ltov(node * p)1784 ltov (node * p)
1785 {
1786 p->v = lengthof (p);
1787 } /* ltov */
1788
1789 /*
1790 * cost matrix COST is for the ISLAND and MATRIX model all 1 with a 0
1791 * diagonal, this will be changable through options, but perhaps this is not
1792 * so important
1793 */
1794 void
calc_sancost(MYREAL ** cost,long numpop)1795 calc_sancost (MYREAL **cost, long numpop)
1796 {
1797 long i, j;
1798 for (i = 0; i < numpop; i++)
1799 {
1800 for (j = 0; j < numpop; j++)
1801 {
1802 if (i != j)
1803 cost[i][j] = 1.0;
1804 else
1805 cost[i][i] = 0.0;
1806 }
1807 }
1808 }
1809
1810 void
sankoff(world_fmt * world)1811 sankoff (world_fmt * world)
1812 {
1813 MYREAL **cost;
1814 long i;
1815 cost = (MYREAL **) mymalloc (sizeof (MYREAL *) * world->numpop);
1816 cost[0] =
1817 (MYREAL *) mymalloc (sizeof (MYREAL) * world->numpop * world->numpop);
1818 for (i = 1; i < world->numpop; i++)
1819 {
1820 cost[i] = cost[0] + world->numpop * i;
1821 }
1822 calc_sancost (cost, world->numpop);
1823 santraverse (world, crawlback (world->root->next), cost, world->numpop);
1824 myfree(cost[0]);
1825 myfree(cost);
1826 }
1827
1828 void
jumble(long * s,long n)1829 jumble (long *s, long n)
1830 {
1831 long *temp, i, rr, tn = n;
1832
1833 temp = (long *) mycalloc (1, sizeof (long) * n);
1834 memcpy (temp, s, sizeof (long) * n);
1835 for (i = 0; i < n && tn > 0; i++)
1836 {
1837 s[i] = temp[rr = RANDINT (0, tn - 1)];
1838 temp[rr] = temp[tn - 1];
1839 tn--;
1840 }
1841 myfree(temp);
1842 }
1843
1844
1845 #ifdef WIN32
1846 #define SUBSETRANDOM (((double) rand())/((double) RAND_MAX))
1847 #else
1848 #define SUBSETRANDOM (drand48())
1849 #endif
1850
1851 // this uses a different stream than the usual random number
1852 // and is used to guarantee that the selection of individual subsets can be
1853 // maintained between different runs
1854 void
jumble_ownseed(long * s,long n)1855 jumble_ownseed (long *s, long n)
1856 {
1857 long *temp, i, rr, tn = n;
1858
1859 temp = (long *) mycalloc (1, sizeof (long) * n);
1860 memcpy (temp, s, sizeof (long) * n);
1861 for (i = 0; i < n && tn > 0; i++)
1862 {
1863 s[i] = temp[rr = (long) ( SUBSETRANDOM * (tn - 1))];
1864 temp[rr] = temp[tn - 1];
1865 tn--;
1866 }
1867 myfree(temp);
1868 }
1869
1870 void
santraverse(world_fmt * world,node * theNode,MYREAL ** cost,long numpop)1871 santraverse (world_fmt *world, node * theNode, MYREAL **cost, long numpop)
1872 {
1873 long i, ii, tie, which = 0;
1874 node *p = NULL, *q = NULL, *tmp = NULL, *left = NULL, *right = NULL;
1875 MYREAL best;
1876 long *poplist;
1877 poplist = (long *) mycalloc (1, sizeof (long) * numpop);
1878 if (theNode->type != 't')
1879 {
1880 if (RANDUM () > 0.5)
1881 {
1882 left = theNode->next;
1883 right = theNode->next->next;
1884 }
1885 else
1886 {
1887 left = theNode->next->next;
1888 right = theNode->next;
1889 }
1890 if (left->back != NULL)
1891 {
1892 p = crawlback (left);
1893 santraverse (world, p, cost, numpop);
1894 }
1895 if (right->back != NULL)
1896 {
1897 q = crawlback (right);
1898 santraverse (world, q, cost, numpop);
1899 }
1900 best = MYREAL_MAX;
1901 tie = 0;
1902 for (i = 0; i < numpop; i++)
1903 poplist[i] = i;
1904 jumble (poplist, numpop);
1905 for (ii = 0; ii < numpop; ii++)
1906 {
1907 i = poplist[ii];
1908 if(p!=NULL && q!=NULL)
1909 {
1910 theNode->s[i] =
1911 minimum (cost[i], p->s, numpop) + minimum (cost[i], q->s, numpop);
1912 }
1913 if (theNode->s[i] < best)
1914 {
1915 best = theNode->s[i];
1916 which = i;
1917 tie = 0;
1918 }
1919 else
1920 {
1921 if (theNode->s[i] == best)
1922 {
1923 tie++;
1924 which = i;
1925 }
1926 }
1927 }
1928 if (tie != 0)
1929 {
1930 theNode->pop = theNode->actualpop = which =
1931 ranbest (theNode->s, tie, best, numpop);
1932 theNode->s[which] -= SANKOFF_DELTA;
1933 }
1934 else
1935 {
1936 theNode->pop = theNode->actualpop = which;
1937 }
1938 if (p!=NULL && p->actualpop != which)
1939 {
1940 // printf("%i> p: theNode->tyme=%f, p->tyme=%f mignode=%f\n",myID, theNode->tyme,p->tyme,p->tyme + (theNode->tyme - p->tyme) / 2.);
1941 tmp = add_migration (world, p, theNode->actualpop, p->actualpop,
1942 (MYREAL) RANDDOUBLE(0.0, theNode->tyme - p->tyme));
1943 left->back = tmp;
1944 tmp->back = left;
1945 //debug FPRINTF(startfile, "%li %li\n", theNode->actualpop, p->actualpop);
1946 }
1947 if (q!=NULL && q->actualpop != which)
1948 {
1949 tmp = add_migration (world, q, theNode->actualpop, q->actualpop,
1950 (MYREAL) RANDDOUBLE(0.0, theNode->tyme - q->tyme));
1951
1952 //printf("%i> q: theNode->tyme=%f, q->tyme=%f mignode=%f\n",myID, theNode->tyme,q->tyme,q->tyme + (theNode->tyme - q->tyme) / 2.);
1953 right->back = tmp;
1954 tmp->back = right;
1955 //debug FPRINTF(startfile, "%li %li\n", theNode->actualpop, q->actualpop);
1956 }
1957 }
1958 myfree(poplist);
1959 }
1960
1961 /* returns minimum for sankoff routine */
1962 MYREAL
minimum(MYREAL * vec1,MYREAL * vec2,long n)1963 minimum (MYREAL *vec1, MYREAL *vec2, long n)
1964 {
1965 long j;
1966 MYREAL summ, min = MYREAL_MAX;
1967 for (j = 0; j < n; j++)
1968 {
1969 if (vec2[j] < MYREAL_MAX)
1970 {
1971 if ((summ = vec1[j] + vec2[j]) < min)
1972 min = summ;
1973 }
1974 }
1975 return min;
1976 }
1977
1978 long
ranbest(MYREAL * array,long tie,MYREAL best,long n)1979 ranbest (MYREAL *array, long tie, MYREAL best, long n)
1980 {
1981 long i;
1982 long which = RANDINT (0, tie);
1983 for (i = 0; i < n; i++)
1984 {
1985 if (fabs (array[i] - best) < EPSILON)
1986 {
1987 if (which == 0)
1988 return i;
1989 else
1990 which--;
1991 }
1992 }
1993 return -2;
1994 }
1995
1996 ///
1997 /// allocate memory for sequence data
1998 void
alloc_seqx(world_fmt * world,node * theNode)1999 alloc_seqx (world_fmt * world, node * theNode)
2000 {
2001 // long j,z;
2002 long endsite = world->data->seq[0]->endsite;
2003 if (strchr ("u", world->options->datatype))
2004 {
2005 endsite = endsite * (world->data->seq[0]->addon + 1) + world->data->seq[0]->addon;
2006 }
2007 if (strchr ("nh", world->options->datatype))
2008 {
2009 endsite += world->data->seq[0]->addon;
2010 }
2011 allocate_xseq(&theNode->x, endsite, world->options->rcategs);
2012 }
2013
allocate_xseq(xarray_fmt * x,long sites,long categs)2014 void allocate_xseq(xarray_fmt *x, long sites, long categs)
2015 {
2016 long j;
2017 (*x).s = (phenotype) mycalloc (sites, sizeof (ratelike *));
2018 #ifdef VARMUT
2019 (*x).s[0] = (ratelike) mycalloc (linkedloci * categs, sizeof (MYREAL) * sites[datamodeltype] * sitelikesize[datamodeltype]);
2020 #else
2021 (*x).s[0] = (ratelike) mycalloc (categs * sites, sizeof (sitelike));
2022 #endif /*VARMUT*/
2023 for (j = 1; j < sites; j++)
2024 (*x).s[j] = (*x).s[0] + categs * j;
2025
2026 }
2027
2028 ///
2029 /// zeroes sequence data vector
zero_xseq(xarray_fmt * x,long sites,long categs)2030 void zero_xseq(xarray_fmt *x, long sites, long categs)
2031 {
2032 long j;
2033 // (*x).s = (phenotype) mycalloc (sites, sizeof (ratelike *));
2034 memset((*x).s[0], 0, categs * sites * sizeof (sitelike));
2035 for (j = 1; j < sites; j++)
2036 (*x).s[j] = (*x).s[0] + categs * j;
2037
2038 }
2039
2040 void
make_alleles(world_fmt * world,option_fmt * options,data_fmt * data,long locus)2041 make_alleles (world_fmt * world, option_fmt * options, data_fmt * data, long locus)
2042 {
2043 long ii,top;
2044 long pop, ind;
2045 long zz=0;
2046 long zpop;
2047 long iu;
2048 long len;
2049 char a1[DEFAULT_ALLELENMLENGTH];
2050 char a2[DEFAULT_ALLELENMLENGTH];
2051 node **nodelist = world->nodep;
2052 for (pop = 0; pop < data->numpop; pop++)
2053 {
2054 // preparation of combining populations
2055 // if(world->custm[m2mm(pop,world->numpop)])
2056 //
2057 zpop=0;
2058 top = max_shuffled_individuals(options, data, pop, locus);
2059 for (ii = 0; ii < top; ii++)
2060 {
2061 ind = data->shuffled[pop][locus][ii];
2062 strcpy (a1, data->yy[pop][ind][locus][0]);
2063 strcpy (a2, data->yy[pop][ind][locus][1]);
2064 if (strcmp (a1, "?"))
2065 {
2066 allocate_tip(world,options, &world->nodep[zz],pop,locus,zz,ind,data->indnames[pop][ind]);
2067 len = strlen(nodelist[zz]->nayme);
2068 nodelist[zz]->nayme = (char *) realloc(nodelist[zz]->nayme,sizeof(char)*(len+5+strlen(a1)));
2069 strcat(nodelist[zz]->nayme,"A!");
2070 strcat(nodelist[zz]->nayme,a1);
2071 nodelist[zz++]->x.a[findAllele (data, a1, locus)] = 1.0;
2072 zpop++;
2073 }
2074 else
2075 {
2076 if(options->include_unknown)
2077 {
2078 fprintf(stdout,"? found\n");
2079 allocate_tip(world,options, &world->nodep[zz],pop,locus,zz,ind,data->indnames[pop][ind]);
2080 for(iu=0;iu<data->maxalleles[locus]; iu++)
2081 nodelist[zz]->x.a[iu] = 1.0;
2082 len = strlen(nodelist[zz]->nayme);
2083 nodelist[zz]->nayme = (char *) realloc(nodelist[zz]->nayme,sizeof(char)*(len+5+strlen(a1)));
2084 strcat(nodelist[zz]->nayme,"A!");
2085 strcat(nodelist[zz]->nayme,a1);
2086 zz++;
2087 zpop++;
2088 }
2089 }
2090 if (strcmp (a2, "?"))
2091 {
2092 allocate_tip(world,options, &world->nodep[zz],pop,locus,zz,ind,data->indnames[pop][ind]);
2093 len = strlen(nodelist[zz]->nayme);
2094 nodelist[zz]->nayme = (char *) realloc(nodelist[zz]->nayme,sizeof(char)*(len+5+strlen(a2)));
2095 strcat(nodelist[zz]->nayme,"B!");
2096 strcat(nodelist[zz]->nayme,a2);
2097 nodelist[zz++]->x.a[findAllele (data, a2, locus)] = 1.0;
2098 zpop++;
2099 }
2100 else
2101 {
2102 if(options->include_unknown)
2103 {
2104 allocate_tip(world,options, &world->nodep[zz],pop,locus,zz,ind,data->indnames[pop][ind]);
2105 // strcpy (nodelist[zz]->nayme, a1);
2106 for(iu=0;iu<data->maxalleles[locus]; iu++)
2107 nodelist[zz]->x.a[iu] = 1.0;
2108 len = strlen(nodelist[zz]->nayme);
2109 nodelist[zz]->nayme = (char *) realloc(nodelist[zz]->nayme,sizeof(char)*(len+5+strlen(a2)));
2110 strcat(nodelist[zz]->nayme,"B!");
2111 strcat(nodelist[zz]->nayme,a2);
2112 zz++;
2113 zpop++;
2114 }
2115 }
2116 }
2117 data->numalleles[pop][locus] = zpop;
2118 }
2119 world->sumtips = zz;
2120 if (world->sumtips==0)
2121 {
2122 data->skiploci[locus] = TRUE;
2123 world->data->skiploci[locus] = TRUE;
2124 world->skipped += 1;
2125 }
2126 }
2127
2128 void
find_minmax_msat_allele(world_fmt * world,data_fmt * data,long locus,long * smallest,long * biggest)2129 find_minmax_msat_allele (world_fmt * world, data_fmt * data, long locus,
2130 long *smallest, long *biggest)
2131 {
2132 long pop, ind, tmp;
2133 *biggest = 0;
2134 *smallest = LONG_MAX;
2135 for (pop = 0; pop < data->numpop; pop++)
2136 {
2137 for (ind = 0; ind < data->numind[pop][locus]; ind++)
2138 {
2139 if (data->yy[pop][ind][locus][0][0] != '?')
2140 {
2141 if ((tmp = atoi (data->yy[pop][ind][locus][0])) > *biggest)
2142 *biggest = tmp;
2143 if (tmp < *smallest)
2144 *smallest = tmp;
2145 }
2146 if (data->yy[pop][ind][locus][1][0] != '?')
2147 {
2148 if ((tmp = atoi (data->yy[pop][ind][locus][1])) > *biggest)
2149 *biggest = tmp;
2150 if (tmp < *smallest)
2151 *smallest = tmp;
2152 }
2153 }
2154 }
2155 }
2156
2157 void
make_microsatellites(world_fmt * world,option_fmt * options,data_fmt * data,long locus)2158 make_microsatellites (world_fmt * world, option_fmt *options, data_fmt * data, long locus)
2159 {
2160 long top;
2161 long pop, ind, ii;//, tmp = 0;//, tips = 0, unknownsum = 0;
2162 long zz=0;
2163 long zpop;
2164 long iu;
2165 long len;
2166 long smallest = 0;
2167 long biggest = 0;
2168 long smax;
2169 node **nodelist = world->nodep;
2170 char a1[DEFAULT_ALLELENMLENGTH];
2171 char a2[DEFAULT_ALLELENMLENGTH];
2172 calculate_steps (world);
2173 find_minmax_msat_allele (world, data, locus, &smallest, &biggest);
2174 smax = biggest - smallest;
2175 if(smax == 0)
2176 smax = 1;
2177 if(world->options->micro_threshold[locus] > smax)
2178 world->options->micro_threshold[locus] = smax;
2179 smax += 2 * world->options->micro_threshold[locus];
2180 world->data->maxalleles[locus] = data->maxalleles[locus] = smax;
2181 //we start with the smallest - margin and go up to biggest + margin
2182 smallest = smallest - 1 - world->options->micro_threshold[locus];
2183 if (smallest < 0)
2184 smallest = 0;
2185 for (pop = 0; pop < data->numpop; pop++)
2186 {
2187 zpop=0;
2188 top = max_shuffled_individuals(options, data, pop, locus);
2189 for (ii = 0; ii < top; ii++)
2190 {
2191 ind = data->shuffled[pop][locus][ii];
2192 strcpy (a1, data->yy[pop][ind][locus][0]);
2193 strcpy (a2, data->yy[pop][ind][locus][1]);
2194 if (strcmp (a1, "?"))
2195 {
2196 allocate_tip(world,options, &world->nodep[zz],pop,locus,zz, ind,
2197 data->indnames[pop][ind]);
2198 nodelist[zz]->x.a =
2199 (MYREAL *) myrealloc (nodelist[zz]->x.a, sizeof (MYREAL) * smax);
2200 len = strlen(nodelist[zz]->nayme);
2201 nodelist[zz]->nayme = (char *) realloc(nodelist[zz]->nayme,sizeof(char)*(len+5+strlen(a1)));
2202 strcat(nodelist[zz]->nayme,"A!");
2203 strcat(nodelist[zz]->nayme,a1);
2204 nodelist[zz++]->x.a[atoi (a1) - smallest] = 1.0;
2205 zpop++;
2206 }
2207 else
2208 {
2209 if(options->include_unknown)
2210 {
2211 allocate_tip(world,options, &world->nodep[zz],pop,locus,zz, ind,data->indnames[pop][ind]);
2212 // strcpy (nodelist[zz]->nayme, a1);
2213 for(iu=0;iu<data->maxalleles[locus]; iu++)
2214 nodelist[zz]->x.a[iu] = 1.0;
2215 len = strlen(nodelist[zz]->nayme);
2216 nodelist[zz]->nayme = (char *) realloc(nodelist[zz]->nayme,sizeof(char)*(len+5+strlen(a1)));
2217 strcat(nodelist[zz]->nayme,"A!");
2218 strcat(nodelist[zz]->nayme,a1);
2219 zz++;
2220 zpop++;
2221 }
2222 }
2223 if (strcmp (a2, "?"))
2224 {
2225 allocate_tip(world,options, &world->nodep[zz],pop,locus,zz, ind,
2226 data->indnames[pop][ind]);
2227 nodelist[zz]->x.a =
2228 (MYREAL *) myrealloc (nodelist[zz]->x.a, sizeof (MYREAL) * smax);
2229 len = strlen(nodelist[zz]->nayme);
2230 nodelist[zz]->nayme = (char *) realloc(nodelist[zz]->nayme,sizeof(char)*(len+5+strlen(a2)));
2231 strcat(nodelist[zz]->nayme,"B!");
2232 strcat(nodelist[zz]->nayme,a2);
2233 nodelist[zz++]->x.a[atoi (a2) - smallest] = 1.0;
2234 zpop++;
2235 }
2236 else
2237 {
2238 if(options->include_unknown)
2239 {
2240 allocate_tip(world,options, &world->nodep[zz],pop,locus,zz, ind,data->indnames[pop][ind]);
2241 // strcpy (nodelist[zz]->nayme, a1);
2242 for(iu=0;iu<data->maxalleles[locus]; iu++)
2243 nodelist[zz]->x.a[iu] = 1.0;
2244 len = strlen(nodelist[zz]->nayme);
2245 nodelist[zz]->nayme = (char *) realloc(nodelist[zz]->nayme,sizeof(char)*(len+5+strlen(a2)));
2246 strcat(nodelist[zz]->nayme,"B!");
2247 strcat(nodelist[zz]->nayme,a2);
2248 zz++;
2249 zpop++;
2250 }
2251 }
2252 }
2253 data->numalleles[pop][locus] = zpop;
2254 }
2255 world->sumtips = zz;
2256 if (world->sumtips==0)
2257 {
2258 data->skiploci[locus] = TRUE;
2259 world->data->skiploci[locus] = TRUE;
2260 world->skipped += 1;
2261 }
2262 }
2263
2264 void
make_microbrownian(world_fmt * world,option_fmt * options,data_fmt * data,long locus)2265 make_microbrownian (world_fmt * world, option_fmt * options, data_fmt * data, long locus)
2266 {
2267 long top;
2268 long pop, ind, ii;
2269 long len;
2270 char a1[DEFAULT_ALLELENMLENGTH];
2271 char a2[DEFAULT_ALLELENMLENGTH];
2272 node **nodelist = world->nodep;
2273 long zz=0;
2274 long zpop;
2275 // long iu;
2276 long btotal;
2277 MYREAL bsum;
2278 for (pop = 0; pop < data->numpop; pop++)
2279 {
2280 btotal=0;
2281 bsum=0.;
2282 top = max_shuffled_individuals(options, data, pop, locus);
2283 for (ii = 0; ii < top; ii++)
2284 {
2285 ind = data->shuffled[pop][locus][ii];
2286 strcpy (a1, data->yy[pop][ind][locus][0]);
2287 strcpy (a2, data->yy[pop][ind][locus][1]);
2288 if (strcmp (a1, "?"))
2289 {
2290 btotal++;
2291 bsum += atof (a1);
2292 }
2293 if (strcmp (a2, "?"))
2294 {
2295 btotal++;
2296 bsum += atof (a2);
2297 }
2298 }
2299 bsum /= btotal;
2300
2301 zpop = 0;
2302 for (ii = 0; ii < top; ii++)
2303 {
2304 ind = data->shuffled[pop][locus][ii];
2305 strcpy (a1, data->yy[pop][ind][locus][0]);
2306 strcpy (a2, data->yy[pop][ind][locus][1]);
2307 if (strcmp (a1, "?"))
2308 {
2309 allocate_tip(world,options, &world->nodep[zz],pop,locus,zz,ind,data->indnames[pop][ind]);
2310 len = strlen(nodelist[zz]->nayme);
2311 nodelist[zz]->nayme = (char *) realloc(nodelist[zz]->nayme,sizeof(char)*(len+5+strlen(a1)));
2312 strcat(nodelist[zz]->nayme,"A!");
2313 strcat(nodelist[zz]->nayme,a1);
2314 nodelist[zz++]->x.a[0] = atof (a1);
2315 zpop++;
2316 }
2317 else
2318 {
2319 if(options->include_unknown)
2320 {
2321 allocate_tip(world,options, &world->nodep[zz],pop,locus,zz,ind,data->indnames[pop][ind]);
2322 len = strlen(nodelist[zz]->nayme);
2323 nodelist[zz]->nayme = (char *) realloc(nodelist[zz]->nayme,sizeof(char)*(len+5+strlen(a1)));
2324 strcat(nodelist[zz]->nayme,"A!");
2325 strcat(nodelist[zz]->nayme,a1);
2326 nodelist[zz++]->x.a[0] = bsum; //average over other values
2327 // this might not be very sensible at all.
2328 zpop++;
2329 }
2330 }
2331 if (strcmp (a2, "?"))
2332 {
2333 allocate_tip(world,options, &world->nodep[zz],pop,locus,zz,ind,data->indnames[pop][ind]);
2334 len = strlen(nodelist[zz]->nayme);
2335 nodelist[zz]->nayme = (char *) realloc(nodelist[zz]->nayme,sizeof(char)*(len+5+strlen(a2)));
2336 strcat(nodelist[zz]->nayme,"B!");
2337 strcat(nodelist[zz]->nayme,a2);
2338 nodelist[zz++]->x.a[0] = atof (a2);
2339 zpop++;
2340 }
2341 else
2342 {
2343 if(options->include_unknown)
2344 {
2345 allocate_tip(world,options, &world->nodep[zz],pop,locus,zz,ind,data->indnames[pop][ind]);
2346 len = strlen(nodelist[zz]->nayme);
2347 nodelist[zz]->nayme = (char *) realloc(nodelist[zz]->nayme,sizeof(char)*(len+5+strlen(a2)));
2348 strcat(nodelist[zz]->nayme,"B!");
2349 strcat(nodelist[zz]->nayme,a2);
2350 nodelist[zz++]->x.a[0] = bsum;
2351 zpop++;
2352 }
2353 }
2354 }
2355 data->numalleles[pop][locus] = zpop;
2356 }
2357 world->sumtips = zz;
2358 if (world->sumtips==0)
2359 {
2360 data->skiploci[locus] = TRUE;
2361 world->data->skiploci[locus] = TRUE;
2362 world->skipped += 1;
2363 }
2364 }
2365
2366
2367 /*---------------------------------------------
2368 free_treetimes frees the ptr_array of timeslist
2369 */
2370 void
free_treetimes(world_fmt * world,long size)2371 free_treetimes (world_fmt * world, long size)
2372 {
2373 // long i;
2374 while (size >= 0)
2375 {
2376 // for(i=world->treetimes[size].allocT-1; i>= 0 ; i--)
2377 // myfree(world->treetimes[size].tl[i].lineages);
2378 myfree(world->treetimes[size].lineages);
2379 myfree(world->treetimes[size].tl);
2380 size--;
2381 }
2382 }
2383
2384 ///
2385 /// construct timelist
2386 void
construct_tymelist(world_fmt * world,timelist_fmt * timevector)2387 construct_tymelist (world_fmt * world, timelist_fmt * timevector)
2388 {
2389 long z = 0;
2390 long tips = 0;
2391 //long ii;
2392 //long T;
2393 // long pop;
2394 MYREAL tmp;
2395 timevector->numpop = world->numpop;
2396 traverseNodes (world->root, &timevector, &z, world, &tips);
2397 timevector->T = z;
2398 #ifdef TREEDEBUG
2399 printf("timevector->T=%li tips=%li\n",timevector->T,tips);
2400 #endif
2401 qsort ((void *) timevector->tl, timevector->T, sizeof (vtlist), agecmp);
2402 if ((*timevector).tl[(*timevector).T - 1].eventnode->type != 'r')
2403 {
2404 z = 0;
2405 while ((*timevector).tl[z].eventnode->type != 'r')
2406 z++;
2407
2408 tmp = (*timevector).tl[(*timevector).T - 1].eventnode->tyme + 10000.;
2409 (*timevector).tl[z].eventnode->tyme = tmp;
2410 (*timevector).tl[z].eventnode->next->tyme = tmp;
2411 (*timevector).tl[z].eventnode->next->next->tyme = tmp;
2412
2413 (*timevector).tl[z].age = (*timevector).tl[z].eventnode->tyme;
2414 qsort ((void *) timevector->tl, timevector->T, sizeof (vtlist), agecmp);
2415 warning("construct_tymelist root moved: new time = %f\n",(*timevector).tl[z].eventnode->tyme);
2416 }
2417 timeslices (&timevector);
2418 add_partlineages (world->numpop, &timevector);
2419 #ifdef TREEDEBUG
2420 fprintf(stderr,"///////////////////\n");
2421 long T = timevector->T-2;
2422 long ii, pop;
2423 for (ii = T; ii >= 0; ii--)
2424 {
2425 for (pop = 0; pop < world->numpop; pop++)
2426 fprintf(stderr,"%li ",timevector->tl[ii].lineages[pop]);
2427 fprintf(stderr,"%20.10f %20.10f | %f %c %li -> %li (%li)(%li)\n",timevector->tl[ii].age,timevector->tl[ii].eventnode->tyme, timevector->tl[ii].eventnode->tyme, timevector->tl[ii].eventnode->type, timevector->tl[ii].from, timevector->tl[ii].to, timevector->tl[ii].eventnode->id, showtop(timevector->tl[ii].eventnode->back)->id);
2428 }
2429 fprintf(stderr,"/////////////////////////////////////////////\n");
2430 #endif
2431 }
2432
2433
2434
2435 /// find a tipdate
2436 MYREAL
find_tipdate(char * id,long pop,world_fmt * world)2437 find_tipdate(char * id, long pop, world_fmt *world)
2438 {
2439 MYREAL date;
2440 long ind;
2441 long slen;
2442 long locus = world->locus;
2443 tipdate_fmt *sampledates = world->data->sampledates[pop][locus];
2444
2445 for(ind=0;ind < world->data->numind[pop][locus]; ind++)
2446 {
2447 if(sampledates[ind].name != NULL)
2448 {
2449 slen = strlen(sampledates[ind].name);
2450 if(!strncmp(sampledates[ind].name,id, slen))
2451 {
2452 date = (sampledates[ind].date
2453 * world->options->generation_year
2454 * world->options->meanmu[locus]) * world->options->mu_rates[locus];
2455 printf("%i> id='%s' name='%s' realdate=%f date=%f locus=%li pop=%li\n",myID, id,sampledates[ind].name, sampledates[ind].date, date, locus, pop);
2456 fflush(stdout);
2457 return date;
2458 }
2459 }
2460 }
2461 return 0.0;
2462 }
2463
2464 ///
2465 /// traverse the tree and writes node-information into the real timelist also
2466 /// takes care that the size of timelist is increased accordingly
2467 void
traverseNodes(node * theNode,timelist_fmt ** timevector,long * slice,world_fmt * world,long * tips)2468 traverseNodes (node * theNode, timelist_fmt ** timevector, long *slice, world_fmt *world, long *tips)
2469 {
2470 //#ifdef DEBUG
2471 // MYREAL tipdate = 0.0;
2472 //#endif
2473 if (theNode != NULL)
2474 {
2475 if (theNode->type != 't')
2476 {
2477 if (theNode->next->back != NULL)
2478 {
2479 traverseNodes (theNode->next->back, timevector, slice, world, tips);
2480 }
2481 if (theNode->type != 'm' && theNode->next->next->back != NULL)
2482 {
2483 traverseNodes (theNode->next->next->back, timevector, slice, world, tips);
2484 }
2485 if (theNode->top)
2486 {
2487 /*
2488 * Here we are on the save side if we increase the
2489 * timelist so never a fence-write can happen
2490 */
2491 if (*slice >= (*timevector)->allocT-1)
2492 {
2493 increase_timelist (timevector);
2494 }
2495 /*
2496 * (*timevector)->tl[*slice].pop =
2497 * theNode->actualpop;
2498 */
2499 (*timevector)->tl[*slice].age = theNode->tyme;
2500 //mark the visited node this will not work with recycled node pointers
2501 //does it work at all?
2502 //if(theNode != (*timevector)->tl[*slice].eventnode)
2503 // (*timevector)->tl[*slice].visited = TRUE;
2504 //else
2505 // (*timevector)->tl[*slice].visited = FALSE;
2506 //
2507 (*timevector)->tl[*slice].eventnode = theNode;
2508 (*timevector)->tl[*slice].slice = *slice;
2509 (*timevector)->tl[*slice].from = theNode->pop;
2510 (*timevector)->tl[*slice].to = theNode->actualpop;
2511 (*slice) += 1;
2512 }
2513 else
2514 {
2515 error("traverseNodes expects to look only at TOP nodes, but received another one and died\n");
2516 }
2517 }
2518 else
2519 {
2520 //tipdate = find_tipdate(theNode->nayme, theNode->pop, world);
2521 //theNode->tyme = tipdate;
2522 if (*slice >= (*timevector)->allocT-1)
2523 {
2524 increase_timelist (timevector);
2525 }
2526 (*timevector)->tl[*slice].age = theNode->tyme;
2527 (*timevector)->tl[*slice].eventnode = theNode;
2528 (*timevector)->tl[*slice].slice = *slice;
2529 (*timevector)->tl[*slice].from = theNode->pop;
2530 (*timevector)->tl[*slice].to = theNode->actualpop;
2531 (*tips) += 1;
2532 (*slice) += 1;
2533 }
2534 }
2535 else
2536 error("no node?????????");
2537 }
2538
2539 void
increase_timelist(timelist_fmt ** timevector)2540 increase_timelist (timelist_fmt ** timevector)
2541 {
2542 (*timevector)->oldT = (*timevector)->allocT;
2543 (*timevector)->allocT += (*timevector)->allocT / 4; /* increase timelist by
2544 * 25% */
2545 (*timevector)->tl = (vtlist *) myrealloc ((*timevector)->tl,
2546 sizeof (vtlist) * ((*timevector)->allocT + 1));
2547 memset ((*timevector)->tl + (*timevector)->oldT, 0,
2548 ((*timevector)->allocT - (*timevector)->oldT) * sizeof (vtlist));
2549 allocate_lineages (timevector, 0, (*timevector)->numpop);
2550 }
2551
2552
set_all_dirty(const node * root,node * p,world_fmt * world,const long locus)2553 void set_all_dirty (const node * root, node * p, world_fmt * world, const long locus)
2554 {
2555 // node *left;
2556 // node *right;
2557 if (p->type == 'm')
2558 error ("MIGRATION NODE IN SMOOTH FOUND, PLEASE REPORT!!!!\n");
2559 if (p->type == 'i')
2560 {
2561 set_all_dirty (root, /*right=*/crawlback (p->next), world, locus);
2562 set_all_dirty (root, /*left=*/crawlback (p->next->next), world, locus);
2563 p->dirty = TRUE;
2564 printf("(INT %li, %li, %f)-->%li\n",p->id, p->bid, p->v, showtop(crawlback(p))->id);
2565 }
2566 else
2567 {
2568 printf("(TIP %li, %li, %f)-->%li\n",p->id, p->bid, p->v, showtop(crawlback(p))->id);
2569 }
2570 } /* set_all_dirty */
2571
2572 void
smooth(const node * root,node * p,world_fmt * world,const long locus)2573 smooth (const node * root, node * p, world_fmt * world, const long locus)
2574 {
2575 // node *left;
2576 // node *right;
2577 // /* static */ long panic;
2578 /* only changed lineages are considered */
2579 if (!p->dirty)
2580 return;
2581
2582 //xcode if (p == (crawlback (root)))
2583 //xcode panic = 0;
2584 if (p->type == 'm')
2585 error ("MIGRATION NODE IN SMOOTH FOUND, PLEASE REPORT!!!!\n");
2586 if (p->type == 'i')
2587 {
2588 smooth (root, /*right=*/crawlback (p->next), world, locus);
2589 smooth (root, /*left=*/crawlback (p->next->next), world, locus);
2590 #ifdef BEAGLE
2591 prepare_beagle_instances(p,left, right, world->beagle);
2592 // printf("(PINT %li, %li, %f)-->%li\n",p->id, p->bid, p->v, showtop(crawlback(p))->id);
2593 #else
2594 (*nuview) (p, world, locus);
2595 #endif
2596 #ifdef UEP
2597 if(world->options->uep)
2598 twostate_nuview (p, world, locus);
2599 #endif
2600 p->dirty = FALSE;
2601 }
2602 #ifdef BEAGLE
2603 //else
2604 // {
2605 // printf("(PTIP: %li, %li, %f)-->%li\n",p->id, p->bid, p->v, showtop(crawlback(p))->id);
2606 // }
2607 #endif
2608 } /* smooth */
2609
2610
2611 /// sets the nuview machinery so that depending on datatype the correct
2612 /// conditional likelihood calculator is used
2613 void
which_pseudonuview(char datatype,boolean fastlike,boolean use_gaps,int watkins)2614 which_pseudonuview (char datatype, boolean fastlike, boolean use_gaps, int watkins)
2615 {
2616 switch (datatype)
2617 {
2618 case 'a':
2619 ppseudonuview = (void (*)(proposal_fmt *, xarray_fmt , MYREAL *, MYREAL, xarray_fmt , MYREAL *, MYREAL)) pseudonu_allele;
2620 break;
2621 case 'b':
2622 ppseudonuview = (void (*)(proposal_fmt *, xarray_fmt , MYREAL *, MYREAL, xarray_fmt, MYREAL *, MYREAL)) pseudonu_brownian;
2623 break;
2624 case 'm':
2625 ppseudonuview = (void (*)(proposal_fmt *, xarray_fmt, MYREAL *, MYREAL, xarray_fmt, MYREAL *, MYREAL)) pseudonu_micro;
2626 break;
2627 case 's':
2628 case 'n':
2629 case 'h':
2630 case 'u':
2631 if (fastlike && !use_gaps)
2632 ppseudonuview = (void (*)(proposal_fmt *, xarray_fmt, MYREAL *, MYREAL, xarray_fmt , MYREAL *, MYREAL)) pseudonu_seq_slow;
2633 else
2634 {
2635 if(fastlike)
2636 ppseudonuview = (void (*)(proposal_fmt *, xarray_fmt, MYREAL *, MYREAL, xarray_fmt, MYREAL *, MYREAL)) pseudonu_seq;
2637 else /*use_gaps*/
2638 ppseudonuview = (void (*)(proposal_fmt *, xarray_fmt, MYREAL *, MYREAL, xarray_fmt, MYREAL *, MYREAL)) pseudonu_seq_slow; //nuview_gaps;
2639 }
2640 break;
2641 case 'f': /* fitch, reconstruction of ancestral state
2642 * method */
2643 ppseudonuview = (void (*)(proposal_fmt *, xarray_fmt, MYREAL *, MYREAL, xarray_fmt, MYREAL * , MYREAL)) pseudonu_anc;
2644 }
2645 }
2646
2647 /// sets the nuview machinery so that depending on datatype the correct
2648 /// conditional likelihood calculator is used
2649 void
which_nuview(char datatype,boolean fastlike,boolean use_gaps,int watkins)2650 which_nuview (char datatype, boolean fastlike, boolean use_gaps, int watkins)
2651 {
2652 switch (datatype)
2653 {
2654 case 'a':
2655 nuview = (void (*)(node *, world_fmt *, long)) nuview_allele;
2656 break;
2657 case 'b':
2658 nuview = (void (*)(node *, world_fmt *, long)) nuview_brownian;
2659 break;
2660 case 'm':
2661 switch(watkins)
2662 {
2663 case MULTISTEP:
2664 prob_micro = (double (*)(MYREAL, long, world_fmt *, pair *)) prob_micro_watkins;
2665 break;
2666 case SINGLESTEP:
2667 default:
2668 prob_micro = (double (*)(MYREAL, long, world_fmt *, pair *)) prob_micro_singlestep;
2669 }
2670 nuview = (void (*)(node *, world_fmt *, long)) nuview_micro;
2671 break;
2672 case 's':
2673 case 'n':
2674 case 'h':
2675 case 'u':
2676 if (fastlike && !use_gaps)
2677 nuview = (void (*)(node *, world_fmt *, long)) nuview_sequence;
2678 else
2679 {
2680 if(fastlike)
2681 nuview = (void (*)(node *, world_fmt *, long)) nuview_sequence_slow;
2682 else /*use_gaps*/
2683 nuview = (void (*)(node *, world_fmt *, long)) nuview_sequence_slow; //nuview_gaps;
2684 }
2685 break;
2686 case 'f': /* fitch, reconstruction of ancestral state
2687 * method */
2688 nuview = (void (*)(node *, world_fmt *, long)) nuview_ancestral;
2689 }
2690 }
2691
2692 void
nuview_allele(node * mother,world_fmt * world,const long locus)2693 nuview_allele (node * mother, world_fmt * world, const long locus)
2694 {
2695 long a;
2696 long aa;
2697 long mal = world->data->maxalleles[locus];
2698
2699 MYREAL freq = world->data->freq;
2700 // MYREAL freqlast = world->data->freqlast; see pseudonuview
2701 MYREAL w1;
2702 MYREAL w2;
2703 MYREAL v1;
2704 MYREAL v2;
2705 MYREAL v1freq;
2706 MYREAL v2freq;
2707 MYREAL pija1;
2708 MYREAL pija2;
2709 MYREAL lx1;
2710 MYREAL lx2;
2711
2712 MYREAL x3m = -MYREAL_MAX;
2713
2714 MYREAL *xx1, *xx2;
2715 MYREAL *xx3;
2716
2717 node *d1 = NULL;
2718 node *d2 = NULL;
2719
2720 // MYREAL test = 0.0;
2721
2722 children (mother, &d1, &d2);
2723 xx1 = d1->x.a;
2724 xx2 = d2->x.a;
2725 xx3 = mother->x.a;
2726 lx1 = d1->scale[0];
2727 lx2 = d2->scale[0];
2728 v1 = 1 - EXP (-d1->v);
2729 v2 = 1 - EXP (-d2->v);
2730 if (v1 >= 1.)
2731 {
2732 w1 = 0.0;
2733 v1 = 1.0;
2734 }
2735 else
2736 {
2737 w1 = 1.0 - v1;
2738 }
2739 if (v2 >= 1.)
2740 {
2741 w2 = 0.0;
2742 v2 = 1.0;
2743 }
2744 else
2745 {
2746 w2 = 1.0 - v2;
2747 }
2748 // printf("@@nuview 1:{%f,%f, %f,%f} 2:(%f,%f, %f,%f}\n",xx1[0],xx1[1],lx1,v1,xx2[0],xx2[1],lx2,v2);
2749
2750 v1freq = v1 * freq;
2751 v2freq = v2 * freq;
2752
2753 for (aa = 0; aa < mal; aa++)
2754 {
2755 pija1 = pija2 = 0.0;
2756 for (a = 0; a < mal; a++)
2757 {
2758 if(aa==a)
2759 {
2760 pija1 += (w1 + v1freq) * xx1[a];
2761 pija2 += (w2 + v2freq) * xx2[a];
2762 }
2763 else
2764 {
2765 pija1 += v1 * freq * xx1[a];
2766 pija2 += v2 * freq * xx2[a];
2767 }
2768 }
2769 xx3[aa] = pija1 * pija2;
2770 //test += xx3[aa];
2771 if (xx3[aa] > x3m)
2772 x3m = xx3[aa];
2773
2774 }
2775 //if (test <= 0.0)
2776 // error ("xx3 is 0 or garbage!");
2777 for (aa = 0; aa < mal; aa++)
2778 {
2779 xx3[aa] /= x3m;
2780 }
2781 // printf("@@finish: (%f=%f) (%f=%f)\n",xx3[0],mother->x.a[0],xx3[1],mother->x.a[1]);
2782 mother->scale[0] = LOG (x3m) + lx2 + lx1;
2783 }
2784
2785 void
nuview_brownian(node * mother,world_fmt * world,const long locus)2786 nuview_brownian (node * mother, world_fmt * world, const long locus)
2787 {
2788
2789 node *d1 = NULL, *d2 = NULL;
2790 MYREAL rvtot = 0.;
2791 MYREAL xx1, xx2, c12, diff;
2792 MYREAL mean1, mean2, mean, v1, v2, vtot, f1, f2;
2793 children (mother, &d1, &d2);
2794 mean1 = d1->x.a[0];
2795 mean2 = d2->x.a[0];
2796 xx1 = d1->x.a[2];
2797 xx2 = d2->x.a[2];
2798
2799 v1 = d1->v + d1->x.a[1]; /* di->v == length of branch time(n1) -
2800 * time(n2) */
2801 v2 = d2->v + d2->x.a[1]; /* x.a[1] contains the deltav */
2802 vtot = v1 + v2;
2803 diff = mean1 - mean2;
2804 if (vtot > 0.0)
2805 {
2806 rvtot = 1./ vtot;
2807 f1 = v2 * rvtot;
2808 c12 = diff * diff * rvtot;
2809 }
2810 else
2811 {
2812 f1 = 0.5;
2813 c12 = HUGE;
2814 vtot = SMALL_VALUE;
2815 }
2816 f2 = 1.0 - f1;
2817 mean = f1 * mean1 + f2 * mean2;
2818
2819
2820 mother->x.a[2] =
2821 xx1 + xx2 + MIN (0.0, -0.5 * (LOG (vtot) + c12) + LOG2PIHALF);
2822 /*
2823 * printf("L=%f , L1=%f, L2=%f, log(vtot=%f)=%f,
2824 * c12=%f\n",mother->x.a[2], xx1, xx2,vtot,log(vtot),c12);
2825 */
2826 mother->x.a[1] = v1 * f1;
2827 mother->x.a[0] = mean;
2828
2829 }
2830
2831
2832 void
nuview_micro(node * mother,world_fmt * world,const long locus)2833 nuview_micro (node * mother, world_fmt * world, const long locus)
2834 {
2835 node *d1 = NULL;
2836 node *d2 = NULL;
2837 long s;
2838 long a;
2839 long aa1, aa2;
2840 long diff;
2841 long margin = world->options->micro_threshold[locus];
2842 MYREAL vv1, vv2, lx1, lx2;
2843 MYREAL x3m = -MYREAL_MAX;
2844 MYREAL pija1s, pija2s;
2845 MYREAL *xx1 = NULL, *xx2 = NULL;
2846
2847 MYREAL *pm1;
2848 MYREAL *pm2;
2849
2850 long smax = world->data->maxalleles[locus];
2851 MYREAL *xx3 = NULL;
2852 // needed for watkins' microsatellite model
2853 pair *helper = &world->options->msat_tuning;
2854
2855 children (mother, &d1, &d2);
2856 vv1 = d1->v;
2857 vv2 = d2->v;
2858 xx1 = d1->x.a;
2859 xx2 = d2->x.a;
2860 xx3 = mother->x.a;
2861 lx1 = d1->scale[0];
2862 lx2 = d2->scale[0];
2863
2864 pm1 = (MYREAL *) mymalloc(sizeof(MYREAL) * 4 * margin);
2865 pm2 = pm1 + 2 * margin;
2866
2867 for (diff = 0; diff < margin; diff++)
2868 {
2869 pm1[diff] = prob_micro (vv1, diff, world, helper);
2870 pm2[diff] = prob_micro (vv2, diff, world, helper);
2871 //fprintf(stdout,"pm[diff=%li]=(%g %g)\n",diff,pm1[diff],pm2[diff]);
2872 }
2873
2874 for (s = 0; s < smax; s++)
2875 {
2876 pija1s = pija2s = 0.0;
2877 aa1 = MAX (0, s - margin);
2878 aa2 = MIN(s + margin,smax);
2879 for (a = aa1; a < aa2; a++)
2880 //for (a = 0; a < smax; a++)
2881 {
2882 diff = labs (s - a);
2883 if(diff>=margin)
2884 continue;
2885 // fprintf(stdout,"***pm[diff=%li]=(%g %g) xx[a=%li]=(%f %f)\n",diff,pm1[diff],pm2[diff],a,xx1[a],xx2[a]);
2886 if (xx1[a] > 0)
2887 {
2888 pija1s += pm1[diff] * xx1[a];
2889 }
2890 if (xx2[a] > 0)
2891 {
2892 pija2s += pm2[diff] * xx2[a];
2893 }
2894 }
2895 xx3[s] = pija1s * pija2s;
2896 if (xx3[s] > x3m)
2897 x3m = xx3[s];
2898 }
2899 if (x3m == 0.0)
2900 {
2901 mother->scale[0] = -MYREAL_MAX;
2902 }
2903 else
2904 {
2905 for (s = 0; s < smax; s++)
2906 {
2907 xx3[s] /= x3m;
2908 }
2909 mother->scale[0] = LOG (x3m) + lx1 + lx2;
2910 }
2911 myfree(pm1);
2912 }
2913
2914
2915 void
calculate_steps(world_fmt * world)2916 calculate_steps (world_fmt * world)
2917 {
2918 long k, diff;
2919 long locus = world->locus;
2920 const long stepnum = world->options->micro_threshold[locus];
2921 MYREAL ***steps = world->options->steps;
2922
2923 for (diff = 0; diff < stepnum; diff++)
2924 {
2925 for (k = 0; k < stepnum; k++)
2926 {
2927 steps[locus][diff][k] = logfac(k + diff) + logfac (k);
2928 }
2929 }
2930 }
2931
2932
2933 ///
2934 /// calculates probability of a mutation of size diff in time t. used for msat calculations.
2935 /// i=diff
2936 /// prob[i_, t_] := Exp[-t] Sum[(t/2)^(i + 2 k)/((i + k)! k!), {k, 0, Infinity}]
2937 /// calculated as Sum[Exp[(-t + ((log(t)-log(2))*i) + (2*(log(t)-log(2)) * k) - steps(i,k))
2938 MYREAL
prob_micro_singlestep(MYREAL t,long diff,world_fmt * world,pair * helper)2939 prob_micro_singlestep(MYREAL t, long diff, world_fmt * world, pair *helper)
2940 {
2941 // helper is a dummy so that the call is the same as for prob_micro_watkins
2942 const long locus = world->locus;
2943 MYREAL *steps;
2944 long k;
2945 long k2;
2946 long stepnum = world->options->micro_threshold[locus];
2947 MYREAL temp1;
2948 MYREAL temp2;
2949 MYREAL temp3;
2950 MYREAL temp4;
2951 MYREAL const_part;
2952 MYREAL summ = 0.0;
2953 // MYREAL oldsum = 0.0;
2954 const MYREAL logt = LOG (t) - LOG2;
2955 const MYREAL logt2 = 2 * logt;
2956 if (diff >= stepnum)
2957 return summ;
2958 // linking to precalculated "denominator" Log[(i+k)! k!]
2959 // was precalculated in calculate_steps()
2960 steps = world->options->steps[locus][diff];
2961 const_part = -t + logt * diff;
2962 // loop unrolling to remove possible stalls
2963 // assumes stepnum is even
2964 for (k = 0; k < stepnum; k += 2)
2965 {
2966 k2 = k + 1;
2967 temp1 = const_part + logt2 * k - steps[k];
2968 temp2 = const_part + logt2 * k2 - steps[k2];
2969 temp3 = EXP(temp1);
2970 temp4 = EXP(temp2);
2971 summ += temp3 + temp4;
2972 // if (fabs (oldsum - summ) < eps)
2973 // break;
2974 //oldsum = summ;
2975 }
2976 return summ;
2977 }
2978
2979 ///
2980 /// Calculates probability of a change of repeat number in time. Used for msat calculations.
2981 /// based on Joe Watkins' 2007 in theoretical population biology, chapter 4.2
2982 /// prob[diff,t,tune, upchance]
2983 /// diff: is negative or positive depending on direction
2984 /// t: is the time
2985 /// tune: 0.0 with the single step mutation model
2986 /// 1.0 with the infinite allele model
2987 /// all values in between allow multiple steps
2988 /// upchance: is the probability that the repeat number increases (upchance < 2/3!)
2989 /// helper is the container for the additional variables
2990 MYREAL
prob_micro_watkins(MYREAL t,long diff,world_fmt * world,pair * helper)2991 prob_micro_watkins (MYREAL t, long diff, world_fmt * world, pair *helper)
2992 {
2993 const MYREAL tune = (*helper)[0];
2994 const MYREAL upchance = (*helper)[1];
2995 //const long locus = world->locus;
2996 MYREAL x;
2997 // long stepnum = world->options->micro_threshold[locus];
2998 MYREAL summ = 0.0;
2999 // MYREAL oldsum = 0.0;
3000 const MYREAL delta = 2. * PI / 100.;
3001 const MYREAL expt = EXP(-t);
3002 MYREAL oneplustunesq;
3003 MYREAL tunep;
3004 MYREAL oneminustunet;
3005 MYREAL sinx;
3006 MYREAL cosx;
3007 MYREAL invdenom;
3008 MYREAL first;
3009 MYREAL second;
3010
3011 //if (diff >= stepnum)
3012 // return summ;
3013
3014 oneplustunesq = 1 + tune * tune ;
3015 tunep = (1.0 - tune) * (2.0 * upchance - 1.0) * t;
3016 oneminustunet = (1.0 - tune) * t;
3017 for (x = -PI; x < PI; x += delta)
3018 {
3019 cosx = cos(x);
3020 sinx = sin(x);
3021 invdenom = (oneplustunesq - 2. * tune * cosx);
3022 if(invdenom < SMALL_VALUE)
3023 invdenom = 1. / SMALL_VALUE;
3024 first = (diff * x + tunep * sinx) * invdenom;
3025 second = oneminustunet * cosx * invdenom;
3026 summ += cos(first) * EXP(second);
3027 }
3028 #ifdef DEBUG
3029 if(summ == 0.0)
3030 error("underflow in msat prob calc\n");
3031 #endif
3032 return expt * INV2PI * delta * summ;
3033 }
3034
3035 ///
3036 /// test implementation of Watkins' calculations
3037 //MYREAL
3038 //prob_micro (MYREAL t, long diff, world_fmt * world)
3039 //{
3040 // return prob_micro_watkins (t, diff, 0.2 , 0.5, world);
3041 //}
3042 //MYINLINE MYREAL
3043 //prob_micro (MYREAL t, long diff, world_fmt * world)
3044 //{
3045 // return prob_micro_singlestep (t, diff, world);
3046 //}
3047
3048
3049 ///
3050 /// conditional likelihoods
3051 void
nuview_sequence(node * mother,world_fmt * world,const long locus)3052 nuview_sequence (node * mother, world_fmt * world, const long locus)
3053 {
3054 const long endsite = world->data->seq[0]->endsite;
3055 long i, j, k;
3056 register MYREAL xx1t0, xx1t1,xx1t2, xx1t3;
3057 register MYREAL xx2t0, xx2t1, xx2t2, xx2t3;
3058 register MYREAL lw1, lw2, yy1, yy2, ww1zz1, vv1zz1, ww2zz2, vv2zz2, vzsumr1,
3059 vzsumr2, vzsumy1, vzsumy2, sum1, sum2, sumr1, sumr2,
3060 sumy1, sumy2, ww1, ww2, zz1, zz2;
3061 register MYREAL freqa, freqc, freqg, freqt;//, freqr, freqy;
3062 register MYREAL freqar, freqcy, freqgr, freqty;
3063 register node *q, *r;
3064 register sitelike *xx1, *xx2, *xx3;
3065 register long rcategs = world->options->rcategs;
3066 register long categs = world->options->categs;
3067 register tbl_fmt tbl = world->tbl;
3068 register seqmodel_fmt *seq;
3069 register valrec *tbl00;
3070 register valrec *tblij;
3071 register valrec *tbljk;
3072 seq = world->data->seq[0];
3073
3074 q = crawlback (mother->next);
3075 r = crawlback (mother->next->next);
3076
3077 freqa = seq->basefrequencies[NUC_A];
3078 freqc = seq->basefrequencies[NUC_C];
3079 freqg = seq->basefrequencies[NUC_G];
3080 freqt = seq->basefrequencies[NUC_T];
3081 //freqr = seq->basefrequencies[NUC_R];
3082 //freqy = seq->basefrequencies[NUC_Y];
3083 freqar = seq->basefrequencies[NUC_AR];
3084 freqcy = seq->basefrequencies[NUC_CY];
3085 freqgr = seq->basefrequencies[NUC_GR];
3086 freqty = seq->basefrequencies[NUC_TY];
3087
3088 lw1 = -q->v * seq->fracchange;
3089
3090 if ((rcategs | categs) == 1)
3091 {
3092 tbl00 = tbl[0][0];
3093 ww1 = EXP (tbl00->ratxi * lw1);
3094 zz1 = EXP (tbl00->ratxv * lw1);
3095 ww1zz1 = ww1 * zz1;
3096 vv1zz1 = (1.0 - ww1) * zz1;
3097 lw2 = -r->v * seq->fracchange;
3098 ww2 = EXP (tbl00->ratxi * lw2);
3099 zz2 = EXP (tbl00->ratxv * lw2);
3100 ww2zz2 = ww2 * zz2;
3101 vv2zz2 = (1.0 - ww2) * zz2;
3102 yy1 = 1.0 - zz1;
3103 yy2 = 1.0 - zz2;
3104 for (i = 0; i < endsite; i++)
3105 {
3106 xx1 = &(q->x.s[i][0]);
3107 xx2 = &(r->x.s[i][0]);
3108 xx3 = &(mother->x.s[i][0]);
3109
3110 xx1t0 = (*xx1)[0];
3111 xx1t1 = (*xx1)[1];
3112 xx1t2 = (*xx1)[2];
3113 xx1t3 = (*xx1)[3];
3114
3115 xx2t0 = (*xx2)[0];
3116 xx2t1 = (*xx2)[1];
3117 xx2t2 = (*xx2)[2];
3118 xx2t3 = (*xx2)[3];
3119
3120 sum1 = yy1 * (freqa * xx1t0 + freqc * xx1t1 + freqg * xx1t2 + freqt * xx1t3);
3121 sum2 = yy2 * (freqa * xx2t0 + freqc * xx2t1 + freqg * xx2t2 + freqt * xx2t3);
3122
3123 sumr1 = freqar * xx1t0 + freqgr * xx1t2;
3124 sumr2 = freqar * xx2t0 + freqgr * xx2t2;
3125 sumy1 = freqcy * xx1t1 + freqty * xx1t3;
3126 sumy2 = freqcy * xx2t1 + freqty * xx2t3;
3127
3128 vzsumr1 = vv1zz1 * sumr1;
3129 vzsumr2 = vv2zz2 * sumr2;
3130 vzsumy1 = vv1zz1 * sumy1;
3131 vzsumy2 = vv2zz2 * sumy2;
3132 (*xx3)[0] =
3133 (sum1 + ww1zz1 * xx1t0 + vzsumr1) * (sum2 +
3134 ww2zz2 * xx2t0 +
3135 vzsumr2);
3136 (*xx3)[1] =
3137 (sum1 + ww1zz1 * xx1t1 + vzsumy1) * (sum2 +
3138 ww2zz2 * xx2t1 +
3139 vzsumy2);
3140 (*xx3)[2] =
3141 (sum1 + ww1zz1 * xx1t2 + vzsumr1) * (sum2 +
3142 ww2zz2 * xx2t2 +
3143 vzsumr2);
3144 (*xx3)[3] =
3145 (sum1 + ww1zz1 * xx1t3 + vzsumy1) * (sum2 +
3146 ww2zz2 * xx2t3 +
3147 vzsumy2);
3148 }
3149 }
3150 else
3151 {
3152 for (i = 0; i < rcategs; i++)
3153 for (j = 0; j < categs; j++)
3154 {
3155 tblij = tbl[i][j];
3156 tblij->ww1 = EXP (tblij->ratxi * lw1);
3157 tblij->zz1 = EXP (tblij->ratxv * lw1);
3158 tblij->ww1zz1 = tblij->ww1 * tblij->zz1;
3159 tblij->vv1zz1 = (1.0 - tblij->ww1) * tblij->zz1;
3160 }
3161 lw2 = -r->v * seq->fracchange;
3162 for (i = 0; i < rcategs; i++)
3163 for (j = 0; j < categs; j++)
3164 {
3165 tblij = tbl[i][j];
3166 tblij->ww2 = EXP (tblij->ratxi * lw2);
3167 tblij->zz2 = EXP (tblij->ratxv * lw2);
3168 tblij->ww2zz2 = tblij->ww2 * tblij->zz2;
3169 tblij->vv2zz2 = (1.0 - tblij->ww2) * tblij->zz2;
3170 }
3171 for (i = 0; i < endsite; i++)
3172 {
3173 k = seq->category[seq->alias[i] - 1] - 1;
3174 for (j = 0; j < rcategs; j++)
3175 {
3176 tbljk = tbl[j][k];
3177 ww1zz1 = tbljk->ww1zz1;
3178 vv1zz1 = tbljk->vv1zz1;
3179 yy1 = 1.0 - tbljk->zz1;
3180 ww2zz2 = tbljk->ww2zz2;
3181 vv2zz2 = tbljk->vv2zz2;
3182 yy2 = 1.0 - tbljk->zz2;
3183 xx1 = &(q->x.s[i][j]);
3184 xx2 = &(r->x.s[i][j]);
3185 xx3 = &(mother->x.s[i][j]);
3186
3187 xx1t0 = (*xx1)[0];
3188 xx1t1 = (*xx1)[1];
3189 xx1t2 = (*xx1)[2];
3190 xx1t3 = (*xx1)[3];
3191
3192 xx2t0 = (*xx2)[0];
3193 xx2t1 = (*xx2)[1];
3194 xx2t2 = (*xx2)[2];
3195 xx2t3 = (*xx2)[3];
3196
3197
3198 sum1 =
3199 yy1 * (freqa * xx1t0 + freqc * xx1t1 +
3200 freqg * xx1t2 + freqt * xx1t3);
3201 sum2 =
3202 yy2 * (freqa * xx2t0 + freqc * xx2t1 +
3203 freqg * xx2t2 + freqt * xx2t3);
3204 sumr1 = freqar * xx1t0 + freqgr * xx1t2;
3205 sumr2 = freqar * xx2t0 + freqgr * xx2t2;
3206 sumy1 = freqcy * xx1t1 + freqty * xx1t3;
3207 sumy2 = freqcy * xx2t1 + freqty * xx2t3;
3208 vzsumr1 = vv1zz1 * sumr1;
3209 vzsumr2 = vv2zz2 * sumr2;
3210 vzsumy1 = vv1zz1 * sumy1;
3211 vzsumy2 = vv2zz2 * sumy2;
3212 (*xx3)[0] =
3213 (sum1 + ww1zz1 * xx1t0 + vzsumr1) * (sum2 +
3214 ww2zz2 * xx2t0 +
3215 vzsumr2);
3216 (*xx3)[1] =
3217 (sum1 + ww1zz1 * xx1t1 + vzsumy1) * (sum2 +
3218 ww2zz2 * xx2t1 +
3219 vzsumy2);
3220 (*xx3)[2] =
3221 (sum1 + ww1zz1 * xx1t2 + vzsumr1) * (sum2 +
3222 ww2zz2 * xx2t2 +
3223 vzsumr2);
3224 (*xx3)[3] =
3225 (sum1 + ww1zz1 * xx1t3 + vzsumy1) * (sum2 +
3226 ww2zz2 * xx2t3 +
3227 vzsumy2);
3228 }
3229 }
3230 }
3231 } /* nuview */
3232
3233 ///
3234 /// accurate version of conditional likleihood calculator using a scaler
3235 #ifdef GAP
nuview_sequence_slow(node * mother,world_fmt * world,const long locus)3236 void nuview_sequence_slow (node * mother, world_fmt * world, const long locus)
3237 {
3238 nuview_f84gap_slow (mother, world, locus);
3239 }
3240 #else
nuview_sequence_slow(node * mother,world_fmt * world,const long locus)3241 void nuview_sequence_slow (node * mother, world_fmt * world, const long locus)
3242 {
3243 //static long count = 0;
3244 const long endsite = world->data->seq[0]->endsite;
3245 long i, j, k;
3246
3247 register MYREAL xx1t0, xx1t1,xx1t2, xx1t3;
3248 register MYREAL xx2t0, xx2t1, xx2t2, xx2t3;
3249
3250 register MYREAL lw1, lw2, yy1, yy2, ww1zz1, vv1zz1, ww2zz2, vv2zz2, vzsumr1,
3251 vzsumr2, vzsumy1, vzsumy2, sum1, sum2, sumr1, sumr2,
3252 sumy1, sumy2, ww1, ww2, zz1, zz2;
3253 MYREAL *sxx1 = NULL, *sxx2 = NULL;
3254 MYREAL sxx3m, tempsxx3m, invsxx3m;
3255 node *q, *r;
3256 sitelike *xx1, *xx2, *xx3;
3257 long rcategs = world->options->rcategs;
3258 long categs = world->options->categs;
3259 tbl_fmt tbl = world->tbl;
3260 register MYREAL freqa, freqc, freqg, freqt;//, freqr, freqy;
3261 register MYREAL freqar, freqcy, freqgr, freqty;
3262 seqmodel_fmt *seq;
3263 valrec *tbl00;
3264 valrec *tblij;
3265 valrec *tbljk;
3266 MYREAL scalesum = 0.0;
3267 boolean newscale=FALSE;;
3268 seq = world->data->seq[0];
3269
3270 freqa = seq->basefrequencies[NUC_A];
3271 freqc = seq->basefrequencies[NUC_C];
3272 freqg = seq->basefrequencies[NUC_G];
3273 freqt = seq->basefrequencies[NUC_T];
3274 //freqr = seq->basefrequencies[NUC_R];
3275 //freqy = seq->basefrequencies[NUC_Y];
3276 freqar = seq->basefrequencies[NUC_AR];
3277 freqcy = seq->basefrequencies[NUC_CY];
3278 freqgr = seq->basefrequencies[NUC_GR];
3279 freqty = seq->basefrequencies[NUC_TY];
3280
3281 q = crawlback (mother->next);
3282 r = crawlback (mother->next->next);
3283 lw1 = -q->v * seq->fracchange;
3284 sxx1 = q->scale;
3285 sxx2 = r->scale;
3286 tbl00 = tbl[0][0];
3287 if ((rcategs | categs) == 1)
3288 {
3289 ww1 = EXP (tbl00->ratxi * lw1);
3290 zz1 = EXP (tbl00->ratxv * lw1);
3291 ww1zz1 = ww1 * zz1;
3292 vv1zz1 = (1.0 - ww1) * zz1;
3293 lw2 = -r->v * seq->fracchange;
3294 ww2 = EXP (tbl00->ratxi * lw2);
3295 zz2 = EXP (tbl00->ratxv * lw2);
3296 ww2zz2 = ww2 * zz2;
3297 vv2zz2 = (1.0 - ww2) * zz2;
3298 yy1 = 1.0 - zz1;
3299 yy2 = 1.0 - zz2;
3300 for (i = 0; i < endsite; i++)
3301 {
3302 xx1 = &(q->x.s[i][0]);
3303 xx2 = &(r->x.s[i][0]);
3304 xx3 = &(mother->x.s[i][0]);
3305
3306 xx1t0 = (*xx1)[0];
3307 xx1t1 = (*xx1)[1];
3308 xx1t2 = (*xx1)[2];
3309 xx1t3 = (*xx1)[3];
3310
3311 xx2t0 = (*xx2)[0];
3312 xx2t1 = (*xx2)[1];
3313 xx2t2 = (*xx2)[2];
3314 xx2t3 = (*xx2)[3];
3315
3316
3317 sum1 = yy1 * (freqa * xx1t0 + freqc * xx1t1 + freqg * xx1t2 + freqt * xx1t3);
3318 sum2 = yy2 * (freqa * xx2t0 + freqc * xx2t1 + freqg * xx2t2 + freqt * xx2t3);
3319 sumr1 = freqar * xx1t0 + freqgr * xx1t2;
3320 sumr2 = freqar * xx2t0 + freqgr * xx2t2;
3321 sumy1 = freqcy * xx1t1 + freqty * xx1t3;
3322 sumy2 = freqcy * xx2t1 + freqty * xx2t3;
3323 vzsumr1 = vv1zz1 * sumr1;
3324 vzsumr2 = vv2zz2 * sumr2;
3325 vzsumy1 = vv1zz1 * sumy1;
3326 vzsumy2 = vv2zz2 * sumy2;
3327 (*xx3)[0] = (sum1 + ww1zz1 * xx1t0 + vzsumr1) * (sum2 + ww2zz2 * xx2t0 + vzsumr2);
3328 scalesum = (*xx3)[0];
3329 (*xx3)[1] =
3330 (sum1 + ww1zz1 * xx1t1 + vzsumy1) * (sum2 +
3331 ww2zz2 * xx2t1 +
3332 vzsumy2);
3333 scalesum += (*xx3)[1];
3334 (*xx3)[2] =
3335 (sum1 + ww1zz1 * xx1t2 + vzsumr1) * (sum2 +
3336 ww2zz2 * xx2t2 +
3337 vzsumr2);
3338 scalesum += (*xx3)[2];
3339 (*xx3)[3] =
3340 (sum1 + ww1zz1 * xx1t3 + vzsumy1) * (sum2 +
3341 ww2zz2 * xx2t3 +
3342 vzsumy2);
3343 scalesum += (*xx3)[3];
3344 mother->scale[i] = sxx1[i] + sxx2[i];
3345 if(!(scalesum > EPSILON))
3346 {
3347 sxx3m = MAX ((*xx3)[0], (*xx3)[1]);
3348 sxx3m = MAX (sxx3m, (*xx3)[2]);
3349 sxx3m = MAX (sxx3m, (*xx3)[3]);
3350 invsxx3m = 1.0/sxx3m;
3351 (*xx3)[0] *= invsxx3m;
3352 (*xx3)[1] *= invsxx3m;
3353 (*xx3)[2] *= invsxx3m;
3354 (*xx3)[3] *= invsxx3m;
3355 mother->scale[i] += LOG (sxx3m);
3356 }
3357 #ifdef DEBUG
3358 // printf("[(%f,%f,%f,%f) %f]",(*xx3)[0],(*xx3)[1],(*xx3)[2],(*xx3)[3],mother->scale[i]);
3359 #endif
3360 }
3361 #ifdef DEBUG
3362 // printf("id=%li\n",mother->id);
3363 #endif
3364 }
3365 else
3366 {
3367 lw2 = -r->v * seq->fracchange;
3368 for (i = 0; i < rcategs; i++)
3369 for (j = 0; j < categs; j++)
3370 {
3371 tblij = tbl[i][j];
3372 tblij->ww1 = EXP (tblij->ratxi * lw1);
3373 tblij->zz1 = EXP (tblij->ratxv * lw1);
3374 tblij->ww1zz1 = tblij->ww1 * tblij->zz1;
3375 tblij->vv1zz1 = (1.0 - tblij->ww1) * tblij->zz1;
3376 }
3377 for (i = 0; i < rcategs; i++)
3378 {
3379 for (j = 0; j < categs; j++)
3380 {
3381 tblij = tbl[i][j];
3382 tblij->ww2 = EXP (tblij->ratxi * lw2);
3383 tblij->zz2 = EXP (tblij->ratxv * lw2);
3384 tblij->ww2zz2 = tblij->ww2 * tblij->zz2;
3385 tblij->vv2zz2 = (1.0 - tblij->ww2) * tblij->zz2;
3386 }
3387 }
3388 for (i = 0; i < endsite; i++)
3389 {
3390 newscale = FALSE;
3391 sxx3m = -MYREAL_MAX;
3392 k = seq->category[seq->alias[i] - 1] - 1;
3393 for (j = 0; j < rcategs; j++)
3394 {
3395 tbljk = tbl[j][k];
3396 ww1zz1 = tbljk->ww1zz1;
3397 vv1zz1 = tbljk->vv1zz1;
3398 yy1 = 1.0 - tbljk->zz1;
3399 ww2zz2 = tbljk->ww2zz2;
3400 vv2zz2 = tbljk->vv2zz2;
3401 yy2 = 1.0 - tbljk->zz2;
3402 xx1 = &(q->x.s[i][j]);
3403 xx2 = &(r->x.s[i][j]);
3404 xx3 = &(mother->x.s[i][j]);
3405
3406
3407 xx1t0 = (*xx1)[0];
3408 xx1t1 = (*xx1)[1];
3409 xx1t2 = (*xx1)[2];
3410 xx1t3 = (*xx1)[3];
3411
3412 xx2t0 = (*xx2)[0];
3413 xx2t1 = (*xx2)[1];
3414 xx2t2 = (*xx2)[2];
3415 xx2t3 = (*xx2)[3];
3416
3417
3418
3419 sum1 =
3420 yy1 * (freqa * xx1t0 + freqc * xx1t1 +
3421 freqg * xx1t2 + freqt * xx1t3);
3422 sum2 =
3423 yy2 * (freqa * xx2t0 + freqc * xx2t1 +
3424 freqg * xx2t2 + freqt * xx2t3);
3425 sumr1 = freqar * xx1t0 + freqgr * xx1t2;
3426 sumr2 = freqar * xx2t0 + freqgr * xx2t2;
3427 sumy1 = freqcy * xx1t1 + freqty * xx1t3;
3428 sumy2 = freqcy * xx2t1 + freqty * xx2t3;
3429 vzsumr1 = vv1zz1 * sumr1;
3430 vzsumr2 = vv2zz2 * sumr2;
3431 vzsumy1 = vv1zz1 * sumy1;
3432 vzsumy2 = vv2zz2 * sumy2;
3433 (*xx3)[0] =
3434 (sum1 + ww1zz1 * xx1t0 + vzsumr1) * (sum2 +
3435 ww2zz2 * xx2t0 +
3436 vzsumr2);
3437 (*xx3)[1] =
3438 (sum1 + ww1zz1 * xx1t1 + vzsumy1) * (sum2 +
3439 ww2zz2 * xx2t1 +
3440 vzsumy2);
3441 (*xx3)[2] =
3442 (sum1 + ww1zz1 * xx1t2 + vzsumr1) * (sum2 +
3443 ww2zz2 * xx2t2 +
3444 vzsumr2);
3445 (*xx3)[3] =
3446 (sum1 + ww1zz1 * xx1t3 + vzsumy1) * (sum2 +
3447 ww2zz2 * xx2t3 +
3448 vzsumy2);
3449 if(!((*xx3)[0]+ (*xx3)[1]+ (*xx3)[2]+ (*xx3)[3]>EPSILON))
3450 newscale=TRUE;
3451 }
3452 if(newscale)
3453 {
3454 sxx3m = -MYREAL_MAX;
3455 for (j = 0; j < rcategs; j++)
3456 {
3457 xx3 = &(mother->x.s[i][j]);
3458 tempsxx3m = MAX ((*xx3)[0], (*xx3)[1]);
3459 tempsxx3m = MAX (tempsxx3m, (*xx3)[2]);
3460 tempsxx3m = MAX (tempsxx3m, (*xx3)[3]);
3461 if (tempsxx3m > sxx3m)
3462 sxx3m = tempsxx3m;
3463 }
3464 invsxx3m = 1.0 / sxx3m;
3465 for (j = 0; j < rcategs; j++)
3466 {
3467 xx3 = &(mother->x.s[i][j]);
3468 (*xx3)[0] *= invsxx3m;
3469 (*xx3)[1] *= invsxx3m;
3470 (*xx3)[2] *= invsxx3m;
3471 (*xx3)[3] *= invsxx3m;
3472 }
3473 mother->scale[i] = LOG (sxx3m) + sxx1[i] + sxx2[i];
3474 }
3475 else
3476 {
3477 mother->scale[i] = sxx1[i] + sxx2[i];
3478 }
3479 }
3480 }
3481 } /* nuview */
3482 #endif /*GAP*/
3483
3484 ///
3485 /// conditional likleihood calcculator using a simplified scheme called ancestral
3486 /// with two lineages the one with the higher value will win and is the ancestor
3487 /// non-Altivec version
3488 void
nuview_ancestral(node * mother,world_fmt * world,const long locus)3489 nuview_ancestral (node * mother, world_fmt * world, const long locus)
3490 {
3491 long i;
3492 const long endsite = world->data->seq[0]->endsite;
3493 MYREAL xx1t0, xx1t1,xx1t2, xx1t3;
3494 MYREAL xx2t0, xx2t1, xx2t2, xx2t3;
3495
3496 MYREAL lw1, lw2, ratio1, yy1, yy2, sum1, sum2;
3497 node *q, *r;
3498 sitelike *xx1, *xx2;
3499 register MYREAL freqa, freqc, freqg, freqt;
3500 //register MYREAL freqar, freqcy, freqgr, freqty;
3501 seqmodel_fmt *seq;
3502 seq = world->data->seq[0];
3503
3504 freqa = seq->basefrequencies[NUC_A];
3505 freqc = seq->basefrequencies[NUC_C];
3506 freqg = seq->basefrequencies[NUC_G];
3507 freqt = seq->basefrequencies[NUC_T];
3508 // freqr = seq->basefrequencies[NUC_R];
3509 //freqy = seq->basefrequencies[NUC_Y];
3510 //freqar = seq->basefrequencies[NUC_AR];
3511 //freqcy = seq->basefrequencies[NUC_CY];
3512 //freqgr = seq->basefrequencies[NUC_GR];
3513 //freqty = seq->basefrequencies[NUC_TY];
3514
3515 q = crawlback (mother->next);
3516 r = crawlback (mother->next->next);
3517 lw1 = -q->v * seq->fracchange;
3518 lw2 = -r->v * seq->fracchange;
3519 ratio1 = lw1 / (lw1 + lw2);
3520 yy1 = (1. - ratio1);
3521 yy2 = ratio1;
3522 //printf("%f ", q->tyme);
3523 // for (i = 0; i < endsite; i++)
3524 //printf("(%f %f %f %f)", q->x.s[i][0][0], q->x.s[i][0][1], q->x.s[i][0][2], q->x.s[i][0][3]);
3525 //printf("\n%f ", r->tyme);
3526 //for (i = 0; i < endsite; i++)
3527 //printf("(%f %f %f %f)", r->x.s[i][0][0], r->x.s[i][0][1], r->x.s[i][0][2], r->x.s[i][0][3]);
3528 //printf("\n");
3529 for (i = 0; i < endsite; i++)
3530 {
3531 xx1 = &(q->x.s[i][0]);
3532 xx2 = &(r->x.s[i][0]);
3533
3534
3535 xx1t0 = (*xx1)[0];
3536 xx1t1 = (*xx1)[1];
3537 xx1t2 = (*xx1)[2];
3538 xx1t3 = (*xx1)[3];
3539
3540 xx2t0 = (*xx2)[0];
3541 xx2t1 = (*xx2)[1];
3542 xx2t2 = (*xx2)[2];
3543 xx2t3 = (*xx2)[3];
3544
3545
3546
3547 sum1 =
3548 yy1 * (freqa * xx1t0 + freqc * xx1t1 +
3549 freqg * xx1t2 + freqt * xx1t3);
3550 sum2 =
3551 yy2 * (freqa * xx2t0 + freqc * xx2t1 +
3552 freqg * xx2t2 + freqt * xx2t3);
3553 if (sum1 == sum2)
3554 sum1 += RANDUM () > 0.5 ? -1. : 1.;
3555 if (sum1 > sum2)
3556 memcpy (mother->x.s[i][0], xx1, sizeof (sitelike));
3557 else
3558 memcpy (mother->x.s[i][0], xx2, sizeof (sitelike));
3559 }
3560 } /* nuview_ancestral */
3561
3562
3563 void
adjustroot(node * r)3564 adjustroot (node * r)
3565 {
3566 r->next->tyme = r->tyme;
3567 r->next->length = r->length;
3568 r->next->v = r->v;
3569 r->next->next->tyme = r->tyme;
3570 r->next->next->length = r->length;
3571 r->next->next->v = r->v;
3572 }
3573
3574
3575 /// \brief Calculation of conditional likelihood for new genealogy
3576 ///
3577 /// Calculation of conditional likelihood for new genealogy
3578 /// most of the code is unrolled for efficiency and may local variables are
3579 /// declared also for speed
3580 /// non-Altivec version
3581 MYINLINE
3582 void
pseudonu_seq(proposal_fmt * proposal,phenotype xxx1,MYREAL * sxx1,MYREAL v1,phenotype xxx2,MYREAL * sxx2,MYREAL v2)3583 pseudonu_seq (proposal_fmt * proposal, phenotype xxx1, MYREAL *sxx1,
3584 MYREAL v1, phenotype xxx2, MYREAL *sxx2, MYREAL v2)
3585 {
3586 const long endsite = proposal->world->data->seq[0]->endsite;
3587
3588 long i, j, k;
3589
3590 register MYREAL xx1t0, xx1t1,xx1t2, xx1t3;
3591 register MYREAL xx2t0, xx2t1, xx2t2, xx2t3;
3592 register MYREAL ta, tc, tg, tt;
3593 register MYREAL tta, ttc, ttg, ttt;
3594 register MYREAL lw1, lw2, yy1, yy2, ww1zz1, vv1zz1, ww2zz2, vv2zz2, vzsumr1,
3595 vzsumr2, vzsumy1, vzsumy2, sum1, sum2, sumr1, sumr2,
3596 sumy1, sumy2, ww1, ww2, zz1, zz2;
3597 register MYREAL freqa, freqc, freqg, freqt, freqar, freqgr, freqcy, freqty;
3598 register seqmodel_fmt *seq = proposal->world->data->seq[0];
3599 register MYREAL xxsumr1, xxsumr2, xxsumy1, xxsumy2;
3600 register valrec *tbl00;
3601 register valrec *tblij;
3602 register valrec *tbljk;
3603 register sitelike *xx1, *xx2 , *xx1copy, *xx2copy;
3604 register long rcategs = proposal->world->options->rcategs;
3605 register long categs = proposal->world->options->categs;
3606 register tbl_fmt tbl = proposal->world->tbl;
3607 freqa = seq->basefrequencies[NUC_A];
3608 freqc = seq->basefrequencies[NUC_C];
3609 freqg = seq->basefrequencies[NUC_G];
3610 freqt = seq->basefrequencies[NUC_T];
3611 //freqr = seq->basefrequencies[NUC_R];
3612 //freqy = seq->basefrequencies[NUC_Y];
3613 freqar = seq->basefrequencies[NUC_AR];
3614 freqcy = seq->basefrequencies[NUC_CY];
3615 freqgr = seq->basefrequencies[NUC_GR];
3616 freqty = seq->basefrequencies[NUC_TY];
3617
3618 lw1 = -v1 * seq->fracchange;
3619 // use shortcut for dataset that do not use mutiple categories
3620 // if (rcategs == 1 && categs == 1)
3621 if ((rcategs | categs ) == 1)
3622 {
3623 tbl00 = tbl[0][0];
3624 lw2 = -v2 * seq->fracchange;
3625
3626 ww1 = EXP (tbl00->ratxi * lw1);
3627 zz1 = EXP (tbl00->ratxv * lw1);
3628 ww2 = EXP (tbl00->ratxi * lw2);
3629 zz2 = EXP (tbl00->ratxv * lw2);
3630
3631 ww1zz1 = ww1 * zz1;
3632 vv1zz1 = (1.0 - ww1) * zz1;
3633
3634 ww2zz2 = ww2 * zz2;
3635 vv2zz2 = (1.0 - ww2) * zz2;
3636
3637 yy1 = 1.0 - zz1;
3638 yy2 = 1.0 - zz2;
3639
3640 for (i = 0; i < endsite; i++)
3641 {
3642 xx1 = xx1copy = &(xxx1[i][0]);
3643 xx2 = xx2copy = &(xxx2[i][0]);
3644
3645 xx1t0 = (*xx1)[0];
3646 ta = freqa * xx1t0;
3647 xx1t1 = (*xx1copy)[1];
3648 xx2t1 = (*xx2)[1];
3649 tc = freqc * xx1t1;
3650 xx1t2 = (*xx1)[2];
3651 xx2t2 = (*xx2copy)[2];
3652 tg = freqg * xx1t2;
3653 xx1t3 = (*xx1copy)[3];
3654 xx2t3 = (*xx2)[3];
3655 xx2t0 = (*xx2copy)[0];
3656 tt = freqt * xx1t3;
3657
3658 tta = freqa * xx2t0;
3659 ttc = freqc * xx2t1;
3660 ttg = freqg * xx2t2;
3661 ttt = freqt * xx2t3;
3662
3663 //sum1 = freqa * xx1t0 + freqc * xx1t1 + freqg * xx1t2 + freqt * xx1t3;
3664 sum1 = ta + tc + tg + tt;
3665 sum2 = tta + ttc + ttg + ttt ;
3666 sum1 *= yy1;
3667 sum2 *= yy2;
3668
3669 sumr1 = freqar * xx1t0 + freqgr * xx1t2;
3670 sumr2 = freqar * xx2t0 + freqgr * xx2t2;
3671 sumy1 = freqcy * xx1t1 + freqty * xx1t3;
3672 sumy2 = freqcy * xx2t1 + freqty * xx2t3;
3673
3674 vzsumr1 = vv1zz1 * sumr1;
3675 vzsumr2 = vv2zz2 * sumr2;
3676 vzsumy1 = vv1zz1 * sumy1;
3677 vzsumy2 = vv2zz2 * sumy2;
3678
3679 xxsumr1 = sum1 + vzsumr1;
3680 xxsumr2 = sum2 + vzsumr2;
3681
3682 (*xx1)[0] = (xxsumr1 + ww1zz1 * xx1t0) * (xxsumr2 + ww2zz2 * xx2t0);
3683 (*xx1)[2] = (xxsumr1 + ww1zz1 * xx1t2) * (xxsumr2 + ww2zz2 * xx2t2);
3684 xxsumy1 = sum1 + vzsumy1;
3685 xxsumy2 = sum2 + vzsumy2;
3686 (*xx1)[1] = (xxsumy1 + ww1zz1 * xx1t1) * (xxsumy2 + ww2zz2 * xx2t1);
3687 (*xx1)[3] = (xxsumy1 + ww1zz1 * xx1t3) * (xxsumy2 + ww2zz2 * xx2t3);
3688 }
3689 }
3690 else
3691 {
3692 for (i = 0; i < rcategs; i++)
3693 for (j = 0; j < categs; j++)
3694 {
3695 tblij = tbl[i][j];
3696 tblij->ww1 = EXP (tblij->ratxi * lw1);
3697 tblij->zz1 = EXP (tblij->ratxv * lw1);
3698 tblij->ww1zz1 = tblij->ww1 * tblij->zz1;
3699 tblij->vv1zz1 = (1.0 - tblij->ww1) * tblij->zz1;
3700 }
3701 lw2 = -v2 * seq->fracchange;
3702 for (i = 0; i < rcategs; i++)
3703 for (j = 0; j < categs; j++)
3704 {
3705 tblij = tbl[i][j];
3706 tblij->ww2 = EXP (tblij->ratxi * lw2);
3707 tblij->zz2 = EXP (tblij->ratxv * lw2);
3708 tblij->ww2zz2 = tblij->ww2 * tblij->zz2;
3709 tblij->vv2zz2 = (1.0 - tblij->ww2) * tblij->zz2;
3710 }
3711 for (i = 0; i < endsite; i++)
3712 {
3713 k = seq->category[seq->alias[i] - 1] - 1;
3714 for (j = 0; j < rcategs; j++)
3715 {
3716 tbljk = tbl[j][k];
3717 ww1zz1 = tbljk->ww1zz1;
3718 vv1zz1 = tbljk->vv1zz1;
3719 yy1 = 1.0 - tbljk->zz1;
3720 ww2zz2 = tbljk->ww2zz2;
3721 vv2zz2 = tbljk->vv2zz2;
3722 yy2 = 1.0 - tbljk->zz2;
3723 xx1 = &(xxx1[i][j]);
3724 xx2 = &(xxx2[i][j]);
3725
3726 xx1t0 = (*xx1)[0];
3727 xx1t1 = (*xx1)[1];
3728 xx1t2 = (*xx1)[2];
3729 xx1t3 = (*xx1)[3];
3730
3731 xx2t0 = (*xx2)[0];
3732 xx2t1 = (*xx2)[1];
3733 xx2t2 = (*xx2)[2];
3734 xx2t3 = (*xx2)[3];
3735
3736 sum1 = yy1 * (freqa * xx1t0 + freqc * xx1t1 +
3737 freqg * xx1t2 + freqt * xx1t3);
3738 sum2 = yy2 * (freqa * xx2t0 + freqc * xx2t1 +
3739 freqg * xx2t2 + freqt * xx2t3);
3740 sumr1 = freqar * xx1t0 + freqgr * xx1t2;
3741 sumr2 = freqar * xx2t0 + freqgr * xx2t2;
3742 sumy1 = freqcy * xx1t1 + freqty * xx1t3;
3743 sumy2 = freqcy * xx2t1 + freqty * xx2t3;
3744 vzsumr1 = vv1zz1 * sumr1;
3745 vzsumr2 = vv2zz2 * sumr2;
3746 /* xx3[j][0] */
3747 (*xx1)[0] =
3748 (sum1 + ww1zz1 * xx1t0 + vzsumr1) * (sum2 +
3749 ww2zz2 * xx2t0 +
3750 vzsumr2);
3751 /* xx3[j][2] */
3752 (*xx1)[2] =
3753 (sum1 + ww1zz1 * xx1t2 + vzsumr1) * (sum2 +
3754 ww2zz2 * xx2t2 +
3755 vzsumr2);
3756 vzsumy1 = vv1zz1 * sumy1;
3757 vzsumy2 = vv2zz2 * sumy2;
3758 /* xx3[j][1] */
3759 (*xx1)[1] =
3760 (sum1 + ww1zz1 * xx1t1 + vzsumy1) * (sum2 +
3761 ww2zz2 * xx2t1 +
3762 vzsumy2);
3763 /* xx3[j][3] */
3764 (*xx1)[3] =
3765 (sum1 + ww1zz1 * xx1t3 + vzsumy1) * (sum2 +
3766 ww2zz2 * xx2t3 +
3767 vzsumy2);
3768 }
3769 }
3770 }
3771 #ifdef BEAGLE
3772 printf(".");
3773 #endif
3774 } /* pseudonu_seq */
3775
3776
3777 ///
3778 /// calculates conditiional likleihood on a fake tree before acceptance/rejection scheme
3779 /// non-altivec version
3780 MYINLINE
3781 void
pseudonu_seq_slow(proposal_fmt * proposal,phenotype xxx1,MYREAL * sxx1,MYREAL v1,phenotype xxx2,MYREAL * sxx2,MYREAL v2)3782 pseudonu_seq_slow (proposal_fmt * proposal, phenotype xxx1, MYREAL *sxx1,
3783 MYREAL v1, phenotype xxx2, MYREAL *sxx2, MYREAL v2)
3784 {
3785
3786 const long endsite = proposal->world->data->seq[0]->endsite;
3787 boolean newscale;
3788 long i, j, k;
3789 //static long count = 0;
3790
3791 register MYREAL xx1t0, xx1t1,xx1t2, xx1t3;
3792 register MYREAL xx2t0, xx2t1, xx2t2, xx2t3;
3793 register MYREAL ta, tc, tg, tt;
3794 register MYREAL tta, ttc, ttg, ttt;
3795
3796 register MYREAL lw1, lw2, yy1, yy2, ww1zz1, vv1zz1, ww2zz2, vv2zz2, vzsumr1,
3797 vzsumr2, vzsumy1, vzsumy2, sum1, sum2, sumr1, sumr2,
3798 sumy1, sumy2, ww1, ww2, zz1, zz2;
3799 register MYREAL freqa, freqc, freqg, freqt, freqar, freqgr, freqcy, freqty;
3800 register seqmodel_fmt *seq = proposal->world->data->seq[0];
3801 register MYREAL xxsumr1, xxsumr2, xxsumy1, xxsumy2;
3802 register MYREAL sxx3m, tempsxx3m, invsxx3m;
3803 register sitelike *xx1, *xx2, *xx1copy, *xx2copy;
3804 register valrec *tblij;
3805 register valrec *tbljk;
3806 register valrec *tbl00;
3807 register long rcategs = proposal->world->options->rcategs;
3808 register long categs = proposal->world->options->categs;
3809 register tbl_fmt tbl = proposal->world->tbl;
3810
3811 freqa = seq->basefrequencies[NUC_A];
3812 freqc = seq->basefrequencies[NUC_C];
3813 freqg = seq->basefrequencies[NUC_G];
3814 freqt = seq->basefrequencies[NUC_T];
3815 //freqr = seq->basefrequencies[NUC_R];
3816 //freqy = seq->basefrequencies[NUC_Y];
3817 freqar = seq->basefrequencies[NUC_AR];
3818 freqcy = seq->basefrequencies[NUC_CY];
3819 freqgr = seq->basefrequencies[NUC_GR];
3820 freqty = seq->basefrequencies[NUC_TY];
3821 lw1 = -v1 * seq->fracchange;
3822 if ((rcategs | categs) == 1)
3823 {
3824 tbl00 = tbl[0][0];
3825 lw2 = -v2 * seq->fracchange;
3826
3827 ww1 = EXP (tbl00->ratxi * lw1);
3828 zz1 = EXP (tbl00->ratxv * lw1);
3829 ww2 = EXP (tbl00->ratxi * lw2);
3830 zz2 = EXP (tbl00->ratxv * lw2);
3831
3832 ww1zz1 = ww1 * zz1;
3833 vv1zz1 = (1.0 - ww1) * zz1;
3834
3835 ww2zz2 = ww2 * zz2;
3836 vv2zz2 = (1.0 - ww2) * zz2;
3837
3838 yy1 = 1.0 - zz1;
3839 yy2 = 1.0 - zz2;
3840
3841 for (i = 0; i < endsite; i++)
3842 {
3843 newscale = FALSE;
3844 xx1 = xx1copy = &(xxx1[i][0]);
3845 xx2 = xx2copy = &(xxx2[i][0]);
3846
3847 xx1t0 = (*xx1)[0];
3848 ta = freqa * xx1t0;
3849 xx1t1 = (*xx1copy)[1];
3850 xx2t1 = (*xx2)[1];
3851 tc = freqc * xx1t1;
3852 xx1t2 = (*xx1)[2];
3853 xx2t2 = (*xx2copy)[2];
3854 tg = freqg * xx1t2;
3855 xx1t3 = (*xx1copy)[3];
3856 xx2t3 = (*xx2)[3];
3857 xx2t0 = (*xx2copy)[0];
3858 tt = freqt * xx1t3;
3859
3860 tta = freqa * xx2t0;
3861 ttc = freqc * xx2t1;
3862 ttg = freqg * xx2t2;
3863 ttt = freqt * xx2t3;
3864
3865 sum1 = ta + tc + tg + tt;
3866 sum2 = tta + ttc + ttg + ttt ;
3867 sum1 *= yy1;
3868 sum2 *= yy2;
3869
3870 sumr1 = freqar * xx1t0 + freqgr * xx1t2;
3871 sumr2 = freqar * xx2t0 + freqgr * xx2t2;
3872 sumy1 = freqcy * xx1t1 + freqty * xx1t3;
3873 sumy2 = freqcy * xx2t1 + freqty * xx2t3;
3874
3875 vzsumr1 = vv1zz1 * sumr1;
3876 vzsumr2 = vv2zz2 * sumr2;
3877 vzsumy1 = vv1zz1 * sumy1;
3878 vzsumy2 = vv2zz2 * sumy2;
3879
3880 xxsumr1 = sum1 + vzsumr1;
3881 xxsumr2 = sum2 + vzsumr2;
3882
3883 (*xx1)[0] = (xxsumr1 + ww1zz1 * xx1t0) * (xxsumr2 + ww2zz2 * xx2t0);
3884 (*xx1)[2] = (xxsumr1 + ww1zz1 * xx1t2) * (xxsumr2 + ww2zz2 * xx2t2);
3885 xxsumy1 = sum1 + vzsumy1;
3886 xxsumy2 = sum2 + vzsumy2;
3887 (*xx1)[1] = (xxsumy1 + ww1zz1 * xx1t1) * (xxsumy2 + ww2zz2 * xx2t1);
3888 (*xx1)[3] = (xxsumy1 + ww1zz1 * xx1t3) * (xxsumy2 + ww2zz2 * xx2t3);
3889 sxx1[i] += sxx2[i];
3890 if(!((*xx1)[0]+ (*xx1)[1]+ (*xx1)[2]+ (*xx1)[3]>EPSILON))
3891 {
3892 // if(!((*xx1)[0]+ (*xx1)[1]+ (*xx1)[2]+ (*xx1)[3]>0.0))
3893 // {
3894 // warning("pseudolike problem\n");
3895 // }
3896 sxx3m = -MYREAL_MAX;
3897 tempsxx3m = MAX ((*xx1)[0], (*xx1)[1]);
3898 tempsxx3m = MAX (tempsxx3m, (*xx1)[2]);
3899 tempsxx3m = MAX (tempsxx3m, (*xx1)[3]);
3900 if (tempsxx3m > sxx3m)
3901 sxx3m = tempsxx3m;
3902 invsxx3m = 1. / sxx3m;
3903 (*xx1)[0] *= invsxx3m, (*xx1)[1] *= invsxx3m;
3904 (*xx1)[2] *= invsxx3m, (*xx1)[3] *= invsxx3m;
3905 sxx1[i] += LOG (sxx3m);// + sxx2[i];
3906 }
3907 }
3908 }
3909 else
3910 {
3911 lw2 = -v2 * seq->fracchange;
3912 for (i = 0; i < rcategs; i++)
3913 {
3914 for (j = 0; j < categs; j++)
3915 {
3916 tblij = tbl[i][j];
3917 tblij->ww1 = EXP (tblij->ratxi * lw1);
3918 tblij->zz1 = EXP (tblij->ratxv * lw1);
3919 tblij->ww1zz1 = tblij->ww1 * tblij->zz1;
3920 tblij->vv1zz1 = (1.0 - tblij->ww1) * tblij->zz1;
3921 tblij->ww2 = EXP (tblij->ratxi * lw2);
3922 tblij->zz2 = EXP (tblij->ratxv * lw2);
3923 tblij->ww2zz2 = tblij->ww2 * tblij->zz2;
3924 tblij->vv2zz2 = (1.0 - tblij->ww2) * tblij->zz2;
3925 }
3926 }
3927 for (i = 0; i < endsite; i++)
3928 {
3929 newscale = FALSE;
3930 sxx3m = -MYREAL_MAX;
3931 k = seq->category[seq->alias[i] - 1] - 1;
3932 for (j = 0; j < rcategs; j++)
3933 {
3934 tbljk = tbl[j][k];
3935 ww1zz1 = tbljk->ww1zz1;
3936 vv1zz1 = tbljk->vv1zz1;
3937 yy1 = 1.0 - tbljk->zz1;
3938 ww2zz2 = tbljk->ww2zz2;
3939 vv2zz2 = tbljk->vv2zz2;
3940 yy2 = 1.0 - tbljk->zz2;
3941 xx1 = xx1copy = &(xxx1[i][j]);
3942 xx2 = xx2copy = &(xxx2[i][j]);
3943
3944 xx1t0 = (*xx1)[0];
3945 ta = freqa * xx1t0;
3946 xx1t1 = (*xx1copy)[1];
3947 xx2t1 = (*xx2)[1];
3948 tc = freqc * xx1t1;
3949 xx1t2 = (*xx1)[2];
3950 xx2t2 = (*xx2copy)[2];
3951 tg = freqg * xx1t2;
3952 xx1t3 = (*xx1copy)[3];
3953 xx2t3 = (*xx2)[3];
3954 xx2t0 = (*xx2copy)[0];
3955 tt = freqt * xx1t3;
3956
3957 tta = freqa * xx2t0;
3958 ttc = freqc * xx2t1;
3959 ttg = freqg * xx2t2;
3960 ttt = freqt * xx2t3;
3961
3962 sum1 = ta + tc + tg + tt;
3963 sum2 = tta + ttc + ttg + ttt ;
3964 sum1 *= yy1;
3965 sum2 *= yy2;
3966
3967 sumr1 = freqar * xx1t0 + freqgr * xx1t2;
3968 sumr2 = freqar * xx2t0 + freqgr * xx2t2;
3969 sumy1 = freqcy * xx1t1 + freqty * xx1t3;
3970 sumy2 = freqcy * xx2t1 + freqty * xx2t3;
3971
3972 vzsumr1 = vv1zz1 * sumr1;
3973 vzsumr2 = vv2zz2 * sumr2;
3974 vzsumy1 = vv1zz1 * sumy1;
3975 vzsumy2 = vv2zz2 * sumy2;
3976
3977 xxsumr1 = sum1 + vzsumr1;
3978 xxsumr2 = sum2 + vzsumr2;
3979 xxsumy1 = sum1 + vzsumy1;
3980 xxsumy2 = sum2 + vzsumy2;
3981
3982 (*xx1)[0] = (xxsumr1 + ww1zz1 * xx1t0) * (xxsumr2 + ww2zz2 * xx2t0);
3983 (*xx1)[1] = (xxsumy1 + ww1zz1 * xx1t1) * (xxsumy2 + ww2zz2 * xx2t1);
3984 (*xx1)[2] = (xxsumr1 + ww1zz1 * xx1t2) * (xxsumr2 + ww2zz2 * xx2t2);
3985 (*xx1)[3] = (xxsumy1 + ww1zz1 * xx1t3) * (xxsumy2 + ww2zz2 * xx2t3);
3986 if(!((*xx1)[0]+ (*xx1)[1]+ (*xx1)[2]+ (*xx1)[3]>EPSILON))
3987 newscale=TRUE;
3988 }
3989 if(newscale)
3990 {
3991 sxx3m = -MYREAL_MAX;
3992 for (j = 0; j < rcategs; j++)
3993 {
3994 xx1 = &(xxx1[i][j]);
3995 tempsxx3m = MAX ((*xx1)[0], (*xx1)[1]);
3996 tempsxx3m = MAX (tempsxx3m, (*xx1)[2]);
3997 tempsxx3m = MAX (tempsxx3m, (*xx1)[3]);
3998 if (tempsxx3m > sxx3m)
3999 sxx3m = tempsxx3m;
4000 }
4001 invsxx3m = 1. / sxx3m;
4002 for (j = 0; j < rcategs; j++)
4003 {
4004 xx1 = &(xxx1[i][j]);
4005 (*xx1)[0] *= invsxx3m, (*xx1)[1] *= invsxx3m;
4006 (*xx1)[2] *= invsxx3m, (*xx1)[3] *= invsxx3m;
4007 }
4008 sxx1[i] += LOG (sxx3m) + sxx2[i];
4009 }
4010 else
4011 {
4012 sxx1[i] += sxx2[i];
4013 }
4014 }
4015 /* --- old scaling scheme ----- replaced with test above
4016 count++;
4017 if (count >= SCALEINTERVAL)
4018 {
4019 count = 0;
4020 for (i = 0; i < endsite; i++)
4021 {
4022 sxx3m = -MYREAL_MAX;
4023 for (j = 0; j < rcategs; j++)
4024 {
4025 xx1 = &(xxx1[i][j]);
4026 tempsxx3m = MAX ((*xx1)[0], (*xx1)[1]);
4027 tempsxx3m = MAX (tempsxx3m, (*xx1)[2]);
4028 tempsxx3m = MAX (tempsxx3m, (*xx1)[3]);
4029 if (tempsxx3m > sxx3m)
4030 sxx3m = tempsxx3m;
4031 }
4032 invsxx3m = 1. / sxx3m;
4033 for (j = 0; j < rcategs; j++)
4034 {
4035 xx1 = &(xxx1[i][j]);
4036 (*xx1)[0] *= invsxx3m, (*xx1)[1] *= invsxx3m;
4037 (*xx1)[2] *= invsxx3m, (*xx1)[3] *= invsxx3m;
4038 }
4039 sxx1[i] += LOG (sxx3m) + sxx2[i];
4040 }
4041 }
4042 }
4043 end of old scaling scheme------------*********/
4044 }
4045 }
4046 /* pseudonu_seq */
4047 ///
4048 /// calculates ancestral likelihood before acc/reject scheme
4049 /// non-atlivec version
4050 MYINLINE
4051 void
pseudonu_anc(proposal_fmt * proposal,phenotype xxx1,MYREAL v1,phenotype xxx2,MYREAL v2)4052 pseudonu_anc (proposal_fmt * proposal, phenotype xxx1, MYREAL v1,
4053 phenotype xxx2, MYREAL v2)
4054 {
4055 long i;
4056 const long endsite = proposal->world->data->seq[0]->endsite;
4057 MYREAL xx1t0, xx1t1,xx1t2, xx1t3;
4058 MYREAL xx2t0, xx2t1, xx2t2, xx2t3;
4059
4060 MYREAL lw1, lw2, ratio1, yy1, yy2, sum1, sum2;
4061 MYREAL freqa, freqc, freqg, freqt;
4062 seqmodel_fmt *seq;
4063 sitelike *xx1, *xx2;
4064 seq = proposal->world->data->seq[0];
4065 freqa = seq->basefrequencies[NUC_A];
4066 freqc = seq->basefrequencies[NUC_C];
4067 freqg = seq->basefrequencies[NUC_G];
4068 freqt = seq->basefrequencies[NUC_T];
4069 // freqr = seq->basefrequencies[NUC_R];
4070 //freqy = seq->basefrequencies[NUC_Y];
4071 //freqar = seq->basefrequencies[NUC_AR];
4072 //freqcy = seq->basefrequencies[NUC_CY];
4073 //freqgr = seq->basefrequencies[NUC_GR];
4074 //freqty = seq->basefrequencies[NUC_TY];
4075
4076 lw1 = -v1 * seq->fracchange;
4077 lw2 = -v2 * seq->fracchange;
4078 ratio1 = lw1 / (lw1 + lw2);
4079 yy1 = (1. - ratio1);
4080 yy2 = ratio1;
4081 //for (i = 0; i < endsite; i++)
4082 //printf("(%f %f %f %f)", xxx1[i][0][0], xxx1[i][0][1], xxx1[i][0][2], xxx1[i][0][3]);
4083 //printf("\n");
4084 //for (i = 0; i < endsite; i++)
4085 //printf("(%f %f %f %f)", xxx2[i][0][0], xxx2[i][0][1], xxx2[i][0][2], xxx2[i][0][3]);
4086 //printf("\n");
4087 for (i = 0; i < endsite; i++)
4088 {
4089 xx1 = &(xxx1[i][0]);
4090 xx2 = &(xxx2[i][0]);
4091
4092
4093 xx1t0 = (*xx1)[0];
4094 xx1t1 = (*xx1)[1];
4095 xx1t2 = (*xx1)[2];
4096 xx1t3 = (*xx1)[3];
4097
4098 xx2t0 = (*xx2)[0];
4099 xx2t1 = (*xx2)[1];
4100 xx2t2 = (*xx2)[2];
4101 xx2t3 = (*xx2)[3];
4102
4103
4104 sum1 =
4105 yy1 * (freqa * xx1t0 + freqc * xx1t1 + freqg * xx1t2 +
4106 freqt * xx1t3);
4107 sum2 =
4108 yy2 * (freqa * xx2t0 + freqc * xx2t1 + freqg * xx2t2 +
4109 freqt * xx2t3);
4110 if (sum1 == sum2)
4111 sum1 += RANDUM () > 0.5 ? -1. : 1.;
4112 if (sum1 > sum2)
4113 memcpy (xxx1[i][0], *xx1, sizeof (sitelike));
4114 else
4115 memcpy (xxx1[i][0], *xx2, sizeof (sitelike));
4116 }
4117 } /* pseudo_nu_anc */
4118
4119 ///
4120 /// ancestral conditional method, calculates ancestral tree likelihood
4121 MYINLINE MYREAL
pseudo_tl_anc(phenotype xx1,phenotype xx2,MYREAL v1,MYREAL v2,proposal_fmt * proposal,world_fmt * world)4122 pseudo_tl_anc (phenotype xx1, phenotype xx2, MYREAL v1, MYREAL v2,
4123 proposal_fmt * proposal, world_fmt * world)
4124 {
4125 const long endsite = proposal->world->data->seq[0]->endsite;
4126 contribarr tterm;
4127 MYREAL summ;
4128 long i;
4129 sitelike *x1;
4130
4131 register MYREAL freqa, freqc, freqg, freqt;
4132 seqmodel_fmt *seq;
4133 seq = proposal->world->data->seq[0];
4134
4135 freqa = seq->basefrequencies[NUC_A];
4136 freqc = seq->basefrequencies[NUC_C];
4137 freqg = seq->basefrequencies[NUC_G];
4138 freqt = seq->basefrequencies[NUC_T];
4139
4140 seq = world->data->seq[0];
4141 summ = 0.0;
4142 for (i = 0; i < endsite; i++)
4143 {
4144 x1 = &(xx1[i][0]);
4145 tterm[0] =
4146 freqa * (*x1)[0] + freqc * (*x1)[1] + freqg * (*x1)[2] +
4147 freqt * (*x1)[3];
4148 summ += seq->aliasweight[i] * LOG (tterm[0]);
4149 //printf("pseudo %3li> %f %f \n", i, tterm[0], summ);
4150 }
4151 return summ;
4152 } /*anc*/
4153
4154
4155 ///
4156 /// calculates the conditional likelihood on a tree
4157 MYINLINE MYREAL
pseudo_tl_seq(phenotype xx1,phenotype xx2,MYREAL v1,MYREAL v2,proposal_fmt * proposal,world_fmt * world)4158 pseudo_tl_seq (phenotype xx1, phenotype xx2, MYREAL v1, MYREAL v2,
4159 proposal_fmt * proposal, world_fmt * world)
4160 {
4161 const long endsite = proposal->world->data->seq[0]->endsite;
4162 contribarr tterm;
4163 contribarr clai;
4164 contribarr like;
4165 contribarr nulike;
4166 //long size = sizeof(MYREAL) * world->options->rcategs;
4167 MYREAL summ = 0.0;
4168 MYREAL sum2 = 0.0;
4169 MYREAL sumc = 0.0;
4170 MYREAL sumterm = 0.0;
4171 MYREAL lterm;
4172 long i;
4173 long j;
4174 long k;
4175 long lai;
4176 worldoption_fmt *opt;
4177 register MYREAL freqa, freqc, freqg, freqt;//, freqr, freqy;
4178 seqmodel_fmt *seq = proposal->world->data->seq[0];
4179 sitelike *x1;
4180
4181 freqa = seq->basefrequencies[NUC_A];
4182 freqc = seq->basefrequencies[NUC_C];
4183 freqg = seq->basefrequencies[NUC_G];
4184 freqt = seq->basefrequencies[NUC_T];
4185 //freqr = seq->basefrequencies[NUC_R];
4186 //freqy = seq->basefrequencies[NUC_Y];
4187
4188 opt = world->options;
4189 summ = 0.0;
4190 if (opt->rcategs == 1 && opt->categs == 1)
4191 {
4192 for (i = 0; i < endsite; i++)
4193 {
4194 x1 = &(xx1[i][0]);
4195 tterm[0] =
4196 freqa * (*x1)[0] + freqc * (*x1)[1] + freqg * (*x1)[2] +
4197 freqt * (*x1)[3];
4198 summ += seq->aliasweight[i] * (LOG (tterm[0]) + proposal->mf[i]);
4199 }
4200 }
4201 else
4202 {
4203 for (i = 0; i < endsite; i++)
4204 {
4205 for (j = 0; j < opt->rcategs; j++)
4206 {
4207 x1 = &(xx1[i][j]);
4208 tterm[j] = freqa * (*x1)[0] + freqc * (*x1)[1] + freqg * (*x1)[2] + freqt * (*x1)[3];
4209 }
4210 sumterm = 0.0;
4211 for (j = 0; j < opt->rcategs; j++)
4212 sumterm += opt->probcat[j] * tterm[j];
4213 lterm = LOG (sumterm) + proposal->mf[i];
4214 for (j = 0; j < opt->rcategs; j++)
4215 clai[j] = tterm[j] / sumterm;
4216 swap (clai, world->contribution[i]);
4217 //memcpy(world->contribution[i], clai, size);
4218 summ += seq->aliasweight[i] * lterm;
4219 }
4220 for (j = 0; j < opt->rcategs; j++)
4221 like[j] = 1.0;
4222 for (i = 0; i < seq->sites[world->locus]; i++)
4223 {
4224 sumc = 0.0;
4225 for (k = 0; k < opt->rcategs; k++)
4226 sumc += opt->probcat[k] * like[k];
4227 sumc *= opt->lambda;
4228 if ((seq->ally[i] > 0) && (seq->location[seq->ally[i] - 1] > 0))
4229 {
4230 lai = seq->location[seq->ally[i] - 1];
4231 swap (world->contribution[lai - 1], clai);
4232 //memcpy(clai, world->contribution[lai - 1], size);
4233 for (j = 0; j < opt->rcategs; j++)
4234 nulike[j] = ((1.0 - opt->lambda) * like[j] + sumc) * clai[j];
4235 }
4236 else
4237 {
4238 for (j = 0; j < opt->rcategs; j++)
4239 nulike[j] = ((1.0 - opt->lambda) * like[j] + sumc);
4240 }
4241 swap (nulike, like);
4242 //memcpy(like, nulike, size);
4243 }
4244 sum2 = 0.0;
4245 for (i = 0; i < opt->rcategs; i++)
4246 sum2 += opt->probcat[i] * like[i];
4247 summ += LOG (sum2);
4248 }
4249 // printf("summ=%f\n",summ);
4250 return summ;
4251 }
4252
4253 MYINLINE
4254 MYREAL
pseudo_tl_snp(phenotype xx1,phenotype xx2,MYREAL v1,MYREAL v2,proposal_fmt * proposal,world_fmt * world)4255 pseudo_tl_snp (phenotype xx1, phenotype xx2, MYREAL v1, MYREAL v2,
4256 proposal_fmt * proposal, world_fmt * world)
4257 {
4258 const long endsite = world->data->seq[0]->endsite;
4259 contribarr tterm, invariants;
4260 contribarr like, nulike, clai;
4261 //long size = sizeof(MYREAL) * world->options->rcategs;
4262 MYREAL summ, sum2, sumc, sumterm, lterm;
4263 long i, j, k, lai;
4264 worldoption_fmt *opt = world->options;
4265 register MYREAL freqa, freqc, freqg, freqt;//, freqr, freqy;
4266 seqmodel_fmt *seq = world->data->seq[0];
4267 sitelike *x1;
4268 freqa = seq->basefrequencies[NUC_A];
4269 freqc = seq->basefrequencies[NUC_C];
4270 freqg = seq->basefrequencies[NUC_G];
4271 freqt = seq->basefrequencies[NUC_T];
4272 //freqr = seq->basefrequencies[NUC_R];
4273 //freqy = seq->basefrequencies[NUC_Y];
4274
4275 summ = 0.0;
4276 memset (invariants, 0, sizeof (contribarr));
4277 snp_invariants (invariants, world, world->locus, xx1,proposal->mf);
4278 if ((opt->rcategs | opt->categs) == 1)
4279 {
4280 for (i = 0; i < endsite - seq->addon; i++)
4281 {
4282 x1 = &(xx1[i][0]);
4283 tterm[0] =
4284 ((freqa * (*x1)[0] + freqc * (*x1)[1] +
4285 freqg * (*x1)[2] + freqt * (*x1)[3]));
4286 if (tterm[0] == 0.0)
4287 {
4288 #ifdef TESTDEBUG
4289 fprintf(stderr,"Freq/condL=(%f,%f | %f,%f | %f,%f | %f,%f) Invars=%f\n",
4290 freqa , (*x1)[0] , freqc , (*x1)[1] ,
4291 freqg , (*x1)[2] , freqt , (*x1)[3] , invariants[0]);
4292
4293 warning ("Tree incompatible with data\n");
4294 #endif
4295 return -HUGE;
4296 }
4297 lterm = LOG (tterm[0]) + proposal->mf[i] - log(invariants[0]);
4298 summ += seq->aliasweight[i] * lterm;
4299 }
4300 return summ;
4301 }
4302 else
4303 {
4304 for (i = 0; i < endsite - 4; i++)
4305 {
4306 // k = seq->category[seq->alias[i] - 1] - 1;
4307 for (j = 0; j < opt->rcategs; j++)
4308 {
4309 x1 = &(xx1[i][j]);
4310 tterm[j] =
4311 (freqa * (*x1)[0] + freqc * (*x1)[1] +
4312 freqg * (*x1)[2] +
4313 freqt * (*x1)[3]) / invariants[j];
4314 }
4315 sumterm = 0.0;
4316 for (j = 0; j < opt->rcategs; j++)
4317 sumterm += opt->probcat[j] * tterm[j];
4318 lterm = LOG (sumterm) + proposal->mf[i];
4319 for (j = 0; j < opt->rcategs; j++)
4320 clai[j] = tterm[j] / sumterm;
4321 swap (clai, world->contribution[i]);
4322 summ += seq->aliasweight[i] * lterm;
4323 }
4324 for (j = 0; j < opt->rcategs; j++)
4325 like[j] = 1.0;
4326 for (i = 0; i < seq->sites[world->locus]; i++)
4327 {
4328 sumc = 0.0;
4329 for (k = 0; k < opt->rcategs; k++)
4330 sumc += opt->probcat[k] * like[k];
4331 sumc *= opt->lambda;
4332 if ((seq->ally[i] > 0) && (seq->location[seq->ally[i] - 1] > 0))
4333 {
4334 lai = seq->location[seq->ally[i] - 1];
4335 swap (world->contribution[lai - 1], clai);
4336 for (j = 0; j < opt->rcategs; j++)
4337 nulike[j] = ((1.0 - opt->lambda) * like[j] + sumc) * clai[j];
4338 }
4339 else
4340 {
4341 for (j = 0; j < opt->rcategs; j++)
4342 nulike[j] = ((1.0 - opt->lambda) * like[j] + sumc);
4343 }
4344 swap (nulike, like);
4345 //memcpy(like, nulike, size);
4346 }
4347 sum2 = 0.0;
4348 for (i = 0; i < opt->rcategs; i++)
4349 sum2 += opt->probcat[i] * like[i];
4350 summ += LOG (sum2);
4351 return summ;
4352 }
4353 }
4354
4355 MYINLINE
4356 MYREAL
pseudo_tl_snp_unlinked(phenotype xx1,phenotype xx2,MYREAL v1,MYREAL v2,proposal_fmt * proposal,world_fmt * world)4357 pseudo_tl_snp_unlinked (phenotype xx1, phenotype xx2, MYREAL v1, MYREAL v2,
4358 proposal_fmt * proposal, world_fmt * world)
4359 {
4360 const long endsite = world->data->seq[0]->endsite;
4361 contribarr tterm, invariants;
4362 MYREAL summ, datasum = 0, lterm, result = 0;
4363 long i;
4364 //worldoption_fmt *opt;
4365 MYREAL freqa, freqc, freqg, freqt;//, freqr, freqy;
4366 seqmodel_fmt *seq = proposal->world->data->seq[0];
4367 sitelike *x1;
4368 freqa = seq->basefrequencies[NUC_A];
4369 freqc = seq->basefrequencies[NUC_C];
4370 freqg = seq->basefrequencies[NUC_G];
4371 freqt = seq->basefrequencies[NUC_T];
4372 //freqr = seq->basefrequencies[NUC_R];
4373 // freqy = seq->basefrequencies[NUC_Y];
4374
4375 //opt = world->options;
4376 seq = world->data->seq[0];
4377 summ = 0.0;
4378 memset (invariants, 0, sizeof (contribarr));
4379 snp_invariants (invariants, world, world->locus, xx1, proposal->mf);
4380 for (i = 0; i < endsite; i++)
4381 {
4382 x1 = &(xx1[i][0]);
4383 tterm[0] =
4384 (freqa * (*x1)[0] + freqc * (*x1)[1] +
4385 freqg * (*x1)[2] + freqt * (*x1)[3]);
4386 if (tterm[0] == 0.0)
4387 error ("Tree incompatible with data\n");
4388
4389 if (i % 5 == 0)
4390 {
4391 lterm = LOG (tterm[0]) + proposal->mf[i];
4392 summ = 0;
4393 datasum = seq->aliasweight[i / 5] * lterm;
4394 }
4395 else
4396 summ += pow (tterm[0], (MYREAL) seq->aliasweight[i / 5]);
4397 if (((i + 1) % 5) == 0 && i != 0)
4398 result +=
4399 datasum + LOG ((1 - EXP (LOG (summ) - datasum)) / invariants[0]);
4400 }
4401 //EXP (sum) is the prob(xa | g)
4402 // EXP (datasum) is prob(? a | g)
4403 // panelsum = invariants is prob(x ? |g)
4404 // (datasum - sum) / invariants
4405 // ++some small number business
4406 return result;
4407 }
4408
4409
4410
4411
4412 MYREAL
treelike_anc(world_fmt * world,long locus)4413 treelike_anc (world_fmt * world, long locus)
4414 {
4415 const long endsite = world->data->seq[0]->endsite;
4416 contribarr tterm;
4417
4418 MYREAL summ;
4419 long i;
4420 node *p;
4421 seqmodel_fmt *seq = world->data->seq[0];
4422 register MYREAL freqa, freqc,freqg,freqt;
4423 sitelike *x1;
4424 freqa = seq->basefrequencies[NUC_A];
4425 freqc = seq->basefrequencies[NUC_C];
4426 freqg = seq->basefrequencies[NUC_G];
4427 freqt = seq->basefrequencies[NUC_T];
4428
4429 p = crawlback (world->root->next);
4430 summ = 0.0;
4431 for (i = 0; i < endsite; i++)
4432 {
4433 x1 = &(p->x.s[i][0]);
4434 tterm[0] =
4435 freqa * (*x1)[0] + freqc * (*x1)[1] +
4436 freqg * (*x1)[2] + freqt * (*x1)[3];
4437 summ += seq->aliasweight[i] * LOG (tterm[0]);
4438 //printf("real %3li> %f %f \n", i, tterm[0], summ);
4439 }
4440 return summ;
4441 } /* treelike_anc */
4442
4443
4444 ///
4445 /// write a tree to a diskfile, compile setting will allow to do this as a NEXUS file
4446 void
treeout(FILE * file,node * joint,node * p,long s)4447 treeout (FILE * file, node * joint, node * p, long s)
4448 {
4449 /* write out file with representation of final tree */
4450 MYREAL x;
4451 char migstring[30];
4452 if (p->type == 't')
4453 {
4454 #ifdef UEP
4455 if(p->uep!=NULL)
4456 FPRINTF (file, "%s [%c]", p->nayme, p->uep[0]);
4457 else
4458 FPRINTF (file, "%s ", p->nayme);
4459 #else
4460 FPRINTF (file, "%s ", p->nayme);
4461 #endif
4462 #ifdef TREECOMMENTS
4463 FPRINTF(file,"[& t:%.10f]",p->tyme);
4464 #endif
4465 }
4466 else
4467 {
4468 FPRINTF (file, "(");
4469 treeout (file, joint, crawlback (p->next), s);
4470 FPRINTF (file, ",");
4471 treeout (file, joint, crawlback (p->next->next), s);
4472 FPRINTF (file, ")");
4473 #ifdef TREECOMMENTS
4474 FPRINTF(file,"[& c:%.10f]",p->tyme);
4475 #endif
4476 }
4477 if (p != joint)
4478 {
4479 x = crawlback (p)->tyme - p->tyme;
4480 #ifdef UEP
4481 if(p->uep!=NULL)
4482 FPRINTF (file, ":%.10f [%c]", x, p->uep[0]);
4483 else
4484 FPRINTF (file, ":%.10f ", x);
4485 #else
4486 FPRINTF (file, ":%.10f ", x);
4487 #endif
4488 p = showtop (p->back);
4489 while (p->type == 'm')
4490 {
4491 sprintf (migstring, " [&M %li %li:%g]", p->pop, p->actualpop,
4492 p->tyme - showtop (p->next->back)->tyme);
4493 FPRINTF (file, "%s", migstring);
4494 p = showtop (p->back);
4495 }
4496 }
4497 else
4498 {
4499 #ifdef NEXUSTREE
4500 FPRINTF (file, ";\n");
4501 #else
4502 FPRINTF (file, ":0;\n");
4503 #endif
4504 }
4505 } /* treeout */
4506
4507 ///
4508 /// write a tree to a diskfile, compile setting will allow to do this as a NEXUS file
4509 void
debugtreeout(FILE * file,node * joint,node * p,long s)4510 debugtreeout (FILE * file, node * joint, node * p, long s)
4511 {
4512 /* write out file with representation of final tree */
4513 // MYREAL x;
4514 char migstring[30];
4515 if (p->type == 't')
4516 {
4517 FPRINTF (file, "%li ", p->id);
4518 }
4519 else
4520 {
4521 FPRINTF (file, "(");
4522 debugtreeout (file, joint, crawlback (p->next), s);
4523 FPRINTF (file, ",");
4524 debugtreeout (file, joint, crawlback (p->next->next), s);
4525 FPRINTF (file, ")<%li>",p->id);
4526 }
4527 if (p != joint)
4528 {
4529 //x = crawlback (p)->tyme - p->tyme;
4530 // FPRINTF (file, ":%.10f ", x);
4531 p = showtop (p->back);
4532 while (p->type == 'm')
4533 {
4534 sprintf (migstring, " [%li-%li]", p->next->id, p->id);
4535 FPRINTF (file, "%s", migstring);
4536 p = showtop (p->back);
4537 }
4538 }
4539 else
4540 {
4541 FPRINTF (file, ":0\n");
4542
4543 }
4544 } /* treeout */
4545
4546 ///
4547 /// writes tree in newick format to a string
4548 void
treeout_string(char ** file,long * filesize,long * pos,node * joint,node * p,long s)4549 treeout_string (char ** file, long *filesize, long *pos, node * joint, node * p, long s)
4550 {
4551 char *tmp;
4552 MYREAL x;
4553 char migstring[30];
4554 tmp = (char *) mycalloc(1024,sizeof(char));
4555 /* write out file with representation of final tree */
4556 // long w;
4557 if (p->type == 't')
4558 {
4559 #ifdef UEP
4560 if(p->uep!=NULL)
4561 print_to_buffer(file, filesize, tmp, pos, "%s [%c]", p->nayme, p->uep[0]);
4562 else
4563 print_to_buffer(file, filesize, tmp, pos, "%s ", p->nayme);
4564 #else
4565 print_to_buffer(file, filesize, tmp, pos, "%s ", p->nayme);
4566 #endif
4567 #ifdef TREECOMMENTS
4568 print_to_buffer(file, filesize, tmp, pos,"[& t:%.10f]",p->tyme);
4569 #endif
4570 }
4571 else
4572 {
4573 print_to_buffer(file, filesize, tmp, pos, "(");
4574 treeout_string (file, filesize, pos, joint, crawlback (p->next), s);
4575 print_to_buffer(file, filesize, tmp, pos, ",");
4576 treeout_string (file, filesize, pos, joint, crawlback (p->next->next), s);
4577 print_to_buffer(file, filesize, tmp, pos, ")");
4578 #ifdef TREECOMMENTS
4579 print_to_buffer(file, filesize, tmp, pos,"[& c:%.10f]",p->tyme);
4580 #endif
4581
4582 }
4583 if (p == joint)
4584 {
4585 x = 0.0;
4586 }
4587 else
4588 {
4589 x = crawlback (p)->tyme - p->tyme;
4590 }
4591 #ifdef UEP
4592 if(p->uep!=NULL)
4593 print_to_buffer(file, filesize, tmp, pos, ":%.10f [%c]", x, p->uep[0]);
4594 else
4595 print_to_buffer(file, filesize, tmp, pos, ":%.10f ", x);
4596 #else
4597 print_to_buffer(file, filesize, tmp, pos, ":%.10f ", x);
4598 #endif
4599 if (p != joint)
4600 {
4601 p = showtop (p->back);
4602 while (p->type == 'm')
4603 {
4604 sprintf (migstring, " [&M %li %li:%g]", p->pop, p->actualpop,
4605 p->tyme - showtop (p->next->back)->tyme);
4606 print_to_buffer(file, filesize, tmp, pos, "%s", migstring);
4607 p = showtop (p->back);
4608 }
4609 }
4610 else
4611 {
4612 print_to_buffer(file, filesize, tmp, pos, ";\n\0");
4613 }
4614 myfree(tmp);
4615 } /* treeout_string */
4616
4617
4618 void
print_tree(world_fmt * world,long g,long * filepos)4619 print_tree (world_fmt * world, long g, long *filepos)
4620 {
4621 #ifdef NEXUSTREE
4622 static long counter = 0;
4623 #endif
4624 static long count = 1;
4625 long pos = 0;
4626 long allocval = 0;
4627 char *tmp;
4628 tmp = (char *) calloc(1024,sizeof(char));
4629
4630 switch (world->options->treeprint)
4631 {
4632 case BEST:
4633 if(world->options->treeinmemory)
4634 {
4635 *filepos = 0;
4636 pos = 0;
4637 allocval = world->treespacealloc[world->locus];
4638 // if (world->likelihood[g] >= world->besttreelike[world->locus])
4639 if (world->param_like >= world->besttreelike[world->locus])
4640 {
4641 // printf("%i> print_tree(BEST) at %p locus=%li bestlike=%f like=%f\n",myID, &world->treespace[world->locus], world->locus,world->besttreelike[world->locus], world->likelihood[g]);
4642 world->besttreelike[world->locus] = world->param_like;
4643 // world->besttreelike[world->locus] = world->likelihood[g];
4644 world->treespace[world->locus] = (char *) myrealloc(world->treespace[world->locus],
4645 sizeof(char) * LONGLINESIZE);
4646 allocval = LONGLINESIZE;
4647 //speed problem memset(world->treespace[world->locus],0, sizeof(char) * LONGLINESIZE);
4648 world->treespace[world->locus][0]='\0';
4649 pos = sprintf (world->treespace[world->locus], "\n[& Locus %li, best ln(L) = %f (c=coalescent node, t=tipnode) ]\n",
4650 world->locus + 1, world->likelihood[g]);
4651 treeout_string (&(world->treespace[world->locus]),
4652 &allocval,&pos,
4653 crawlback (world->root->next),
4654 crawlback (world->root->next), 0);
4655 world->treespacealloc[world->locus] = allocval;
4656 world->treespacenum[world->locus] = pos;
4657 }
4658 // else
4659 // {
4660 // printf("%i> *** print_tree(BEST) locus=%li bestlike=%f like=%f\n", myID, world->locus, world->besttreelike[world->locus], world->likelihood[g]);
4661 // }
4662 }
4663 else
4664 {
4665 if (world->likelihood[g] > world->allikemax)
4666 {
4667 if (world->allikemax == -MYREAL_MAX)
4668 {
4669 *filepos = ftell (world->treefile);
4670 }
4671 else
4672 {
4673 fseek (world->treefile, *filepos, SEEK_SET);
4674 }
4675 world->allikemax = world->likelihood[g];
4676 FPRINTF (world->treefile,
4677 "\n[& Locus %li, best ln(L) = %f (c=coalescent node, t=tipnode) ]\n",
4678 world->locus + 1, world->likelihood[g]);
4679 treeout (world->treefile, crawlback (world->root->next),
4680 crawlback (world->root->next), 0);
4681 }
4682 }
4683 break;
4684 case ALL:
4685 case LASTCHAIN:
4686 if((count++ % world->options->treeinc) == 0)
4687 {
4688 if (world->in_last_chain)
4689 {
4690 if(world->options->treeinmemory)
4691 {
4692 *filepos = 0;
4693 pos = world->treespacenum[world->locus];
4694 allocval = world->treespacealloc[world->locus];
4695 #ifdef NEXUSTREE
4696 pos = print_to_buffer(&(world->treespace[world->locus]), &allocval, tmp, &pos,
4697 "\n[& Locus %li, best ln(L) = %f (c=coalescent node, t=tipnode) ]\ntree repl.%li = [&R] ",
4698 world->locus + 1, world->likelihood[g], counter++);
4699 #else
4700 pos = print_to_buffer(&(world->treespace[world->locus]), &allocval, tmp, &pos,
4701 "\n[& Locus %li, best ln(L) = %f (c=coalescent node, t=tipnode) ]\n",
4702 world->locus + 1, world->likelihood[g]);
4703 #endif
4704 treeout_string (&(world->treespace[world->locus]),
4705 &allocval,&pos,
4706 crawlback (world->root->next),
4707 crawlback (world->root->next), 0);
4708 world->treespacealloc[world->locus] = allocval;
4709 world->treespacenum[world->locus] = pos;
4710 }
4711 else
4712 {
4713 // writing direct to file
4714 FPRINTF (world->treefile,
4715 "\n[& Locus %li, ln(L) = %f (c=coalescent node, t=tipnode) ]\n",
4716 world->locus + 1, world->likelihood[g]);
4717 #ifdef NEXUSTREE
4718 FPRINTF (world->treefile,"tree repl.%li = [&R] ",counter++);
4719 #endif
4720 treeout (world->treefile, crawlback (world->root->next),
4721 crawlback (world->root->next), 0);
4722 }
4723 }
4724 }
4725 break;
4726 case _NONE:
4727 break;
4728 default:
4729 break;
4730 }
4731 myfree(tmp);
4732 }
4733
4734
4735 boolean
treereader(world_fmt * world,option_fmt * options,data_fmt * data)4736 treereader (world_fmt * world, option_fmt *options, data_fmt * data)
4737 {
4738 /*
4739 * read a tree with or without migration events from the usertree and set up nodes
4740 * and pointers
4741 */
4742 boolean has_migration=FALSE;
4743 node **nodelist;
4744 char *nayme;
4745 char *temp, *temp2;
4746 long pop, w, zz, z = 0, zzz = 0;
4747 world->nodep = (node **) mycalloc (world->sumtips+(world->sumtips+1),sizeof (node *));
4748 temp = (char *) mymalloc (LINESIZE * sizeof (char));
4749 temp2 = (char *) mymalloc (LINESIZE * sizeof (char));
4750 has_migration = treeread (data->utreefile, world, options, &(world->root), NULL);
4751 length_to_times (world->root->next->back);
4752 nodelist = (node **) mycalloc (1, sizeof (node *) * (world->sumtips + 1));
4753 pop = -1;
4754 set_tree_pop (world->root, &pop);
4755 allocate_x (world->root, world, world->options->datatype, WITHTIPS);
4756 find_tips (world->root, nodelist, &z);
4757 for (pop = 0; pop < data->numpop; pop++)
4758 {
4759 for (w = 0; w < data->numind[pop][world->locus]; w++)
4760 {
4761 strcpy (temp2, data->indnames[pop][w][world->locus]);
4762 unpad(temp2," ");
4763 // temp2[strcspn (temp2, " ")] = '\0';
4764 sprintf (temp, "%li%s", pop, temp2);
4765 for (zz = 0; zz < z; zz++)
4766 {
4767 nayme = nodelist[zz]->nayme;
4768 unpad(nayme," _");
4769 if (!strcmp (temp, nayme) || !strcmp (temp2, nayme))
4770 {
4771 world->nodep[zzz++] = nodelist[zz];
4772 break;
4773 }
4774 }
4775 }
4776 }
4777 myfree(nodelist);
4778 myfree(temp);
4779 myfree(temp2);
4780 return has_migration;
4781 }
4782
4783
4784 char
processlength(FILE * file,node ** p,option_fmt * options)4785 processlength (FILE * file, node ** p, option_fmt *options)
4786 {
4787 char ch;
4788 long digit, ordzero;
4789 MYREAL valyew, divisor;
4790 boolean pointread, minusread;
4791
4792 ordzero = '0';
4793 pointread = FALSE;
4794 minusread = FALSE;
4795 valyew = 0.0;
4796 divisor = 1.0;
4797 ch = getc (file);
4798 digit = ch - ordzero;
4799 while (((unsigned long) digit <= 9) | (ch == '.') || (ch == '-'))
4800 {
4801 if (ch == '.')
4802 pointread = TRUE;
4803 else if (ch == '-')
4804 minusread = TRUE;
4805 else
4806 {
4807 valyew = valyew * 10.0 + digit;
4808 if (pointread)
4809 divisor *= 10.0;
4810 }
4811 ch = getc (file);
4812 digit = ch - ordzero;
4813 }
4814 if (!minusread)
4815 (*p)->length = valyew / divisor;
4816 else
4817 (*p)->length = 0.0;
4818 return ch;
4819 }
4820
4821 boolean
treeread(FILE * file,world_fmt * world,option_fmt * options,node ** pp,node * q)4822 treeread (FILE * file, world_fmt * world, option_fmt *options, node ** pp, node * q)
4823 {
4824 node *p=NULL;
4825 boolean has_migration=FALSE;
4826 char ch = getc (file);
4827 int retval;
4828 while (ch != ';')
4829 {
4830 switch (ch)
4831 {
4832 case '(':
4833 p = create_interior_node (world, &q);
4834 q = p->next;
4835 ch = getc (file);
4836 break;
4837 case ',':
4838 q = q->next;
4839 if (q->top)
4840 {
4841 usererror ("Multifurcation handling not yet installed");
4842 }
4843 ch = getc (file);
4844 break;
4845 case ')':
4846 p = showtop (q);
4847 q = p->back;
4848 ch = getc (file);
4849 break;
4850 case ' ':
4851 case '\n':
4852 case '\t':
4853 ch = getc (file);
4854 break;
4855 case ':':
4856 ch = processlength (file, &p, options);
4857 break;
4858 case '[':
4859 has_migration = processbracket (file, world, &p, &ch);
4860 if(has_migration>0)
4861 {
4862 q->back = p;
4863 p->back = q;
4864 }
4865 break;
4866 default:
4867 p = create_tip_node (file, world, options, &q, &ch);
4868 break;
4869 }
4870 }
4871 if(p!=NULL)
4872 {
4873 p->length = 10000.;
4874 (*pp) = showtop (p->back);
4875 }
4876 retval = fscanf (file, "%*[^\n]");
4877 getc (file);
4878 return has_migration;
4879 }
4880
4881 void
length_to_times(node * p)4882 length_to_times (node * p)
4883 {
4884 node *q;
4885 if (p->type != 't')
4886 {
4887 length_to_times ((p)->next->back);
4888 if ((p)->type == 'i')
4889 length_to_times ((p)->next->next->back);
4890 }
4891 q = showtop ((p)->back);
4892 q->tyme = q->next->tyme = q->next->next->tyme = (p)->tyme + (p)->length;
4893 }
4894
4895 void
find_tips(node * p,node ** nodelist,long * z)4896 find_tips (node * p, node ** nodelist, long *z)
4897 {
4898 if (p->type == 't')
4899 {
4900 nodelist[(*z)++] = p;
4901 }
4902 else
4903 {
4904 if (p->next->back != NULL)
4905 find_tips (crawlback (p->next), nodelist, z);
4906 if (p->next->next->back != NULL)
4907 find_tips (crawlback (p->next->next), nodelist, z);
4908 }
4909 }
4910
4911 long
find_firstpop(node * p)4912 find_firstpop (node * p)
4913 {
4914 static boolean found = FALSE;
4915 static long pop = -1;
4916 if (p->type == 'm')
4917 {
4918 found = TRUE;
4919 pop = p->pop;
4920 }
4921 else
4922 {
4923 if (p->next->back != NULL)
4924 {
4925 find_firstpop (p->next->back);
4926 if (found)
4927 return pop;
4928 }
4929 if (p->next->next->back != NULL)
4930 find_firstpop (p->next->next->back);
4931 }
4932 return pop;
4933 }
4934
4935 /* touches only coalescent nodes! migration nodes are already set */
4936 void
set_tree_pop_old(node * p,long * pop)4937 set_tree_pop_old (node * p, long *pop)
4938 {
4939 if (p->type != 'r')
4940 {
4941
4942 (*pop) =
4943 (showtop (p->back)->actualpop !=
4944 *pop) ? showtop (p->back)->actualpop : *pop;
4945 }
4946 p->actualpop = p->pop = *pop;
4947 if (p->type != 't')
4948 {
4949 if (p->next->back != NULL)
4950 {
4951 set_tree_pop (crawlback (p->next), pop);
4952 }
4953 if (p->type != 'm' && p->next->next->back != NULL)
4954 {
4955 set_tree_pop (crawlback (p->next->next), pop);
4956 }
4957 }
4958 }
4959
4960 /// sets the actualpop and pop values deducting from the mgiration nodes that
4961 /// are already set
4962 void
set_tree_pop(node * p,long * pop)4963 set_tree_pop (node * p, long *pop)
4964 {
4965 static boolean done=FALSE;
4966 switch(p->type)
4967 {
4968 case 'r':
4969 set_tree_pop(p->next->back, pop);
4970 *pop = p->actualpop = p->pop = p->next->back->pop;
4971 if(!done)
4972 {
4973 done=TRUE;
4974 if(*pop == -1)
4975 *pop = 0;
4976 set_tree_pop(p,pop);
4977 }
4978 break;
4979 case 't':
4980 if (*pop != -1)
4981 p->actualpop = p->pop = *pop;
4982 break;
4983 case 'i':
4984 set_tree_pop(p->next->back,pop);
4985 if(*pop != -1)
4986 p->actualpop = p->pop = p->next->back->pop;
4987 set_tree_pop(p->next->next->back,pop);
4988 if(*pop != -1)
4989 {
4990 p->actualpop = p->pop = p->next->next->back->pop;
4991 *pop = p->pop;
4992 }
4993 break;
4994 case 'm':
4995 *pop = p->actualpop;
4996 set_tree_pop(p->next->back, pop);
4997 *pop = p->pop;
4998 break;
4999 }
5000 }
5001
5002
5003 node *
create_interior_node(world_fmt * world,node ** q)5004 create_interior_node (world_fmt * world, node ** q)
5005 {
5006 node *p;
5007 p = allocate_nodelet (world, 3, 'i');
5008 p->top = TRUE;
5009 p->scale = (MYREAL *) mycalloc (world->data->seq[0]->endsite+world->data->seq[0]->addon, sizeof (MYREAL));
5010 p->s = (MYREAL *) mycalloc (1, sizeof (MYREAL) * world->numpop);
5011 p->back = *q;
5012 if ((*q) == NULL)
5013 create_root_node (world, &p);
5014 else
5015 (*q)->back = p;
5016 return p;
5017 }
5018
5019 node *
create_root_node(world_fmt * world,node ** q)5020 create_root_node (world_fmt *world, node ** q)
5021 {
5022 node *p;
5023 p = allocate_nodelet (world, 3, 'r');
5024 p->top = TRUE;
5025 p->next->back = *q;
5026 (*q)->back = p->next;
5027 return p;
5028 }
5029
5030
5031 node *
create_tip_node(FILE * file,world_fmt * world,option_fmt * options,node ** q,char * ch)5032 create_tip_node (FILE * file, world_fmt * world, option_fmt *options, node ** q, char *ch)
5033 {
5034 long pop;
5035 node *p;
5036 char c;
5037 char *nayme;
5038 long i = 1;
5039 nayme = (char *) mycalloc (options->nmlength+1, sizeof (char));
5040 nayme[0] = (*ch);
5041 while (strchr ("[):;,\t\n\r", (int) (c = getc (file))) == NULL)
5042 nayme[i++] = c;
5043 nayme[i] = '\0';
5044 p = allocate_nodelet (world, 1, 't');
5045 p->nayme = (char *) mycalloc (options->nmlength+5, sizeof (char));
5046 p->top = TRUE;
5047 p->tip = TRUE;
5048 // PB is this needed with usertrees
5049 p->scale = (MYREAL *) mycalloc (world->data->seq[0]->endsite+world->data->seq[0]->addon, sizeof (MYREAL));
5050 p->s = (MYREAL *) mycalloc (world->numpop, sizeof (MYREAL) );
5051 for (i = 0; i < world->numpop; i++)
5052 {
5053 p->s[i] = MYREAL_MAX;
5054 }
5055
5056 sprintf(p->nayme, "%-*s",(int) options->nmlength, nayme);
5057 unpad(p->nayme," ");
5058 translate(p->nayme,' ', '_');
5059 sscanf(nayme,"%li ",&pop);
5060 p->s[pop] = 0;
5061 p->back = *q;
5062 (*q)->back = p;
5063 myfree(nayme);
5064 (*ch) = c;
5065 return p;
5066 }
5067
5068 boolean
processbracket(FILE * file,world_fmt * world,node ** p,char * ch)5069 processbracket (FILE * file, world_fmt *world, node ** p, char * ch)
5070 {
5071 boolean migfound=FALSE;
5072 long pop1, pop2;
5073 MYREAL utime;
5074 char c;
5075 int retval;
5076 c = getc (file);
5077 if (c == '&')
5078 {
5079 c = getc (file);
5080 switch (c)
5081 {
5082 case 'M':
5083 #ifdef USE_MYREAL_FLOAT
5084 retval = fscanf (file, "%li %li:%f", &pop1, &pop2, &utime);
5085 #else
5086 retval = fscanf (file, "%li %li:%lf", &pop1, &pop2, &utime);
5087 #endif
5088 /*c=*/getc (file);
5089 (*p) = add_migration (world, *p, pop1, pop2, utime);
5090 migfound = TRUE;
5091 break;
5092 default:
5093 while (c != ']')
5094 c = getc (file);
5095 break;
5096 }
5097 }
5098 else
5099 {
5100 while (c != ']')
5101 c = getc (file);
5102 }
5103 *ch = getc (file);
5104 return migfound;
5105 }
5106
5107
5108 node *
add_migration(world_fmt * world,node * p,long from,long to,MYREAL utime)5109 add_migration (world_fmt *world, node * p, long from, long to, MYREAL utime)
5110 {
5111 node *tmp;
5112 tmp = allocate_nodelet (world, 2, 'm');
5113 tmp->top = TRUE;
5114 tmp->next->back = p;
5115 p->back = tmp->next;
5116 tmp->length = p->length - utime;
5117 p->length = utime;
5118 tmp->tyme = p->tyme + utime;
5119 tmp->pop = tmp->next->pop = from;
5120 tmp->actualpop = tmp->next->actualpop = to;
5121 return tmp;
5122 }
5123
5124 void
allocate_x(node * p,world_fmt * world,char datatype,boolean withtips)5125 allocate_x (node * p, world_fmt * world, char datatype, boolean withtips)
5126 {
5127 if (p->type != 't')
5128 {
5129 if (p->next->back != NULL)
5130 allocate_x (crawlback (p->next), world, datatype, withtips);
5131 if (p->next->next->back != NULL)
5132 allocate_x (crawlback (p->next->next), world, datatype, withtips);
5133 if (strchr (SEQUENCETYPES, world->options->datatype))
5134 {
5135 alloc_seqx (world, p);
5136 }
5137 else
5138 {
5139 if (strchr (SEQUENCETYPES, world->options->datatype))
5140 p->x.a = (MYREAL *) mycalloc (1, world->sumtips * sizeof (MYREAL));
5141 else
5142 p->x.a =
5143 (MYREAL *) mycalloc (1,
5144 MAX (world->sumtips,
5145 world->data->maxalleles[world->locus]) *
5146 sizeof (MYREAL));
5147 }
5148 #ifdef UEP
5149 if(world->options->uep)
5150 {
5151 p->uep = (int *) mycalloc (world->data->uepsites, sizeof (int));
5152 p->ux.s = (pair *) mycalloc (world->data->uepsites, sizeof (pair));
5153 }
5154 #endif
5155
5156 }
5157 else
5158 {
5159 if (withtips)
5160 {
5161 if (strchr (SEQUENCETYPES, world->options->datatype))
5162 {
5163 alloc_seqx (world, p);
5164 }
5165 else
5166 {
5167 p->x.a =
5168 (MYREAL *) mycalloc (1,
5169 world->data->maxalleles[world->locus] *
5170 sizeof (MYREAL));
5171 }
5172 }
5173 //#ifdef UEP
5174 // if(world->options->uep)
5175 // {
5176 // p->uep = (long *) mycalloc (world->data->uepsites, sizeof (long));
5177 // p->ux.s = (pair *) mycalloc (world->data->uepsites, sizeof (pair));
5178 // }
5179 //#endif
5180
5181 }
5182 }
5183
5184 long
number_genomes(int datatype)5185 number_genomes (int datatype)
5186 {
5187 switch (datatype)
5188 {
5189 case 'a':
5190 case 'b':
5191 case 'm':
5192 return 2;
5193 case 's':
5194 case 'n':
5195 case 'h':
5196 case 'u':
5197 case 'f':
5198 return 1;
5199 default:
5200 error ("Wrong data type");
5201 }
5202 return 0;
5203
5204 }
5205
5206
5207 void
copy_tree(world_fmt * original,world_fmt * kopie)5208 copy_tree (world_fmt * original, world_fmt * kopie)
5209 {
5210 kopie->root = copy_node (original, original->root, kopie, NULL);
5211 }
5212
5213 node *
copy_node(world_fmt * original,node * o,world_fmt * kopie,node * last)5214 copy_node (world_fmt * original, node * o, world_fmt * kopie, node * last)
5215 {
5216 static long z = 0;
5217
5218 node *t = NULL, *t2, *t3;
5219 if (o == NULL)
5220 return NULL;
5221 if (!o->top)
5222 error ("copy_tree messed up");
5223
5224 switch (o->type)
5225 {
5226 case 'r':
5227 z = 0;
5228 case 'i':
5229 t = (node *) mycalloc (1, sizeof (node));
5230 t2 = (node *) mycalloc (1, sizeof (node));
5231 t3 = (node *) mycalloc (1, sizeof (node));
5232 t->next = t2;
5233 t2->next = t3;
5234 t3->next = t;
5235 copy_node_content (original, kopie, o, t);
5236 copy_node_content (original, kopie, o->next, t2);
5237 copy_node_content (original, kopie, o->next->next, t3);
5238 if (o->next->back != NULL)
5239 t2->back = copy_node (original, o->next->back, kopie, t2);
5240 if (o->next->next->back != NULL)
5241 t3->back = copy_node (original, o->next->next->back, kopie, t3);
5242 t->back = last;
5243 break;
5244 case 'm':
5245 t = (node *) mycalloc (1, sizeof (node));
5246 t2 = (node *) mycalloc (1, sizeof (node));
5247 t->next = t2;
5248 t2->next = t;
5249 copy_node_content (original, kopie, o, t);
5250 copy_node_content (original, kopie, o->next, t2);
5251 t2->back = copy_node (original, o->next->back, kopie, t2);
5252 t->back = last;
5253 break;
5254 case 't':
5255 t = (node *) mycalloc (1, sizeof (node));
5256 //kopie->nodep[z++] = t;
5257 t->next = t;
5258 copy_node_content (original, kopie, o, t);
5259 t->back = last;
5260 break;
5261 }
5262 return t;
5263 }
5264
5265 ///
5266 /// copy the guts of a node in the tree (but to where?)
5267 void
copy_node_content(world_fmt * original,world_fmt * kopie,node * o,node * t)5268 copy_node_content (world_fmt * original, world_fmt * kopie, node * o,
5269 node * t)
5270 {
5271 long i;
5272 // long j;
5273 const long endsite = original->data->seq[0]->endsite;
5274 const long rcategs = original->options->rcategs;
5275 t->type = o->type;
5276 t->number = o->number;
5277 t->pop = o->pop;
5278 t->actualpop = o->actualpop;
5279 t->id = o->id;
5280 t->top = o->top;
5281 t->dirty = o->dirty;
5282 t->v = o->v;
5283 t->tyme = o->tyme;
5284 t->length = o->length;
5285
5286 if (t->top && t->type != 'm')
5287 {
5288 t->scale = (MYREAL *) mycalloc (endsite, sizeof (MYREAL));
5289 memcpy (t->scale, o->scale, sizeof (MYREAL) * endsite);
5290 #ifdef UEP
5291
5292 if (original->options->uep)
5293 {
5294 t->uep = (int *) mycalloc (original->data->uepsites, sizeof (int));
5295 memcpy (t->uep, o->uep, sizeof (long) * original->data->uepsites);
5296 t->ux.s = (pair *) mycalloc (original->data->uepsites, sizeof (pair));
5297 for (i = 0; i < original->data->uepsites; i++)
5298 {
5299 memcpy (t->ux.s[i], o->ux.s[i], sizeof (pair));
5300 }
5301 }
5302 #endif
5303 if (strchr (SEQUENCETYPES, original->options->datatype))
5304 {
5305 alloc_seqx (kopie, t);
5306 memcpy (t->scale, o->scale, sizeof (MYREAL) * endsite);
5307 for (i = 0; i < endsite; i++)
5308 {
5309 // for (j = 0; j < rcategs; j++)
5310 // {
5311 memcpy (t->x.s[i], o->x.s[i], rcategs * sizeof (sitelike));
5312
5313 // }
5314 }
5315 }
5316 else
5317 {
5318 t->x.a =
5319 (MYREAL *) mycalloc (1,
5320 original->data->maxalleles[original->locus] *
5321 sizeof (MYREAL));
5322 memcpy (t->x.a, o->x.a,
5323 sizeof (MYREAL) *
5324 original->data->maxalleles[original->locus]);
5325 }
5326 }
5327 //if (o->s != NULL)
5328 ///{
5329 //t->s = (MYREAL *) mycalloc(1, sizeof(MYREAL) * original->numpop);
5330 //memcpy] (t->s, o->s, sizeof(MYREAL) * original->numpop);
5331 //
5332 // }
5333 //
5334 else
5335 t->s = NULL;
5336 if (o->nayme != NULL)
5337 {
5338 t->nayme = (char*) strdup (o->nayme);
5339 }
5340 else
5341 t->nayme = NULL;
5342 }
5343
5344 void
swap_tree(world_fmt * tthis,world_fmt * tthat)5345 swap_tree (world_fmt * tthis, world_fmt * tthat)
5346 {
5347 node *tmp;
5348 node **nodetemps;
5349 tmp = tthis->root;
5350 tthis->root = tthat->root;
5351 tthat->root = tmp;
5352 nodetemps = tthis->nodep;
5353 tthis->nodep = tthat->nodep;
5354 tthat->nodep = nodetemps;
5355
5356 }
5357
5358 void
calc_treelength(node * p,MYREAL * treelen)5359 calc_treelength (node * p, MYREAL *treelen)
5360 {
5361 node *pn, *pnn;
5362 switch (p->type)
5363 {
5364 case 't':
5365 break;
5366 case 'm':
5367 error ("yelp\n");
5368 break;
5369 case 'i':
5370 pn = crawlback (p->next);
5371 calc_treelength (pn, treelen);
5372 pnn = crawlback (p->next->next);
5373 calc_treelength (pnn, treelen);
5374 break;
5375 default:
5376 error ("default reached");
5377 }
5378 pn = showtop (crawlback (p));
5379 if (pn->type != 'r')
5380 *treelen += pn->tyme - p->tyme;
5381 }
5382
5383 MYREAL
calc_pseudotreelength(proposal_fmt * proposal,MYREAL treelen)5384 calc_pseudotreelength (proposal_fmt * proposal, MYREAL treelen)
5385 {
5386 MYREAL len = 0.0;
5387 MYREAL ot = proposal->origin->tyme;
5388 MYREAL obt = proposal->oback->tyme;
5389 MYREAL tt = proposal->target->tyme;
5390 MYREAL rt = proposal->world->root->next->back->tyme;
5391 //target is not root
5392 if (proposal->target != proposal->world->root)
5393 {
5394 //oback is not root
5395 if (proposal->oback != proposal->world->root)
5396 {
5397 len = treelen - (obt - ot) + (proposal->time - ot);
5398 //printf("pseudo_treelen: ob!=r t!=r %f\n", len);
5399 }
5400 else
5401 {
5402 //oback is root
5403 len = treelen - (obt - ot) - (rt - tt) +
5404 (proposal->time - ot) + (proposal->time - tt);
5405 //printf("pseudo_treelen: ob=r t!=r %f\n", len);
5406 }
5407 }
5408 else
5409 //target is root
5410 {
5411 //oback is not root
5412 if (proposal->oback != proposal->world->root)
5413 {
5414 len = treelen - (obt - ot) + (proposal->time - ot) - (rt - tt) +
5415 (proposal->time - tt);
5416 //printf("pseudo_treelen: ob!=r t=r %f\n", len);
5417 }
5418 else
5419 {
5420 //oback is root
5421 len = treelen - (obt - ot) - (obt - tt) + (proposal->time - ot)
5422 + (proposal->time - tt);
5423 //printf("pseudo_treelen: ob=r t=r %f\n", len);
5424 }
5425 }
5426 return len;
5427 }
5428
5429 void
swap(void * a,void * b)5430 swap (void *a, void *b)
5431 {
5432 void *t;
5433 t = a;
5434 a = b;
5435 b = t;
5436 }
5437
5438
5439 void
free_tree(node * p,world_fmt * world)5440 free_tree (node * p, world_fmt * world)
5441 {
5442 if (p != NULL)
5443 {
5444 if (p->type != 't')
5445 {
5446 if (p->next->back != NULL)
5447 {
5448 free_tree (p->next->back, world);
5449 }
5450 if (p->type != 'm' && p->next->next->back != NULL)
5451 {
5452 free_tree (p->next->next->back, world);
5453 }
5454 }
5455 switch (p->type)
5456 {
5457 case 'm':
5458 free_mignodelet (p, world);
5459 break;
5460 case 't':
5461 myfree(p->nayme);
5462 free_tipnodelet (p, world);
5463 break;
5464 case 'i':
5465 free_nodelet (p, 3, world);
5466 break;
5467 case 'r':
5468 free_nodelet (p, 3, world);
5469 world->root = NULL;
5470 break;
5471 default:
5472 error("error in freeing nodes, a node with no type found");
5473 break;
5474 }
5475 }
5476 }
5477
5478 ///
5479 /// delete tipnode-nodelet and its content data
5480 void
free_tipnodelet(node * p,world_fmt * world)5481 free_tipnodelet (node * p, world_fmt * world)
5482 {
5483 free_nodedata (p, world);
5484 collect_nodelet(world, p);
5485 //myfree(p);
5486 //p->id = -p->id;
5487 }
5488
5489 ///
5490 /// delete migratenode-nodelets
5491 void
free_mignodelet(node * p,world_fmt * world)5492 free_mignodelet (node * p, world_fmt * world)
5493 {
5494 node *q = p->next;
5495 collect_nodelet(world, q);
5496 collect_nodelet(world, p);
5497 // myfree(q);
5498 //myfree(p);
5499 }
5500
5501 ///
5502 /// delete interior node and its data
5503 void
free_nodelet(node * p,long num,world_fmt * world)5504 free_nodelet (node * p, long num, world_fmt * world)
5505 {
5506 long i;
5507 node *q , *r;
5508 switch(num)
5509 {
5510 case 1:
5511 if(p->top == TRUE)
5512 free_nodedata (p, world);
5513 collect_nodelet(world, p);
5514 //myfree(p);
5515 break;
5516 case 2:
5517 q = p->next;
5518 if(p->top == TRUE)
5519 free_nodedata (p, world);
5520 collect_nodelet(world, p);
5521 // myfree(p);
5522 if(q!=NULL)
5523 {
5524 if(q->top == TRUE)
5525 free_nodedata (q, world);
5526 collect_nodelet(world, q);
5527 // myfree(q);
5528 }
5529 break;
5530 case 3:
5531 q = p->next;
5532 r = p->next->next;
5533 if(p->top == TRUE)
5534 free_nodedata (p, world);
5535 collect_nodelet(world, p);
5536 // myfree(p);
5537 //p->id = -p->id;
5538 if(q!=NULL)
5539 {
5540 if(q->top == TRUE)
5541 free_nodedata (q, world);
5542 collect_nodelet(world, q);
5543 // myfree(q);
5544 //q->id = -q->id;
5545 }
5546 if(r!=NULL)
5547 {
5548 if(r->top == TRUE)
5549 free_nodedata (r, world);
5550 collect_nodelet(world, r);
5551 // myfree(r);
5552 //r->id = -r->id;
5553 }
5554 break;
5555 default:
5556 for (i = 0; i < num; i++)
5557 {
5558 q = p->next;
5559 if(p->top == TRUE)
5560 free_nodedata (p, world);
5561 // myfree(p);
5562 collect_nodelet(world, p);
5563 p = q;
5564 }
5565 }
5566 }
5567
5568 void
free_nodedata(node * p,world_fmt * world)5569 free_nodedata (node * p, world_fmt * world)
5570 {
5571 // long endsite;
5572 if (strchr (SEQUENCETYPES, world->options->datatype))
5573 {
5574 // endsite = world->data->seq->endsite;
5575 if(p->x.s != NULL)
5576 {
5577 myfree(p->x.s[0]);
5578 myfree(p->x.s);
5579 }
5580 }
5581 else
5582 {
5583 if(p->x.a != NULL)
5584 {
5585 myfree(p->x.a);
5586 }
5587 }
5588 if (p->s != NULL)
5589 myfree(p->s);
5590 if(p->scale!=NULL)
5591 myfree(p->scale);
5592
5593 #ifdef UEP
5594 if (world->options->uep)
5595 myfree(p->uep);
5596 #endif
5597
5598 }
5599
5600 ///
5601 /// calculates the 1/probability that no event happens
5602 /// is safeguarded against a problem on two tip trees
5603 /// that will return a result of zero and so a inverse of INF
5604 /// INF is replaced by MYREAL_MAX
5605 MYREAL
inverse_logprob_noevent(world_fmt * world,long interval)5606 inverse_logprob_noevent (world_fmt * world, long interval)
5607 {
5608 long pop, k;
5609 MYREAL result = 0.0;
5610 for (pop = 0; pop < world->numpop; pop++)
5611 {
5612 k = world->treetimes[0].tl[interval].lineages[pop];
5613 result +=
5614 (k * (k - 1) / world->param0[pop]) + sum_migprob (world, pop, interval);
5615 }
5616 if(result > 0.0)
5617 return 1./result;
5618 else
5619 {
5620 if(world->treetimes[0].tl[interval].eventnode->type=='t')
5621 {
5622 return world->treetimes[0].tl[interval].age;
5623 }
5624 else
5625 return MYREAL_MAX;
5626 }
5627 }
5628
5629 /// calculates the sum of all immigrations into a specific population
5630 MYREAL
sum_migprob(world_fmt * world,long pop,long interval)5631 sum_migprob (world_fmt * world, long pop, long interval)
5632 {
5633 long i;
5634 MYREAL result = 0.0;
5635 long *lineages = world->treetimes[0].tl[interval].lineages;
5636 long msta = world->mstart[pop];
5637 long msto = world->mend[pop];
5638 boolean usem = world->options->usem;
5639 MYREAL pk;
5640 for (i = msta; i < msto; i++)
5641 {
5642 pk = usem ? world->param0[i] : world->param0[i]/world->param0[pop];
5643 result += pk;
5644 }
5645 return result * lineages[pop];
5646 }
5647
debugline(node * up)5648 void debugline(node *up)
5649 {
5650 node *thenode = up;
5651 node * down = crawlback(showtop(up));
5652 while (thenode != down)
5653 {
5654 printf("<%li:%li %c>\n",thenode->id, showtop(thenode)->id, thenode->type);
5655 thenode = showtop(thenode)->back;
5656 }
5657 }
5658