1 /* \file tree.c */
2 /*------------------------------------------------------
3 Maximum likelihood estimation
4 of migration rate  and effectice population size
5 using a Metropolis-Hastings Monte Carlo algorithm
6 -------------------------------------------------------
7 	T R E E B U I L D I N G   R O U T I N E S
8 
9 	Peter Beerli 1996, Seattle
10 	beerli@fsu.edu
11 
12 	Copyright 1997-2002 Peter Beerli and Joseph Felsenstein, Seattle WA
13 	Copyright 2003-2004 Peter Beerli, Tallahassee FL
14 
15 	some code in this file are successors of code in dnaml in the PHYLIP
16         package of Joseph Felsenstein. Several changes were made to the conditionl
17         likelihood methods to improve speed, but the original design idea is Joe's.
18 
19 	A new model that includes treatment of gaps in the alignment is in the works
20         and may superceede all other models because it seems that this model
21         is capable to handle all other sequence models. <THIS IS NOY WORKING YET>
22 
23  Permission is hereby granted, free of charge, to any person obtaining
24  a copy of this software and associated documentation files (the
25  "Software"), to deal in the Software without restriction, including
26  without limitation the rights to use, copy, modify, merge, publish,
27  distribute, sublicense, and/or sell copies of the Software, and to
28  permit persons to whom the Software is furnished to do so, subject
29  to the following conditions:
30 
31  The above copyright notice and this permission notice shall be
32  included in all copies or substantial portions of the Software.
33 
34  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
35  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
36  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
37  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
38  ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
39  CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
40  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
41 
42 
43 $Id: tree.c 2171 2013-09-22 22:05:02Z beerli $
44 
45 -------------------------------------------------------*/
46 #include <stdlib.h>
47 #include "migration.h"
48 #include "sighandler.h"
49 #include "random.h"
50 #include "options.h"
51 #include "data.h"
52 #include "sequence.h"
53 #include "world.h"
54 #include "tools.h"
55 #include "migrate_mpi.h"
56 #include "mcmc.h"
57 #ifdef UEP
58 #include "uep.h"
59 #endif
60 #include "tree.h"
61 #ifdef BEAGLE
62 #include "calculator.h"
63 #endif
64 #ifdef DMALLOC_FUNC_CHECK
65 #include <dmalloc.h>
66 #endif
67 
68 #define NOTIPS 0
69 #define WITHTIPS 1
70 
71 extern long unique_id_global;
72 
73 //// prototypes -------------------------------------------
74 ////
75 ////
76 //// creating trees
77 //void buildtree (world_fmt * world, option_fmt * options, data_fmt * data, long locus);
78 //void create_treetimelist (world_fmt * world,  timelist_fmt ** ltl, long locus);
79 //void fix_times (world_fmt * world, option_fmt * options);
80 //void first_smooth (world_fmt * world, long locus);
81 //void set_dirty (node * p);
82 //void construct_tymelist (world_fmt * world, timelist_fmt * timevector);
83 //void timeslices (timelist_fmt ** timevector);
84 //void add_partlineages (long numpop, timelist_fmt ** timevector);
85 //// likelihood calculation
86 //MYREAL treelikelihood (world_fmt * world);
87 //MYREAL pseudotreelikelihood (world_fmt * world, proposal_fmt * proposal);
88 MYREAL treelike_anc (world_fmt * world, long locus);
89 //void set_pop (node * theNode, long pop, long actualpop);
90 //void pseudonuview (proposal_fmt * proposal, xarray_fmt xx1, MYREAL *lx1,
91 //                   MYREAL v1, xarray_fmt xx2, MYREAL *lx2, MYREAL v2);
92 //void ltov (node * p);
93 //void treeout (FILE * treefile, node * joint, node * p, long s);
94 //void print_tree (world_fmt * world, long g, long *filepos);
95 //MYREAL find_tipdate(char * id, long pop, world_fmt *world);
96 //
97 /* private functions------------------------------------- */
98 
99 void allocate_tree (world_fmt * world, option_fmt * options, data_fmt * data,
100                     long locus);
101 /* allocations of nodes */
102 void allocatetips (world_fmt * world, option_fmt * options, data_fmt * data,
103                    long locus);
104 void allocateinterior (world_fmt * world, data_fmt * data, long locus);
105 void allocatepoproot (world_fmt * world, data_fmt * data, long locus);
106 void allocate_tip (world_fmt * world, option_fmt * options, node ** p,
107                    long pop, long locus, long a, long ind, char **tipnames);
108 void alloc_seqx (world_fmt * world, node * theNode);
109 void     allocate_xseq(xarray_fmt *x, long sites, long categs);
110 
111 /* first tree material (upgma, distance) */
112 void set_tree (world_fmt * world, option_fmt * options, data_fmt * data,
113                long locus);
114 // sets migration events into a tree read from the user (dna only)
115 void set_migrations (world_fmt * world, option_fmt * options, data_fmt * data,
116 					 long locus);
117 void distance_EP (char **data, long tips, MYREAL **m);
118 void distance_micro (char **data, long tips, MYREAL **m);
119 void distance_sequence (data_fmt * data, long locus, long tips, long sites,
120                         long nmlength, MYREAL **m);
121 void distance_allele (world_fmt * world, option_fmt * options, long locus,
122                       long tips, MYREAL **distm);
123 void randomize_distm(MYREAL **distm, data_fmt * data, long locus,
124 		     long tips, char *custm);
125 void constrain_distance_zeromig (MYREAL **m, option_fmt *options, data_fmt * data, long locus,
126                                  long tips, char *custm);
127 
128 void makevalues (world_fmt * world, option_fmt * options, data_fmt * data,
129                  long locus);
130 void make_alleles (world_fmt * world, option_fmt * options, data_fmt * data, long locus);
131 void make_microsatellites (world_fmt * world, option_fmt * options, data_fmt * data, long locus);
132 void make_microbrownian (world_fmt * world, option_fmt *options, data_fmt * data, long locus);
133 void upgma (world_fmt * world, MYREAL **x, long tips, node ** nodep);
134 void set_top (world_fmt * world, node * p, long locus);
135 void set_v (node * p);
136 void calc_sancost (MYREAL **cost, long numpop);
137 
138 
139 void free_treetimes (world_fmt * world, long size);
140 void traverseNodes (node * theNode, timelist_fmt ** timevector, long *slice, world_fmt *world, long *tips);
141 void increase_timelist (timelist_fmt ** timevector);
142 void allocate_lineages (timelist_fmt **timevector, const long offset, const long numpop);
143 void smooth (const node * root, node * p, world_fmt * world,
144              const long locus);
145 void which_nuview (char datatype, boolean fastlike, boolean use_gaps, int watkins);
146 void which_pseudonuview (char datatype, boolean fastlike, boolean use_gaps, int watkins);
147 void nuview_allele (node * mother, world_fmt * world, const long locus);
148 void nuview_micro (node * mother, world_fmt * world, const long locus);
149 void nuview_brownian (node * mother, world_fmt * world, const long locus);
150 void nuview_sequence (node * mother, world_fmt * world, const long locus);
151 void nuview_sequence_slow (node * mother, world_fmt * world,
152                            const long locus);
153 void nuview_ancestral (node * mother, world_fmt * world, const long locus);
154 void adjustroot (node * r);
155 MYINLINE MYREAL pseudo_tl_seq (phenotype xx1, phenotype xx2, MYREAL v1, MYREAL v2,
156                                proposal_fmt * proposal, world_fmt * world);
157 MYINLINE  MYREAL pseudo_tl_snp (phenotype xx1, phenotype xx2, MYREAL v1, MYREAL v2,
158                                 proposal_fmt * proposal, world_fmt * world);
159 MYINLINE  MYREAL pseudo_tl_snp_unlinked (phenotype xx1, phenotype xx2, MYREAL v1,
160                                          MYREAL v2, proposal_fmt * proposal,
161                                          world_fmt * world);
162 MYINLINE MYREAL pseudo_tl_anc (phenotype xx1, phenotype xx2, MYREAL v1, MYREAL v2,
163                                proposal_fmt * proposal, world_fmt * world);
164 MYINLINE void pseudonu_allele (proposal_fmt * proposal, MYREAL **xx1, MYREAL *lx1,
165                                MYREAL vv1, MYREAL *xx2, MYREAL lx2, MYREAL vv2);
166 MYINLINE void pseudonu_micro (proposal_fmt * proposal, MYREAL **xx1, MYREAL *lx1,
167                               MYREAL v1, MYREAL *xx2, MYREAL lx2, MYREAL v2);
168 MYINLINE void pseudonu_brownian (proposal_fmt * proposal, MYREAL **xx1, MYREAL *lx1,
169                                  MYREAL v1, MYREAL *xx2, MYREAL lx2, MYREAL v2);
170 MYINLINE void pseudonu_seq (proposal_fmt * proposal, phenotype xxx1, MYREAL *sxx1, MYREAL v1,
171                             phenotype xxx2, MYREAL *sxx2, MYREAL v2);
172 MYINLINE void pseudonu_seq_slow (proposal_fmt * proposal, phenotype xxx1, MYREAL *sxx1,
173                                  MYREAL v1, phenotype xxx2, MYREAL *sxx2, MYREAL v2);
174 MYINLINE void pseudonu_anc (proposal_fmt * proposal, phenotype xxx1, MYREAL v1,
175                             phenotype xxx2, MYREAL v2);
176 void calculate_steps (world_fmt * world);
177 MYREAL logfac (long n);
178 
179 
180 boolean treereader (world_fmt * world,  option_fmt *options,  data_fmt * data);
181 void length_to_times (node * p);
182 boolean treeread (FILE * file, world_fmt * world, option_fmt *options, node ** pp, node * q);
183 char processlength (FILE * file, node ** p, option_fmt * options);
184 node *allocate_nodelet (world_fmt *world, long num, char type);
185 void find_tips (node * p, node ** nodelist, long *z);
186 node *add_migration (world_fmt *world, node * p, long from, long to, MYREAL utime);
187 node *create_interior_node (world_fmt * world, node ** q);
188 node *create_root_node (world_fmt *world, node ** q);
189 node *create_tip_node (FILE * file, world_fmt * world, option_fmt *options, node ** q, char *ch);
190 boolean processbracket (FILE * file, world_fmt *world, node ** p, char *ch);
191 void set_tree_pop (node * p, long *pop);
192 void allocate_x (node * p, world_fmt * world, char datatype,
193                  boolean withtips);
194 long find_firstpop (node * p);
195 
196 void sankoff (world_fmt * world);
197 MYREAL minimum (MYREAL *vec1, MYREAL *vec2, long n);
198 void santraverse (world_fmt *world, node * theNode, MYREAL **cost, long numpop);
199 long ranbest (MYREAL *array, long tie, MYREAL best, long n);
200 void jumble (long *s, long n);
201 void jumble_ownseed (long *s, long n);
202 long number_genomes (int datatype);
203 
204 /* copy whole tree */
205 void copy_tree (world_fmt * original, world_fmt * kopie);
206 node *copy_node (world_fmt * original, node * o, world_fmt * kopie,
207                  node * last);
208 void copy_node_content (world_fmt * original, world_fmt * kopie, node * o,
209                         node * t);
210 void swap_tree (world_fmt * tthis, world_fmt * tthat);
211 void swap (void *a, void *b);
212 
213 void free_tree (node * p, world_fmt * world);
214 void free_tipnodelet (node * p, world_fmt * world);
215 void free_mignodelet (node * p, world_fmt * world);
216 void free_nodelet (node * p, long num, world_fmt * world);
217 void free_nodedata (node * p, world_fmt * world);
218 
219 MYREAL inverse_logprob_noevent (world_fmt * world, long interval);
220 MYREAL sum_migprob (world_fmt * world, long pop, long interval);
221 
222 MYREAL prob_micro_watkins (MYREAL t, long diff, world_fmt * world, pair *helper);
223 MYREAL prob_micro_singlestep (MYREAL t, long diff, world_fmt * world, pair *helper);
224 
225 void
226 debugtreeout (FILE * file, node * joint, node * p, long s);
227 
228 
229 /** global variable NUVIEW points to function nuview_datatype() */
230 static void (*nuview) (node *, world_fmt *, long);
231 static double (*prob_micro) (MYREAL , long , world_fmt *, pair *);
232 static void (*ppseudonuview)(proposal_fmt *, xarray_fmt , MYREAL *, MYREAL , xarray_fmt , MYREAL *, MYREAL);
233 /* ======================================================= */
234 /*
235  * Creates a start-genealogy using a coalescence approach
236  *
237  * - set NUVIEW according to datatype
238  * initializes tree structure - fills tree with data - set_tree():
239  * upgma-tree, adjust for times, sankoff() for migration events
240  */
241 /// \brief builds the starting tree
242 ///
243 /// Builds the starting tree.
244 /// \callgraph
245 void
buildtree(world_fmt * earth,world_fmt * world,option_fmt * options,data_fmt * data,long locus)246 buildtree (world_fmt *earth, world_fmt * world, option_fmt * options, data_fmt * data,
247            long locus)
248 {
249     long pop;
250     long genomes = number_genomes (options->datatype);
251     free_tree(world->root, world);
252     world->sumtips = 0;
253     world->migration_counts = 0;
254     for (pop = 0; pop < data->numpop; pop++)
255       {
256 	if(options->randomsubset > 0 && options->randomsubset < data->numind[pop][locus])
257 	  world->sumtips += options->randomsubset * genomes;
258 	else
259 	  world->sumtips += data->numalleles[pop][locus];
260       }
261     which_nuview (options->datatype, options->fastlike, FALSE, options->msat_option);
262     //future    which_pseudonuview (options->datatype, options->fastlike, FALSE, options->msat_option);
263     switch (options->datatype)
264       {
265       case 's':
266       case 'n':
267       case 'h':
268       case 'u':
269       case 'f':
270 	init_sequences (world, options, data, locus);
271 	init_sequences_aliases (earth, world, options, data, locus);
272 	break;
273       case 'b':
274 	world->data->seq[0]->endsite = 1;
275 	data->freq = -10000000000000.;
276 	break;
277       case 'm':
278 	world->data->seq[0]->endsite = 1;
279 	/* world->data->freq = 1. / world->options->micro_stepnum; */
280 	break;
281       case 'a':
282 	world->data->seq[0]->endsite = 1;
283 	world->data->freq = 1. / (data->maxalleles[locus]);
284 	world->data->freqlast = 1. - world->data->freq;
285       }
286 
287     if (options->usertree)
288       {
289         options->usertreewithmig = treereader (world, options, data);
290         makevalues (world, options, data, locus);//insert values into tips
291       }
292     else
293       {
294         allocate_tree (world, options, data, locus); //allocate nodep
295         makevalues (world, options, data, locus); //allocate/insert data into tips
296         allocateinterior(world,data,locus); //allocate nodep guts
297         allocatepoproot (world, data, locus); //allocate bottom parts
298         if (world->data->skiploci[locus])
299 	  return;
300       }
301     //    reorder_populations(world, options, data);
302 
303     if (strchr (SEQUENCETYPES, world->options->datatype))
304       {
305         init_tbl (world, locus);
306         if (world->cold && world->replicate == 0)
307 	  {
308             print_seqfreqs (world->outfile, world, options);
309             print_tbl (world->outfile, world, options, locus);
310             print_weights (world->outfile, world, options, locus);
311 	  }
312         if (world->options->progress)
313 	  {
314 	    if (world->cold &&  world->replicate == 0)
315 	      {
316 #ifndef MPI
317 		print_seqfreqs (stdout, world, options);
318 		print_tbl (stdout, world, options, locus);
319 		print_weights (stdout, world, options, locus);
320 #endif
321 		if (world->options->writelog)
322 		  {
323 		    print_seqfreqs (world->options->logfile, world, options);
324 		    print_tbl (world->options->logfile, world, options, locus);
325 		    print_weights (world->options->logfile, world, options,
326 				   locus);
327 		  }
328 	      }
329 	  }
330       }
331     if(!options->usertree)
332       set_tree (world, options, data, locus);
333     else
334       {
335         if(!options->usertreewithmig)
336 	  set_migrations (world, options, data, locus);
337       }
338     if (world->options->datatype == 'b')
339       world->data->maxalleles[locus] = XBROWN_SIZE;
340 
341     // calculate the migration counts for the first tree
342     world->migration_counts = 0;
343     count_migrations (world->root->next->back, &world->migration_counts);
344     //traverse_check(crawlback (world->root->next));printf("@");
345 
346 }
347 
348 /*
349  * creates the timelist which represents all time intervals on a tree. The
350  * timelist is an array of pointers and not a linked list.
351  *
352  * - allocates memory using an arbitrary value this will be later adjusted if a
353  * longer list is needed - construct
354  */
355 void
create_treetimelist(world_fmt * world,timelist_fmt ** ltl,long locus)356 create_treetimelist (world_fmt * world,  timelist_fmt ** ltl, long locus)
357 {
358     if ((*ltl)->tl == NULL)
359     {
360       (*ltl)->allocT =  TIMELIST_GUESS;
361       (*ltl)->tl = (vtlist *) mycalloc ((*ltl)->allocT, sizeof (vtlist));
362         allocate_lineages (ltl, 0, world->numpop);
363     }
364     (*ltl)->copies = 0;
365     construct_tymelist (world, (*ltl));
366 }
367 
368 
369 void
allocate_lineages(timelist_fmt ** timevector,const long offset,const long numpop)370 allocate_lineages (timelist_fmt **timevector, const long offset, const long numpop)
371 {
372     long i;
373     const long allocT = (*timevector)->allocT;
374     vtlist * tl;
375     long *lineages;
376 
377     // speedup and simplification for allocating and freeing
378     // accessing is still done the old way through tl[i].lineages
379     // but instead having memory scattered the allocation is one long string
380     // freeing time for lineages should be reduced this way [I hope]
381     if((*timevector)->lineages != NULL)
382       {
383 	(*timevector)->lineages = (long *) myrealloc((*timevector)->lineages,sizeof(long)*numpop*allocT);
384 	if(offset != allocT)
385 	  {
386 	    memset((*timevector)->lineages+offset,0,sizeof(long)*numpop*(allocT-offset));
387 	  }
388       }
389     else
390       (*timevector)->lineages = (long *) mycalloc(numpop*allocT,sizeof(long));
391 
392     tl = (*timevector)->tl;
393     lineages = (*timevector)->lineages;
394 
395     for (i = offset; i < allocT; i++)
396       {
397 	tl[i].lineages = lineages + i * numpop;
398       }
399 }
400 
401 /*
402  * start first pass trhough the tree to calculate the tree-likleihood
403  */
404 void
first_smooth(world_fmt * world,long locus)405 first_smooth (world_fmt * world, long locus)
406 {
407     smooth (world->root->next, crawlback (world->root->next), world, locus);
408 }
409 
410 /*
411  * Marks a node, so that TREELIKELIHOOD() will recalulated values in node
412  */
413 void
set_dirty(node * p)414 set_dirty (node * p)
415 {
416     p->dirty = TRUE;
417 }
418 ///
419 /// inserts the from and to into the timelist from the tree
420 void
timeslices(timelist_fmt ** timevector)421 timeslices (timelist_fmt ** timevector)
422 {
423     long z;
424     vtlist * tl = (*timevector)->tl;
425     vtlist * tlz;
426     node * eventnode;
427     const long T = (*timevector)->T;
428 
429     for (z = 0; z < T; z++)
430     {
431       tlz        = &(tl[z]);
432       eventnode  = tlz->eventnode;
433       tlz->from  = eventnode->pop;
434       tlz->to    = eventnode->actualpop;
435       tlz->slice = z;
436     }
437 }
438 
439 void
add_partlineages(long numpop,timelist_fmt ** timevector)440 add_partlineages (long numpop, timelist_fmt ** timevector)
441 {
442   // this points to the MRCA
443   const long T = (*timevector)->T - 2;
444 
445   long i, pop;
446   vtlist *tl = (*timevector)->tl;
447   vtlist *tli;
448   vtlist *tli1;
449   long from;
450   long to;
451   long *lineages;
452   char type;
453   // this should add a lineages for the MRCA
454   memset(tl[T+1].lineages, 0, numpop * sizeof(long));
455   tl[T+1].lineages[tl[T].eventnode->pop] = 1;
456   for (i = T; i >= 0; i--)
457     {
458       tli1 = &tl[i+1];
459       tli = &tl[i];
460       lineages = tli->lineages;
461       from = tli->from;
462       to = tli->to;
463       type = tli->eventnode->type;
464       memset(lineages, 0, numpop * sizeof(long));
465       // if the node is an internode (a coalescent node) then add an additional line
466       // if it is a tip reduce one
467       if(type == 't')
468 	{
469 	  lineages[to] -= 1;
470 	}
471       else
472 	{
473 	  if(type != 'r')
474 	    {
475 	      if (from == to)
476 		{
477 		  lineages[to] += 1;
478 		}
479 	      else
480 		{
481 		  lineages[to] += 1;
482 		  lineages[from] -= 1;
483 		}
484 	    }
485 	}
486       // this copies the content from the last timeinterval (tli1) to the next (tli)
487       //      printf("%i> lineages %5li:", myID, i);
488       for (pop = 0; pop < numpop; pop++)
489 	{
490 	  lineages[pop] += tli1->lineages[pop];
491 	  //  printf(" %li",lineages[pop]);
492 	}
493       //      printf(" %f %c %li-->%li %s\n",tli->eventnode->tyme, tli->eventnode->type, tli->from, tli->to, tli->eventnode->nayme);
494     }
495 }
496 
497 void
add_partlineages_313(long numpop,timelist_fmt ** timevector)498 add_partlineages_313 (long numpop, timelist_fmt ** timevector)
499 {
500   // this points to the MRCA
501   const long T = (*timevector)->T - 2;
502 
503   long i, pop;
504   vtlist *tl = (*timevector)->tl;
505   vtlist *tli;
506   vtlist *tli1;
507   long from;
508   long to;
509   long *lineages;
510   char type;
511   // this should add a lineages for the MRCA
512   memset(tl[T+1].lineages, 0, numpop * sizeof(long));
513   tl[T+1].lineages[tl[T].eventnode->pop] = 1;
514   for (i = T; i >= 0; i--)
515     {
516       tli1 = &tl[i+1];
517       tli = &tl[i];
518       lineages = tli->lineages;
519       from = tli1->from;
520       to = tli1->to;
521       type = tli1->eventnode->type;
522       memset(lineages, 0, numpop * sizeof(long));
523       // if the node is an internode (a coalescent node) then add an additional line
524       // if it is a tip reduce one
525       if(type == 't')
526 	{
527 	  lineages[to] -= 1;
528 	}
529       else
530 	{
531 	  if(type != 'r')
532 	    {
533 	      if (from == to)
534 		{
535 		  lineages[to] += 1;
536 		}
537 	      else
538 		{
539 		  lineages[to] += 1;
540 		  lineages[from] -= 1;
541 		}
542 	    }
543 	}
544       // this copies the content from the last timeinterval (tli1) to the next (tli)
545       //      printf("%i> lineages %5li:", myID, i);
546       for (pop = 0; pop < numpop; pop++)
547 	{
548 	  lineages[pop] += tli1->lineages[pop];
549 	  //  printf(" %li",lineages[pop]);
550 	}
551       //      printf(" %f %c %li-->%li %s\n",tli->eventnode->tyme, tli->eventnode->type, tli->from, tli->to, tli->eventnode->nayme);
552     }
553 }
554 
555 void
add_partlineages_312(long numpop,timelist_fmt ** timevector)556 add_partlineages_312 (long numpop, timelist_fmt ** timevector)
557 {
558   // this points to the MRCA
559   const long T = (*timevector)->T - 2;
560 
561   long i, pop;
562   vtlist *tl = (*timevector)->tl;
563   vtlist *tli;
564   vtlist *tli1;
565   long from;
566   long to;
567   long *lineages;
568   char type;
569   // this should add a lineages for the MRCA
570   memset(tl[T+1].lineages, 0, numpop * sizeof(long));
571     tl[T].lineages[tl[T].eventnode->pop] = 1;// the tMRCA has then 1 lineages looking to the root
572   tl[T+1].lineages[tl[T].eventnode->pop] = 0;// the tMRCA has then 1 lineages looking to the root
573   for (i = T; i > 0; i--)
574     {
575       tli1 = &tl[i-1];
576       tli = &tl[i];
577       lineages = tli1->lineages;
578       from = tli->from;
579       to = tli->to;
580       type = tli->eventnode->type;
581       memset(lineages, 0, numpop * sizeof(long));
582       // if the node is an internode (a coalescent node) then add an additional line
583       // if it is a tip reduce one
584       if(type == 't')
585 	{
586 	    lineages[to] -= 1;
587 	}
588       else
589 	{
590 	  if(type != 'r')
591 	    {
592 	      if (from == to)
593 		{
594 		  lineages[to] += 1;
595 		}
596 	      else
597 		{
598 		  lineages[to] += 1;
599 		  lineages[from] -= 1;
600 		}
601 	    }
602 	}
603       // this copies the content from the last timeinterval (tli1) to the next (tli)
604       //printf("%i> lineages %5li:", myID, i);
605       for (pop = 0; pop < numpop; pop++)
606 	{
607 	  lineages[pop] += tli->lineages[pop];
608 	  //printf(" %li",lineages[pop]);
609 	}
610       //      printf(" %f %c %li-->%li %s lin=%li\n",tli->eventnode->tyme, tli->eventnode->type, tli->from, tli->to, tli->eventnode->nayme, lineages[0]);
611     }
612 }
613 
614 
615 /*
616  calculates the tree-likelihood according to datatype a, m, b, s,u
617  */
618 MYREAL
treelikelihood(world_fmt * world)619 treelikelihood (world_fmt * world)
620 {
621     long a;
622     MYREAL term = 0.0;
623     //MYREAL term1 = 0.0;
624     node *nn = crawlback (world->root->next);
625     //#ifdef BEAGLE
626     //reset_beagle(world->beagle);
627     //#endif
628 #ifdef BEAGLE
629     printf("TreeLike:");
630     term = calcLnL(world,FALSE);
631     //    printf("Start LnL from Beagle: %f\n",term);
632     return term;
633 #endif
634     set_dirty (nn);
635     smooth (world->root->next, crawlback(world->root->next), world, world->locus);
636     adjustroot (world->root);
637     switch (world->options->datatype)
638     {
639     case 's':
640       term = treelike_seq (world, world->locus);
641       break;
642     case 'n':
643     case 'h':
644       term = treelike_snp (world, world->locus);
645       break;
646     case 'u':
647       term = treelike_snp_unlinked (world, world->locus);
648       break;
649     case 'a':
650       //		  printf("@@@ ");
651       for (a = 0; a < world->data->maxalleles[world->locus] - 1; a++)
652 	{
653 	  term += (world->data->freq * nn->x.a[a]);
654 	  //  printf("%f ",term);
655 	}
656       term += (world->data->freqlast * nn->x.a[a]);
657       //		  printf("%f ",term);
658       term = (term != 0.0) ? (LOG (term) + nn->scale[0]) : -MYREAL_MAX;
659       //		  printf("\n@@treelike=%f scale=%f\n",term, nn->scale[0]);
660       break;
661     case 'm':
662       for (a = 0; a < world->data->maxalleles[world->locus]; a++)
663 	term += nn->x.a[a];
664       term = (term != 0.0) ? (LOG (term) + nn->scale[0]) : -MYREAL_MAX;
665       break;
666     case 'b':
667       term = nn->x.a[2];
668       break;
669     case 'f':
670       term = treelike_anc (world, world->locus);
671       break;
672     }
673 #ifdef UEP
674     if (world->options->uep)
675         ueplikelihood (world);
676 #endif
677 #ifdef DEBUG
678     //     printf("@L(D|G)=%f\n",term);
679 #endif
680     return term;
681 }
682 
683 /*
684  * calculates tree-likelihood using only arrays DOES NOT CHANGE ARRAYS IN THE
685  * TREE
686  */
687 MYREAL
pseudotreelikelihood(world_fmt * world,proposal_fmt * proposal)688 pseudotreelikelihood (world_fmt * world, proposal_fmt * proposal)
689 {
690   long a, locus = world->locus;
691   /* freq is not different between pop */
692   MYREAL term = 0.0;
693   switch (world->options->datatype)
694     {
695     case 's':
696 #ifdef BEAGLE
697       printf("PseudoLike: ");
698       return calcLnL(world,world->beagle->instance);
699 #endif
700       term = pseudo_tl_seq (proposal->xf.s, proposal->xt.s, proposal->v,
701 			    proposal->vs, proposal, world);
702 
703       break;
704     case 'n':
705     case 'h':
706       term = pseudo_tl_snp (proposal->xf.s, proposal->xt.s, proposal->v,
707 			    proposal->vs, proposal, world);
708       break;
709     case 'u':
710       term = pseudo_tl_snp_unlinked (proposal->xf.s, proposal->xt.s,
711 				     proposal->v, proposal->vs, proposal,
712 				     world);
713       break;
714     case 'a':
715       //		  printf("@@@");
716       for (a = 0; a < world->data->maxalleles[locus] - 1; a++)
717 	{
718 	  term += (world->data->freq * proposal->xf.a[a]);
719 	  //printf("%f ",term);
720 	}
721       term += (world->data->freqlast * proposal->xf.a[a]);
722       //printf("%f ",term);
723       if (term == 0.0)
724 	term = -MYREAL_MAX;
725       else
726 	term = (LOG (term) + proposal->mf[0]);
727       //			printf("\n@@pseudolike=%f scale=%f\n",term, proposal->mf[0]);
728       break;
729     case 'b':
730       term = proposal->xf.a[2];
731       break;
732     case 'm':
733       for (a = 0; a < world->data->maxalleles[locus]; a++)
734 	{
735 	  term += proposal->xf.a[a];
736 	}
737       if (term == 0.0)
738 	term = -MYREAL_MAX;
739       else
740 	term = (LOG (term) + proposal->mf[0]);
741       break;
742     case 'f':
743       term = pseudo_tl_anc (proposal->xf.s, proposal->xt.s, proposal->v,
744 			    proposal->vs, proposal, world);
745       break;
746     default:
747       warning("no datatype found for treelikelihood() calculation!");
748       term = -MYREAL_MAX;
749       break;
750     }
751 #ifdef UEP
752   if (proposal->world->options->uep)
753     term += pseudo_tl_uep (&(proposal->uf), &proposal->ut, proposal->v,
754 			   proposal->vs, proposal, world);
755 #endif
756   if(MYFINITE(((double) term)) == 0)
757     term = -MYREAL_MAX;
758 #ifdef DEBUG
759   //    printf("   proposed L(D|G)=%f\n",term);
760 #endif
761   return term;
762 }
763 
764 
765 /*
766  * Calculates the sub-likelihoods but does not change the arrays in the tree,
767  * it uses the passed arrays and overwrites the xx1 array DOES NOT CHANGE THE
768  * TREE
769  */
770 void
pseudonuview(proposal_fmt * proposal,xarray_fmt xx1,MYREAL * lx1,MYREAL v1,xarray_fmt xx2,MYREAL * lx2,MYREAL v2)771 pseudonuview (proposal_fmt * proposal, xarray_fmt xx1, MYREAL *lx1, MYREAL v1,
772               xarray_fmt xx2, MYREAL *lx2, MYREAL v2)
773 {
774   /*  (*ppseudonuview)(proposal, xx1, lx1, v1, xx2, lx2, v2);
775   return;
776 }
777 */
778     switch (proposal->datatype)
779     {
780     case 'a':
781       pseudonu_allele (proposal, &xx1.a, &(lx1[0]), v1, xx2.a, lx2[0], v2);
782       break;
783     case 'b':
784       pseudonu_brownian (proposal, &xx1.a, &(lx1[0]), v1, xx2.a, lx2[0], v2);
785       break;
786     case 'm':
787       pseudonu_micro (proposal, &xx1.a, &(lx1[0]), v1, xx2.a, lx2[0], v2);
788       break;
789     case 'u':
790     case 'n':
791     case 'h':
792 //      pseudonu_seq (proposal, xx1.s, v1, xx2.s, v2);
793 //      break;
794     case 's':
795       //#ifdef BEAGLE
796       //prepare_beagle_instances_proposal(proposal,xx1.s, v1,xx2.s, v2, world->beagle);
797       //#else
798       if (proposal->world->options->fastlike)
799 	pseudonu_seq (proposal, xx1.s, NULL, v1, xx2.s, NULL, v2);
800       else
801 	pseudonu_seq_slow (proposal, xx1.s, lx1, v1, xx2.s, lx2, v2);
802       //#endif
803       break;
804     case 'f':
805       pseudonu_anc (proposal, xx1.s, v1, xx2.s, v2);
806       break;
807     }
808 #ifdef  UEP
809     if (proposal->world->options->uep)
810     {
811         pseudonu_twostate (proposal, &proposal->uf, proposal->umf, v1,
812                            &proposal->ut, proposal->umt, v2);
813     }
814 #endif
815 }
816 
817 
818 MYINLINE void
pseudonu_allele(proposal_fmt * proposal,MYREAL ** xx1,MYREAL * lx1,MYREAL vv1,MYREAL * xx2,MYREAL lx2,MYREAL vv2)819 pseudonu_allele (proposal_fmt * proposal, MYREAL **xx1, MYREAL *lx1,
820                  MYREAL vv1, MYREAL *xx2, MYREAL lx2, MYREAL vv2)
821 {
822 
823     long   a;
824     long   aa;
825     long   locus = proposal->world->locus; /* allele counters */
826     long   mal   = proposal->world->data->maxalleles[locus]; /* maxalleles */
827     MYREAL freq  = proposal->world->data->freq;
828     //MYREAL freqlast = proposal->world->data->freqlast; // same as 1 - k/(k+1) = 1/(k+1) = freq
829     MYREAL w1 = 0.0; /* time variables */
830     MYREAL w2 = 0.0;
831     MYREAL v1;
832     MYREAL v2;
833     MYREAL v1freq;
834     MYREAL v2freq;
835     MYREAL pija1;/* summary of probabilities */
836     MYREAL pija2;
837     MYREAL x3m = -MYREAL_MAX;
838     MYREAL inv_x3m;
839     MYREAL *xx3;
840     xx3 = (MYREAL *) mymalloc(sizeof(MYREAL) * mal);
841     v1 = 1.0 - EXP (-vv1);
842     v2 = 1.0 - EXP (-vv2);
843     if (v1 >= 1.)
844     {
845         w1 = 0.0;
846         v1 = 1.0;
847     }
848     else
849     {
850         w1 = 1.0 - v1;
851     }
852     if (v2 >= 1.)
853     {
854         w2 = 0.0;
855         v2 = 1.0;
856     }
857     else
858     {
859         w2 = 1.0 - v2;
860     }
861     //    printf("@@pseudonu 1:{%f,%f,%f,%f}   2:(%f,%f,%f,%f}\n",(*xx1)[0],(*xx1)[1],*lx1,v1,
862     //	   xx2[0], xx2[1],lx2,v2);
863 
864     v1freq = v1 * freq;
865     v2freq = v2 * freq;
866 
867     for (aa = 0; aa < mal; aa++)
868     {
869         pija1 = pija2 = 0.0;
870         for (a = 0; a < mal; a++)
871         {
872 	  if(aa==a)
873 	    {
874 	      pija1 += (w1 + v1freq) * (*xx1)[a];
875 	      pija2 += (w2 + v2freq) * xx2[a];
876 	    }
877 	  else
878 	    {
879 	      pija1 += v1freq * (*xx1)[a];
880 	      pija2 += v2freq * xx2[a];
881 	    }
882 	}
883         xx3[aa] = pija1 * pija2;
884         if (xx3[aa] > x3m)
885             x3m = xx3[aa];
886     }
887     inv_x3m = 1./ x3m;
888     for (aa = 0; aa < mal; aa++)
889     {
890         (*xx1)[aa] = xx3[aa] * inv_x3m;
891     }
892     //    printf("@@finish: %f %f\n",(*xx1)[0],(*xx1)[1]);
893     *lx1 = LOG (x3m) + lx2 + *lx1;
894     myfree(xx3);
895 }
896 
897 MYINLINE
898 void
pseudonu_micro(proposal_fmt * proposal,MYREAL ** xx1,MYREAL * lx1,MYREAL v1,MYREAL * xx2,MYREAL lx2,MYREAL v2)899 pseudonu_micro (proposal_fmt * proposal, MYREAL **xx1, MYREAL *lx1, MYREAL v1,
900                 MYREAL *xx2, MYREAL lx2, MYREAL v2)
901 {
902     long a, s, diff, locus = proposal->world->locus; /* allele counters */
903     long aa1, aa2;
904     long smax = proposal->world->data->maxalleles[locus];
905     long margin = proposal->world->options->micro_threshold[locus];
906     MYREAL pija1s, pija2s, vv1, vv2;
907     MYREAL x3m = -MYREAL_MAX;
908     world_fmt *world = proposal->world;
909     MYREAL inv_x3m;
910     MYREAL *xx3;
911     MYREAL *pm1;
912     MYREAL *pm2;
913     pair *helper = &world->options->msat_tuning;
914     xx3 = (MYREAL *) mymalloc(sizeof(MYREAL) * smax);
915     vv1 = v1;
916     vv2 = v2;
917 
918     pm1 = (MYREAL *) mymalloc(sizeof(MYREAL) * 2 * margin);
919     pm2 = pm1 +  margin;
920 
921     for (diff = 0; diff < margin; diff++)
922       {
923 	pm1[diff] = prob_micro (vv1, diff, world, helper);
924 	pm2[diff] = prob_micro (vv2, diff, world, helper);
925       }
926 
927     for (s = 0; s < smax; s++)
928     {
929         pija1s = pija2s = 0.0;
930 	aa1 = MAX (0, s - margin);
931 	aa2 = MIN(s + margin,smax);
932 	for (a = aa1; a < aa2; a++)
933 	  //for (a = 0; a < smax; a++)
934         {
935             diff = labs (s - a);
936 	    if(diff >= margin)
937 	      continue;
938 
939             if ((*xx1)[a] > 0)
940             {
941                 pija1s += pm1[diff] * (*xx1)[a];
942             }
943             if (xx2[a] > 0)
944             {
945                 pija2s += pm2[diff] * xx2[a];
946             }
947         }
948         xx3[s] = pija1s * pija2s;
949         if (xx3[s] > x3m)
950             x3m = xx3[s];
951     }
952     inv_x3m = 1./ x3m;
953     for (s = 0; s < smax; s++)
954     {
955         (*xx1)[s]= xx3[s] * inv_x3m;
956     }
957     *lx1 += LOG (x3m) + lx2;
958     myfree(xx3);
959     myfree(pm1);
960 }
961 
962 //================================================
963 // brownian motion calculation for data likelihood: ghost-version
964 // changed divisions to multiply inverse
965 // simplified check and include of boundary for vtot
966 MYINLINE
967 void
pseudonu_brownian(proposal_fmt * proposal,MYREAL ** xx1,MYREAL * lx1,MYREAL v1,MYREAL * xx2,MYREAL lx2,MYREAL v2)968 pseudonu_brownian (proposal_fmt * proposal, MYREAL **xx1, MYREAL *lx1,
969                    MYREAL v1, MYREAL *xx2, MYREAL lx2, MYREAL v2)
970 {
971     MYREAL vtot, rvtot, c12;
972     MYREAL mean1, mean2, mean, vv1, vv2, f1, f2, diff;
973     mean1 = (*xx1)[0];
974     mean2 = xx2[0];
975 
976     vv1 = v1 + (*xx1)[1];
977     vv2 = v2 + xx2[1];
978     vtot = vv1 + vv2;
979     if (vtot > 0.0)
980     {
981         rvtot = 1./vtot;
982         f1 = vv2 * rvtot;
983         f2 = 1.0 - f1;
984         mean = f1 * mean1 + f2 * mean2;
985         diff = mean1 - mean2;
986         c12 = diff * diff * rvtot;
987         (*xx1)[2] = (*xx1)[2] + xx2[2] + MIN (0, -0.5 * (LOG (vtot) + c12) + LOG2PIHALF);
988         (*xx1)[1] = vv1 * f1;
989         (*xx1)[0] = mean;
990     }
991     else
992     {
993         //xcode rvtot = HUGE;
994         //xcode vtot = SMALL_VALUE;
995         f1 = 0.5;
996         (*xx1)[2] = HUGE;
997         (*xx1)[1] = vv1 * f1;
998         (*xx1)[0] = f1 * (mean1 + mean2);
999     }
1000 }
1001 
1002 /*
1003  adjust the variables POP and ACTUALPOP in interior nodes
1004  */
1005 void
set_pop(node * theNode,long pop,long actualpop)1006 set_pop (node * theNode, long pop, long actualpop)
1007 {
1008     switch (theNode->type)
1009     {
1010 		case 'm':
1011 			theNode->pop = theNode->next->pop = pop;
1012 			theNode->actualpop = theNode->next->actualpop = actualpop;
1013 			break;
1014 		case 'i':
1015 		case 'r':
1016 			theNode->pop = theNode->next->pop = theNode->next->next->pop = pop;
1017 			theNode->actualpop = theNode->next->actualpop = actualpop;
1018 			theNode->next->next->actualpop = actualpop;
1019 			break;
1020 		case 't':
1021 			if (theNode->pop != pop)
1022 				error ("Population designation scrambled");
1023 			break;
1024 		default:
1025 			error ("Undefined node type?!");
1026 			break;
1027     }
1028 }
1029 
1030 
1031 
1032 /*
1033  * ======================================================= local functions
1034  */
1035 
1036 void
allocate_tree(world_fmt * world,option_fmt * options,data_fmt * data,long locus)1037 allocate_tree(world_fmt * world, option_fmt * options, data_fmt * data,
1038               long locus)
1039 {
1040     long nodenum = 0, pop, numpop = data->numpop;
1041     for (pop = 0; pop < numpop; pop++)
1042     {
1043         nodenum += data->numalleles[pop][locus] * 2;
1044     }
1045     world->nodep = (node **) mycalloc (nodenum+1, sizeof (node *));
1046 }
1047 
1048 void
allocatetips(world_fmt * world,option_fmt * options,data_fmt * data,long locus)1049 allocatetips (world_fmt * world, option_fmt * options,
1050               data_fmt * data, long locus)
1051 {
1052     long a, pop;
1053     long zz = 0;
1054     for (pop=0; pop < data->numpop; pop++)
1055     {
1056         for(a=0; a < data->numalleles[pop][locus]; a++)
1057         {
1058 	  allocate_tip (world, options, &world->nodep[zz], pop, locus, zz, a,
1059 			data->indnames[pop][a]);
1060             zz++;
1061         }
1062     }
1063 }
1064 
1065 void
allocateinterior(world_fmt * world,data_fmt * data,long locus)1066 allocateinterior (world_fmt * world, data_fmt * data, long locus)
1067 {
1068   long sites = world->data->seq[0]->endsite + world->data->seq[0]->addon;
1069     node *p;
1070     long i;
1071     long temp=0;
1072     long mini = 0, maxi = 0;
1073     long numpop = data->numpop;
1074     for (i = 0; i < numpop; i++)
1075     {
1076         temp += data->numalleles[i][locus];
1077     }
1078     mini = temp;
1079     maxi = temp * 2 - 1;
1080     for (i = mini; i < maxi; i++)
1081     {
1082       p = allocate_nodelet (world, 3, 'i');
1083         p->top = TRUE;
1084         p->scale =
1085             (MYREAL *) mycalloc (sites, sizeof (MYREAL));
1086         p->s = (MYREAL *) mycalloc (1, sizeof (MYREAL) * world->numpop);
1087 	p->id = i;
1088         world->nodep[i] = p;
1089     }
1090 }
1091 
1092 node *
allocate_nodelet(world_fmt * world,long num,char type)1093 allocate_nodelet (world_fmt *world, long num, char type)
1094 {
1095   boolean isfirst = TRUE;
1096   long j;
1097   node *p, *q = NULL, *pfirst = NULL;
1098   for (j = 0; j < num; j++)
1099     {
1100       p = dispense_nodelet(world);
1101       if(p==NULL)
1102 	{
1103 	  warning( "%i> heat=%f Nodelet dispenser failed to supply nodelet\n",myID,1./world->heat);
1104 	  p = (node *) mymalloc (sizeof (node));
1105 	}
1106       p->tip = FALSE;
1107       p->visited = FALSE;
1108       p->number = unique_id_global;
1109       p->pop = -1;
1110       p->actualpop = -1;
1111       p->type = type;
1112       p->id = -1;
1113       p->top = FALSE;
1114       p->dirty = TRUE;
1115       p->next = q;
1116       p->scale = NULL;
1117       p->s = NULL;
1118       p->x.s = NULL;
1119       p->x.a = NULL;
1120 #ifdef UEP
1121 
1122       p->uep = NULL;
1123       p->ux.s = NULL;
1124       p->ux.a = NULL;
1125 #endif
1126       p->back = NULL;
1127       p->nayme = NULL;
1128       p->v = 0.0;
1129       p->tyme = 0.0;
1130       p->length = 0.0;
1131       if (isfirst)
1132         {
1133 	  isfirst = FALSE;
1134 	  pfirst = p;
1135         }
1136       q = p;
1137     }
1138     if(pfirst !=NULL)
1139 	  pfirst->next = q;
1140   return q;
1141 }
1142 
1143 void
allocatepoproot(world_fmt * world,data_fmt * data,long locus)1144 allocatepoproot (world_fmt * world, data_fmt * data, long locus)
1145 {
1146   long sites = world->data->seq[0]->endsite + world->data->seq[0]->addon;
1147   //long i;
1148   node *p, *q;//, *qq;
1149     //long nodenum = 0;  /* arbitrarily to the first */
1150     q = NULL;
1151     //p = allocate_nodelet (world, 3, 'i');
1152     //p->top = TRUE;
1153     //for (i = 0; i < world->numpop; i++)
1154     //    nodenum += data->numalleles[i][locus];
1155     //p->x.a = (MYREAL *) mycalloc (nodenum + 1, sizeof (MYREAL));
1156     //p->top = TRUE;
1157     //qq = p;
1158     //q = NULL;
1159     p = allocate_nodelet (world, 3, 'r');
1160     p->top = TRUE;
1161     p->scale = (MYREAL *) mycalloc (sites, sizeof (MYREAL));
1162     //p->next->back = qq;
1163     //qq->back = p->next;
1164     world->root = p;
1165 }
1166 
1167 
1168 void
allocate_tip(world_fmt * world,option_fmt * options,node ** p,long pop,long locus,long a,long ind,char ** tipnames)1169 allocate_tip (world_fmt * world, option_fmt * options, node ** p, long pop,
1170               long locus, long a, long ind, char **tipnames)
1171 {
1172   const long sites = world->data->seq[0]->endsite + world->data->seq[0]->addon;
1173   long i;
1174   char *tipname;
1175   if(options->usertree)
1176         return; //do nothing because the tip is alread allocated
1177     if(tipnames[locus][0]!='\0')
1178       tipname = tipnames[locus];
1179     else
1180       tipname = tipnames[0];
1181     (*p) = allocate_nodelet (world, 1, 't');
1182     (*p)->tip = TRUE;
1183     (*p)->top = TRUE;
1184     (*p)->id  = a;
1185     if(options->has_datefile)
1186       {
1187 	(*p)->tyme = find_tipdate(tipname, pop, world);
1188       }
1189     else
1190       (*p)->tyme = 0.0;
1191     (*p)->scale = (MYREAL *) mycalloc (sites, sizeof (MYREAL));
1192     (*p)->pop = (*p)->actualpop = options->newpops[pop]-1;
1193     (*p)->s = (MYREAL *) mycalloc (1, sizeof (MYREAL) * world->numpop);
1194     for (i = 0; i < world->numpop; i++)
1195       (*p)->s[i] = MYREAL_MAX;
1196     (*p)->s[options->newpops[pop]-1] = 0;
1197     if (strchr (SEQUENCETYPES, world->options->datatype))
1198       {
1199         (*p)->nayme = (char * ) strdup(tipname);
1200 	unpad((*p)->nayme," ");
1201 	translate((*p)->nayme,' ', '_');
1202         alloc_seqx (world, (*p));
1203       }
1204     else
1205       {
1206         (*p)->x.a =
1207 	  (MYREAL *) mycalloc (1,
1208 			       world->data->maxalleles[locus] * sizeof (MYREAL));
1209         (*p)->nayme = (char *) strdup(tipname);
1210 	unpad((*p)->nayme," ");
1211 	translate((*p)->nayme,' ', '_');
1212       }
1213 #ifdef UEP
1214     if (world->options->uep)
1215       {
1216         (*p)->uep = (int *) mycalloc (world->data->uepsites, sizeof (int));
1217         (*p)->ux.s = (pair *) mycalloc (world->data->uepsites, sizeof (pair));
1218       }
1219 #endif
1220 }
1221 
1222 
1223 void
makevalues(world_fmt * world,option_fmt * options,data_fmt * data,long locus)1224 makevalues (world_fmt * world, option_fmt * options, data_fmt * data,
1225             long locus)
1226 {
1227   long invi;
1228   long invsites;
1229   seqmodel_fmt * seq = world->data->seq[0];
1230   // switching accorind to option and not world->option
1231   // this would allow to run snips with known number of invariant sites
1232   //
1233   world->options->datatype=options->datatype;
1234     switch (world->options->datatype)
1235     {
1236     case 'a':
1237       make_alleles (world, options, data, locus);
1238       break;
1239     case 'b':
1240       make_microbrownian (world, options, data, locus);
1241       break;
1242     case 'm':
1243       make_microsatellites (world, options, data, locus);
1244       break;
1245     case 's':
1246     case 'f':
1247       //  allocatetips (world, options, data, locus);
1248       make_sequences (world, options, data, locus);
1249       seq->oldsite = seq->endsite;
1250       break;
1251     case 'n':
1252     case 'h':
1253       //allocatetips (world, options, data, locus);
1254       make_sequences (world, options, data, locus);
1255       make_invarsites (world, data, locus);
1256       seq->oldsite = seq->endsite;
1257       invi = seq->endsite;
1258       seq->endsite += 4;
1259       if (options->totalsites>0)
1260 	{
1261 	  invsites = options->totalsites - seq->oldsite;
1262 	  seq->aliasweight[invi++] = (long) invsites*seq->basefrequencies[NUC_A];
1263 	  seq->aliasweight[invi++] = (long) invsites*seq->basefrequencies[NUC_C];
1264 	  seq->aliasweight[invi++] = (long) invsites*seq->basefrequencies[NUC_G];
1265 	  seq->aliasweight[invi]   = (long) invsites*seq->basefrequencies[NUC_T];
1266 	  world->options->datatype = 's';
1267 	}
1268       break;
1269     case 'u':
1270       allocatetips (world, options, data, locus);
1271       make_snp (world, options, data, locus);
1272       make_invarsites_unlinked (world, data, locus);
1273       seq->oldsite = seq->endsite;
1274       seq->endsite *= (seq->addon + 1);
1275       break;
1276     default:
1277       usererror ("Oh yes, it would be nice if there were more\n \
1278 possible datatypes than just an\n				\
1279 allele model, microsatellite model,\n sequence model, or	\
1280 single nucleotide polymorphism model.\n				\
1281 But there are currently no others, so the programs stops\n\n");
1282 			break;
1283     }
1284 #ifdef UEP
1285     if (world->options->uep)
1286     {
1287         make_uep_values (world, data, locus);
1288     }
1289 #endif
1290 }
1291 
1292 ///
1293 ///
1294 /// creates the branchlength and adds migration nodes to the
1295 /// start tree
1296 /// - creates rough genetic distance for upgma
1297 /// - upgma
1298 /// - find branches where we need to insert migrations
1299 /// - insert migrations
1300 /// - adjust time of all nodes using the coalescent with migration
1301 void
set_tree(world_fmt * world,option_fmt * options,data_fmt * data,long locus)1302 set_tree (world_fmt * world, option_fmt * options, data_fmt * data,
1303           long locus)
1304 {
1305     long     tips = world->sumtips;
1306     MYREAL   **distm;
1307     node     **topnodes;
1308 
1309     topnodes = (node **) mycalloc (1, sizeof (node *) * tips);
1310     doublevec2d(&distm,tips, tips);
1311 
1312     if (!options->randomtree)
1313       {
1314 	/* create a crude distance matrix according to the datatype */
1315 	switch (world->options->datatype)
1316 	  {
1317 	  case 'a':
1318 	  case 'b':
1319 	  case 'm':
1320 	    distance_allele (world, options, locus, tips, distm);
1321 	    break;
1322 	  case 's':
1323 	  case 'n':
1324 	  case 'h':
1325 	  case 'u':
1326 	  case 'f':
1327 	    distance_sequence (data, locus, tips,
1328 			       world->data->seq[0]->sites[locus],
1329 			       options->nmlength, distm);
1330 	    break;
1331 	  }
1332       }
1333     else
1334       {
1335 	randomize_distm (distm, data, locus, tips,
1336 			 world->options->custm);
1337 	//printf("\n\nRANDOM TREE START\n\n");
1338       }
1339     constrain_distance_zeromig (distm, options, data, locus, tips,
1340 				world->options->custm);
1341     //    printf("distm[0][1]=%f distm[1][2]=%f\n",distm[0][1],distm[1][2]);
1342     //fflush(stdout);
1343 
1344 #ifdef UEP
1345 	if (options->uep)
1346 	  constrain_distance_uep (data->uep, world->data->uepsites, distm,
1347 				  tips);
1348 #endif
1349     if(!options->usertree)
1350       upgma (world, distm, tips, world->nodep);
1351     myfree(distm[0]);
1352     myfree(distm);
1353     //}
1354     world->root->tyme = world->root->next->tyme =
1355       world->root->next->next->tyme = world->root->next->back->tyme + 10000.;
1356     /* orient the tree up-down, set the length and v */
1357     set_top (world, world->root->next->back, locus);
1358     set_v (world->root->next->back);
1359 #ifdef BEAGLE
1360     long bid=0;
1361     bid = set_branch_index (world->root->next->back, &bid);
1362 #endif
1363     /*
1364      * insert migration nodes into the tree using the Slatkin and
1365      * Maddison approach (Fitch parsimony)
1366      */
1367     memcpy (topnodes, world->nodep, sizeof (node *) * tips);
1368     //zzz = 0;
1369     if(!options->usertree) //TRIAL
1370       allocate_x (world->root, world, world->options->datatype, NOTIPS);
1371 #ifdef UEP
1372 
1373     if (world->options->uep)
1374       {
1375 	//      allocate_uep (world->root, world, world->options->datatype, NOTIPS);
1376 	update_uep (world->root->next->back, world);
1377 	check_uep_root (world->root->next->back, world);
1378       }
1379 #endif
1380     //    debugtreeout (stdout, crawlback (world->root->next), crawlback (world->root->next),0);
1381     sankoff (world);
1382     //    debugtreeout (stdout, crawlback (world->root->next), crawlback (world->root->next),0);
1383     myfree(topnodes);
1384 }    /* set_tree */
1385 
1386 /*
1387  creates the branchlength and adds migration nodes to the
1388  start tree
1389  - creates rough genetic distance for upgma
1390  - upgma
1391  - find branches where we need to insert migrations
1392  - insert migrations
1393  - adjust time of all nodes using the coalescent with migration
1394  */
1395 void
set_migrations(world_fmt * world,option_fmt * options,data_fmt * data,long locus)1396 set_migrations (world_fmt * world, option_fmt * options, data_fmt * data,
1397 				long locus)
1398 {
1399     long tips = world->sumtips;
1400 
1401     node **topnodes;
1402     topnodes = (node **) mycalloc (1, sizeof (node *) * tips);
1403 	// can we do this here ???#ifdef UEP
1404 	//            if (options->uep)
1405 	//            constrain_distance_uep (data->uep, world->data->uepsites, distm,  tips);
1406 	//#endif
1407 	world->root->tyme = world->root->next->tyme =
1408 		world->root->next->next->tyme = world->root->next->back->tyme + 10000.;
1409 	/* orient the tree up-down, set the length and v */
1410 	set_top (world, world->root->next->back, locus);
1411 	set_v (world->root->next->back);
1412 	/*
1413 	 * insert migration nodes into the tree using the Slatkin and
1414 	 * Maddison approach (Fitch parsimony)
1415 	 */
1416 	memcpy (topnodes, world->nodep, sizeof (node *) * tips);
1417 #ifdef UEP
1418 	if (world->options->uep)
1419 	{
1420 		//      allocate_uep (world->root, world, world->options->datatype, NOTIPS);
1421 		update_uep (world->root->next->back, world);
1422 		check_uep_root (world->root->next->back, world);
1423 	}
1424 #endif
1425 	sankoff (world);
1426 	myfree(topnodes);
1427 }    /* set_migrations */
1428 
1429 
randomize_distm(MYREAL ** distm,data_fmt * data,long locus,long tips,char * custm)1430 void randomize_distm(MYREAL **distm, data_fmt * data, long locus,
1431                             long tips, char *custm)
1432 {
1433   long i;
1434   long j;
1435   for(i=0;i<tips; i++)
1436     {
1437       distm[i][i] = 0.0;
1438       for(j=0;j<i;j++)
1439 	{
1440 	  distm[i][j] = distm[j][i] = RANDUM();
1441 	  //printf("%f ",distm[i][j]);
1442 	}
1443       //printf("\n");
1444     }
1445 }
1446 
1447 
1448 void
constrain_distance_zeromig(MYREAL ** m,option_fmt * options,data_fmt * data,long locus,long tips,char * custm)1449 constrain_distance_zeromig (MYREAL **m, option_fmt *options, data_fmt * data, long locus,
1450                             long tips, char *custm)
1451 {
1452     long pop, ind, j, i = 0;
1453     long *pops;
1454     pops = (long *) mycalloc (tips, sizeof (long));
1455     for (pop = 0; pop < data->numpop; pop++)
1456     {
1457         for (ind = 0; ind < data->numalleles[pop][locus]; ind++)
1458 	  {
1459             pops[i++] = options->newpops[pop];
1460 	    //OLD	    pops[i++] = pop;
1461 	  }
1462     }
1463     for (i = 0; i < tips; i++)
1464     {
1465         for (j = 0; j < i; j++)
1466         {
1467             if (custm[pops[i] + pops[i] * pops[j]] == '0')
1468                 m[i][j] = m[j][i] = 1000;
1469 	    //printf("%f ",m[i][j]);
1470         }
1471 	//printf("\n");
1472     }
1473     myfree(pops);
1474 }
1475 
1476 void
distance_EP(char ** data,long tips,MYREAL ** m)1477 distance_EP (char **data, long tips, MYREAL **m)
1478 {
1479     long i, j;
1480     for (i = 0; i < tips; i++)
1481     {
1482         for (j = 0; j < i; j++)
1483         {
1484             if (!strcmp (data[i], data[j]))
1485                 m[i][j] = m[j][i] = fabs (rannor (1., 0.1));
1486             else
1487                 m[i][j] = m[j][i] = fabs (rannor (0., 0.1));
1488         }
1489     }
1490 }
1491 
1492 void
distance_micro(char ** data,long tips,MYREAL ** m)1493 distance_micro (char **data, long tips, MYREAL **m)
1494 {
1495     long i, j;
1496     for (i = 0; i < tips; i++)
1497     {
1498         for (j = 0; j < i; j++)
1499         {
1500             m[i][j] = m[j][i] = pow (atof (data[i]) - atof (data[j]), 2.);
1501             m[i][j] = m[j][i] = fabs (rannor (m[i][j], 0.1));
1502         }
1503     }
1504 }
1505 
1506 /* calculate pairwise distances using allele disimilarity */
1507 void
distance_allele(world_fmt * world,option_fmt * options,long locus,long tips,MYREAL ** distm)1508 distance_allele (world_fmt * world, option_fmt * options, long locus,
1509                  long tips, MYREAL **distm)
1510 {
1511     char **mdata;
1512     long pop;
1513 
1514     mdata = (char **) mycalloc (1, sizeof (char *) * (tips + 1));
1515     for (pop = 0; pop < tips; pop++)
1516     {
1517         mdata[pop] =
1518 		(char *) mycalloc (1, sizeof (char) * options->allelenmlength);
1519         strcpy (mdata[pop], strrchr(world->nodep[pop]->nayme,'!')+1);
1520     }
1521     if (world->options->datatype == 'a')
1522         distance_EP (mdata, tips, distm);
1523     else
1524         distance_micro (mdata, tips, distm);
1525     for (pop = 0; pop < tips; pop++)
1526     {
1527         myfree(mdata[pop]);
1528     }
1529     myfree(mdata);
1530 }
1531 
1532 /* calculate  pairwise distances using sequence similarity */
1533 void
distance_sequence(data_fmt * data,long locus,long tips,long sites,long nmlength,MYREAL ** m)1534 distance_sequence (data_fmt * data, long locus, long tips, long sites,
1535                    long nmlength, MYREAL **m)
1536 {
1537     long i = 0, j, z, pop, ind;
1538     char **dat;
1539     if (data->distfile != NULL)
1540     {
1541         read_distance_fromfile (data->distfile, tips, nmlength, m);
1542     }
1543     else
1544     {
1545         dat = (char **) mymalloc (sizeof (char *) * tips);
1546         for (pop = 0; pop < data->numpop; pop++)
1547         {
1548             for (ind = 0; ind < data->numalleles[pop][locus]; ind++)
1549             {
1550                 dat[i++] = data->yy[pop][ind][locus][0];
1551             }
1552         }
1553         if (i != tips)
1554         {
1555             error ("Mistake in distance_sequence() tips is not equal sum(i)\n");
1556         }
1557         for (i = 0; i < tips; i++)
1558         {
1559 
1560             for (j = i + 1; j < tips; j++)
1561             {
1562                 //to come m[i][j] = m[j][i] = make_ml_distance(dat[i], dat[j], i, j);
1563 
1564                 for (z = 0; z < sites; z++)
1565                 {
1566 
1567                     if (dat[i][z] != dat[j][z])
1568                     {
1569                         //m[i][j] = m[j][i] += fabs(rannor(1.0, 0.1));
1570                         m[i][j] = m[j][i] += 1.0;
1571                     }
1572                 }
1573             }
1574         }
1575         myfree(dat);
1576     }
1577 }
1578 
1579 
1580 
1581 void
fix_times(world_fmt * world,option_fmt * options)1582 fix_times (world_fmt * world, option_fmt * options)
1583 {
1584     long k;
1585     MYREAL tipdate=0.0;
1586     node *theNode;
1587     MYREAL age = world->data->maxsampledate
1588       * world->options->meanmu[world->locus]
1589       * world->options->generation_year  * world->options->mu_rates[world->locus];
1590     if (!options->usertree)
1591     {
1592 #ifdef TREEDEBUG
1593       printf("%i> %s\nage= %f\nmaxsamp=%f\nmut=%f\ngen=%f\nmu_rate=%g\n-----------------------------\n",myID, "start timelist ---------------------", age,world->data->maxsampledate,world->options->meanmu[world->locus],1./world->options->generation_year,world->options->mu_rates[world->locus]);
1594 #endif
1595       for (k = 0; k < world->treetimes[0].T - 1; k++)
1596 	{
1597 	  theNode = world->treetimes[0].tl[k].eventnode;
1598 	  if(theNode->type == 't')
1599 	    {
1600 	      //tipdate = find_tipdate(theNode->nayme, theNode->pop, world);
1601 	      tipdate = theNode->tyme;
1602 #ifdef TREEDEBUG
1603 	      printf("DEBUG TIPDATE %i> %s %g\n",myID, theNode->nayme, tipdate);
1604 #endif
1605 	      //theNode->tyme = tipdate;
1606 	      world->treetimes[0].tl[k].age = tipdate;
1607 	    }
1608 	  else
1609 	    {
1610 	      //k here is never 0, so this k-1 seems correct
1611 	      age += inverse_logprob_noevent (world, k-1);
1612 	      world->treetimes[0].tl[k].age = age;
1613 	      adjust_time (theNode, age);
1614 #ifdef TREEDEBUG
1615 	      printf("%i> %10.10s %g\n",myID, " ", theNode->tyme);
1616 #endif
1617 	    }
1618         }
1619       world->treetimes[0].tl[k].age = age + 10000.; /* this is the root */
1620       adjust_time (world->treetimes[0].tl[k].eventnode, age+10000);
1621 #ifdef TREEDEBUG
1622       printf("%i> %s %g\n",myID, "end timelist ---------------------", age+10000 );
1623 #endif
1624     }
1625     set_v (world->root->next->back);
1626 }
1627 
1628 /*
1629  * creates a UPGMA tree: x     = distance matrix which will be destroyed
1630  * through the process, tips  = # of sequences/alleles, nodep = treenodes
1631  * have to be allocated for ALL nodes
1632  *
1633  * This code is stripped neighbor-joining code out of phylip v3.6. Only the
1634  * upgma option is present.
1635  */
1636 void
upgma(world_fmt * world,MYREAL ** x,long tips,node ** nodep)1637 upgma (world_fmt * world, MYREAL **x, long tips, node ** nodep)
1638 {
1639     long nc, nextnode, mini = -900, minj = -900, i, j, jj, ia, iaa, ja, jaa;
1640     MYREAL zz = (world->data->maxsampledate
1641 		 * world->options->meanmu[world->locus]
1642 		 * world->options->generation_year  * world->options->mu_rates[world->locus]+ 1);
1643     MYREAL total, tmin, bi, bj, /* ti, tj, */ da;
1644     MYREAL *av;
1645     long *oc;
1646     node **cluster;
1647     long *enterorder;
1648 	MYREAL denom=1.;
1649     /* First initialization */
1650     enterorder = (long *) mycalloc (tips, sizeof (long));
1651     for (ia = 0; ia < tips; ia++)
1652         enterorder[ia] = ia;
1653     jumble (enterorder, tips);
1654     nextnode = tips;
1655     av = (MYREAL *) mycalloc (tips, sizeof (MYREAL));
1656     oc = (long *) mymalloc (tips * sizeof (long));
1657     cluster = (node **) mycalloc (tips, sizeof (node *));
1658     for (i = 0; i < tips; i++)
1659         oc[i] = 1;
1660     for (i = 0; i < tips; i++)
1661         cluster[i] = nodep[i];
1662     /* Enter the main cycle */
1663     for (nc = 0; nc < tips - 1; nc++)
1664     {
1665         tmin = 99999.0;
1666         /* Compute sij and minimize */
1667         for (jaa = 1; jaa < tips; jaa++)
1668         {
1669             ja = enterorder[jaa];
1670             if (cluster[ja] != NULL)
1671             {
1672                 for (iaa = 0; iaa < jaa; iaa++)
1673                 {
1674                     ia = enterorder[iaa];
1675                     if (cluster[ia] != NULL)
1676                     {
1677                         total = x[ia][ja];
1678 			//printf("ia=%li ja=%li x[ia][ja]=%f\n",ia,ja,x[ia][ja]);
1679                         if (total < tmin)
1680                         {
1681                             tmin = total;
1682                             mini = ia;
1683                             minj = ja;
1684                         }
1685                     }
1686                 }
1687             }
1688         }   /* compute lengths and print */
1689         bi = x[mini][minj] / 2.0 - av[mini];
1690         bj = x[mini][minj] / 2.0 - av[minj];
1691         av[mini] += bi;
1692         nodep[nextnode]->next->back = cluster[mini];
1693       	//xcode
1694         if(cluster[mini]!=NULL)
1695         {
1696             if(nodep[nextnode]->next!=NULL)
1697             {
1698                 cluster[mini]->back = nodep[nextnode]->next;
1699             }
1700             nodep[nextnode]->next->next->back = cluster[minj];
1701             cluster[minj]->back = nodep[nextnode]->next->next;
1702             cluster[mini]->back->v = cluster[mini]->v = bi;
1703             cluster[minj]->back->v = cluster[minj]->v = bj;
1704             cluster[mini] = nodep[nextnode];
1705         }
1706         adjust_time (nodep[nextnode], (MYREAL) zz++);
1707         cluster[minj] = NULL;
1708         nextnode++;
1709         /* re-initialization */
1710         denom = (oc[mini] + oc[minj]);
1711         for (jj = 0; jj < tips; jj++)
1712         {
1713             if (cluster[jj] != NULL)
1714             {
1715                 da = (x[mini][jj] * oc[mini] + x[minj][jj] * oc[minj])/denom ;
1716                 x[mini][jj] = da;
1717                 x[jj][mini] = da;
1718             }
1719         }
1720         for (j = 0; j < tips; j++)
1721         {
1722             x[minj][j] = x[j][minj] = 0.0;
1723         }
1724         oc[mini] += oc[minj];
1725     }
1726     /* the last cycle */
1727     for (i = 0; i < tips; i++)
1728     {
1729         if (cluster[i] != NULL)
1730             break;
1731     }
1732     world->root->next->back = cluster[i];
1733     cluster[i]->back = world->root->next;
1734     myfree(av);
1735     myfree(oc);
1736     myfree(cluster);
1737     myfree(enterorder);
1738 }
1739 
1740 void
set_top(world_fmt * world,node * p,long locus)1741 set_top (world_fmt * world, node * p, long locus)
1742 {
1743 
1744 
1745     if (p->type == 't')
1746     {
1747         p->top = TRUE;
1748         //p->tyme = 0.0;
1749         return;
1750     }
1751     p->top = TRUE;
1752     p->next->top = FALSE;
1753     if (p->type != 'm')
1754     {
1755         p->next->next->top = FALSE;
1756     }
1757     set_top (world, p->next->back, locus);
1758     if (p->type != 'm')
1759     {
1760         set_top (world, p->next->next->back, locus);
1761     }
1762     if (p == crawlback (world->root->next))
1763     {
1764         p->back->top = FALSE;
1765         p->back->tyme = ROOTLENGTH;
1766     }
1767 }    /* set_top */
1768 
1769 
1770 void
set_v(node * p)1771 set_v (node * p)
1772 {
1773     if (p->type == 't')
1774     {
1775         p->v = p->length = lengthof (p);
1776         return;
1777     }
1778     ltov (p);
1779     set_v (crawlback (p->next));
1780     set_v (crawlback (p->next->next));
1781 }    /* set_v */
1782 
1783 void
ltov(node * p)1784 ltov (node * p)
1785 {
1786     p->v = lengthof (p);
1787 }    /* ltov */
1788 
1789 /*
1790  * cost matrix COST is for the ISLAND and MATRIX model all 1 with a 0
1791  * diagonal, this will be changable through options, but perhaps this is not
1792  * so important
1793  */
1794 void
calc_sancost(MYREAL ** cost,long numpop)1795 calc_sancost (MYREAL **cost, long numpop)
1796 {
1797     long i, j;
1798     for (i = 0; i < numpop; i++)
1799     {
1800         for (j = 0; j < numpop; j++)
1801         {
1802             if (i != j)
1803                 cost[i][j] = 1.0;
1804             else
1805                 cost[i][i] = 0.0;
1806         }
1807     }
1808 }
1809 
1810 void
sankoff(world_fmt * world)1811 sankoff (world_fmt * world)
1812 {
1813     MYREAL **cost;
1814     long i;
1815     cost = (MYREAL **) mymalloc (sizeof (MYREAL *) * world->numpop);
1816     cost[0] =
1817         (MYREAL *) mymalloc (sizeof (MYREAL) * world->numpop * world->numpop);
1818     for (i = 1; i < world->numpop; i++)
1819     {
1820         cost[i] = cost[0] + world->numpop * i;
1821     }
1822     calc_sancost (cost, world->numpop);
1823     santraverse (world, crawlback (world->root->next), cost, world->numpop);
1824     myfree(cost[0]);
1825     myfree(cost);
1826 }
1827 
1828 void
jumble(long * s,long n)1829 jumble (long *s, long n)
1830 {
1831     long *temp, i, rr, tn = n;
1832 
1833     temp = (long *) mycalloc (1, sizeof (long) * n);
1834     memcpy (temp, s, sizeof (long) * n);
1835     for (i = 0; i < n && tn > 0; i++)
1836     {
1837         s[i] = temp[rr = RANDINT (0, tn - 1)];
1838         temp[rr] = temp[tn - 1];
1839         tn--;
1840     }
1841     myfree(temp);
1842 }
1843 
1844 
1845 #ifdef WIN32
1846 #define SUBSETRANDOM (((double) rand())/((double) RAND_MAX))
1847 #else
1848 #define SUBSETRANDOM (drand48())
1849 #endif
1850 
1851 // this uses a different stream than the usual random number
1852 // and is used to guarantee that the selection of individual subsets can be
1853 // maintained between different runs
1854 void
jumble_ownseed(long * s,long n)1855 jumble_ownseed (long *s, long n)
1856 {
1857     long *temp, i, rr, tn = n;
1858 
1859     temp = (long *) mycalloc (1, sizeof (long) * n);
1860     memcpy (temp, s, sizeof (long) * n);
1861     for (i = 0; i < n && tn > 0; i++)
1862     {
1863       s[i] = temp[rr = (long) ( SUBSETRANDOM * (tn - 1))];
1864       temp[rr] = temp[tn - 1];
1865       tn--;
1866     }
1867     myfree(temp);
1868 }
1869 
1870 void
santraverse(world_fmt * world,node * theNode,MYREAL ** cost,long numpop)1871 santraverse (world_fmt *world, node * theNode, MYREAL **cost, long numpop)
1872 {
1873     long i, ii, tie, which = 0;
1874     node *p = NULL, *q = NULL, *tmp = NULL, *left = NULL, *right = NULL;
1875     MYREAL best;
1876     long *poplist;
1877     poplist = (long *) mycalloc (1, sizeof (long) * numpop);
1878     if (theNode->type != 't')
1879     {
1880         if (RANDUM () > 0.5)
1881         {
1882             left = theNode->next;
1883             right = theNode->next->next;
1884         }
1885         else
1886         {
1887             left = theNode->next->next;
1888             right = theNode->next;
1889         }
1890         if (left->back != NULL)
1891         {
1892           p = crawlback (left);
1893 	  santraverse (world, p, cost, numpop);
1894         }
1895         if (right->back != NULL)
1896         {
1897           q = crawlback (right);
1898 	  santraverse (world, q, cost, numpop);
1899         }
1900         best = MYREAL_MAX;
1901         tie = 0;
1902         for (i = 0; i < numpop; i++)
1903             poplist[i] = i;
1904         jumble (poplist, numpop);
1905         for (ii = 0; ii < numpop; ii++)
1906         {
1907             i = poplist[ii];
1908           	if(p!=NULL && q!=NULL)
1909             {
1910             	theNode->s[i] =
1911                 	minimum (cost[i], p->s, numpop) + minimum (cost[i], q->s, numpop);
1912             }
1913             if (theNode->s[i] < best)
1914             {
1915                 best = theNode->s[i];
1916                 which = i;
1917                 tie = 0;
1918             }
1919             else
1920             {
1921                 if (theNode->s[i] == best)
1922                 {
1923                     tie++;
1924                     which = i;
1925                 }
1926             }
1927         }
1928         if (tie != 0)
1929         {
1930             theNode->pop = theNode->actualpop = which =
1931 			ranbest (theNode->s, tie, best, numpop);
1932             theNode->s[which] -= SANKOFF_DELTA;
1933         }
1934         else
1935         {
1936             theNode->pop = theNode->actualpop = which;
1937         }
1938         if (p!=NULL && p->actualpop != which)
1939         {
1940 	  //	  printf("%i> p: theNode->tyme=%f, p->tyme=%f mignode=%f\n",myID, theNode->tyme,p->tyme,p->tyme + (theNode->tyme - p->tyme) / 2.);
1941             tmp = add_migration (world, p, theNode->actualpop, p->actualpop,
1942 			     (MYREAL) RANDDOUBLE(0.0, theNode->tyme - p->tyme));
1943             left->back = tmp;
1944             tmp->back = left;
1945             //debug FPRINTF(startfile, "%li %li\n", theNode->actualpop, p->actualpop);
1946         }
1947         if (q!=NULL && q->actualpop != which)
1948         {
1949             tmp = add_migration (world, q, theNode->actualpop, q->actualpop,
1950 			     (MYREAL) RANDDOUBLE(0.0, theNode->tyme - q->tyme));
1951 
1952 	    //printf("%i> q: theNode->tyme=%f, q->tyme=%f mignode=%f\n",myID, theNode->tyme,q->tyme,q->tyme + (theNode->tyme - q->tyme) / 2.);
1953             right->back = tmp;
1954             tmp->back = right;
1955             //debug FPRINTF(startfile, "%li %li\n", theNode->actualpop, q->actualpop);
1956         }
1957     }
1958     myfree(poplist);
1959 }
1960 
1961 /* returns minimum for sankoff routine */
1962 MYREAL
minimum(MYREAL * vec1,MYREAL * vec2,long n)1963 minimum (MYREAL *vec1, MYREAL *vec2, long n)
1964 {
1965     long j;
1966     MYREAL summ, min = MYREAL_MAX;
1967     for (j = 0; j < n; j++)
1968     {
1969         if (vec2[j] < MYREAL_MAX)
1970         {
1971             if ((summ = vec1[j] + vec2[j]) < min)
1972                 min = summ;
1973         }
1974     }
1975     return min;
1976 }
1977 
1978 long
ranbest(MYREAL * array,long tie,MYREAL best,long n)1979 ranbest (MYREAL *array, long tie, MYREAL best, long n)
1980 {
1981     long i;
1982     long which = RANDINT (0, tie);
1983     for (i = 0; i < n; i++)
1984     {
1985         if (fabs (array[i] - best) < EPSILON)
1986         {
1987             if (which == 0)
1988                 return i;
1989             else
1990                 which--;
1991         }
1992     }
1993     return -2;
1994 }
1995 
1996 ///
1997 /// allocate memory for sequence data
1998 void
alloc_seqx(world_fmt * world,node * theNode)1999 alloc_seqx (world_fmt * world, node * theNode)
2000 {
2001     //    long j,z;
2002     long endsite = world->data->seq[0]->endsite;
2003     if (strchr ("u", world->options->datatype))
2004     {
2005         endsite = endsite * (world->data->seq[0]->addon + 1) + world->data->seq[0]->addon;
2006     }
2007     if (strchr ("nh", world->options->datatype))
2008     {
2009         endsite += world->data->seq[0]->addon;
2010     }
2011     allocate_xseq(&theNode->x, endsite, world->options->rcategs);
2012 }
2013 
allocate_xseq(xarray_fmt * x,long sites,long categs)2014 void     allocate_xseq(xarray_fmt *x, long sites, long categs)
2015 {
2016     long j;
2017     (*x).s = (phenotype) mycalloc (sites, sizeof (ratelike *));
2018 #ifdef VARMUT
2019     (*x).s[0] = (ratelike) mycalloc (linkedloci * categs, sizeof (MYREAL) * sites[datamodeltype] * sitelikesize[datamodeltype]);
2020 #else
2021     (*x).s[0] = (ratelike) mycalloc (categs * sites, sizeof (sitelike));
2022 #endif /*VARMUT*/
2023     for (j = 1; j < sites; j++)
2024         (*x).s[j] = (*x).s[0] + categs * j;
2025 
2026 }
2027 
2028 ///
2029 /// zeroes sequence data vector
zero_xseq(xarray_fmt * x,long sites,long categs)2030 void     zero_xseq(xarray_fmt *x, long sites, long categs)
2031 {
2032     long j;
2033     //    (*x).s = (phenotype) mycalloc (sites, sizeof (ratelike *));
2034     memset((*x).s[0], 0, categs * sites * sizeof (sitelike));
2035     for (j = 1; j < sites; j++)
2036         (*x).s[j] = (*x).s[0] + categs * j;
2037 
2038 }
2039 
2040 void
make_alleles(world_fmt * world,option_fmt * options,data_fmt * data,long locus)2041 make_alleles (world_fmt * world, option_fmt * options, data_fmt * data, long locus)
2042 {
2043   long ii,top;
2044     long pop, ind;
2045     long zz=0;
2046     long zpop;
2047     long iu;
2048     long len;
2049     char a1[DEFAULT_ALLELENMLENGTH];
2050     char a2[DEFAULT_ALLELENMLENGTH];
2051     node **nodelist = world->nodep;
2052     for (pop = 0; pop < data->numpop; pop++)
2053     {
2054       // preparation of combining populations
2055       //      if(world->custm[m2mm(pop,world->numpop)])
2056       //
2057       zpop=0;
2058       top = max_shuffled_individuals(options, data, pop, locus);
2059       for (ii = 0; ii < top; ii++)
2060         {
2061 	  ind = data->shuffled[pop][locus][ii];
2062 	  strcpy (a1, data->yy[pop][ind][locus][0]);
2063 	  strcpy (a2, data->yy[pop][ind][locus][1]);
2064 	  if (strcmp (a1, "?"))
2065             {
2066 	      allocate_tip(world,options, &world->nodep[zz],pop,locus,zz,ind,data->indnames[pop][ind]);
2067 	      len = strlen(nodelist[zz]->nayme);
2068 	      nodelist[zz]->nayme = (char *) realloc(nodelist[zz]->nayme,sizeof(char)*(len+5+strlen(a1)));
2069 	      strcat(nodelist[zz]->nayme,"A!");
2070 	      strcat(nodelist[zz]->nayme,a1);
2071 	      nodelist[zz++]->x.a[findAllele (data, a1, locus)] = 1.0;
2072 	      zpop++;
2073             }
2074 	  else
2075 	    {
2076 	      if(options->include_unknown)
2077 		{
2078 		  fprintf(stdout,"? found\n");
2079 		  allocate_tip(world,options, &world->nodep[zz],pop,locus,zz,ind,data->indnames[pop][ind]);
2080 		  for(iu=0;iu<data->maxalleles[locus]; iu++)
2081 		    nodelist[zz]->x.a[iu] = 1.0;
2082 		  len = strlen(nodelist[zz]->nayme);
2083 		  nodelist[zz]->nayme = (char *) realloc(nodelist[zz]->nayme,sizeof(char)*(len+5+strlen(a1)));
2084 		  strcat(nodelist[zz]->nayme,"A!");
2085 		  strcat(nodelist[zz]->nayme,a1);
2086 		  zz++;
2087 		  zpop++;
2088 		}
2089 	    }
2090 	  if (strcmp (a2, "?"))
2091             {
2092 	      allocate_tip(world,options, &world->nodep[zz],pop,locus,zz,ind,data->indnames[pop][ind]);
2093 	      len = strlen(nodelist[zz]->nayme);
2094 	      nodelist[zz]->nayme = (char *) realloc(nodelist[zz]->nayme,sizeof(char)*(len+5+strlen(a2)));
2095 	      strcat(nodelist[zz]->nayme,"B!");
2096 	      strcat(nodelist[zz]->nayme,a2);
2097 	      nodelist[zz++]->x.a[findAllele (data, a2, locus)] = 1.0;
2098 	      zpop++;
2099             }
2100 	  else
2101 	    {
2102 	      if(options->include_unknown)
2103 		{
2104 		  allocate_tip(world,options, &world->nodep[zz],pop,locus,zz,ind,data->indnames[pop][ind]);
2105 		  //		  strcpy (nodelist[zz]->nayme, a1);
2106 		  for(iu=0;iu<data->maxalleles[locus]; iu++)
2107 		    nodelist[zz]->x.a[iu] = 1.0;
2108 		  len = strlen(nodelist[zz]->nayme);
2109 		  nodelist[zz]->nayme = (char *) realloc(nodelist[zz]->nayme,sizeof(char)*(len+5+strlen(a2)));
2110 		  strcat(nodelist[zz]->nayme,"B!");
2111 		  strcat(nodelist[zz]->nayme,a2);
2112 		  zz++;
2113 		  zpop++;
2114 		}
2115 	    }
2116         }
2117       data->numalleles[pop][locus] = zpop;
2118     }
2119     world->sumtips = zz;
2120     if (world->sumtips==0)
2121       {
2122 	data->skiploci[locus] = TRUE;
2123         world->data->skiploci[locus] = TRUE;
2124         world->skipped += 1;
2125       }
2126 }
2127 
2128 void
find_minmax_msat_allele(world_fmt * world,data_fmt * data,long locus,long * smallest,long * biggest)2129 find_minmax_msat_allele (world_fmt * world, data_fmt * data, long locus,
2130                          long *smallest, long *biggest)
2131 {
2132     long pop, ind, tmp;
2133     *biggest = 0;
2134     *smallest = LONG_MAX;
2135     for (pop = 0; pop < data->numpop; pop++)
2136     {
2137         for (ind = 0; ind < data->numind[pop][locus]; ind++)
2138         {
2139             if (data->yy[pop][ind][locus][0][0] != '?')
2140             {
2141                 if ((tmp = atoi (data->yy[pop][ind][locus][0])) > *biggest)
2142                     *biggest = tmp;
2143                 if (tmp < *smallest)
2144                     *smallest = tmp;
2145             }
2146             if (data->yy[pop][ind][locus][1][0] != '?')
2147             {
2148                 if ((tmp = atoi (data->yy[pop][ind][locus][1])) > *biggest)
2149                     *biggest = tmp;
2150                 if (tmp < *smallest)
2151                     *smallest = tmp;
2152             }
2153         }
2154     }
2155 }
2156 
2157 void
make_microsatellites(world_fmt * world,option_fmt * options,data_fmt * data,long locus)2158 make_microsatellites (world_fmt * world, option_fmt *options, data_fmt * data, long locus)
2159 {
2160   long top;
2161   long pop, ind, ii;//, tmp = 0;//, tips = 0, unknownsum = 0;
2162   long zz=0;
2163   long zpop;
2164   long iu;
2165   long len;
2166   long smallest = 0;
2167   long biggest = 0;
2168     long smax;
2169   node **nodelist = world->nodep;
2170   char a1[DEFAULT_ALLELENMLENGTH];
2171   char a2[DEFAULT_ALLELENMLENGTH];
2172   calculate_steps (world);
2173   find_minmax_msat_allele (world, data, locus, &smallest, &biggest);
2174   smax = biggest - smallest;
2175   if(smax == 0)
2176     smax = 1;
2177   if(world->options->micro_threshold[locus] > smax)
2178     world->options->micro_threshold[locus] = smax;
2179   smax += 2 * world->options->micro_threshold[locus];
2180   world->data->maxalleles[locus] = data->maxalleles[locus] = smax;
2181   //we start with the smallest - margin and go up to biggest + margin
2182   smallest = smallest - 1 - world->options->micro_threshold[locus];
2183   if (smallest < 0)
2184     smallest = 0;
2185   for (pop = 0; pop < data->numpop; pop++)
2186     {
2187       zpop=0;
2188       top = max_shuffled_individuals(options, data, pop, locus);
2189       for (ii = 0; ii < top; ii++)
2190         {
2191 	  ind = data->shuffled[pop][locus][ii];
2192 	  strcpy (a1, data->yy[pop][ind][locus][0]);
2193 	  strcpy (a2, data->yy[pop][ind][locus][1]);
2194 	  if (strcmp (a1, "?"))
2195             {
2196 	      allocate_tip(world,options, &world->nodep[zz],pop,locus,zz, ind,
2197 			   data->indnames[pop][ind]);
2198 	      nodelist[zz]->x.a =
2199 		(MYREAL *) myrealloc (nodelist[zz]->x.a, sizeof (MYREAL) * smax);
2200 	      len = strlen(nodelist[zz]->nayme);
2201 	      nodelist[zz]->nayme = (char *) realloc(nodelist[zz]->nayme,sizeof(char)*(len+5+strlen(a1)));
2202 	      strcat(nodelist[zz]->nayme,"A!");
2203 	      strcat(nodelist[zz]->nayme,a1);
2204 	      nodelist[zz++]->x.a[atoi (a1) - smallest] = 1.0;
2205 	      zpop++;
2206             }
2207 	  else
2208 	    {
2209 	      if(options->include_unknown)
2210 		{
2211 		  allocate_tip(world,options, &world->nodep[zz],pop,locus,zz, ind,data->indnames[pop][ind]);
2212 		  //		  strcpy (nodelist[zz]->nayme, a1);
2213 		  for(iu=0;iu<data->maxalleles[locus]; iu++)
2214 		    nodelist[zz]->x.a[iu] = 1.0;
2215 		  len = strlen(nodelist[zz]->nayme);
2216 		  nodelist[zz]->nayme = (char *) realloc(nodelist[zz]->nayme,sizeof(char)*(len+5+strlen(a1)));
2217 		  strcat(nodelist[zz]->nayme,"A!");
2218 		  strcat(nodelist[zz]->nayme,a1);
2219 		  zz++;
2220 		  zpop++;
2221 		}
2222 	    }
2223 	  if (strcmp (a2, "?"))
2224 	    {
2225 	      allocate_tip(world,options, &world->nodep[zz],pop,locus,zz, ind,
2226 			data->indnames[pop][ind]);
2227 	      nodelist[zz]->x.a =
2228 		(MYREAL *) myrealloc (nodelist[zz]->x.a, sizeof (MYREAL) * smax);
2229 	      len = strlen(nodelist[zz]->nayme);
2230 	      nodelist[zz]->nayme = (char *) realloc(nodelist[zz]->nayme,sizeof(char)*(len+5+strlen(a2)));
2231 	      strcat(nodelist[zz]->nayme,"B!");
2232 	      strcat(nodelist[zz]->nayme,a2);
2233 	      nodelist[zz++]->x.a[atoi (a2) - smallest] = 1.0;
2234 	      zpop++;
2235 	    }
2236 	  else
2237 	    {
2238 	      if(options->include_unknown)
2239 		{
2240 		  allocate_tip(world,options, &world->nodep[zz],pop,locus,zz, ind,data->indnames[pop][ind]);
2241 		  //		  strcpy (nodelist[zz]->nayme, a1);
2242 		  for(iu=0;iu<data->maxalleles[locus]; iu++)
2243 		    nodelist[zz]->x.a[iu] = 1.0;
2244 		  len = strlen(nodelist[zz]->nayme);
2245 		  nodelist[zz]->nayme = (char *) realloc(nodelist[zz]->nayme,sizeof(char)*(len+5+strlen(a2)));
2246 		  strcat(nodelist[zz]->nayme,"B!");
2247 		  strcat(nodelist[zz]->nayme,a2);
2248 		  zz++;
2249 		  zpop++;
2250 		}
2251 	    }
2252         }
2253       data->numalleles[pop][locus] = zpop;
2254     }
2255   world->sumtips = zz;
2256   if (world->sumtips==0)
2257     {
2258       data->skiploci[locus] = TRUE;
2259       world->data->skiploci[locus] = TRUE;
2260       world->skipped += 1;
2261     }
2262 }
2263 
2264 void
make_microbrownian(world_fmt * world,option_fmt * options,data_fmt * data,long locus)2265 make_microbrownian (world_fmt * world, option_fmt * options, data_fmt * data, long locus)
2266 {
2267   long top;
2268   long pop, ind, ii;
2269   long len;
2270   char a1[DEFAULT_ALLELENMLENGTH];
2271   char a2[DEFAULT_ALLELENMLENGTH];
2272   node **nodelist = world->nodep;
2273   long zz=0;
2274   long zpop;
2275   //    long iu;
2276   long btotal;
2277   MYREAL bsum;
2278     for (pop = 0; pop < data->numpop; pop++)
2279     {
2280         btotal=0;
2281         bsum=0.;
2282 	top = max_shuffled_individuals(options, data, pop, locus);
2283         for (ii = 0; ii < top; ii++)
2284 	  {
2285 	    ind = data->shuffled[pop][locus][ii];
2286             strcpy (a1, data->yy[pop][ind][locus][0]);
2287             strcpy (a2, data->yy[pop][ind][locus][1]);
2288             if (strcmp (a1, "?"))
2289 	      {
2290                 btotal++;
2291                 bsum += atof (a1);
2292 	      }
2293             if (strcmp (a2, "?"))
2294 	      {
2295                 btotal++;
2296                 bsum += atof (a2);
2297 	      }
2298 	  }
2299         bsum /= btotal;
2300 
2301         zpop = 0;
2302         for (ii = 0; ii < top; ii++)
2303 	  {
2304 	    ind = data->shuffled[pop][locus][ii];
2305 	    strcpy (a1, data->yy[pop][ind][locus][0]);
2306 	    strcpy (a2, data->yy[pop][ind][locus][1]);
2307 	    if (strcmp (a1, "?"))
2308 	      {
2309 		allocate_tip(world,options, &world->nodep[zz],pop,locus,zz,ind,data->indnames[pop][ind]);
2310 		len = strlen(nodelist[zz]->nayme);
2311 		nodelist[zz]->nayme = (char *) realloc(nodelist[zz]->nayme,sizeof(char)*(len+5+strlen(a1)));
2312 		strcat(nodelist[zz]->nayme,"A!");
2313 		strcat(nodelist[zz]->nayme,a1);
2314 		nodelist[zz++]->x.a[0] = atof (a1);
2315 		zpop++;
2316 	      }
2317 	    else
2318 	      {
2319 		if(options->include_unknown)
2320 		  {
2321 		    allocate_tip(world,options, &world->nodep[zz],pop,locus,zz,ind,data->indnames[pop][ind]);
2322 		    len = strlen(nodelist[zz]->nayme);
2323 		    nodelist[zz]->nayme = (char *) realloc(nodelist[zz]->nayme,sizeof(char)*(len+5+strlen(a1)));
2324 		    strcat(nodelist[zz]->nayme,"A!");
2325 		    strcat(nodelist[zz]->nayme,a1);
2326 		    nodelist[zz++]->x.a[0] = bsum; //average over other values
2327 		    // this might not be very sensible at all.
2328 		    zpop++;
2329 		  }
2330 	      }
2331 	    if (strcmp (a2, "?"))
2332 	      {
2333 		allocate_tip(world,options, &world->nodep[zz],pop,locus,zz,ind,data->indnames[pop][ind]);
2334 		len = strlen(nodelist[zz]->nayme);
2335 		nodelist[zz]->nayme = (char *) realloc(nodelist[zz]->nayme,sizeof(char)*(len+5+strlen(a2)));
2336 		strcat(nodelist[zz]->nayme,"B!");
2337 		strcat(nodelist[zz]->nayme,a2);
2338 		nodelist[zz++]->x.a[0] = atof (a2);
2339 		zpop++;
2340 	      }
2341 	    else
2342 	      {
2343 		if(options->include_unknown)
2344 		  {
2345 		    allocate_tip(world,options, &world->nodep[zz],pop,locus,zz,ind,data->indnames[pop][ind]);
2346 		    len = strlen(nodelist[zz]->nayme);
2347 		    nodelist[zz]->nayme = (char *) realloc(nodelist[zz]->nayme,sizeof(char)*(len+5+strlen(a2)));
2348 		    strcat(nodelist[zz]->nayme,"B!");
2349 		    strcat(nodelist[zz]->nayme,a2);
2350 		    nodelist[zz++]->x.a[0] = bsum;
2351 		    zpop++;
2352 		  }
2353 	      }
2354 	  }
2355 	data->numalleles[pop][locus] = zpop;
2356     }
2357     world->sumtips = zz;
2358     if (world->sumtips==0)
2359       {
2360 	data->skiploci[locus] = TRUE;
2361         world->data->skiploci[locus] = TRUE;
2362         world->skipped += 1;
2363       }
2364 }
2365 
2366 
2367 /*---------------------------------------------
2368 free_treetimes frees the ptr_array of timeslist
2369 */
2370 void
free_treetimes(world_fmt * world,long size)2371 free_treetimes (world_fmt * world, long size)
2372 {
2373   //  long i;
2374   while (size >= 0)
2375     {
2376       //      for(i=world->treetimes[size].allocT-1; i>= 0 ; i--)
2377       //	myfree(world->treetimes[size].tl[i].lineages);
2378       myfree(world->treetimes[size].lineages);
2379       myfree(world->treetimes[size].tl);
2380       size--;
2381     }
2382 }
2383 
2384 ///
2385 /// construct timelist
2386 void
construct_tymelist(world_fmt * world,timelist_fmt * timevector)2387 construct_tymelist (world_fmt * world, timelist_fmt * timevector)
2388 {
2389     long z = 0;
2390     long tips = 0;
2391     //long ii;
2392     //long T;
2393     //    long pop;
2394     MYREAL tmp;
2395     timevector->numpop = world->numpop;
2396     traverseNodes (world->root, &timevector, &z, world, &tips);
2397     timevector->T = z;
2398 #ifdef TREEDEBUG
2399     printf("timevector->T=%li tips=%li\n",timevector->T,tips);
2400 #endif
2401     qsort ((void *) timevector->tl, timevector->T, sizeof (vtlist), agecmp);
2402     if ((*timevector).tl[(*timevector).T - 1].eventnode->type != 'r')
2403     {
2404         z = 0;
2405         while ((*timevector).tl[z].eventnode->type != 'r')
2406             z++;
2407 
2408 	tmp = 	(*timevector).tl[(*timevector).T - 1].eventnode->tyme + 10000.;
2409 	(*timevector).tl[z].eventnode->tyme = tmp;
2410 	(*timevector).tl[z].eventnode->next->tyme = tmp;
2411 	(*timevector).tl[z].eventnode->next->next->tyme = tmp;
2412 
2413         (*timevector).tl[z].age = (*timevector).tl[z].eventnode->tyme;
2414         qsort ((void *) timevector->tl, timevector->T, sizeof (vtlist), agecmp);
2415         warning("construct_tymelist root moved: new time = %f\n",(*timevector).tl[z].eventnode->tyme);
2416     }
2417     timeslices (&timevector);
2418     add_partlineages (world->numpop, &timevector);
2419 #ifdef TREEDEBUG
2420     fprintf(stderr,"///////////////////\n");
2421     long T = timevector->T-2;
2422     long ii, pop;
2423     for (ii = T; ii >= 0; ii--)
2424       {
2425 	for (pop = 0; pop < world->numpop; pop++)
2426 	  fprintf(stderr,"%li ",timevector->tl[ii].lineages[pop]);
2427 	fprintf(stderr,"%20.10f %20.10f | %f %c %li -> %li (%li)(%li)\n",timevector->tl[ii].age,timevector->tl[ii].eventnode->tyme, timevector->tl[ii].eventnode->tyme, timevector->tl[ii].eventnode->type, timevector->tl[ii].from, timevector->tl[ii].to, timevector->tl[ii].eventnode->id, showtop(timevector->tl[ii].eventnode->back)->id);
2428       }
2429     fprintf(stderr,"/////////////////////////////////////////////\n");
2430 #endif
2431 }
2432 
2433 
2434 
2435 /// find a tipdate
2436 MYREAL
find_tipdate(char * id,long pop,world_fmt * world)2437 find_tipdate(char * id, long pop, world_fmt *world)
2438 {
2439   MYREAL date;
2440   long ind;
2441   long slen;
2442   long locus = world->locus;
2443   tipdate_fmt *sampledates = world->data->sampledates[pop][locus];
2444 
2445   for(ind=0;ind < world->data->numind[pop][locus]; ind++)
2446     {
2447       if(sampledates[ind].name != NULL)
2448 	{
2449 	  slen = strlen(sampledates[ind].name);
2450 	  if(!strncmp(sampledates[ind].name,id, slen))
2451 	    {
2452 	      date = (sampledates[ind].date
2453 		      * world->options->generation_year
2454 		      * world->options->meanmu[locus]) * world->options->mu_rates[locus];
2455 	      printf("%i> id='%s' name='%s' realdate=%f date=%f locus=%li pop=%li\n",myID, id,sampledates[ind].name, sampledates[ind].date, date, locus, pop);
2456 	      fflush(stdout);
2457 	      return date;
2458 	    }
2459 	}
2460     }
2461   return 0.0;
2462 }
2463 
2464 ///
2465 /// traverse the tree and writes node-information into the real timelist also
2466 /// takes care that the size of timelist is increased accordingly
2467 void
traverseNodes(node * theNode,timelist_fmt ** timevector,long * slice,world_fmt * world,long * tips)2468 traverseNodes (node * theNode, timelist_fmt ** timevector, long *slice, world_fmt *world, long *tips)
2469 {
2470     //#ifdef DEBUG
2471     // MYREAL tipdate = 0.0;
2472     //#endif
2473     if (theNode != NULL)
2474       {
2475           if (theNode->type != 't')
2476             {
2477               if (theNode->next->back != NULL)
2478                 {
2479                   traverseNodes (theNode->next->back, timevector, slice, world, tips);
2480                 }
2481               if (theNode->type != 'm' && theNode->next->next->back != NULL)
2482                 {
2483                   traverseNodes (theNode->next->next->back, timevector, slice, world, tips);
2484                 }
2485               if (theNode->top)
2486                 {
2487                   /*
2488                    * Here we are on the save side if we increase the
2489                    * timelist so never a fence-write can happen
2490                    */
2491                   if (*slice >= (*timevector)->allocT-1)
2492                     {
2493                       increase_timelist (timevector);
2494                     }
2495                   /*
2496                    * (*timevector)->tl[*slice].pop =
2497                    * theNode->actualpop;
2498                    */
2499                   (*timevector)->tl[*slice].age = theNode->tyme;
2500                   //mark the visited node this will not work with recycled node pointers
2501                   //does it work at all?
2502                   //if(theNode != (*timevector)->tl[*slice].eventnode)
2503                   //  (*timevector)->tl[*slice].visited = TRUE;
2504                   //else
2505                   //  (*timevector)->tl[*slice].visited = FALSE;
2506                   //
2507                   (*timevector)->tl[*slice].eventnode = theNode;
2508                   (*timevector)->tl[*slice].slice = *slice;
2509                   (*timevector)->tl[*slice].from = theNode->pop;
2510                   (*timevector)->tl[*slice].to = theNode->actualpop;
2511                   (*slice) += 1;
2512                 }
2513               else
2514                 {
2515                   error("traverseNodes expects to look only at TOP nodes, but received another one and died\n");
2516                 }
2517             }
2518           else
2519             {
2520               //tipdate = find_tipdate(theNode->nayme, theNode->pop, world);
2521               //theNode->tyme = tipdate;
2522               if (*slice >= (*timevector)->allocT-1)
2523                 {
2524                   increase_timelist (timevector);
2525                 }
2526               (*timevector)->tl[*slice].age = theNode->tyme;
2527               (*timevector)->tl[*slice].eventnode = theNode;
2528               (*timevector)->tl[*slice].slice = *slice;
2529               (*timevector)->tl[*slice].from = theNode->pop;
2530               (*timevector)->tl[*slice].to = theNode->actualpop;
2531               (*tips) += 1;
2532               (*slice) += 1;
2533             }
2534       }
2535     else
2536         error("no node?????????");
2537 }
2538 
2539 void
increase_timelist(timelist_fmt ** timevector)2540 increase_timelist (timelist_fmt ** timevector)
2541 {
2542   (*timevector)->oldT = (*timevector)->allocT;
2543   (*timevector)->allocT += (*timevector)->allocT / 4; /* increase timelist by
2544 						       * 25% */
2545   (*timevector)->tl = (vtlist *) myrealloc ((*timevector)->tl,
2546 					    sizeof (vtlist) * ((*timevector)->allocT + 1));
2547   memset ((*timevector)->tl + (*timevector)->oldT, 0,
2548 	  ((*timevector)->allocT - (*timevector)->oldT) * sizeof (vtlist));
2549   allocate_lineages (timevector, 0, (*timevector)->numpop);
2550 }
2551 
2552 
set_all_dirty(const node * root,node * p,world_fmt * world,const long locus)2553 void set_all_dirty (const node * root, node * p, world_fmt * world, const long locus)
2554 {
2555 //  node *left;
2556 //  node *right;
2557   if (p->type == 'm')
2558     error ("MIGRATION NODE IN SMOOTH FOUND, PLEASE REPORT!!!!\n");
2559   if (p->type == 'i')
2560     {
2561         set_all_dirty (root, /*right=*/crawlback (p->next), world, locus);
2562         set_all_dirty (root, /*left=*/crawlback (p->next->next), world, locus);
2563         p->dirty = TRUE;
2564 	printf("(INT %li, %li, %f)-->%li\n",p->id, p->bid, p->v, showtop(crawlback(p))->id);
2565     }
2566   else
2567     {
2568 	printf("(TIP %li, %li, %f)-->%li\n",p->id, p->bid, p->v, showtop(crawlback(p))->id);
2569     }
2570 }    /* set_all_dirty */
2571 
2572 void
smooth(const node * root,node * p,world_fmt * world,const long locus)2573 smooth (const node * root, node * p, world_fmt * world, const long locus)
2574 {
2575  // node *left;
2576  // node *right;
2577  //   /* static */ long panic;
2578     /* only changed lineages are considered */
2579     if (!p->dirty)
2580         return;
2581 
2582 //xcode    if (p == (crawlback (root)))
2583 //xcode         panic = 0;
2584     if (p->type == 'm')
2585         error ("MIGRATION NODE IN SMOOTH FOUND, PLEASE REPORT!!!!\n");
2586     if (p->type == 'i')
2587     {
2588         smooth (root, /*right=*/crawlback (p->next), world, locus);
2589         smooth (root, /*left=*/crawlback (p->next->next), world, locus);
2590 #ifdef BEAGLE
2591 	prepare_beagle_instances(p,left, right, world->beagle);
2592 	//	printf("(PINT %li, %li, %f)-->%li\n",p->id, p->bid, p->v, showtop(crawlback(p))->id);
2593 #else
2594         (*nuview) (p, world, locus);
2595 #endif
2596 #ifdef UEP
2597         if(world->options->uep)
2598             twostate_nuview (p, world, locus);
2599 #endif
2600         p->dirty = FALSE;
2601     }
2602 #ifdef BEAGLE
2603     //else
2604     //  {
2605     //	printf("(PTIP: %li, %li, %f)-->%li\n",p->id, p->bid, p->v, showtop(crawlback(p))->id);
2606     //  }
2607 #endif
2608 }    /* smooth */
2609 
2610 
2611 /// sets the nuview machinery so that depending on datatype the correct
2612 /// conditional likelihood calculator is used
2613 void
which_pseudonuview(char datatype,boolean fastlike,boolean use_gaps,int watkins)2614 which_pseudonuview (char datatype, boolean fastlike, boolean use_gaps, int watkins)
2615 {
2616   switch (datatype)
2617     {
2618     case 'a':
2619       ppseudonuview = (void (*)(proposal_fmt *, xarray_fmt , MYREAL *, MYREAL, xarray_fmt , MYREAL *, MYREAL)) pseudonu_allele;
2620       break;
2621     case 'b':
2622       ppseudonuview =  (void (*)(proposal_fmt *, xarray_fmt , MYREAL *, MYREAL, xarray_fmt, MYREAL *, MYREAL)) pseudonu_brownian;
2623       break;
2624     case 'm':
2625       ppseudonuview =  (void (*)(proposal_fmt *, xarray_fmt, MYREAL *, MYREAL, xarray_fmt, MYREAL *, MYREAL)) pseudonu_micro;
2626       break;
2627     case 's':
2628     case 'n':
2629     case 'h':
2630     case 'u':
2631       if (fastlike && !use_gaps)
2632 	ppseudonuview = (void (*)(proposal_fmt *, xarray_fmt, MYREAL *, MYREAL, xarray_fmt , MYREAL *, MYREAL)) pseudonu_seq_slow;
2633       else
2634 	{
2635 	  if(fastlike)
2636 	    ppseudonuview =  (void (*)(proposal_fmt *, xarray_fmt, MYREAL *, MYREAL, xarray_fmt, MYREAL *, MYREAL)) pseudonu_seq;
2637 	  else /*use_gaps*/
2638 	    ppseudonuview =  (void (*)(proposal_fmt *, xarray_fmt, MYREAL *, MYREAL, xarray_fmt, MYREAL *, MYREAL)) pseudonu_seq_slow; //nuview_gaps;
2639 	 }
2640       break;
2641     case 'f':   /* fitch, reconstruction of ancestral state
2642 		 * method */
2643       ppseudonuview =  (void (*)(proposal_fmt *, xarray_fmt, MYREAL *, MYREAL, xarray_fmt, MYREAL * , MYREAL)) pseudonu_anc;
2644     }
2645 }
2646 
2647 /// sets the nuview machinery so that depending on datatype the correct
2648 /// conditional likelihood calculator is used
2649 void
which_nuview(char datatype,boolean fastlike,boolean use_gaps,int watkins)2650 which_nuview (char datatype, boolean fastlike, boolean use_gaps, int watkins)
2651 {
2652   switch (datatype)
2653     {
2654     case 'a':
2655       nuview = (void (*)(node *, world_fmt *, long)) nuview_allele;
2656       break;
2657     case 'b':
2658       nuview = (void (*)(node *, world_fmt *, long)) nuview_brownian;
2659       break;
2660     case 'm':
2661       switch(watkins)
2662 	{
2663 	case MULTISTEP:
2664 	  prob_micro = (double (*)(MYREAL, long, world_fmt *, pair *)) prob_micro_watkins;
2665 	  break;
2666 	case SINGLESTEP:
2667 	default:
2668 	  prob_micro = (double (*)(MYREAL, long, world_fmt *, pair *)) prob_micro_singlestep;
2669 	}
2670       nuview = (void (*)(node *, world_fmt *, long)) nuview_micro;
2671       break;
2672     case 's':
2673     case 'n':
2674     case 'h':
2675     case 'u':
2676       if (fastlike && !use_gaps)
2677 	nuview = (void (*)(node *, world_fmt *, long)) nuview_sequence;
2678       else
2679 	{
2680 	  if(fastlike)
2681 	    nuview = (void (*)(node *, world_fmt *, long)) nuview_sequence_slow;
2682 	  else /*use_gaps*/
2683 	    nuview = (void (*)(node *, world_fmt *, long)) nuview_sequence_slow; //nuview_gaps;
2684 	 }
2685       break;
2686     case 'f':   /* fitch, reconstruction of ancestral state
2687 		 * method */
2688       nuview = (void (*)(node *, world_fmt *, long)) nuview_ancestral;
2689     }
2690 }
2691 
2692 void
nuview_allele(node * mother,world_fmt * world,const long locus)2693 nuview_allele (node * mother, world_fmt * world, const long locus)
2694 {
2695   long a;
2696   long aa;
2697   long mal = world->data->maxalleles[locus];
2698 
2699   MYREAL freq = world->data->freq;
2700   //  MYREAL freqlast = world->data->freqlast; see pseudonuview
2701   MYREAL w1;
2702   MYREAL w2;
2703   MYREAL v1;
2704   MYREAL v2;
2705   MYREAL v1freq;
2706   MYREAL v2freq;
2707   MYREAL pija1;
2708   MYREAL pija2;
2709   MYREAL lx1;
2710   MYREAL lx2;
2711 
2712   MYREAL x3m = -MYREAL_MAX;
2713 
2714   MYREAL *xx1, *xx2;
2715   MYREAL *xx3;
2716 
2717   node *d1 = NULL;
2718   node *d2 = NULL;
2719 
2720   //  MYREAL test = 0.0;
2721 
2722   children (mother, &d1, &d2);
2723   xx1 = d1->x.a;
2724   xx2 = d2->x.a;
2725   xx3 = mother->x.a;
2726   lx1 = d1->scale[0];
2727   lx2 = d2->scale[0];
2728   v1 = 1 - EXP (-d1->v);
2729   v2 = 1 - EXP (-d2->v);
2730     if (v1 >= 1.)
2731     {
2732         w1 = 0.0;
2733         v1 = 1.0;
2734     }
2735     else
2736     {
2737         w1 = 1.0 - v1;
2738     }
2739     if (v2 >= 1.)
2740     {
2741         w2 = 0.0;
2742         v2 = 1.0;
2743     }
2744     else
2745     {
2746         w2 = 1.0 - v2;
2747     }
2748     //    printf("@@nuview 1:{%f,%f, %f,%f}   2:(%f,%f, %f,%f}\n",xx1[0],xx1[1],lx1,v1,xx2[0],xx2[1],lx2,v2);
2749 
2750     v1freq = v1 * freq;
2751     v2freq = v2 * freq;
2752 
2753     for (aa = 0; aa < mal; aa++)
2754     {
2755         pija1 = pija2 = 0.0;
2756         for (a = 0; a < mal; a++)
2757         {
2758 	  if(aa==a)
2759 	    {
2760 	      pija1 += (w1 + v1freq) * xx1[a];
2761 	      pija2 += (w2 + v2freq) * xx2[a];
2762 	    }
2763 	  else
2764 	    {
2765 	      pija1 += v1 * freq * xx1[a];
2766 	      pija2 += v2 * freq * xx2[a];
2767 	    }
2768 	}
2769         xx3[aa] = pija1 * pija2;
2770         //test += xx3[aa];
2771         if (xx3[aa] > x3m)
2772             x3m = xx3[aa];
2773 
2774     }
2775     //if (test <= 0.0)
2776     //    error ("xx3 is 0 or garbage!");
2777     for (aa = 0; aa < mal; aa++)
2778     {
2779         xx3[aa] /= x3m;
2780     }
2781     //    printf("@@finish: (%f=%f) (%f=%f)\n",xx3[0],mother->x.a[0],xx3[1],mother->x.a[1]);
2782     mother->scale[0] = LOG (x3m) + lx2 + lx1;
2783 }
2784 
2785 void
nuview_brownian(node * mother,world_fmt * world,const long locus)2786 nuview_brownian (node * mother, world_fmt * world, const long locus)
2787 {
2788 
2789     node *d1 = NULL, *d2 = NULL;
2790     MYREAL rvtot = 0.;
2791     MYREAL xx1, xx2, c12, diff;
2792     MYREAL mean1, mean2, mean, v1, v2, vtot, f1, f2;
2793     children (mother, &d1, &d2);
2794     mean1 = d1->x.a[0];
2795     mean2 = d2->x.a[0];
2796     xx1 = d1->x.a[2];
2797     xx2 = d2->x.a[2];
2798 
2799     v1 = d1->v + d1->x.a[1]; /* di->v == length of branch time(n1) -
2800 		* time(n2) */
2801     v2 = d2->v + d2->x.a[1]; /* x.a[1] contains the deltav */
2802     vtot = v1 + v2;
2803     diff = mean1 - mean2;
2804     if (vtot > 0.0)
2805       {
2806 	rvtot = 1./ vtot;
2807         f1 = v2 * rvtot;
2808 	c12 = diff * diff * rvtot;
2809       }
2810     else
2811       {
2812         f1 = 0.5;
2813 	c12 = HUGE;
2814 	vtot = SMALL_VALUE;
2815       }
2816     f2 = 1.0 - f1;
2817     mean = f1 * mean1 + f2 * mean2;
2818 
2819 
2820     mother->x.a[2] =
2821         xx1 + xx2 + MIN (0.0, -0.5 * (LOG (vtot) + c12) + LOG2PIHALF);
2822     /*
2823      * printf("L=%f , L1=%f, L2=%f, log(vtot=%f)=%f,
2824      * c12=%f\n",mother->x.a[2], xx1, xx2,vtot,log(vtot),c12);
2825      */
2826     mother->x.a[1] = v1 * f1;
2827     mother->x.a[0] = mean;
2828 
2829 }
2830 
2831 
2832 void
nuview_micro(node * mother,world_fmt * world,const long locus)2833 nuview_micro (node * mother, world_fmt * world, const long locus)
2834 {
2835     node *d1 = NULL;
2836     node *d2 = NULL;
2837     long s;
2838     long a;
2839     long aa1, aa2;
2840     long diff;
2841     long margin = world->options->micro_threshold[locus];
2842     MYREAL vv1, vv2, lx1, lx2;
2843     MYREAL x3m = -MYREAL_MAX;
2844     MYREAL pija1s, pija2s;
2845     MYREAL *xx1 = NULL, *xx2 = NULL;
2846 
2847     MYREAL *pm1;
2848     MYREAL *pm2;
2849 
2850     long smax = world->data->maxalleles[locus];
2851     MYREAL *xx3 = NULL;
2852     // needed for watkins' microsatellite model
2853     pair *helper = &world->options->msat_tuning;
2854 
2855     children (mother, &d1, &d2);
2856     vv1 = d1->v;
2857     vv2 = d2->v;
2858     xx1 = d1->x.a;
2859     xx2 = d2->x.a;
2860     xx3 = mother->x.a;
2861     lx1 = d1->scale[0];
2862     lx2 = d2->scale[0];
2863 
2864     pm1 = (MYREAL *) mymalloc(sizeof(MYREAL) * 4 * margin);
2865     pm2 = pm1 +  2 * margin;
2866 
2867     for (diff = 0; diff < margin; diff++)
2868       {
2869 	pm1[diff] = prob_micro (vv1, diff, world, helper);
2870 	pm2[diff] = prob_micro (vv2, diff, world, helper);
2871 	//fprintf(stdout,"pm[diff=%li]=(%g %g)\n",diff,pm1[diff],pm2[diff]);
2872       }
2873 
2874     for (s = 0; s < smax; s++)
2875     {
2876         pija1s = pija2s = 0.0;
2877 	aa1 = MAX (0, s - margin);
2878 	aa2 = MIN(s + margin,smax);
2879 	for (a = aa1; a < aa2; a++)
2880 	  //for (a = 0; a < smax; a++)
2881         {
2882             diff = labs (s - a);
2883 	    if(diff>=margin)
2884 	      continue;
2885 	    //	    fprintf(stdout,"***pm[diff=%li]=(%g %g) xx[a=%li]=(%f %f)\n",diff,pm1[diff],pm2[diff],a,xx1[a],xx2[a]);
2886             if (xx1[a] > 0)
2887             {
2888                 pija1s += pm1[diff] * xx1[a];
2889             }
2890             if (xx2[a] > 0)
2891             {
2892                 pija2s += pm2[diff] * xx2[a];
2893             }
2894         }
2895         xx3[s] = pija1s * pija2s;
2896         if (xx3[s] > x3m)
2897             x3m = xx3[s];
2898     }
2899     if (x3m == 0.0)
2900     {
2901         mother->scale[0] = -MYREAL_MAX;
2902     }
2903     else
2904     {
2905         for (s = 0; s < smax; s++)
2906         {
2907             xx3[s] /= x3m;
2908         }
2909         mother->scale[0] = LOG (x3m) + lx1 + lx2;
2910     }
2911     myfree(pm1);
2912 }
2913 
2914 
2915 void
calculate_steps(world_fmt * world)2916 calculate_steps (world_fmt * world)
2917 {
2918     long k, diff;
2919     long locus = world->locus;
2920     const long stepnum = world->options->micro_threshold[locus];
2921     MYREAL ***steps = world->options->steps;
2922 
2923     for (diff = 0; diff < stepnum; diff++)
2924     {
2925         for (k = 0; k < stepnum; k++)
2926         {
2927 			steps[locus][diff][k] = logfac(k + diff) + logfac (k);
2928         }
2929     }
2930 }
2931 
2932 
2933 ///
2934 /// calculates probability of a mutation of size diff in time t. used for msat calculations.
2935 /// i=diff
2936 /// prob[i_, t_] := Exp[-t] Sum[(t/2)^(i + 2 k)/((i + k)! k!), {k, 0, Infinity}]
2937 /// calculated as Sum[Exp[(-t + ((log(t)-log(2))*i) + (2*(log(t)-log(2)) * k) - steps(i,k))
2938 MYREAL
prob_micro_singlestep(MYREAL t,long diff,world_fmt * world,pair * helper)2939 prob_micro_singlestep(MYREAL t, long diff, world_fmt * world, pair *helper)
2940 {
2941   // helper is a dummy so that the call is the same as for prob_micro_watkins
2942   const long locus = world->locus;
2943   MYREAL *steps;
2944   long k;
2945   long k2;
2946   long stepnum = world->options->micro_threshold[locus];
2947   MYREAL temp1;
2948   MYREAL temp2;
2949   MYREAL temp3;
2950   MYREAL temp4;
2951   MYREAL const_part;
2952   MYREAL summ = 0.0;
2953   // MYREAL oldsum = 0.0;
2954   const MYREAL logt = LOG (t) - LOG2;
2955   const MYREAL logt2 = 2 * logt;
2956   if (diff >= stepnum)
2957     return summ;
2958   // linking to precalculated "denominator" Log[(i+k)! k!]
2959   // was precalculated in calculate_steps()
2960   steps = world->options->steps[locus][diff];
2961   const_part = -t + logt * diff;
2962   // loop unrolling to remove possible stalls
2963   // assumes stepnum is even
2964   for (k = 0; k < stepnum; k += 2)
2965     {
2966       k2 = k + 1;
2967       temp1 = const_part + logt2 * k - steps[k];
2968       temp2 = const_part + logt2 * k2 - steps[k2];
2969       temp3 = EXP(temp1);
2970       temp4 = EXP(temp2);
2971       summ += temp3 + temp4;
2972       //      if (fabs (oldsum - summ) < eps)
2973       //	break;
2974       //oldsum = summ;
2975     }
2976     return summ;
2977 }
2978 
2979 ///
2980 /// Calculates probability of a change of repeat number in time. Used for msat calculations.
2981 /// based on Joe Watkins' 2007 in theoretical population biology, chapter 4.2
2982 /// prob[diff,t,tune, upchance]
2983 /// diff: is negative or positive depending on direction
2984 /// t: is the time
2985 /// tune:    0.0 with the single step mutation model
2986 ///          1.0 with the infinite allele model
2987 ///          all values in between allow multiple steps
2988 /// upchance: is the probability that the repeat number increases (upchance < 2/3!)
2989 /// helper is the container for the additional variables
2990 MYREAL
prob_micro_watkins(MYREAL t,long diff,world_fmt * world,pair * helper)2991 prob_micro_watkins (MYREAL t, long diff, world_fmt * world, pair *helper)
2992 {
2993   const MYREAL tune = (*helper)[0];
2994   const MYREAL upchance = (*helper)[1];
2995   //const long locus = world->locus;
2996   MYREAL x;
2997   //  long stepnum = world->options->micro_threshold[locus];
2998   MYREAL summ = 0.0;
2999   // MYREAL oldsum = 0.0;
3000   const MYREAL delta = 2. * PI / 100.;
3001   const MYREAL expt = EXP(-t);
3002   MYREAL oneplustunesq;
3003   MYREAL tunep;
3004   MYREAL oneminustunet;
3005   MYREAL sinx;
3006   MYREAL cosx;
3007   MYREAL invdenom;
3008   MYREAL first;
3009   MYREAL second;
3010 
3011   //if (diff >= stepnum)
3012   //  return summ;
3013 
3014   oneplustunesq = 1 + tune * tune ;
3015   tunep = (1.0 - tune) * (2.0 * upchance - 1.0) * t;
3016   oneminustunet = (1.0 - tune) * t;
3017   for (x = -PI; x < PI; x += delta)
3018     {
3019       cosx = cos(x);
3020       sinx = sin(x);
3021       invdenom = (oneplustunesq - 2. * tune * cosx);
3022       if(invdenom < SMALL_VALUE)
3023 	invdenom = 1. / SMALL_VALUE;
3024       first = (diff * x + tunep * sinx) * invdenom;
3025       second = oneminustunet * cosx * invdenom;
3026       summ += cos(first) * EXP(second);
3027     }
3028 #ifdef DEBUG
3029   if(summ == 0.0)
3030     error("underflow in msat prob calc\n");
3031 #endif
3032   return expt * INV2PI * delta * summ;
3033 }
3034 
3035 ///
3036 /// test implementation of Watkins' calculations
3037 //MYREAL
3038 //prob_micro (MYREAL t, long diff, world_fmt * world)
3039 //{
3040 //  return prob_micro_watkins (t, diff, 0.2 , 0.5, world);
3041 //}
3042 //MYINLINE MYREAL
3043 //prob_micro (MYREAL t, long diff, world_fmt * world)
3044 //{
3045 //  return prob_micro_singlestep (t, diff, world);
3046 //}
3047 
3048 
3049 ///
3050 /// conditional likelihoods
3051 void
nuview_sequence(node * mother,world_fmt * world,const long locus)3052 nuview_sequence (node * mother, world_fmt * world, const long locus)
3053 {
3054   const long endsite = world->data->seq[0]->endsite;
3055     long i, j, k;
3056     register    MYREAL xx1t0, xx1t1,xx1t2, xx1t3;
3057     register MYREAL xx2t0, xx2t1, xx2t2, xx2t3;
3058     register MYREAL lw1, lw2, yy1, yy2, ww1zz1, vv1zz1, ww2zz2, vv2zz2, vzsumr1,
3059       vzsumr2, vzsumy1, vzsumy2, sum1, sum2, sumr1, sumr2,
3060 		sumy1, sumy2, ww1, ww2, zz1, zz2;
3061     register MYREAL freqa, freqc, freqg, freqt;//, freqr, freqy;
3062      register MYREAL freqar, freqcy, freqgr, freqty;
3063      register node *q, *r;
3064      register sitelike *xx1, *xx2, *xx3;
3065      register long rcategs = world->options->rcategs;
3066      register long categs = world->options->categs;
3067      register tbl_fmt tbl = world->tbl;
3068      register seqmodel_fmt *seq;
3069      register valrec *tbl00;
3070      register valrec *tblij;
3071      register valrec *tbljk;
3072     seq = world->data->seq[0];
3073 
3074     q = crawlback (mother->next);
3075     r = crawlback (mother->next->next);
3076 
3077     freqa = seq->basefrequencies[NUC_A];
3078     freqc = seq->basefrequencies[NUC_C];
3079     freqg = seq->basefrequencies[NUC_G];
3080     freqt = seq->basefrequencies[NUC_T];
3081     //freqr = seq->basefrequencies[NUC_R];
3082     //freqy = seq->basefrequencies[NUC_Y];
3083     freqar = seq->basefrequencies[NUC_AR];
3084     freqcy = seq->basefrequencies[NUC_CY];
3085     freqgr = seq->basefrequencies[NUC_GR];
3086     freqty = seq->basefrequencies[NUC_TY];
3087 
3088     lw1 = -q->v * seq->fracchange;
3089 
3090     if ((rcategs | categs) == 1)
3091     {
3092         tbl00 = tbl[0][0];
3093         ww1 = EXP (tbl00->ratxi * lw1);
3094         zz1 = EXP (tbl00->ratxv * lw1);
3095         ww1zz1 = ww1 * zz1;
3096         vv1zz1 = (1.0 - ww1) * zz1;
3097         lw2 = -r->v * seq->fracchange;
3098         ww2 = EXP (tbl00->ratxi * lw2);
3099         zz2 = EXP (tbl00->ratxv * lw2);
3100         ww2zz2 = ww2 * zz2;
3101         vv2zz2 = (1.0 - ww2) * zz2;
3102         yy1 = 1.0 - zz1;
3103         yy2 = 1.0 - zz2;
3104         for (i = 0; i < endsite; i++)
3105         {
3106             xx1 = &(q->x.s[i][0]);
3107             xx2 = &(r->x.s[i][0]);
3108             xx3 = &(mother->x.s[i][0]);
3109 
3110             xx1t0 = (*xx1)[0];
3111             xx1t1 = (*xx1)[1];
3112             xx1t2 = (*xx1)[2];
3113             xx1t3 = (*xx1)[3];
3114 
3115             xx2t0 = (*xx2)[0];
3116             xx2t1 = (*xx2)[1];
3117             xx2t2 = (*xx2)[2];
3118             xx2t3 = (*xx2)[3];
3119 
3120             sum1 = yy1 * (freqa * xx1t0 + freqc * xx1t1 + freqg * xx1t2 + freqt * xx1t3);
3121             sum2 = yy2 * (freqa * xx2t0 + freqc * xx2t1 + freqg * xx2t2 + freqt * xx2t3);
3122 
3123             sumr1 = freqar * xx1t0 + freqgr * xx1t2;
3124             sumr2 = freqar * xx2t0 + freqgr * xx2t2;
3125             sumy1 = freqcy * xx1t1 + freqty * xx1t3;
3126             sumy2 = freqcy * xx2t1 + freqty * xx2t3;
3127 
3128             vzsumr1 = vv1zz1 * sumr1;
3129             vzsumr2 = vv2zz2 * sumr2;
3130             vzsumy1 = vv1zz1 * sumy1;
3131             vzsumy2 = vv2zz2 * sumy2;
3132             (*xx3)[0] =
3133                 (sum1 + ww1zz1 * xx1t0 + vzsumr1) * (sum2 +
3134 													 ww2zz2 * xx2t0 +
3135 													 vzsumr2);
3136             (*xx3)[1] =
3137                 (sum1 + ww1zz1 * xx1t1 + vzsumy1) * (sum2 +
3138 													 ww2zz2 * xx2t1 +
3139 													 vzsumy2);
3140             (*xx3)[2] =
3141                 (sum1 + ww1zz1 * xx1t2 + vzsumr1) * (sum2 +
3142 													 ww2zz2 * xx2t2 +
3143 													 vzsumr2);
3144             (*xx3)[3] =
3145                 (sum1 + ww1zz1 * xx1t3 + vzsumy1) * (sum2 +
3146 													 ww2zz2 * xx2t3 +
3147 													 vzsumy2);
3148         }
3149     }
3150     else
3151     {
3152         for (i = 0; i < rcategs; i++)
3153             for (j = 0; j < categs; j++)
3154             {
3155                 tblij = tbl[i][j];
3156                 tblij->ww1 = EXP (tblij->ratxi * lw1);
3157                 tblij->zz1 = EXP (tblij->ratxv * lw1);
3158                 tblij->ww1zz1 = tblij->ww1 * tblij->zz1;
3159                 tblij->vv1zz1 = (1.0 - tblij->ww1) * tblij->zz1;
3160             }
3161 				lw2 = -r->v * seq->fracchange;
3162         for (i = 0; i < rcategs; i++)
3163             for (j = 0; j < categs; j++)
3164             {
3165                 tblij = tbl[i][j];
3166                 tblij->ww2 = EXP (tblij->ratxi * lw2);
3167                 tblij->zz2 = EXP (tblij->ratxv * lw2);
3168                 tblij->ww2zz2 = tblij->ww2 * tblij->zz2;
3169                 tblij->vv2zz2 = (1.0 - tblij->ww2) * tblij->zz2;
3170             }
3171 	for (i = 0; i < endsite; i++)
3172 	  {
3173 	    k = seq->category[seq->alias[i] - 1] - 1;
3174 	    for (j = 0; j < rcategs; j++)
3175 	      {
3176 		tbljk = tbl[j][k];
3177 		ww1zz1 = tbljk->ww1zz1;
3178 		vv1zz1 = tbljk->vv1zz1;
3179 		yy1 = 1.0 - tbljk->zz1;
3180 		ww2zz2 = tbljk->ww2zz2;
3181 		vv2zz2 = tbljk->vv2zz2;
3182 		yy2 = 1.0 - tbljk->zz2;
3183 		xx1 = &(q->x.s[i][j]);
3184 		xx2 = &(r->x.s[i][j]);
3185 		xx3 = &(mother->x.s[i][j]);
3186 
3187 		xx1t0 = (*xx1)[0];
3188 		xx1t1 = (*xx1)[1];
3189 		xx1t2 = (*xx1)[2];
3190 		xx1t3 = (*xx1)[3];
3191 
3192 		xx2t0 = (*xx2)[0];
3193 		xx2t1 = (*xx2)[1];
3194 		xx2t2 = (*xx2)[2];
3195 		xx2t3 = (*xx2)[3];
3196 
3197 
3198 		sum1 =
3199 		  yy1 * (freqa * xx1t0 + freqc * xx1t1 +
3200 			 freqg * xx1t2 + freqt * xx1t3);
3201 		sum2 =
3202 		  yy2 * (freqa * xx2t0 + freqc * xx2t1 +
3203 			 freqg * xx2t2 + freqt * xx2t3);
3204 		sumr1 = freqar * xx1t0 + freqgr * xx1t2;
3205 		sumr2 = freqar * xx2t0 + freqgr * xx2t2;
3206 		sumy1 = freqcy * xx1t1 + freqty * xx1t3;
3207 		sumy2 = freqcy * xx2t1 + freqty * xx2t3;
3208 		vzsumr1 = vv1zz1 * sumr1;
3209 		vzsumr2 = vv2zz2 * sumr2;
3210 		vzsumy1 = vv1zz1 * sumy1;
3211 		vzsumy2 = vv2zz2 * sumy2;
3212 		(*xx3)[0] =
3213 		  (sum1 + ww1zz1 * xx1t0 + vzsumr1) * (sum2 +
3214 						       ww2zz2 * xx2t0 +
3215 						       vzsumr2);
3216 		(*xx3)[1] =
3217 		  (sum1 + ww1zz1 * xx1t1 + vzsumy1) * (sum2 +
3218 						       ww2zz2 * xx2t1 +
3219 						       vzsumy2);
3220 		(*xx3)[2] =
3221 		  (sum1 + ww1zz1 * xx1t2 + vzsumr1) * (sum2 +
3222 						       ww2zz2 * xx2t2 +
3223 						       vzsumr2);
3224 		(*xx3)[3] =
3225 		  (sum1 + ww1zz1 * xx1t3 + vzsumy1) * (sum2 +
3226 						       ww2zz2 * xx2t3 +
3227 						       vzsumy2);
3228 	      }
3229 	  }
3230     }
3231 }    /* nuview */
3232 
3233 ///
3234 /// accurate version of conditional likleihood calculator using a scaler
3235 #ifdef GAP
nuview_sequence_slow(node * mother,world_fmt * world,const long locus)3236 void nuview_sequence_slow (node * mother, world_fmt * world, const long locus)
3237 {
3238   nuview_f84gap_slow (mother, world, locus);
3239 }
3240 #else
nuview_sequence_slow(node * mother,world_fmt * world,const long locus)3241 void nuview_sequence_slow (node * mother, world_fmt * world, const long locus)
3242 {
3243     //static long count = 0;
3244     const long endsite = world->data->seq[0]->endsite;
3245     long i, j, k;
3246 
3247     register MYREAL xx1t0, xx1t1,xx1t2, xx1t3;
3248     register MYREAL xx2t0, xx2t1, xx2t2, xx2t3;
3249 
3250     register MYREAL lw1, lw2, yy1, yy2, ww1zz1, vv1zz1, ww2zz2, vv2zz2, vzsumr1,
3251         vzsumr2, vzsumy1, vzsumy2, sum1, sum2, sumr1, sumr2,
3252         sumy1, sumy2, ww1, ww2, zz1, zz2;
3253     MYREAL *sxx1 = NULL, *sxx2 = NULL;
3254     MYREAL sxx3m, tempsxx3m, invsxx3m;
3255     node *q, *r;
3256     sitelike *xx1, *xx2, *xx3;
3257     long rcategs = world->options->rcategs;
3258     long categs = world->options->categs;
3259     tbl_fmt tbl = world->tbl;
3260     register MYREAL freqa, freqc, freqg, freqt;//, freqr, freqy;
3261     register MYREAL freqar, freqcy, freqgr, freqty;
3262     seqmodel_fmt *seq;
3263     valrec *tbl00;
3264     valrec *tblij;
3265     valrec *tbljk;
3266     MYREAL scalesum = 0.0;
3267     boolean newscale=FALSE;;
3268     seq = world->data->seq[0];
3269 
3270     freqa = seq->basefrequencies[NUC_A];
3271     freqc = seq->basefrequencies[NUC_C];
3272     freqg = seq->basefrequencies[NUC_G];
3273     freqt = seq->basefrequencies[NUC_T];
3274     //freqr = seq->basefrequencies[NUC_R];
3275     //freqy = seq->basefrequencies[NUC_Y];
3276     freqar = seq->basefrequencies[NUC_AR];
3277     freqcy = seq->basefrequencies[NUC_CY];
3278     freqgr = seq->basefrequencies[NUC_GR];
3279     freqty = seq->basefrequencies[NUC_TY];
3280 
3281     q = crawlback (mother->next);
3282     r = crawlback (mother->next->next);
3283     lw1 = -q->v * seq->fracchange;
3284     sxx1 = q->scale;
3285     sxx2 = r->scale;
3286     tbl00 = tbl[0][0];
3287     if ((rcategs | categs) == 1)
3288     {
3289         ww1 = EXP (tbl00->ratxi * lw1);
3290         zz1 = EXP (tbl00->ratxv * lw1);
3291         ww1zz1 = ww1 * zz1;
3292         vv1zz1 = (1.0 - ww1) * zz1;
3293         lw2 = -r->v * seq->fracchange;
3294         ww2 = EXP (tbl00->ratxi * lw2);
3295         zz2 = EXP (tbl00->ratxv * lw2);
3296         ww2zz2 = ww2 * zz2;
3297         vv2zz2 = (1.0 - ww2) * zz2;
3298         yy1 = 1.0 - zz1;
3299         yy2 = 1.0 - zz2;
3300         for (i = 0; i < endsite; i++)
3301         {
3302             xx1 = &(q->x.s[i][0]);
3303             xx2 = &(r->x.s[i][0]);
3304             xx3 = &(mother->x.s[i][0]);
3305 
3306             xx1t0 = (*xx1)[0];
3307             xx1t1 = (*xx1)[1];
3308             xx1t2 = (*xx1)[2];
3309             xx1t3 = (*xx1)[3];
3310 
3311             xx2t0 = (*xx2)[0];
3312             xx2t1 = (*xx2)[1];
3313             xx2t2 = (*xx2)[2];
3314             xx2t3 = (*xx2)[3];
3315 
3316 
3317             sum1 = yy1 * (freqa * xx1t0 + freqc * xx1t1 + freqg * xx1t2 + freqt * xx1t3);
3318             sum2 = yy2 * (freqa * xx2t0 + freqc * xx2t1 + freqg * xx2t2 + freqt * xx2t3);
3319             sumr1 = freqar * xx1t0 + freqgr * xx1t2;
3320             sumr2 = freqar * xx2t0 + freqgr * xx2t2;
3321             sumy1 = freqcy * xx1t1 + freqty * xx1t3;
3322             sumy2 = freqcy * xx2t1 + freqty * xx2t3;
3323             vzsumr1 = vv1zz1 * sumr1;
3324             vzsumr2 = vv2zz2 * sumr2;
3325             vzsumy1 = vv1zz1 * sumy1;
3326             vzsumy2 = vv2zz2 * sumy2;
3327             (*xx3)[0] = (sum1 + ww1zz1 * xx1t0 + vzsumr1) * (sum2 + ww2zz2 * xx2t0 + vzsumr2);
3328 	    scalesum = (*xx3)[0];
3329             (*xx3)[1] =
3330                 (sum1 + ww1zz1 * xx1t1 + vzsumy1) * (sum2 +
3331                                                      ww2zz2 * xx2t1 +
3332                                                      vzsumy2);
3333 	    scalesum += (*xx3)[1];
3334             (*xx3)[2] =
3335                 (sum1 + ww1zz1 * xx1t2 + vzsumr1) * (sum2 +
3336                                                      ww2zz2 * xx2t2 +
3337                                                      vzsumr2);
3338 	    scalesum += (*xx3)[2];
3339             (*xx3)[3] =
3340                 (sum1 + ww1zz1 * xx1t3 + vzsumy1) * (sum2 +
3341                                                      ww2zz2 * xx2t3 +
3342                                                      vzsumy2);
3343 	    scalesum += (*xx3)[3];
3344             mother->scale[i] = sxx1[i] + sxx2[i];
3345 	    if(!(scalesum > EPSILON))
3346               {
3347                 sxx3m = MAX ((*xx3)[0], (*xx3)[1]);
3348                 sxx3m = MAX (sxx3m, (*xx3)[2]);
3349                 sxx3m = MAX (sxx3m, (*xx3)[3]);
3350 		invsxx3m = 1.0/sxx3m;
3351                 (*xx3)[0] *= invsxx3m;
3352 		(*xx3)[1] *= invsxx3m;
3353 		(*xx3)[2] *= invsxx3m;
3354 		(*xx3)[3] *= invsxx3m;
3355                 mother->scale[i] += LOG (sxx3m);
3356 	      }
3357 #ifdef DEBUG
3358 	    //	printf("[(%f,%f,%f,%f) %f]",(*xx3)[0],(*xx3)[1],(*xx3)[2],(*xx3)[3],mother->scale[i]);
3359 #endif
3360         }
3361 #ifdef DEBUG
3362 	//   printf("id=%li\n",mother->id);
3363 #endif
3364     }
3365     else
3366     {
3367       lw2 = -r->v * seq->fracchange;
3368       for (i = 0; i < rcategs; i++)
3369 	for (j = 0; j < categs; j++)
3370 	  {
3371 	    tblij = tbl[i][j];
3372 	    tblij->ww1 = EXP (tblij->ratxi * lw1);
3373 	    tblij->zz1 = EXP (tblij->ratxv * lw1);
3374 	    tblij->ww1zz1 = tblij->ww1 * tblij->zz1;
3375 	    tblij->vv1zz1 = (1.0 - tblij->ww1) * tblij->zz1;
3376 	  }
3377       for (i = 0; i < rcategs; i++)
3378 	{
3379 	  for (j = 0; j < categs; j++)
3380 	    {
3381 	      tblij = tbl[i][j];
3382 	      tblij->ww2 = EXP (tblij->ratxi * lw2);
3383 	      tblij->zz2 = EXP (tblij->ratxv * lw2);
3384 	      tblij->ww2zz2 = tblij->ww2 * tblij->zz2;
3385 	      tblij->vv2zz2 = (1.0 - tblij->ww2) * tblij->zz2;
3386 	    }
3387 	}
3388       for (i = 0; i < endsite; i++)
3389 	{
3390 	  newscale = FALSE;
3391 	  sxx3m = -MYREAL_MAX;
3392 	  k = seq->category[seq->alias[i] - 1] - 1;
3393 	  for (j = 0; j < rcategs; j++)
3394 	    {
3395 	      tbljk = tbl[j][k];
3396 		ww1zz1 = tbljk->ww1zz1;
3397 		vv1zz1 = tbljk->vv1zz1;
3398 		yy1 = 1.0 - tbljk->zz1;
3399 		ww2zz2 = tbljk->ww2zz2;
3400 		vv2zz2 = tbljk->vv2zz2;
3401 		yy2 = 1.0 - tbljk->zz2;
3402 		xx1 = &(q->x.s[i][j]);
3403 		xx2 = &(r->x.s[i][j]);
3404 		xx3 = &(mother->x.s[i][j]);
3405 
3406 
3407 		xx1t0 = (*xx1)[0];
3408 		xx1t1 = (*xx1)[1];
3409 		xx1t2 = (*xx1)[2];
3410 		xx1t3 = (*xx1)[3];
3411 
3412 		xx2t0 = (*xx2)[0];
3413 		xx2t1 = (*xx2)[1];
3414 		xx2t2 = (*xx2)[2];
3415 		xx2t3 = (*xx2)[3];
3416 
3417 
3418 
3419 		sum1 =
3420 		  yy1 * (freqa * xx1t0 + freqc * xx1t1 +
3421 			 freqg * xx1t2 + freqt * xx1t3);
3422 		sum2 =
3423 		  yy2 * (freqa * xx2t0 + freqc * xx2t1 +
3424 			 freqg * xx2t2 + freqt * xx2t3);
3425 		sumr1 = freqar * xx1t0 + freqgr * xx1t2;
3426 		sumr2 = freqar * xx2t0 + freqgr * xx2t2;
3427 		sumy1 = freqcy * xx1t1 + freqty * xx1t3;
3428 		sumy2 = freqcy * xx2t1 + freqty * xx2t3;
3429 		vzsumr1 = vv1zz1 * sumr1;
3430 		vzsumr2 = vv2zz2 * sumr2;
3431 		vzsumy1 = vv1zz1 * sumy1;
3432 		vzsumy2 = vv2zz2 * sumy2;
3433 		(*xx3)[0] =
3434 		  (sum1 + ww1zz1 * xx1t0 + vzsumr1) * (sum2 +
3435 						       ww2zz2 * xx2t0 +
3436 						       vzsumr2);
3437 		(*xx3)[1] =
3438 		  (sum1 + ww1zz1 * xx1t1 + vzsumy1) * (sum2 +
3439 						       ww2zz2 * xx2t1 +
3440 						       vzsumy2);
3441 		(*xx3)[2] =
3442 		  (sum1 + ww1zz1 * xx1t2 + vzsumr1) * (sum2 +
3443 						       ww2zz2 * xx2t2 +
3444 						       vzsumr2);
3445 		(*xx3)[3] =
3446 		  (sum1 + ww1zz1 * xx1t3 + vzsumy1) * (sum2 +
3447 						       ww2zz2 * xx2t3 +
3448 						       vzsumy2);
3449 		if(!((*xx3)[0]+ (*xx3)[1]+ (*xx3)[2]+ (*xx3)[3]>EPSILON))
3450 		  newscale=TRUE;
3451 	      }
3452 	    if(newscale)
3453 	      {
3454 		sxx3m = -MYREAL_MAX;
3455                 for (j = 0; j < rcategs; j++)
3456 		  {
3457                     xx3 = &(mother->x.s[i][j]);
3458                     tempsxx3m = MAX ((*xx3)[0], (*xx3)[1]);
3459                     tempsxx3m = MAX (tempsxx3m, (*xx3)[2]);
3460                     tempsxx3m = MAX (tempsxx3m, (*xx3)[3]);
3461                     if (tempsxx3m > sxx3m)
3462                         sxx3m = tempsxx3m;
3463 		  }
3464 		invsxx3m = 1.0 / sxx3m;
3465                 for (j = 0; j < rcategs; j++)
3466                 {
3467 		  xx3 = &(mother->x.s[i][j]);
3468 		  (*xx3)[0] *= invsxx3m;
3469 		  (*xx3)[1] *= invsxx3m;
3470 		  (*xx3)[2] *= invsxx3m;
3471 		  (*xx3)[3] *= invsxx3m;
3472                 }
3473                 mother->scale[i] = LOG (sxx3m)  + sxx1[i] + sxx2[i];
3474 	      }
3475 	    else
3476 	      {
3477 		mother->scale[i] = sxx1[i] + sxx2[i];
3478 	      }
3479 	  }
3480     }
3481 }    /* nuview */
3482 #endif /*GAP*/
3483 
3484 ///
3485 /// conditional likleihood calcculator using a simplified scheme called ancestral
3486 /// with two lineages the one with the higher value will win and is the ancestor
3487 /// non-Altivec version
3488 void
nuview_ancestral(node * mother,world_fmt * world,const long locus)3489 nuview_ancestral (node * mother, world_fmt * world, const long locus)
3490 {
3491     long i;
3492     const long endsite = world->data->seq[0]->endsite;
3493     MYREAL xx1t0, xx1t1,xx1t2, xx1t3;
3494     MYREAL xx2t0, xx2t1, xx2t2, xx2t3;
3495 
3496     MYREAL lw1, lw2, ratio1, yy1, yy2, sum1, sum2;
3497     node *q, *r;
3498     sitelike *xx1, *xx2;
3499     register MYREAL freqa, freqc, freqg, freqt;
3500     //register MYREAL freqar, freqcy, freqgr, freqty;
3501     seqmodel_fmt *seq;
3502     seq = world->data->seq[0];
3503 
3504     freqa = seq->basefrequencies[NUC_A];
3505     freqc = seq->basefrequencies[NUC_C];
3506     freqg = seq->basefrequencies[NUC_G];
3507     freqt = seq->basefrequencies[NUC_T];
3508     //    freqr = seq->basefrequencies[NUC_R];
3509     //freqy = seq->basefrequencies[NUC_Y];
3510     //freqar = seq->basefrequencies[NUC_AR];
3511     //freqcy = seq->basefrequencies[NUC_CY];
3512     //freqgr = seq->basefrequencies[NUC_GR];
3513     //freqty = seq->basefrequencies[NUC_TY];
3514 
3515     q = crawlback (mother->next);
3516     r = crawlback (mother->next->next);
3517     lw1 = -q->v * seq->fracchange;
3518     lw2 = -r->v * seq->fracchange;
3519     ratio1 = lw1 / (lw1 + lw2);
3520     yy1 = (1. - ratio1);
3521     yy2 = ratio1;
3522     //printf("%f ", q->tyme);
3523     //    for (i = 0; i < endsite; i++)
3524     //printf("(%f %f %f %f)", q->x.s[i][0][0], q->x.s[i][0][1], q->x.s[i][0][2], q->x.s[i][0][3]);
3525     //printf("\n%f ", r->tyme);
3526     //for (i = 0; i < endsite; i++)
3527     //printf("(%f %f %f %f)", r->x.s[i][0][0], r->x.s[i][0][1], r->x.s[i][0][2], r->x.s[i][0][3]);
3528     //printf("\n");
3529     for (i = 0; i < endsite; i++)
3530       {
3531 	xx1 = &(q->x.s[i][0]);
3532 	xx2 = &(r->x.s[i][0]);
3533 
3534 
3535 	xx1t0 = (*xx1)[0];
3536             xx1t1 = (*xx1)[1];
3537             xx1t2 = (*xx1)[2];
3538             xx1t3 = (*xx1)[3];
3539 
3540             xx2t0 = (*xx2)[0];
3541             xx2t1 = (*xx2)[1];
3542             xx2t2 = (*xx2)[2];
3543             xx2t3 = (*xx2)[3];
3544 
3545 
3546 
3547             sum1 =
3548                 yy1 * (freqa * xx1t0 + freqc * xx1t1 +
3549                        freqg * xx1t2 + freqt * xx1t3);
3550             sum2 =
3551                 yy2 * (freqa * xx2t0 + freqc * xx2t1 +
3552                        freqg * xx2t2 + freqt * xx2t3);
3553             if (sum1 == sum2)
3554                 sum1 += RANDUM () > 0.5 ? -1. : 1.;
3555             if (sum1 > sum2)
3556                 memcpy (mother->x.s[i][0], xx1, sizeof (sitelike));
3557             else
3558                 memcpy (mother->x.s[i][0], xx2, sizeof (sitelike));
3559         }
3560 }    /* nuview_ancestral */
3561 
3562 
3563 void
adjustroot(node * r)3564 adjustroot (node * r)
3565 {
3566   r->next->tyme = r->tyme;
3567   r->next->length = r->length;
3568   r->next->v = r->v;
3569   r->next->next->tyme = r->tyme;
3570   r->next->next->length = r->length;
3571   r->next->next->v = r->v;
3572 }
3573 
3574 
3575 /// \brief Calculation of conditional likelihood for new genealogy
3576 ///
3577 /// Calculation of conditional likelihood for new genealogy
3578 /// most of the code is unrolled for efficiency and may local variables are
3579 /// declared also for speed
3580 /// non-Altivec version
3581 MYINLINE
3582 void
pseudonu_seq(proposal_fmt * proposal,phenotype xxx1,MYREAL * sxx1,MYREAL v1,phenotype xxx2,MYREAL * sxx2,MYREAL v2)3583 pseudonu_seq (proposal_fmt * proposal, phenotype xxx1, MYREAL *sxx1,
3584 	      MYREAL v1, phenotype xxx2, MYREAL *sxx2, MYREAL v2)
3585 {
3586   const long endsite = proposal->world->data->seq[0]->endsite;
3587 
3588   long i, j, k;
3589 
3590   register MYREAL xx1t0, xx1t1,xx1t2, xx1t3;
3591   register MYREAL xx2t0, xx2t1, xx2t2, xx2t3;
3592   register MYREAL ta, tc,  tg, tt;
3593   register MYREAL tta, ttc,  ttg, ttt;
3594   register MYREAL lw1, lw2, yy1, yy2, ww1zz1, vv1zz1, ww2zz2, vv2zz2, vzsumr1,
3595     vzsumr2, vzsumy1, vzsumy2, sum1, sum2, sumr1, sumr2,
3596     sumy1, sumy2, ww1, ww2, zz1, zz2;
3597   register MYREAL freqa, freqc, freqg, freqt, freqar, freqgr, freqcy, freqty;
3598   register seqmodel_fmt *seq = proposal->world->data->seq[0];
3599   register MYREAL xxsumr1, xxsumr2, xxsumy1, xxsumy2;
3600   register valrec *tbl00;
3601   register valrec *tblij;
3602   register valrec *tbljk;
3603   register sitelike *xx1, *xx2 , *xx1copy, *xx2copy;
3604   register long rcategs = proposal->world->options->rcategs;
3605   register long categs = proposal->world->options->categs;
3606   register tbl_fmt tbl = proposal->world->tbl;
3607   freqa = seq->basefrequencies[NUC_A];
3608   freqc = seq->basefrequencies[NUC_C];
3609   freqg = seq->basefrequencies[NUC_G];
3610   freqt = seq->basefrequencies[NUC_T];
3611   //freqr = seq->basefrequencies[NUC_R];
3612   //freqy = seq->basefrequencies[NUC_Y];
3613   freqar = seq->basefrequencies[NUC_AR];
3614   freqcy = seq->basefrequencies[NUC_CY];
3615   freqgr = seq->basefrequencies[NUC_GR];
3616   freqty = seq->basefrequencies[NUC_TY];
3617 
3618   lw1 = -v1 * seq->fracchange;
3619   // use shortcut for dataset that do not use mutiple categories
3620   // if (rcategs == 1 && categs == 1)
3621     if ((rcategs | categs ) == 1)
3622     {
3623       tbl00 = tbl[0][0];
3624       lw2 = -v2 * seq->fracchange;
3625 
3626       ww1 = EXP (tbl00->ratxi * lw1);
3627       zz1 = EXP (tbl00->ratxv * lw1);
3628       ww2 = EXP (tbl00->ratxi * lw2);
3629       zz2 = EXP (tbl00->ratxv * lw2);
3630 
3631       ww1zz1 = ww1 * zz1;
3632       vv1zz1 = (1.0 - ww1) * zz1;
3633 
3634       ww2zz2 = ww2 * zz2;
3635       vv2zz2 = (1.0 - ww2) * zz2;
3636 
3637       yy1 = 1.0 - zz1;
3638       yy2 = 1.0 - zz2;
3639 
3640       for (i = 0; i < endsite; i++)
3641 	{
3642 	  xx1 = xx1copy = &(xxx1[i][0]);
3643 	  xx2 = xx2copy = &(xxx2[i][0]);
3644 
3645 	  xx1t0 = (*xx1)[0];
3646 	  ta = freqa * xx1t0;
3647 	  xx1t1 = (*xx1copy)[1];
3648 	  xx2t1 = (*xx2)[1];
3649 	  tc = freqc * xx1t1;
3650 	  xx1t2 = (*xx1)[2];
3651 	  xx2t2 = (*xx2copy)[2];
3652 	  tg = freqg * xx1t2;
3653 	  xx1t3 = (*xx1copy)[3];
3654 	  xx2t3 = (*xx2)[3];
3655 	  xx2t0 = (*xx2copy)[0];
3656 	  tt = freqt * xx1t3;
3657 
3658 	  tta = freqa * xx2t0;
3659 	  ttc = freqc * xx2t1;
3660 	  ttg = freqg * xx2t2;
3661 	  ttt = freqt * xx2t3;
3662 
3663 	  //sum1 = freqa * xx1t0 + freqc * xx1t1 + freqg * xx1t2 + freqt * xx1t3;
3664 	  sum1 = ta + tc + tg + tt;
3665 	  sum2 = tta + ttc + ttg + ttt ;
3666 	  sum1 *= yy1;
3667 	  sum2 *= yy2;
3668 
3669 	  sumr1 = freqar * xx1t0 + freqgr * xx1t2;
3670 	  sumr2 = freqar * xx2t0 + freqgr * xx2t2;
3671 	  sumy1 = freqcy * xx1t1 + freqty * xx1t3;
3672 	  sumy2 = freqcy * xx2t1 + freqty * xx2t3;
3673 
3674 	  vzsumr1 = vv1zz1 * sumr1;
3675 	  vzsumr2 = vv2zz2 * sumr2;
3676 	  vzsumy1 = vv1zz1 * sumy1;
3677 	  vzsumy2 = vv2zz2 * sumy2;
3678 
3679 	  xxsumr1 = sum1 + vzsumr1;
3680 	  xxsumr2 = sum2 + vzsumr2;
3681 
3682 	  (*xx1)[0] = (xxsumr1 + ww1zz1 * xx1t0) * (xxsumr2 + ww2zz2 * xx2t0);
3683 	  (*xx1)[2] = (xxsumr1 + ww1zz1 * xx1t2) * (xxsumr2 + ww2zz2 * xx2t2);
3684 	  xxsumy1 = sum1 + vzsumy1;
3685 	  xxsumy2 = sum2 + vzsumy2;
3686 	  (*xx1)[1] = (xxsumy1 + ww1zz1 * xx1t1) * (xxsumy2 + ww2zz2 * xx2t1);
3687 	  (*xx1)[3] = (xxsumy1 + ww1zz1 * xx1t3) * (xxsumy2 + ww2zz2 * xx2t3);
3688 	}
3689     }
3690   else
3691     {
3692       for (i = 0; i < rcategs; i++)
3693 	for (j = 0; j < categs; j++)
3694 	  {
3695 	    tblij = tbl[i][j];
3696 	    tblij->ww1 = EXP (tblij->ratxi * lw1);
3697 	    tblij->zz1 = EXP (tblij->ratxv * lw1);
3698 	    tblij->ww1zz1 = tblij->ww1 * tblij->zz1;
3699 	    tblij->vv1zz1 = (1.0 - tblij->ww1) * tblij->zz1;
3700 	  }
3701       lw2 = -v2 * seq->fracchange;
3702       for (i = 0; i < rcategs; i++)
3703 	for (j = 0; j < categs; j++)
3704 	  {
3705 	    tblij = tbl[i][j];
3706 	    tblij->ww2 = EXP (tblij->ratxi * lw2);
3707 	    tblij->zz2 = EXP (tblij->ratxv * lw2);
3708 	    tblij->ww2zz2 = tblij->ww2 * tblij->zz2;
3709 	    tblij->vv2zz2 = (1.0 - tblij->ww2) * tblij->zz2;
3710 	  }
3711       for (i = 0; i < endsite; i++)
3712 	{
3713 	  k = seq->category[seq->alias[i] - 1] - 1;
3714 	  for (j = 0; j < rcategs; j++)
3715 	    {
3716 	      tbljk = tbl[j][k];
3717 	      ww1zz1 = tbljk->ww1zz1;
3718 	      vv1zz1 = tbljk->vv1zz1;
3719 	      yy1 = 1.0 - tbljk->zz1;
3720 	      ww2zz2 = tbljk->ww2zz2;
3721 	      vv2zz2 = tbljk->vv2zz2;
3722 	      yy2 = 1.0 - tbljk->zz2;
3723 	      xx1 = &(xxx1[i][j]);
3724 	      xx2 = &(xxx2[i][j]);
3725 
3726 	      xx1t0 = (*xx1)[0];
3727 	      xx1t1 = (*xx1)[1];
3728 	      xx1t2 = (*xx1)[2];
3729 	      xx1t3 = (*xx1)[3];
3730 
3731 	      xx2t0 = (*xx2)[0];
3732 	      xx2t1 = (*xx2)[1];
3733 	      xx2t2 = (*xx2)[2];
3734 	      xx2t3 = (*xx2)[3];
3735 
3736 	      sum1 = yy1 * (freqa * xx1t0 + freqc * xx1t1 +
3737 			    freqg * xx1t2 + freqt * xx1t3);
3738 	      sum2 = yy2 * (freqa * xx2t0 + freqc * xx2t1 +
3739 			    freqg * xx2t2 + freqt * xx2t3);
3740 	      sumr1 = freqar * xx1t0 + freqgr * xx1t2;
3741 	      sumr2 = freqar * xx2t0 + freqgr * xx2t2;
3742 	      sumy1 = freqcy * xx1t1 + freqty * xx1t3;
3743 	      sumy2 = freqcy * xx2t1 + freqty * xx2t3;
3744 	      vzsumr1 = vv1zz1 * sumr1;
3745 	      vzsumr2 = vv2zz2 * sumr2;
3746 	      /* xx3[j][0] */
3747 	      (*xx1)[0] =
3748 		(sum1 + ww1zz1 * xx1t0 + vzsumr1) * (sum2 +
3749 						     ww2zz2 * xx2t0 +
3750 						     vzsumr2);
3751 	      /* xx3[j][2] */
3752 	      (*xx1)[2] =
3753 		(sum1 + ww1zz1 * xx1t2 + vzsumr1) * (sum2 +
3754 						     ww2zz2 * xx2t2 +
3755 						     vzsumr2);
3756 	      vzsumy1 = vv1zz1 * sumy1;
3757 	      vzsumy2 = vv2zz2 * sumy2;
3758 	      /* xx3[j][1] */
3759 	      (*xx1)[1] =
3760 		(sum1 + ww1zz1 * xx1t1 + vzsumy1) * (sum2 +
3761 						     ww2zz2 * xx2t1 +
3762 						     vzsumy2);
3763 	      /* xx3[j][3] */
3764 	      (*xx1)[3] =
3765 		(sum1 + ww1zz1 * xx1t3 + vzsumy1) * (sum2 +
3766 						     ww2zz2 * xx2t3 +
3767 						     vzsumy2);
3768 	    }
3769 	}
3770     }
3771 #ifdef BEAGLE
3772   printf(".");
3773 #endif
3774 }    /* pseudonu_seq */
3775 
3776 
3777 ///
3778 /// calculates conditiional likleihood on a fake tree before acceptance/rejection scheme
3779 /// non-altivec version
3780 MYINLINE
3781     void
pseudonu_seq_slow(proposal_fmt * proposal,phenotype xxx1,MYREAL * sxx1,MYREAL v1,phenotype xxx2,MYREAL * sxx2,MYREAL v2)3782     pseudonu_seq_slow (proposal_fmt * proposal, phenotype xxx1, MYREAL *sxx1,
3783                        MYREAL v1, phenotype xxx2, MYREAL *sxx2, MYREAL v2)
3784 {
3785 
3786   const long endsite = proposal->world->data->seq[0]->endsite;
3787   boolean newscale;
3788   long i, j, k;
3789   //static long count = 0;
3790 
3791   register MYREAL xx1t0, xx1t1,xx1t2, xx1t3;
3792   register MYREAL xx2t0, xx2t1, xx2t2, xx2t3;
3793   register MYREAL ta, tc,  tg, tt;
3794   register MYREAL tta, ttc,  ttg, ttt;
3795 
3796   register MYREAL lw1, lw2, yy1, yy2, ww1zz1, vv1zz1, ww2zz2, vv2zz2, vzsumr1,
3797     vzsumr2, vzsumy1, vzsumy2, sum1, sum2, sumr1, sumr2,
3798     sumy1, sumy2, ww1, ww2, zz1, zz2;
3799   register MYREAL freqa, freqc, freqg, freqt, freqar, freqgr, freqcy, freqty;
3800   register seqmodel_fmt *seq = proposal->world->data->seq[0];
3801   register MYREAL xxsumr1, xxsumr2, xxsumy1, xxsumy2;
3802   register  MYREAL sxx3m, tempsxx3m, invsxx3m;
3803   register  sitelike *xx1, *xx2, *xx1copy, *xx2copy;
3804   register  valrec *tblij;
3805   register  valrec *tbljk;
3806   register  valrec *tbl00;
3807   register  long rcategs = proposal->world->options->rcategs;
3808   register  long categs = proposal->world->options->categs;
3809   register  tbl_fmt tbl = proposal->world->tbl;
3810 
3811   freqa = seq->basefrequencies[NUC_A];
3812   freqc = seq->basefrequencies[NUC_C];
3813   freqg = seq->basefrequencies[NUC_G];
3814   freqt = seq->basefrequencies[NUC_T];
3815   //freqr = seq->basefrequencies[NUC_R];
3816   //freqy = seq->basefrequencies[NUC_Y];
3817   freqar = seq->basefrequencies[NUC_AR];
3818   freqcy = seq->basefrequencies[NUC_CY];
3819   freqgr = seq->basefrequencies[NUC_GR];
3820   freqty = seq->basefrequencies[NUC_TY];
3821   lw1 = -v1 * seq->fracchange;
3822   if ((rcategs | categs) == 1)
3823     {
3824         tbl00 = tbl[0][0];
3825         lw2 = -v2 * seq->fracchange;
3826 
3827         ww1 = EXP (tbl00->ratxi * lw1);
3828         zz1 = EXP (tbl00->ratxv * lw1);
3829         ww2 = EXP (tbl00->ratxi * lw2);
3830         zz2 = EXP (tbl00->ratxv * lw2);
3831 
3832         ww1zz1 = ww1 * zz1;
3833         vv1zz1 = (1.0 - ww1) * zz1;
3834 
3835         ww2zz2 = ww2 * zz2;
3836         vv2zz2 = (1.0 - ww2) * zz2;
3837 
3838         yy1 = 1.0 - zz1;
3839         yy2 = 1.0 - zz2;
3840 
3841         for (i = 0; i < endsite; i++)
3842         {
3843 	  newscale = FALSE;
3844             xx1 = xx1copy = &(xxx1[i][0]);
3845             xx2 = xx2copy = &(xxx2[i][0]);
3846 
3847 	    xx1t0 = (*xx1)[0];
3848 	    ta = freqa * xx1t0;
3849 	    xx1t1 = (*xx1copy)[1];
3850 	    xx2t1 = (*xx2)[1];
3851 	    tc = freqc * xx1t1;
3852 	    xx1t2 = (*xx1)[2];
3853 	    xx2t2 = (*xx2copy)[2];
3854 	    tg = freqg * xx1t2;
3855 	    xx1t3 = (*xx1copy)[3];
3856 	    xx2t3 = (*xx2)[3];
3857 	    xx2t0 = (*xx2copy)[0];
3858 	    tt = freqt * xx1t3;
3859 
3860 	    tta = freqa * xx2t0;
3861 	    ttc = freqc * xx2t1;
3862 	    ttg = freqg * xx2t2;
3863 	    ttt = freqt * xx2t3;
3864 
3865 	    sum1 = ta + tc + tg + tt;
3866 	    sum2 = tta + ttc + ttg + ttt ;
3867 	    sum1 *= yy1;
3868 	    sum2 *= yy2;
3869 
3870             sumr1 = freqar * xx1t0 + freqgr * xx1t2;
3871             sumr2 = freqar * xx2t0 + freqgr * xx2t2;
3872             sumy1 = freqcy * xx1t1 + freqty * xx1t3;
3873             sumy2 = freqcy * xx2t1 + freqty * xx2t3;
3874 
3875             vzsumr1 = vv1zz1 * sumr1;
3876             vzsumr2 = vv2zz2 * sumr2;
3877             vzsumy1 = vv1zz1 * sumy1;
3878             vzsumy2 = vv2zz2 * sumy2;
3879 
3880 	    xxsumr1 = sum1 + vzsumr1;
3881 	    xxsumr2 = sum2 + vzsumr2;
3882 
3883 	    (*xx1)[0] = (xxsumr1 + ww1zz1 * xx1t0) * (xxsumr2 + ww2zz2 * xx2t0);
3884 	    (*xx1)[2] = (xxsumr1 + ww1zz1 * xx1t2) * (xxsumr2 + ww2zz2 * xx2t2);
3885 	    xxsumy1 = sum1 + vzsumy1;
3886 	    xxsumy2 = sum2 + vzsumy2;
3887 	    (*xx1)[1] = (xxsumy1 + ww1zz1 * xx1t1) * (xxsumy2 + ww2zz2 * xx2t1);
3888 	    (*xx1)[3] = (xxsumy1 + ww1zz1 * xx1t3) * (xxsumy2 + ww2zz2 * xx2t3);
3889             sxx1[i] += sxx2[i];
3890 	    if(!((*xx1)[0]+ (*xx1)[1]+ (*xx1)[2]+ (*xx1)[3]>EPSILON))
3891 	      {
3892 		//		if(!((*xx1)[0]+ (*xx1)[1]+ (*xx1)[2]+ (*xx1)[3]>0.0))
3893 		//  {
3894 		//    warning("pseudolike problem\n");
3895 		//  }
3896 		sxx3m = -MYREAL_MAX;
3897 		tempsxx3m = MAX ((*xx1)[0], (*xx1)[1]);
3898 		tempsxx3m = MAX (tempsxx3m, (*xx1)[2]);
3899 		tempsxx3m = MAX (tempsxx3m, (*xx1)[3]);
3900 		if (tempsxx3m > sxx3m)
3901 		  sxx3m = tempsxx3m;
3902 		invsxx3m = 1. / sxx3m;
3903 		(*xx1)[0] *= invsxx3m, (*xx1)[1] *= invsxx3m;
3904 		(*xx1)[2] *= invsxx3m, (*xx1)[3] *= invsxx3m;
3905 		sxx1[i] += LOG (sxx3m);// + sxx2[i];
3906 	      }
3907 	}
3908     }
3909   else
3910     {
3911       lw2 = -v2 * seq->fracchange;
3912         for (i = 0; i < rcategs; i++)
3913 	  {
3914             for (j = 0; j < categs; j++)
3915 	      {
3916                 tblij = tbl[i][j];
3917                 tblij->ww1 = EXP (tblij->ratxi * lw1);
3918                 tblij->zz1 = EXP (tblij->ratxv * lw1);
3919                 tblij->ww1zz1 = tblij->ww1 * tblij->zz1;
3920                 tblij->vv1zz1 = (1.0 - tblij->ww1) * tblij->zz1;
3921                 tblij->ww2 = EXP (tblij->ratxi * lw2);
3922                 tblij->zz2 = EXP (tblij->ratxv * lw2);
3923                 tblij->ww2zz2 = tblij->ww2 * tblij->zz2;
3924                 tblij->vv2zz2 = (1.0 - tblij->ww2) * tblij->zz2;
3925 	      }
3926 	  }
3927 	for (i = 0; i < endsite; i++)
3928 	  {
3929 	    newscale = FALSE;
3930 	    sxx3m = -MYREAL_MAX;
3931 	    k = seq->category[seq->alias[i] - 1] - 1;
3932 	    for (j = 0; j < rcategs; j++)
3933 	      {
3934 		tbljk = tbl[j][k];
3935 		ww1zz1 = tbljk->ww1zz1;
3936 		vv1zz1 = tbljk->vv1zz1;
3937 		yy1 = 1.0 - tbljk->zz1;
3938 		ww2zz2 = tbljk->ww2zz2;
3939 		vv2zz2 = tbljk->vv2zz2;
3940 		yy2 = 1.0 - tbljk->zz2;
3941 		xx1 = xx1copy = &(xxx1[i][j]);
3942 		xx2 = xx2copy = &(xxx2[i][j]);
3943 
3944 		xx1t0 = (*xx1)[0];
3945 		ta = freqa * xx1t0;
3946 		xx1t1 = (*xx1copy)[1];
3947 		xx2t1 = (*xx2)[1];
3948 		tc = freqc * xx1t1;
3949 		xx1t2 = (*xx1)[2];
3950 		xx2t2 = (*xx2copy)[2];
3951 		tg = freqg * xx1t2;
3952 		xx1t3 = (*xx1copy)[3];
3953 		xx2t3 = (*xx2)[3];
3954 		xx2t0 = (*xx2copy)[0];
3955 		tt = freqt * xx1t3;
3956 
3957 		tta = freqa * xx2t0;
3958 		ttc = freqc * xx2t1;
3959 		ttg = freqg * xx2t2;
3960 		ttt = freqt * xx2t3;
3961 
3962 		sum1 = ta + tc + tg + tt;
3963 		sum2 = tta + ttc + ttg + ttt ;
3964 		sum1 *= yy1;
3965 		sum2 *= yy2;
3966 
3967 		sumr1 = freqar * xx1t0 + freqgr * xx1t2;
3968 		sumr2 = freqar * xx2t0 + freqgr * xx2t2;
3969 		sumy1 = freqcy * xx1t1 + freqty * xx1t3;
3970 		sumy2 = freqcy * xx2t1 + freqty * xx2t3;
3971 
3972 		vzsumr1 = vv1zz1 * sumr1;
3973 		vzsumr2 = vv2zz2 * sumr2;
3974 		vzsumy1 = vv1zz1 * sumy1;
3975 		vzsumy2 = vv2zz2 * sumy2;
3976 
3977 		xxsumr1 = sum1 + vzsumr1;
3978 		xxsumr2 = sum2 + vzsumr2;
3979 		xxsumy1 = sum1 + vzsumy1;
3980 		xxsumy2 = sum2 + vzsumy2;
3981 
3982 		(*xx1)[0] = (xxsumr1 + ww1zz1 * xx1t0) * (xxsumr2 + ww2zz2 * xx2t0);
3983 		(*xx1)[1] = (xxsumy1 + ww1zz1 * xx1t1) * (xxsumy2 + ww2zz2 * xx2t1);
3984 	        (*xx1)[2] = (xxsumr1 + ww1zz1 * xx1t2) * (xxsumr2 + ww2zz2 * xx2t2);
3985 		(*xx1)[3] = (xxsumy1 + ww1zz1 * xx1t3) * (xxsumy2 + ww2zz2 * xx2t3);
3986 		if(!((*xx1)[0]+ (*xx1)[1]+ (*xx1)[2]+ (*xx1)[3]>EPSILON))
3987 		  newscale=TRUE;
3988 	      }
3989 	    if(newscale)
3990 	      {
3991 		sxx3m = -MYREAL_MAX;
3992 		for (j = 0; j < rcategs; j++)
3993 		  {
3994 		    xx1 = &(xxx1[i][j]);
3995 		    tempsxx3m = MAX ((*xx1)[0], (*xx1)[1]);
3996 		    tempsxx3m = MAX (tempsxx3m, (*xx1)[2]);
3997 		    tempsxx3m = MAX (tempsxx3m, (*xx1)[3]);
3998 		    if (tempsxx3m > sxx3m)
3999 		      sxx3m = tempsxx3m;
4000 		  }
4001 		invsxx3m = 1. / sxx3m;
4002 		for (j = 0; j < rcategs; j++)
4003 		  {
4004 		    xx1 = &(xxx1[i][j]);
4005 		    (*xx1)[0] *= invsxx3m, (*xx1)[1] *= invsxx3m;
4006 		    (*xx1)[2] *= invsxx3m, (*xx1)[3] *= invsxx3m;
4007 		  }
4008 		sxx1[i] += LOG (sxx3m) + sxx2[i];
4009 	      }
4010 	    else
4011 	      {
4012 		sxx1[i] += sxx2[i];
4013 	      }
4014 	  }
4015 	/* --- old scaling scheme ----- replaced with test above
4016 	   count++;
4017 	   if (count >= SCALEINTERVAL)
4018 	   {
4019 	   count = 0;
4020 	   for (i = 0; i < endsite; i++)
4021 	   {
4022 	   sxx3m = -MYREAL_MAX;
4023 	   for (j = 0; j < rcategs; j++)
4024 	   {
4025 	   xx1 = &(xxx1[i][j]);
4026 	   tempsxx3m = MAX ((*xx1)[0], (*xx1)[1]);
4027 	   tempsxx3m = MAX (tempsxx3m, (*xx1)[2]);
4028 	   tempsxx3m = MAX (tempsxx3m, (*xx1)[3]);
4029 	   if (tempsxx3m > sxx3m)
4030 	   sxx3m = tempsxx3m;
4031 	   }
4032 	   invsxx3m = 1. / sxx3m;
4033 	   for (j = 0; j < rcategs; j++)
4034 	   {
4035 	   xx1 = &(xxx1[i][j]);
4036 	   (*xx1)[0] *= invsxx3m, (*xx1)[1] *= invsxx3m;
4037 	   (*xx1)[2] *= invsxx3m, (*xx1)[3] *= invsxx3m;
4038 	   }
4039 	   sxx1[i] += LOG (sxx3m) + sxx2[i];
4040 	   }
4041 	   }
4042 	   }
4043 	   end of old scaling scheme------------*********/
4044     }
4045 }
4046 /* pseudonu_seq */
4047   ///
4048 /// calculates ancestral likelihood before acc/reject scheme
4049 /// non-atlivec version
4050 MYINLINE
4051 void
pseudonu_anc(proposal_fmt * proposal,phenotype xxx1,MYREAL v1,phenotype xxx2,MYREAL v2)4052 pseudonu_anc (proposal_fmt * proposal, phenotype xxx1, MYREAL v1,
4053               phenotype xxx2, MYREAL v2)
4054 {
4055     long i;
4056     const long endsite = proposal->world->data->seq[0]->endsite;
4057     MYREAL xx1t0, xx1t1,xx1t2, xx1t3;
4058     MYREAL xx2t0, xx2t1, xx2t2, xx2t3;
4059 
4060     MYREAL lw1, lw2, ratio1, yy1, yy2, sum1, sum2;
4061     MYREAL freqa, freqc, freqg, freqt;
4062     seqmodel_fmt *seq;
4063     sitelike *xx1, *xx2;
4064     seq = proposal->world->data->seq[0];
4065     freqa = seq->basefrequencies[NUC_A];
4066     freqc = seq->basefrequencies[NUC_C];
4067     freqg = seq->basefrequencies[NUC_G];
4068     freqt = seq->basefrequencies[NUC_T];
4069     //    freqr = seq->basefrequencies[NUC_R];
4070     //freqy = seq->basefrequencies[NUC_Y];
4071     //freqar = seq->basefrequencies[NUC_AR];
4072     //freqcy = seq->basefrequencies[NUC_CY];
4073     //freqgr = seq->basefrequencies[NUC_GR];
4074     //freqty = seq->basefrequencies[NUC_TY];
4075 
4076     lw1 = -v1 * seq->fracchange;
4077     lw2 = -v2 * seq->fracchange;
4078     ratio1 = lw1 / (lw1 + lw2);
4079     yy1 = (1. - ratio1);
4080     yy2 = ratio1;
4081     //for (i = 0; i < endsite; i++)
4082     //printf("(%f %f %f %f)", xxx1[i][0][0], xxx1[i][0][1], xxx1[i][0][2], xxx1[i][0][3]);
4083     //printf("\n");
4084     //for (i = 0; i < endsite; i++)
4085     //printf("(%f %f %f %f)", xxx2[i][0][0], xxx2[i][0][1], xxx2[i][0][2], xxx2[i][0][3]);
4086     //printf("\n");
4087     for (i = 0; i < endsite; i++)
4088     {
4089         xx1 = &(xxx1[i][0]);
4090         xx2 = &(xxx2[i][0]);
4091 
4092 
4093 		xx1t0 = (*xx1)[0];
4094 		xx1t1 = (*xx1)[1];
4095 		xx1t2 = (*xx1)[2];
4096 		xx1t3 = (*xx1)[3];
4097 
4098 		xx2t0 = (*xx2)[0];
4099 		xx2t1 = (*xx2)[1];
4100 		xx2t2 = (*xx2)[2];
4101 		xx2t3 = (*xx2)[3];
4102 
4103 
4104         sum1 =
4105             yy1 * (freqa * xx1t0 + freqc * xx1t1 + freqg * xx1t2 +
4106                    freqt * xx1t3);
4107         sum2 =
4108             yy2 * (freqa * xx2t0 + freqc * xx2t1 + freqg * xx2t2 +
4109                    freqt * xx2t3);
4110         if (sum1 == sum2)
4111             sum1 += RANDUM () > 0.5 ? -1. : 1.;
4112         if (sum1 > sum2)
4113             memcpy (xxx1[i][0], *xx1, sizeof (sitelike));
4114         else
4115             memcpy (xxx1[i][0], *xx2, sizeof (sitelike));
4116     }
4117 }    /* pseudo_nu_anc */
4118 
4119 ///
4120 /// ancestral conditional method, calculates ancestral tree likelihood
4121 MYINLINE MYREAL
pseudo_tl_anc(phenotype xx1,phenotype xx2,MYREAL v1,MYREAL v2,proposal_fmt * proposal,world_fmt * world)4122 pseudo_tl_anc (phenotype xx1, phenotype xx2, MYREAL v1, MYREAL v2,
4123                proposal_fmt * proposal, world_fmt * world)
4124 {
4125   const long endsite = proposal->world->data->seq[0]->endsite;
4126     contribarr tterm;
4127     MYREAL summ;
4128     long i;
4129     sitelike *x1;
4130 
4131     register MYREAL freqa, freqc, freqg, freqt;
4132     seqmodel_fmt *seq;
4133     seq = proposal->world->data->seq[0];
4134 
4135     freqa = seq->basefrequencies[NUC_A];
4136     freqc = seq->basefrequencies[NUC_C];
4137     freqg = seq->basefrequencies[NUC_G];
4138     freqt = seq->basefrequencies[NUC_T];
4139 
4140     seq = world->data->seq[0];
4141     summ = 0.0;
4142     for (i = 0; i < endsite; i++)
4143     {
4144         x1 = &(xx1[i][0]);
4145         tterm[0] =
4146             freqa * (*x1)[0] + freqc * (*x1)[1] + freqg * (*x1)[2] +
4147             freqt * (*x1)[3];
4148         summ += seq->aliasweight[i] * LOG (tterm[0]);
4149         //printf("pseudo %3li> %f %f \n", i, tterm[0], summ);
4150     }
4151     return summ;
4152 } /*anc*/
4153 
4154 
4155 ///
4156 /// calculates the conditional likelihood on a tree
4157 MYINLINE MYREAL
pseudo_tl_seq(phenotype xx1,phenotype xx2,MYREAL v1,MYREAL v2,proposal_fmt * proposal,world_fmt * world)4158 pseudo_tl_seq (phenotype xx1, phenotype xx2, MYREAL v1, MYREAL v2,
4159                proposal_fmt * proposal, world_fmt * world)
4160 {
4161   const long endsite = proposal->world->data->seq[0]->endsite;
4162     contribarr tterm;
4163     contribarr clai;
4164     contribarr like;
4165     contribarr nulike;
4166     //long          size = sizeof(MYREAL) * world->options->rcategs;
4167     MYREAL summ    = 0.0;
4168     MYREAL sum2    = 0.0;
4169     MYREAL sumc    = 0.0;
4170     MYREAL sumterm = 0.0;
4171     MYREAL lterm;
4172     long i;
4173     long j;
4174     long k;
4175     long lai;
4176     worldoption_fmt *opt;
4177     register MYREAL freqa, freqc, freqg, freqt;//, freqr, freqy;
4178     seqmodel_fmt *seq = proposal->world->data->seq[0];
4179     sitelike *x1;
4180 
4181     freqa = seq->basefrequencies[NUC_A];
4182     freqc = seq->basefrequencies[NUC_C];
4183     freqg = seq->basefrequencies[NUC_G];
4184     freqt = seq->basefrequencies[NUC_T];
4185     //freqr = seq->basefrequencies[NUC_R];
4186     //freqy = seq->basefrequencies[NUC_Y];
4187 
4188     opt = world->options;
4189     summ = 0.0;
4190     if (opt->rcategs == 1 && opt->categs == 1)
4191     {
4192         for (i = 0; i < endsite; i++)
4193         {
4194             x1 = &(xx1[i][0]);
4195             tterm[0] =
4196                 freqa * (*x1)[0] + freqc * (*x1)[1] + freqg * (*x1)[2] +
4197                 freqt * (*x1)[3];
4198             summ += seq->aliasweight[i] * (LOG (tterm[0]) + proposal->mf[i]);
4199         }
4200     }
4201     else
4202     {
4203         for (i = 0; i < endsite; i++)
4204         {
4205             for (j = 0; j < opt->rcategs; j++)
4206             {
4207                 x1 = &(xx1[i][j]);
4208                 tterm[j] = freqa * (*x1)[0] + freqc * (*x1)[1] + freqg * (*x1)[2] + freqt * (*x1)[3];
4209             }
4210             sumterm = 0.0;
4211             for (j = 0; j < opt->rcategs; j++)
4212                 sumterm += opt->probcat[j] * tterm[j];
4213             lterm = LOG (sumterm) + proposal->mf[i];
4214             for (j = 0; j < opt->rcategs; j++)
4215                 clai[j] = tterm[j] / sumterm;
4216             swap (clai, world->contribution[i]);
4217             //memcpy(world->contribution[i], clai, size);
4218             summ += seq->aliasweight[i] * lterm;
4219         }
4220         for (j = 0; j < opt->rcategs; j++)
4221             like[j] = 1.0;
4222         for (i = 0; i < seq->sites[world->locus]; i++)
4223         {
4224             sumc = 0.0;
4225             for (k = 0; k < opt->rcategs; k++)
4226                 sumc += opt->probcat[k] * like[k];
4227             sumc *= opt->lambda;
4228             if ((seq->ally[i] > 0) && (seq->location[seq->ally[i] - 1] > 0))
4229             {
4230                 lai = seq->location[seq->ally[i] - 1];
4231                 swap (world->contribution[lai - 1], clai);
4232                 //memcpy(clai, world->contribution[lai - 1], size);
4233                 for (j = 0; j < opt->rcategs; j++)
4234                     nulike[j] = ((1.0 - opt->lambda) * like[j] + sumc) * clai[j];
4235             }
4236             else
4237             {
4238                 for (j = 0; j < opt->rcategs; j++)
4239                     nulike[j] = ((1.0 - opt->lambda) * like[j] + sumc);
4240             }
4241             swap (nulike, like);
4242             //memcpy(like, nulike, size);
4243         }
4244         sum2 = 0.0;
4245         for (i = 0; i < opt->rcategs; i++)
4246             sum2 += opt->probcat[i] * like[i];
4247         summ += LOG (sum2);
4248     }
4249     //    printf("summ=%f\n",summ);
4250     return summ;
4251 }
4252 
4253 MYINLINE
4254 MYREAL
pseudo_tl_snp(phenotype xx1,phenotype xx2,MYREAL v1,MYREAL v2,proposal_fmt * proposal,world_fmt * world)4255 pseudo_tl_snp (phenotype xx1, phenotype xx2, MYREAL v1, MYREAL v2,
4256                proposal_fmt * proposal, world_fmt * world)
4257 {
4258   const long endsite = world->data->seq[0]->endsite;
4259     contribarr tterm, invariants;
4260     contribarr like, nulike, clai;
4261     //long          size = sizeof(MYREAL) * world->options->rcategs;
4262     MYREAL summ, sum2, sumc, sumterm, lterm;
4263     long i, j, k, lai;
4264     worldoption_fmt *opt = world->options;
4265     register MYREAL freqa, freqc, freqg, freqt;//, freqr, freqy;
4266     seqmodel_fmt *seq = world->data->seq[0];
4267     sitelike *x1;
4268     freqa = seq->basefrequencies[NUC_A];
4269     freqc = seq->basefrequencies[NUC_C];
4270     freqg = seq->basefrequencies[NUC_G];
4271     freqt = seq->basefrequencies[NUC_T];
4272     //freqr = seq->basefrequencies[NUC_R];
4273     //freqy = seq->basefrequencies[NUC_Y];
4274 
4275     summ = 0.0;
4276     memset (invariants, 0, sizeof (contribarr));
4277     snp_invariants (invariants, world, world->locus, xx1,proposal->mf);
4278     if ((opt->rcategs | opt->categs) == 1)
4279     {
4280         for (i = 0; i < endsite - seq->addon; i++)
4281         {
4282             x1 = &(xx1[i][0]);
4283             tterm[0] =
4284 	      ((freqa * (*x1)[0] + freqc * (*x1)[1] +
4285 		freqg * (*x1)[2] + freqt * (*x1)[3]));
4286             if (tterm[0] == 0.0)
4287 	      {
4288 #ifdef TESTDEBUG
4289 		fprintf(stderr,"Freq/condL=(%f,%f | %f,%f | %f,%f | %f,%f) Invars=%f\n",
4290 			freqa , (*x1)[0] , freqc , (*x1)[1] ,
4291 			freqg , (*x1)[2] , freqt , (*x1)[3] , invariants[0]);
4292 
4293                 warning ("Tree incompatible with data\n");
4294 #endif
4295 		return -HUGE;
4296 	      }
4297             lterm = LOG (tterm[0]) + proposal->mf[i] - log(invariants[0]);
4298             summ += seq->aliasweight[i] * lterm;
4299         }
4300         return summ;
4301     }
4302     else
4303     {
4304         for (i = 0; i < endsite - 4; i++)
4305         {
4306            // k = seq->category[seq->alias[i] - 1] - 1;
4307             for (j = 0; j < opt->rcategs; j++)
4308             {
4309                 x1 = &(xx1[i][j]);
4310                 tterm[j] =
4311                     (freqa * (*x1)[0] + freqc * (*x1)[1] +
4312                      freqg * (*x1)[2] +
4313                      freqt * (*x1)[3]) / invariants[j];
4314             }
4315             sumterm = 0.0;
4316             for (j = 0; j < opt->rcategs; j++)
4317 	      sumterm += opt->probcat[j] * tterm[j];
4318             lterm = LOG (sumterm) + proposal->mf[i];
4319             for (j = 0; j < opt->rcategs; j++)
4320                 clai[j] = tterm[j] / sumterm;
4321             swap (clai, world->contribution[i]);
4322             summ += seq->aliasweight[i] * lterm;
4323         }
4324         for (j = 0; j < opt->rcategs; j++)
4325             like[j] = 1.0;
4326         for (i = 0; i < seq->sites[world->locus]; i++)
4327         {
4328             sumc = 0.0;
4329             for (k = 0; k < opt->rcategs; k++)
4330                 sumc += opt->probcat[k] * like[k];
4331             sumc *= opt->lambda;
4332             if ((seq->ally[i] > 0) && (seq->location[seq->ally[i] - 1] > 0))
4333             {
4334                 lai = seq->location[seq->ally[i] - 1];
4335                 swap (world->contribution[lai - 1], clai);
4336                 for (j = 0; j < opt->rcategs; j++)
4337                     nulike[j] = ((1.0 - opt->lambda) * like[j] + sumc) * clai[j];
4338             }
4339             else
4340             {
4341                 for (j = 0; j < opt->rcategs; j++)
4342                     nulike[j] = ((1.0 - opt->lambda) * like[j] + sumc);
4343             }
4344             swap (nulike, like);
4345             //memcpy(like, nulike, size);
4346         }
4347         sum2 = 0.0;
4348         for (i = 0; i < opt->rcategs; i++)
4349             sum2 += opt->probcat[i] * like[i];
4350         summ += LOG (sum2);
4351         return summ;
4352     }
4353 }
4354 
4355 MYINLINE
4356 MYREAL
pseudo_tl_snp_unlinked(phenotype xx1,phenotype xx2,MYREAL v1,MYREAL v2,proposal_fmt * proposal,world_fmt * world)4357 pseudo_tl_snp_unlinked (phenotype xx1, phenotype xx2, MYREAL v1, MYREAL v2,
4358                         proposal_fmt * proposal, world_fmt * world)
4359 {
4360   const long endsite = world->data->seq[0]->endsite;
4361     contribarr tterm, invariants;
4362     MYREAL summ, datasum = 0, lterm, result = 0;
4363     long i;
4364     //worldoption_fmt *opt;
4365     MYREAL freqa, freqc, freqg, freqt;//, freqr, freqy;
4366     seqmodel_fmt *seq = proposal->world->data->seq[0];
4367     sitelike *x1;
4368     freqa = seq->basefrequencies[NUC_A];
4369     freqc = seq->basefrequencies[NUC_C];
4370     freqg = seq->basefrequencies[NUC_G];
4371     freqt = seq->basefrequencies[NUC_T];
4372     //freqr = seq->basefrequencies[NUC_R];
4373    // freqy = seq->basefrequencies[NUC_Y];
4374 
4375     //opt = world->options;
4376     seq = world->data->seq[0];
4377     summ = 0.0;
4378     memset (invariants, 0, sizeof (contribarr));
4379     snp_invariants (invariants, world, world->locus,  xx1, proposal->mf);
4380     for (i = 0; i < endsite; i++)
4381     {
4382         x1 = &(xx1[i][0]);
4383         tterm[0] =
4384             (freqa * (*x1)[0] + freqc * (*x1)[1] +
4385              freqg * (*x1)[2] + freqt * (*x1)[3]);
4386         if (tterm[0] == 0.0)
4387             error ("Tree incompatible with data\n");
4388 
4389         if (i % 5 == 0)
4390         {
4391             lterm = LOG (tterm[0]) + proposal->mf[i];
4392             summ = 0;
4393             datasum = seq->aliasweight[i / 5] * lterm;
4394         }
4395         else
4396             summ += pow (tterm[0], (MYREAL) seq->aliasweight[i / 5]);
4397         if (((i + 1) % 5) == 0 && i != 0)
4398             result +=
4399                 datasum + LOG ((1 - EXP (LOG (summ) - datasum)) / invariants[0]);
4400     }
4401     //EXP (sum) is the prob(xa | g)
4402     //              EXP (datasum) is prob(? a | g)
4403     //              panelsum = invariants is prob(x ? |g)
4404     // (datasum - sum) / invariants
4405     // ++some small number business
4406     return result;
4407 }
4408 
4409 
4410 
4411 
4412 MYREAL
treelike_anc(world_fmt * world,long locus)4413 treelike_anc (world_fmt * world, long locus)
4414 {
4415   const long endsite = world->data->seq[0]->endsite;
4416     contribarr tterm;
4417 
4418     MYREAL summ;
4419     long i;
4420     node *p;
4421     seqmodel_fmt *seq = world->data->seq[0];
4422     register MYREAL freqa, freqc,freqg,freqt;
4423     sitelike *x1;
4424     freqa = seq->basefrequencies[NUC_A];
4425     freqc = seq->basefrequencies[NUC_C];
4426     freqg = seq->basefrequencies[NUC_G];
4427     freqt = seq->basefrequencies[NUC_T];
4428 
4429     p = crawlback (world->root->next);
4430     summ = 0.0;
4431     for (i = 0; i < endsite; i++)
4432     {
4433         x1 = &(p->x.s[i][0]);
4434         tterm[0] =
4435             freqa * (*x1)[0] + freqc * (*x1)[1] +
4436             freqg * (*x1)[2] + freqt * (*x1)[3];
4437         summ += seq->aliasweight[i] * LOG (tterm[0]);
4438         //printf("real  %3li> %f %f \n", i, tterm[0], summ);
4439     }
4440     return summ;
4441 }    /* treelike_anc */
4442 
4443 
4444 ///
4445 /// write a tree to a diskfile, compile setting will allow to do this as a NEXUS file
4446 void
treeout(FILE * file,node * joint,node * p,long s)4447 treeout (FILE * file, node * joint, node * p, long s)
4448 {
4449     /* write out file with representation of final tree */
4450     MYREAL x;
4451     char migstring[30];
4452     if (p->type == 't')
4453     {
4454 #ifdef UEP
4455         if(p->uep!=NULL)
4456             FPRINTF (file, "%s [%c]", p->nayme, p->uep[0]);
4457         else
4458             FPRINTF (file, "%s ", p->nayme);
4459 #else
4460         FPRINTF (file, "%s ", p->nayme);
4461 #endif
4462 #ifdef TREECOMMENTS
4463 	FPRINTF(file,"[& t:%.10f]",p->tyme);
4464 #endif
4465     }
4466     else
4467     {
4468         FPRINTF (file, "(");
4469         treeout (file, joint, crawlback (p->next), s);
4470         FPRINTF (file, ",");
4471         treeout (file, joint, crawlback (p->next->next), s);
4472         FPRINTF (file, ")");
4473 #ifdef TREECOMMENTS
4474 	FPRINTF(file,"[& c:%.10f]",p->tyme);
4475 #endif
4476     }
4477     if (p != joint)
4478     {
4479         x = crawlback (p)->tyme - p->tyme;
4480 #ifdef UEP
4481 	if(p->uep!=NULL)
4482 	  FPRINTF (file, ":%.10f [%c]", x, p->uep[0]);
4483 	else
4484 	  FPRINTF (file, ":%.10f ",  x);
4485 #else
4486 	FPRINTF (file, ":%.10f ", x);
4487 #endif
4488         p = showtop (p->back);
4489         while (p->type == 'm')
4490 	  {
4491             sprintf (migstring, " [&M %li %li:%g]", p->pop, p->actualpop,
4492                      p->tyme - showtop (p->next->back)->tyme);
4493             FPRINTF (file, "%s", migstring);
4494             p = showtop (p->back);
4495 	  }
4496     }
4497     else
4498     {
4499 #ifdef NEXUSTREE
4500 	FPRINTF (file, ";\n");
4501 #else
4502 	FPRINTF (file, ":0;\n");
4503 #endif
4504     }
4505 }    /* treeout */
4506 
4507 ///
4508 /// write a tree to a diskfile, compile setting will allow to do this as a NEXUS file
4509 void
debugtreeout(FILE * file,node * joint,node * p,long s)4510 debugtreeout (FILE * file, node * joint, node * p, long s)
4511 {
4512     /* write out file with representation of final tree */
4513   //  MYREAL x;
4514     char migstring[30];
4515     if (p->type == 't')
4516     {
4517         FPRINTF (file, "%li ", p->id);
4518     }
4519     else
4520     {
4521         FPRINTF (file, "(");
4522         debugtreeout (file, joint, crawlback (p->next), s);
4523         FPRINTF (file, ",");
4524         debugtreeout (file, joint, crawlback (p->next->next), s);
4525         FPRINTF (file, ")<%li>",p->id);
4526     }
4527     if (p != joint)
4528     {
4529         //x = crawlback (p)->tyme - p->tyme;
4530         //	FPRINTF (file, ":%.10f ", x);
4531         p = showtop (p->back);
4532         while (p->type == 'm')
4533 	  {
4534             sprintf (migstring, " [%li-%li]", p->next->id, p->id);
4535             FPRINTF (file, "%s", migstring);
4536             p = showtop (p->back);
4537 	  }
4538     }
4539     else
4540     {
4541 	FPRINTF (file, ":0\n");
4542 
4543     }
4544 }    /* treeout */
4545 
4546 ///
4547 /// writes tree in newick format to a string
4548 void
treeout_string(char ** file,long * filesize,long * pos,node * joint,node * p,long s)4549 treeout_string (char ** file, long *filesize, long *pos, node * joint, node * p, long s)
4550 {
4551   char *tmp;
4552   MYREAL x;
4553   char migstring[30];
4554   tmp = (char *) mycalloc(1024,sizeof(char));
4555   /* write out file with representation of final tree */
4556   //    long w;
4557   if (p->type == 't')
4558     {
4559 #ifdef UEP
4560         if(p->uep!=NULL)
4561 	  print_to_buffer(file, filesize, tmp, pos, "%s [%c]", p->nayme, p->uep[0]);
4562         else
4563 	  print_to_buffer(file, filesize, tmp, pos, "%s ", p->nayme);
4564 #else
4565         print_to_buffer(file, filesize, tmp, pos, "%s ", p->nayme);
4566 #endif
4567 #ifdef TREECOMMENTS
4568 	print_to_buffer(file, filesize, tmp, pos,"[& t:%.10f]",p->tyme);
4569 #endif
4570     }
4571     else
4572     {
4573       print_to_buffer(file, filesize, tmp, pos, "(");
4574       treeout_string (file, filesize, pos, joint, crawlback (p->next), s);
4575       print_to_buffer(file, filesize, tmp, pos, ",");
4576       treeout_string (file, filesize, pos, joint, crawlback (p->next->next), s);
4577       print_to_buffer(file, filesize, tmp, pos, ")");
4578 #ifdef TREECOMMENTS
4579       print_to_buffer(file, filesize, tmp, pos,"[& c:%.10f]",p->tyme);
4580 #endif
4581 
4582     }
4583     if (p == joint)
4584     {
4585         x = 0.0;
4586     }
4587     else
4588     {
4589         x = crawlback (p)->tyme - p->tyme;
4590     }
4591 #ifdef UEP
4592     if(p->uep!=NULL)
4593       print_to_buffer(file, filesize, tmp, pos, ":%.10f [%c]", x, p->uep[0]);
4594     else
4595       print_to_buffer(file, filesize, tmp, pos, ":%.10f ", x);
4596 #else
4597     print_to_buffer(file, filesize, tmp, pos, ":%.10f ", x);
4598 #endif
4599     if (p != joint)
4600     {
4601         p = showtop (p->back);
4602         while (p->type == 'm')
4603         {
4604             sprintf (migstring, " [&M %li %li:%g]", p->pop, p->actualpop,
4605                      p->tyme - showtop (p->next->back)->tyme);
4606             print_to_buffer(file, filesize, tmp, pos, "%s", migstring);
4607             p = showtop (p->back);
4608         }
4609     }
4610     else
4611     {
4612       print_to_buffer(file, filesize, tmp, pos, ";\n\0");
4613     }
4614     myfree(tmp);
4615 }    /* treeout_string */
4616 
4617 
4618 void
print_tree(world_fmt * world,long g,long * filepos)4619 print_tree (world_fmt * world, long g, long *filepos)
4620 {
4621 #ifdef NEXUSTREE
4622   static long counter = 0;
4623 #endif
4624   static long count   = 1;
4625   long pos            = 0;
4626   long allocval       = 0;
4627   char *tmp;
4628   tmp = (char *) calloc(1024,sizeof(char));
4629 
4630   switch (world->options->treeprint)
4631     {
4632     case BEST:
4633       if(world->options->treeinmemory)
4634 	{
4635 	  *filepos = 0;
4636 	  pos = 0;
4637 	  allocval = world->treespacealloc[world->locus];
4638 	  //	  if (world->likelihood[g] >= world->besttreelike[world->locus])
4639 	  if (world->param_like >= world->besttreelike[world->locus])
4640 	    {
4641 	      // printf("%i> print_tree(BEST) at %p  locus=%li bestlike=%f like=%f\n",myID, &world->treespace[world->locus], world->locus,world->besttreelike[world->locus], world->likelihood[g]);
4642 	      world->besttreelike[world->locus] = world->param_like;
4643 	      // world->besttreelike[world->locus] = world->likelihood[g];
4644 	      world->treespace[world->locus] = (char *) myrealloc(world->treespace[world->locus],
4645 								  sizeof(char) * LONGLINESIZE);
4646 	      allocval = LONGLINESIZE;
4647 	      //speed problem	      memset(world->treespace[world->locus],0, sizeof(char) * LONGLINESIZE);
4648 	      world->treespace[world->locus][0]='\0';
4649 	      pos = sprintf (world->treespace[world->locus], "\n[& Locus %li, best ln(L) = %f (c=coalescent node, t=tipnode) ]\n",
4650 			     world->locus + 1, world->likelihood[g]);
4651 	      treeout_string (&(world->treespace[world->locus]),
4652 			      &allocval,&pos,
4653 			      crawlback (world->root->next),
4654 			      crawlback (world->root->next), 0);
4655 	      world->treespacealloc[world->locus] = allocval;
4656 	      world->treespacenum[world->locus] = pos;
4657 	    }
4658 	  //	  else
4659 	  //  {
4660 	  //    printf("%i> *** print_tree(BEST) locus=%li bestlike=%f like=%f\n", myID, world->locus, world->besttreelike[world->locus], world->likelihood[g]);
4661 	  //  }
4662 	}
4663       else
4664 	{
4665 	  if (world->likelihood[g] > world->allikemax)
4666 	    {
4667 	      if (world->allikemax == -MYREAL_MAX)
4668 		{
4669 		  *filepos = ftell (world->treefile);
4670 		}
4671 	      else
4672 		{
4673 		  fseek (world->treefile, *filepos, SEEK_SET);
4674 		}
4675 	      world->allikemax = world->likelihood[g];
4676 	      FPRINTF (world->treefile,
4677 			     "\n[& Locus %li, best ln(L) = %f (c=coalescent node, t=tipnode) ]\n",
4678 		       world->locus + 1, world->likelihood[g]);
4679 	      treeout (world->treefile, crawlback (world->root->next),
4680 		       crawlback (world->root->next), 0);
4681 	    }
4682 	}
4683       break;
4684     case ALL:
4685     case LASTCHAIN:
4686       if((count++ % world->options->treeinc) == 0)
4687 	{
4688 	  if (world->in_last_chain)
4689 	    {
4690 	      if(world->options->treeinmemory)
4691 		{
4692 		  *filepos = 0;
4693 		  pos = world->treespacenum[world->locus];
4694 		  allocval = world->treespacealloc[world->locus];
4695 #ifdef NEXUSTREE
4696 		  pos = print_to_buffer(&(world->treespace[world->locus]), &allocval, tmp, &pos,
4697 					"\n[& Locus %li, best ln(L) = %f (c=coalescent node, t=tipnode) ]\ntree repl.%li = [&R] ",
4698 					world->locus + 1, world->likelihood[g], counter++);
4699 #else
4700 		  pos = print_to_buffer(&(world->treespace[world->locus]), &allocval, tmp, &pos,
4701 					"\n[& Locus %li, best ln(L) = %f (c=coalescent node, t=tipnode) ]\n",
4702 					world->locus + 1, world->likelihood[g]);
4703 #endif
4704 		  treeout_string (&(world->treespace[world->locus]),
4705 				  &allocval,&pos,
4706 				  crawlback (world->root->next),
4707 				  crawlback (world->root->next), 0);
4708 		  world->treespacealloc[world->locus] = allocval;
4709 		  world->treespacenum[world->locus] = pos;
4710 		}
4711 	      else
4712 		{
4713 		  // writing direct to file
4714 		  FPRINTF (world->treefile,
4715 			   "\n[& Locus %li, ln(L) = %f (c=coalescent node, t=tipnode) ]\n",
4716 			   world->locus + 1, world->likelihood[g]);
4717 #ifdef NEXUSTREE
4718 		  FPRINTF (world->treefile,"tree repl.%li = [&R] ",counter++);
4719 #endif
4720 		  treeout (world->treefile, crawlback (world->root->next),
4721 			   crawlback (world->root->next), 0);
4722 		}
4723 	    }
4724 	}
4725       break;
4726     case _NONE:
4727       break;
4728     default:
4729       break;
4730     }
4731   myfree(tmp);
4732 }
4733 
4734 
4735 boolean
treereader(world_fmt * world,option_fmt * options,data_fmt * data)4736 treereader (world_fmt * world,  option_fmt *options, data_fmt * data)
4737 {
4738     /*
4739      * read a  tree with or without migration events from the usertree and set up nodes
4740      * and pointers
4741      */
4742     boolean has_migration=FALSE;
4743     node **nodelist;
4744     char *nayme;
4745     char *temp, *temp2;
4746     long pop, w, zz, z = 0, zzz = 0;
4747     world->nodep = (node **) mycalloc (world->sumtips+(world->sumtips+1),sizeof (node *));
4748     temp = (char *) mymalloc (LINESIZE * sizeof (char));
4749     temp2 = (char *) mymalloc (LINESIZE * sizeof (char));
4750     has_migration = treeread (data->utreefile, world, options, &(world->root), NULL);
4751     length_to_times (world->root->next->back);
4752     nodelist = (node **) mycalloc (1, sizeof (node *) * (world->sumtips + 1));
4753     pop = -1;
4754     set_tree_pop (world->root, &pop);
4755     allocate_x (world->root, world, world->options->datatype, WITHTIPS);
4756     find_tips (world->root, nodelist, &z);
4757     for (pop = 0; pop < data->numpop; pop++)
4758     {
4759         for (w = 0; w < data->numind[pop][world->locus]; w++)
4760         {
4761             strcpy (temp2, data->indnames[pop][w][world->locus]);
4762 	    unpad(temp2," ");
4763 	    //            temp2[strcspn (temp2, " ")] = '\0';
4764             sprintf (temp, "%li%s", pop, temp2);
4765             for (zz = 0; zz < z; zz++)
4766             {
4767                 nayme = nodelist[zz]->nayme;
4768 		unpad(nayme," _");
4769                 if (!strcmp (temp, nayme) || !strcmp (temp2, nayme))
4770                 {
4771                     world->nodep[zzz++] = nodelist[zz];
4772                     break;
4773                 }
4774             }
4775         }
4776     }
4777     myfree(nodelist);
4778     myfree(temp);
4779     myfree(temp2);
4780     return has_migration;
4781 }
4782 
4783 
4784 char
processlength(FILE * file,node ** p,option_fmt * options)4785 processlength (FILE * file, node ** p, option_fmt *options)
4786 {
4787     char ch;
4788     long digit, ordzero;
4789     MYREAL valyew, divisor;
4790     boolean pointread, minusread;
4791 
4792     ordzero = '0';
4793     pointread = FALSE;
4794     minusread = FALSE;
4795     valyew = 0.0;
4796     divisor = 1.0;
4797     ch = getc (file);
4798     digit = ch - ordzero;
4799     while (((unsigned long) digit <= 9) | (ch == '.') || (ch == '-'))
4800     {
4801         if (ch == '.')
4802             pointread = TRUE;
4803         else if (ch == '-')
4804             minusread = TRUE;
4805         else
4806         {
4807             valyew = valyew * 10.0 + digit;
4808             if (pointread)
4809                 divisor *= 10.0;
4810         }
4811         ch = getc (file);
4812         digit = ch - ordzero;
4813     }
4814     if (!minusread)
4815         (*p)->length = valyew / divisor;
4816     else
4817         (*p)->length = 0.0;
4818     return ch;
4819 }
4820 
4821 boolean
treeread(FILE * file,world_fmt * world,option_fmt * options,node ** pp,node * q)4822 treeread (FILE * file, world_fmt * world, option_fmt *options, node ** pp, node * q)
4823 {
4824     node *p=NULL;
4825     boolean has_migration=FALSE;
4826     char ch = getc (file);
4827     int retval;
4828     while (ch != ';')
4829     {
4830         switch (ch)
4831         {
4832 			case '(':
4833 				p = create_interior_node (world, &q);
4834 				q = p->next;
4835 				ch = getc (file);
4836 				break;
4837 			case ',':
4838 				q = q->next;
4839 				if (q->top)
4840 				{
4841 					usererror ("Multifurcation handling not yet installed");
4842 				}
4843 					ch = getc (file);
4844 				break;
4845 			case ')':
4846 				p = showtop (q);
4847 				q = p->back;
4848 				ch = getc (file);
4849 				break;
4850 			case ' ':
4851 			case '\n':
4852 			case '\t':
4853 				ch = getc (file);
4854 				break;
4855 			case ':':
4856 				ch = processlength (file, &p, options);
4857 				break;
4858 			case '[':
4859 			  has_migration = processbracket (file, world, &p, &ch);
4860 			  if(has_migration>0)
4861 			    {
4862 			      q->back = p;
4863 			      p->back = q;
4864 			    }
4865 			  break;
4866 			default:
4867 				p = create_tip_node (file, world, options, &q, &ch);
4868 				break;
4869         }
4870     }
4871     if(p!=NULL)
4872       {
4873 	    p->length = 10000.;
4874     	(*pp) = showtop (p->back);
4875       }
4876     retval = fscanf (file, "%*[^\n]");
4877     getc (file);
4878     return has_migration;
4879 }
4880 
4881 void
length_to_times(node * p)4882 length_to_times (node * p)
4883 {
4884     node *q;
4885     if (p->type != 't')
4886     {
4887         length_to_times ((p)->next->back);
4888         if ((p)->type == 'i')
4889             length_to_times ((p)->next->next->back);
4890     }
4891     q = showtop ((p)->back);
4892     q->tyme = q->next->tyme = q->next->next->tyme = (p)->tyme + (p)->length;
4893 }
4894 
4895 void
find_tips(node * p,node ** nodelist,long * z)4896 find_tips (node * p, node ** nodelist, long *z)
4897 {
4898     if (p->type == 't')
4899     {
4900         nodelist[(*z)++] = p;
4901     }
4902     else
4903     {
4904         if (p->next->back != NULL)
4905             find_tips (crawlback (p->next), nodelist, z);
4906         if (p->next->next->back != NULL)
4907             find_tips (crawlback (p->next->next), nodelist, z);
4908     }
4909 }
4910 
4911 long
find_firstpop(node * p)4912 find_firstpop (node * p)
4913 {
4914     static boolean found = FALSE;
4915     static long pop = -1;
4916     if (p->type == 'm')
4917     {
4918         found = TRUE;
4919         pop = p->pop;
4920     }
4921     else
4922     {
4923         if (p->next->back != NULL)
4924         {
4925             find_firstpop (p->next->back);
4926             if (found)
4927                 return pop;
4928         }
4929         if (p->next->next->back != NULL)
4930             find_firstpop (p->next->next->back);
4931     }
4932     return pop;
4933 }
4934 
4935 /* touches only coalescent nodes! migration nodes are already set */
4936 void
set_tree_pop_old(node * p,long * pop)4937 set_tree_pop_old (node * p, long *pop)
4938 {
4939     if (p->type != 'r')
4940     {
4941 
4942         (*pop) =
4943 		(showtop (p->back)->actualpop !=
4944 		 *pop) ? showtop (p->back)->actualpop : *pop;
4945     }
4946     p->actualpop = p->pop = *pop;
4947     if (p->type != 't')
4948     {
4949         if (p->next->back != NULL)
4950         {
4951             set_tree_pop (crawlback (p->next), pop);
4952         }
4953         if (p->type != 'm' && p->next->next->back != NULL)
4954         {
4955             set_tree_pop (crawlback (p->next->next), pop);
4956         }
4957     }
4958 }
4959 
4960 /// sets the actualpop and pop values deducting from the mgiration nodes that
4961 /// are already set
4962 void
set_tree_pop(node * p,long * pop)4963 set_tree_pop (node * p, long *pop)
4964 {
4965   static boolean done=FALSE;
4966   switch(p->type)
4967     {
4968     case 'r':
4969 	  set_tree_pop(p->next->back, pop);
4970 	  *pop = p->actualpop = p->pop = p->next->back->pop;
4971       if(!done)
4972 	{
4973 	  done=TRUE;
4974 	  if(*pop == -1)
4975 	    *pop = 0;
4976 	  set_tree_pop(p,pop);
4977 	}
4978       break;
4979     case 't':
4980       if (*pop != -1)
4981 	p->actualpop = p->pop = *pop;
4982       break;
4983     case 'i':
4984       set_tree_pop(p->next->back,pop);
4985       if(*pop != -1)
4986 	p->actualpop = p->pop = p->next->back->pop;
4987       set_tree_pop(p->next->next->back,pop);
4988       if(*pop != -1)
4989 	{
4990 	  p->actualpop = p->pop = p->next->next->back->pop;
4991 	  *pop = p->pop;
4992 	}
4993       break;
4994     case 'm':
4995       *pop = p->actualpop;
4996       set_tree_pop(p->next->back, pop);
4997       *pop = p->pop;
4998       break;
4999     }
5000 }
5001 
5002 
5003 node *
create_interior_node(world_fmt * world,node ** q)5004 create_interior_node (world_fmt * world, node ** q)
5005 {
5006     node *p;
5007     p = allocate_nodelet (world, 3, 'i');
5008     p->top = TRUE;
5009     p->scale = (MYREAL *) mycalloc (world->data->seq[0]->endsite+world->data->seq[0]->addon, sizeof (MYREAL));
5010     p->s = (MYREAL *) mycalloc (1, sizeof (MYREAL) * world->numpop);
5011     p->back = *q;
5012     if ((*q) == NULL)
5013       create_root_node (world, &p);
5014     else
5015         (*q)->back = p;
5016     return p;
5017 }
5018 
5019 node *
create_root_node(world_fmt * world,node ** q)5020 create_root_node (world_fmt *world, node ** q)
5021 {
5022     node *p;
5023     p = allocate_nodelet (world, 3, 'r');
5024     p->top = TRUE;
5025     p->next->back = *q;
5026     (*q)->back = p->next;
5027     return p;
5028 }
5029 
5030 
5031 node *
create_tip_node(FILE * file,world_fmt * world,option_fmt * options,node ** q,char * ch)5032 create_tip_node (FILE * file, world_fmt * world, option_fmt *options, node ** q, char *ch)
5033 {
5034     long pop;
5035     node *p;
5036     char c;
5037     char *nayme;
5038     long i = 1;
5039     nayme = (char *) mycalloc (options->nmlength+1, sizeof (char));
5040     nayme[0] = (*ch);
5041     while (strchr ("[):;,\t\n\r", (int) (c = getc (file))) == NULL)
5042         nayme[i++] = c;
5043     nayme[i] = '\0';
5044     p = allocate_nodelet (world, 1, 't');
5045     p->nayme = (char *) mycalloc (options->nmlength+5, sizeof (char));
5046     p->top = TRUE;
5047     p->tip = TRUE;
5048     // PB is this needed with usertrees
5049     p->scale = (MYREAL *) mycalloc (world->data->seq[0]->endsite+world->data->seq[0]->addon, sizeof (MYREAL));
5050     p->s = (MYREAL *) mycalloc (world->numpop, sizeof (MYREAL)  );
5051     for (i = 0; i < world->numpop; i++)
5052       {
5053         p->s[i] = MYREAL_MAX;
5054       }
5055 
5056     sprintf(p->nayme, "%-*s",(int) options->nmlength,  nayme);
5057     unpad(p->nayme," ");
5058     translate(p->nayme,' ', '_');
5059     sscanf(nayme,"%li ",&pop);
5060     p->s[pop] = 0;
5061     p->back = *q;
5062     (*q)->back = p;
5063     myfree(nayme);
5064     (*ch) = c;
5065     return p;
5066 }
5067 
5068 boolean
processbracket(FILE * file,world_fmt * world,node ** p,char * ch)5069 processbracket (FILE * file, world_fmt *world, node ** p, char * ch)
5070 {
5071     boolean migfound=FALSE;
5072     long pop1, pop2;
5073     MYREAL utime;
5074     char c;
5075     int retval;
5076     c = getc (file);
5077     if (c == '&')
5078     {
5079         c = getc (file);
5080         switch (c)
5081         {
5082 			case 'M':
5083 #ifdef USE_MYREAL_FLOAT
5084 				retval = fscanf (file, "%li %li:%f", &pop1, &pop2, &utime);
5085 #else
5086 				retval = fscanf (file, "%li %li:%lf", &pop1, &pop2, &utime);
5087 #endif
5088 				/*c=*/getc (file);
5089 				(*p) = add_migration (world, *p, pop1, pop2, utime);
5090 				migfound = TRUE;
5091 				break;
5092 			default:
5093 				while (c != ']')
5094 					c = getc (file);
5095 				break;
5096         }
5097     }
5098     else
5099     {
5100         while (c != ']')
5101             c = getc (file);
5102     }
5103     *ch = getc (file);
5104     return migfound;
5105 }
5106 
5107 
5108 node *
add_migration(world_fmt * world,node * p,long from,long to,MYREAL utime)5109 add_migration (world_fmt *world, node * p, long from, long to, MYREAL utime)
5110 {
5111     node *tmp;
5112     tmp = allocate_nodelet (world, 2, 'm');
5113     tmp->top = TRUE;
5114     tmp->next->back = p;
5115     p->back = tmp->next;
5116     tmp->length = p->length - utime;
5117     p->length = utime;
5118     tmp->tyme = p->tyme + utime;
5119     tmp->pop = tmp->next->pop = from;
5120     tmp->actualpop = tmp->next->actualpop = to;
5121     return tmp;
5122 }
5123 
5124 void
allocate_x(node * p,world_fmt * world,char datatype,boolean withtips)5125 allocate_x (node * p, world_fmt * world, char datatype, boolean withtips)
5126 {
5127     if (p->type != 't')
5128     {
5129         if (p->next->back != NULL)
5130             allocate_x (crawlback (p->next), world, datatype, withtips);
5131         if (p->next->next->back != NULL)
5132             allocate_x (crawlback (p->next->next), world, datatype, withtips);
5133         if (strchr (SEQUENCETYPES, world->options->datatype))
5134         {
5135             alloc_seqx (world, p);
5136         }
5137         else
5138         {
5139             if (strchr (SEQUENCETYPES, world->options->datatype))
5140                 p->x.a = (MYREAL *) mycalloc (1, world->sumtips * sizeof (MYREAL));
5141             else
5142                 p->x.a =
5143                     (MYREAL *) mycalloc (1,
5144                                          MAX (world->sumtips,
5145                                               world->data->maxalleles[world->locus]) *
5146                                          sizeof (MYREAL));
5147         }
5148 #ifdef UEP
5149         if(world->options->uep)
5150         {
5151             p->uep = (int *) mycalloc (world->data->uepsites, sizeof (int));
5152             p->ux.s = (pair *) mycalloc (world->data->uepsites, sizeof (pair));
5153         }
5154 #endif
5155 
5156     }
5157     else
5158     {
5159         if (withtips)
5160         {
5161             if (strchr (SEQUENCETYPES, world->options->datatype))
5162             {
5163                 alloc_seqx (world, p);
5164             }
5165             else
5166             {
5167                 p->x.a =
5168 				(MYREAL *) mycalloc (1,
5169                                      world->data->maxalleles[world->locus] *
5170                                      sizeof (MYREAL));
5171             }
5172         }
5173         //#ifdef UEP
5174         //      if(world->options->uep)
5175         // {
5176         //   p->uep = (long *) mycalloc (world->data->uepsites, sizeof (long));
5177         //   p->ux.s = (pair *) mycalloc (world->data->uepsites, sizeof (pair));
5178         // }
5179         //#endif
5180 
5181     }
5182 }
5183 
5184 long
number_genomes(int datatype)5185 number_genomes (int datatype)
5186 {
5187     switch (datatype)
5188     {
5189     case 'a':
5190     case 'b':
5191     case 'm':
5192       return 2;
5193     case 's':
5194     case 'n':
5195     case 'h':
5196     case 'u':
5197     case 'f':
5198       return 1;
5199     default:
5200       error ("Wrong data type");
5201     }
5202     return 0;
5203 
5204 }
5205 
5206 
5207 void
copy_tree(world_fmt * original,world_fmt * kopie)5208 copy_tree (world_fmt * original, world_fmt * kopie)
5209 {
5210     kopie->root = copy_node (original, original->root, kopie, NULL);
5211 }
5212 
5213 node *
copy_node(world_fmt * original,node * o,world_fmt * kopie,node * last)5214 copy_node (world_fmt * original, node * o, world_fmt * kopie, node * last)
5215 {
5216     static long z = 0;
5217 
5218     node *t = NULL, *t2, *t3;
5219     if (o == NULL)
5220         return NULL;
5221     if (!o->top)
5222         error ("copy_tree messed up");
5223 
5224     switch (o->type)
5225     {
5226 		case 'r':
5227 			z = 0;
5228 		case 'i':
5229 			t = (node *) mycalloc (1, sizeof (node));
5230 			t2 = (node *) mycalloc (1, sizeof (node));
5231 			t3 = (node *) mycalloc (1, sizeof (node));
5232 			t->next = t2;
5233 			t2->next = t3;
5234 			t3->next = t;
5235 			copy_node_content (original, kopie, o, t);
5236 			copy_node_content (original, kopie, o->next, t2);
5237 			copy_node_content (original, kopie, o->next->next, t3);
5238 			if (o->next->back != NULL)
5239 				t2->back = copy_node (original, o->next->back, kopie, t2);
5240 				if (o->next->next->back != NULL)
5241 					t3->back = copy_node (original, o->next->next->back, kopie, t3);
5242 					t->back = last;
5243 			break;
5244 		case 'm':
5245 			t = (node *) mycalloc (1, sizeof (node));
5246 			t2 = (node *) mycalloc (1, sizeof (node));
5247 			t->next = t2;
5248 			t2->next = t;
5249 			copy_node_content (original, kopie, o, t);
5250 			copy_node_content (original, kopie, o->next, t2);
5251 			t2->back = copy_node (original, o->next->back, kopie, t2);
5252 			t->back = last;
5253 			break;
5254 		case 't':
5255 			t = (node *) mycalloc (1, sizeof (node));
5256 			//kopie->nodep[z++] = t;
5257 			t->next = t;
5258 			copy_node_content (original, kopie, o, t);
5259 			t->back = last;
5260 			break;
5261     }
5262     return t;
5263 }
5264 
5265 ///
5266 /// copy the guts of a node in the tree (but to where?)
5267 void
copy_node_content(world_fmt * original,world_fmt * kopie,node * o,node * t)5268 copy_node_content (world_fmt * original, world_fmt * kopie, node * o,
5269                    node * t)
5270 {
5271     long i;
5272 //    long j;
5273     const long endsite = original->data->seq[0]->endsite;
5274     const long rcategs = original->options->rcategs;
5275     t->type = o->type;
5276     t->number = o->number;
5277     t->pop = o->pop;
5278     t->actualpop = o->actualpop;
5279     t->id = o->id;
5280     t->top = o->top;
5281     t->dirty = o->dirty;
5282     t->v = o->v;
5283     t->tyme = o->tyme;
5284     t->length = o->length;
5285 
5286     if (t->top && t->type != 'm')
5287     {
5288         t->scale = (MYREAL *) mycalloc (endsite, sizeof (MYREAL));
5289         memcpy (t->scale, o->scale, sizeof (MYREAL) * endsite);
5290 #ifdef UEP
5291 
5292         if (original->options->uep)
5293         {
5294             t->uep = (int *) mycalloc (original->data->uepsites, sizeof (int));
5295             memcpy (t->uep, o->uep, sizeof (long) * original->data->uepsites);
5296             t->ux.s = (pair *) mycalloc (original->data->uepsites, sizeof (pair));
5297             for (i = 0; i < original->data->uepsites; i++)
5298             {
5299                 memcpy (t->ux.s[i], o->ux.s[i], sizeof (pair));
5300             }
5301         }
5302 #endif
5303         if (strchr (SEQUENCETYPES, original->options->datatype))
5304         {
5305             alloc_seqx (kopie, t);
5306             memcpy (t->scale, o->scale, sizeof (MYREAL) * endsite);
5307             for (i = 0; i < endsite; i++)
5308             {
5309 //                for (j = 0; j < rcategs; j++)
5310 //                {
5311                     memcpy (t->x.s[i], o->x.s[i], rcategs * sizeof (sitelike));
5312 
5313 //                }
5314             }
5315         }
5316         else
5317         {
5318             t->x.a =
5319 			(MYREAL *) mycalloc (1,
5320                                  original->data->maxalleles[original->locus] *
5321                                  sizeof (MYREAL));
5322             memcpy (t->x.a, o->x.a,
5323                     sizeof (MYREAL) *
5324                     original->data->maxalleles[original->locus]);
5325         }
5326     }
5327     //if (o->s != NULL)
5328     ///{
5329     //t->s = (MYREAL *) mycalloc(1, sizeof(MYREAL) * original->numpop);
5330     //memcpy] (t->s, o->s, sizeof(MYREAL) * original->numpop);
5331     //
5332     //      }
5333     //
5334     else
5335         t->s = NULL;
5336     if (o->nayme != NULL)
5337     {
5338       t->nayme = (char*) strdup (o->nayme);
5339     }
5340     else
5341         t->nayme = NULL;
5342 }
5343 
5344 void
swap_tree(world_fmt * tthis,world_fmt * tthat)5345 swap_tree (world_fmt * tthis, world_fmt * tthat)
5346 {
5347     node *tmp;
5348     node **nodetemps;
5349     tmp = tthis->root;
5350     tthis->root = tthat->root;
5351     tthat->root = tmp;
5352     nodetemps = tthis->nodep;
5353     tthis->nodep = tthat->nodep;
5354     tthat->nodep = nodetemps;
5355 
5356 }
5357 
5358 void
calc_treelength(node * p,MYREAL * treelen)5359 calc_treelength (node * p, MYREAL *treelen)
5360 {
5361     node *pn, *pnn;
5362     switch (p->type)
5363     {
5364 		case 't':
5365 			break;
5366 		case 'm':
5367 			error ("yelp\n");
5368 			break;
5369 		case 'i':
5370 			pn = crawlback (p->next);
5371 			calc_treelength (pn, treelen);
5372 			pnn = crawlback (p->next->next);
5373 			calc_treelength (pnn, treelen);
5374 			break;
5375 		default:
5376         error ("default reached");
5377     }
5378     pn = showtop (crawlback (p));
5379     if (pn->type != 'r')
5380         *treelen += pn->tyme - p->tyme;
5381 }
5382 
5383 MYREAL
calc_pseudotreelength(proposal_fmt * proposal,MYREAL treelen)5384 calc_pseudotreelength (proposal_fmt * proposal, MYREAL treelen)
5385 {
5386     MYREAL len = 0.0;
5387     MYREAL ot = proposal->origin->tyme;
5388     MYREAL obt = proposal->oback->tyme;
5389     MYREAL tt = proposal->target->tyme;
5390     MYREAL rt = proposal->world->root->next->back->tyme;
5391     //target is not root
5392     if (proposal->target != proposal->world->root)
5393     {
5394         //oback is not root
5395         if (proposal->oback != proposal->world->root)
5396         {
5397             len = treelen - (obt - ot) + (proposal->time - ot);
5398             //printf("pseudo_treelen: ob!=r t!=r %f\n", len);
5399         }
5400         else
5401         {
5402             //oback is root
5403             len = treelen - (obt - ot) - (rt - tt) +
5404 			(proposal->time - ot) + (proposal->time - tt);
5405             //printf("pseudo_treelen: ob=r t!=r %f\n", len);
5406         }
5407     }
5408     else
5409         //target is root
5410     {
5411         //oback is not root
5412         if (proposal->oback != proposal->world->root)
5413         {
5414             len = treelen - (obt - ot) + (proposal->time - ot) - (rt - tt) +
5415 			(proposal->time - tt);
5416             //printf("pseudo_treelen: ob!=r t=r %f\n", len);
5417         }
5418         else
5419         {
5420             //oback is root
5421             len = treelen - (obt - ot) - (obt - tt) + (proposal->time - ot)
5422 			+ (proposal->time - tt);
5423             //printf("pseudo_treelen: ob=r t=r %f\n", len);
5424         }
5425     }
5426     return len;
5427 }
5428 
5429 void
swap(void * a,void * b)5430 swap (void *a, void *b)
5431 {
5432     void *t;
5433     t = a;
5434     a = b;
5435     b = t;
5436 }
5437 
5438 
5439 void
free_tree(node * p,world_fmt * world)5440 free_tree (node * p, world_fmt * world)
5441 {
5442     if (p != NULL)
5443     {
5444         if (p->type != 't')
5445         {
5446             if (p->next->back != NULL)
5447             {
5448                 free_tree (p->next->back, world);
5449             }
5450             if (p->type != 'm' && p->next->next->back != NULL)
5451             {
5452                 free_tree (p->next->next->back, world);
5453             }
5454         }
5455         switch (p->type)
5456         {
5457 	case 'm':
5458 	  free_mignodelet (p, world);
5459 	  break;
5460 	case 't':
5461 	  myfree(p->nayme);
5462 	  free_tipnodelet (p, world);
5463 	  break;
5464 	case 'i':
5465 	  free_nodelet (p, 3, world);
5466 	  break;
5467 	case 'r':
5468 	  free_nodelet (p, 3, world);
5469 	  world->root = NULL;
5470 	  break;
5471 	default:
5472 	  error("error in freeing nodes, a node with no type found");
5473 	  break;
5474         }
5475     }
5476 }
5477 
5478 ///
5479 /// delete tipnode-nodelet and its content data
5480 void
free_tipnodelet(node * p,world_fmt * world)5481 free_tipnodelet (node * p, world_fmt * world)
5482 {
5483   free_nodedata (p, world);
5484   collect_nodelet(world, p);
5485   //myfree(p);
5486   //p->id = -p->id;
5487 }
5488 
5489 ///
5490 /// delete migratenode-nodelets
5491 void
free_mignodelet(node * p,world_fmt * world)5492 free_mignodelet (node * p, world_fmt * world)
5493 {
5494   node *q = p->next;
5495   collect_nodelet(world, q);
5496   collect_nodelet(world, p);
5497   //  myfree(q);
5498   //myfree(p);
5499 }
5500 
5501 ///
5502 /// delete interior node and its data
5503 void
free_nodelet(node * p,long num,world_fmt * world)5504 free_nodelet (node * p, long num, world_fmt * world)
5505 {
5506     long i;
5507     node *q , *r;
5508     switch(num)
5509       {
5510       case 1:
5511 	if(p->top == TRUE)
5512 	  free_nodedata (p, world);
5513 	collect_nodelet(world, p);
5514 	//myfree(p);
5515 	break;
5516       case 2:
5517 	q = p->next;
5518 	if(p->top == TRUE)
5519 	  free_nodedata (p, world);
5520 	collect_nodelet(world, p);
5521 	//	myfree(p);
5522 	if(q!=NULL)
5523 	  {
5524 	    if(q->top == TRUE)
5525 	      free_nodedata (q, world);
5526 	    collect_nodelet(world, q);
5527 	    // myfree(q);
5528 	  }
5529 	break;
5530       case 3:
5531 	q = p->next;
5532 	r = p->next->next;
5533 	if(p->top == TRUE)
5534 	  free_nodedata (p, world);
5535 	collect_nodelet(world, p);
5536 	//	myfree(p);
5537 	//p->id = -p->id;
5538 	if(q!=NULL)
5539 	  {
5540 	    if(q->top == TRUE)
5541 	      free_nodedata (q, world);
5542 	    collect_nodelet(world, q);
5543 	    //	    myfree(q);
5544 	    //q->id = -q->id;
5545 	  }
5546 	if(r!=NULL)
5547 	  {
5548 	    if(r->top == TRUE)
5549 	      free_nodedata (r, world);
5550 	    collect_nodelet(world, r);
5551 	    //	    myfree(r);
5552 	    //r->id = -r->id;
5553 	  }
5554 	break;
5555       default:
5556 	for (i = 0; i < num; i++)
5557 	  {
5558 	    q = p->next;
5559 	    if(p->top == TRUE)
5560 	      free_nodedata (p, world);
5561 	    //	    myfree(p);
5562 	    collect_nodelet(world, p);
5563 	    p = q;
5564 	  }
5565       }
5566 }
5567 
5568 void
free_nodedata(node * p,world_fmt * world)5569 free_nodedata (node * p, world_fmt * world)
5570 {
5571   //    long endsite;
5572     if (strchr (SEQUENCETYPES, world->options->datatype))
5573     {
5574       //       endsite = world->data->seq->endsite;
5575       if(p->x.s != NULL)
5576 	{
5577 	  myfree(p->x.s[0]);
5578 	  myfree(p->x.s);
5579 	}
5580     }
5581     else
5582     {
5583       if(p->x.a != NULL)
5584 	{
5585 	  myfree(p->x.a);
5586 	}
5587     }
5588     if (p->s != NULL)
5589       myfree(p->s);
5590     if(p->scale!=NULL)
5591       myfree(p->scale);
5592 
5593 #ifdef UEP
5594     if (world->options->uep)
5595         myfree(p->uep);
5596 #endif
5597 
5598 }
5599 
5600 ///
5601 /// calculates the 1/probability that no event happens
5602 /// is safeguarded against a problem on two tip trees
5603 /// that will return a result of zero and so a inverse of INF
5604 /// INF is replaced by MYREAL_MAX
5605 MYREAL
inverse_logprob_noevent(world_fmt * world,long interval)5606 inverse_logprob_noevent (world_fmt * world, long interval)
5607 {
5608     long pop, k;
5609     MYREAL result = 0.0;
5610     for (pop = 0; pop < world->numpop; pop++)
5611     {
5612         k = world->treetimes[0].tl[interval].lineages[pop];
5613         result +=
5614             (k * (k - 1) / world->param0[pop]) + sum_migprob (world, pop, interval);
5615     }
5616     if(result > 0.0)
5617       return 1./result;
5618     else
5619       {
5620 	if(world->treetimes[0].tl[interval].eventnode->type=='t')
5621 	  {
5622 	    return world->treetimes[0].tl[interval].age;
5623 	  }
5624 	else
5625 	  return MYREAL_MAX;
5626       }
5627 }
5628 
5629 /// calculates the sum of all immigrations into a specific population
5630 MYREAL
sum_migprob(world_fmt * world,long pop,long interval)5631 sum_migprob (world_fmt * world, long pop, long interval)
5632 {
5633     long i;
5634     MYREAL result = 0.0;
5635     long *lineages = world->treetimes[0].tl[interval].lineages;
5636     long msta = world->mstart[pop];
5637     long msto = world->mend[pop];
5638     boolean usem = world->options->usem;
5639     MYREAL pk;
5640     for (i = msta; i < msto; i++)
5641     {
5642       pk = usem ? world->param0[i] : world->param0[i]/world->param0[pop];
5643       result += pk;
5644     }
5645     return result * lineages[pop];
5646 }
5647 
debugline(node * up)5648 void debugline(node *up)
5649 {
5650   node *thenode = up;
5651   node * down = crawlback(showtop(up));
5652   while (thenode != down)
5653     {
5654       printf("<%li:%li %c>\n",thenode->id, showtop(thenode)->id, thenode->type);
5655       thenode = showtop(thenode)->back;
5656     }
5657 }
5658