1 #include "recombine.h"
2 #include "getdata.h"
3 
4 #ifdef DMEMDEBUG
5 #define MEMDEBUG
6 #include "memdebug.h"
7 #endif
8 
9 #ifdef DMALLOC_FUNC_CHECK
10 #include "/usr/local/include/dmalloc.h"
11 #endif
12 
13 /***************************************************************************
14  *  Version 0.40. (c) Copyright 1986, 1991, 1992 by the University of      *
15  *  Washington and Joseph Felsenstein.  Written by Joseph Felsenstein,     *
16  *  Mary K. Kuhner and Jon A. Yamato, with some additional grunt work by   *
17  *  Sean T. Lamont.  Permission is granted to copy and use this program    *
18  *  provided no fee is charged for it and provided that this copyright     *
19  *  notice is not removed.                                                 *
20  *                                                                         *
21  ***************************************************************************/
22 
23 /* This program implements a Hastings-Metropolis Monte Carlo
24   Maximum Likelihood sampling method for phylogenetic trees
25   with recombination. */
26 
27 /* Time, 'tyme', in this tree is measured from the tips to the root.
28    I.e. the tips are at tyme '0', and the root node has the largest
29    value for 'tyme'. */
30 
31 /* Modified to accept multiple loci. */
32 
33 /* Modified to "plump" input non-recombinant tree */
34 
35 /* Modified to track final coalescence and begin to handle microsats */
36 
37 /* Modified to handle panel snps, and real FC differentiation */
38 
39 /* Being modified to handle basic haplotyping */
40 /* WARNING MARY temporary variables */
41 data_fmt truehaps;
42 data_fmt starthaps;
43 
44 /* these definitions will be extern in other files*/
45 extern double **reci, **theti;
46 extern treerec ***sum;
47 extern unsigned int baseA, baseC, baseG, baseT;
48 
49 extern long countcoalbranches(tree *tr);
50 extern void model_alloc(option_struct *op, data_fmt *data);
51 extern void scoretree(option_struct *op, data_fmt *data, long chain);
52 extern void rec_estimate(option_struct *op, data_fmt *data, long lowcus,
53    long chain, boolean locusend);
54 extern boolean twiddle(option_struct *op, data_fmt *data);
55 extern boolean fliphap(option_struct *op, data_fmt *data, tree *tr);
56 extern boolean flipdrop(option_struct *op, data_fmt *data, tree *tr,
57    long numdrop);
58 extern void add_one_recomb(option_struct *op, data_fmt *data, tree *tr,
59    long *siteptr, long chain);
60 extern void addfractrecomb(option_struct *op, data_fmt *data,
61    long chain, treerec ***treesum);
62 extern boolean makedrop(option_struct *op, data_fmt *data);
63 extern void setupdnadata(dnadata **dna, long *numsites, long numseq,
64    long numloci, long numpop);
65 extern void freednadata(dnadata *dna);
66 extern void empiricaldnafreqs(option_struct *op, data_fmt *data,
67    tree *curtree);
68 extern void inputdnaweights(long numchars, dnadata *dna, option_struct *op);
69 extern void remove_branch(tlist *start, node *target);
70 extern void rename_branch(tlist *start, node *source, node *target);
71 extern void rec_scoreread(option_struct *op, data_fmt *data);
72 extern void unflag(node *p);
73 extern void traverse_unflag(tree *tr, node *p);
74 extern void read_spacefile(FILE *file, option_struct *op, data_fmt *data);
75 extern void printdnadata(option_struct *op, data_fmt *data, FILE *out);
76 
77 extern long countsites(option_struct *op, data_fmt *data);
78 extern long getdata_sitecount(option_struct *op, data_fmt *data,
79    long marker);
80 extern double getnumlinks(option_struct *op, data_fmt *data, long start,
81    long finish);
82 
83 /* functions for haplotyping */
84 extern void read_flipfile(option_struct *op, data_fmt *data, tree *tr,
85    FILE *file);
86 extern void pruneflips(option_struct *op, data_fmt *data, tree *tr);
87 extern void copyhaps(option_struct *op, data_fmt *oldhap, data_fmt *newhap,
88   long numpop, long numloci, long numind, long *numsites);
89 
90 /* functions for heating */
91 extern double coalprob(option_struct *op, data_fmt *data, tree *tr, double
92   theta, double r);
93 extern int longcmp(const void *v1, const void *v2);
94 
95 /* functions from traitlike.c for trait likelihoods */
96 extern void copytraits(node *source, node *target);
97 extern void traitlike(option_struct *op, data_fmt *data, tree *tr,
98    long numsites, double mutrait, double traitratio, double pd,
99    double *traitarray);
100 extern void traitread(tree *tr, long numseq);
101 extern void traitprint(long numsites, double *traitarray, FILE *outfile, long numout);
102 extern void traitsiteplot(option_struct *op, data_fmt *data, double *llike);
103 extern void traitresult(option_struct *op, data_fmt *data,
104    double *traitarray, FILE *outfile, long numout);
105 
106 tree **temptrees;/* the current tree for each temperature-chain */
107 tree *curtree;   /* the current tree */
108 long locus;      /* which locus are we currently working on? */
109 long population; /* which population are we currently working on? */
110 longer seed;     /* array used to hold randum seed */
111 long numdropped = 0;               /* number of trees dropped from
112                                       consideration */
113 
114 /* next are a largish set of temporary variables used to hold various
115    values before their structures are setup.  Mostly data variables. */
116 double freqa, freqc, freqg, freqt; /* dna freqs */
117 double locus_ttratio;              /* dna transition/transversion ratio */
118 
119 FILE *infile, *outfile, *treefile, *bestree, *seedfile, *simlog,
120      *parmfile, *intree, *weightfile, *spacefile;
121 long rootnum, apps, col, numtrees, totchains;
122 boolean  **sametree;
123 double clearseed, theta0, rec0, branch0;
124 long		*category;
125 contribarr	*contribution;
126 node		*freenodes;
127 rlrec		*alogf;
128 char            gch;
129 long            nodectr;
130 double	 	sumweightrat, watttheta;
131 double	 	*weightrat;
132 valrec   	*tbl;
133 long		slid, slacc, hap, hacc, swap, swacc, indecks;
134 
135 
136 /* the following are for reading in parameters (readparmfile),
137    and also writing them back out again (rec_parmfilewrite) */
138 char *booltokens[NUMBOOL] = {"interleaved","printdata","progress",
139                           "print-trees","freqs-from-data","categories",
140                           "watterson", "usertree", "autocorrelation",
141                           "newdata","same-ne","interactive","mhmcsave",
142                           "panel","map","final-coalescence","full-snp",
143                           "haplotyping","profile-like","norecsnp"},
144      *numbertokens[NUMNUMBER] = {"ttratio","short-chains",
145                           "short-steps","short-inc","long-chains",
146                           "long-inc","long-steps","rec-rate",
147                           "holding","mu-ratio","trait-ratio",
148                           "dis-freq","hapdrop","heating"};
149 
150 /* the following are for managing x array recycling */
151 long numx;
152 datalike_fmt *sparex[XARRAYSIZE];
153 
openfile(FILE ** fp,char * filename,char * mode,char * application,char * perm)154 void openfile(FILE **fp, char *filename, char *mode, char *application,
155    char *perm)
156 {
157   FILE *of;
158   char file[100];
159 
160   strcpy(file,filename);
161   while (1){
162     of = fopen(file,mode);
163     if (of)
164       break;
165     else {
166       switch (*mode){
167       case 'r':
168         fprintf(stdout,"%s:  can't read %s\n",application,file);
169         file[0] = '\0';
170         while (file[0] =='\0'){
171           fprintf(stdout,"Please enter a new filename>");
172           fgets(file,100,stdin);
173           }
174         break;
175       case 'w':
176         fprintf(stdout,"%s: can't write %s\n",application,file);
177         file[0] = '\0';
178         while (file[0] =='\0'){
179           fprintf(stdout,"Please enter a new filename>");
180           fgets(file,100,stdin);
181           }
182         break;
183       }
184     }
185   }
186   *fp=of;
187   if (perm != NULL)
188     strcpy(perm,file);
189 } /* openfile */
190 
191 
lowercase(char c)192 char lowercase(char c)
193 {
194 return((char)tolower((int) c));
195 } /* lowercase */
196 
197 
randum(void)198 double randum(void)
199 /* Mary's version--faster but needs 32 bits.  Loops have been unrolled
200    for speed. */
201 {
202   long newseed0, newseed1, newseed2;
203 
204   newseed0 = 1549*seed[0];
205     newseed1 = newseed0/2048;
206     newseed0 &= 2047;
207     newseed2 = newseed1/2048;
208     newseed1 &= 2047;
209   newseed1 += 1549*seed[1]
210               + 812*seed[0];
211     newseed2 += newseed1/2048;
212     newseed1 &= 2047;
213   newseed2 += 1549*seed[2]
214               + 812*seed[1];
215 
216   seed[0] = newseed0;
217   seed[1] = newseed1;
218   seed[2] = newseed2 & 1023;
219   return (((seed[0]/2048.0 + seed[1])/2048.0 + seed[2])/1024.0);
220 }  /* randum */
221 
222 
223 /****************************************************
224  * setupoption_struct() creates a new option_struct */
setupoption_struct(option_struct ** op)225 void setupoption_struct(option_struct **op)
226 {
227 
228 (*op) = (option_struct *)calloc(1,sizeof(option_struct));
229 
230 (*op)->rate = (double *)calloc(1,sizeof(double));
231 (*op)->probcat = (double *)calloc(1,sizeof(double));
232 (*op)->temperature = NULL;
233 (*op)->numpanel = NULL;
234 
235 } /* setupoption_struct */
236 
237 
238 /********************************************************************
239  * istip() returns TRUE if the nodelet is a tip and FALSE otherwise */
istip(node * p)240 boolean istip (node *p)
241 {
242 return(p->type == 't');
243 } /* istip */
244 
245 
246 /***********************************************************************
247  * iscoal returns TRUE if the nodelet is a coalescent, FALSE otherwise */
iscoal(node * p)248 boolean iscoal (node *p)
249 {
250 return(p->type == 'c');
251 } /* iscoal */
252 
253 
254 /************************************************************************
255  * isrecomb returns TRUE if the nodelet is recombinant, FALSE otherwise */
isrecomb(node * p)256 boolean isrecomb (node *p)
257 {
258 return(p->type == 'r');
259 } /* isrecomb */
260 
261 
262 /******************************************************************
263  * branchsub() substitutes one branch for another in one interval *
264  * in a tymelist.                                                 */
branchsub(tlist * t,node * oldbranch,node * newbranch)265 boolean branchsub(tlist *t, node *oldbranch, node *newbranch)
266 {
267 long i;
268 boolean found;
269 
270 found = FALSE;
271 for(i = 0; i < t->numbranch; i++)
272    if (t->branchlist[i] == oldbranch) {
273       t->branchlist[i] = newbranch;
274       found = TRUE;
275    }
276 
277 return(found);
278 } /* branchsub */
279 
280 
281 /************************************************************************
282  * insertaftertymelist adds the node "p" to a tymelist after, inserting *
283  *    within the tymeslice pointed to by "t" using bisection.           */
insertaftertymelist(tlist * t,node * p)284 void insertaftertymelist(tlist *t, node *p)
285 {
286 long i, j, temp;
287 tlist *s;
288 node *q;
289 boolean b1, b2;
290 
291 newtymenode(&s);
292 s->eventnode = findtop(p);
293 s->age = t->age;
294 t->age = p->tyme;
295 s->prev = t;
296 s->succ = t->succ;
297 t->succ = s;
298 if (s->succ != NULL) s->succ->prev = s;
299 
300 /* now deal with the branchlist for the new tymelist entry;
301    the "if" setting "temp" is a workaround for not being able to
302    allocate zero size chunks of memory by malloc-debug/gcc */
303 if (t->numbranch-1 > 0) temp = t->numbranch - 1;
304 else temp = t->numbranch;
305 s->branchlist = (node **)calloc(1,temp*sizeof(node *));
306 s->numbranch = 0;
307 q = findunique(p);
308 if (isrecomb(q)) { /* if we've added a recombinant node */
309    for (i = 0; i < t->numbranch; i++) {
310       if (t->branchlist[i] == q->back) continue;
311       s->branchlist[s->numbranch] = t->branchlist[i];
312       s->numbranch++;
313    }
314    s->branchlist =
315       (node **)realloc(s->branchlist, (s->numbranch+2)*sizeof(node *));
316    s->branchlist[s->numbranch] = q->next;
317    s->branchlist[s->numbranch+1] = q->next->next;
318    s->numbranch += 2;
319 } else { /* else we've added a coalescent node */
320    for (i = 0; i < t->numbranch; i++)
321      if(t->branchlist[i] != q->next->back &&
322         t->branchlist[i] != q->next->next->back) {
323         s->branchlist[s->numbranch] = t->branchlist[i];
324         s->numbranch++;
325      } else if (t->branchlist[i] == q->next->back) {
326         s->branchlist[s->numbranch] = q;
327         s->numbranch++;
328      }
329 }
330 
331 /* now remove the old branches from the rest of the tymelist */
332 if (isrecomb(q)) {
333    for (s = s->succ; s != NULL; s = s->succ) {
334       if (!branchsub(s,q->back,q->next)) break;
335       s->branchlist =
336          (node **)realloc(s->branchlist, (s->numbranch+1)*sizeof(node *));
337       s->branchlist[s->numbranch] = q->next->next;
338       s->numbranch++;
339    }
340 } else {
341    for (s = s->succ; s != NULL; s = s->succ) {
342       b1 = branchsub(s,q->next->back,q);
343       b2 = branchsub(s,q->next->next->back,q);
344       if (b1 && b2) {
345          for (i = 0; i < t->numbranch; i++)
346             if (s->branchlist[i] == q) break;
347          for (j = i; j < t->numbranch-1; j++)
348             s->branchlist[j] = s->branchlist[j+1];
349          s->numbranch--;
350       }
351       if (!b1 && !b2) break;
352    }
353 }
354 
355 } /* insertaftertymelist */
356 
357 
358 /******************************************************************
359  * subtymelist() completely removes the tymelist entry passed in. *
360  * the passed in branch should be the exact branch that wants to  *
361  * be removed in the case of a recombinant node, or the branch    *
362  * that will "replace" the missing branch in a coalescent node.   */
subtymelist(tlist * t,node * branch,boolean all)363 void subtymelist(tlist *t, node *branch, boolean all)
364 {
365 
366 if (isrecomb(t->eventnode)) {
367    remove_branch(t,branch);
368    if (all) remove_branch(t,otherparent(branch));
369    else rename_branch(t,findunique(t->eventnode)->back,otherparent(branch));
370 } else
371    if (all) remove_branch(t,t->eventnode);
372    else rename_branch(t,branch,t->eventnode);
373 
374 if (t->prev) {t->prev->age = t->age; t->prev->succ = t->succ;}
375 else {fprintf(ERRFILE,"ERROR:tried to remove first tymelist entry!\n");}
376 if (t->succ) t->succ->prev = t->prev;
377 
378 freetymenode(t);
379 
380 } /* subtymelist */
381 
382 
383 /****************************************************************
384  * printtymelist() prints the entire tymelist: a debug function */
printtymelist(tlist * t)385 void printtymelist(tlist *t)
386 {
387   long i;
388 
389   fprintf(ERRFILE,"TYMELIST BEGINS\n");
390   while (t != NULL) {
391     fprintf(ERRFILE,"%3ld age%8.6f--",
392            t->eventnode->number, t->age);
393     for (i = 0; i < t->numbranch; i++) {
394       fprintf(ERRFILE," %3ld", t->branchlist[i]->number);
395       if (t->branchlist[i]->top) fprintf(ERRFILE,"t ");
396     }
397     fprintf(ERRFILE,"\n");
398     t = t->succ;
399   }
400   fprintf(ERRFILE,"TYMELIST ENDS\n");
401 }  /* printtymelist */
402 
403 
404 /*****************************************************************
405  * lengthof() returns the length of the branch "rootward" of the *
406  * passed node                                                   */
lengthof(node * p)407 double lengthof(node *p)
408 {
409   return fabs(p->tyme - p->back->tyme);
410 }  /* lengthof */
411 
412 
413 /******************************************************************
414  * fixlength() sets the length and v values for a mother-daughter *
415  * pair of nodelets.                                              */
fixlength(option_struct * op,data_fmt * data,node * p)416 void fixlength(option_struct *op, data_fmt *data, node *p)
417 {
418 double newlength;
419 
420 newlength = lengthof(p);
421 p->length = newlength;
422 ltov(op,data,p);
423 p->back->length = newlength;
424 ltov(op,data,p->back);
425 
426 } /* fixlength */
427 
428 
429 /************************************************
430  * findtop() finds a "top" nodelet in the node. */
findtop(node * p)431 node *findtop(node *p)
432 {
433 
434 if (!istip(p))
435    while (!p->top) p = p->next;
436 
437 return(p);
438 
439 } /* findtop */
440 
441 
442 /********************************************************************
443  * findunique() returns the unique nodelet of a node (i.e. "bottom" *
444  * of a recombinant node or "top" of a coalescent node).            */
findunique(node * p)445 node *findunique(node *p)
446 {
447 
448 if (istip(p)) return(p);
449 
450 if (isrecomb(p))
451    while((p)->top) p = p->next;
452 else
453    while (!p->top) p = p->next;
454 
455 return(p);
456 
457 }  /* findunique */
458 
459 
460 /*********************************************************************
461  * otherdtr() finds the other daughter nodelet of a node, it assumes *
462  * that a daughter nodelet was passed to it.                         */
otherdtr(node * p)463 node *otherdtr(node *p)
464 {
465 node *q;
466 
467 if (isrecomb(p)) {
468    fprintf(ERRFILE,"ERROR:otherdtr() passed a recombinant node!\n");
469    return(NULL);
470 }
471 
472 if (p->top) {
473    fprintf(ERRFILE,"ERROR:otherdtr() passed a top nodelet!\n");
474    return(NULL);
475 }
476 
477 for(q = p->next; q->top; q = q->next)
478    ;
479 
480 return(q);
481 
482 } /* otherdtr */
483 
484 
485 /*********************************************************************
486  * findlink() takes a recombinant node and returns the # of the site *
487  * to the left of the cut link; or FLAGLONG in error states.         */
findlink(node * p)488 long findlink(node *p)
489 {
490 node *q;
491 
492 if (!isrecomb(p)) {
493    fprintf(ERRFILE,"ERROR:findlink passed non-recombinant node\n");
494    return(FLAGLONG);
495 }
496 
497 q = findunique(p);
498 if (q->next->recstart) return(q->next->next->recend);
499 else return(q->next->recend);
500 
501 } /* findlink */
502 
503 
504 /**********************************************************************
505  * otherparent() finds the other parent nodelet of a node, it assumes *
506  * that a parent nodelet was passed to it.                            */
otherparent(node * p)507 node *otherparent(node *p)
508 {
509 node *q;
510 
511 if (!isrecomb(p)) {
512    fprintf(ERRFILE,"ERROR:otherparent() passed a non-recombinant node!\n");
513    return(NULL);
514 }
515 
516 if (!p->top) {
517    fprintf(ERRFILE,"ERROR:otherparent() passed a non-top nodelet!\n");
518    return(NULL);
519 }
520 
521 for(q = p->next; !q->top; q = q->next)
522    ;
523 
524 return(q);
525 
526 } /* otherdtr */
527 
528 
529 /*********************************************
530  * free_z() frees the "z" field of a nodelet */
free_z(option_struct * op,node * p)531 void free_z(option_struct *op, node *p)
532 {
533 if (p->z == NULL) return;
534 
535 free(p->z);
536 p->z = NULL;
537 
538 } /* free_z */
539 
540 
541 /***************************************************************
542  * allocate_z() allocates space for the "z" field of a nodelet */
allocate_z(option_struct * op,data_fmt * data,node * p)543 void allocate_z(option_struct *op, data_fmt *data, node *p)
544 {
545 if (p->z) free(p->z);
546 
547 p->z = (double *)calloc(NUMTRAIT,sizeof(double));
548 
549 } /* allocate_z */
550 
551 
552 /*********************************************
553  * free_x() frees the "x" field of a nodelet */
free_x(option_struct * op,node * p)554 void free_x(option_struct *op, node *p)
555 {
556 
557 if (p->x == NULL) return;
558 
559 if (numx < XARRAYSIZE) {
560    sparex[numx] = p->x;
561    numx++;
562 } else {
563    switch(op->datatype) {
564       case 'a':
565          break;
566       case 'b':
567       case 'm':
568          free(p->x->a);
569          break;
570       case 'n':
571       case 's':
572          free(p->x->s[0][0][0]);
573          free(p->x->s[0][0]);
574          free(p->x->s[0]);
575          free(p->x->s);
576          break;
577       default:
578          fprintf(ERRFILE,"ERROR:free_x: can't get here!\n");
579          exit(-1);
580    }
581    free(p->x);
582 }
583 
584 p->x = NULL;
585 
586 } /* free_x */
587 
588 
589 /************************************************************
590  * allocate_x() allocates space for the "x" field of a nodelet */
allocate_x(option_struct * op,data_fmt * data,node * p)591 void allocate_x(option_struct *op, data_fmt *data, node *p)
592 {
593 long i, j, k, nummarkers = 0, numloci, numslice;
594 
595 if (p->x != NULL) free_x(op,p);
596 
597 if(numx > 0) {
598    p->x = sparex[numx-1];
599    sparex[numx-1] = NULL;
600    numx--;
601 } else {
602    p->x = (datalike_fmt *)calloc(1,sizeof(datalike_fmt));
603    switch(op->datatype) {
604       case 'a':
605          p->x->a = NULL;
606          p->x->s = NULL;
607          break;
608       case 'b':
609       case 'm':
610          numloci = getdata_numloci(op,data);
611          p->x->s = NULL;
612          p->x->a =
613             (double **)calloc(numloci,sizeof(double *));
614          p->x->a[0] = (double *)calloc(numloci*MICRO_ALLELEMAX,
615             sizeof(double));
616          for(i = 1; i < numloci; i++)
617             p->x->a[i] = p->x->a[0] + i*MICRO_ALLELEMAX;
618          break;
619       case 'n':
620          nummarkers += NUMINVAR;
621       case 's':
622          numslice = (op->panel) ? NUMSLICE : 1L;
623          nummarkers += getdata_nummarkers(op,data);
624          p->x->a = NULL;
625          p->x->s = (double ****)calloc(nummarkers,sizeof(double ***));
626          p->x->s[0] = (double ***)
627             calloc(nummarkers*op->categs,sizeof(double **));
628          for(i = 1; i < nummarkers; i++)
629             p->x->s[i] = p->x->s[0] + i*op->categs;
630          p->x->s[0][0] = (double **)
631             calloc(nummarkers*op->categs*numslice,sizeof(double *));
632          for(i = 0; i < nummarkers; i++)
633             for(j = 0; j < op->categs; j++)
634                p->x->s[i][j] = p->x->s[0][0] + i*op->categs*numslice +
635                                j*numslice;
636          p->x->s[0][0][0] = (double *)
637             calloc(nummarkers*op->categs*numslice*4L,sizeof(double));
638          for(i = 0; i < nummarkers; i++)
639             for(j = 0; j < op->categs; j++)
640                for(k = 0; k < numslice; k++)
641                   p->x->s[i][j][k] = p->x->s[0][0][0] +
642                                   i*op->categs*numslice*4L +
643                                   j*numslice*4L + k*4L;
644          break;
645       default:
646          fprintf(ERRFILE,"ERROR:alloc_x: can't get here!\n");
647          exit(-1);
648    }
649 }
650 
651 } /* allocate_x */
652 
653 
654 /************************************************************
655  * init_coal_alloc() callocs and initializes a range field. */
init_coal_alloc(long ** coal,long numcoalpairs)656 void init_coal_alloc(long **coal, long numcoalpairs)
657 {
658 
659 if (*coal != NULL) free(*coal);
660 
661 (*coal) = (long *)calloc(numcoalpairs * 2 + 2,sizeof(long));
662 (*coal)[0] = numcoalpairs;
663 (*coal)[numcoalpairs*2+1] = FLAGLONG;
664 
665 } /* init_coal_alloc */
666 
667 
668 /****************************************************************
669  * coal_Malloc() callocs or frees the "coal" field of a nodelet */
coal_Malloc(node * p,boolean allokate,long numcoalpairs)670 void coal_Malloc(node *p, boolean allokate, long numcoalpairs)
671 {
672 
673 if (allokate && p->top) {
674    init_coal_alloc(&(p->coal),numcoalpairs);
675 } else {
676    if (p->coal != NULL) {
677       free(p->coal);
678       p->coal = NULL;
679    }
680 }
681 
682 } /* coal_Malloc */
683 
684 
685 /**************************************************************
686  * init_ranges_alloc() callocs and initializes a range field. */
init_ranges_alloc(long ** ranges,long numrangepairs)687 void init_ranges_alloc(long **ranges, long numrangepairs)
688 {
689 
690 if (*ranges != NULL) free(*ranges);
691 
692 (*ranges) = (long *)calloc(numrangepairs * 2 + 2,sizeof(long));
693 (*ranges)[0] = numrangepairs;
694 (*ranges)[numrangepairs*2+1] = FLAGLONG;
695 
696 } /* init_ranges_alloc */
697 
698 
699 /********************************************************************
700  * ranges_Malloc() callocs or frees the "ranges" field of a nodelet */
ranges_Malloc(node * p,boolean allokate,long numrangepairs)701 void ranges_Malloc(node *p, boolean allokate, long numrangepairs)
702 {
703 
704 if (allokate && p->top) {
705    init_ranges_alloc(&(p->ranges),numrangepairs);
706 } else {
707    if (p->ranges != NULL) {
708       free(p->ranges);
709       p->ranges = NULL;
710    }
711 }
712 
713 } /* ranges_Malloc */
714 
715 
716 /****************************************************************
717  * meld_adjacent_ranges() concatenates elements adjacent to the *
718  * passed position if it is appropiate to do so.                */
meld_adjacent_ranges(long * cranges,long newelem)719 void meld_adjacent_ranges(long *cranges, long newelem)
720 {
721 long numpairs, nummove, prevelem, nextelem, lastelem;
722 
723 prevelem = newelem - 1;
724 nextelem = newelem + 2;
725 lastelem = 2*cranges[0]+1;
726 
727 numpairs = 0;
728 if (cranges[newelem] == cranges[prevelem]+1) numpairs++;
729 if (cranges[newelem+1]+1 == cranges[nextelem]) {
730    if (numpairs) numpairs++;
731    else numpairs = -1L;
732    }
733 if (numpairs == 2 && prevelem == 0) numpairs = -1L;
734 if (numpairs == 2 && nextelem == lastelem) numpairs--;
735 
736 if (numpairs == 1 && prevelem != 0) {
737    nummove = 2*cranges[0] - newelem;
738    cranges[prevelem] = cranges[newelem+1];
739    memmove(&cranges[newelem],&cranges[nextelem],nummove*sizeof(long));
740    cranges[0]--;
741 }
742 if (numpairs == -1 && nextelem != lastelem) {
743    nummove = 2*cranges[0] - newelem - 2;
744    cranges[newelem+1] = cranges[nextelem+1];
745    memmove(&cranges[nextelem],&cranges[nextelem+2],nummove*sizeof(long));
746    cranges[0]--;
747 }
748 if (numpairs == 2) {
749    nummove = 2*cranges[0] - newelem - 2;
750    cranges[newelem-1] = cranges[nextelem+1];
751    memmove(&cranges[newelem],&cranges[nextelem+2],nummove*sizeof(long));
752    cranges[0] -= 2;
753 }
754 
755 } /* meld_adjacent_ranges */
756 
757 
758 /****************************************************
759  * addrange() adds a new element to the range field */
addrange(long ** nnewranges,long newstart,long newend)760 void addrange(long **nnewranges, long newstart, long newend)
761 {
762 long i, first, numpairs, nummove, *newranges;
763 
764 if (newstart > newend)
765    fprintf(ERRFILE,"ERROR:addrange--start > end\n");
766 
767 newranges = (*nnewranges);
768 numpairs = newranges[0]*2;
769 
770 /* was there nothing in the initial range list? */
771 if(!numpairs) {
772    newranges[0]++;
773    newranges = (long *)realloc(newranges,4*sizeof(long));
774    (*nnewranges) = newranges;
775    newranges[1] = newstart;
776    newranges[2] = newend;
777    newranges[3] = FLAGLONG;
778    return;
779 }
780 
781 /* do I start after everything else ends? */
782 if (newstart > newranges[numpairs]) {
783    if (newstart == newranges[numpairs]+1) {
784       newranges[numpairs] = newend;
785    } else {
786       newranges[0]++; numpairs += 2;
787       newranges = (long *)realloc(newranges,(numpairs+2)*sizeof(long));
788       (*nnewranges) = newranges;
789       newranges[numpairs-1] = newstart;
790       newranges[numpairs] = newend;
791       newranges[numpairs+1] = FLAGLONG;
792    }
793    return;
794 }
795 
796 /* do I end before anything begins? */
797 if (newend < newranges[1]) {
798    if (newend == newranges[1]-1) {
799       newranges[1] = newstart;
800    } else {
801       newranges[0]++;
802       newranges =
803          (long *)realloc(newranges,(newranges[0]*2+2)*sizeof(long));
804       (*nnewranges) = newranges;
805       memmove(&newranges[3],&newranges[1],(numpairs+1)*sizeof(long));
806       newranges[1] = newstart;
807       newranges[2] = newend;
808    }
809    return;
810 }
811 
812 /* am I contained in another interval altogether? OR
813    do I overlap one or more intervals?  OR
814    am I between two already existing intervals? */
815 for(i = 1, numpairs = 0, first = 0; newranges[i] != FLAGLONG; i+=2) {
816    if(newstart >= newranges[i] && newend <= newranges[i+1]) return;
817    if (i != 1) {
818       if (newstart > newranges[i-1] && newend < newranges[i]) {
819          first = i;
820          break;
821       }
822    }
823    if((newranges[i] >= newstart && newranges[i] <= newend) ||
824       (newranges[i+1] >= newstart && newranges[i+1] <= newend)) {
825       numpairs++;
826       if (!first) first = i;
827    }
828 }
829 
830 /* do I exist between two intervals? */
831 if (!numpairs) {
832    nummove = 2*newranges[0]+2 - first;
833    newranges[0]++;
834    newranges =
835       (long *)realloc(newranges,(newranges[0]*2+2)*sizeof(long));
836    (*nnewranges) = newranges;
837    memmove(&newranges[first+2],&newranges[first],nummove*sizeof(long));
838    newranges[first] = newstart;
839    newranges[first+1] = newend;
840    meld_adjacent_ranges(newranges,first);
841    return;
842 }
843 
844 /* I must overlap one or more intervals */
845 if (numpairs == 1) {
846    newranges[first] =
847       (newranges[first] < newstart) ? newranges[first] : newstart;
848    newranges[first+1] =
849       (newranges[first+1] > newend) ? newranges[first+1] : newend;
850 } else {
851    nummove = 2*newranges[0]+2 - first - 2*numpairs;
852    newranges[0] -= numpairs-1;
853    newranges[first] =
854       (newranges[first] < newstart) ? newranges[first] : newstart;
855    newranges[first+1] =
856       (newranges[first+1+2*(numpairs-1)] > newend) ?
857          newranges[first+1+2*(numpairs-1)] : newend;
858    memmove(&newranges[first+2],&newranges[first+2*numpairs],
859            nummove*sizeof(long));
860 }
861 meld_adjacent_ranges(newranges,first);
862 
863 } /* addrange */
864 
865 
866 /******************************************************************
867  * inrange() checks to see if a site is active on a given branch */
inrange(long * ranges,long site)868 boolean inrange(long *ranges, long site)
869 {
870 long i;
871 
872 for(i = 1; ranges[i] != FLAGLONG; i+=2) {
873    if (ranges[i] > site) return(FALSE);
874    if (ranges[i+1] >= site) return(TRUE);
875 }
876 
877 return(FALSE);
878 
879 } /* inrange */
880 
881 
882 /************************************************************
883  * subrangefc() causes the passed range to be set to "dead" */
subrangefc(long ** newranges,long substart,long subend)884 void subrangefc(long **newranges, long substart, long subend)
885 {
886 long i, *nranges, numpairs, first, nummove;
887 
888 nranges = (*newranges);
889 
890 /* was there nothing in the initial range list? */
891 /* do I start after everything else ends? */
892 /* do I end before anything begins? */
893 if (!nranges[0] || substart > nranges[2*nranges[0]] ||
894     subend < nranges[1]) return;
895 
896 /* am I contained in another interval altogether? OR
897    do I overlap one or more intervals?  OR
898    am I between two already existing intervals? */
899 for(i = 1, numpairs = 0, first = 0; nranges[i] != FLAGLONG; i+=2) {
900    if(nranges[i] > subend) break;
901    if (i != 1) {
902       if (substart > nranges[i-1] && subend < nranges[i])
903          return;
904    }
905 
906 /*    if the deletion starts in this range OR
907             the deletion ends in this range OR
908             the deletion overlaps this range THEN */
909    if((substart >= nranges[i] && substart <= nranges[i+1]) ||
910       (subend >= nranges[i] && subend <= nranges[i+1]) ||
911       (substart <= nranges[i] && subend >= nranges[i+1])) {
912       numpairs++;
913       if (!first) first = i;
914    }
915 }
916 
917 /* am I contained within a single interval? */
918 if (numpairs == 1) {
919 /*    should I just remove the whole entry? */
920    if ((substart <= nranges[first]) && (subend >= nranges[first+1])) {
921       nummove = 2*nranges[0] - first;
922       memmove(&nranges[first],&nranges[first+2],nummove*sizeof(long));
923       nranges[0]--;
924       return;
925    }
926 /*    or chop off the beginning? */
927    if (substart <= nranges[first]) {
928       nranges[first] = subend+1;
929       return;
930    }
931 /*    or chop off the end? */
932    if (subend >= nranges[first+1]) {
933       nranges[first+1] = substart-1;
934       return;
935    }
936 /*    then split it in two! */
937    nranges[0]++;
938    nranges =
939       (long *)realloc(nranges,(2*nranges[0]+2)*sizeof(long));
940    (*newranges) = nranges;
941    nummove = 2*nranges[0] - (first+1);
942    memmove(&nranges[first+3],&nranges[first+1],nummove*sizeof(long));
943    nranges[first+1] = substart-1;
944    nranges[first+2] = subend+1;
945    return;
946 }
947 
948 /* I must overlap one or more intervals */
949 if (substart > nranges[first]) nranges[first+1] = substart-1;
950 if (subend < nranges[first+1+2*(numpairs-1)])
951    nranges[first+2*(numpairs-1)] = subend + 1;
952 else {
953 /*   remove the last affected rangepair */
954    nummove = 2*nranges[0] - (first+2*(numpairs-1));
955    memmove(&nranges[first+2*(numpairs-1)],&nranges[first+2*(numpairs)],
956       nummove*sizeof(long));
957    nranges[0]--;
958 }
959 /*  remove the middle affected rangepairs, if any */
960 nummove = 2*nranges[0]+2 - (first+2*(numpairs-1));
961 memmove(&nranges[first+2],&nranges[first+2*(numpairs-1)],
962    nummove*sizeof(long));
963 nranges[0] -= numpairs-2;
964 /*  remove the first affected rangepair, if necessary */
965 if (substart <= nranges[first]) {
966    nummove = 2*nranges[0] - first;
967    memmove(&nranges[first],&nranges[first+2],nummove*sizeof(long));
968    nranges[0]--;
969 }
970 
971 } /* subrangefc */
972 
973 
printrange(long * ranges)974 void printrange(long *ranges)
975 {
976 long i;
977 
978 printf("\n%ld range pairs:  ",ranges[0]);
979 for(i = 1; ranges[i] != FLAGLONG; i+=2)
980    printf("%ld to %ld ",ranges[i],ranges[i+1]);
981 printf("\n");
982 
983 } /* printrange */
984 
985 
986 /* "newnode" & "freenode" are paired memory managers for tree nodes.
987    They maintain a linked list of unused nodes which should be faster
988    to use than "calloc" & "free" (at least for recombination).
989    They can only be used for internal nodes, not tips. */
newnode(node ** p)990 void newnode(node **p)
991 {
992   long i;
993   node *q;
994 
995   if (freenodes == NULL) { /* need a new node */
996     (*p) = allocate_nodelet(3L,'i');
997     (*p)->number = nodectr;
998     for(q = (*p)->next; q != (*p); q = q->next) q->number = nodectr;
999     nodectr++;
1000     return;
1001   } else { /* recycle an old node */
1002     q=freenodes;
1003     freenodes=freenodes->back;
1004     for(i = 0; i < 3; i++) {
1005       ranges_Malloc(q,FALSE,0L);
1006       coal_Malloc(q,FALSE,0L);
1007       q->updated = FALSE;
1008       q->futileflag = 0L;
1009       q->recstart = q->recend = -1L;
1010       q->back = NULL;
1011       q = q->next;
1012     }
1013     *p = q;
1014     return;
1015   }
1016 }  /* newnode */
1017 
freenode(node * p)1018 void freenode(node *p)
1019 {
1020    p->back = freenodes;
1021    freenodes = p;
1022 }  /* freenode */
1023 /* END of treenode allocation functions */
1024 
newtymenode(tlist ** t)1025 void newtymenode(tlist **t)
1026 {
1027   *t = (tlist *)calloc(1,sizeof(tlist));
1028   (*t)->prev = NULL;
1029   (*t)->succ = NULL;
1030 }  /* newtymenode */
1031 
freetymenode(tlist * t)1032 void freetymenode(tlist *t)
1033 {
1034   if(t->branchlist!=NULL) free(t->branchlist);
1035   free(t);
1036 }  /* freetymenode*/
1037 
freetymelist(tlist * t)1038 void freetymelist(tlist *t)
1039 {
1040    if (t->succ != NULL) freetymelist(t->succ);
1041 
1042    freetymenode(t);
1043 
1044 } /* freetymelist */
1045 
hookup(node * p,node * q)1046 void hookup(node *p, node *q)
1047 {
1048   p->back = q;
1049   q->back = p;
1050 }  /* hookup */
1051 
atr(node * p)1052 void atr(node *p)
1053 /* "atr" prints out a text representation of a tree.  Pass
1054    curtree->root->back for normal results.  This is a debugging
1055    function which is not normally called. */
1056 {
1057 long i;
1058 node *q;
1059 
1060   if (p->back == curtree->root) {
1061      fprintf(ERRFILE,"next node is root\n");
1062      fprintf(ERRFILE,"Node %4ld length %12.6f tyme %10.8f",
1063              p->back->number, lengthof(p), p->back->tyme);
1064      fprintf(ERRFILE," --> %4ld\n",p->number);
1065   }
1066   fprintf(ERRFILE,"Node %4ld update %ld length %10.8f tyme %10.8f -->",
1067          p->number, (long)p->updated, lengthof(p), p->tyme);
1068   if (!istip(p))
1069      for(i = 0, q = p; i < 3; i++, q = q->next)
1070         if (!q->top) fprintf(ERRFILE,"%4ld\n",q->back->number);
1071   fprintf(ERRFILE,"\n");
1072   if (p->top && p->back->top) fprintf(ERRFILE,"TWO TOPS HERE!!!!");
1073   if (!istip(p)) {
1074      if (!p->next->top) atr(p->next->back);
1075      if (!p->next->next->top) atr(p->next->next->back);
1076   }
1077   else fprintf(ERRFILE,"\n");
1078 } /* atr */
1079 
probatr(node * p)1080 void probatr(node *p)
1081 /* "probatr" checks the internal representation of the tree.  Pass
1082    curtree->root->back for normal results.  This is a debugging
1083    function which is not normally called. */
1084 {
1085 long i;
1086 
1087 if (p->back == curtree->root) {
1088 }
1089 
1090 if(p->top) {
1091    if(p->ranges[0] < 0)
1092       fprintf(ERRFILE,"node %ld has less then 0 ranges %ld %ld\n",
1093       p->number,indecks,apps);
1094    if(p->ranges[0] > 2)
1095       fprintf(ERRFILE,"node %ld has %ld ranges %ld %ld\n",
1096       p->number,p->ranges[0],indecks,apps);
1097    if(p->ranges[2*p->ranges[0]+1] != FLAGLONG)
1098       fprintf(ERRFILE,"node %ld has misformed end of ranges %ld %ld\n",
1099       p->number,indecks,apps);
1100    for(i = 1; p->ranges[i] != FLAGLONG; i+=2) {
1101       if(p->ranges[i] > p->ranges[i+1])
1102          fprintf(ERRFILE,"node %ld has begins > ends %ld %ld\n",
1103          p->number,indecks,apps);
1104       if(p->ranges[i] < 0 || p->ranges[i+1] < 0)
1105          fprintf(ERRFILE,"node %ld has negative starts or ends %ld %ld\n",
1106          p->number,indecks,apps);
1107    }
1108 }
1109 
1110 if (p->top && p->back->top) fprintf(ERRFILE,"TWO TOPS HERE!!!!");
1111 if (!istip(p)) {
1112    if (!p->next->top) probatr(p->next->back);
1113    if (!p->next->next->top) probatr(p->next->next->back);
1114 }
1115 
1116 
1117 } /* probatr */
1118 
1119 
1120 /***************************************************************
1121  * gettymenode() returns a pointer to the tymelist entry whose *
1122  * 'eventnode' has the number of 'target'.                     */
gettymenode(tree * tr,long target)1123 tlist *gettymenode(tree *tr, long target)
1124 {
1125 boolean found;
1126 tlist *t;
1127 
1128 if (target == tr->root->number) return (NULL);
1129 if (tr->nodep[target]->type == 't') return(tr->tymelist);
1130 
1131 for(t = tr->tymelist, found = FALSE; t != NULL; t = t->succ)
1132    if (t->eventnode->number == target) {found = TRUE; break;}
1133 
1134 if (!found) {
1135    fprintf(ERRFILE,"ERROR:In gettymenode, failed to find node %12ld ", target);
1136    fprintf(ERRFILE,"CATASTROPHIC ERROR\n");
1137    exit(-1);
1138 }
1139 
1140 return(t);
1141 
1142 }  /* gettymenode */
1143 
1144 
1145 /***************************************************************
1146  * vtol() takes a "v" value for a branchlength and returns the *
1147  * branchlength.                                               */
vtol(option_struct * op,data_fmt * data,double v)1148 double vtol(option_struct *op, data_fmt *data, double v)
1149 {
1150 
1151 switch(op->datatype) {
1152    case 'a':
1153       break;
1154    case 'b':
1155    case 'm':
1156       return(v);
1157       break;
1158    case 'n':
1159    case 's':
1160       return(data->dnaptr->fracchange * -1.0 * v);
1161       break;
1162    default:
1163       fprintf(ERRFILE,"ERROR:vtol, can't get here!\n");
1164       exit(-1);
1165 }
1166 
1167 return(-1.0);
1168 
1169 } /* vtol */
1170 
1171 
1172 /******************************************************************
1173  * ltov() recalculates the proper "v" value of a branch, from the *
1174  * tymes at either end of the branch.                             */
ltov(option_struct * op,data_fmt * data,node * p)1175 void ltov(option_struct *op, data_fmt *data, node *p)
1176 {
1177 
1178 switch(op->datatype) {
1179    case 'a':
1180       break;
1181    case 'b':
1182    case 'm':
1183       p->v = lengthof(p);
1184       break;
1185    case 'n':
1186    case 's':
1187       p->v = -1.0 * lengthof(p) / data->dnaptr->fracchange;
1188       break;
1189    default:
1190       fprintf(ERRFILE,"ERROR:ltov, can't get here!\n");
1191       exit(-1);
1192 }
1193 p->back->v = p->v;
1194 
1195 }  /* ltov */
1196 
1197 
1198 /***************************************************************
1199  * findcoal_ltov() is a duplicate ltov specially for findcoal. */
findcoal_ltov(option_struct * op,data_fmt * data,double value)1200 double findcoal_ltov(option_struct *op, data_fmt *data, double value)
1201 {
1202 
1203 switch(op->datatype) {
1204    case 'a':
1205       break;
1206    case 'b':
1207    case 'm':
1208       return(value);
1209       break;
1210    case 'n':
1211    case 's':
1212       return(-1.0 * value / data->dnaptr->fracchange);
1213       break;
1214    default:
1215       fprintf(ERRFILE,"ERROR:ltov, can't get here!\n");
1216       exit(-1);
1217 }
1218 
1219 return(-1.0);
1220 
1221 } /* findcoal_ltov */
1222 
1223 
1224 /* boolcheck(), booleancheck(), numbercheck(), and readparmfile()
1225    are used in reading the parameter file "parmfile" */
boolcheck(char ch)1226 long boolcheck(char ch)
1227 {
1228    ch = toupper((int)ch);
1229    if (ch == 'F' || ch == 'N') return 0;
1230    if (ch == 'T' || ch == 'Y') return 1;
1231    return -1;
1232 } /* boolcheck */
1233 
booleancheck(option_struct * op,char * var,char * value)1234 boolean booleancheck(option_struct *op, char *var, char *value)
1235 {
1236    long i, j, check;
1237 
1238    check = boolcheck(value[0]);
1239    if(check == -1) return FALSE;
1240 
1241    for(i = 0; i < NUMBOOL; i++) {
1242       if(!strcmp(var,booltokens[i])) {
1243          if(i == 0) op->interleaved = (boolean)(check);
1244          if(i == 1) op->printdata = (boolean)(check);
1245          if(i == 2) op->progress = (boolean)(check);
1246          if(i == 3) op->treeprint = (boolean)(check);
1247          if(i == 4) {
1248             op->freqsfrom = (boolean)(check);
1249             if(!op->freqsfrom) {
1250                strtok(value,":");
1251                freqa = (double)atof(strtok(NULL,";"));
1252                freqc = (double)atof(strtok(NULL,";"));
1253                freqg = (double)atof(strtok(NULL,";"));
1254                freqt = (double)atof(strtok(NULL,";"));
1255             }
1256          }
1257          if(i == 5) {
1258             op->ctgry = (boolean)(check);
1259             if(op->ctgry) {
1260                strtok(value,":");
1261                op->categs = (long)atof(strtok(NULL,";"));
1262                op->rate =
1263                   (double *)realloc(op->rate,op->categs*sizeof(double));
1264                op->probcat =
1265                   (double *)realloc(op->probcat,op->categs*sizeof(double));
1266                for(j = 0; j < op->categs; j++) {
1267                   op->rate[j] = (double)atof(strtok(NULL,";"));
1268                   op->probcat[j] = (double)atof(strtok(NULL,";"));
1269                }
1270             }
1271          }
1272          if(i == 6) {
1273             op->watt = (boolean)(check);
1274             if (!op->watt) {
1275                strtok(value,":");
1276                theta0 = (double)atof(strtok(NULL,";"));
1277             }
1278          }
1279          if(i == 7) op->usertree = (boolean)(check);
1280          if(i == 8) {
1281             op->autocorr = (boolean)(check);
1282             if (op->autocorr) {
1283                strtok(value,":");
1284                op->lambda = 1.0 / (double)atof(strtok(NULL,";"));
1285             }
1286          }
1287          if(i == 9) op->newdata = (boolean)(check);
1288          if(i == 10) {
1289             op->same_ne = (boolean)(check);
1290             if (!op->same_ne) {
1291                strtok(value,":");
1292                check = atol(strtok(NULL,";"));
1293                op->ne_ratio = (double *)calloc(check,sizeof(double));
1294                for(j = 0; j < check; j++)
1295                   op->ne_ratio[j] = (double)atof(strtok(NULL,";"));
1296             }
1297          }
1298          if(i == 11) op->interactive = (boolean)(check);
1299          if(i == 12) op->mhmcsave = (boolean)(check);
1300          if(i == 13) {
1301              op->panel = (boolean)(check);
1302              if (op->panel) {
1303                 strtok(value,":");
1304                 check = atol(strtok(NULL,";"));
1305                 op->numpanel = (long *)calloc(check,sizeof(long));
1306                 for (j = 0; j < check; j++)
1307                    op->numpanel[j] = atol(strtok(NULL,";"));
1308              }
1309          }
1310          if(i == 14) op->map = (boolean)(check);
1311          if(i == 15) op->fc = (boolean)(check);
1312          if(i == 16) {
1313             op->full = (boolean)(check);
1314             if (op->full) {
1315                strtok(value,":");
1316                op->chance_seen = (double)atof(strtok(NULL,";"));
1317             }
1318          }
1319          if(i == 17) {
1320             op->haplotyping = (boolean)(check);
1321             if (op->haplotyping) {
1322                strtok(value,":");
1323                op->happrob = (double)atof(strtok(NULL,";"));
1324             }
1325          }
1326          if(i == 18) op->profile = (boolean)(check);
1327          if(i == 19) op->norecsnp = (boolean)(check);
1328          return TRUE;
1329       }
1330    }
1331    return FALSE;
1332 } /* booleancheck */
1333 
numbercheck(option_struct * op,char * var,char * value)1334 boolean numbercheck(option_struct *op, char *var, char *value)
1335 {
1336 long i, j;
1337 
1338 for(i = 0; i < NUMNUMBER; i++) {
1339    if(!strcmp(var,numbertokens[i])) {
1340       if(i == 0) locus_ttratio = (double)atof(value);
1341       if(i == 1) op->numchains[0] = atol(value);
1342       if(i == 2) op->steps[0] = atol(value);
1343       if(i == 3) op->increm[0] = atol(value);
1344       if(i == 4) op->numchains[1] = atol(value);
1345       if(i == 5) op->increm[1] = atol(value);
1346       if(i == 6) op->steps[1] = atol(value);
1347       if(i == 7) rec0 = (double)atof(value);
1348       if(i == 8) {
1349          value[0] = toupper((int)value[0]);
1350          switch(value[0]) {
1351              case 'N':
1352                 op->holding = 0;
1353                 break;
1354              case 'T':
1355                 op->holding = 1;
1356                 break;
1357              case 'R':
1358                 op->holding = 2;
1359                 break;
1360              default:
1361                 fprintf(ERRFILE,"unknown setting for holding: %s,",
1362                    value);
1363                 fprintf(ERRFILE,"  no holding assumed\n");
1364                 op->holding = 0;
1365                 break;
1366          }
1367       }
1368       if(i == 9) op->mutrait = (double)atof(value);
1369       if(i == 10) op->traitratio = (double)atof(value);
1370       if(i == 11) op->pd = (double)atof(value);
1371       if(i == 12) op->hapdrop = atol(value);
1372       if(i == 13) {
1373          op->numtempchains = atol(strtok(value,";"));
1374          if (!op->temperature) free(op->temperature);
1375          op->temperature = (long *)calloc(op->numtempchains,sizeof(long));
1376          for(j = 0; j < op->numtempchains; j++)
1377             op->temperature[j] = atof(strtok(NULL,";"));
1378       qsort((void *)(op->temperature),op->numtempchains,sizeof(long),longcmp);
1379       }
1380       return TRUE;
1381    }
1382 }
1383 return FALSE;
1384 
1385 } /* numbercheck */
1386 
readparmfile(option_struct * op)1387 void readparmfile(option_struct *op)
1388 {
1389 char fileline[LINESIZE],parmvar[LINESIZE],varvalue[LINESIZE];
1390 
1391 #ifdef MAC
1392 char parmfilename[100];
1393 
1394 strcpy(parmfilename,"parmfile");
1395 #endif
1396 
1397 parmfile = fopen("parmfile","r");
1398 
1399 if(parmfile) {
1400    while(fgets(fileline, LINESIZE, parmfile) != NULL) {
1401       if(fileline[0] == '#') continue;
1402       if(!strncmp(fileline,"end",3)) break;
1403       strcpy(parmvar,strtok(fileline,"="));
1404       strcpy(varvalue,strtok(NULL,"\n"));
1405       /* now to process... */
1406       if(!booleancheck(op,parmvar,varvalue))
1407          if(!numbercheck(op,parmvar,varvalue)) {
1408             fprintf(ERRFILE,
1409                "Inappropiate entry in parmfile: %s\n", fileline);
1410          }
1411    }
1412 } else
1413    if (!MENU) {
1414       fprintf(simlog,"Parameter file (parmfile) missing\n");
1415       exit(-1);
1416    }
1417 
1418 FClose(parmfile);
1419 
1420 #ifdef MAC
1421 fixmacfile(parmfilename);
1422 #endif
1423 
1424 } /* readparmfile */
1425 /* END parameter file read */
1426 
1427 
whichopbool(option_struct * op,long i)1428 boolean whichopbool(option_struct *op, long i)
1429 {
1430 if (i == 0) return(op->interleaved);
1431 if (i == 1) return(op->printdata);
1432 if (i == 2) return(op->progress);
1433 if (i == 3) return(op->treeprint);
1434 if (i == 4) return(op->freqsfrom);
1435 if (i == 5) return(op->ctgry);
1436 if (i == 6) return(op->watt);
1437 if (i == 7) return(op->usertree);
1438 if (i == 8) return(op->autocorr);
1439 if (i == 9) return(op->newdata);
1440 if (i == 10) return(op->same_ne);
1441 if (i == 11) return(op->interactive);
1442 if (i == 12) return(op->mhmcsave);
1443 if (i == 13) return(op->panel);
1444 if (i == 14) return(op->map);
1445 if (i == 15) return(op->fc);
1446 if (i == 16) return(op->full);
1447 if (i == 17) return(op->haplotyping);
1448 if (i == 18) return(op->profile);
1449 if (i == 19) return(op->norecsnp);
1450 
1451 fprintf(ERRFILE,"Warning:  whichopbool failed to identify option\n");
1452 return(FALSE);
1453 } /* whichopbool */
1454 
1455 
rec_parmfilewrite(option_struct * op,long numloci,long numpop)1456 void rec_parmfilewrite(option_struct *op, long numloci, long numpop)
1457 {
1458 long i, j;
1459 boolean b;
1460 char parmfilename[100];
1461 
1462 
1463 openfile(&parmfile,"parmfile","w+",NULL,parmfilename);
1464 
1465 for(i = 0; i < NUMBOOL; i++) {
1466    b = whichopbool(op,i);
1467    fprintf(parmfile,"%s=%s",booltokens[i],BOOLPRINT(b));
1468    if (i == 4 && !b) { /* freqsfrom */
1469       fprintf(parmfile,":%f;%f;%f;%f;",
1470          freqa, freqc, freqg, freqt);
1471    }
1472    if (i == 5 && b) { /* category */
1473       fprintf(parmfile,":%ld;",op->categs);
1474       for(j = 0; j < op->categs; j++)
1475          fprintf(parmfile,"%f;%f;",op->rate[j],op->probcat[j]);
1476    }
1477    if (i == 6 && !b) { /* starting theta */
1478       fprintf(parmfile,":%f",theta0);
1479    }
1480    if (i == 8 && !b) { /* autocorrelation */
1481       fprintf(parmfile,":%f",1.0/op->lambda);
1482    }
1483    if (i == 10 && !b) { /* same_ne */
1484       fprintf(parmfile,":%ld;",numloci);
1485       for(j = 0; j < numloci; j++)
1486          fprintf(parmfile,"%f;",op->ne_ratio[j]);
1487    }
1488    if (i == 13 && b) { /* numpanels */
1489       fprintf(parmfile,":%ld;",numpop);
1490       for(j = 0; j < numpop; j++)
1491          fprintf(parmfile,"%ld;",op->numpanel[j]);
1492    }
1493    if (i == 16 && b) { /* full snp */
1494       fprintf(parmfile,":%f;",op->chance_seen);
1495    }
1496    fprintf(parmfile,"\n");
1497 }
1498 
1499 for(i = 0; i < NUMNUMBER; i++) {
1500    fprintf(parmfile,"%s=",numbertokens[i]);
1501    if (i == 0) fprintf(parmfile,"%f",locus_ttratio);
1502    if (i == 1) fprintf(parmfile,"%ld",op->numchains[0]);
1503    if (i == 2) fprintf(parmfile,"%ld",op->steps[0]);
1504    if (i == 3) fprintf(parmfile,"%ld",op->increm[0]);
1505    if (i == 4) fprintf(parmfile,"%ld",op->numchains[1]);
1506    if (i == 5) fprintf(parmfile,"%ld",op->increm[1]);
1507    if (i == 6) fprintf(parmfile,"%ld",op->steps[1]);
1508    if (i == 7) fprintf(parmfile,"%f",rec0);
1509    if (i == 8) {
1510       if (!op->holding) fprintf(parmfile,"none");
1511       if (op->holding == 1) fprintf(parmfile,"theta");
1512       if (op->holding == 2) fprintf(parmfile,"recombination");
1513    }
1514    if (i == 9) fprintf(parmfile,"%f",op->mutrait);
1515    if (i == 10) fprintf(parmfile,"%f",op->traitratio);
1516    if (i == 11) fprintf(parmfile,"%f",op->pd);
1517    if (i == 12) fprintf(parmfile,"%ld",op->hapdrop);
1518    if (i == 13) {
1519       fprintf(parmfile,"%ld;",op->numtempchains);
1520       for(j = 0; j < op->numtempchains; j++)
1521          fprintf(parmfile,"%ld;",op->temperature[j]);
1522    }
1523    fprintf(parmfile,"\n");
1524 }
1525 
1526 fprintf(parmfile,"end\n");
1527 
1528 FClose(parmfile);
1529 
1530 #ifdef MAC
1531    fixmacfile(parmfilename);
1532 #endif
1533 
1534 } /* rec_parmfilewrite */
1535 
1536 
readseedfile(void)1537 void readseedfile(void)
1538 {
1539 long inseed;
1540 
1541 seedfile = fopen("seedfile","r");
1542 
1543 if (!MENU) {
1544    if (!seedfile) {
1545       fprintf(ERRFILE,"\nseedfile not present!\n");
1546       exit(-1);
1547    }
1548    fscanf(seedfile, "%ld%*[^\n]", &inseed);
1549    getc(seedfile);
1550 } else {
1551    if (seedfile) {
1552       fscanf(seedfile, "%ld%*[^\n]", &inseed);
1553       getc(seedfile);
1554    } else {
1555       printf("Random number seed (must be odd)?\n");
1556       scanf("%ld%*[^\n]", &inseed);
1557       getchar();
1558    }
1559 }
1560 
1561 seed[0] = inseed & 2047;
1562 inseed /= 2048;
1563 seed[1] = inseed & 2047;
1564 inseed /= 2048;
1565 seed[2] = inseed & 1023;
1566 
1567 } /* readseedfile */
1568 
1569 
print_menuheader(option_struct * op)1570 void print_menuheader(option_struct *op)
1571 {
1572 printf("\n%s", op->ansi ? "\033[2J\033[H" : "\n");
1573 printf("Metropolis-Hastings Markov Chain Monte Carlo");
1574 printf(" method, version %3.2f\n\n",VERSION_NUM);
1575 } /* print_menuheader */
1576 
1577 
print_startmenu(option_struct * op,boolean writeout)1578 void print_startmenu(option_struct *op, boolean writeout)
1579 {
1580 printf("STARTUP MENU\n");
1581 printf("  #               Goto Data/Search Menus\n");
1582 printf("  O         Save current options to file?  %s\n",
1583    writeout ? "Yes" : "No");
1584 printf("  N          Use trees from previous run?  %s\n",
1585    op->newdata ? "No" : "Yes");
1586 if (op->newdata)
1587    printf("  E        Echo the data at start of run?  %s\n",
1588       op->printdata ? "Yes" : "No");
1589 else op->printdata = FALSE;
1590 printf("  S            Save MHMC output to files?  %s\n",
1591    op->mhmcsave ? "Yes" : "No");
1592 printf("  P Print indications of progress of run?  %s\n",
1593    op->progress ? "Yes" : "No");
1594 #if 0 /* DEBUG debug WARNING warning--no tree writer yet! */
1595 printf("  G                Print out genealogies?  %s\n",
1596    op->treeprint ? "Yes" : "No");
1597 #endif
1598 printf("  U      Use user tree in file \"intree\" ?  %s\n",
1599    op->usertree ? "Yes" : "No");
1600 printf("  V          Number of temperatures used?  %ld\n",
1601    op->numtempchains);
1602 printf("  H                     Infer haplotypes?  %s\n",
1603    op->haplotyping ? "Yes" : "No");
1604 printf("  L       Calculate confidence intervals?  %s\n",
1605    op->profile ? "Yes" : "No");
1606 if (op->haplotyping) {
1607    printf("  A Strategy for haplotype rearrangement?  ");
1608    if(op->hapdrop==0)printf("No resimulation\n");
1609    else if (op->hapdrop==1)printf("Single resimulation\n");
1610    else printf("Double resimulation\n");
1611    printf("  F Frequency of haplotype rearrangement?  %f\n",
1612       op->happrob);
1613 }
1614 printf("  M     Map trait information onto trees?  %s\n",
1615    op->map ? "Yes" : "No");
1616 if (op->map) {
1617    printf("  R        Relative mutation rate of trait?  %f\n",
1618       op->mutrait);
1619    printf("  T     Forward versus back trait mutation?  %f\n",
1620       op->traitratio);
1621    printf("  D                     Frequency of trait?  %f\n",
1622       op->pd);
1623 }
1624 
1625 } /* print_startmenu */
1626 
1627 
print_datamenu(option_struct * op)1628 void print_datamenu(option_struct *op)
1629 {
1630 printf("DATA MENU\n");
1631 printf("  #                     Goto Startup Menu\n");
1632 printf("  D                             Datatype:");
1633 switch(op->datatype) {
1634    case 'a':
1635       printf("  Allelelic Markers\n");
1636       break;
1637    case 'b':
1638       printf("  Brownian-Motion Microsats\n");
1639    case 'm':
1640       if (op->datatype != 'b') printf("  Microsats\n");
1641       break;
1642    case 'n':
1643       printf("  SNPs\n");
1644       printf("  P              SNPs derived from panel?  %s\n",
1645          op->panel ? "Yes" : "No");
1646 #if 0
1647       printf("  A   SNPs are all of the variable sites?  %s\n",
1648          op->full ? "No" : "Yes");
1649       if (op->full) {
1650          printf("  B           Probability of observation?  %6.4f\n",
1651             op->chance_seen);
1652       }
1653 #endif
1654       printf("  N              SNPs with recombination?  %s\n",
1655          op->norecsnp ? "No" : "Yes");
1656    case 's':
1657       if (op->datatype != 'n') printf("  Sequence\n");
1658       printf("  I          Input sequences interleaved?  %s\n",
1659          op->interleaved ? "Yes" : "No, sequential");
1660       printf("  T        Transition/transversion ratio:  %8.4f\n",
1661          locus_ttratio);
1662       printf("  F       Use empirical base frequencies?  %s\n",
1663 	 op->freqsfrom ? "Yes" : "No");
1664       printf("  C   One category of substitution rates?");
1665       if (!op->ctgry || op->categs == 1)
1666          printf("  Yes\n");
1667       else {
1668          printf("  %ld categories\n", op->categs);
1669          if (op->datatype != 'n') {
1670             printf("  R   Rates at adjacent sites correlated?");
1671             if (!op->autocorr) printf("  No, they are independent\n");
1672             else printf("  Yes, mean block length =%6.1f\n", 1.0 / op->lambda);
1673          }
1674       }
1675       printf("  V    Poplulation size equal among loci?  %s\n",
1676          op->same_ne ? "Yes" : "No");
1677       break;
1678    default:
1679       printf("  UNKNOWN\n");
1680       break;
1681 }
1682 
1683 } /* print_datamenu */
1684 
1685 
print_searchmenu(option_struct * op)1686 void print_searchmenu(option_struct *op)
1687 {
1688 printf("SEARCH MENU\n");
1689 printf("  Q      Lots of recombinations expected?  %s\n",
1690    op->fc ? "Yes" : "No");
1691 printf("  H                Hold parameters fixed?");
1692    if (op->holding == 0) printf("  No\n");
1693    else if (op->holding == 1) printf("  Yes, theta fixed\n");
1694         else if (op->holding == 2) printf("  Yes, rec-rate fixed\n");
1695              else printf("  Unknown option!!!\n");
1696 printf("  W      Starting theta equals Watterson?  %s",
1697    op->watt ? "Yes\n" : "No");
1698 if (!op->watt) printf(", initial theta = %6.4f\n", theta0);
1699 printf("  Z          Starting recombination rate:  %e\n", rec0);
1700 printf("  S               Number of short chains?  %6ld\n",
1701    op->numchains[0]);
1702 if (op->numchains[0] > 0) {
1703    printf("  1             Short sampling increment?  %6ld\n",
1704       op->increm[0]);
1705    printf("  2   Number of steps along short chains?  %6ld\n",
1706       op->steps[0]);
1707 }
1708 printf("  L                Number of long chains?  %6ld\n",
1709    op->numchains[1]);
1710 if (op->numchains[1] > 0) {
1711    printf("  3              Long sampling increment?  %6ld\n",
1712       op->increm[1]);
1713    printf("  4    Number of steps along long chains?  %6ld\n",
1714       op->steps[1]);
1715 }
1716 
1717 } /* print_searchmenu */
1718 
1719 
print_menuend(void)1720 void print_menuend(void)
1721 {
1722 printf("\nAre these settings correct? (type Y or the letter for");
1723 printf(" one to change)\n");
1724 } /* print_menuend */
1725 
1726 
initoptions(option_struct * op)1727 void initoptions(option_struct *op)
1728 {
1729 long i;
1730 
1731 /* first some multiple rate-categories code stuff */
1732 op->ctgry = FALSE;
1733 op->rate[0] = 1.0;
1734 op->probcat[0] = 1.0;
1735 op->categs = 1;
1736 op->lambda = 1.0;
1737 op->autocorr = FALSE;  /* FALSE if categs == 1 */
1738 /* end categories code stuff */
1739 
1740 op->interactive = TRUE;
1741 op->newdata = TRUE;
1742 op->mhmcsave = FALSE;
1743 op->interleaved = TRUE;
1744 op->printdata = FALSE;
1745 op->progress = TRUE;
1746 op->treeprint = FALSE;
1747 op->profile = TRUE;
1748 
1749 weightfile = fopen("weightfile","r");
1750 op->weights = (weightfile) ? TRUE : FALSE;
1751 
1752 spacefile = fopen("spacefile","r");
1753 op->spacing = (spacefile) ? TRUE : FALSE;
1754 
1755 op->map = FALSE;
1756 op->mutrait = 1.0;
1757 op->traitratio = 1.0;
1758 op->pd = 0.1;
1759 
1760 op->panel = FALSE;
1761 op->numpanel = NULL;
1762 
1763 op->haplotyping = FALSE;
1764 op->hapdrop = 0;
1765 op->happrob = 0.2;
1766 
1767 op->full = FALSE;
1768 op->chance_seen = 1.0;
1769 op->norecsnp = FALSE;
1770 
1771 op->numtempchains = 1;
1772 op->temperature = (long *)calloc(op->numtempchains,sizeof(long));
1773 for(i = 0; i < op->numtempchains; i++) op->temperature[i] = 1;
1774 op->ctemp = 1;
1775 
1776 op->fc = FALSE;
1777 op->freqsfrom = TRUE;
1778 op->watt = FALSE;
1779 op->usertree = FALSE;
1780 op->plump = TRUE;
1781 op->holding = 0;
1782 op->same_ne = TRUE;
1783 op->ne_ratio = NULL;
1784 op->numchains[0] = 5;
1785 op->increm[0] = 20;
1786 op->steps[0] = 2000;
1787 op->numchains[1] = 1;
1788 op->increm[1] = 20;
1789 op->steps[1] = 50000;
1790 op->datatype = 's';
1791 op->datatypeset = FALSE;
1792 
1793 op->print_recbythmaxcurve = TRUE;
1794 op->thlb = FLAGLONG;
1795 op->thub = FLAGLONG;
1796 op->reclb = FLAGLONG;
1797 op->recub = FLAGLONG;
1798 
1799 } /* initoptions */
1800 
1801 
workmenu1(option_struct * op,char ch,boolean * menu1,boolean * writeout)1802 void workmenu1(option_struct *op, char ch, boolean *menu1, boolean *writeout)
1803 {
1804 long i;
1805 char input[LINESIZE];
1806 
1807 if (strchr("#FLAHONESPGUMVRTD",ch) != NULL) {
1808    switch(ch) {
1809       case '#':
1810          *menu1 = !(*menu1);
1811          break;
1812       case 'V':
1813          printf("Number of different temperatures?\n");
1814          scanf("%ld",&(op->numtempchains));
1815          scanf("%*[^\n]");
1816          do {
1817             printf("Temperature of coldest chain? (must be positive)\n");
1818             scanf("%ld",&(op->temperature[0]));
1819             scanf("%*[^\n]");
1820          } while (op->temperature[0] <= 0.0);
1821          for (i = 1; i < op->numtempchains; i++) {
1822             do {
1823                printf("Temperature of temperature-chain %ld?\n",i+1);
1824                scanf("%ld",&(op->temperature[i]));
1825                scanf("%*[^\n]");
1826             } while(op->temperature[i] <= op->temperature[0]);
1827          }
1828          break;
1829       case 'F':
1830          printf("Probability of proposing a change in haplotype?\n");
1831          scanf("%lf",&(op->happrob));
1832          scanf("%*[^\n]");
1833          break;
1834       case 'L':
1835          op->profile = !op->profile;
1836          break;
1837       case 'H':
1838          op->haplotyping = !op->haplotyping;
1839          break;
1840       case 'O':
1841          *writeout = !(*writeout);
1842          break;
1843       case 'N':
1844          op->newdata = !op->newdata;
1845          break;
1846       case 'E':
1847          op->printdata = !op->printdata;
1848 	 break;
1849       case 'P':
1850          op->progress = !op->progress;
1851          break;
1852       case 'G':
1853          op->treeprint = !op->treeprint;
1854          break;
1855       case 'U':
1856          op->usertree = !op->usertree;
1857          break;
1858       case 'S':
1859          op->mhmcsave = !op->mhmcsave;
1860          break;
1861       case 'M':
1862          op->map = !op->map;
1863          break;
1864       case 'R':
1865          do {
1866             printf("Relative mutation rate of trait?\n");
1867             fgets(input,LINESIZE,stdin);
1868             op->mutrait = atof(input);
1869          } while (op->mutrait <= 0.0);
1870          break;
1871       case 'T':
1872          do {
1873             printf("Ratio of forward to back trait mutation?\n");
1874             fgets(input,LINESIZE,stdin);
1875             op->traitratio = atof(input);
1876          } while (op->traitratio <= 0.0);
1877          break;
1878       case 'D':
1879          do {
1880             printf("Frequency of trait?\n");
1881             fgets(input,LINESIZE,stdin);
1882             op->pd = atof(input);
1883          } while (op->pd <= 0.0 || op->pd >= 1.0);
1884          break;
1885       case 'A':
1886          do {
1887             printf("Number of drops while resimulating (0-2)?\n");
1888             fgets(input,LINESIZE,stdin);
1889             op->hapdrop = atol(input);
1890          } while (op->hapdrop != 0 && op->hapdrop != 1 && op->hapdrop != 2);
1891       default:
1892          fprintf(stderr,"ERROR:Impossible option %c detected!\n",ch);
1893          break;
1894    }
1895 } else fprintf(stderr,"%c is not an option here.\n",ch);
1896 
1897 } /* workmenu1 */
1898 
1899 
workmenu2(option_struct * op,char ch,boolean * menu1,long * numloci,long * numpop)1900 void workmenu2(option_struct *op, char ch, boolean *menu1, long *numloci,
1901    long *numpop)
1902 {
1903 long i;
1904 double probsum;
1905 char input[LINESIZE];
1906 boolean done;
1907 
1908 if(strchr("#NQPDITFCRVHWZS12L34AB",ch) != NULL) {
1909    switch(ch) {
1910       case '#':
1911          *menu1 = !(*menu1);
1912          break;
1913       case 'N':
1914          op->norecsnp = !(op->norecsnp);
1915          if (op->norecsnp) {
1916             if (rec0 || op->holding != 2) {
1917                printf("\nThis SNP model requires recombination");
1918                printf(" rate to be fixed at zero.\n");
1919                printf("Fix starting rec-rate to zero first.\n");
1920                printf("----SNP model unchanged.----\n");
1921                op->norecsnp = FALSE;
1922             }
1923          }
1924          printf("press <enter> or <return> to continue\n");
1925          fgets(input,LINESIZE,stdin);
1926          break;
1927       case 'Q':
1928          op->fc = !op->fc;
1929          break;
1930       case 'D':
1931 /* WARNING--most datatypes disabled!!! */
1932 #if 0
1933          if (op->datatype == 'a') {op->datatype = 'b'; break;}
1934          if (op->datatype == 'b') {op->datatype = 'm'; break;}
1935          if (op->datatype == 'm') {op->datatype = 'n'; break;}
1936          if (op->datatype == 'n') {op->datatype = 's'; break;}
1937          if (op->datatype == 's') {op->datatype = 'a'; break;}
1938 #endif
1939          op->datatypeset = TRUE;
1940          if (op->datatype == 'n') {op->datatype = 's'; break;}
1941          if (op->datatype == 's') {
1942             op->datatype = 'n';
1943             if (op->autocorr) {
1944                printf("Can't use autocorrelation with SNPs\n");
1945                printf("----autocorrelation disabled-----\n");
1946                printf("press <enter> or <return> to continue\n");
1947                fgets(input,LINESIZE,stdin);
1948                op->autocorr = FALSE;
1949             }
1950             break;
1951          }
1952          printf("ERROR:Impossible Datatype %c present\n",op->datatype);
1953          break;
1954 #if 0
1955       case 'A': /* deliberate fall through to case 'B' */
1956          op->full = !op->full;
1957       case 'B':
1958          if (!op->full) break;
1959          do {
1960             printf("Percent chance that a variable site was observed");
1961             printf(" during sequencing?\n");
1962             scanf("%lf",&(op->chance_seen));
1963             scanf("%*[^\n]");
1964             if (op->chance_seen <= 0.0 || op->chance_seen > 1.0)
1965                printf("   the percentage must be between 0 and 1\n");
1966          } while (op->chance_seen <= 0.0 || op->chance_seen > 1.0);
1967          break;
1968 #endif
1969       case 'I':
1970          op->interleaved = !op->interleaved;
1971          break;
1972       case 'T':
1973          do {
1974             printf("Transition/transversion ratio?\n");
1975             fgets(input,LINESIZE,stdin);
1976             locus_ttratio = atof(input);
1977             if (locus_ttratio < 0.5)
1978                printf("TTratio cannot be less than 0.5\n");
1979          } while (locus_ttratio < 0.5);
1980          break;
1981       case 'F':
1982          op->freqsfrom = !op->freqsfrom;
1983          if (!op->freqsfrom) {
1984             printf("Base frequencies for A, C, G, T/U (use blanks");
1985             printf(" to separate)?\n");
1986             scanf("%lf%lf%lf%lf", &freqa, &freqc, &freqg, &freqt);
1987             scanf("%*[^\n]");
1988 	 }
1989          break;
1990       case 'P':
1991          op->panel = !op->panel;
1992          if (op->panel) {
1993             printf("Number of populations?\n");
1994             ;
1995             *numpop = atol(input);
1996             op->numpanel = (long *)calloc(*numpop,sizeof(long));
1997             for(i = 0; i < *numpop; i++) {
1998                printf("Number of panel haplotypes for population");
1999                printf(" %ld?\n",i+1);
2000                fgets(input,LINESIZE,stdin);
2001                op->numpanel[i] = atol(input);
2002             }
2003          } else
2004             if (op->numpanel) {
2005                free(op->numpanel);
2006                op->numpanel = NULL;
2007             }
2008          break;
2009       case 'C':
2010          op->ctgry = !op->ctgry;
2011          if (!op->ctgry) op->autocorr = FALSE;
2012          if (op->ctgry) {
2013             do {
2014                printf("Number of categories ?");
2015                fgets(input,LINESIZE,stdin);
2016                op->categs = atoi(input);
2017             } while (op->categs < 1);
2018             free(op->rate);
2019             free(op->probcat);
2020             printf("Rate for each category? (use a space to");
2021 	    printf(" separate)\n");
2022             op->rate = (double *)calloc(op->categs,sizeof(double));
2023             op->probcat = (double *)calloc(op->categs,sizeof(double));
2024             for (i = 0; i < op->categs; i++)
2025                scanf("%lf*[^\n]", &(op->rate[i]));
2026             getchar();
2027             do {
2028                printf("Probability for each category?");
2029                printf(" (use a space to separate)\n");
2030                for (i = 0; i < op->categs; i++)
2031                   scanf("%lf", &(op->probcat[i]));
2032                scanf("%*[^\n]");
2033                getchar();
2034                done = TRUE;
2035                probsum = 0.0;
2036                for (i = 0; i < op->categs; i++)
2037                   probsum += op->probcat[i];
2038                if (fabs(1.0 - probsum) > 0.001) {
2039                   done = FALSE;
2040                   printf("Probabilities must add up to");
2041                   printf(" 1.0, plus or minus 0.001.\n");
2042                }
2043             } while (!done);
2044 	 }
2045          break;
2046       case 'R':
2047          if (op->datatype == 'n') {
2048             printf("Can't use autocorrelation with SNPs\n");
2049             printf("----autocorrelation disabled-----\n");
2050             printf("press <enter> or <return> to continue\n");
2051             fgets(input,LINESIZE,stdin);
2052             op->autocorr = FALSE;
2053             break;
2054          }
2055          op->autocorr = !op->autocorr;
2056          if (op->autocorr) {
2057             do {
2058                printf("Mean block length of sites having the same ");
2059                printf("rate (greater than 1)?\n");
2060                scanf("%lf%*[^\n]", &(op->lambda));
2061                getchar();
2062             } while (op->lambda <= 1.0);
2063             op->lambda = 1.0 / op->lambda;
2064          }
2065          break;
2066       case 'V':
2067          op->same_ne = !op->same_ne;
2068          if (!op->same_ne) {
2069             printf("Enter number of loci: ");
2070             scanf("%ld",numloci);
2071             if (*numloci == 1) op->same_ne = !op->same_ne;
2072             if (op->ne_ratio) free(op->ne_ratio);
2073             op->ne_ratio = (double *)calloc(*numloci,sizeof(double));
2074             printf("\nEnter relative population size/mutation rate");
2075             printf(" for each locus in input order:\n");
2076             for(i = 0; i < *numloci; i++) {
2077                scanf("%lf",&(op->ne_ratio[i]));
2078                if (op->ne_ratio[i] <= 0) {
2079                   printf("\nratios must be positive, please reenter\n");
2080                   i--;
2081                }
2082             }
2083          }
2084          break;
2085       case 'H':
2086          op->holding += 1;
2087          if (op->holding > 2) op->holding = 0;
2088          break;
2089       case 'W':
2090          op->watt = !op->watt;
2091          if (!op->watt) {
2092             do {
2093                printf("Initial theta estimate?\n");
2094                fgets(input,LINESIZE,stdin);
2095                theta0 = atof(input);
2096             } while (theta0 <= 0.0);
2097          }
2098          break;
2099       case 'Z':
2100          printf("What recombination rate?\n");
2101          do {
2102             fgets(input,LINESIZE,stdin);
2103             rec0 = atof(input);
2104             if (rec0 < 0.0)
2105                printf("recombination rate must be non-negative\n");
2106          } while (rec0 < 0.0);
2107          break;
2108       case 'S':
2109          do {
2110             printf("How many Short Chains?\n");
2111             fgets(input,LINESIZE,stdin);
2112             op->numchains[0] = atoi(input);
2113             if (op->numchains[0] < 0)
2114             printf("Must be non-negative\n");
2115          } while (op->numchains[0] < 0);
2116          break;
2117       case '1':
2118          done = FALSE;
2119          while (!done) {
2120             printf("How often to sample trees?\n");
2121             fgets(input,LINESIZE,stdin);
2122             op->increm[0] = atoi(input);
2123             if (op->increm[0] > 0) done = TRUE;
2124             else printf("Must be a positive integer\n");
2125          }
2126          break;
2127       case '2':
2128          done = FALSE;
2129          while (!done) {
2130             printf("How many short steps?\n");
2131             fgets(input,LINESIZE,stdin);
2132             op->steps[0] = atoi(input);
2133             if (op->steps[0] > 0) done = TRUE;
2134             else printf("Must be a positive integer\n");
2135          }
2136          break;
2137       case 'L':
2138          do {
2139             printf("How many Long Chains?\n");
2140             fgets(input,LINESIZE,stdin);
2141             op->numchains[1] = atoi(input);
2142             if (op->numchains[1] < 1)
2143             printf("Must be a positive integer\n");
2144          } while (op->numchains[1] < 1);
2145          break;
2146       case '3':
2147          done = FALSE;
2148          while (!done) {
2149             printf("How often to sample trees?\n");
2150             fgets(input,LINESIZE,stdin);
2151             op->increm[1] = atoi(input);
2152             if (op->increm[1] > 0) done = TRUE;
2153             else printf("Must be a positive integer\n");
2154          }
2155          break;
2156       case '4':
2157          done = FALSE;
2158          while (!done) {
2159             printf("How many long steps?\n");
2160             fgets(input,LINESIZE,stdin);
2161             op->steps[1] = atoi(input);
2162             if (op->steps[1] > 0) done = TRUE;
2163             else printf("Must be a positive integer\n");
2164          }
2165          break;
2166       default:
2167          fprintf(stderr,"ERROR:Impossible option %c detected!\n",ch);
2168          break;
2169    }
2170 } else fprintf(stderr,"%c is not an option here.\n",ch);
2171 
2172 } /* workmenu2 */
2173 
2174 
2175 /****************************************************************
2176  * checkparmfile() checks the parmfile for internal consistency */
checkparmfile(option_struct * op)2177 void checkparmfile(option_struct *op)
2178 {
2179 boolean error = FALSE;
2180 
2181 if (op->norecsnp) {
2182    if (rec0) {
2183       fprintf(ERRFILE,"ERROR--this snp model is incompatible with a");
2184       fprintf(ERRFILE," non-zero recombination rate.\n");
2185       fprintf(ERRFILE,"   Please change starting rec-rate.\n");
2186       fprintf(outfile,"ERROR--this snp model is incompatible with a");
2187       fprintf(outfile," non-zero recombination rate.\n");
2188       fprintf(outfile,"   Please change starting rec-rate.\n");
2189       error = TRUE;
2190    }
2191    if (op->holding != 2) {
2192       fprintf(ERRFILE,"ERROR--this snp model is incompatible with a");
2193       fprintf(ERRFILE," non-zero recombination rate.\n");
2194       fprintf(ERRFILE,"   Please set rec-rate constant.\n");
2195       fprintf(outfile,"ERROR--this snp model is incompatible with a");
2196       fprintf(outfile," non-zero recombination rate.\n");
2197       fprintf(outfile,"   Please set rec-rate constant.\n");
2198       error = TRUE;
2199    }
2200 }
2201 
2202 if (error) {
2203    fprintf(ERRFILE,"program finished\n");
2204    exit(-1);
2205 }
2206 
2207 } /* checkparmfile */
2208 
2209 
getoptions(option_struct * op)2210 void getoptions(option_struct *op)
2211 /* interactively set options using a very basic menu */
2212 {
2213 long numloci, numpop;
2214 boolean writeout, done, menu1;
2215 char ch, input[LINESIZE];
2216 
2217 /* default initializations */
2218 initoptions(op);
2219 writeout = FALSE;
2220 locus_ttratio = 2.0;
2221 numloci = 1;
2222 numpop = 1;
2223 theta0 = 1.0;
2224 rec0 = 0.0001;
2225 /* end defaults */
2226 
2227 readparmfile(op);
2228 checkparmfile(op);
2229 readseedfile();
2230 
2231 fprintf(outfile, "\nRECOMBINE:  Recombinant HMMC ML method,");
2232 fprintf(outfile, " version %3.2f\n\n",VERSION_NUM);
2233 
2234 if (!MENU) return;
2235 
2236 menu1 = TRUE;
2237 do {
2238    print_menuheader(op);
2239    if (menu1) print_startmenu(op,writeout);
2240    else {print_datamenu(op); print_searchmenu(op);}
2241    print_menuend();
2242    fgets(input,LINESIZE,stdin);
2243    ch = toupper((int)input[0]);
2244    done = (ch == 'Y');
2245    if (!done) {
2246       if (menu1) workmenu1(op,ch,&menu1,&writeout);
2247       else workmenu2(op,ch,&menu1,&numloci,&numpop);
2248    }
2249 } while (!done);
2250 
2251 if (writeout) rec_parmfilewrite(op,numloci,numpop);
2252 
2253 }  /* getoptions */
2254 
2255 
2256 /*********************************************************************
2257  * firstinit() handles initialization for things that are recorded   *
2258  * over multiple loci/populations, and therefore are allocated once. */
firstinit(option_struct * op,data_fmt * data)2259 void firstinit(option_struct *op, data_fmt *data)
2260 {
2261 long i, numloci;
2262 
2263 numloci = getdata_numloci(op,data);
2264 
2265 totchains = op->numchains[0] + op->numchains[1];
2266 
2267 numtrees = MAX(op->steps[0]/op->increm[0],op->steps[1]/op->increm[1]);
2268 
2269 if (op->same_ne) {
2270    op->ne_ratio = (double *)calloc(1,numloci*sizeof(double));
2271    for(i = 0; i < numloci; i++) op->ne_ratio[i] = 1.0;
2272 }
2273 
2274 sametree = (boolean **)calloc(1,numloci * sizeof(boolean *));
2275 sametree[0] = (boolean *)calloc(1,numloci*numtrees * sizeof(boolean));
2276 for(i = 1; i < numloci; i++)
2277    sametree[i] = sametree[0] + i*numtrees;
2278 
2279 model_alloc(op,data);
2280 
2281 freenodes = NULL;
2282 
2283 }  /* firstinit */
2284 
2285 
2286 /********************************************************************
2287  * popinit() initializes things that are specific to one population */
popinit(option_struct * op,data_fmt * data)2288 void popinit(option_struct *op, data_fmt *data)
2289 {
2290 
2291 switch(op->datatype) {
2292    case 'a':
2293       break;
2294    case 'b':
2295    case 'm':
2296       break;
2297    case 'n':
2298    case 's':
2299       rootnum = 0;
2300       break;
2301    default:
2302       fprintf(ERRFILE,"ERROR:popinit, can't get here!\n");
2303       exit(-1);
2304 }
2305 
2306 } /* popinit */
2307 
2308 
2309 /***********************************************************************
2310  * makeinvarvalues() sets up the tip likelikelihoods for the invariant *
2311  * sites tacked on the end of the sequence for SNP data.               */
makeinvarvalues(dnadata * dna,long numcategs,tree * tr)2312 void makeinvarvalues(dnadata *dna, long numcategs, tree *tr)
2313 {
2314 long site, ind, categ, slice;
2315 
2316 site = dna->sites[locus];
2317 slice = 0L; /* we only set slice 0 here; the remainder, if any,
2318   are set elsewhere */
2319 
2320 for(ind = 1; ind <= dna->numseq[population]; ind++)
2321    for(categ = 0; categ < numcategs; categ++) {
2322       tr->nodep[ind]->x->s[site][categ][slice][baseA] = 1.0;
2323       tr->nodep[ind]->x->s[site][categ][slice][baseC] = 0.0;
2324       tr->nodep[ind]->x->s[site][categ][slice][baseG] = 0.0;
2325       tr->nodep[ind]->x->s[site][categ][slice][baseT] = 0.0;
2326 
2327       tr->nodep[ind]->x->s[site+1][categ][slice][baseA] = 0.0;
2328       tr->nodep[ind]->x->s[site+1][categ][slice][baseC] = 1.0;
2329       tr->nodep[ind]->x->s[site+1][categ][slice][baseG] = 0.0;
2330       tr->nodep[ind]->x->s[site+1][categ][slice][baseT] = 0.0;
2331 
2332       tr->nodep[ind]->x->s[site+2][categ][slice][baseA] = 0.0;
2333       tr->nodep[ind]->x->s[site+2][categ][slice][baseC] = 0.0;
2334       tr->nodep[ind]->x->s[site+2][categ][slice][baseG] = 1.0;
2335       tr->nodep[ind]->x->s[site+2][categ][slice][baseT] = 0.0;
2336 
2337       tr->nodep[ind]->x->s[site+3][categ][slice][baseA] = 0.0;
2338       tr->nodep[ind]->x->s[site+3][categ][slice][baseC] = 0.0;
2339       tr->nodep[ind]->x->s[site+3][categ][slice][baseG] = 0.0;
2340       tr->nodep[ind]->x->s[site+3][categ][slice][baseT] = 1.0;
2341    }
2342 } /* makeinvarvalues */
2343 
2344 
2345 /*****************************************************************
2346  * locusinit() initializes things that are specific to one locus */
locusinit(option_struct * op,data_fmt * data)2347 void locusinit(option_struct *op, data_fmt *data)
2348 {
2349 long i, nummarkers, numseq, numsites;
2350 dnadata *dna;
2351 FILE *flipfile;
2352 
2353 dna = data->dnaptr;
2354 
2355 if (op->spacing) read_spacefile(spacefile,op,data);
2356 nummarkers = getdata_nummarkers(op,data);
2357 /* hack done for speed-up */
2358 dna->sitecount[locus][0] = getdata_space(op,data,0L);
2359 dna->markersite[locus][0] = dna->sspace[0][0][getdata_nummarkers(op,data)];
2360 for(i = 1; i < nummarkers; i++) {
2361    dna->sitecount[locus][i] = dna->sitecount[locus][i-1] +
2362                               getdata_space(op,data,i);
2363    dna->markersite[locus][i] = dna->markersite[locus][i-1] +
2364                                dna->sspace[0][0][i-1];
2365 }
2366 numsites = countsites(op,data);
2367 dna->segranges0 = (int *)calloc(numsites,sizeof(int));
2368 dna->segranges1 = (int *)calloc(numsites,sizeof(int));
2369 dna->segranges2 = (int *)calloc(numsites,sizeof(int));
2370 dna->segranges3 = (int *)calloc(numsites,sizeof(int));
2371 for(i = 0; i < numsites; i++) {
2372    dna->segranges0[i] = 0;
2373    dna->segranges1[i] = 1;
2374    dna->segranges2[i] = 2;
2375    dna->segranges3[i] = -1;
2376 }
2377 
2378 treesetup(op,data);
2379 
2380 numseq = getdata_numseq(op,data);
2381 
2382 switch(op->datatype) {
2383    case 'a':
2384       break;
2385    case 'b':
2386    case 'm':
2387       break;
2388    case 'n':
2389       initinvartips(op,data,op->categs,curtree);
2390       if (!op->panel) invardatacheck(op,data);
2391    case 's':
2392       fprintf(outfile, "%4ld Sequences, %4ld Sites\n",numseq,nummarkers);
2393       alogf = (rlrec *)calloc(1,sizeof(rlrec));
2394       alogf->val = (double *)calloc(nummarkers+1,sizeof(double));
2395       contribution =
2396          (contribarr *)calloc(nummarkers+1,sizeof(contribarr));
2397       contribution[0] =
2398          (double *)calloc((nummarkers+1)*op->categs,sizeof(double));
2399       for(i=1;i<nummarkers+1;i++)
2400          contribution[i] = contribution[0] + i*op->categs;
2401       /* now read the categories from the infile, if applicable */
2402       inputcategories(op,data);
2403       /* setup fractional likelihoods at tree tips */
2404       makednavalues(op,data,op->categs,curtree);
2405       /* frequencies must be calculated after makednavalues() called */
2406       if (!op->freqsfrom) {
2407          data->dnaptr->freqa = freqa;
2408          data->dnaptr->freqc = freqc;
2409          data->dnaptr->freqg = freqg;
2410          data->dnaptr->freqt = freqt;
2411       } else empiricaldnafreqs(op,data,curtree);
2412       getbasednafreqs(data->dnaptr,op,locus_ttratio,outfile);
2413       /* table can only be initialized after frequencies are set */
2414       inittable(op,dna);
2415       makesiteptr(op,data);
2416       initweightrat(op,data);
2417       if (op->haplotyping) {
2418          flipfile = fopen("flipfile","r");
2419          read_flipfile(op,data,curtree,flipfile);
2420          fclose(flipfile);
2421          pruneflips(op,data,curtree);
2422       }
2423       break;
2424    default:
2425       fprintf(ERRFILE,"ERROR:locusinit, can't get here!\n");
2426       exit(-1);
2427 }
2428 
2429 
2430 if ((op->increm[0] < 0) || (op->increm[1] < 0)) {
2431    fprintf(ERRFILE,"Error in input sampling increment,");
2432    fprintf(ERRFILE," increment set to 10\n");
2433    if (op->increm[0] < 0)
2434       op->increm[0] = 10;
2435    if (op->increm[1] < 0)
2436       op->increm[1] = 10;
2437 }
2438 /* stuff for recycling x arrays */
2439 numx = 0;
2440 for (i=0;i<XARRAYSIZE;i++)
2441   sparex[i]=NULL;
2442 
2443 } /* locusinit */
2444 
2445 
inputcategories(option_struct * op,data_fmt * data)2446 void inputcategories(option_struct *op, data_fmt *data)
2447 {
2448 /*  char ch; */
2449   long i, extranum, nummarkers;
2450 
2451   nummarkers = getdata_nummarkers(op,data);
2452 
2453   category = (long *)calloc(nummarkers,sizeof(long));
2454 
2455   for (i = 0; i < nummarkers; i++) category[i] = 1;
2456   extranum = 0;
2457 /* DEBUG debug WARNING warning--categories input code commented out!
2458   while (!(eoln(infile))) {
2459     ch = getc(infile);
2460     if (ch == '\n')
2461       ch = ' ';
2462     ch = isupper(ch) ? ch : toupper((int)ch);
2463     if (ch == 'C')
2464       extranum++;
2465     else if (ch != ' ') {
2466       printf("BAD OPTION CHARACTER: %c\n", ch);
2467       exit(-1);
2468     }
2469   }
2470   fscanf(infile, "%*[^\n]");
2471   getc(infile);
2472   for (i = 1; i <= extranum; i++) {
2473     ch = getc(infile);
2474     if (ch == '\n')
2475       ch = ' ';
2476     ch = isupper(ch) ? ch : toupper((int)ch);
2477     if (ch != 'W'){
2478       printf("ERROR: INCORRECT AUXILIARY OPTIONS LINE WHICH STARTS WITH %c\n",
2479 	     ch);
2480       exit(-1);
2481     }
2482   }
2483 */
2484   if (op->categs <= 1)
2485     return;
2486   fprintf(outfile, "\nSite category   Rate of change  Probability\n");
2487   for (i = 1; i <= op->categs; i++)
2488     fprintf(outfile, "%12ld%13.3f%13.3f\n",i,op->rate[i-1],op->probcat[i-1]);
2489   putc('\n', outfile);
2490 }  /* inputcategories */
2491 
2492 
2493 /*******************************************************************
2494  * allocate_nodelet() allocates and initializes a set of nodelets, *
2495  * which are set to be non-tops of the passed "type".  "x" and all *
2496  * other datatype specific fields are NULLed and must be allocated *
2497  * elsewhere.                                                      */
allocate_nodelet(long num,char type)2498 node *allocate_nodelet(long num, char type)
2499 {
2500 static long unique_id=0;
2501 boolean isfirst=TRUE;
2502 long j;
2503 node *p, *q = NULL, *pfirst=NULL;
2504 
2505 for (j = 0; j < num; j++) {
2506    p = (node *)calloc(1,sizeof(node));
2507 
2508    p->top = FALSE;
2509    p->updated = FALSE;
2510    p->number = FLAGLONG;
2511    p->type = type;
2512    p->id = unique_id++;
2513    p->next = q;
2514    p->back = NULL;
2515    p->nayme = NULL;
2516    p->x = NULL;
2517    p->v = p->tyme = p->length = 0.0;
2518    p->z = NULL;
2519 
2520    p->recstart = p->recend = -1L;
2521    p->ranges = NULL;
2522    p->coal = NULL;
2523    p->members = NULL;
2524    p->memberstrue = FALSE;
2525    p->futileflag = 0L;
2526 
2527    p->pop = p->actualpop = -1;
2528    p->lxmax = 0.0;
2529 
2530    if(isfirst){
2531       isfirst=FALSE;
2532       pfirst = p;
2533    }
2534 
2535    q = p;
2536 }
2537 
2538 pfirst->next = q;
2539 
2540 return q;
2541 
2542 } /* allocate_nodelet */
2543 
2544 
2545 /*************************************************************
2546  * allocate_root() allocates and initializes a root for "tr" */
allocate_root(option_struct * op,data_fmt * data,tree * tr)2547 void allocate_root(option_struct *op, data_fmt *data, tree *tr)
2548 {
2549 long i, j, k, numslice, nummarkers;
2550 dnadata *dna;
2551 msatdata *ms;
2552 
2553 tr->nodep[ROOTNUM] = allocate_nodelet(1,'t');
2554 tr->nodep[ROOTNUM]->number = ROOTNUM;
2555 allocate_x(op,data,tr->nodep[ROOTNUM]);
2556 if (op->map) {
2557    tr->nodep[ROOTNUM]->z = NULL;
2558    allocate_z(op,data,tr->nodep[ROOTNUM]);
2559 }
2560 ranges_Malloc(tr->nodep[ROOTNUM],FALSE,0L);
2561 coal_Malloc(tr->nodep[ROOTNUM],FALSE,0L);
2562 tr->nodep[ROOTNUM]->nayme = (char *)calloc(NMLNGTH,sizeof(char));
2563 strncpy(tr->nodep[ROOTNUM]->nayme,"ROOT",4);
2564 
2565 /* guarantee that the root node contributes nothing to the likelihood
2566    of a tree (since its supposed to be at the end of a theoretically
2567    infinite root branch) */
2568 switch(op->datatype) {
2569    case 'a':
2570       break;
2571    case 'b':
2572    case 'm':
2573       ms = data->msptr;
2574       for(i = 0; i < ms->numloci; i++)
2575          for(j = 0; j < MICRO_ALLELEMAX; j++)
2576             tr->nodep[ROOTNUM]->x->a[i][j] = 1.0;
2577       break;
2578    case 'n':
2579    case 's':
2580       dna = data->dnaptr;
2581       numslice = (op->panel) ? NUMSLICE : 1L;
2582       nummarkers = getdata_nummarkers(op,data);
2583       for (i = 0; i < nummarkers; i++) {
2584         for (j = 0; j < op->categs; j++) {
2585           for (k = 0; k < numslice; k++) {
2586             tr->nodep[ROOTNUM]->x->s[i][j][k][baseA] = 1.0;
2587             tr->nodep[ROOTNUM]->x->s[i][j][k][baseC] = 1.0;
2588             tr->nodep[ROOTNUM]->x->s[i][j][k][baseG] = 1.0;
2589             tr->nodep[ROOTNUM]->x->s[i][j][k][baseT] = 1.0;
2590           }
2591         }
2592       }
2593       break;
2594    default:
2595       fprintf(ERRFILE,"ERROR:allocate_root: can't get here!\n");
2596       exit(-1);
2597 }
2598 
2599 } /* allocate_root */
2600 
2601 
2602 /***********************************************************
2603  * allocate_tip() allocates and initializes a tip nodelet. */
allocate_tip(option_struct * op,data_fmt * data,tree * tr,long num)2604 void allocate_tip(option_struct *op, data_fmt *data, tree *tr,
2605    long num)
2606 {
2607 
2608 tr->nodep[num] = allocate_nodelet(1,'t');
2609 tr->nodep[num]->number = num;
2610 tr->nodep[num]->top = TRUE;
2611 tr->nodep[num]->tyme = 0.0;
2612 tr->nodep[num]->nayme = (char *)calloc((NMLNGTH+1),sizeof(char));
2613 allocate_x(op,data,tr->nodep[num]);
2614 if (op->map) {
2615    tr->nodep[num]->z = NULL;
2616    allocate_z(op,data,tr->nodep[num]);
2617 }
2618 ranges_Malloc(tr->nodep[num],TRUE,1L);
2619 if (op->fc) coal_Malloc(tr->nodep[num],TRUE,1L);
2620 
2621 switch(op->datatype) {
2622    case 'a':
2623       break;
2624    case 'b':
2625    case 'm':
2626       break;
2627    case 'n':
2628    case 's':
2629       tr->nodep[num]->ranges[1] = 0L;
2630       tr->nodep[num]->ranges[2] = countsites(op,data)-1;
2631       if (op->fc) {
2632          tr->nodep[num]->coal[1] = 0L;
2633          tr->nodep[num]->coal[2] = countsites(op,data)-1;
2634       }
2635       break;
2636    default:
2637       fprintf(ERRFILE,"ERROR:allocate_tip: can't get here!\n");
2638       exit(-1);
2639 }
2640 
2641 } /* allocate_tip */
2642 
2643 
2644 /*******************************************************************
2645  * allocate_interior() allocates and initializes an interior node. */
allocate_interior(option_struct * op,data_fmt * data,tree * tr,long num)2646 void allocate_interior(option_struct *op, data_fmt *data, tree *tr, long num)
2647 {
2648 
2649 if (freenodes == NULL) tr->nodep[num] = allocate_nodelet(3,'c');
2650 else {tr->nodep[num] = freenodes; freenodes = freenodes->back;}
2651 tr->nodep[num]->number = tr->nodep[num]->next->number =
2652    tr->nodep[num]->next->next->number = num;
2653 
2654 } /* allocate_interior */
2655 
2656 
2657 /***********************************************************************
2658  * init_creature() initializes an already allocated creature structure */
init_creature(creature * cr,long numhaplotypes)2659 void init_creature(creature *cr, long numhaplotypes)
2660 {
2661 
2662 cr->numhaplotypes = numhaplotypes;
2663 cr->numflipsites = 0;
2664 cr->haplotypes = (node **)calloc(numhaplotypes,sizeof(node *));
2665 cr->flipsites = NULL;
2666 
2667 } /* init_creature */
2668 
2669 
2670 /***************************************************************
2671  * treesetup() allocates and initializes the global "curtree". *
2672  * Curtree's tymelist will be handled later.                   */
treesetup(option_struct * op,data_fmt * data)2673 void treesetup(option_struct *op, data_fmt *data)
2674 {
2675 long i, j, whichtip, numtips, numnodes, numcreatures;
2676 node *p;
2677 
2678 numtips = getdata_numtips(op,data);
2679 numnodes = 2 * numtips;
2680 numcreatures = numtips/NUMHAPLOTYPES;
2681 curtree = (tree *)calloc(1,sizeof(tree));
2682 curtree->nodep = (node **)calloc(numnodes,sizeof(node *));
2683 
2684 allocate_root(op,data,curtree);
2685 
2686 for(i = 1; i <= numtips; i++) allocate_tip(op,data,curtree,i);
2687 for(i = numtips+1; i < numnodes; i++) allocate_interior(op,data,curtree,i);
2688 for(p = freenodes; p != NULL; p = p->back, i++)
2689    p->number = p->next->number = p->next->next->number = i;
2690 nodectr = i;
2691 curtree->likelihood = NEGMAX;
2692 curtree->coalprob = NEGMAX;
2693 curtree->numcoals = numtips-1;
2694 curtree->numrecombs = 0;
2695 
2696 if (op->haplotyping) {
2697    curtree->creatures =
2698       (creature *)calloc(numcreatures,sizeof(creature));
2699    for(i = 0, whichtip = 1; i < numcreatures; i++) {
2700       init_creature(&curtree->creatures[i],NUMHAPLOTYPES);
2701       for(j = 0; j < curtree->creatures[i].numhaplotypes; j++)
2702          curtree->creatures[i].haplotypes[j] = curtree->nodep[whichtip++];
2703    }
2704    if (whichtip-1 > numtips)
2705       fprintf(ERRFILE,"\ntreesetup--whichtip too big!\n");
2706 } else curtree->creatures = NULL;
2707 
2708 newtymenode(&curtree->tymelist);
2709 curtree->tymelist->branchlist = (node **)calloc(numtips,sizeof(node *));
2710 curtree->tymelist->eventnode = curtree->nodep[1];
2711 
2712 } /* treesetup */
2713 
2714 
2715 /***************************************************************
2716  * end_of_population_free frees the working tree, curtree; and *
2717  * the freenode list plus the sparex array.                    */
end_of_population_free(option_struct * op,data_fmt * data)2718 void end_of_population_free(option_struct *op, data_fmt *data)
2719 {
2720 dnadata *dna;
2721 
2722 dna = data->dnaptr;
2723 
2724 free(op->ne_ratio);
2725 
2726 switch(op->datatype) {
2727     case 'a':
2728        break;
2729     case 'b':
2730     case 'm':
2731        break;
2732     case 'n':
2733     case 's':
2734        break;
2735     default:
2736        fprintf(ERRFILE,"ERROR:end_of_population_free, can't be here!\n");
2737        exit(-1);
2738 }
2739 
2740 } /* end_of_population_free */
2741 
2742 
2743 /***************************************************
2744  * end_of_locus_free() frees locus specific stuff. */
end_of_locus_free(option_struct * op,data_fmt * data)2745 void end_of_locus_free(option_struct *op, data_fmt *data)
2746 {
2747 long i;
2748 node *p, *q;
2749 
2750 free(data->siteptr);
2751 
2752 switch(op->datatype) {
2753     case 'a':
2754        break;
2755     case 'b':
2756     case 'm':
2757        break;
2758     case 'n':
2759     case 's':
2760        free(category);
2761        free(alogf->val);
2762        free(alogf);
2763        /* free the working arrays */
2764        free(weightrat);
2765        free(tbl);
2766        free(contribution[0]);
2767        free(contribution);
2768 #if 0 /* pre-heating */
2769        freetree(op,data,curtree);
2770 #endif
2771        for(i = 0; i < op->numtempchains; i++)
2772           freetree(op,data,temptrees[i]);
2773        for(p = freenodes; p != NULL; p = q) {
2774           q = p->back;
2775           freenodelet(op,data,p->next->next);
2776           freenodelet(op,data,p->next);
2777           freenodelet(op,data,p);
2778        }
2779        freenodes = NULL;
2780        /* the sparex arrays must be done after the tree and freenodes
2781           have been freed (or anything else that uses free_x to
2782           free it's xarrays). */
2783        for(i = 0; i < numx; i++) {
2784           free(sparex[i]->s[0][0][0]);
2785           free(sparex[i]->s[0][0]);
2786           free(sparex[i]->s[0]);
2787           free(sparex[i]->s);
2788           free(sparex[i]);
2789           sparex[i] = NULL;
2790        }
2791        numx = 0;
2792        break;
2793    default:
2794        fprintf(ERRFILE,"ERROR:end_of_locus_free, can't be here!\n");
2795        exit(-1);
2796 }
2797 
2798 } /* end_of_locus_free */
2799 
2800 
sitecompare(dnadata * dna,long site1,long site2)2801 boolean sitecompare(dnadata *dna, long site1, long site2)
2802 {
2803 long i;
2804 
2805 for(i = 0; i < dna->numseq[population]; i++)
2806    if(dna->seqs[population][locus][i][site1] !=
2807       dna->seqs[population][locus][i][site2])
2808       return FALSE;
2809 
2810 return TRUE;
2811 } /* sitecompare */
2812 
2813 
2814 /*****************************************************************
2815  * makesiteptr creates the siteptr array:                        *
2816  * if (!siteptr[site]) there is no alias, otherwise potentially  *
2817  * use siteptr[site]-1 for the alias site;                       *
2818  * if (siteptr[site]-1 < 0) then the alias is currently unusable *
2819  * due to recombination.                                         */
makesiteptr(option_struct * op,data_fmt * data)2820 void makesiteptr(option_struct *op, data_fmt *data)
2821 {
2822 long whichsite, lastsite, i, *ptrarray;
2823 boolean found;
2824 dnadata *dna;
2825 
2826 dna = data->dnaptr;
2827 
2828 if (op->datatype == 'n') lastsite = dna->sites[locus]+NUMINVAR;
2829 else lastsite = dna->sites[locus];
2830 
2831 ptrarray = (long *)calloc(lastsite,sizeof(long));
2832 ptrarray[0] = 0;
2833 
2834 #if ALIASING
2835 for(whichsite = 1; whichsite < lastsite; whichsite++) {
2836    if (op->datatype == 'n')
2837       if (whichsite >= dna->sites[locus]) {
2838          ptrarray[whichsite] = 0;
2839          continue;
2840       }
2841    found = FALSE;
2842    for(i = whichsite - 1; i >= 0; i--) {
2843       if(sitecompare(dna,i,whichsite)) {
2844          ptrarray[whichsite] = i+1;
2845          found = TRUE;
2846          break;
2847       }
2848    }
2849    if (found) continue;
2850    ptrarray[whichsite] = 0;
2851 }
2852 #endif
2853 
2854 #if !ALIASING
2855 for(i = 0; i < dna->sites[locus]; i++) ptrarray[i] = 0;
2856 #endif
2857 
2858 data->siteptr = ptrarray;
2859 
2860 } /* makesiteptr */
2861 
2862 
read_line(FILE * source,char * line)2863 void read_line(FILE *source, char *line)
2864 {
2865 char *p;
2866 
2867 fgets(line,LINESIZE,source);
2868 while(line[0] == '\n' || line[0] == '#') fgets(line,LINESIZE,source);
2869 if((p = (char *)strchr(line,'\n')) != NULL) *p = '\0';
2870 
2871 while(isspace(*line)) *line++;
2872 if(*line == '\0') read_line(source,line);
2873 
2874 } /* read_line */
2875 
2876 
read_header(option_struct * op,long * numpop,long * numloci,long ** numsites,char * title,char * dlm)2877 void read_header(option_struct *op, long *numpop, long *numloci,
2878    long **numsites, char *title, char *dlm)
2879 {
2880 long i;
2881 char *input, *tok, temp;
2882 
2883 input = (char *)calloc(LINESIZE,sizeof(char));
2884 
2885 read_line(infile,input);
2886 
2887 switch(lowercase(input[0])) {
2888    case 'a':
2889       break;
2890    case 'b':
2891    case 'm':
2892       sscanf(input,"%c%ld%ld%c%[^\n]",&temp,numpop,numloci,dlm,title);
2893       if (op->datatypeset)
2894          if (op->datatype != temp) {
2895             fprintf(ERRFILE,"\nYou chose a datatype of %c,",op->datatype);
2896             fprintf(ERRFILE,"but your infile shows a datatype of %c\n",temp);
2897             exit(-1);
2898          }
2899       op->datatype = temp;
2900       numsites = NULL;
2901       break;
2902    case 'n':
2903       if (op->autocorr) {
2904          printf("\nSNP data type is incompatible with autocorrelation");
2905          printf("\n----autocorrelation disabled-----\n");
2906          printf("press <enter> or <return> to continue\n");
2907          fgets(input,LINESIZE,stdin);
2908          fprintf(outfile,"\nSNP data type is incompatible with");
2909          fprintf(outfile," autocorrelation");
2910          fprintf(outfile,"\n----autocorrelation disabled-----\n");
2911          op->autocorr = FALSE;
2912       }
2913    case 's':
2914       sscanf(input,"%c%ld%ld%[^\n]",&temp,numpop,numloci,title);
2915       if (op->datatypeset)
2916          if (op->datatype != temp) {
2917             fprintf(ERRFILE,"\nYou chose a datatype of \"%c\",",op->datatype);
2918             fprintf(ERRFILE," but your infile shows a datatype");
2919             fprintf(ERRFILE," of \"%c\".\n",temp);
2920             exit(-1);
2921          }
2922       op->datatype = temp;
2923       read_line(infile,input);
2924       (*numsites) = (long *)calloc(*numloci,sizeof(long));
2925       for(i = 0; i < *numloci; i++) {
2926           while(isspace((int)*input)) (*input)++;
2927           if(i == 0) tok = strtok(input," ");
2928           else tok = strtok(NULL," ");
2929           (*numsites)[i] = atol(tok);
2930       }
2931       dlm = NULL;
2932       break;
2933    default:
2934       fprintf(ERRFILE,"WARNING--datatype not specified in infile\n");
2935       switch(op->datatype) {
2936          case 'a':
2937             break;
2938          case 'b':
2939             break;
2940          case 'm':
2941             sscanf(input,"%ld%ld%c%[^\n]",numpop,numloci,dlm,title);
2942             break;
2943          case 'n':
2944          case 's':
2945             sscanf(input,"%ld%ld%[^\n]",numpop,numloci,title);
2946             read_line(infile,input);
2947             (*numsites) = (long *)calloc(*numloci,sizeof(long));
2948             for(i = 0; i < *numloci; i++) {
2949                 while(isspace((int)*input)) (*input)++;
2950                 if(locus == 0) tok = strtok(input," ");
2951                 else tok = strtok(NULL," ");
2952                 (*numsites)[i] = atol(tok);
2953             }
2954             break;
2955          default:
2956             fprintf(ERRFILE,"ERROR:unknown datatype %c\n",op->datatype);
2957             fprintf(ERRFILE,"only the types a, m, s");
2958             fprintf(ERRFILE,"(electrophoretic alleles,\n");
2959             fprintf(ERRFILE,"microsatellite data, sequence data)");
2960             fprintf(ERRFILE,"are allowed.\n");
2961             exit(-1L);
2962             break;
2963       }
2964       break;
2965 }
2966 
2967 free(input);
2968 
2969 } /* read_header */
2970 
2971 
read_popheader(long * numind,char * poptitle)2972 void read_popheader(long *numind, char *poptitle)
2973 {
2974 char *input;
2975 
2976 input = (char *)calloc(LINESIZE,sizeof(char));
2977 
2978 read_line(infile,input);
2979 sscanf(input,"%ld%[^\n]",numind,poptitle);
2980 
2981 free(input);
2982 
2983 } /* read_popheader */
2984 
2985 
getinput(option_struct * op,data_fmt * data)2986 void getinput(option_struct *op, data_fmt *data)
2987 {
2988 long i, numpop, numloci, numind, *numsites;
2989 char *title, *poptitle, dlm;
2990 
2991 title = (char *)calloc(LINESIZE,sizeof(char));
2992 poptitle = (char *)calloc(LINESIZE,sizeof(char));
2993 
2994 /* setup data structures and read in the first population's
2995    worth of data */
2996 read_header(op,&numpop,&numloci,&numsites,title,&dlm);
2997 read_popheader(&numind,poptitle);
2998 setupdata(data,op->datatype,numpop,numloci,numind,numsites,title);
2999 setpopstuff(data,0L,numind,poptitle,op->datatype);
3000 read_popdata(infile,data,0L,op);
3001 /* "true haplotype" code */
3002 if (TRUEHAP) {
3003   readtruehaps(op,&truehaps,numpop,numloci,numind,numsites);
3004   copyhaps(op,data,&starthaps,numpop,numloci,numind,numsites);
3005 }
3006 
3007 /* now read in all other populations' data */
3008 for(i = 1; i < numpop; i++) {
3009    read_popheader(&numind,poptitle);
3010    setpopstuff(data,i,numind,poptitle,op->datatype);
3011    read_popdata(infile,data,i,op);
3012 }
3013 
3014 
3015 if (op->datatype == 's' || op->datatype == 'n') {
3016    if (op->weights)
3017       inputdnaweights(data->dnaptr->sites[locus],data->dnaptr,op);
3018    free(numsites);
3019 }
3020 
3021 free(poptitle);
3022 free(title);
3023 
3024 }  /* getinput */
3025 
3026 
getnodata(option_struct * op,data_fmt * data)3027 void getnodata(option_struct *op, data_fmt *data)
3028 {
3029 long i, numpop, numloci, numchains, numind, *numsites;
3030 char *title, *poptitle, dlm;
3031 FILE *kks;
3032 
3033 /* debug DEBUG warning WARNING--bogus default settings */
3034 op->datatype = 's';
3035 numpop = 1;
3036 numind = 1;
3037 title = "no data run";
3038 dlm = ' ';
3039 poptitle = "first pop";
3040 
3041 kks = fopen("kks","r");
3042 fscanf(kks,"%ld %ld",&numloci,&numchains);
3043 numsites = (long *)calloc(numloci,sizeof(long));
3044 
3045 /* debug DEBUG warning WARNING--bogus default settings */
3046 for(i = 0; i < numloci; i++) numsites[i] = 1;
3047 
3048 setupdata(data,op->datatype,numpop,numloci,numind,numsites,title);
3049 setpopstuff(data,0L,numind,poptitle,op->datatype);
3050 
3051 fseek(kks,0,SEEK_SET);
3052 fclose(kks);
3053 
3054 } /* getnodata */
3055 
3056 
watterson(option_struct * op,data_fmt * data)3057 double watterson(option_struct *op, data_fmt *data)
3058 {
3059   /* estimate theta using method of Watterson */
3060   long i, j, k, kn, numprivate, numsites;
3061   boolean varies, private;
3062   double watter;
3063   dnadata *dna;
3064 
3065   dna = data->dnaptr;
3066   numprivate = kn = 0;
3067 
3068   for (i = 0; i < dna->sites[locus]; i++) {
3069     varies = FALSE;
3070     private = TRUE;
3071     for (j = 1; j < dna->numseq[population]; j++) {
3072       if (dna->seqs[population][locus][j][i] !=
3073           dna->seqs[population][locus][0][i]) {
3074         if (varies) {
3075            for(k = 1; k < dna->numseq[population]; k++) {
3076               if (dna->seqs[population][locus][k][i] !=
3077                   dna->seqs[population][locus][j][i])
3078                  private = FALSE;
3079            }
3080         }
3081 	varies = TRUE;
3082       }
3083     }
3084     if (varies) kn++;
3085     if (private && varies) numprivate++;
3086   }
3087   watter = 0.0;
3088   if (kn > 0) {
3089     for (i = 1; i < dna->numseq[population]; i++)
3090       watter += 1.0 / i;
3091     numsites = countsites(op,data);
3092     watter = kn / (numsites * watter);
3093     fprintf(outfile,"\nThere are %ld variable sites in the data\n",kn);
3094 
3095     if (kn == 1) {
3096        op->holding = 2;
3097        fprintf(outfile,"\nWARNING:  There is only 1 variable site in");
3098        fprintf(outfile," this data set.\nRecombination-rate cannot");
3099        fprintf(outfile," be accurately estimated in this case.\n");
3100        fprintf(outfile,"The \"estimate\" of rec-rate has been fixed");
3101        fprintf(outfile," to its starting value.\n\n");
3102 
3103        printf("\nWARNING:  There is only 1 variable site in");
3104        printf(" this data set.\nRecombination-rate cannot");
3105        printf(" be accurately estimated in this case.\n");
3106        printf("The \"estimate\" of rec-rate has been fixed");
3107        printf(" to its starting value.\n\n");
3108     } else {
3109        if (kn-numprivate < 2) {
3110           op->holding = 2;
3111           fprintf(outfile,"\nWARNING:  Too many of the variable sites ");
3112           fprintf(outfile,"are uninformative.\nRecombination-rate ");
3113           fprintf(outfile,"cannot be accurately estimated in this ");
3114           fprintf(outfile,"case.\nThe \"estimate\" of rec-rate has ");
3115           fprintf(outfile,"been fixed to its starting value.\n\n");
3116 
3117           printf("\nWARNING:  Too many of the variable sites ");
3118           printf("are uninformative.\nRecombination-rate ");
3119           printf("cannot be accurately estimated in this ");
3120           printf("case.\nThe \"estimate\" of rec-rate has ");
3121           printf("been fixed to its starting value.\n\n");
3122        }
3123     }
3124 
3125     return watter;
3126   }
3127   fprintf(outfile, "Warning:  There are no variable sites");
3128   fprintf(outfile, " in this data set.\n\n");
3129   if (MENU) printf("Warning:  There are no variable sites in this data set.\n");
3130   else {
3131      fprintf(simlog, "Warning:  There are no variable sites");
3132      fprintf(simlog, " in this data set.\n\n");
3133   }
3134   exit(-1);
3135 }  /* watterson */
3136 
3137 
3138 /**********************************************************************
3139  * invardatacheck() is called to check non-panel SNP data for         *
3140  * invariant sites, which are illegal under that model.  If it finds  *
3141  * any, it changes that site to a datatype of "unknown"; "x" is used. */
invardatacheck(option_struct * op,data_fmt * data)3142 void invardatacheck(option_struct *op, data_fmt *data)
3143 {
3144 long marker, seq, nummarkers, numseq;
3145 char **dna;
3146 boolean varies;
3147 
3148 nummarkers = getdata_nummarkers(op,data);
3149 numseq = getdata_numseq(op,data);
3150 
3151 dna = data->dnaptr->seqs[population][locus];
3152 
3153 for(marker = 0; marker < nummarkers; marker++) {
3154    varies = FALSE;
3155    for(seq = 1; seq < numseq; seq++) {
3156       if(dna[0][marker] != dna[seq][marker]) {
3157          varies = TRUE;
3158          break;
3159       }
3160    }
3161    if (!varies)
3162       for(seq = 0; seq < numseq; seq++) dna[seq][marker] = 'X';
3163 }
3164 
3165 } /* invardatacheck */
3166 
3167 
3168 /*********************************************************************
3169  * buildtymelist() allocates space and initializes all the fields of *
3170  * a tymelist except for failure to initialize the branchlists.      *
3171  * Those are handled by finishsetup().                               *
3172  *                                                                   *
3173  * WARNING buildtymelist() currently assumes a non-recombinant tree! */
buildtymelist(tree * tr,option_struct * op,data_fmt * data,node * p)3174 void buildtymelist(tree *tr, option_struct *op, data_fmt *data, node *p)
3175 {
3176 long numentries;
3177 tlist *t, *u;
3178 
3179 t = tr->tymelist;
3180 
3181 newtymenode(&u);
3182 
3183 numentries = getdata_numtips(op,data);
3184 u->branchlist = (node **)calloc(numentries,sizeof(node *));
3185 
3186 u->eventnode = p;
3187 
3188 while (t != NULL) {
3189    if (u->eventnode->tyme < t->eventnode->tyme) {
3190       u->prev = t->prev;
3191       t->prev = u;
3192       u->succ = t;
3193       u->prev->succ = u;
3194       break;
3195    }
3196    if (t->succ != NULL)
3197       t = t->succ;
3198    else {
3199       t->succ = u;
3200       u->prev = t;
3201       u->succ = NULL;
3202       break;
3203    }
3204 }
3205 
3206 } /* buildtymelist */
3207 
3208 
3209 /* WARNING warning orient assumes a non-recombinant tree */
orient(tree * tr,option_struct * op,data_fmt * data,node * p)3210 void orient(tree *tr, option_struct *op, data_fmt *data, node *p)
3211 {
3212 long lastsite;
3213 
3214 lastsite = countsites(op,data)-1;
3215 
3216   if (istip(p)) {
3217     return;
3218   }
3219 
3220   curtree->nodep[p->number] = p;  /* insure that curtree->nodep points
3221                                     to nodes with info */
3222 
3223  /* since p is a top nodelet, it needs to actually store
3224     likelihood information, x is a NULL pointer
3225     in all other non-tip nodelets */
3226   p->top = TRUE;
3227   allocate_x(op,data,p);
3228   if (op->map) {
3229      p->z = NULL;
3230      allocate_z(op,data,p);
3231   }
3232   ranges_Malloc(p,TRUE,1L);
3233 if (op->fc) {
3234   coal_Malloc(p,TRUE,1L);
3235   p->ranges[1] = p->coal[1] = 0;
3236   p->ranges[2] = p->coal[2] = lastsite;
3237 } else {
3238   p->ranges[1] = 0;
3239   p->ranges[2] = lastsite;
3240 }
3241   p->next->top = FALSE;
3242   free_x(op,p->next);
3243   ranges_Malloc(p->next,FALSE,0L);
3244   coal_Malloc(p->next,FALSE,0L);
3245   p->next->next->top = FALSE;
3246   free_x(op,p->next->next);
3247   ranges_Malloc(p->next->next,FALSE,0L);
3248   coal_Malloc(p->next->next,FALSE,0L);
3249 
3250   orient(tr,op,data,p->next->back);
3251   orient(tr,op,data,p->next->next->back);
3252 
3253   p->tyme = p->next->length + p->next->back->tyme;
3254   p->next->tyme = p->tyme;
3255   p->next->next->tyme = p->tyme;
3256   if (p->number == curtree->root->back->number) {
3257     p->back->top = FALSE;
3258     p->back->tyme = ROOTLENGTH;
3259   }
3260 
3261   buildtymelist(tr,op,data,p);
3262 
3263 }  /* orient */
3264 
plumptree(option_struct * op,data_fmt * data,double th0)3265 void plumptree(option_struct *op, data_fmt *data, double th0)
3266 /* change an input tree into a perfect coalescent tree for theta0 */
3267 /* WARNING--doesn't work on a tree with recombinations!!!!! */
3268 {
3269   tlist *t;
3270   long k;
3271   double tyme;
3272 
3273   /* we assume that tips are tyme 0 already */
3274   t = curtree->tymelist->succ;
3275 
3276   k = getdata_numtips(op,data);
3277   tyme = 0.0;
3278   do {
3279     tyme += th0/(k*(k-1));
3280     if (!istip(t->eventnode)) {
3281       t->eventnode->tyme = tyme;
3282       t->eventnode->next->tyme = tyme;
3283       t->eventnode->next->next->tyme = tyme;
3284     }
3285     k--;
3286     t = t->succ;
3287   } while (t != NULL);
3288 
3289   /* now you must call finishsetup and initbranchlist or a disaster
3290   will happen! */
3291 } /* plumptree */
3292 
finishsetup(option_struct * op,data_fmt * data,node * p)3293 void finishsetup(option_struct *op, data_fmt *data, node *p)
3294 {
3295   if (istip(p)) {
3296     ltov(op,data,p);
3297     return;
3298   }
3299   ltov(op,data,p);
3300   finishsetup(op,data,p->next->back);
3301   finishsetup(op,data,p->next->next->back);
3302   return;
3303 } /* finishsetup */
3304 
initbranchlist(option_struct * op,data_fmt * data)3305 void initbranchlist(option_struct *op, data_fmt *data)
3306 {
3307   tlist *t;
3308   node *p, *q;
3309   long i, j, k, n, numtips;
3310 
3311   t = curtree->tymelist;
3312   numtips = n = getdata_numtips(op,data);
3313   t->numbranch = numtips;
3314   for(i = 0; i < numtips; i++) t->branchlist[i] = curtree->nodep[i+1];
3315   t->age = t->succ->eventnode->tyme;
3316   t = t->succ;
3317   /* WARNING assumes initial tree is not recombinant! */
3318   for (i = 0; i < numtips-1; i++) {
3319     /* for each interior node, do... */
3320     n--;
3321     t->numbranch = n;
3322     if (n == 1)
3323       t->age = t->eventnode->tyme + ROOTLENGTH;
3324     else
3325       t->age = t->succ->eventnode->tyme;
3326     p = t->eventnode->next->back;
3327     q = t->eventnode->next->next->back;
3328     k = 0;
3329     for (j = 0; j < t->prev->numbranch ; j++) {
3330       /* for the number of branches above the coalescent node, do...*/
3331       if (t->prev->branchlist[j] != p && t->prev->branchlist[j] != q) {
3332 	t->branchlist[k] = t->prev->branchlist[j];
3333 	k++;
3334       }
3335     }
3336     t->branchlist[t->numbranch - 1] = t->eventnode;
3337     t = t->succ;
3338   }
3339 }  /* initbranchlist */
3340 
inittable(option_struct * op,dnadata * dna)3341 void inittable(option_struct *op, dnadata *dna)
3342 {
3343   long i;
3344   tbl = (valrec *)calloc(1,op->categs*sizeof(valrec));
3345   /* Define a lookup table. Precompute values and store them in a table */
3346   for (i = 0; i < op->categs; i++) {
3347     tbl[i].rat_xi = op->rate[i] * dna->xi;
3348     tbl[i].rat_xv = op->rate[i] * dna->xv;
3349   }
3350 }  /* inittable */
3351 
initweightrat(option_struct * op,data_fmt * data)3352 void initweightrat(option_struct *op, data_fmt *data)
3353 {
3354 /* WARNING non-DNA data is not yet able to do this! */
3355   long i, nummarkers;
3356 
3357   nummarkers = getdata_nummarkers(op,data);
3358   weightrat = (double *)calloc(nummarkers,sizeof(double));
3359   sumweightrat = 0.0;
3360   for (i = 0; i < nummarkers; i++) {
3361     weightrat[i] = data->dnaptr->dnaweight[i] * op->rate[category[i]-1];
3362     sumweightrat += weightrat[i];
3363   }
3364 }  /* initweightrat */
3365 
rec_outtree(node * p,boolean first,FILE ** usefile)3366 void rec_outtree(node *p, boolean first, FILE **usefile)
3367 /*  write out a recombinant tree in KYB format; call with
3368     curtree.root->back.  Calling program should append a
3369     semicolon.  "first" is used to avoid an extra comma
3370     in the tree list and should be TRUE when this is called. */
3371 {
3372   long i;
3373 
3374   if(istip(p)) {
3375     if(!first)fprintf(*usefile,",");
3376     else first=FALSE;
3377     fprintf(*usefile,"\"");
3378     for(i=0;i<NMLNGTH;i++) {
3379       if(p->nayme[i]==' ') break;
3380       else putc(p->nayme[i],*usefile);
3381     }
3382     fprintf(*usefile,"\":%f",p->length);
3383     return;
3384   }
3385   if(isrecomb(p)) {
3386     if(!first)fprintf(*usefile,",");
3387     else first=FALSE;
3388     fprintf(*usefile,"%ld",p->number);
3389     fprintf(*usefile,"{%ld-%ld}",p->recstart,p->recend);
3390     fprintf(*usefile,":%f",p->length);
3391     p->updated = !p->updated;
3392     if (p->updated) {
3393       if (p->next->top) rec_outtree(p->next->back, first, usefile);
3394       else rec_outtree(p->next->next->back, first, usefile);
3395     }
3396     return;
3397   } else { /* p is coalescent */
3398     if(!first)fprintf(*usefile,",");
3399     else first=FALSE;
3400     fprintf(*usefile,"*%ld",p->number);
3401     fprintf(*usefile,":%f",p->length);
3402     rec_outtree(p->next->back, first, usefile);
3403     rec_outtree(p->next->next->back, first, usefile);
3404     return;
3405   }
3406 } /* rec_outtree */
3407 
treeout(node * p,long s,FILE ** usefile)3408 void treeout(node *p, long s, FILE **usefile)
3409 {
3410   /* write out file with representation of final tree */
3411   long i, n, w;
3412   char c;
3413   double x;
3414 
3415   if (istip(p)) {
3416     n = 0;
3417     for (i = 1; i <= NMLNGTH; i++) {
3418       if (p->nayme[i - 1] != ' ')
3419 	n = i;
3420     }
3421     for (i = 0; i < n; i++) {
3422       c = p->nayme[i];
3423       if (c == ' ')
3424 	c = '_';
3425       putc(c, *usefile);
3426     }
3427     col += n;
3428   } else {
3429     putc('(', *usefile);
3430     col++;
3431     treeout(p->next->back, s, usefile);
3432     putc(',', *usefile);
3433     col++;
3434     if (col > 45) {
3435       putc('\n', *usefile);
3436       col = 0;
3437     }
3438     treeout(p->next->next->back, s, usefile);
3439     putc(')', *usefile);
3440     col++;
3441   }
3442   if (p->v >= 1.0)
3443     x = -1.0;
3444   else
3445     x = lengthof(p);
3446   if (x > 0.0)
3447     w = (long)(0.4343 * log(x));
3448   else if (x == 0.0)
3449     w = 0;
3450   else
3451     w = (long)(0.4343 * log(-x)) + 1;
3452   if (w < 0)
3453     w = 0;
3454   if (p == curtree->root->back)
3455     putc(';', *usefile);
3456   else {
3457     fprintf(*usefile, ":%*.10f", (int)(w + 7), x);
3458     col += w + 8;
3459   }
3460 }  /* treeout */
3461 
3462 
snp_eval_calcrange(option_struct * op,data_fmt * data,tree * tr,boolean first,long start,long finish,long numcategs)3463 double snp_eval_calcrange(option_struct *op, data_fmt *data, tree *tr,
3464    boolean first, long start, long finish, long numcategs)
3465 {
3466 double sumterm, sumterm2, lterm, lterm2, temp;
3467 long i, j, k, slice, snpstart, snpfinish, nummarkers, numinvar, *siteptr;
3468 contribarr term, term2, clai;
3469 node *p;
3470 double *x1;
3471 dnadata *dna;
3472 
3473 dna = data->dnaptr;
3474 
3475 term = (double *)calloc(numcategs,sizeof(double));
3476 term2 = (double *)calloc(numcategs,sizeof(double));
3477 clai = (double *)calloc(numcategs,sizeof(double));
3478 
3479 slice = 0;
3480 temp = 0.0;
3481 p = tr->root->back;
3482 siteptr = data->siteptr;
3483 
3484 nummarkers = getdata_nummarkers(op, data);
3485 findsubtreemarkers(op,data,start,finish,&snpstart,&snpfinish);
3486 
3487 if (snpstart == FLAGLONG) numinvar = finish-start+1;
3488 else numinvar = (finish-start+1) - (snpfinish-snpstart+1);
3489 
3490 sumterm2 = 0.0;
3491 for (j = 0; j < numcategs; j++) { /* compute P(Invar) */
3492    term2[j] = 0.0;
3493    for(k = nummarkers; k < nummarkers+NUMINVAR; k++) {
3494       x1 = p->x->s[k][j][slice];
3495       term2[j] += dna->freqa * x1[baseA] +
3496           dna->freqc * x1[baseC] + dna->freqg * x1[baseG] +
3497           dna->freqt * x1[baseT];
3498    }
3499    sumterm2 += op->probcat[j] * term2[j];
3500 }
3501 lterm2 = log(sumterm2);
3502 for (j = 0; j < numcategs; j++) {
3503    clai[j] = term2[j] / sumterm2;
3504 }
3505 memcpy(contribution[nummarkers], clai, numcategs*sizeof(double));
3506 if (!op->autocorr)
3507    alogf->val[nummarkers] = lterm2;
3508 if (!op->norecsnp) temp += numinvar * lterm2;
3509 
3510 if (snpstart != FLAGLONG) { /* that is, if there are SNPs */
3511    for (i = snpstart; i <= snpfinish; i++) {
3512       for (j = 0; j < numcategs; j++) {
3513          x1 = p->x->s[i][j][slice];
3514          term[j] = dna->freqa * x1[baseA] + dna->freqc * x1[baseC] +
3515             dna->freqg * x1[baseG] + dna->freqt * x1[baseT];
3516          if(term[j] == 0) {
3517             fprintf(ERRFILE,"Encountered tree incompatible with data\n");
3518             if(first) {
3519                fprintf(ERRFILE,"starting tree needs to be legal\n");
3520                exit(-1);
3521             }
3522             curtree->likelihood = NEGMAX;
3523             return(-1);
3524          }
3525       }
3526       sumterm = 0.0;
3527       for (j = 0; j < numcategs; j++) {
3528          sumterm += op->probcat[j] * term[j];
3529       }
3530       lterm = log(sumterm);
3531       if (op->norecsnp && !op->full) lterm -= log(1.0-sumterm2);
3532       for (j = 0; j < numcategs; j++) {
3533             clai[j] = ((op->norecsnp) ?
3534                (term[j] / (1.0-term2[j])) / (sumterm / (1.0-sumterm2))
3535                : (term[j] / sumterm));
3536       }
3537       memcpy(contribution[i], clai, numcategs*sizeof(double));
3538       if (!op->autocorr)
3539          alogf->val[i] = lterm;
3540       temp += dna->dnaweight[i] * lterm;
3541    }
3542 
3543 #if 0 /* This is probably dead DEAD dead--warning WARNING DEBUG debug */
3544    if (op->datatype == 'n' && op->full) {
3545       temp += log(op->chance_seen);
3546       numunobs = getnumlinks(op,data,start,finish)-(finish-start);
3547       if (numunobs < 0 || (long)numunobs != numunobs) {
3548          fprintf(ERRFILE,"non integral SNP distances with Full model\n");
3549          exit(-1);
3550       }
3551       temp += numunobs * log(1.0 - (op->chance_seen*(1.0-sumterm2)));
3552    }
3553 #endif
3554 }
3555 
3556 free(term);
3557 free(term2);
3558 free(clai);
3559 
3560 return(temp);
3561 
3562 } /* snp_eval_calcrange */
3563 
3564 
panel_eval_calcrange(option_struct * op,data_fmt * data,tree * tr,boolean first,long start,long finish,long numcategs)3565 double panel_eval_calcrange(option_struct *op, data_fmt *data, tree *tr,
3566    boolean first, long start, long finish, long numcategs)
3567 {
3568 double sumterm, sumterm2, lterm, lterm2, temp, invarterm;
3569 long i, j, k, numunobs, nummarkers, numinvar, snpstart, snpfinish,
3570    *siteptr;
3571 contribarr term, term2, clai;
3572 node *p;
3573 double **x1;
3574 dnadata *dna;
3575 
3576 dna = data->dnaptr;
3577 
3578 term = (double *)calloc(numcategs,sizeof(double));
3579 term2 = (double *)calloc(numcategs,sizeof(double));
3580 clai = (double *)calloc(numcategs,sizeof(double));
3581 
3582 temp = 0.0;
3583 p = tr->root->back;
3584 siteptr = data->siteptr;
3585 
3586 /* compute P(INVAR) for each category, used as P(SNP) = 1.0 - P(INVAR) */
3587 nummarkers = getdata_nummarkers(op,data);
3588 findsubtreemarkers(op,data,start,finish,&snpstart,&snpfinish);
3589 
3590 if (snpstart == FLAGLONG) numinvar = finish-start+1;
3591 else numinvar = (finish-start+1) - (snpfinish-snpstart+1);
3592 
3593 sumterm2 = 0.0;
3594 for (j = 0; j < numcategs; j++) {
3595    x1 = p->x->s[nummarkers][j];
3596    term2[j] = 0.0;
3597     for (k = 1; k < NUMSLICE; k++) {
3598       term2[j] += dna->freqa * x1[k][baseA] +
3599           dna->freqc * x1[k][baseC] + dna->freqg * x1[k][baseG] +
3600           dna->freqt * x1[k][baseT];
3601    }
3602    sumterm2 += op->probcat[j] * term2[j];
3603 }
3604 lterm2 = log(sumterm2);
3605 for (j = 0; j < numcategs; j++) {
3606    clai[j] = term2[j] / sumterm2;
3607 }
3608 memcpy(contribution[nummarkers], clai, numcategs*sizeof(double));
3609 if (!op->autocorr)
3610    alogf->val[nummarkers] = lterm2;
3611 if (!op->norecsnp) temp += numinvar * lterm2;
3612 
3613 if (snpstart != FLAGLONG) {
3614    for (i = snpstart; i <= snpfinish; i++) {
3615       for (j = 0; j < numcategs; j++) {
3616       /* compute P(data) for this category */
3617          x1 = p->x->s[i][j];
3618          term[j] = dna->freqa * x1[0][baseA] + dna->freqc * x1[0][baseC] +
3619            dna->freqg * x1[0][baseG] + dna->freqt * x1[0][baseT];
3620          invarterm = 0.0;
3621       /* compute P(data && panel invariant) for this category */
3622          for (k = 1; k < NUMSLICE; k++) {
3623             invarterm += dna->freqa * x1[k][baseA] + dna->freqc *
3624               x1[k][baseC] + dna->freqg * x1[k][baseG] + dna->freqt *
3625               x1[k][baseT];
3626          }
3627          term[j] -= invarterm;
3628          if(term[j] == 0) {
3629             fprintf(ERRFILE,"Encountered tree incompatible with data\n");
3630             if(first) {
3631                fprintf(ERRFILE,"starting tree needs to be legal\n");
3632                exit(-1);
3633             }
3634             curtree->likelihood = NEGMAX;
3635             return(-1);
3636          }
3637       }
3638       sumterm = 0.0;
3639       for (j = 0; j < numcategs; j++) {
3640          sumterm += op->probcat[j] * term[j];
3641       }
3642       lterm = log(sumterm);
3643       if (op->norecsnp && !op->full) lterm -= log(1.0-sumterm2);
3644       for (j = 0; j < numcategs; j++) {
3645          clai[j] = ((op->norecsnp) ?
3646             (term[j] / (1.0 - term2[j])) / (sumterm / (1.0 - sumterm2))
3647             : (term[j] / sumterm));
3648       }
3649       memcpy(contribution[i], clai, numcategs*sizeof(double));
3650       if (!op->autocorr)
3651          alogf->val[i] = lterm;
3652       temp += dna->dnaweight[i] * lterm;
3653    }
3654 
3655    if (op->full) {
3656       temp += log(op->chance_seen);
3657       numunobs = (long)(getnumlinks(op,data,start,finish)-(finish-start));
3658       if (numunobs < 0 || (long)numunobs != numunobs) {
3659          fprintf(ERRFILE,"non integral SNP distances with Full model\n");
3660          exit(-1);
3661       }
3662       temp += numunobs * log(1.0 - (op->chance_seen*(1.0-sumterm2)));
3663    }
3664 }
3665 
3666 free(term);
3667 free(term2);
3668 free(clai);
3669 
3670 return(temp);
3671 
3672 } /* panel_eval_calcrange */
3673 
3674 
eval_calcrange(option_struct * op,data_fmt * data,tree * tr,boolean first,long start,long finish,long numcategs)3675 double eval_calcrange(option_struct *op, data_fmt *data, tree *tr,
3676    boolean first, long start, long finish, long numcategs)
3677 {
3678 double sumterm, lterm, temp;
3679 long i, j, slice, nummarkers, *siteptr;
3680 contribarr term, term2, clai;
3681 node *p;
3682 double *x1;
3683 dnadata *dna;
3684 
3685 dna = data->dnaptr;
3686 
3687 term = (double *)calloc(numcategs,sizeof(double));
3688 term2 = (double *)calloc(numcategs,sizeof(double));
3689 clai = (double *)calloc(numcategs,sizeof(double));
3690 
3691 slice = 0;
3692 temp = 0.0;
3693 p = tr->root->back;
3694 siteptr = data->siteptr;
3695 nummarkers = getdata_nummarkers(op,data);
3696 
3697 for (i = start; i <= finish; i++) {
3698    for (j = 0; j < numcategs; j++) {
3699       x1 = p->x->s[i][j][slice];
3700       term[j] = dna->freqa * x1[baseA] + dna->freqc * x1[baseC] +
3701          dna->freqg * x1[baseG] + dna->freqt * x1[baseT];
3702       if(term[j] == 0) {
3703          fprintf(ERRFILE,"Encountered tree incompatible with data\n");
3704          if(first) {
3705             fprintf(ERRFILE,"starting tree needs to be legal\n");
3706             exit(-1);
3707          }
3708          curtree->likelihood = NEGMAX;
3709          return(-1);
3710       }
3711    }
3712    sumterm = 0.0;
3713    for (j = 0; j < numcategs; j++) {
3714       sumterm += op->probcat[j] * term[j];
3715    }
3716    lterm = log(sumterm);
3717    for (j = 0; j < numcategs; j++) {
3718         clai[j] = term[j] / sumterm;
3719    }
3720    memcpy(contribution[i], clai, numcategs*sizeof(double));
3721    if (!op->autocorr)
3722       alogf->val[i] = lterm;
3723    temp += dna->dnaweight[i] * lterm;
3724 }
3725 
3726 free(term);
3727 free(term2);
3728 free(clai);
3729 
3730 return(temp);
3731 
3732 } /* eval_calcrange */
3733 
3734 
evaluate(option_struct * op,data_fmt * data,tree * tr,double llike)3735 double evaluate(option_struct *op, data_fmt *data, tree *tr,
3736    double llike)
3737 {
3738 double sum2, sumc, one_minus_lambda;
3739 contribarr like, nulike, clai;
3740 long i, j, k, nummarkers;
3741 
3742 if (op->categs > 1) {
3743    nummarkers = getdata_nummarkers(op,data);
3744 
3745    like = (double *)calloc(op->categs,sizeof(double));
3746    nulike = (double *)calloc(op->categs,sizeof(double));
3747 
3748    one_minus_lambda = 1.0 - op->lambda;
3749 
3750    for (j = 0; j < op->categs; j++) like[j] = 1.0;
3751    for (i = 0; i < nummarkers; i++) {
3752       sumc = 0.0;
3753       for (k = 1; k <= op->categs; k++)
3754          sumc += op->probcat[k - 1] * like[k - 1];
3755       sumc *= op->lambda;
3756       clai = contribution[i];
3757       for (j = 0; j < op->categs; j++)
3758          nulike[j] = (one_minus_lambda * like[j] + sumc) * clai[j];
3759       memcpy(like, nulike, op->categs*sizeof(double));
3760    }
3761    sum2 = 0.0;
3762    for (i = 0; i < op->categs; i++)
3763       sum2 += op->probcat[i] * like[i];
3764    llike += log(sum2);
3765    tr->likelihood = llike;
3766 
3767    free(like);
3768    free(nulike);
3769 
3770    return(llike);
3771 } else {
3772    tr->likelihood = llike;
3773    return(llike);
3774 }
3775 
3776 }  /* evaluate */
3777 
3778 
snp_evaluate(option_struct * op,data_fmt * data,tree * tr,double llike,long start,long finish)3779 double snp_evaluate(option_struct *op, data_fmt *data, tree *tr,
3780    double llike, long start, long finish)
3781 {
3782 double sum2, sumc;
3783 contribarr like, nulike, clai;
3784 long i, j, snpstart, snpfinish, nummarkers, numinvar;
3785 
3786 if (op->categs > 1) {
3787    nummarkers = getdata_nummarkers(op,data);
3788    findsubtreemarkers(op,data,start,finish,&snpstart,&snpfinish);
3789 
3790    like = (double *)calloc(op->categs,sizeof(double));
3791    nulike = (double *)calloc(op->categs,sizeof(double));
3792 
3793    for (j = 0; j < op->categs; j++) like[j] = 1.0;
3794 
3795    if (snpstart == FLAGLONG) numinvar = finish-start+1;
3796    else numinvar = (finish-start+1) - (snpfinish-snpstart+1);
3797 
3798    if (snpstart != FLAGLONG) {
3799       for (i = snpstart; i <= snpfinish; i++) {
3800          sumc = 0.0;
3801          for (j = 0; j < op->categs; j++)
3802             sumc += op->probcat[j] * like[j];
3803          clai = contribution[i];
3804          for (j = 0; j < op->categs; j++)
3805             nulike[j] = sumc * clai[j];
3806          memcpy(like, nulike, op->categs*sizeof(double));
3807       }
3808    }
3809 
3810    for (i = 0; i < numinvar; i++) { /* invariant sites */
3811       sumc = 0.0;
3812       for (j = 0; j < op->categs; j++)
3813          sumc += op->probcat[j] * like[j];
3814       clai = contribution[nummarkers];
3815       for (j = 0; j < op->categs; j++)
3816          nulike[j] = sumc * clai[j];
3817       memcpy(like, nulike, op->categs*sizeof(double));
3818    }
3819 
3820    sum2 = 0.0;
3821    for (j = 0; j < op->categs; j++)
3822       sum2 += op->probcat[j] * like[j];
3823    llike += log(sum2);
3824 
3825    free(like);
3826    free(nulike);
3827 
3828    tr->likelihood = llike;
3829    return(llike);
3830 } else {
3831    tr->likelihood = llike;
3832    return(llike);
3833 }
3834 
3835 }  /* snp_evaluate */
3836 
3837 
nuview_usebranch(data_fmt * data,node * p,long site)3838 boolean nuview_usebranch(data_fmt *data, node *p, long site)
3839 {
3840 
3841 if(!p->top) return(FALSE);
3842 
3843 return(inrange(p->ranges,site));
3844 
3845 } /* nuview_usebranch */
3846 
3847 
prob_micro(msatdata * ms,double t,long diff)3848 double prob_micro(msatdata *ms, double t, long diff)
3849 {
3850 double **steps = ms->steps;
3851 long stepnum = MICRO_MAXCHANGE;
3852 long k;
3853 double newsum = 0.0, oldsum=0.0;
3854 double logt = log(t);
3855 
3856 if(diff>=stepnum)
3857      return newsum;
3858 for (k = diff; k < diff + stepnum; k+=2){
3859   newsum += exp(-t + logt * k - steps[diff][k-diff]);
3860       if(oldsum-newsum < DBL_EPSILON)
3861              break;
3862        oldsum = newsum;
3863 }
3864 return newsum;
3865 
3866 } /* prob_micro */
3867 
3868 
3869 /********************************************************
3870  * findcoal() takes a non-top coalescent nodelet and    *
3871  * returns the closest tipwards top coalescent nodelet. *
3872  * It also returns the "v" value between the two nodes. *
3873  * In the case of failure it returns a NULL pointer     *
3874  * with v set to FLAGLONG.                              *
3875  *                                                      *
3876  * findcoal assumes that branchlengths are additive.    */
findcoal(option_struct * op,data_fmt * data,node * p,double * v)3877 node *findcoal(option_struct *op, data_fmt *data, node *p, double *v)
3878 {
3879 double total;
3880 node *q;
3881 
3882 if(!iscoal(p) || p->top) {*v = FLAGLONG; return(NULL);}
3883 
3884 for (q = p->back, total = 0; !iscoal(q) && !istip(q); q = findunique(q)->back)
3885    total += lengthof(q);
3886 
3887 total += lengthof(q);
3888 *v = findcoal_ltov(op,data,total);
3889 
3890 return(q);
3891 
3892 } /* findcoal */
3893 
3894 
nuview_micro(option_struct * op,data_fmt * data,node * p,long indexsite,long startsite,long endsite)3895 void nuview_micro(option_struct *op, data_fmt *data, node *p,
3896    long indexsite, long startsite, long endsite)
3897 {
3898 long i, a, s, smax, margin, diff;
3899 double *xx1, *xx2, *xx3, vv1 = 0.0, vv2 = 0.0, pija1s, pija2s,
3900    lx1, lx2, x3m;
3901 node *q, *r;
3902 boolean qactive, ractive;
3903 
3904 smax = MICRO_ALLELEMAX;
3905 margin = MICRO_MAXCHANGE;
3906 x3m = NEGMAX;
3907 
3908 xx1 = NULL;
3909 xx2 = NULL;
3910 vv1 = 0.0;
3911 vv2 = 0.0;
3912 xx3 = (double *)calloc(smax,sizeof(double));
3913 
3914 q = findcoal(op,data,p->next,&vv1);
3915 r = findcoal(op,data,p->next->next,&vv2);
3916 
3917 /* are these sites active and on an upwards branch?  */
3918 qactive = nuview_usebranch(data,p->next,indexsite);
3919 ractive = nuview_usebranch(data,p->next->next->back,indexsite);
3920 
3921 for(i = startsite; i <= endsite; i++) {
3922    if (qactive) {
3923       xx1 = q->x->a[i];
3924       lx1 = q->lxmax;
3925    } else {
3926       lx1 = 0.0;
3927    }
3928 
3929    if (ractive) {
3930       xx2 = r->x->a[i];
3931       lx2 = r->lxmax;
3932    } else {
3933       lx2 = 0.0;
3934    }
3935 
3936    for(s = 0; s < smax; s++) {
3937       if (qactive) pija1s = 0.0;
3938       else pija1s = 1.0;
3939       if (ractive) pija2s = 0.0;
3940       else pija2s = 1.0;
3941       for(a = MAX(0,s-margin); a < s + margin && a < smax; a++) {
3942          diff = labs(s-a);
3943          if(xx1[a] > 0 && qactive) {
3944             pija1s += prob_micro(data->msptr,vv1,diff) * xx1[a];
3945          }
3946          if(xx2[a] > 0 && ractive) {
3947             pija2s += prob_micro(data->msptr,vv2,diff) * xx2[a];
3948          }
3949       }
3950       xx3[s] = pija1s * pija2s;
3951       if(xx3[s] > x3m) x3m = xx3[s];
3952    }
3953    if(x3m == 0.0) p->lxmax = NEGMAX;
3954    else {
3955       for(s = 0; s < smax; s++) xx3[s] /= x3m;
3956       p->lxmax = log(x3m) + lx1 + lx2;
3957    }
3958    memcpy(p->x->a[i],xx3,smax*sizeof(double));
3959 }
3960 
3961 free(xx3);
3962 
3963 } /* nuview_micro */
3964 
3965 
nuview(option_struct * op,data_fmt * data,node * p,long indexsite,long startsite,long endsite)3966 void nuview(option_struct *op, data_fmt *data, node *p, long indexsite,
3967    long startsite, long endsite)
3968 {
3969 long i;
3970 double lw1 = 0.0, lw2 = 0.0;
3971 node *q, *r;
3972 boolean toobig;
3973 
3974 /* set "q" and "r" for use in calcrange(), either to a valid nodelet with
3975    datalikelihoods, or NULL if there is no such nodelet. */
3976 if (nuview_usebranch(data,p->next->back,indexsite)) {
3977    q = findcoal(op,data,p->next,&lw1);
3978    toobig = (exp(lw1) <= 0.0);
3979 } else {q = NULL; toobig = TRUE;}
3980 
3981 if (toobig) {
3982    for (i = 0; i < op->categs; i++) {
3983       tbl[i].ww1 = tbl[i].zz1 = 0.0;
3984       tbl[i].ww1zz1 = tbl[i].vv1zz1 = 0.0;
3985    }
3986 } else {
3987    for (i = 0; i < op->categs; i++) {
3988       tbl[i].ww1 = exp(tbl[i].rat_xi * lw1);
3989       tbl[i].zz1 = exp(tbl[i].rat_xv * lw1);
3990       tbl[i].ww1zz1 = tbl[i].ww1 * tbl[i].zz1;
3991       tbl[i].vv1zz1 = (1.0 - tbl[i].ww1) * tbl[i].zz1;
3992    }
3993 }
3994 
3995 if (nuview_usebranch(data,p->next->next->back,indexsite)) {
3996    r = findcoal(op,data,p->next->next,&lw2);
3997    toobig = (exp(lw2) <= 0.0);
3998 } else {r = NULL; toobig = TRUE;}
3999 
4000 if (toobig) {
4001    for (i = 0; i < op->categs; i++) {
4002       tbl[i].ww2 = tbl[i].zz2 = 0.0;
4003       tbl[i].ww2zz2 = tbl[i].vv2zz2 = 0.0;
4004    }
4005 } else {
4006    for (i = 0; i < op->categs; i++) {
4007       tbl[i].ww2 = exp(tbl[i].rat_xi * lw2);
4008       tbl[i].zz2 = exp(tbl[i].rat_xv * lw2);
4009       tbl[i].ww2zz2 = tbl[i].ww2 * tbl[i].zz2;
4010       tbl[i].vv2zz2 = (1.0 - tbl[i].ww2) * tbl[i].zz2;
4011    }
4012 }
4013 
4014 calcrange(op,data,p,q,r,indexsite,startsite,endsite,op->categs);
4015 
4016 }  /* nuview */
4017 
4018 
calcrange(option_struct * op,data_fmt * data,node * p,node * q,node * r,long whichtree,long start,long finish,long numcategs)4019 void calcrange(option_struct *op, data_fmt *data, node *p, node * q,
4020    node *r, long whichtree, long start, long finish, long numcategs)
4021 /* whichtree indicates which tree should be used to calculate the
4022    likelihood of this set of sites:  pass the number of a site which
4023    has the desired tree.  (This is helpful in calculations for the
4024    SNP invariant sites.) */
4025 {
4026 long i, j, k, numslice, *siteptr;
4027 double yy1, yy2, ww1zz1, vv1zz1, ww2zz2, vv2zz2, vv1zz1_sumr1,
4028    vv2zz2_sumr2, vv1zz1_sumy1, vv2zz2_sumy2, sum1, sum2,
4029    sumr1, sumr2, sumy1, sumy2;
4030 boolean qactive, ractive;
4031 dnadata *dna;
4032 double **allones, **xx1, **xx2, **xx3;
4033 long istart, ifinish;
4034 
4035 // DEBUG
4036 double temptotal;
4037 
4038 dna = data->dnaptr;
4039 numslice = (op->panel) ? NUMSLICE : 1L;
4040 siteptr = data->siteptr;
4041 
4042 if (op->datatype == 'n') {
4043   if (start == FLAGLONG) { /* extra sites */
4044      istart = getdata_nummarkers(op,data);
4045      ifinish = istart+NUMINVAR-1;
4046   } else {
4047      findsubtreemarkers(op,data,start,finish,&istart,&ifinish);
4048      if(istart==FLAGLONG) return;
4049   }
4050 } else {
4051   istart = start;
4052   ifinish = finish;
4053 }
4054 
4055 /* allocate some working space--this should probably be moved
4056 upstream at some point to save time! */
4057 
4058 allones = (double **)calloc(numslice,sizeof(double *));
4059 xx3 = (double **)calloc(numslice,sizeof(double *));
4060 allones[0] = (double *)calloc(numslice*4,sizeof(double));
4061 xx3[0] = (double *)calloc(numslice*4,sizeof(double));
4062 for (i = 0; i < numslice; i++) {
4063   allones[i] = allones[0] + i * 4;
4064   xx3[i] = xx3[0] + i * 4;
4065   for (j = 0; j < 4; j++)
4066     allones[i][j] = 1.0;
4067 }
4068 
4069 /* are these sites active and on an upwards branch?  */
4070 qactive = (q) ? TRUE : FALSE;
4071 ractive = (r) ? TRUE : FALSE;
4072 
4073 
4074 for(i = istart; i <= ifinish; i++) {
4075    if(siteptr[i] > 0) { /* is an alias site available? */
4076       memcpy(p->x->s[i][0][0], p->x->s[siteptr[i]-1][0][0],
4077         op->categs*numslice*4L*sizeof(double));
4078 // DEBUG
4079       temptotal = p->x->s[i][0][0][baseA] + p->x->s[i][0][0][baseC] + p->x->s[i][0][0][baseG] + p->x->s[i][0][0][baseT];
4080       if (temptotal == 0.0)
4081         printf("bad likelihood in memcpy!");
4082 //
4083       continue;  /* go to next site */
4084    }
4085 
4086    for (j = 0; j < numcategs; j++) {
4087       memcpy(xx3[0], allones[0], numslice * 4L * sizeof(double));
4088       if (qactive) {
4089           ww1zz1 = tbl[j].ww1zz1;
4090           vv1zz1 = tbl[j].vv1zz1;
4091           yy1 = 1.0 - tbl[j].zz1;
4092           xx1 = q->x->s[i][j];
4093 
4094           for (k=0; k<numslice; k++) {
4095             sum1 = yy1 * (dna->freqa * xx1[k][baseA] +
4096               dna->freqc * xx1[k][baseC] +
4097     	      dna->freqg * xx1[k][baseG] + dna->freqt * xx1[k][baseT]);
4098             sumr1 = dna->freqar * xx1[k][baseA] + dna->freqgr * xx1[k][baseG];
4099             sumy1 = dna->freqcy * xx1[k][baseC] + dna->freqty * xx1[k][baseT];
4100             vv1zz1_sumr1 = vv1zz1 * sumr1;
4101             vv1zz1_sumy1 = vv1zz1 * sumy1;
4102             xx3[k][baseA] *=
4103               (sum1 + ww1zz1 * xx1[k][baseA] + vv1zz1_sumr1);
4104             xx3[k][baseC] *=
4105       	      (sum1 + ww1zz1 * xx1[k][baseC] + vv1zz1_sumy1);
4106             xx3[k][baseG] *=
4107       	      (sum1 + ww1zz1 * xx1[k][baseG] + vv1zz1_sumr1);
4108             xx3[k][baseT] *=
4109       	      (sum1 + ww1zz1 * xx1[k][baseT] + vv1zz1_sumy1);
4110 // DEBUG
4111             temptotal = xx3[k][baseA] + xx3[k][baseC] + xx3[k][baseG] + xx3[k][baseT];
4112             if (temptotal == 0.0)
4113               printf("bad likelihood in q!");
4114 //
4115            }
4116         }
4117 
4118         if (ractive) {
4119           ww2zz2 = tbl[j].ww2zz2;
4120           vv2zz2 = tbl[j].vv2zz2;
4121           yy2 = 1.0 - tbl[j].zz2;
4122           xx2 = r->x->s[i][j];
4123 
4124           for (k=0; k<numslice; k++) {
4125             sum2 = yy2 * (dna->freqa * xx2[k][baseA] +
4126               dna->freqc * xx2[k][baseC] +
4127       	      dna->freqg * xx2[k][baseG] + dna->freqt * xx2[k][baseT]);
4128             sumr2 = dna->freqar * xx2[k][baseA] + dna->freqgr * xx2[k][baseG];
4129             sumy2 = dna->freqcy * xx2[k][baseC] + dna->freqty * xx2[k][baseT];
4130             vv2zz2_sumr2 = vv2zz2 * sumr2;
4131             vv2zz2_sumy2 = vv2zz2 * sumy2;
4132             xx3[k][baseA] *=
4133               (sum2 + ww2zz2 * xx2[k][baseA] + vv2zz2_sumr2);
4134             xx3[k][baseC] *=
4135               (sum2 + ww2zz2 * xx2[k][baseC] + vv2zz2_sumy2);
4136             xx3[k][baseG] *=
4137               (sum2 + ww2zz2 * xx2[k][baseG] + vv2zz2_sumr2);
4138             xx3[k][baseT] *=
4139               (sum2 + ww2zz2 * xx2[k][baseT] + vv2zz2_sumy2);
4140 // DEBUG
4141             temptotal = xx3[k][baseA] + xx3[k][baseC] + xx3[k][baseG] + xx3[k][baseT];
4142             if (temptotal == 0.0)
4143               printf("bad likelihood in r!");
4144 //
4145           }
4146         }
4147 
4148       memcpy(p->x->s[i][j][0], xx3[0], numslice * 4L * sizeof(double));
4149     }
4150 }
4151 
4152 free(allones[0]);
4153 free(allones);
4154 free(xx3[0]);
4155 free(xx3);
4156 
4157 } /* calcrange */
4158 
4159 
localsmooth(option_struct * op,data_fmt * data,node * p,long indexsite,long startsite,long endsite)4160 void localsmooth(option_struct *op, data_fmt *data, node *p,
4161    long indexsite, long startsite, long endsite)
4162 {
4163 /* never mind if it's a tip */
4164 if (istip(p)) return;
4165 
4166 /* recurse left */
4167 if (!p->next->top)
4168   if (!p->next->back->updated)
4169      if (inrange(p->next->back->ranges,indexsite))
4170        localsmooth(op,data,p->next->back,indexsite,startsite,endsite);
4171 
4172 /* recurse right */
4173 if (!p->next->next->top)
4174   if (!p->next->next->back->updated)
4175      if (inrange(p->next->next->back->ranges,indexsite))
4176        localsmooth(op,data,p->next->next->back,indexsite,startsite,
4177          endsite);
4178 
4179 /* update likelihoods */
4180 if (!p->updated && iscoal(p))
4181   switch(op->datatype) {
4182   case 'a':
4183      break;
4184   case 'b':
4185   case 'm':
4186      nuview_micro(op,data,p,indexsite,startsite,endsite);
4187      break;
4188   case 'n':
4189   case 's':
4190      nuview(op,data,p,indexsite,startsite,endsite);
4191      break;
4192   default:
4193      fprintf(ERRFILE,"localsmooth:unknown datatype %c\n",op->datatype);
4194      fprintf(ERRFILE,"only the types a, m, s");
4195      fprintf(ERRFILE,"(electrophoretic alleles,\n");
4196      fprintf(ERRFILE,"microsatellite data, sequence data)");
4197      fprintf(ERRFILE,"are allowed.\n");
4198      exit(-1);
4199   }
4200 
4201 } /* localsmooth */
4202 
4203 
snpsmooth(option_struct * op,data_fmt * data,tree * tr,long indexsite,long startsite,long endsite)4204 void snpsmooth (option_struct *op, data_fmt *data, tree *tr,
4205    long indexsite, long startsite, long endsite)
4206 {
4207 tlist *t;
4208 
4209 for(t = tr->tymelist->succ; t != NULL; t = t->succ)
4210    if (iscoal(t->eventnode))
4211       nuview(op,data,t->eventnode,indexsite,startsite,endsite);
4212 
4213 } /* snpsmooth */
4214 
4215 
localeval(option_struct * op,data_fmt * data,node * p,boolean first)4216 void localeval(option_struct *op, data_fmt *data, node *p, boolean first)
4217 {
4218 long subtree, *subtree_ranges, substart, subend, lastmarker, *siteptr;
4219 double dummy, llike;
4220 dnadata *dna;
4221 msatdata *ms;
4222 
4223 dna = data->dnaptr;
4224 ms = data->msptr;
4225 
4226 subtree_ranges = (long *)calloc(2*curtree->numrecombs+4,sizeof(long));
4227 subtree_ranges[0] = 1;
4228 
4229 siteptr = data->siteptr;
4230 
4231 findsubtrees(op,data,curtree->tymelist,subtree_ranges);
4232 
4233 llike = 0.0;
4234 for(subtree = 0; subtree < subtree_ranges[0]; subtree++) {
4235    substart = subtree_ranges[2*subtree+1];
4236    subend = subtree_ranges[2*subtree+2];
4237    /* do the regular sites */
4238    localsmooth(op,data,p,substart,substart,subend);
4239    switch(op->datatype) {
4240       case 'a':
4241          break;
4242       case 'b':
4243       case 'm':
4244 /* WARNING this throws away llike, which must be wrong! */
4245          llike += micro_evaluate(op,ms,curtree,substart,subend);
4246          break;
4247       case 'n':
4248          lastmarker = getdata_nummarkers(op,data)-1;
4249          snpsmooth(op,data,curtree,substart,FLAGLONG,FLAGLONG);
4250          if (op->panel)
4251             llike += panel_eval_calcrange(op,data,curtree,first,substart,
4252                subend,op->categs);
4253          else
4254             llike += snp_eval_calcrange(op,data,curtree,first,substart,
4255                subend,op->categs);
4256          dummy = snp_evaluate(op,data,curtree,llike,substart,subend);
4257          break;
4258       case 's':
4259          llike += eval_calcrange(op,data,curtree,first,substart,subend,
4260             op->categs);
4261          dummy = evaluate(op,data,curtree,llike);
4262          break;
4263       default:
4264          fprintf(ERRFILE,"\nlocaleval--impossible datatype %c\n",
4265             op->datatype);
4266          break;
4267    }
4268 }
4269 
4270 traverse_unflag(curtree,curtree->nodep[1]);
4271 free(subtree_ranges);
4272 
4273 } /* localeval */
4274 
4275 
4276 /********************************************************************
4277  * micro_evaluate() calculates the tree data likelihood at the root *
4278  * assuming that the tree has been properly "smoothed" first.       */
micro_evaluate(option_struct * op,msatdata * ms,tree * tr,long start,long end)4279 double micro_evaluate(option_struct *op, msatdata *ms, tree *tr,
4280   long start, long end)
4281 {
4282 long site, a;
4283 double term;
4284 node *nn;
4285 
4286 nn = tr->root->back;
4287 term = 0.0;
4288 
4289 for(site = start; site <= end; site++)
4290    for(a = 0; a < MICRO_ALLELEMAX-1; a++)
4291       term += nn->x->a[site][a];
4292 
4293 if(term == 0.0) return(NEGMAX);
4294 else return(log(term) + nn->lxmax);
4295 
4296 } /* micro_evaluate */
4297 
4298 
4299 /******************************************************************
4300  * countbranches returns the total number of branches in the tree *
4301  *    not including the root.                                     */
countbranches(option_struct * op,data_fmt * data,tree * tr)4302 long countbranches(option_struct *op, data_fmt *data, tree *tr)
4303 {
4304 
4305 return(getdata_numtips(op,data) + 2 * tr->numrecombs + tr->numcoals - 1);
4306 
4307 } /* countbranches */
4308 
4309 /******************************************************************
4310  * testratio() returns TRUE to accept a tree, FALSE to reject it. *
4311  * ratiotype = "d" if called to decide a "drop"                   *
4312  *             "t" if called to decide a "twiddle"                *
4313  *             "f" if called to decide a "flip"                   */
testratio(option_struct * op,data_fmt * data,tree * oldtree,tree * newtree,char ratiotype)4314 boolean testratio(option_struct *op, data_fmt *data, tree *oldtree,
4315    tree *newtree, char ratiotype)
4316 {
4317 double test, x, numoldbranches, numnewbranches;
4318 
4319 if(newtree->likelihood == NEGMAX)
4320    return FALSE;
4321 #if 0 /* first heating */
4322 test = (newtree->likelihood - oldtree->likelihood) * (1.0/op->temperature[0]);
4323 #endif
4324 test = (newtree->likelihood - oldtree->likelihood) * (1.0/op->ctemp);
4325 switch((int)ratiotype) {
4326 case 'd' :
4327    numoldbranches = (double)countbranches(op,data,oldtree);
4328    numnewbranches = (double)countbranches(op,data,newtree);
4329    test += log(numoldbranches/numnewbranches);
4330    break;
4331 case 't' :
4332    test += log((double)oldtree->numrecombs/(double)newtree->numrecombs);
4333    break;
4334 case 'f' :
4335    break;
4336 default :
4337    fprintf(ERRFILE,"ERROR:testratio encounters unknown type--no Hastings\n");
4338   break;
4339 }
4340 
4341 #if 0 /* first heating */
4342 if (op->temperature[0] != 1) { /* heating code */
4343   test += (newtree->coalprob - oldtree->coalprob) *
4344              (1.0/op->temperature[0] - 1.0);
4345 }
4346 #endif
4347 
4348 #if 0 /* don't heat this */
4349 test += (newtree->coalprob - oldtree->coalprob) *
4350              (1.0/op->ctemp - 1.0);
4351 #endif
4352 
4353 if (test >= 0.0)
4354    return TRUE;
4355 else {
4356    x = log(randum());
4357    if (test >= x)
4358       return TRUE;
4359    else
4360       return FALSE;
4361 }
4362 } /* testratio */
4363 
4364 
seekch(char c)4365 void seekch(char c) /* use only in reading file intree! */
4366 {
4367   if (gch == c)
4368     return;
4369   do {
4370     if (eoln(intree)) {
4371       fscanf(intree, "%*[^\n]");
4372       getc(intree);
4373     }
4374     gch = getc(intree);
4375     if (gch == '\n')
4376       gch = ' ';
4377   } while (gch != c);
4378 }  /* seekch */
4379 
getch(char * c)4380 void getch(char *c) /* use only in reading file intree! */
4381 {
4382   /* get next nonblank character */
4383   do {
4384     if (eoln(intree)) {
4385       fscanf(intree, "%*[^\n]");
4386       getc(intree);
4387     }
4388     *c = getc(intree);
4389     if (*c == '\n')
4390       *c = ' ';
4391   } while (*c == ' ');
4392 }  /* getch */
4393 
processlength(node * p)4394 void processlength(node *p)
4395 {
4396   long digit;
4397   double valyew, divisor;
4398   boolean pointread;
4399 
4400   pointread = FALSE;
4401   valyew = 0.0;
4402   divisor = 1.0;
4403   getch(&gch);
4404   digit = gch - '0';
4405   while (((unsigned long)digit <= 9) || gch == '.'){
4406     if (gch == '.')
4407       pointread = TRUE;
4408     else {
4409       valyew = valyew * 10.0 + digit;
4410       if (pointread)
4411 	divisor *= 10.0;
4412     }
4413     getch(&gch);
4414     digit = gch - '0';
4415   }
4416   p->length = valyew / divisor;
4417   p->back->length = p->length;
4418 }  /* processlength */
4419 
addelement(option_struct * op,data_fmt * data,node * p,long * nextnode)4420 void addelement(option_struct *op, data_fmt *data, node *p, long *nextnode)
4421 {
4422   node *q;
4423   long i, n;
4424   boolean found;
4425   char str[NMLNGTH];
4426 
4427   getch(&gch);
4428   if (gch == '(') {
4429     (*nextnode)++;
4430     newnode(&q);
4431     q = curtree->nodep[(*nextnode)];
4432     hookup(p, q);
4433     addelement(op,data,q->next,nextnode);
4434     seekch(',');
4435     addelement(op,data,q->next->next, nextnode);
4436     seekch(')');
4437     getch(&gch);
4438   } else {
4439     for (i = 0; i < NMLNGTH; i++)
4440       str[i] = ' ';
4441     n = 1;
4442     do {
4443       if (gch == '_')
4444 	gch = ' ';
4445       str[n - 1] = gch;
4446       if (eoln(intree)) {
4447 	fscanf(intree, "%*[^\n]");
4448 	getc(intree);
4449       }
4450       gch = getc(intree);
4451       if (gch == '\n')
4452 	gch = ' ';
4453       n++;
4454     } while (gch != ':' && gch != ',' && gch != ')' && n <= NMLNGTH);
4455     n = 1;
4456     do {
4457       found = TRUE;
4458       for (i = 0; i < NMLNGTH; i++)
4459 	found = (found && str[i] == curtree->nodep[n]->nayme[i]);
4460       if (!found)
4461 	n++;
4462     } while (!(n > getdata_numseq(op,data) || found));
4463     if (n > getdata_numseq(op,data)) {
4464       printf("Cannot find sequence: ");
4465       for (i = 0; i < NMLNGTH; i++)
4466 	putchar(str[i]);
4467       putchar('\n');
4468     }
4469     hookup(curtree->nodep[n], p);
4470   }
4471   if (gch == ':')
4472     processlength(p);
4473 }  /* addelement */
4474 
treeread(option_struct * op,data_fmt * data)4475 void treeread(option_struct *op, data_fmt *data)
4476 /* WARNING:  this only works with sequence data, not
4477   microsatellites, not panel SNPs, not allozymes! */
4478 {
4479   long nextnode;
4480   node *p;
4481 
4482   curtree->root = curtree->nodep[rootnum];
4483   getch(&gch);
4484   if (gch == '(') {
4485     nextnode = getdata_numtips(op,data) + 1;
4486     p = curtree->nodep[nextnode];
4487     addelement(op,data,p, &nextnode);
4488     seekch(',');
4489     addelement(op,data,p->next, &nextnode);
4490     hookup(p->next->next, curtree->nodep[rootnum]);
4491     p->next->next->length = ROOTLENGTH;
4492     curtree->nodep[rootnum]->length = p->next->next->length;
4493     ltov(op,data,curtree->nodep[rootnum]);
4494   }
4495   fscanf(intree, "%*[^\n]");
4496   getc(intree);
4497 }  /* treeread */
4498 
4499 
4500 /* DEBUG debug WARNING warning --
4501    actually still used in traitlike.c:traitlike() */
finddnasubtrees(option_struct * op,data_fmt * data,tlist * tstart,long * sranges)4502 void finddnasubtrees(option_struct *op, data_fmt *data,
4503    tlist *tstart, long *sranges)
4504 {
4505 long i, j, numsites, *temp;
4506 tlist *t;
4507 
4508 numsites = countsites(op,data);
4509 
4510 temp = (long *)calloc(numsites,sizeof(long));
4511 
4512 for(t = tstart, i = 1; t != NULL; t = t->succ)
4513    if(isrecomb(t->eventnode))
4514       temp[findlink(t->eventnode)] = 1;
4515 
4516 for(i = 0, j = 2, sranges[1] = 0; i < numsites; i++) {
4517    if(temp[i] == 0) continue;
4518    sranges[0]++;
4519    sranges[j] = i;
4520    sranges[j+1] = i+1;
4521    j+=2;
4522 }
4523 sranges[j] = numsites-1;
4524 sranges[j+1] = FLAGLONG;
4525 
4526 if (j+1 > 2*sranges[0]+3)
4527    fprintf(ERRFILE,"ERROR:finddnasubtree: j calculated wrong\n");
4528 
4529 free(temp);
4530 
4531 } /* finddnasubtrees */
4532 
4533 
findsubtrees_node(option_struct * op,data_fmt * data,tlist * tlast,tree * tr,long ** coal)4534 void findsubtrees_node(option_struct *op, data_fmt *data,
4535    tlist *tlast, tree *tr, long **coal)
4536 {
4537 long i, j, numsites, *temp;
4538 tlist *t;
4539 
4540 numsites = countsites(op,data);
4541 
4542 temp = (long *)calloc(numsites,sizeof(long));
4543 for(t = tr->tymelist, i = 1; t != tlast->succ; t = t->succ)
4544    if(isrecomb(t->eventnode)) {
4545       j = findlink(t->eventnode);
4546       if (temp[j]) continue;
4547       temp[j] = 1;
4548       i++;
4549    }
4550 
4551 init_coal_alloc(coal,i);
4552 
4553 for(i = 0, j = 2; i < numsites; i++) {
4554    if (temp[i] == 0) continue;
4555    (*coal)[j] = i;
4556    (*coal)[j+1] = i+1;
4557    j += 2;
4558 }
4559 (*coal)[j] = numsites-1;
4560 
4561 free(temp);
4562 
4563 } /* findsubtrees_node */
4564 
4565 
drop_findsubtrees(option_struct * op,data_fmt * data,tree * tr,node * p)4566 void drop_findsubtrees(option_struct *op, data_fmt *data, tree *tr,
4567    node *p)
4568 {
4569 node *q;
4570 tlist *t;
4571 
4572 t = gettymenode(tr,p->number);
4573 
4574 findsubtrees_node(op,data,t,tr,&(p->coal));
4575 
4576 if (isrecomb(p)) {
4577    q = otherdtr(p);
4578    findsubtrees_node(op,data,t,tr,&(q->coal));
4579 }
4580 
4581 } /* drop_findsubtrees */
4582 
4583 
findsubtrees(option_struct * op,data_fmt * data,tlist * tstart,long * sranges)4584 void findsubtrees(option_struct *op, data_fmt *data, tlist *tstart,
4585    long *sranges)
4586 {
4587 long i, j, numsites, *temp;
4588 tlist *t;
4589 
4590 numsites = countsites(op,data);
4591 
4592 temp = (long *)calloc(numsites,sizeof(long));
4593 
4594 for(t = tstart; t != NULL; t = t->succ)
4595    if(isrecomb(t->eventnode))
4596       temp[findlink(t->eventnode)] = 1;
4597 
4598 for(i = 0, j = 2, sranges[1] = 0; i < numsites; i++) {
4599    if(temp[i] == 0) continue;
4600    sranges[0]++;
4601    sranges[j] = i;
4602    sranges[j+1] = i+1;
4603    j+=2;
4604 }
4605 sranges[j] = numsites-1;
4606 sranges[j+1] = FLAGLONG;
4607 
4608 if (j+1 > 2*sranges[0]+3)
4609    fprintf(ERRFILE,"ERROR:findsubtree: j calculated wrong\n");
4610 
4611 free(temp);
4612 
4613 } /* findsubtrees */
4614 
4615 
4616 /***************************************************************
4617  * markertosite() returns the exact "site" that corresponds to *
4618  * the passed marker position.                                 */
markertosite(option_struct * op,data_fmt * data,long marker)4619 long markertosite(option_struct *op, data_fmt *data, long marker)
4620 {
4621 
4622 return(getdata_markersite(op,data,marker));
4623 
4624 } /* markertosite */
4625 
4626 
4627 /**********************************************************************
4628  * sitetorightmarker() returns the closest marker to the right of the *
4629  * given site, if that marker exists.  If that marker doesn't exist,  *
4630  * FLAGLONG is returned.  If the site exactly corresponds to a marker *
4631  * then the corresponding marker will be returned.                    */
sitetorightmarker(option_struct * op,data_fmt * data,long site)4632 long sitetorightmarker(option_struct *op, data_fmt *data, long site)
4633 {
4634 long marker, nummarkers;
4635 double msite;
4636 
4637 nummarkers = getdata_nummarkers(op,data);
4638 for(marker = 0; marker < nummarkers; marker++) {
4639    msite = markertosite(op,data,marker);
4640    if (msite >= site) return(marker);
4641 }
4642 
4643 return(FLAGLONG);
4644 
4645 } /* sitetorightmarker */
4646 
4647 
4648 /**********************************************************************
4649  * sitetomarker() returns the closest marker to the left of the given *
4650  * site, if that marker exists.  If that marker doesn't exist, the    *
4651  * closest marker on the right is returned.                           */
sitetomarker(option_struct * op,data_fmt * data,long site)4652 long sitetomarker(option_struct *op, data_fmt *data, long site)
4653 {
4654 long marker, nummarkers;
4655 double sitecounts;
4656 
4657 nummarkers = getdata_nummarkers(op,data);
4658 for(marker = 0, sitecounts = 0.0; marker < nummarkers; marker++) {
4659    sitecounts = data->dnaptr->sitecount[locus][marker];
4660    if (site < sitecounts) return(marker);
4661 }
4662 
4663 fprintf(ERRFILE,"ERROR--failure to find marker for site %ld\n\n",
4664    site);
4665 return(FLAGLONG);
4666 
4667 
4668 } /* sitetomarker */
4669 
4670 
4671 /**********************************************************
4672  * findsubtreemarkers() gets passed the beginning and end *
4673  * of a subtree (in psuedosites) and returns the position *
4674  * of the first and last markers found in that subtree.   *
4675  * Return FLAGLONG in both marker positions if there are  *
4676  * no markers present in the subtree.                     */
findsubtreemarkers(option_struct * op,data_fmt * data,long pstart,long pend,long * mstart,long * mend)4677 void findsubtreemarkers(option_struct *op, data_fmt *data, long pstart,
4678   long pend, long *mstart, long *mend)
4679 {
4680 long nummarkers, marker, site;
4681 
4682 nummarkers = getdata_nummarkers(op,data);
4683 
4684 for(marker = 0; marker < nummarkers; marker++) {
4685    site = markertosite(op,data,marker);
4686    if (site >=  pstart) break;
4687 }
4688 
4689 if (site > pend) {
4690     (*mstart) = (*mend) = FLAGLONG;
4691     return;
4692 }
4693 
4694 (*mstart) = marker;
4695 
4696 for(;marker < nummarkers; marker++) {
4697    site = markertosite(op,data,marker);
4698    if (site > pend) break;
4699 }
4700 
4701 (*mend) = marker-1;
4702 
4703 } /* findsubtreemarkers */
4704 
4705 
4706 /**********************************************************
4707  * sameranges() returns TRUE if the ranges are identical, *
4708  * FALSE otherwise.                                       */
sameranges(long * range1,long * range2)4709 boolean sameranges(long *range1, long *range2)
4710 {
4711 long i;
4712 
4713 for(i = 0; range1[i] != FLAGLONG && range2[i] != FLAGLONG; i++)
4714    if(range1[i] != range2[i]) return(FALSE);
4715 
4716 if (range1[i] != range2[i]) {
4717    printf("ERROR:sameranges bailed on FLAGLONG but not done yet!!! %ld %ld\n",
4718       indecks,apps);
4719    return(FALSE);
4720 }
4721 
4722 return(TRUE);
4723 
4724 } /* sameranges */
4725 
4726 
4727 /*************************************************
4728  * copycoal() copies the coal array of a nodelet */
copycoal(node * source,node * target)4729 void copycoal(node *source, node *target)
4730 {
4731 
4732 if (source->coal != NULL) {
4733    coal_Malloc(target,TRUE,source->coal[0]);
4734    memcpy(target->coal,source->coal,
4735       (source->coal[0]*2+2)*sizeof(long));
4736 } else coal_Malloc(target,FALSE,0L);
4737 
4738 } /* copycoal */
4739 
4740 
4741 /*****************************************************
4742  * copyranges() copies the ranges array of a nodelet */
copyranges(node * source,node * target)4743 void copyranges(node *source, node *target)
4744 {
4745 
4746 if (source->ranges != NULL) {
4747    ranges_Malloc(target,TRUE,source->ranges[0]);
4748    memcpy(target->ranges,source->ranges,
4749       (source->ranges[0]*2+2)*sizeof(long));
4750 } else ranges_Malloc(target,FALSE,0L);
4751 
4752 } /* copyranges */
4753 
4754 
4755 /***************************************************************
4756  * This function copies the likelihood, "x" and "z", arrays of *
4757  * a nodelet.                                                  */
copylikes(option_struct * op,data_fmt * data,node * source,node * target)4758 void copylikes(option_struct *op, data_fmt *data, node *source,
4759    node *target)
4760 {
4761 long nummarkers = 0, numslice;
4762 
4763 numslice = (op->panel) ? NUMSLICE : 1L;
4764 
4765 if (op->map) {
4766    if (source->z != NULL) {
4767       allocate_z(op,data,target);
4768       memcpy(target->z,source->z,NUMTRAIT*sizeof(double));
4769    } else free_z(op,target);
4770 }
4771 
4772 if (source->x != NULL) {
4773    allocate_x(op,data,target);
4774    switch (op->datatype) {
4775       case 'a':
4776          break;
4777       case 'b':
4778       case 'm':
4779          memcpy(target->x->a[0],source->x->a[0],
4780          getdata_numloci(op,data)*MICRO_ALLELEMAX*sizeof(double));
4781          break;
4782       case 'n':
4783          nummarkers += NUMINVAR;
4784       case 's':
4785          nummarkers += getdata_nummarkers(op,data);
4786          memcpy(target->x->s[0][0][0],source->x->s[0][0][0],
4787             nummarkers*op->categs*numslice*4L*sizeof(double));
4788          break;
4789       default:
4790          fprintf(ERRFILE,"\ncopylikes--impossible datatype %c\n",
4791             op->datatype);
4792          exit(-1);
4793          break;
4794    }
4795 } else free_x(op,target);
4796 
4797 } /* copylikes */
4798 
4799 
4800 /************************************************************
4801  * copynode copies the "source" node onto the "target" node */
copynode(option_struct * op,data_fmt * data,node * source,node * target)4802 void copynode(option_struct *op, data_fmt *data, node *source,
4803    node *target)
4804 {
4805   long i, nodecount;
4806 
4807   if (istip(source)) nodecount = 1;
4808   else nodecount = 3;
4809 
4810   for (i = 0; i < nodecount; i++) {
4811     target->type = source->type;
4812     /* NEVER! target->next := source->next; */
4813     target->back = source->back;
4814     target->top = source->top;
4815     /* but NOT target->number := source->number; */
4816     copylikes(op,data,source,target);
4817     copyranges(source,target);
4818     if (op->fc) copycoal(source,target);
4819     if (istip(source)) memcpy(target->nayme,source->nayme,sizeof(naym));
4820     target->v = source->v;
4821     target->lxmax = source->lxmax;
4822     target->tyme = source->tyme;
4823     target->length = source->length;
4824     target->updated = source->updated;
4825     target->recstart = source->recstart;
4826     target->recend = source->recend;
4827     source = source->next;
4828     target = target->next;
4829   }
4830 }  /* copynode */
4831 
4832 /**********************************************************************
4833  * addnode hooks nodelet "p" to node "q" by free back ptrs in p and q */
addnode(option_struct * op,data_fmt * data,node * p,node * q)4834 void addnode(option_struct *op, data_fmt *data, node *p, node *q)
4835 {
4836 
4837 if (p->top) while(q->top && q->back) q = q->next;
4838 else while(!q->top && q->back) q = q->next;
4839 
4840 hookup(p,q);
4841 fixlength(op,data,p);
4842 
4843 } /* addnode */
4844 
4845 
4846 /****************************************************************
4847  * getrecnodelet finds which nodelet in node "copy" corresponds *
4848  * to the exact nodelet pointed to by "orig".                   */
getrecnodelet(node * copy,node * orig)4849 node *getrecnodelet (node *copy, node *orig)
4850 {
4851 long i;
4852 node *p;
4853 
4854 p = copy;
4855 for(i = 0; i < 3; i++) {
4856    if ((p->recstart == orig->recstart) && (p->recend == orig->recend))
4857       return(p);
4858    p = p->next;
4859 }
4860 
4861 fprintf(ERRFILE,"ERROR:getrecnodelet failed to match\n");
4862 return(NULL);
4863 
4864 } /* getrecnodelet */
4865 
4866 
4867 /**********************************************************
4868  * make_tree_copy copies tree "source" into tree "target" */
make_tree_copy(option_struct * op,data_fmt * data,tree * source,tree * target)4869 void make_tree_copy(option_struct *op, data_fmt *data, tree *source,
4870    tree *target)
4871 {
4872 long i, nodenumber, *tnum, numno, numtips;
4873 tlist *t, *addhere;
4874 node *p, *q, *r;
4875 dnadata *dna;
4876 
4877 dna = data->dnaptr;
4878 numtips = getdata_numtips(op,data);
4879 numno = numtips + source->numcoals + source->numrecombs + 1;
4880 tnum = (long *)calloc(1,numno*sizeof(long));
4881 
4882 /* make the tips */
4883 target->nodep = (node **)calloc(1,numno*sizeof(node *));
4884 for (i = 0; i < numtips+1; i++) {
4885    allocate_tip(op,data,target,source->nodep[i]->number);
4886    strcpy(target->nodep[i]->nayme,source->nodep[i]->nayme);
4887    if (i) {
4888       target->nodep[i]->top = TRUE;
4889       target->nodep[i]->tyme = 0.0;
4890    } else {
4891       target->nodep[i]->top = FALSE;
4892       target->nodep[i]->tyme = ROOTLENGTH;
4893    }
4894    target->nodep[i]->updated = TRUE;
4895    copyranges(source->nodep[i],target->nodep[i]);
4896    if(op->map) copytraits(source->nodep[i],target->nodep[i]);
4897    if(op->fc) copycoal(source->nodep[i],target->nodep[i]);
4898    copylikes(op,data,source->nodep[i],target->nodep[i]);
4899    tnum[i] = i;
4900 }
4901 
4902 /* now construct the first tymelist entry */
4903 newtymenode(&target->tymelist);
4904 target->tymelist->numbranch = numtips;
4905 target->tymelist->branchlist =
4906    (node **)calloc(numtips,sizeof(node *));
4907 for(i = 0; i < numtips; i++)
4908    target->tymelist->branchlist[i] = target->nodep[i+1];
4909 target->tymelist->eventnode = target->nodep[1];
4910 target->tymelist->age = source->root->tyme;
4911 
4912 /* now do the rest of the tree and tymelist */
4913 for (t = source->tymelist->succ; t != NULL; t = t->succ) {
4914    nodenumber = t->eventnode->number;
4915    newnode(&p);
4916    p->number = nodenumber;
4917    p->next->number = nodenumber;
4918    p->next->next->number = nodenumber;
4919    target->nodep[nodenumber] = p;
4920    copynode(op,data,t->eventnode,p);
4921    p->back = NULL;
4922    p->next->back = NULL;
4923    p->next->next->back = NULL;
4924    tnum[t->eventnode->number] = nodenumber;
4925 }
4926 
4927 addhere = target->tymelist;
4928 for (t = source->tymelist->succ; t != NULL; t = t->succ) {
4929    p = findunique(target->nodep[tnum[t->eventnode->number]]);
4930    q = findunique(t->eventnode);
4931    if (isrecomb(p)) {
4932       r = target->nodep[tnum[q->back->number]];
4933       if (isrecomb(r)) r = getrecnodelet(r,q->back);
4934       addnode(op,data,p,r);
4935    } else {
4936       r = target->nodep[tnum[q->next->back->number]];
4937       if (isrecomb(r)) r = getrecnodelet(r,q->next->back);
4938       addnode(op,data,p->next,r);
4939       r = target->nodep[tnum[q->next->next->back->number]];
4940       if (isrecomb(r)) r = getrecnodelet(r,q->next->next->back);
4941       addnode(op,data,p->next->next,r);
4942    }
4943 
4944    insertaftertymelist(addhere,p);
4945    addhere = addhere->succ;
4946 }
4947 
4948 addnode(op,data,target->nodep[0],
4949    target->nodep[tnum[source->root->back->number]]);
4950 target->root = target->nodep[0];
4951 
4952 free(tnum);
4953 
4954 } /* make_tree_copy */
4955 
4956 
4957 /********************************************************************
4958  * copycreature() copies a single creature to the "target" pointer. *
4959  * The passed tree should be the tree associated with the "target"  *
4960  * creature.                                                        */
copycreature(option_struct * op,data_fmt * data,creature * source,creature * target,tree * tr)4961 void copycreature(option_struct *op, data_fmt *data, creature *source,
4962    creature *target, tree *tr)
4963 {
4964 long i;
4965 
4966 target->numflipsites = source->numflipsites;
4967 target->flipsites = (long *)calloc(target->numflipsites,sizeof(long));
4968 memcpy(target->flipsites,source->flipsites,
4969    (target->numflipsites)*sizeof(long));
4970 
4971 target->numhaplotypes = source->numhaplotypes;
4972 target->haplotypes = (node **)calloc(target->numhaplotypes,sizeof(node *));
4973 for(i = 0; i < target->numhaplotypes; i++) {
4974    target->haplotypes[i] = tr->nodep[source->haplotypes[i]->number];
4975 }
4976 
4977 } /* copycreature */
4978 
4979 
4980 /********************************************************
4981  * copycreatures() copies the creatures array of a tree */
copycreatures(option_struct * op,data_fmt * data,tree * source,tree * target)4982 void copycreatures(option_struct *op, data_fmt *data, tree *source,
4983    tree *target)
4984 {
4985 long cr, numcreatures;
4986 
4987 if (!op->haplotyping) return;
4988 
4989 numcreatures = getdata_numtips(op,data)/NUMHAPLOTYPES;
4990 target->creatures = (creature *)calloc(numcreatures,sizeof(creature));
4991 
4992 for(cr = 0; cr < numcreatures; cr++) {
4993    copycreature(op,data,&(source->creatures[cr]),
4994       &(target->creatures[cr]),target);
4995 }
4996 
4997 } /* copycreatures */
4998 
4999 
5000 /**********************************************************************
5001  * copytree makes a copy from scratch of the "source" tree, returning *
5002  * a pointer to the copy that it makes.                               */
copytree(option_struct * op,data_fmt * data,tree * source)5003 tree *copytree(option_struct *op, data_fmt *data, tree *source)
5004 {
5005 tree *target;
5006 
5007 target = (tree *)calloc(1,sizeof(tree));
5008 
5009 target->likelihood = source->likelihood;
5010 target->coalprob = source->coalprob;
5011 target->numcoals = source->numcoals;
5012 target->numrecombs = source->numrecombs;
5013 make_tree_copy(op,data,source,target);
5014 copycreatures(op,data,source,target);
5015 
5016 return(target);
5017 } /* copytree */
5018 
5019 
5020 /***************************************************
5021  * freetree frees the "target" tree from memory */
freetree(option_struct * op,data_fmt * data,tree * target)5022 void freetree(option_struct *op, data_fmt *data, tree *target)
5023 {
5024 long i;
5025 tlist *t;
5026 
5027 if (op->haplotyping) {
5028    for(i = 0; i < getdata_numtips(op,data)/NUMHAPLOTYPES; i++) {
5029       free(target->creatures[i].haplotypes);
5030       free(target->creatures[i].flipsites);
5031    }
5032    free(target->creatures);
5033 }
5034 
5035 t = target->tymelist->succ;
5036 while(t != NULL) {
5037    freenode(t->eventnode);
5038    t = t->succ;
5039 }
5040 freetymelist(target->tymelist);
5041 
5042 for (i = 0; i < getdata_numtips(op,data)+1; i++) {
5043    freenodelet(op,data,target->nodep[i]);
5044 }
5045 free(target->nodep);
5046 
5047 free(target);
5048 
5049 } /* freetree */
5050 
5051 /* joinnode and constructtree are used for constructing a rather bad
5052    starting tree if the user doesn't provide one */
joinnode(option_struct * op,data_fmt * data,double length,node * p,node * q)5053 void joinnode(option_struct *op, data_fmt *data, double length,
5054    node *p, node *q)
5055 {
5056    hookup(p,q);
5057    p->length = length;
5058    q->length = length;
5059    ltov(op,data,p);
5060 } /* joinnode */
5061 
constructtree(option_struct * op,data_fmt * data,double branchlength)5062 void constructtree(option_struct *op, data_fmt *data, double branchlength)
5063 {
5064 long i, j, numtips, nextnode;
5065 double height;
5066 node *p, *q;
5067 
5068 numtips = getdata_numtips(op,data);
5069 
5070 curtree->root = curtree->nodep[rootnum];
5071 nextnode = numtips+1;
5072  p = curtree->root;
5073  q = curtree->nodep[nextnode];
5074 
5075 p->back = q;
5076 q->back = p;
5077 p->length = ROOTLENGTH;
5078 q->length = ROOTLENGTH;
5079 ltov(op,data,p);
5080 
5081 height = (numtips - 1) * branchlength;
5082 p->tyme = ROOTLENGTH + height;
5083 for (i = 1; i < numtips; i++) {
5084    p = curtree->nodep[i];
5085    q = curtree->nodep[nextnode]->next;
5086    joinnode(op,data,height,p,q);
5087    q = q->next;
5088    if (i != numtips-1) {
5089       nextnode++;
5090       p = curtree->nodep[nextnode];
5091       joinnode(op,data,branchlength,p,q);
5092       height -= branchlength;
5093    } else {
5094       p = curtree->nodep[numtips];
5095       joinnode(op,data,height,p,q);
5096    }
5097    for (j = 0; j < 3; j++)
5098       q->tyme = height;
5099 }
5100 
5101 } /* constructtree */
5102 /* End bad starting tree construction */
5103 
5104 
5105 /******************************************************************
5106  * min_theta_calc() calculates the minimum value which we wish to *
5107  * let the mutation-rate parameter, theta, attain before final    *
5108  * estimation.                                                    */
min_theta_calc(option_struct * op,data_fmt * data,double th)5109 double min_theta_calc(option_struct *op, data_fmt *data, double th)
5110 {
5111 double value;
5112 
5113 value = THETAMIN;
5114 
5115 return(value);
5116 
5117 } /* min_theta_calc */
5118 
5119 
5120 /***********************************************************
5121  * hasrec_chain() returns TRUE if the passed chain has any *
5122  * recombinations currently sampled, FALSE otherwise.      */
hasrec_chain(option_struct * op,long chain)5123 boolean hasrec_chain(option_struct *op, long chain)
5124 {
5125 long i, chaintype, refchain;
5126 treerec *trii;
5127 
5128 refchain = REF_CHAIN(chain);
5129 chaintype = TYPE_CHAIN(chain);
5130 
5131 for(i = 0; i < op->numout[chaintype]; i++) {
5132    trii = &sum[locus][refchain][i];
5133    if (trii->numrecombs > 0) return(TRUE);
5134 }
5135 
5136 return(FALSE);
5137 
5138 } /* hasrec_chain */
5139 
5140 
5141 /***********************************************************************
5142  * chainendcheck() runs at the end of a chain and makes sure the chain *
5143  * had the following desired properties:                               *
5144  *                                                                     *
5145  *    --at least 1 sampled tree contains 1+ recombinations             *
5146  *      FAILURE = call addfractrecomb()                                */
chainendcheck(option_struct * op,data_fmt * data,tree * tr,long chain,boolean locusend)5147 void chainendcheck(option_struct *op, data_fmt *data, tree *tr,
5148    long chain, boolean locusend)
5149 {
5150 
5151 if(!locusend && !hasrec_chain(op,chain) && op->holding != 2) {
5152    if (apps == totchains - 1) {
5153       fprintf(outfile,"WARNING--forced a recombination");
5154       fprintf(outfile," which may have affected final result\n");
5155    }
5156    addfractrecomb(op,data,chain,sum);
5157 }
5158 
5159 } /* chainendcheck */
5160 
5161 
5162 /******************************************************************
5163  * rearrange() is the driver for basic rearrangement of a tree,   *
5164  * returning TRUE if it successfully rearranged a tree, and FALSE *
5165  * otherwise.                                                     */
rearrange(option_struct * op,data_fmt * data,long whichtchain)5166 boolean rearrange(option_struct *op, data_fmt *data, long whichtchain)
5167 {
5168 double chance;
5169 boolean accepted;
5170 
5171 curtree = temptrees[whichtchain];
5172 chance = randum();
5173 accepted = FALSE;
5174 op->ctemp = op->temperature[whichtchain];
5175 
5176 if (chance < TWIDDLE_PROB) accepted = twiddle(op,data);
5177 else {
5178    chance -= TWIDDLE_PROB;
5179    if (chance < op->happrob && op->haplotyping) {
5180       if (op->hapdrop == 0)
5181          accepted = fliphap(op,data,curtree);
5182       else if (op->hapdrop == 1)
5183          accepted = flipdrop(op,data,curtree,1L);
5184       else
5185          accepted = flipdrop(op,data,curtree,2L);
5186       hap++;
5187       if (accepted && whichtchain == 0) hacc++;
5188    } else {
5189       accepted = makedrop(op,data);
5190       slid++;
5191       if (accepted && whichtchain == 0) slacc++;
5192    }
5193 }
5194 #ifdef MAC
5195    eventloop();
5196 #endif
5197 
5198 temptrees[whichtchain] = curtree;
5199 
5200 return(accepted);
5201 
5202 } /* rearrange */
5203 
5204 
5205 /**********************************************************************
5206  * temptreeswap() checks to see if a swap of trees between different  *
5207  * temperatures is called for, executing the swap if so.              *
5208  *                                                                    *
5209  * it accepts a proposed swap using                                   *
5210  *    [ P(Gh|Tl) * P(D|Gh) ] ^ 1/l * [ P(Gl|Th) * P(D|Gl) ] ^ 1/h     *
5211  *    -----------------------------------------------------------     *
5212  *    [ P(Gh|Th) * P(D|Gh) ] ^ 1/h * [ P(Gl|Tl) * P(D|Gl) ] ^ 1/l     *
5213  *                                                                    *
5214  * where Gh = the tree generated at high temp                         *
5215  *       Gl = the tree generated at low temp                          *
5216  *       Tl = the parameter values at the low temp                    *
5217  *       Th = the parameter values at the high temp                   */
temptreeswap(option_struct * op,data_fmt * data,boolean * changed,long chain)5218 void temptreeswap(option_struct *op, data_fmt *data, boolean *changed,
5219    long chain)
5220 {
5221 long chl, chh;
5222 double chance, num, denom, templ, temph, recl, rech, thetal, thetah;
5223 boolean picked;
5224 tree *trh, *trl;
5225 
5226 if (op->numtempchains < 2) return;
5227 
5228 /* first pick which two chains to swap */
5229 
5230 picked = FALSE;
5231 while(!picked) {
5232    chl = (long)(randum()*op->numtempchains);
5233    chh = (long)(randum()*op->numtempchains);
5234    if (chl == op->numtempchains || chh == op->numtempchains) continue;
5235    if (chh - chl > 0) {
5236       if (chh - chl == 1) picked = TRUE;
5237    } else {
5238       if (chl - chh == 1) picked = TRUE;
5239    }
5240 }
5241 
5242 trh = temptrees[chh];
5243 temph = op->temperature[chh];
5244 thetah = theti[locus][chain];
5245 rech = reci[locus][chain];
5246 
5247 trl = temptrees[chl];
5248 templ = op->temperature[chl];
5249 thetal = theti[locus][chain];
5250 recl = reci[locus][chain];
5251 
5252 num = (coalprob(op,data,trh,thetal,recl)+trh->likelihood)*(1.0/templ) +
5253    (coalprob(op,data,trl,thetah,rech)+trl->likelihood)*(1.0/temph);
5254 
5255 denom = (trh->coalprob+trh->likelihood)*(1.0/temph) +
5256      (trl->coalprob+trl->likelihood)*(1.0/templ);
5257 
5258 chance = num - denom;
5259 
5260 swap++;
5261 if (chance >= log(randum())) {
5262    temptrees[chh] = trl;
5263    temptrees[chl] = trh;
5264    changed[chh] = TRUE;
5265    changed[chl] = TRUE;
5266    swacc++;
5267 }
5268 
5269 } /* temptreeswap */
5270 
5271 
5272 #define PRINTNUM     8 /* the number of digits used for printing
5273                           the number of steps done */
5274 
maketree(option_struct * op,data_fmt * data)5275 void maketree(option_struct *op, data_fmt *data)
5276 {
5277 long tempchain, incrprog, metout, progout, chaintype, i;
5278 double bestlike;
5279 boolean runend, *changetree, first;
5280 char chainlit[2][6] = {"Short","Long"};
5281 dnadata *dna;
5282 double *traitarray;
5283 long hapscore;
5284 
5285 traitarray = NULL;
5286 
5287 changetree = (boolean *)calloc(op->numtempchains,sizeof(boolean));
5288 
5289 if (op->map)
5290   traitarray = (double *)calloc(countsites(op,data),sizeof(double));
5291 
5292 chainlit[0][5] = chainlit[1][5] = '\0';
5293 
5294 /* WARNING DEBUG assumes DNA data (and shouldn't)! */
5295 dna = data->dnaptr;
5296 
5297   getc(infile);
5298 if(op->haplotyping) {
5299   fprintf(outfile,"Haplotypes being inferred");
5300   if (op->hapdrop == 0) fprintf(outfile," without resimulation\n");
5301   if (op->hapdrop == 1) fprintf(outfile," with single resimulation\n");
5302   if (op->hapdrop == 2) fprintf(outfile," with double resimulation\n");
5303 }
5304   fprintf(outfile,"Watterson estimate of theta is %12.8f\n", watttheta);
5305 #if !DISTRIBUTION
5306   fprintf(outfile,"\n\nIntermediate chains (only)\n");
5307   fprintf(outfile,"chain#     theta     rec-rate\n");
5308   fprintf(outfile,"-----------------------------\n");
5309 #endif
5310   if (op->usertree)
5311      treeread(op,data);
5312   else {
5313 #if !ALWAYS_REJECT
5314      branch0 = watttheta/getdata_numtips(op,data);
5315      constructtree(op,data,branch0);
5316 #else
5317      constructtree(op,data,0.0);
5318 #endif
5319   }
5320   orient(curtree,op,data,curtree->root->back);
5321 #if !ALWAYS_REJECT
5322   if (op->plump) plumptree(op,data,watttheta);
5323 #endif
5324   finishsetup(op,data,curtree->root->back);
5325   initbranchlist(op,data);
5326 #if !ALWAYS_REJECT
5327   localeval(op,data,curtree->root->back,TRUE);
5328   curtree->coalprob = coalprob(op,data,curtree,theta0,rec0);
5329 #endif
5330   bestlike = NEGMAX;
5331   theti[locus][0] = theta0;
5332   reci[locus][0] = rec0;
5333   runend = FALSE;
5334 
5335 /* WARNING DEBUG MARY */
5336     if (TRUEHAP) {
5337       hapscore = hapdist(op,&truehaps,data,curtree->creatures);
5338       printf("Distance from truth%ld\n",hapscore);
5339       fprintf(outfile,"Distance from truth%ld\n",hapscore);
5340       hapscore = hapdist(op,&starthaps,data,curtree->creatures);
5341       fprintf(outfile,"Distance from start%ld\n",hapscore);
5342     }
5343 
5344   if(op->map) traitread(curtree, getdata_numseq(op,data));
5345   /**********************************/
5346   /* Begin Hastings-Metropolis loop */
5347   /**********************************/
5348   for (apps = 0; apps < totchains; apps++) {
5349     if (apps >= op->numchains[0]) chaintype = 1;
5350     else chaintype = 0;
5351     if (op->progress) {
5352       printf("%s chain %ld ",chainlit[chaintype],
5353       ((chaintype) ? (apps + 1 - op->numchains[0]) : apps + 1));
5354       fflush(stdout);
5355     }
5356     numdropped = 0;
5357     metout = op->increm[chaintype] - 1 + NUMBURNIN;
5358   /* print a "." to stdout every 10% of the chain(s) finished */
5359     incrprog = (long)(op->steps[chaintype] / 10.0);
5360     progout = incrprog - 1 + NUMBURNIN;
5361     op->numout[chaintype] = 0;
5362     slacc = hacc = swacc = 0;
5363     slid = hap = swap = 0;
5364 
5365 /* if apps == 0, start all temperature chains with the same
5366    initial tree */
5367     if (apps == 0) {
5368        for(tempchain = 0; tempchain < op->numtempchains; tempchain++) {
5369           temptrees[tempchain] = copytree(op,data,curtree);
5370        }
5371        /* curtree is no longer needed as a base tree!, it's just a ptr */
5372        freetree(op,data,curtree);
5373     }
5374     for(tempchain = 0; tempchain < op->numtempchains; tempchain++)
5375        changetree[tempchain] = FALSE;
5376     first = TRUE;
5377     for (indecks=0; indecks < op->steps[chaintype]+NUMBURNIN; indecks++) {
5378 #if 0 /* first heating */
5379 /* heating code */
5380       if (max_heat != 1.0) {
5381          if (heating_cnt == heating_interval) {
5382             heating_cnt = 0;
5383             if (op->temperature[0] >= max_heat) direction = -1;
5384             if (op->temperature[0] <= 1.0) direction = 1;
5385             op->temperature[0] += direction * heating_by;
5386          }
5387          heating_cnt++;
5388       }
5389 #endif
5390       col = 0; /* column number, used in treeout */
5391 
5392 /* only count acceptances after burn-in */
5393       if (indecks == NUMBURNIN)
5394          hacc = slacc = 0;
5395 
5396       for(tempchain = 0; tempchain < op->numtempchains; tempchain++) {
5397          if (rearrange(op,data,tempchain))
5398             changetree[tempchain] = TRUE;
5399       }
5400 
5401       temptreeswap(op,data,changetree,apps);
5402 
5403 
5404       if (apps == totchains - 1) /* end of run? */
5405         runend = TRUE;
5406       if (temptrees[0]->likelihood > bestlike) {
5407         if (ONEBESTREE) {
5408            FClose(bestree);
5409            bestree = fopen("bestree","w+");
5410 	   fprintf(bestree, "Chain #%2ld (%s) Step:%8ld\n",apps+1,
5411              chainlit[chaintype], indecks+1);
5412 /* debug DEBUG warning WARNING--no rectreeout routine */
5413            /*rec_outtree(temptrees[0]->root->back,TRUE, &bestree);*/
5414 	   fprintf(bestree, "; [%12.10f]\n", temptrees[0]->likelihood);
5415 	   bestlike = temptrees[0]->likelihood;
5416         }
5417         else {
5418 	   fprintf(bestree, "Chain #%2ld (%s) Step:%8ld\n",apps+1,
5419              chainlit[chaintype], indecks+1);
5420 /* debug DEBUG warning WARNING--no rectreeout routine */
5421            /*rec_outtree(temptrees[0]->root->back,TRUE, &bestree); */
5422 	   fprintf(bestree, "; [%12.10f]\n", temptrees[0]->likelihood);
5423 	   bestlike = temptrees[0]->likelihood;
5424         }
5425       }
5426       if (indecks == metout) {
5427         if (op->numout[chaintype] == 0)
5428           sametree[locus][0] = FALSE;
5429         else
5430           sametree[locus][op->numout[chaintype]] = !changetree[0];
5431         changetree[0] = FALSE;
5432 #if !ALWAYS_REJECT
5433 #if 0 /* old heating */
5434         if (op->temperature[0] == 1.0) { /* not too heated */
5435 	  op->numout[chaintype]++;
5436        	  scoretree(op,data,apps);
5437           if(op->map)
5438             traitlike(op,data,curtree,countsites(op,data),
5439               op->mutrait,op->traitratio, op->pd, traitarray);
5440         }
5441 #endif
5442 	op->numout[chaintype]++;
5443 /* set curtree to the coldest tree for scoretree() scoring */
5444         curtree = temptrees[0];
5445        	scoretree(op,data,apps);
5446         if(op->map)
5447           traitlike(op,data,curtree,countsites(op,data),
5448             op->mutrait,op->traitratio, op->pd, traitarray);
5449 #endif
5450 	metout += op->increm[chaintype];
5451         if (op->treeprint) {
5452            fprintf(treefile,"\nlocus = %ld, chain = %ld, tree = %ld\n",
5453                    locus,apps,indecks);
5454 /* debug DEBUG warning WARNING--no rectreeout routine */
5455            /*rec_outtree(temptrees[0]->root->back,TRUE, &treefile);*/
5456            fprintf(treefile,";\n");
5457         }
5458       }
5459 #if 0
5460       if (op->progress && indecks == progout) {
5461 	printf(".");
5462         fflush(stdout);
5463 	progout += incrprog;
5464       }
5465 #endif
5466       if (op->progress) {
5467          if (!first) for(i = 0; i < PRINTNUM; i++) printf("\b");
5468          else printf(" examined: ");
5469          printf("%*ld",(int)PRINTNUM,indecks-NUMBURNIN+1);
5470          fflush(stdout);
5471          first = FALSE;
5472       }
5473     }
5474     if(runend) {
5475        if(op->map) {
5476           traitprint(countsites(op,data),traitarray,
5477             outfile,op->numout[chaintype]);
5478 #if TRUTHKNOWN
5479           traitresult(op,data,traitarray,outfile,op->numout[chaintype]);
5480 #endif
5481        }
5482     }
5483     printf(" trees");
5484     chainendcheck(op,data,curtree,apps,runend);
5485     rec_estimate(op,data,locus,apps,runend);
5486     if (TRUEHAP) {
5487       hapscore = hapdist(op,&truehaps,data,curtree->creatures);
5488       printf("Distance from truth: %ld\n",hapscore);
5489       fprintf(outfile,"Distance from truth: %ld\n",hapscore);
5490       hapscore = hapdist(op,&starthaps,data,curtree->creatures);
5491       printf("Distance from start: %ld\n",hapscore);
5492       fprintf(outfile,"Distance from start: %ld\n",hapscore);
5493     }
5494 
5495     if (op->map && runend) traitsiteplot(op,data,traitarray);
5496     theta0 = theti[locus][apps+1];
5497     rec0 = reci[locus][apps+1];
5498     if (min_theta_calc(op,data,theta0) > theta0 &&
5499        op->holding != 1) {
5500        if (apps == totchains - 2) {
5501           fprintf(outfile,"WARNING--lower bound on estimate of");
5502           fprintf(outfile," theta may have affected final result\n");
5503        }
5504        theta0 = min_theta_calc(op,data,theta0);
5505        theti[locus][apps+1] = theta0;
5506     }
5507     if (op->progress) {
5508        printf("accepted %ld/%ld trees",slacc,slid-NUMBURNIN);
5509        if (numdropped) printf(", dropped %ld",numdropped);
5510        if (swap) printf(", swapped %ld/%ld",swacc,swap);
5511        if (op->haplotyping)
5512           printf("and %ld/%ld haplotype changes\n",hacc,hap);
5513        else printf("\n");
5514     }
5515     if ((apps == totchains-1) && numdropped) {
5516        fprintf(outfile,"%ld trees were dropped from",numdropped);
5517        fprintf(outfile," the final chain\n");
5518     }
5519   }
5520   if(slacc == 0) {
5521      fprintf(outfile,"WARNING--no proposed trees ever accepted\n");
5522      fprintf(ERRFILE,"WARNING--no proposed trees ever accepted\n");
5523   }
5524   free(changetree);
5525 }  /* maketree */
5526 
5527 /************************************************************
5528  * freenodelet frees all fields of a nodelet and the actual *
5529  * nodelet too.                                             */
freenodelet(option_struct * op,data_fmt * data,node * p)5530 void freenodelet(option_struct *op, data_fmt *data, node *p)
5531 {
5532 free_x(op,p);
5533 if(op->map)free_z(op,p);
5534 if (p->nayme) free(p->nayme);
5535 ranges_Malloc(p,FALSE,0L);
5536 coal_Malloc(p,FALSE,0L);
5537 free(p);
5538 } /* freenodelet */
5539 
5540 /*******************************************************************
5541  * finalfree() frees                sum, reci, sametree,           *
5542  * theti,           the options structure, and the data structure. */
finalfree(option_struct * op,data_fmt * data)5543 void finalfree(option_struct *op, data_fmt *data)
5544 {
5545 long i, j, k, chtype, numloci;
5546 
5547 numloci = 0;
5548 
5549 numloci = getdata_numloci(op,data);
5550 
5551 free(reci[0]);
5552 free(reci);
5553 free(theti[0]);
5554 free(theti);
5555 free(sametree[0]);
5556 free(sametree);
5557 
5558 for(i = 0; i < numloci; i++)
5559    for(j = 0; j < 1+op->numchains[1]; j++) {
5560       if (j) chtype = 1;
5561       else chtype = 0;
5562       for(k = 0; k < op->numout[chtype]; k++) {
5563 #if GROWTHUSED
5564          free(sum[i][j][k].kk);
5565          free(sum[i][j][k].kend);
5566          free(sum[i][j][k].actives);
5567 #endif
5568          free(sum[i][j][k].eventtype);
5569          free(sum[i][j][k].sitescore);
5570       }
5571 }
5572 free(sum[0][0]);
5573 free(sum[0]);
5574 free(sum);
5575 
5576 freedata(data);
5577 
5578 free(op->probcat);
5579 free(op->rate);
5580 free(op->temperature);
5581 if (!op->same_ne) free(op->ne_ratio);
5582 if (op->panel) free(op->numpanel);
5583 free(op);
5584 
5585 } /* finalfree */
5586 
5587 
main(int argc,char * argv[])5588 int main(int argc, char *argv[])
5589 {  /* Recombine */
5590 long i, numloci, numpop;
5591 data_fmt data;
5592 option_struct *options;
5593 char infilename[100], outfilename[100], logfilename[100],
5594    bestrfilename[100], intrfilename[100], trfilename[100];
5595 
5596 #ifdef MAC
5597 char spafilename[100], seedfilename[100];
5598 
5599    argv[0] = "Recombine";
5600 #endif
5601 
5602 /* Open various filenames. */
5603 
5604 openfile(&infile,INFILE,"r",argv[0],infilename);
5605 openfile(&simlog,"simlog","w",argv[0],logfilename);
5606 openfile(&outfile,OUTFILE,"w",argv[0],outfilename);
5607 
5608 /* warning WARNING debug DEBUG--initialization handled wrong */
5609 population = 0;
5610 
5611 setupoption_struct(&options);
5612 options->ibmpc = IBMCRT;
5613 options->ansi = ANSICRT;
5614 getoptions(options);
5615 for (i = 1; i <= 1000; i++)
5616   clearseed = randum();
5617 if (options->usertree)
5618   openfile(&intree,INTREE,"r",argv[0],intrfilename);
5619 if (options->treeprint)
5620   openfile(&treefile,TREEFILE,"w",argv[0],trfilename);
5621 openfile(&bestree,"bestree","w",argv[0],bestrfilename);
5622 
5623 if (options->newdata) getinput(options,&data);
5624 else getnodata(options,&data);
5625 
5626 if (options->printdata) printdnadata(options,&data,outfile);
5627 
5628 firstinit(options,&data);
5629 
5630 if (ALWAYS_ACCEPT) fprintf(outfile,"***ALWAYS ACCEPT MODE!***");
5631 if (ALWAYS_REJECT) fprintf(outfile,"***ALWAYS REJECT MODE!***");
5632 
5633 if (options->newdata) {
5634    temptrees = (tree **)calloc(options->numtempchains,sizeof(tree *));
5635    numpop = getdata_numpop(options,&data);
5636    for(population = 0; population < numpop; population++) {
5637       popinit(options,&data);
5638       numloci = getdata_numloci(options,&data);
5639       for (locus = 0; locus < numloci; locus++) {
5640          if (options->progress) printf("Locus %ld\n",locus+1);
5641          fprintf(outfile,"\n---------------------------------------\n");
5642          fprintf(outfile, "      Locus %ld\n",locus+1);
5643          fprintf(outfile,"---------------------------------------\n");
5644          if (!options->same_ne)
5645             fprintf(outfile,"Assumed relative Ne = %f\n",
5646             options->ne_ratio[locus]);
5647          if (options->holding) {
5648             if (options->holding == 1)
5649                fprintf(outfile,"Theta was held contant.\n");
5650             else
5651                fprintf(outfile,"Recombination was held constant.\n");
5652          }
5653          locusinit(options,&data);
5654 #if !ALWAYS_REJECT
5655          watttheta = watterson(options,&data);
5656 #else
5657          watttheta = ARBITRARY_THETA;
5658 #endif
5659          if (options->watt) theta0 = watttheta;
5660          maketree(options,&data);
5661          end_of_locus_free(options,&data);
5662       }
5663       locus--;
5664       if (numloci > 1) rec_estimate(options,&data,-1L,totchains-1,TRUE);
5665       end_of_population_free(options,&data);
5666    }
5667    free(temptrees);
5668 } else {
5669    rec_scoreread(options,&data);
5670    numloci = getdata_numloci(options,&data);
5671    if (numloci > 1) rec_estimate(options,&data,-1L,totchains-1,TRUE);
5672    else rec_estimate(options,&data,0L,totchains-1,TRUE);
5673 }
5674 
5675 finalfree(options,&data);
5676 
5677 FClose(infile);
5678 FClose(outfile);
5679 FClose(treefile);
5680 FClose(bestree);
5681 FClose(seedfile);
5682 FClose(simlog);
5683 FClose(spacefile);
5684 
5685 #ifdef MAC
5686    strcpy(seedfilename,"seedfile");
5687    strcpy(spafilename,"spacefile");
5688    fixmacfile(outfilename);
5689    fixmacfile(infilename);
5690    fixmacfile(bestrfilename);
5691    fixmacfile(logfilename);
5692    fixmacfile(intrfilename);
5693    fixmacfile(seedfilename);
5694    fixmacfile(trfilename);
5695    fixmacfile(spafilename);
5696 #endif
5697 
5698 printf("PROGRAM DONE\n");
5699 exit(0);
5700 
5701 }  /* recombine */
5702 
eof(FILE * f)5703 int eof(FILE *f)
5704 {
5705     register int ch;
5706 
5707     if (feof(f))
5708         return 1;
5709     if (f == stdin)
5710         return 0;
5711     ch = getc(f);
5712     if (ch == EOF)
5713         return 1;
5714     ungetc(ch, f);
5715     return 0;
5716 } /* eof */
5717 
eoln(FILE * f)5718 int eoln(FILE *f)
5719 {
5720   register int ch;
5721 
5722   ch = getc(f);
5723   if (ch == EOF)
5724     return 1;
5725   ungetc(ch, f);
5726   return (ch == '\n');
5727 } /* eoln */
5728