1 #include "recombine.h"
2 #include "getdata.h"
3
4 #ifdef DMEMDEBUG
5 #define MEMDEBUG
6 #include "memdebug.h"
7 #endif
8
9 #ifdef DMALLOC_FUNC_CHECK
10 #include "/usr/local/include/dmalloc.h"
11 #endif
12
13 /***************************************************************************
14 * Version 0.40. (c) Copyright 1986, 1991, 1992 by the University of *
15 * Washington and Joseph Felsenstein. Written by Joseph Felsenstein, *
16 * Mary K. Kuhner and Jon A. Yamato, with some additional grunt work by *
17 * Sean T. Lamont. Permission is granted to copy and use this program *
18 * provided no fee is charged for it and provided that this copyright *
19 * notice is not removed. *
20 * *
21 ***************************************************************************/
22
23 /* This program implements a Hastings-Metropolis Monte Carlo
24 Maximum Likelihood sampling method for phylogenetic trees
25 with recombination. */
26
27 /* Time, 'tyme', in this tree is measured from the tips to the root.
28 I.e. the tips are at tyme '0', and the root node has the largest
29 value for 'tyme'. */
30
31 /* Modified to accept multiple loci. */
32
33 /* Modified to "plump" input non-recombinant tree */
34
35 /* Modified to track final coalescence and begin to handle microsats */
36
37 /* Modified to handle panel snps, and real FC differentiation */
38
39 /* Being modified to handle basic haplotyping */
40 /* WARNING MARY temporary variables */
41 data_fmt truehaps;
42 data_fmt starthaps;
43
44 /* these definitions will be extern in other files*/
45 extern double **reci, **theti;
46 extern treerec ***sum;
47 extern unsigned int baseA, baseC, baseG, baseT;
48
49 extern long countcoalbranches(tree *tr);
50 extern void model_alloc(option_struct *op, data_fmt *data);
51 extern void scoretree(option_struct *op, data_fmt *data, long chain);
52 extern void rec_estimate(option_struct *op, data_fmt *data, long lowcus,
53 long chain, boolean locusend);
54 extern boolean twiddle(option_struct *op, data_fmt *data);
55 extern boolean fliphap(option_struct *op, data_fmt *data, tree *tr);
56 extern boolean flipdrop(option_struct *op, data_fmt *data, tree *tr,
57 long numdrop);
58 extern void add_one_recomb(option_struct *op, data_fmt *data, tree *tr,
59 long *siteptr, long chain);
60 extern void addfractrecomb(option_struct *op, data_fmt *data,
61 long chain, treerec ***treesum);
62 extern boolean makedrop(option_struct *op, data_fmt *data);
63 extern void setupdnadata(dnadata **dna, long *numsites, long numseq,
64 long numloci, long numpop);
65 extern void freednadata(dnadata *dna);
66 extern void empiricaldnafreqs(option_struct *op, data_fmt *data,
67 tree *curtree);
68 extern void inputdnaweights(long numchars, dnadata *dna, option_struct *op);
69 extern void remove_branch(tlist *start, node *target);
70 extern void rename_branch(tlist *start, node *source, node *target);
71 extern void rec_scoreread(option_struct *op, data_fmt *data);
72 extern void unflag(node *p);
73 extern void traverse_unflag(tree *tr, node *p);
74 extern void read_spacefile(FILE *file, option_struct *op, data_fmt *data);
75 extern void printdnadata(option_struct *op, data_fmt *data, FILE *out);
76
77 extern long countsites(option_struct *op, data_fmt *data);
78 extern long getdata_sitecount(option_struct *op, data_fmt *data,
79 long marker);
80 extern double getnumlinks(option_struct *op, data_fmt *data, long start,
81 long finish);
82
83 /* functions for haplotyping */
84 extern void read_flipfile(option_struct *op, data_fmt *data, tree *tr,
85 FILE *file);
86 extern void pruneflips(option_struct *op, data_fmt *data, tree *tr);
87 extern void copyhaps(option_struct *op, data_fmt *oldhap, data_fmt *newhap,
88 long numpop, long numloci, long numind, long *numsites);
89
90 /* functions for heating */
91 extern double coalprob(option_struct *op, data_fmt *data, tree *tr, double
92 theta, double r);
93 extern int longcmp(const void *v1, const void *v2);
94
95 /* functions from traitlike.c for trait likelihoods */
96 extern void copytraits(node *source, node *target);
97 extern void traitlike(option_struct *op, data_fmt *data, tree *tr,
98 long numsites, double mutrait, double traitratio, double pd,
99 double *traitarray);
100 extern void traitread(tree *tr, long numseq);
101 extern void traitprint(long numsites, double *traitarray, FILE *outfile, long numout);
102 extern void traitsiteplot(option_struct *op, data_fmt *data, double *llike);
103 extern void traitresult(option_struct *op, data_fmt *data,
104 double *traitarray, FILE *outfile, long numout);
105
106 tree **temptrees;/* the current tree for each temperature-chain */
107 tree *curtree; /* the current tree */
108 long locus; /* which locus are we currently working on? */
109 long population; /* which population are we currently working on? */
110 longer seed; /* array used to hold randum seed */
111 long numdropped = 0; /* number of trees dropped from
112 consideration */
113
114 /* next are a largish set of temporary variables used to hold various
115 values before their structures are setup. Mostly data variables. */
116 double freqa, freqc, freqg, freqt; /* dna freqs */
117 double locus_ttratio; /* dna transition/transversion ratio */
118
119 FILE *infile, *outfile, *treefile, *bestree, *seedfile, *simlog,
120 *parmfile, *intree, *weightfile, *spacefile;
121 long rootnum, apps, col, numtrees, totchains;
122 boolean **sametree;
123 double clearseed, theta0, rec0, branch0;
124 long *category;
125 contribarr *contribution;
126 node *freenodes;
127 rlrec *alogf;
128 char gch;
129 long nodectr;
130 double sumweightrat, watttheta;
131 double *weightrat;
132 valrec *tbl;
133 long slid, slacc, hap, hacc, swap, swacc, indecks;
134
135
136 /* the following are for reading in parameters (readparmfile),
137 and also writing them back out again (rec_parmfilewrite) */
138 char *booltokens[NUMBOOL] = {"interleaved","printdata","progress",
139 "print-trees","freqs-from-data","categories",
140 "watterson", "usertree", "autocorrelation",
141 "newdata","same-ne","interactive","mhmcsave",
142 "panel","map","final-coalescence","full-snp",
143 "haplotyping","profile-like","norecsnp"},
144 *numbertokens[NUMNUMBER] = {"ttratio","short-chains",
145 "short-steps","short-inc","long-chains",
146 "long-inc","long-steps","rec-rate",
147 "holding","mu-ratio","trait-ratio",
148 "dis-freq","hapdrop","heating"};
149
150 /* the following are for managing x array recycling */
151 long numx;
152 datalike_fmt *sparex[XARRAYSIZE];
153
openfile(FILE ** fp,char * filename,char * mode,char * application,char * perm)154 void openfile(FILE **fp, char *filename, char *mode, char *application,
155 char *perm)
156 {
157 FILE *of;
158 char file[100];
159
160 strcpy(file,filename);
161 while (1){
162 of = fopen(file,mode);
163 if (of)
164 break;
165 else {
166 switch (*mode){
167 case 'r':
168 fprintf(stdout,"%s: can't read %s\n",application,file);
169 file[0] = '\0';
170 while (file[0] =='\0'){
171 fprintf(stdout,"Please enter a new filename>");
172 fgets(file,100,stdin);
173 }
174 break;
175 case 'w':
176 fprintf(stdout,"%s: can't write %s\n",application,file);
177 file[0] = '\0';
178 while (file[0] =='\0'){
179 fprintf(stdout,"Please enter a new filename>");
180 fgets(file,100,stdin);
181 }
182 break;
183 }
184 }
185 }
186 *fp=of;
187 if (perm != NULL)
188 strcpy(perm,file);
189 } /* openfile */
190
191
lowercase(char c)192 char lowercase(char c)
193 {
194 return((char)tolower((int) c));
195 } /* lowercase */
196
197
randum(void)198 double randum(void)
199 /* Mary's version--faster but needs 32 bits. Loops have been unrolled
200 for speed. */
201 {
202 long newseed0, newseed1, newseed2;
203
204 newseed0 = 1549*seed[0];
205 newseed1 = newseed0/2048;
206 newseed0 &= 2047;
207 newseed2 = newseed1/2048;
208 newseed1 &= 2047;
209 newseed1 += 1549*seed[1]
210 + 812*seed[0];
211 newseed2 += newseed1/2048;
212 newseed1 &= 2047;
213 newseed2 += 1549*seed[2]
214 + 812*seed[1];
215
216 seed[0] = newseed0;
217 seed[1] = newseed1;
218 seed[2] = newseed2 & 1023;
219 return (((seed[0]/2048.0 + seed[1])/2048.0 + seed[2])/1024.0);
220 } /* randum */
221
222
223 /****************************************************
224 * setupoption_struct() creates a new option_struct */
setupoption_struct(option_struct ** op)225 void setupoption_struct(option_struct **op)
226 {
227
228 (*op) = (option_struct *)calloc(1,sizeof(option_struct));
229
230 (*op)->rate = (double *)calloc(1,sizeof(double));
231 (*op)->probcat = (double *)calloc(1,sizeof(double));
232 (*op)->temperature = NULL;
233 (*op)->numpanel = NULL;
234
235 } /* setupoption_struct */
236
237
238 /********************************************************************
239 * istip() returns TRUE if the nodelet is a tip and FALSE otherwise */
istip(node * p)240 boolean istip (node *p)
241 {
242 return(p->type == 't');
243 } /* istip */
244
245
246 /***********************************************************************
247 * iscoal returns TRUE if the nodelet is a coalescent, FALSE otherwise */
iscoal(node * p)248 boolean iscoal (node *p)
249 {
250 return(p->type == 'c');
251 } /* iscoal */
252
253
254 /************************************************************************
255 * isrecomb returns TRUE if the nodelet is recombinant, FALSE otherwise */
isrecomb(node * p)256 boolean isrecomb (node *p)
257 {
258 return(p->type == 'r');
259 } /* isrecomb */
260
261
262 /******************************************************************
263 * branchsub() substitutes one branch for another in one interval *
264 * in a tymelist. */
branchsub(tlist * t,node * oldbranch,node * newbranch)265 boolean branchsub(tlist *t, node *oldbranch, node *newbranch)
266 {
267 long i;
268 boolean found;
269
270 found = FALSE;
271 for(i = 0; i < t->numbranch; i++)
272 if (t->branchlist[i] == oldbranch) {
273 t->branchlist[i] = newbranch;
274 found = TRUE;
275 }
276
277 return(found);
278 } /* branchsub */
279
280
281 /************************************************************************
282 * insertaftertymelist adds the node "p" to a tymelist after, inserting *
283 * within the tymeslice pointed to by "t" using bisection. */
insertaftertymelist(tlist * t,node * p)284 void insertaftertymelist(tlist *t, node *p)
285 {
286 long i, j, temp;
287 tlist *s;
288 node *q;
289 boolean b1, b2;
290
291 newtymenode(&s);
292 s->eventnode = findtop(p);
293 s->age = t->age;
294 t->age = p->tyme;
295 s->prev = t;
296 s->succ = t->succ;
297 t->succ = s;
298 if (s->succ != NULL) s->succ->prev = s;
299
300 /* now deal with the branchlist for the new tymelist entry;
301 the "if" setting "temp" is a workaround for not being able to
302 allocate zero size chunks of memory by malloc-debug/gcc */
303 if (t->numbranch-1 > 0) temp = t->numbranch - 1;
304 else temp = t->numbranch;
305 s->branchlist = (node **)calloc(1,temp*sizeof(node *));
306 s->numbranch = 0;
307 q = findunique(p);
308 if (isrecomb(q)) { /* if we've added a recombinant node */
309 for (i = 0; i < t->numbranch; i++) {
310 if (t->branchlist[i] == q->back) continue;
311 s->branchlist[s->numbranch] = t->branchlist[i];
312 s->numbranch++;
313 }
314 s->branchlist =
315 (node **)realloc(s->branchlist, (s->numbranch+2)*sizeof(node *));
316 s->branchlist[s->numbranch] = q->next;
317 s->branchlist[s->numbranch+1] = q->next->next;
318 s->numbranch += 2;
319 } else { /* else we've added a coalescent node */
320 for (i = 0; i < t->numbranch; i++)
321 if(t->branchlist[i] != q->next->back &&
322 t->branchlist[i] != q->next->next->back) {
323 s->branchlist[s->numbranch] = t->branchlist[i];
324 s->numbranch++;
325 } else if (t->branchlist[i] == q->next->back) {
326 s->branchlist[s->numbranch] = q;
327 s->numbranch++;
328 }
329 }
330
331 /* now remove the old branches from the rest of the tymelist */
332 if (isrecomb(q)) {
333 for (s = s->succ; s != NULL; s = s->succ) {
334 if (!branchsub(s,q->back,q->next)) break;
335 s->branchlist =
336 (node **)realloc(s->branchlist, (s->numbranch+1)*sizeof(node *));
337 s->branchlist[s->numbranch] = q->next->next;
338 s->numbranch++;
339 }
340 } else {
341 for (s = s->succ; s != NULL; s = s->succ) {
342 b1 = branchsub(s,q->next->back,q);
343 b2 = branchsub(s,q->next->next->back,q);
344 if (b1 && b2) {
345 for (i = 0; i < t->numbranch; i++)
346 if (s->branchlist[i] == q) break;
347 for (j = i; j < t->numbranch-1; j++)
348 s->branchlist[j] = s->branchlist[j+1];
349 s->numbranch--;
350 }
351 if (!b1 && !b2) break;
352 }
353 }
354
355 } /* insertaftertymelist */
356
357
358 /******************************************************************
359 * subtymelist() completely removes the tymelist entry passed in. *
360 * the passed in branch should be the exact branch that wants to *
361 * be removed in the case of a recombinant node, or the branch *
362 * that will "replace" the missing branch in a coalescent node. */
subtymelist(tlist * t,node * branch,boolean all)363 void subtymelist(tlist *t, node *branch, boolean all)
364 {
365
366 if (isrecomb(t->eventnode)) {
367 remove_branch(t,branch);
368 if (all) remove_branch(t,otherparent(branch));
369 else rename_branch(t,findunique(t->eventnode)->back,otherparent(branch));
370 } else
371 if (all) remove_branch(t,t->eventnode);
372 else rename_branch(t,branch,t->eventnode);
373
374 if (t->prev) {t->prev->age = t->age; t->prev->succ = t->succ;}
375 else {fprintf(ERRFILE,"ERROR:tried to remove first tymelist entry!\n");}
376 if (t->succ) t->succ->prev = t->prev;
377
378 freetymenode(t);
379
380 } /* subtymelist */
381
382
383 /****************************************************************
384 * printtymelist() prints the entire tymelist: a debug function */
printtymelist(tlist * t)385 void printtymelist(tlist *t)
386 {
387 long i;
388
389 fprintf(ERRFILE,"TYMELIST BEGINS\n");
390 while (t != NULL) {
391 fprintf(ERRFILE,"%3ld age%8.6f--",
392 t->eventnode->number, t->age);
393 for (i = 0; i < t->numbranch; i++) {
394 fprintf(ERRFILE," %3ld", t->branchlist[i]->number);
395 if (t->branchlist[i]->top) fprintf(ERRFILE,"t ");
396 }
397 fprintf(ERRFILE,"\n");
398 t = t->succ;
399 }
400 fprintf(ERRFILE,"TYMELIST ENDS\n");
401 } /* printtymelist */
402
403
404 /*****************************************************************
405 * lengthof() returns the length of the branch "rootward" of the *
406 * passed node */
lengthof(node * p)407 double lengthof(node *p)
408 {
409 return fabs(p->tyme - p->back->tyme);
410 } /* lengthof */
411
412
413 /******************************************************************
414 * fixlength() sets the length and v values for a mother-daughter *
415 * pair of nodelets. */
fixlength(option_struct * op,data_fmt * data,node * p)416 void fixlength(option_struct *op, data_fmt *data, node *p)
417 {
418 double newlength;
419
420 newlength = lengthof(p);
421 p->length = newlength;
422 ltov(op,data,p);
423 p->back->length = newlength;
424 ltov(op,data,p->back);
425
426 } /* fixlength */
427
428
429 /************************************************
430 * findtop() finds a "top" nodelet in the node. */
findtop(node * p)431 node *findtop(node *p)
432 {
433
434 if (!istip(p))
435 while (!p->top) p = p->next;
436
437 return(p);
438
439 } /* findtop */
440
441
442 /********************************************************************
443 * findunique() returns the unique nodelet of a node (i.e. "bottom" *
444 * of a recombinant node or "top" of a coalescent node). */
findunique(node * p)445 node *findunique(node *p)
446 {
447
448 if (istip(p)) return(p);
449
450 if (isrecomb(p))
451 while((p)->top) p = p->next;
452 else
453 while (!p->top) p = p->next;
454
455 return(p);
456
457 } /* findunique */
458
459
460 /*********************************************************************
461 * otherdtr() finds the other daughter nodelet of a node, it assumes *
462 * that a daughter nodelet was passed to it. */
otherdtr(node * p)463 node *otherdtr(node *p)
464 {
465 node *q;
466
467 if (isrecomb(p)) {
468 fprintf(ERRFILE,"ERROR:otherdtr() passed a recombinant node!\n");
469 return(NULL);
470 }
471
472 if (p->top) {
473 fprintf(ERRFILE,"ERROR:otherdtr() passed a top nodelet!\n");
474 return(NULL);
475 }
476
477 for(q = p->next; q->top; q = q->next)
478 ;
479
480 return(q);
481
482 } /* otherdtr */
483
484
485 /*********************************************************************
486 * findlink() takes a recombinant node and returns the # of the site *
487 * to the left of the cut link; or FLAGLONG in error states. */
findlink(node * p)488 long findlink(node *p)
489 {
490 node *q;
491
492 if (!isrecomb(p)) {
493 fprintf(ERRFILE,"ERROR:findlink passed non-recombinant node\n");
494 return(FLAGLONG);
495 }
496
497 q = findunique(p);
498 if (q->next->recstart) return(q->next->next->recend);
499 else return(q->next->recend);
500
501 } /* findlink */
502
503
504 /**********************************************************************
505 * otherparent() finds the other parent nodelet of a node, it assumes *
506 * that a parent nodelet was passed to it. */
otherparent(node * p)507 node *otherparent(node *p)
508 {
509 node *q;
510
511 if (!isrecomb(p)) {
512 fprintf(ERRFILE,"ERROR:otherparent() passed a non-recombinant node!\n");
513 return(NULL);
514 }
515
516 if (!p->top) {
517 fprintf(ERRFILE,"ERROR:otherparent() passed a non-top nodelet!\n");
518 return(NULL);
519 }
520
521 for(q = p->next; !q->top; q = q->next)
522 ;
523
524 return(q);
525
526 } /* otherdtr */
527
528
529 /*********************************************
530 * free_z() frees the "z" field of a nodelet */
free_z(option_struct * op,node * p)531 void free_z(option_struct *op, node *p)
532 {
533 if (p->z == NULL) return;
534
535 free(p->z);
536 p->z = NULL;
537
538 } /* free_z */
539
540
541 /***************************************************************
542 * allocate_z() allocates space for the "z" field of a nodelet */
allocate_z(option_struct * op,data_fmt * data,node * p)543 void allocate_z(option_struct *op, data_fmt *data, node *p)
544 {
545 if (p->z) free(p->z);
546
547 p->z = (double *)calloc(NUMTRAIT,sizeof(double));
548
549 } /* allocate_z */
550
551
552 /*********************************************
553 * free_x() frees the "x" field of a nodelet */
free_x(option_struct * op,node * p)554 void free_x(option_struct *op, node *p)
555 {
556
557 if (p->x == NULL) return;
558
559 if (numx < XARRAYSIZE) {
560 sparex[numx] = p->x;
561 numx++;
562 } else {
563 switch(op->datatype) {
564 case 'a':
565 break;
566 case 'b':
567 case 'm':
568 free(p->x->a);
569 break;
570 case 'n':
571 case 's':
572 free(p->x->s[0][0][0]);
573 free(p->x->s[0][0]);
574 free(p->x->s[0]);
575 free(p->x->s);
576 break;
577 default:
578 fprintf(ERRFILE,"ERROR:free_x: can't get here!\n");
579 exit(-1);
580 }
581 free(p->x);
582 }
583
584 p->x = NULL;
585
586 } /* free_x */
587
588
589 /************************************************************
590 * allocate_x() allocates space for the "x" field of a nodelet */
allocate_x(option_struct * op,data_fmt * data,node * p)591 void allocate_x(option_struct *op, data_fmt *data, node *p)
592 {
593 long i, j, k, nummarkers = 0, numloci, numslice;
594
595 if (p->x != NULL) free_x(op,p);
596
597 if(numx > 0) {
598 p->x = sparex[numx-1];
599 sparex[numx-1] = NULL;
600 numx--;
601 } else {
602 p->x = (datalike_fmt *)calloc(1,sizeof(datalike_fmt));
603 switch(op->datatype) {
604 case 'a':
605 p->x->a = NULL;
606 p->x->s = NULL;
607 break;
608 case 'b':
609 case 'm':
610 numloci = getdata_numloci(op,data);
611 p->x->s = NULL;
612 p->x->a =
613 (double **)calloc(numloci,sizeof(double *));
614 p->x->a[0] = (double *)calloc(numloci*MICRO_ALLELEMAX,
615 sizeof(double));
616 for(i = 1; i < numloci; i++)
617 p->x->a[i] = p->x->a[0] + i*MICRO_ALLELEMAX;
618 break;
619 case 'n':
620 nummarkers += NUMINVAR;
621 case 's':
622 numslice = (op->panel) ? NUMSLICE : 1L;
623 nummarkers += getdata_nummarkers(op,data);
624 p->x->a = NULL;
625 p->x->s = (double ****)calloc(nummarkers,sizeof(double ***));
626 p->x->s[0] = (double ***)
627 calloc(nummarkers*op->categs,sizeof(double **));
628 for(i = 1; i < nummarkers; i++)
629 p->x->s[i] = p->x->s[0] + i*op->categs;
630 p->x->s[0][0] = (double **)
631 calloc(nummarkers*op->categs*numslice,sizeof(double *));
632 for(i = 0; i < nummarkers; i++)
633 for(j = 0; j < op->categs; j++)
634 p->x->s[i][j] = p->x->s[0][0] + i*op->categs*numslice +
635 j*numslice;
636 p->x->s[0][0][0] = (double *)
637 calloc(nummarkers*op->categs*numslice*4L,sizeof(double));
638 for(i = 0; i < nummarkers; i++)
639 for(j = 0; j < op->categs; j++)
640 for(k = 0; k < numslice; k++)
641 p->x->s[i][j][k] = p->x->s[0][0][0] +
642 i*op->categs*numslice*4L +
643 j*numslice*4L + k*4L;
644 break;
645 default:
646 fprintf(ERRFILE,"ERROR:alloc_x: can't get here!\n");
647 exit(-1);
648 }
649 }
650
651 } /* allocate_x */
652
653
654 /************************************************************
655 * init_coal_alloc() callocs and initializes a range field. */
init_coal_alloc(long ** coal,long numcoalpairs)656 void init_coal_alloc(long **coal, long numcoalpairs)
657 {
658
659 if (*coal != NULL) free(*coal);
660
661 (*coal) = (long *)calloc(numcoalpairs * 2 + 2,sizeof(long));
662 (*coal)[0] = numcoalpairs;
663 (*coal)[numcoalpairs*2+1] = FLAGLONG;
664
665 } /* init_coal_alloc */
666
667
668 /****************************************************************
669 * coal_Malloc() callocs or frees the "coal" field of a nodelet */
coal_Malloc(node * p,boolean allokate,long numcoalpairs)670 void coal_Malloc(node *p, boolean allokate, long numcoalpairs)
671 {
672
673 if (allokate && p->top) {
674 init_coal_alloc(&(p->coal),numcoalpairs);
675 } else {
676 if (p->coal != NULL) {
677 free(p->coal);
678 p->coal = NULL;
679 }
680 }
681
682 } /* coal_Malloc */
683
684
685 /**************************************************************
686 * init_ranges_alloc() callocs and initializes a range field. */
init_ranges_alloc(long ** ranges,long numrangepairs)687 void init_ranges_alloc(long **ranges, long numrangepairs)
688 {
689
690 if (*ranges != NULL) free(*ranges);
691
692 (*ranges) = (long *)calloc(numrangepairs * 2 + 2,sizeof(long));
693 (*ranges)[0] = numrangepairs;
694 (*ranges)[numrangepairs*2+1] = FLAGLONG;
695
696 } /* init_ranges_alloc */
697
698
699 /********************************************************************
700 * ranges_Malloc() callocs or frees the "ranges" field of a nodelet */
ranges_Malloc(node * p,boolean allokate,long numrangepairs)701 void ranges_Malloc(node *p, boolean allokate, long numrangepairs)
702 {
703
704 if (allokate && p->top) {
705 init_ranges_alloc(&(p->ranges),numrangepairs);
706 } else {
707 if (p->ranges != NULL) {
708 free(p->ranges);
709 p->ranges = NULL;
710 }
711 }
712
713 } /* ranges_Malloc */
714
715
716 /****************************************************************
717 * meld_adjacent_ranges() concatenates elements adjacent to the *
718 * passed position if it is appropiate to do so. */
meld_adjacent_ranges(long * cranges,long newelem)719 void meld_adjacent_ranges(long *cranges, long newelem)
720 {
721 long numpairs, nummove, prevelem, nextelem, lastelem;
722
723 prevelem = newelem - 1;
724 nextelem = newelem + 2;
725 lastelem = 2*cranges[0]+1;
726
727 numpairs = 0;
728 if (cranges[newelem] == cranges[prevelem]+1) numpairs++;
729 if (cranges[newelem+1]+1 == cranges[nextelem]) {
730 if (numpairs) numpairs++;
731 else numpairs = -1L;
732 }
733 if (numpairs == 2 && prevelem == 0) numpairs = -1L;
734 if (numpairs == 2 && nextelem == lastelem) numpairs--;
735
736 if (numpairs == 1 && prevelem != 0) {
737 nummove = 2*cranges[0] - newelem;
738 cranges[prevelem] = cranges[newelem+1];
739 memmove(&cranges[newelem],&cranges[nextelem],nummove*sizeof(long));
740 cranges[0]--;
741 }
742 if (numpairs == -1 && nextelem != lastelem) {
743 nummove = 2*cranges[0] - newelem - 2;
744 cranges[newelem+1] = cranges[nextelem+1];
745 memmove(&cranges[nextelem],&cranges[nextelem+2],nummove*sizeof(long));
746 cranges[0]--;
747 }
748 if (numpairs == 2) {
749 nummove = 2*cranges[0] - newelem - 2;
750 cranges[newelem-1] = cranges[nextelem+1];
751 memmove(&cranges[newelem],&cranges[nextelem+2],nummove*sizeof(long));
752 cranges[0] -= 2;
753 }
754
755 } /* meld_adjacent_ranges */
756
757
758 /****************************************************
759 * addrange() adds a new element to the range field */
addrange(long ** nnewranges,long newstart,long newend)760 void addrange(long **nnewranges, long newstart, long newend)
761 {
762 long i, first, numpairs, nummove, *newranges;
763
764 if (newstart > newend)
765 fprintf(ERRFILE,"ERROR:addrange--start > end\n");
766
767 newranges = (*nnewranges);
768 numpairs = newranges[0]*2;
769
770 /* was there nothing in the initial range list? */
771 if(!numpairs) {
772 newranges[0]++;
773 newranges = (long *)realloc(newranges,4*sizeof(long));
774 (*nnewranges) = newranges;
775 newranges[1] = newstart;
776 newranges[2] = newend;
777 newranges[3] = FLAGLONG;
778 return;
779 }
780
781 /* do I start after everything else ends? */
782 if (newstart > newranges[numpairs]) {
783 if (newstart == newranges[numpairs]+1) {
784 newranges[numpairs] = newend;
785 } else {
786 newranges[0]++; numpairs += 2;
787 newranges = (long *)realloc(newranges,(numpairs+2)*sizeof(long));
788 (*nnewranges) = newranges;
789 newranges[numpairs-1] = newstart;
790 newranges[numpairs] = newend;
791 newranges[numpairs+1] = FLAGLONG;
792 }
793 return;
794 }
795
796 /* do I end before anything begins? */
797 if (newend < newranges[1]) {
798 if (newend == newranges[1]-1) {
799 newranges[1] = newstart;
800 } else {
801 newranges[0]++;
802 newranges =
803 (long *)realloc(newranges,(newranges[0]*2+2)*sizeof(long));
804 (*nnewranges) = newranges;
805 memmove(&newranges[3],&newranges[1],(numpairs+1)*sizeof(long));
806 newranges[1] = newstart;
807 newranges[2] = newend;
808 }
809 return;
810 }
811
812 /* am I contained in another interval altogether? OR
813 do I overlap one or more intervals? OR
814 am I between two already existing intervals? */
815 for(i = 1, numpairs = 0, first = 0; newranges[i] != FLAGLONG; i+=2) {
816 if(newstart >= newranges[i] && newend <= newranges[i+1]) return;
817 if (i != 1) {
818 if (newstart > newranges[i-1] && newend < newranges[i]) {
819 first = i;
820 break;
821 }
822 }
823 if((newranges[i] >= newstart && newranges[i] <= newend) ||
824 (newranges[i+1] >= newstart && newranges[i+1] <= newend)) {
825 numpairs++;
826 if (!first) first = i;
827 }
828 }
829
830 /* do I exist between two intervals? */
831 if (!numpairs) {
832 nummove = 2*newranges[0]+2 - first;
833 newranges[0]++;
834 newranges =
835 (long *)realloc(newranges,(newranges[0]*2+2)*sizeof(long));
836 (*nnewranges) = newranges;
837 memmove(&newranges[first+2],&newranges[first],nummove*sizeof(long));
838 newranges[first] = newstart;
839 newranges[first+1] = newend;
840 meld_adjacent_ranges(newranges,first);
841 return;
842 }
843
844 /* I must overlap one or more intervals */
845 if (numpairs == 1) {
846 newranges[first] =
847 (newranges[first] < newstart) ? newranges[first] : newstart;
848 newranges[first+1] =
849 (newranges[first+1] > newend) ? newranges[first+1] : newend;
850 } else {
851 nummove = 2*newranges[0]+2 - first - 2*numpairs;
852 newranges[0] -= numpairs-1;
853 newranges[first] =
854 (newranges[first] < newstart) ? newranges[first] : newstart;
855 newranges[first+1] =
856 (newranges[first+1+2*(numpairs-1)] > newend) ?
857 newranges[first+1+2*(numpairs-1)] : newend;
858 memmove(&newranges[first+2],&newranges[first+2*numpairs],
859 nummove*sizeof(long));
860 }
861 meld_adjacent_ranges(newranges,first);
862
863 } /* addrange */
864
865
866 /******************************************************************
867 * inrange() checks to see if a site is active on a given branch */
inrange(long * ranges,long site)868 boolean inrange(long *ranges, long site)
869 {
870 long i;
871
872 for(i = 1; ranges[i] != FLAGLONG; i+=2) {
873 if (ranges[i] > site) return(FALSE);
874 if (ranges[i+1] >= site) return(TRUE);
875 }
876
877 return(FALSE);
878
879 } /* inrange */
880
881
882 /************************************************************
883 * subrangefc() causes the passed range to be set to "dead" */
subrangefc(long ** newranges,long substart,long subend)884 void subrangefc(long **newranges, long substart, long subend)
885 {
886 long i, *nranges, numpairs, first, nummove;
887
888 nranges = (*newranges);
889
890 /* was there nothing in the initial range list? */
891 /* do I start after everything else ends? */
892 /* do I end before anything begins? */
893 if (!nranges[0] || substart > nranges[2*nranges[0]] ||
894 subend < nranges[1]) return;
895
896 /* am I contained in another interval altogether? OR
897 do I overlap one or more intervals? OR
898 am I between two already existing intervals? */
899 for(i = 1, numpairs = 0, first = 0; nranges[i] != FLAGLONG; i+=2) {
900 if(nranges[i] > subend) break;
901 if (i != 1) {
902 if (substart > nranges[i-1] && subend < nranges[i])
903 return;
904 }
905
906 /* if the deletion starts in this range OR
907 the deletion ends in this range OR
908 the deletion overlaps this range THEN */
909 if((substart >= nranges[i] && substart <= nranges[i+1]) ||
910 (subend >= nranges[i] && subend <= nranges[i+1]) ||
911 (substart <= nranges[i] && subend >= nranges[i+1])) {
912 numpairs++;
913 if (!first) first = i;
914 }
915 }
916
917 /* am I contained within a single interval? */
918 if (numpairs == 1) {
919 /* should I just remove the whole entry? */
920 if ((substart <= nranges[first]) && (subend >= nranges[first+1])) {
921 nummove = 2*nranges[0] - first;
922 memmove(&nranges[first],&nranges[first+2],nummove*sizeof(long));
923 nranges[0]--;
924 return;
925 }
926 /* or chop off the beginning? */
927 if (substart <= nranges[first]) {
928 nranges[first] = subend+1;
929 return;
930 }
931 /* or chop off the end? */
932 if (subend >= nranges[first+1]) {
933 nranges[first+1] = substart-1;
934 return;
935 }
936 /* then split it in two! */
937 nranges[0]++;
938 nranges =
939 (long *)realloc(nranges,(2*nranges[0]+2)*sizeof(long));
940 (*newranges) = nranges;
941 nummove = 2*nranges[0] - (first+1);
942 memmove(&nranges[first+3],&nranges[first+1],nummove*sizeof(long));
943 nranges[first+1] = substart-1;
944 nranges[first+2] = subend+1;
945 return;
946 }
947
948 /* I must overlap one or more intervals */
949 if (substart > nranges[first]) nranges[first+1] = substart-1;
950 if (subend < nranges[first+1+2*(numpairs-1)])
951 nranges[first+2*(numpairs-1)] = subend + 1;
952 else {
953 /* remove the last affected rangepair */
954 nummove = 2*nranges[0] - (first+2*(numpairs-1));
955 memmove(&nranges[first+2*(numpairs-1)],&nranges[first+2*(numpairs)],
956 nummove*sizeof(long));
957 nranges[0]--;
958 }
959 /* remove the middle affected rangepairs, if any */
960 nummove = 2*nranges[0]+2 - (first+2*(numpairs-1));
961 memmove(&nranges[first+2],&nranges[first+2*(numpairs-1)],
962 nummove*sizeof(long));
963 nranges[0] -= numpairs-2;
964 /* remove the first affected rangepair, if necessary */
965 if (substart <= nranges[first]) {
966 nummove = 2*nranges[0] - first;
967 memmove(&nranges[first],&nranges[first+2],nummove*sizeof(long));
968 nranges[0]--;
969 }
970
971 } /* subrangefc */
972
973
printrange(long * ranges)974 void printrange(long *ranges)
975 {
976 long i;
977
978 printf("\n%ld range pairs: ",ranges[0]);
979 for(i = 1; ranges[i] != FLAGLONG; i+=2)
980 printf("%ld to %ld ",ranges[i],ranges[i+1]);
981 printf("\n");
982
983 } /* printrange */
984
985
986 /* "newnode" & "freenode" are paired memory managers for tree nodes.
987 They maintain a linked list of unused nodes which should be faster
988 to use than "calloc" & "free" (at least for recombination).
989 They can only be used for internal nodes, not tips. */
newnode(node ** p)990 void newnode(node **p)
991 {
992 long i;
993 node *q;
994
995 if (freenodes == NULL) { /* need a new node */
996 (*p) = allocate_nodelet(3L,'i');
997 (*p)->number = nodectr;
998 for(q = (*p)->next; q != (*p); q = q->next) q->number = nodectr;
999 nodectr++;
1000 return;
1001 } else { /* recycle an old node */
1002 q=freenodes;
1003 freenodes=freenodes->back;
1004 for(i = 0; i < 3; i++) {
1005 ranges_Malloc(q,FALSE,0L);
1006 coal_Malloc(q,FALSE,0L);
1007 q->updated = FALSE;
1008 q->futileflag = 0L;
1009 q->recstart = q->recend = -1L;
1010 q->back = NULL;
1011 q = q->next;
1012 }
1013 *p = q;
1014 return;
1015 }
1016 } /* newnode */
1017
freenode(node * p)1018 void freenode(node *p)
1019 {
1020 p->back = freenodes;
1021 freenodes = p;
1022 } /* freenode */
1023 /* END of treenode allocation functions */
1024
newtymenode(tlist ** t)1025 void newtymenode(tlist **t)
1026 {
1027 *t = (tlist *)calloc(1,sizeof(tlist));
1028 (*t)->prev = NULL;
1029 (*t)->succ = NULL;
1030 } /* newtymenode */
1031
freetymenode(tlist * t)1032 void freetymenode(tlist *t)
1033 {
1034 if(t->branchlist!=NULL) free(t->branchlist);
1035 free(t);
1036 } /* freetymenode*/
1037
freetymelist(tlist * t)1038 void freetymelist(tlist *t)
1039 {
1040 if (t->succ != NULL) freetymelist(t->succ);
1041
1042 freetymenode(t);
1043
1044 } /* freetymelist */
1045
hookup(node * p,node * q)1046 void hookup(node *p, node *q)
1047 {
1048 p->back = q;
1049 q->back = p;
1050 } /* hookup */
1051
atr(node * p)1052 void atr(node *p)
1053 /* "atr" prints out a text representation of a tree. Pass
1054 curtree->root->back for normal results. This is a debugging
1055 function which is not normally called. */
1056 {
1057 long i;
1058 node *q;
1059
1060 if (p->back == curtree->root) {
1061 fprintf(ERRFILE,"next node is root\n");
1062 fprintf(ERRFILE,"Node %4ld length %12.6f tyme %10.8f",
1063 p->back->number, lengthof(p), p->back->tyme);
1064 fprintf(ERRFILE," --> %4ld\n",p->number);
1065 }
1066 fprintf(ERRFILE,"Node %4ld update %ld length %10.8f tyme %10.8f -->",
1067 p->number, (long)p->updated, lengthof(p), p->tyme);
1068 if (!istip(p))
1069 for(i = 0, q = p; i < 3; i++, q = q->next)
1070 if (!q->top) fprintf(ERRFILE,"%4ld\n",q->back->number);
1071 fprintf(ERRFILE,"\n");
1072 if (p->top && p->back->top) fprintf(ERRFILE,"TWO TOPS HERE!!!!");
1073 if (!istip(p)) {
1074 if (!p->next->top) atr(p->next->back);
1075 if (!p->next->next->top) atr(p->next->next->back);
1076 }
1077 else fprintf(ERRFILE,"\n");
1078 } /* atr */
1079
probatr(node * p)1080 void probatr(node *p)
1081 /* "probatr" checks the internal representation of the tree. Pass
1082 curtree->root->back for normal results. This is a debugging
1083 function which is not normally called. */
1084 {
1085 long i;
1086
1087 if (p->back == curtree->root) {
1088 }
1089
1090 if(p->top) {
1091 if(p->ranges[0] < 0)
1092 fprintf(ERRFILE,"node %ld has less then 0 ranges %ld %ld\n",
1093 p->number,indecks,apps);
1094 if(p->ranges[0] > 2)
1095 fprintf(ERRFILE,"node %ld has %ld ranges %ld %ld\n",
1096 p->number,p->ranges[0],indecks,apps);
1097 if(p->ranges[2*p->ranges[0]+1] != FLAGLONG)
1098 fprintf(ERRFILE,"node %ld has misformed end of ranges %ld %ld\n",
1099 p->number,indecks,apps);
1100 for(i = 1; p->ranges[i] != FLAGLONG; i+=2) {
1101 if(p->ranges[i] > p->ranges[i+1])
1102 fprintf(ERRFILE,"node %ld has begins > ends %ld %ld\n",
1103 p->number,indecks,apps);
1104 if(p->ranges[i] < 0 || p->ranges[i+1] < 0)
1105 fprintf(ERRFILE,"node %ld has negative starts or ends %ld %ld\n",
1106 p->number,indecks,apps);
1107 }
1108 }
1109
1110 if (p->top && p->back->top) fprintf(ERRFILE,"TWO TOPS HERE!!!!");
1111 if (!istip(p)) {
1112 if (!p->next->top) probatr(p->next->back);
1113 if (!p->next->next->top) probatr(p->next->next->back);
1114 }
1115
1116
1117 } /* probatr */
1118
1119
1120 /***************************************************************
1121 * gettymenode() returns a pointer to the tymelist entry whose *
1122 * 'eventnode' has the number of 'target'. */
gettymenode(tree * tr,long target)1123 tlist *gettymenode(tree *tr, long target)
1124 {
1125 boolean found;
1126 tlist *t;
1127
1128 if (target == tr->root->number) return (NULL);
1129 if (tr->nodep[target]->type == 't') return(tr->tymelist);
1130
1131 for(t = tr->tymelist, found = FALSE; t != NULL; t = t->succ)
1132 if (t->eventnode->number == target) {found = TRUE; break;}
1133
1134 if (!found) {
1135 fprintf(ERRFILE,"ERROR:In gettymenode, failed to find node %12ld ", target);
1136 fprintf(ERRFILE,"CATASTROPHIC ERROR\n");
1137 exit(-1);
1138 }
1139
1140 return(t);
1141
1142 } /* gettymenode */
1143
1144
1145 /***************************************************************
1146 * vtol() takes a "v" value for a branchlength and returns the *
1147 * branchlength. */
vtol(option_struct * op,data_fmt * data,double v)1148 double vtol(option_struct *op, data_fmt *data, double v)
1149 {
1150
1151 switch(op->datatype) {
1152 case 'a':
1153 break;
1154 case 'b':
1155 case 'm':
1156 return(v);
1157 break;
1158 case 'n':
1159 case 's':
1160 return(data->dnaptr->fracchange * -1.0 * v);
1161 break;
1162 default:
1163 fprintf(ERRFILE,"ERROR:vtol, can't get here!\n");
1164 exit(-1);
1165 }
1166
1167 return(-1.0);
1168
1169 } /* vtol */
1170
1171
1172 /******************************************************************
1173 * ltov() recalculates the proper "v" value of a branch, from the *
1174 * tymes at either end of the branch. */
ltov(option_struct * op,data_fmt * data,node * p)1175 void ltov(option_struct *op, data_fmt *data, node *p)
1176 {
1177
1178 switch(op->datatype) {
1179 case 'a':
1180 break;
1181 case 'b':
1182 case 'm':
1183 p->v = lengthof(p);
1184 break;
1185 case 'n':
1186 case 's':
1187 p->v = -1.0 * lengthof(p) / data->dnaptr->fracchange;
1188 break;
1189 default:
1190 fprintf(ERRFILE,"ERROR:ltov, can't get here!\n");
1191 exit(-1);
1192 }
1193 p->back->v = p->v;
1194
1195 } /* ltov */
1196
1197
1198 /***************************************************************
1199 * findcoal_ltov() is a duplicate ltov specially for findcoal. */
findcoal_ltov(option_struct * op,data_fmt * data,double value)1200 double findcoal_ltov(option_struct *op, data_fmt *data, double value)
1201 {
1202
1203 switch(op->datatype) {
1204 case 'a':
1205 break;
1206 case 'b':
1207 case 'm':
1208 return(value);
1209 break;
1210 case 'n':
1211 case 's':
1212 return(-1.0 * value / data->dnaptr->fracchange);
1213 break;
1214 default:
1215 fprintf(ERRFILE,"ERROR:ltov, can't get here!\n");
1216 exit(-1);
1217 }
1218
1219 return(-1.0);
1220
1221 } /* findcoal_ltov */
1222
1223
1224 /* boolcheck(), booleancheck(), numbercheck(), and readparmfile()
1225 are used in reading the parameter file "parmfile" */
boolcheck(char ch)1226 long boolcheck(char ch)
1227 {
1228 ch = toupper((int)ch);
1229 if (ch == 'F' || ch == 'N') return 0;
1230 if (ch == 'T' || ch == 'Y') return 1;
1231 return -1;
1232 } /* boolcheck */
1233
booleancheck(option_struct * op,char * var,char * value)1234 boolean booleancheck(option_struct *op, char *var, char *value)
1235 {
1236 long i, j, check;
1237
1238 check = boolcheck(value[0]);
1239 if(check == -1) return FALSE;
1240
1241 for(i = 0; i < NUMBOOL; i++) {
1242 if(!strcmp(var,booltokens[i])) {
1243 if(i == 0) op->interleaved = (boolean)(check);
1244 if(i == 1) op->printdata = (boolean)(check);
1245 if(i == 2) op->progress = (boolean)(check);
1246 if(i == 3) op->treeprint = (boolean)(check);
1247 if(i == 4) {
1248 op->freqsfrom = (boolean)(check);
1249 if(!op->freqsfrom) {
1250 strtok(value,":");
1251 freqa = (double)atof(strtok(NULL,";"));
1252 freqc = (double)atof(strtok(NULL,";"));
1253 freqg = (double)atof(strtok(NULL,";"));
1254 freqt = (double)atof(strtok(NULL,";"));
1255 }
1256 }
1257 if(i == 5) {
1258 op->ctgry = (boolean)(check);
1259 if(op->ctgry) {
1260 strtok(value,":");
1261 op->categs = (long)atof(strtok(NULL,";"));
1262 op->rate =
1263 (double *)realloc(op->rate,op->categs*sizeof(double));
1264 op->probcat =
1265 (double *)realloc(op->probcat,op->categs*sizeof(double));
1266 for(j = 0; j < op->categs; j++) {
1267 op->rate[j] = (double)atof(strtok(NULL,";"));
1268 op->probcat[j] = (double)atof(strtok(NULL,";"));
1269 }
1270 }
1271 }
1272 if(i == 6) {
1273 op->watt = (boolean)(check);
1274 if (!op->watt) {
1275 strtok(value,":");
1276 theta0 = (double)atof(strtok(NULL,";"));
1277 }
1278 }
1279 if(i == 7) op->usertree = (boolean)(check);
1280 if(i == 8) {
1281 op->autocorr = (boolean)(check);
1282 if (op->autocorr) {
1283 strtok(value,":");
1284 op->lambda = 1.0 / (double)atof(strtok(NULL,";"));
1285 }
1286 }
1287 if(i == 9) op->newdata = (boolean)(check);
1288 if(i == 10) {
1289 op->same_ne = (boolean)(check);
1290 if (!op->same_ne) {
1291 strtok(value,":");
1292 check = atol(strtok(NULL,";"));
1293 op->ne_ratio = (double *)calloc(check,sizeof(double));
1294 for(j = 0; j < check; j++)
1295 op->ne_ratio[j] = (double)atof(strtok(NULL,";"));
1296 }
1297 }
1298 if(i == 11) op->interactive = (boolean)(check);
1299 if(i == 12) op->mhmcsave = (boolean)(check);
1300 if(i == 13) {
1301 op->panel = (boolean)(check);
1302 if (op->panel) {
1303 strtok(value,":");
1304 check = atol(strtok(NULL,";"));
1305 op->numpanel = (long *)calloc(check,sizeof(long));
1306 for (j = 0; j < check; j++)
1307 op->numpanel[j] = atol(strtok(NULL,";"));
1308 }
1309 }
1310 if(i == 14) op->map = (boolean)(check);
1311 if(i == 15) op->fc = (boolean)(check);
1312 if(i == 16) {
1313 op->full = (boolean)(check);
1314 if (op->full) {
1315 strtok(value,":");
1316 op->chance_seen = (double)atof(strtok(NULL,";"));
1317 }
1318 }
1319 if(i == 17) {
1320 op->haplotyping = (boolean)(check);
1321 if (op->haplotyping) {
1322 strtok(value,":");
1323 op->happrob = (double)atof(strtok(NULL,";"));
1324 }
1325 }
1326 if(i == 18) op->profile = (boolean)(check);
1327 if(i == 19) op->norecsnp = (boolean)(check);
1328 return TRUE;
1329 }
1330 }
1331 return FALSE;
1332 } /* booleancheck */
1333
numbercheck(option_struct * op,char * var,char * value)1334 boolean numbercheck(option_struct *op, char *var, char *value)
1335 {
1336 long i, j;
1337
1338 for(i = 0; i < NUMNUMBER; i++) {
1339 if(!strcmp(var,numbertokens[i])) {
1340 if(i == 0) locus_ttratio = (double)atof(value);
1341 if(i == 1) op->numchains[0] = atol(value);
1342 if(i == 2) op->steps[0] = atol(value);
1343 if(i == 3) op->increm[0] = atol(value);
1344 if(i == 4) op->numchains[1] = atol(value);
1345 if(i == 5) op->increm[1] = atol(value);
1346 if(i == 6) op->steps[1] = atol(value);
1347 if(i == 7) rec0 = (double)atof(value);
1348 if(i == 8) {
1349 value[0] = toupper((int)value[0]);
1350 switch(value[0]) {
1351 case 'N':
1352 op->holding = 0;
1353 break;
1354 case 'T':
1355 op->holding = 1;
1356 break;
1357 case 'R':
1358 op->holding = 2;
1359 break;
1360 default:
1361 fprintf(ERRFILE,"unknown setting for holding: %s,",
1362 value);
1363 fprintf(ERRFILE," no holding assumed\n");
1364 op->holding = 0;
1365 break;
1366 }
1367 }
1368 if(i == 9) op->mutrait = (double)atof(value);
1369 if(i == 10) op->traitratio = (double)atof(value);
1370 if(i == 11) op->pd = (double)atof(value);
1371 if(i == 12) op->hapdrop = atol(value);
1372 if(i == 13) {
1373 op->numtempchains = atol(strtok(value,";"));
1374 if (!op->temperature) free(op->temperature);
1375 op->temperature = (long *)calloc(op->numtempchains,sizeof(long));
1376 for(j = 0; j < op->numtempchains; j++)
1377 op->temperature[j] = atof(strtok(NULL,";"));
1378 qsort((void *)(op->temperature),op->numtempchains,sizeof(long),longcmp);
1379 }
1380 return TRUE;
1381 }
1382 }
1383 return FALSE;
1384
1385 } /* numbercheck */
1386
readparmfile(option_struct * op)1387 void readparmfile(option_struct *op)
1388 {
1389 char fileline[LINESIZE],parmvar[LINESIZE],varvalue[LINESIZE];
1390
1391 #ifdef MAC
1392 char parmfilename[100];
1393
1394 strcpy(parmfilename,"parmfile");
1395 #endif
1396
1397 parmfile = fopen("parmfile","r");
1398
1399 if(parmfile) {
1400 while(fgets(fileline, LINESIZE, parmfile) != NULL) {
1401 if(fileline[0] == '#') continue;
1402 if(!strncmp(fileline,"end",3)) break;
1403 strcpy(parmvar,strtok(fileline,"="));
1404 strcpy(varvalue,strtok(NULL,"\n"));
1405 /* now to process... */
1406 if(!booleancheck(op,parmvar,varvalue))
1407 if(!numbercheck(op,parmvar,varvalue)) {
1408 fprintf(ERRFILE,
1409 "Inappropiate entry in parmfile: %s\n", fileline);
1410 }
1411 }
1412 } else
1413 if (!MENU) {
1414 fprintf(simlog,"Parameter file (parmfile) missing\n");
1415 exit(-1);
1416 }
1417
1418 FClose(parmfile);
1419
1420 #ifdef MAC
1421 fixmacfile(parmfilename);
1422 #endif
1423
1424 } /* readparmfile */
1425 /* END parameter file read */
1426
1427
whichopbool(option_struct * op,long i)1428 boolean whichopbool(option_struct *op, long i)
1429 {
1430 if (i == 0) return(op->interleaved);
1431 if (i == 1) return(op->printdata);
1432 if (i == 2) return(op->progress);
1433 if (i == 3) return(op->treeprint);
1434 if (i == 4) return(op->freqsfrom);
1435 if (i == 5) return(op->ctgry);
1436 if (i == 6) return(op->watt);
1437 if (i == 7) return(op->usertree);
1438 if (i == 8) return(op->autocorr);
1439 if (i == 9) return(op->newdata);
1440 if (i == 10) return(op->same_ne);
1441 if (i == 11) return(op->interactive);
1442 if (i == 12) return(op->mhmcsave);
1443 if (i == 13) return(op->panel);
1444 if (i == 14) return(op->map);
1445 if (i == 15) return(op->fc);
1446 if (i == 16) return(op->full);
1447 if (i == 17) return(op->haplotyping);
1448 if (i == 18) return(op->profile);
1449 if (i == 19) return(op->norecsnp);
1450
1451 fprintf(ERRFILE,"Warning: whichopbool failed to identify option\n");
1452 return(FALSE);
1453 } /* whichopbool */
1454
1455
rec_parmfilewrite(option_struct * op,long numloci,long numpop)1456 void rec_parmfilewrite(option_struct *op, long numloci, long numpop)
1457 {
1458 long i, j;
1459 boolean b;
1460 char parmfilename[100];
1461
1462
1463 openfile(&parmfile,"parmfile","w+",NULL,parmfilename);
1464
1465 for(i = 0; i < NUMBOOL; i++) {
1466 b = whichopbool(op,i);
1467 fprintf(parmfile,"%s=%s",booltokens[i],BOOLPRINT(b));
1468 if (i == 4 && !b) { /* freqsfrom */
1469 fprintf(parmfile,":%f;%f;%f;%f;",
1470 freqa, freqc, freqg, freqt);
1471 }
1472 if (i == 5 && b) { /* category */
1473 fprintf(parmfile,":%ld;",op->categs);
1474 for(j = 0; j < op->categs; j++)
1475 fprintf(parmfile,"%f;%f;",op->rate[j],op->probcat[j]);
1476 }
1477 if (i == 6 && !b) { /* starting theta */
1478 fprintf(parmfile,":%f",theta0);
1479 }
1480 if (i == 8 && !b) { /* autocorrelation */
1481 fprintf(parmfile,":%f",1.0/op->lambda);
1482 }
1483 if (i == 10 && !b) { /* same_ne */
1484 fprintf(parmfile,":%ld;",numloci);
1485 for(j = 0; j < numloci; j++)
1486 fprintf(parmfile,"%f;",op->ne_ratio[j]);
1487 }
1488 if (i == 13 && b) { /* numpanels */
1489 fprintf(parmfile,":%ld;",numpop);
1490 for(j = 0; j < numpop; j++)
1491 fprintf(parmfile,"%ld;",op->numpanel[j]);
1492 }
1493 if (i == 16 && b) { /* full snp */
1494 fprintf(parmfile,":%f;",op->chance_seen);
1495 }
1496 fprintf(parmfile,"\n");
1497 }
1498
1499 for(i = 0; i < NUMNUMBER; i++) {
1500 fprintf(parmfile,"%s=",numbertokens[i]);
1501 if (i == 0) fprintf(parmfile,"%f",locus_ttratio);
1502 if (i == 1) fprintf(parmfile,"%ld",op->numchains[0]);
1503 if (i == 2) fprintf(parmfile,"%ld",op->steps[0]);
1504 if (i == 3) fprintf(parmfile,"%ld",op->increm[0]);
1505 if (i == 4) fprintf(parmfile,"%ld",op->numchains[1]);
1506 if (i == 5) fprintf(parmfile,"%ld",op->increm[1]);
1507 if (i == 6) fprintf(parmfile,"%ld",op->steps[1]);
1508 if (i == 7) fprintf(parmfile,"%f",rec0);
1509 if (i == 8) {
1510 if (!op->holding) fprintf(parmfile,"none");
1511 if (op->holding == 1) fprintf(parmfile,"theta");
1512 if (op->holding == 2) fprintf(parmfile,"recombination");
1513 }
1514 if (i == 9) fprintf(parmfile,"%f",op->mutrait);
1515 if (i == 10) fprintf(parmfile,"%f",op->traitratio);
1516 if (i == 11) fprintf(parmfile,"%f",op->pd);
1517 if (i == 12) fprintf(parmfile,"%ld",op->hapdrop);
1518 if (i == 13) {
1519 fprintf(parmfile,"%ld;",op->numtempchains);
1520 for(j = 0; j < op->numtempchains; j++)
1521 fprintf(parmfile,"%ld;",op->temperature[j]);
1522 }
1523 fprintf(parmfile,"\n");
1524 }
1525
1526 fprintf(parmfile,"end\n");
1527
1528 FClose(parmfile);
1529
1530 #ifdef MAC
1531 fixmacfile(parmfilename);
1532 #endif
1533
1534 } /* rec_parmfilewrite */
1535
1536
readseedfile(void)1537 void readseedfile(void)
1538 {
1539 long inseed;
1540
1541 seedfile = fopen("seedfile","r");
1542
1543 if (!MENU) {
1544 if (!seedfile) {
1545 fprintf(ERRFILE,"\nseedfile not present!\n");
1546 exit(-1);
1547 }
1548 fscanf(seedfile, "%ld%*[^\n]", &inseed);
1549 getc(seedfile);
1550 } else {
1551 if (seedfile) {
1552 fscanf(seedfile, "%ld%*[^\n]", &inseed);
1553 getc(seedfile);
1554 } else {
1555 printf("Random number seed (must be odd)?\n");
1556 scanf("%ld%*[^\n]", &inseed);
1557 getchar();
1558 }
1559 }
1560
1561 seed[0] = inseed & 2047;
1562 inseed /= 2048;
1563 seed[1] = inseed & 2047;
1564 inseed /= 2048;
1565 seed[2] = inseed & 1023;
1566
1567 } /* readseedfile */
1568
1569
print_menuheader(option_struct * op)1570 void print_menuheader(option_struct *op)
1571 {
1572 printf("\n%s", op->ansi ? "\033[2J\033[H" : "\n");
1573 printf("Metropolis-Hastings Markov Chain Monte Carlo");
1574 printf(" method, version %3.2f\n\n",VERSION_NUM);
1575 } /* print_menuheader */
1576
1577
print_startmenu(option_struct * op,boolean writeout)1578 void print_startmenu(option_struct *op, boolean writeout)
1579 {
1580 printf("STARTUP MENU\n");
1581 printf(" # Goto Data/Search Menus\n");
1582 printf(" O Save current options to file? %s\n",
1583 writeout ? "Yes" : "No");
1584 printf(" N Use trees from previous run? %s\n",
1585 op->newdata ? "No" : "Yes");
1586 if (op->newdata)
1587 printf(" E Echo the data at start of run? %s\n",
1588 op->printdata ? "Yes" : "No");
1589 else op->printdata = FALSE;
1590 printf(" S Save MHMC output to files? %s\n",
1591 op->mhmcsave ? "Yes" : "No");
1592 printf(" P Print indications of progress of run? %s\n",
1593 op->progress ? "Yes" : "No");
1594 #if 0 /* DEBUG debug WARNING warning--no tree writer yet! */
1595 printf(" G Print out genealogies? %s\n",
1596 op->treeprint ? "Yes" : "No");
1597 #endif
1598 printf(" U Use user tree in file \"intree\" ? %s\n",
1599 op->usertree ? "Yes" : "No");
1600 printf(" V Number of temperatures used? %ld\n",
1601 op->numtempchains);
1602 printf(" H Infer haplotypes? %s\n",
1603 op->haplotyping ? "Yes" : "No");
1604 printf(" L Calculate confidence intervals? %s\n",
1605 op->profile ? "Yes" : "No");
1606 if (op->haplotyping) {
1607 printf(" A Strategy for haplotype rearrangement? ");
1608 if(op->hapdrop==0)printf("No resimulation\n");
1609 else if (op->hapdrop==1)printf("Single resimulation\n");
1610 else printf("Double resimulation\n");
1611 printf(" F Frequency of haplotype rearrangement? %f\n",
1612 op->happrob);
1613 }
1614 printf(" M Map trait information onto trees? %s\n",
1615 op->map ? "Yes" : "No");
1616 if (op->map) {
1617 printf(" R Relative mutation rate of trait? %f\n",
1618 op->mutrait);
1619 printf(" T Forward versus back trait mutation? %f\n",
1620 op->traitratio);
1621 printf(" D Frequency of trait? %f\n",
1622 op->pd);
1623 }
1624
1625 } /* print_startmenu */
1626
1627
print_datamenu(option_struct * op)1628 void print_datamenu(option_struct *op)
1629 {
1630 printf("DATA MENU\n");
1631 printf(" # Goto Startup Menu\n");
1632 printf(" D Datatype:");
1633 switch(op->datatype) {
1634 case 'a':
1635 printf(" Allelelic Markers\n");
1636 break;
1637 case 'b':
1638 printf(" Brownian-Motion Microsats\n");
1639 case 'm':
1640 if (op->datatype != 'b') printf(" Microsats\n");
1641 break;
1642 case 'n':
1643 printf(" SNPs\n");
1644 printf(" P SNPs derived from panel? %s\n",
1645 op->panel ? "Yes" : "No");
1646 #if 0
1647 printf(" A SNPs are all of the variable sites? %s\n",
1648 op->full ? "No" : "Yes");
1649 if (op->full) {
1650 printf(" B Probability of observation? %6.4f\n",
1651 op->chance_seen);
1652 }
1653 #endif
1654 printf(" N SNPs with recombination? %s\n",
1655 op->norecsnp ? "No" : "Yes");
1656 case 's':
1657 if (op->datatype != 'n') printf(" Sequence\n");
1658 printf(" I Input sequences interleaved? %s\n",
1659 op->interleaved ? "Yes" : "No, sequential");
1660 printf(" T Transition/transversion ratio: %8.4f\n",
1661 locus_ttratio);
1662 printf(" F Use empirical base frequencies? %s\n",
1663 op->freqsfrom ? "Yes" : "No");
1664 printf(" C One category of substitution rates?");
1665 if (!op->ctgry || op->categs == 1)
1666 printf(" Yes\n");
1667 else {
1668 printf(" %ld categories\n", op->categs);
1669 if (op->datatype != 'n') {
1670 printf(" R Rates at adjacent sites correlated?");
1671 if (!op->autocorr) printf(" No, they are independent\n");
1672 else printf(" Yes, mean block length =%6.1f\n", 1.0 / op->lambda);
1673 }
1674 }
1675 printf(" V Poplulation size equal among loci? %s\n",
1676 op->same_ne ? "Yes" : "No");
1677 break;
1678 default:
1679 printf(" UNKNOWN\n");
1680 break;
1681 }
1682
1683 } /* print_datamenu */
1684
1685
print_searchmenu(option_struct * op)1686 void print_searchmenu(option_struct *op)
1687 {
1688 printf("SEARCH MENU\n");
1689 printf(" Q Lots of recombinations expected? %s\n",
1690 op->fc ? "Yes" : "No");
1691 printf(" H Hold parameters fixed?");
1692 if (op->holding == 0) printf(" No\n");
1693 else if (op->holding == 1) printf(" Yes, theta fixed\n");
1694 else if (op->holding == 2) printf(" Yes, rec-rate fixed\n");
1695 else printf(" Unknown option!!!\n");
1696 printf(" W Starting theta equals Watterson? %s",
1697 op->watt ? "Yes\n" : "No");
1698 if (!op->watt) printf(", initial theta = %6.4f\n", theta0);
1699 printf(" Z Starting recombination rate: %e\n", rec0);
1700 printf(" S Number of short chains? %6ld\n",
1701 op->numchains[0]);
1702 if (op->numchains[0] > 0) {
1703 printf(" 1 Short sampling increment? %6ld\n",
1704 op->increm[0]);
1705 printf(" 2 Number of steps along short chains? %6ld\n",
1706 op->steps[0]);
1707 }
1708 printf(" L Number of long chains? %6ld\n",
1709 op->numchains[1]);
1710 if (op->numchains[1] > 0) {
1711 printf(" 3 Long sampling increment? %6ld\n",
1712 op->increm[1]);
1713 printf(" 4 Number of steps along long chains? %6ld\n",
1714 op->steps[1]);
1715 }
1716
1717 } /* print_searchmenu */
1718
1719
print_menuend(void)1720 void print_menuend(void)
1721 {
1722 printf("\nAre these settings correct? (type Y or the letter for");
1723 printf(" one to change)\n");
1724 } /* print_menuend */
1725
1726
initoptions(option_struct * op)1727 void initoptions(option_struct *op)
1728 {
1729 long i;
1730
1731 /* first some multiple rate-categories code stuff */
1732 op->ctgry = FALSE;
1733 op->rate[0] = 1.0;
1734 op->probcat[0] = 1.0;
1735 op->categs = 1;
1736 op->lambda = 1.0;
1737 op->autocorr = FALSE; /* FALSE if categs == 1 */
1738 /* end categories code stuff */
1739
1740 op->interactive = TRUE;
1741 op->newdata = TRUE;
1742 op->mhmcsave = FALSE;
1743 op->interleaved = TRUE;
1744 op->printdata = FALSE;
1745 op->progress = TRUE;
1746 op->treeprint = FALSE;
1747 op->profile = TRUE;
1748
1749 weightfile = fopen("weightfile","r");
1750 op->weights = (weightfile) ? TRUE : FALSE;
1751
1752 spacefile = fopen("spacefile","r");
1753 op->spacing = (spacefile) ? TRUE : FALSE;
1754
1755 op->map = FALSE;
1756 op->mutrait = 1.0;
1757 op->traitratio = 1.0;
1758 op->pd = 0.1;
1759
1760 op->panel = FALSE;
1761 op->numpanel = NULL;
1762
1763 op->haplotyping = FALSE;
1764 op->hapdrop = 0;
1765 op->happrob = 0.2;
1766
1767 op->full = FALSE;
1768 op->chance_seen = 1.0;
1769 op->norecsnp = FALSE;
1770
1771 op->numtempchains = 1;
1772 op->temperature = (long *)calloc(op->numtempchains,sizeof(long));
1773 for(i = 0; i < op->numtempchains; i++) op->temperature[i] = 1;
1774 op->ctemp = 1;
1775
1776 op->fc = FALSE;
1777 op->freqsfrom = TRUE;
1778 op->watt = FALSE;
1779 op->usertree = FALSE;
1780 op->plump = TRUE;
1781 op->holding = 0;
1782 op->same_ne = TRUE;
1783 op->ne_ratio = NULL;
1784 op->numchains[0] = 5;
1785 op->increm[0] = 20;
1786 op->steps[0] = 2000;
1787 op->numchains[1] = 1;
1788 op->increm[1] = 20;
1789 op->steps[1] = 50000;
1790 op->datatype = 's';
1791 op->datatypeset = FALSE;
1792
1793 op->print_recbythmaxcurve = TRUE;
1794 op->thlb = FLAGLONG;
1795 op->thub = FLAGLONG;
1796 op->reclb = FLAGLONG;
1797 op->recub = FLAGLONG;
1798
1799 } /* initoptions */
1800
1801
workmenu1(option_struct * op,char ch,boolean * menu1,boolean * writeout)1802 void workmenu1(option_struct *op, char ch, boolean *menu1, boolean *writeout)
1803 {
1804 long i;
1805 char input[LINESIZE];
1806
1807 if (strchr("#FLAHONESPGUMVRTD",ch) != NULL) {
1808 switch(ch) {
1809 case '#':
1810 *menu1 = !(*menu1);
1811 break;
1812 case 'V':
1813 printf("Number of different temperatures?\n");
1814 scanf("%ld",&(op->numtempchains));
1815 scanf("%*[^\n]");
1816 do {
1817 printf("Temperature of coldest chain? (must be positive)\n");
1818 scanf("%ld",&(op->temperature[0]));
1819 scanf("%*[^\n]");
1820 } while (op->temperature[0] <= 0.0);
1821 for (i = 1; i < op->numtempchains; i++) {
1822 do {
1823 printf("Temperature of temperature-chain %ld?\n",i+1);
1824 scanf("%ld",&(op->temperature[i]));
1825 scanf("%*[^\n]");
1826 } while(op->temperature[i] <= op->temperature[0]);
1827 }
1828 break;
1829 case 'F':
1830 printf("Probability of proposing a change in haplotype?\n");
1831 scanf("%lf",&(op->happrob));
1832 scanf("%*[^\n]");
1833 break;
1834 case 'L':
1835 op->profile = !op->profile;
1836 break;
1837 case 'H':
1838 op->haplotyping = !op->haplotyping;
1839 break;
1840 case 'O':
1841 *writeout = !(*writeout);
1842 break;
1843 case 'N':
1844 op->newdata = !op->newdata;
1845 break;
1846 case 'E':
1847 op->printdata = !op->printdata;
1848 break;
1849 case 'P':
1850 op->progress = !op->progress;
1851 break;
1852 case 'G':
1853 op->treeprint = !op->treeprint;
1854 break;
1855 case 'U':
1856 op->usertree = !op->usertree;
1857 break;
1858 case 'S':
1859 op->mhmcsave = !op->mhmcsave;
1860 break;
1861 case 'M':
1862 op->map = !op->map;
1863 break;
1864 case 'R':
1865 do {
1866 printf("Relative mutation rate of trait?\n");
1867 fgets(input,LINESIZE,stdin);
1868 op->mutrait = atof(input);
1869 } while (op->mutrait <= 0.0);
1870 break;
1871 case 'T':
1872 do {
1873 printf("Ratio of forward to back trait mutation?\n");
1874 fgets(input,LINESIZE,stdin);
1875 op->traitratio = atof(input);
1876 } while (op->traitratio <= 0.0);
1877 break;
1878 case 'D':
1879 do {
1880 printf("Frequency of trait?\n");
1881 fgets(input,LINESIZE,stdin);
1882 op->pd = atof(input);
1883 } while (op->pd <= 0.0 || op->pd >= 1.0);
1884 break;
1885 case 'A':
1886 do {
1887 printf("Number of drops while resimulating (0-2)?\n");
1888 fgets(input,LINESIZE,stdin);
1889 op->hapdrop = atol(input);
1890 } while (op->hapdrop != 0 && op->hapdrop != 1 && op->hapdrop != 2);
1891 default:
1892 fprintf(stderr,"ERROR:Impossible option %c detected!\n",ch);
1893 break;
1894 }
1895 } else fprintf(stderr,"%c is not an option here.\n",ch);
1896
1897 } /* workmenu1 */
1898
1899
workmenu2(option_struct * op,char ch,boolean * menu1,long * numloci,long * numpop)1900 void workmenu2(option_struct *op, char ch, boolean *menu1, long *numloci,
1901 long *numpop)
1902 {
1903 long i;
1904 double probsum;
1905 char input[LINESIZE];
1906 boolean done;
1907
1908 if(strchr("#NQPDITFCRVHWZS12L34AB",ch) != NULL) {
1909 switch(ch) {
1910 case '#':
1911 *menu1 = !(*menu1);
1912 break;
1913 case 'N':
1914 op->norecsnp = !(op->norecsnp);
1915 if (op->norecsnp) {
1916 if (rec0 || op->holding != 2) {
1917 printf("\nThis SNP model requires recombination");
1918 printf(" rate to be fixed at zero.\n");
1919 printf("Fix starting rec-rate to zero first.\n");
1920 printf("----SNP model unchanged.----\n");
1921 op->norecsnp = FALSE;
1922 }
1923 }
1924 printf("press <enter> or <return> to continue\n");
1925 fgets(input,LINESIZE,stdin);
1926 break;
1927 case 'Q':
1928 op->fc = !op->fc;
1929 break;
1930 case 'D':
1931 /* WARNING--most datatypes disabled!!! */
1932 #if 0
1933 if (op->datatype == 'a') {op->datatype = 'b'; break;}
1934 if (op->datatype == 'b') {op->datatype = 'm'; break;}
1935 if (op->datatype == 'm') {op->datatype = 'n'; break;}
1936 if (op->datatype == 'n') {op->datatype = 's'; break;}
1937 if (op->datatype == 's') {op->datatype = 'a'; break;}
1938 #endif
1939 op->datatypeset = TRUE;
1940 if (op->datatype == 'n') {op->datatype = 's'; break;}
1941 if (op->datatype == 's') {
1942 op->datatype = 'n';
1943 if (op->autocorr) {
1944 printf("Can't use autocorrelation with SNPs\n");
1945 printf("----autocorrelation disabled-----\n");
1946 printf("press <enter> or <return> to continue\n");
1947 fgets(input,LINESIZE,stdin);
1948 op->autocorr = FALSE;
1949 }
1950 break;
1951 }
1952 printf("ERROR:Impossible Datatype %c present\n",op->datatype);
1953 break;
1954 #if 0
1955 case 'A': /* deliberate fall through to case 'B' */
1956 op->full = !op->full;
1957 case 'B':
1958 if (!op->full) break;
1959 do {
1960 printf("Percent chance that a variable site was observed");
1961 printf(" during sequencing?\n");
1962 scanf("%lf",&(op->chance_seen));
1963 scanf("%*[^\n]");
1964 if (op->chance_seen <= 0.0 || op->chance_seen > 1.0)
1965 printf(" the percentage must be between 0 and 1\n");
1966 } while (op->chance_seen <= 0.0 || op->chance_seen > 1.0);
1967 break;
1968 #endif
1969 case 'I':
1970 op->interleaved = !op->interleaved;
1971 break;
1972 case 'T':
1973 do {
1974 printf("Transition/transversion ratio?\n");
1975 fgets(input,LINESIZE,stdin);
1976 locus_ttratio = atof(input);
1977 if (locus_ttratio < 0.5)
1978 printf("TTratio cannot be less than 0.5\n");
1979 } while (locus_ttratio < 0.5);
1980 break;
1981 case 'F':
1982 op->freqsfrom = !op->freqsfrom;
1983 if (!op->freqsfrom) {
1984 printf("Base frequencies for A, C, G, T/U (use blanks");
1985 printf(" to separate)?\n");
1986 scanf("%lf%lf%lf%lf", &freqa, &freqc, &freqg, &freqt);
1987 scanf("%*[^\n]");
1988 }
1989 break;
1990 case 'P':
1991 op->panel = !op->panel;
1992 if (op->panel) {
1993 printf("Number of populations?\n");
1994 ;
1995 *numpop = atol(input);
1996 op->numpanel = (long *)calloc(*numpop,sizeof(long));
1997 for(i = 0; i < *numpop; i++) {
1998 printf("Number of panel haplotypes for population");
1999 printf(" %ld?\n",i+1);
2000 fgets(input,LINESIZE,stdin);
2001 op->numpanel[i] = atol(input);
2002 }
2003 } else
2004 if (op->numpanel) {
2005 free(op->numpanel);
2006 op->numpanel = NULL;
2007 }
2008 break;
2009 case 'C':
2010 op->ctgry = !op->ctgry;
2011 if (!op->ctgry) op->autocorr = FALSE;
2012 if (op->ctgry) {
2013 do {
2014 printf("Number of categories ?");
2015 fgets(input,LINESIZE,stdin);
2016 op->categs = atoi(input);
2017 } while (op->categs < 1);
2018 free(op->rate);
2019 free(op->probcat);
2020 printf("Rate for each category? (use a space to");
2021 printf(" separate)\n");
2022 op->rate = (double *)calloc(op->categs,sizeof(double));
2023 op->probcat = (double *)calloc(op->categs,sizeof(double));
2024 for (i = 0; i < op->categs; i++)
2025 scanf("%lf*[^\n]", &(op->rate[i]));
2026 getchar();
2027 do {
2028 printf("Probability for each category?");
2029 printf(" (use a space to separate)\n");
2030 for (i = 0; i < op->categs; i++)
2031 scanf("%lf", &(op->probcat[i]));
2032 scanf("%*[^\n]");
2033 getchar();
2034 done = TRUE;
2035 probsum = 0.0;
2036 for (i = 0; i < op->categs; i++)
2037 probsum += op->probcat[i];
2038 if (fabs(1.0 - probsum) > 0.001) {
2039 done = FALSE;
2040 printf("Probabilities must add up to");
2041 printf(" 1.0, plus or minus 0.001.\n");
2042 }
2043 } while (!done);
2044 }
2045 break;
2046 case 'R':
2047 if (op->datatype == 'n') {
2048 printf("Can't use autocorrelation with SNPs\n");
2049 printf("----autocorrelation disabled-----\n");
2050 printf("press <enter> or <return> to continue\n");
2051 fgets(input,LINESIZE,stdin);
2052 op->autocorr = FALSE;
2053 break;
2054 }
2055 op->autocorr = !op->autocorr;
2056 if (op->autocorr) {
2057 do {
2058 printf("Mean block length of sites having the same ");
2059 printf("rate (greater than 1)?\n");
2060 scanf("%lf%*[^\n]", &(op->lambda));
2061 getchar();
2062 } while (op->lambda <= 1.0);
2063 op->lambda = 1.0 / op->lambda;
2064 }
2065 break;
2066 case 'V':
2067 op->same_ne = !op->same_ne;
2068 if (!op->same_ne) {
2069 printf("Enter number of loci: ");
2070 scanf("%ld",numloci);
2071 if (*numloci == 1) op->same_ne = !op->same_ne;
2072 if (op->ne_ratio) free(op->ne_ratio);
2073 op->ne_ratio = (double *)calloc(*numloci,sizeof(double));
2074 printf("\nEnter relative population size/mutation rate");
2075 printf(" for each locus in input order:\n");
2076 for(i = 0; i < *numloci; i++) {
2077 scanf("%lf",&(op->ne_ratio[i]));
2078 if (op->ne_ratio[i] <= 0) {
2079 printf("\nratios must be positive, please reenter\n");
2080 i--;
2081 }
2082 }
2083 }
2084 break;
2085 case 'H':
2086 op->holding += 1;
2087 if (op->holding > 2) op->holding = 0;
2088 break;
2089 case 'W':
2090 op->watt = !op->watt;
2091 if (!op->watt) {
2092 do {
2093 printf("Initial theta estimate?\n");
2094 fgets(input,LINESIZE,stdin);
2095 theta0 = atof(input);
2096 } while (theta0 <= 0.0);
2097 }
2098 break;
2099 case 'Z':
2100 printf("What recombination rate?\n");
2101 do {
2102 fgets(input,LINESIZE,stdin);
2103 rec0 = atof(input);
2104 if (rec0 < 0.0)
2105 printf("recombination rate must be non-negative\n");
2106 } while (rec0 < 0.0);
2107 break;
2108 case 'S':
2109 do {
2110 printf("How many Short Chains?\n");
2111 fgets(input,LINESIZE,stdin);
2112 op->numchains[0] = atoi(input);
2113 if (op->numchains[0] < 0)
2114 printf("Must be non-negative\n");
2115 } while (op->numchains[0] < 0);
2116 break;
2117 case '1':
2118 done = FALSE;
2119 while (!done) {
2120 printf("How often to sample trees?\n");
2121 fgets(input,LINESIZE,stdin);
2122 op->increm[0] = atoi(input);
2123 if (op->increm[0] > 0) done = TRUE;
2124 else printf("Must be a positive integer\n");
2125 }
2126 break;
2127 case '2':
2128 done = FALSE;
2129 while (!done) {
2130 printf("How many short steps?\n");
2131 fgets(input,LINESIZE,stdin);
2132 op->steps[0] = atoi(input);
2133 if (op->steps[0] > 0) done = TRUE;
2134 else printf("Must be a positive integer\n");
2135 }
2136 break;
2137 case 'L':
2138 do {
2139 printf("How many Long Chains?\n");
2140 fgets(input,LINESIZE,stdin);
2141 op->numchains[1] = atoi(input);
2142 if (op->numchains[1] < 1)
2143 printf("Must be a positive integer\n");
2144 } while (op->numchains[1] < 1);
2145 break;
2146 case '3':
2147 done = FALSE;
2148 while (!done) {
2149 printf("How often to sample trees?\n");
2150 fgets(input,LINESIZE,stdin);
2151 op->increm[1] = atoi(input);
2152 if (op->increm[1] > 0) done = TRUE;
2153 else printf("Must be a positive integer\n");
2154 }
2155 break;
2156 case '4':
2157 done = FALSE;
2158 while (!done) {
2159 printf("How many long steps?\n");
2160 fgets(input,LINESIZE,stdin);
2161 op->steps[1] = atoi(input);
2162 if (op->steps[1] > 0) done = TRUE;
2163 else printf("Must be a positive integer\n");
2164 }
2165 break;
2166 default:
2167 fprintf(stderr,"ERROR:Impossible option %c detected!\n",ch);
2168 break;
2169 }
2170 } else fprintf(stderr,"%c is not an option here.\n",ch);
2171
2172 } /* workmenu2 */
2173
2174
2175 /****************************************************************
2176 * checkparmfile() checks the parmfile for internal consistency */
checkparmfile(option_struct * op)2177 void checkparmfile(option_struct *op)
2178 {
2179 boolean error = FALSE;
2180
2181 if (op->norecsnp) {
2182 if (rec0) {
2183 fprintf(ERRFILE,"ERROR--this snp model is incompatible with a");
2184 fprintf(ERRFILE," non-zero recombination rate.\n");
2185 fprintf(ERRFILE," Please change starting rec-rate.\n");
2186 fprintf(outfile,"ERROR--this snp model is incompatible with a");
2187 fprintf(outfile," non-zero recombination rate.\n");
2188 fprintf(outfile," Please change starting rec-rate.\n");
2189 error = TRUE;
2190 }
2191 if (op->holding != 2) {
2192 fprintf(ERRFILE,"ERROR--this snp model is incompatible with a");
2193 fprintf(ERRFILE," non-zero recombination rate.\n");
2194 fprintf(ERRFILE," Please set rec-rate constant.\n");
2195 fprintf(outfile,"ERROR--this snp model is incompatible with a");
2196 fprintf(outfile," non-zero recombination rate.\n");
2197 fprintf(outfile," Please set rec-rate constant.\n");
2198 error = TRUE;
2199 }
2200 }
2201
2202 if (error) {
2203 fprintf(ERRFILE,"program finished\n");
2204 exit(-1);
2205 }
2206
2207 } /* checkparmfile */
2208
2209
getoptions(option_struct * op)2210 void getoptions(option_struct *op)
2211 /* interactively set options using a very basic menu */
2212 {
2213 long numloci, numpop;
2214 boolean writeout, done, menu1;
2215 char ch, input[LINESIZE];
2216
2217 /* default initializations */
2218 initoptions(op);
2219 writeout = FALSE;
2220 locus_ttratio = 2.0;
2221 numloci = 1;
2222 numpop = 1;
2223 theta0 = 1.0;
2224 rec0 = 0.0001;
2225 /* end defaults */
2226
2227 readparmfile(op);
2228 checkparmfile(op);
2229 readseedfile();
2230
2231 fprintf(outfile, "\nRECOMBINE: Recombinant HMMC ML method,");
2232 fprintf(outfile, " version %3.2f\n\n",VERSION_NUM);
2233
2234 if (!MENU) return;
2235
2236 menu1 = TRUE;
2237 do {
2238 print_menuheader(op);
2239 if (menu1) print_startmenu(op,writeout);
2240 else {print_datamenu(op); print_searchmenu(op);}
2241 print_menuend();
2242 fgets(input,LINESIZE,stdin);
2243 ch = toupper((int)input[0]);
2244 done = (ch == 'Y');
2245 if (!done) {
2246 if (menu1) workmenu1(op,ch,&menu1,&writeout);
2247 else workmenu2(op,ch,&menu1,&numloci,&numpop);
2248 }
2249 } while (!done);
2250
2251 if (writeout) rec_parmfilewrite(op,numloci,numpop);
2252
2253 } /* getoptions */
2254
2255
2256 /*********************************************************************
2257 * firstinit() handles initialization for things that are recorded *
2258 * over multiple loci/populations, and therefore are allocated once. */
firstinit(option_struct * op,data_fmt * data)2259 void firstinit(option_struct *op, data_fmt *data)
2260 {
2261 long i, numloci;
2262
2263 numloci = getdata_numloci(op,data);
2264
2265 totchains = op->numchains[0] + op->numchains[1];
2266
2267 numtrees = MAX(op->steps[0]/op->increm[0],op->steps[1]/op->increm[1]);
2268
2269 if (op->same_ne) {
2270 op->ne_ratio = (double *)calloc(1,numloci*sizeof(double));
2271 for(i = 0; i < numloci; i++) op->ne_ratio[i] = 1.0;
2272 }
2273
2274 sametree = (boolean **)calloc(1,numloci * sizeof(boolean *));
2275 sametree[0] = (boolean *)calloc(1,numloci*numtrees * sizeof(boolean));
2276 for(i = 1; i < numloci; i++)
2277 sametree[i] = sametree[0] + i*numtrees;
2278
2279 model_alloc(op,data);
2280
2281 freenodes = NULL;
2282
2283 } /* firstinit */
2284
2285
2286 /********************************************************************
2287 * popinit() initializes things that are specific to one population */
popinit(option_struct * op,data_fmt * data)2288 void popinit(option_struct *op, data_fmt *data)
2289 {
2290
2291 switch(op->datatype) {
2292 case 'a':
2293 break;
2294 case 'b':
2295 case 'm':
2296 break;
2297 case 'n':
2298 case 's':
2299 rootnum = 0;
2300 break;
2301 default:
2302 fprintf(ERRFILE,"ERROR:popinit, can't get here!\n");
2303 exit(-1);
2304 }
2305
2306 } /* popinit */
2307
2308
2309 /***********************************************************************
2310 * makeinvarvalues() sets up the tip likelikelihoods for the invariant *
2311 * sites tacked on the end of the sequence for SNP data. */
makeinvarvalues(dnadata * dna,long numcategs,tree * tr)2312 void makeinvarvalues(dnadata *dna, long numcategs, tree *tr)
2313 {
2314 long site, ind, categ, slice;
2315
2316 site = dna->sites[locus];
2317 slice = 0L; /* we only set slice 0 here; the remainder, if any,
2318 are set elsewhere */
2319
2320 for(ind = 1; ind <= dna->numseq[population]; ind++)
2321 for(categ = 0; categ < numcategs; categ++) {
2322 tr->nodep[ind]->x->s[site][categ][slice][baseA] = 1.0;
2323 tr->nodep[ind]->x->s[site][categ][slice][baseC] = 0.0;
2324 tr->nodep[ind]->x->s[site][categ][slice][baseG] = 0.0;
2325 tr->nodep[ind]->x->s[site][categ][slice][baseT] = 0.0;
2326
2327 tr->nodep[ind]->x->s[site+1][categ][slice][baseA] = 0.0;
2328 tr->nodep[ind]->x->s[site+1][categ][slice][baseC] = 1.0;
2329 tr->nodep[ind]->x->s[site+1][categ][slice][baseG] = 0.0;
2330 tr->nodep[ind]->x->s[site+1][categ][slice][baseT] = 0.0;
2331
2332 tr->nodep[ind]->x->s[site+2][categ][slice][baseA] = 0.0;
2333 tr->nodep[ind]->x->s[site+2][categ][slice][baseC] = 0.0;
2334 tr->nodep[ind]->x->s[site+2][categ][slice][baseG] = 1.0;
2335 tr->nodep[ind]->x->s[site+2][categ][slice][baseT] = 0.0;
2336
2337 tr->nodep[ind]->x->s[site+3][categ][slice][baseA] = 0.0;
2338 tr->nodep[ind]->x->s[site+3][categ][slice][baseC] = 0.0;
2339 tr->nodep[ind]->x->s[site+3][categ][slice][baseG] = 0.0;
2340 tr->nodep[ind]->x->s[site+3][categ][slice][baseT] = 1.0;
2341 }
2342 } /* makeinvarvalues */
2343
2344
2345 /*****************************************************************
2346 * locusinit() initializes things that are specific to one locus */
locusinit(option_struct * op,data_fmt * data)2347 void locusinit(option_struct *op, data_fmt *data)
2348 {
2349 long i, nummarkers, numseq, numsites;
2350 dnadata *dna;
2351 FILE *flipfile;
2352
2353 dna = data->dnaptr;
2354
2355 if (op->spacing) read_spacefile(spacefile,op,data);
2356 nummarkers = getdata_nummarkers(op,data);
2357 /* hack done for speed-up */
2358 dna->sitecount[locus][0] = getdata_space(op,data,0L);
2359 dna->markersite[locus][0] = dna->sspace[0][0][getdata_nummarkers(op,data)];
2360 for(i = 1; i < nummarkers; i++) {
2361 dna->sitecount[locus][i] = dna->sitecount[locus][i-1] +
2362 getdata_space(op,data,i);
2363 dna->markersite[locus][i] = dna->markersite[locus][i-1] +
2364 dna->sspace[0][0][i-1];
2365 }
2366 numsites = countsites(op,data);
2367 dna->segranges0 = (int *)calloc(numsites,sizeof(int));
2368 dna->segranges1 = (int *)calloc(numsites,sizeof(int));
2369 dna->segranges2 = (int *)calloc(numsites,sizeof(int));
2370 dna->segranges3 = (int *)calloc(numsites,sizeof(int));
2371 for(i = 0; i < numsites; i++) {
2372 dna->segranges0[i] = 0;
2373 dna->segranges1[i] = 1;
2374 dna->segranges2[i] = 2;
2375 dna->segranges3[i] = -1;
2376 }
2377
2378 treesetup(op,data);
2379
2380 numseq = getdata_numseq(op,data);
2381
2382 switch(op->datatype) {
2383 case 'a':
2384 break;
2385 case 'b':
2386 case 'm':
2387 break;
2388 case 'n':
2389 initinvartips(op,data,op->categs,curtree);
2390 if (!op->panel) invardatacheck(op,data);
2391 case 's':
2392 fprintf(outfile, "%4ld Sequences, %4ld Sites\n",numseq,nummarkers);
2393 alogf = (rlrec *)calloc(1,sizeof(rlrec));
2394 alogf->val = (double *)calloc(nummarkers+1,sizeof(double));
2395 contribution =
2396 (contribarr *)calloc(nummarkers+1,sizeof(contribarr));
2397 contribution[0] =
2398 (double *)calloc((nummarkers+1)*op->categs,sizeof(double));
2399 for(i=1;i<nummarkers+1;i++)
2400 contribution[i] = contribution[0] + i*op->categs;
2401 /* now read the categories from the infile, if applicable */
2402 inputcategories(op,data);
2403 /* setup fractional likelihoods at tree tips */
2404 makednavalues(op,data,op->categs,curtree);
2405 /* frequencies must be calculated after makednavalues() called */
2406 if (!op->freqsfrom) {
2407 data->dnaptr->freqa = freqa;
2408 data->dnaptr->freqc = freqc;
2409 data->dnaptr->freqg = freqg;
2410 data->dnaptr->freqt = freqt;
2411 } else empiricaldnafreqs(op,data,curtree);
2412 getbasednafreqs(data->dnaptr,op,locus_ttratio,outfile);
2413 /* table can only be initialized after frequencies are set */
2414 inittable(op,dna);
2415 makesiteptr(op,data);
2416 initweightrat(op,data);
2417 if (op->haplotyping) {
2418 flipfile = fopen("flipfile","r");
2419 read_flipfile(op,data,curtree,flipfile);
2420 fclose(flipfile);
2421 pruneflips(op,data,curtree);
2422 }
2423 break;
2424 default:
2425 fprintf(ERRFILE,"ERROR:locusinit, can't get here!\n");
2426 exit(-1);
2427 }
2428
2429
2430 if ((op->increm[0] < 0) || (op->increm[1] < 0)) {
2431 fprintf(ERRFILE,"Error in input sampling increment,");
2432 fprintf(ERRFILE," increment set to 10\n");
2433 if (op->increm[0] < 0)
2434 op->increm[0] = 10;
2435 if (op->increm[1] < 0)
2436 op->increm[1] = 10;
2437 }
2438 /* stuff for recycling x arrays */
2439 numx = 0;
2440 for (i=0;i<XARRAYSIZE;i++)
2441 sparex[i]=NULL;
2442
2443 } /* locusinit */
2444
2445
inputcategories(option_struct * op,data_fmt * data)2446 void inputcategories(option_struct *op, data_fmt *data)
2447 {
2448 /* char ch; */
2449 long i, extranum, nummarkers;
2450
2451 nummarkers = getdata_nummarkers(op,data);
2452
2453 category = (long *)calloc(nummarkers,sizeof(long));
2454
2455 for (i = 0; i < nummarkers; i++) category[i] = 1;
2456 extranum = 0;
2457 /* DEBUG debug WARNING warning--categories input code commented out!
2458 while (!(eoln(infile))) {
2459 ch = getc(infile);
2460 if (ch == '\n')
2461 ch = ' ';
2462 ch = isupper(ch) ? ch : toupper((int)ch);
2463 if (ch == 'C')
2464 extranum++;
2465 else if (ch != ' ') {
2466 printf("BAD OPTION CHARACTER: %c\n", ch);
2467 exit(-1);
2468 }
2469 }
2470 fscanf(infile, "%*[^\n]");
2471 getc(infile);
2472 for (i = 1; i <= extranum; i++) {
2473 ch = getc(infile);
2474 if (ch == '\n')
2475 ch = ' ';
2476 ch = isupper(ch) ? ch : toupper((int)ch);
2477 if (ch != 'W'){
2478 printf("ERROR: INCORRECT AUXILIARY OPTIONS LINE WHICH STARTS WITH %c\n",
2479 ch);
2480 exit(-1);
2481 }
2482 }
2483 */
2484 if (op->categs <= 1)
2485 return;
2486 fprintf(outfile, "\nSite category Rate of change Probability\n");
2487 for (i = 1; i <= op->categs; i++)
2488 fprintf(outfile, "%12ld%13.3f%13.3f\n",i,op->rate[i-1],op->probcat[i-1]);
2489 putc('\n', outfile);
2490 } /* inputcategories */
2491
2492
2493 /*******************************************************************
2494 * allocate_nodelet() allocates and initializes a set of nodelets, *
2495 * which are set to be non-tops of the passed "type". "x" and all *
2496 * other datatype specific fields are NULLed and must be allocated *
2497 * elsewhere. */
allocate_nodelet(long num,char type)2498 node *allocate_nodelet(long num, char type)
2499 {
2500 static long unique_id=0;
2501 boolean isfirst=TRUE;
2502 long j;
2503 node *p, *q = NULL, *pfirst=NULL;
2504
2505 for (j = 0; j < num; j++) {
2506 p = (node *)calloc(1,sizeof(node));
2507
2508 p->top = FALSE;
2509 p->updated = FALSE;
2510 p->number = FLAGLONG;
2511 p->type = type;
2512 p->id = unique_id++;
2513 p->next = q;
2514 p->back = NULL;
2515 p->nayme = NULL;
2516 p->x = NULL;
2517 p->v = p->tyme = p->length = 0.0;
2518 p->z = NULL;
2519
2520 p->recstart = p->recend = -1L;
2521 p->ranges = NULL;
2522 p->coal = NULL;
2523 p->members = NULL;
2524 p->memberstrue = FALSE;
2525 p->futileflag = 0L;
2526
2527 p->pop = p->actualpop = -1;
2528 p->lxmax = 0.0;
2529
2530 if(isfirst){
2531 isfirst=FALSE;
2532 pfirst = p;
2533 }
2534
2535 q = p;
2536 }
2537
2538 pfirst->next = q;
2539
2540 return q;
2541
2542 } /* allocate_nodelet */
2543
2544
2545 /*************************************************************
2546 * allocate_root() allocates and initializes a root for "tr" */
allocate_root(option_struct * op,data_fmt * data,tree * tr)2547 void allocate_root(option_struct *op, data_fmt *data, tree *tr)
2548 {
2549 long i, j, k, numslice, nummarkers;
2550 dnadata *dna;
2551 msatdata *ms;
2552
2553 tr->nodep[ROOTNUM] = allocate_nodelet(1,'t');
2554 tr->nodep[ROOTNUM]->number = ROOTNUM;
2555 allocate_x(op,data,tr->nodep[ROOTNUM]);
2556 if (op->map) {
2557 tr->nodep[ROOTNUM]->z = NULL;
2558 allocate_z(op,data,tr->nodep[ROOTNUM]);
2559 }
2560 ranges_Malloc(tr->nodep[ROOTNUM],FALSE,0L);
2561 coal_Malloc(tr->nodep[ROOTNUM],FALSE,0L);
2562 tr->nodep[ROOTNUM]->nayme = (char *)calloc(NMLNGTH,sizeof(char));
2563 strncpy(tr->nodep[ROOTNUM]->nayme,"ROOT",4);
2564
2565 /* guarantee that the root node contributes nothing to the likelihood
2566 of a tree (since its supposed to be at the end of a theoretically
2567 infinite root branch) */
2568 switch(op->datatype) {
2569 case 'a':
2570 break;
2571 case 'b':
2572 case 'm':
2573 ms = data->msptr;
2574 for(i = 0; i < ms->numloci; i++)
2575 for(j = 0; j < MICRO_ALLELEMAX; j++)
2576 tr->nodep[ROOTNUM]->x->a[i][j] = 1.0;
2577 break;
2578 case 'n':
2579 case 's':
2580 dna = data->dnaptr;
2581 numslice = (op->panel) ? NUMSLICE : 1L;
2582 nummarkers = getdata_nummarkers(op,data);
2583 for (i = 0; i < nummarkers; i++) {
2584 for (j = 0; j < op->categs; j++) {
2585 for (k = 0; k < numslice; k++) {
2586 tr->nodep[ROOTNUM]->x->s[i][j][k][baseA] = 1.0;
2587 tr->nodep[ROOTNUM]->x->s[i][j][k][baseC] = 1.0;
2588 tr->nodep[ROOTNUM]->x->s[i][j][k][baseG] = 1.0;
2589 tr->nodep[ROOTNUM]->x->s[i][j][k][baseT] = 1.0;
2590 }
2591 }
2592 }
2593 break;
2594 default:
2595 fprintf(ERRFILE,"ERROR:allocate_root: can't get here!\n");
2596 exit(-1);
2597 }
2598
2599 } /* allocate_root */
2600
2601
2602 /***********************************************************
2603 * allocate_tip() allocates and initializes a tip nodelet. */
allocate_tip(option_struct * op,data_fmt * data,tree * tr,long num)2604 void allocate_tip(option_struct *op, data_fmt *data, tree *tr,
2605 long num)
2606 {
2607
2608 tr->nodep[num] = allocate_nodelet(1,'t');
2609 tr->nodep[num]->number = num;
2610 tr->nodep[num]->top = TRUE;
2611 tr->nodep[num]->tyme = 0.0;
2612 tr->nodep[num]->nayme = (char *)calloc((NMLNGTH+1),sizeof(char));
2613 allocate_x(op,data,tr->nodep[num]);
2614 if (op->map) {
2615 tr->nodep[num]->z = NULL;
2616 allocate_z(op,data,tr->nodep[num]);
2617 }
2618 ranges_Malloc(tr->nodep[num],TRUE,1L);
2619 if (op->fc) coal_Malloc(tr->nodep[num],TRUE,1L);
2620
2621 switch(op->datatype) {
2622 case 'a':
2623 break;
2624 case 'b':
2625 case 'm':
2626 break;
2627 case 'n':
2628 case 's':
2629 tr->nodep[num]->ranges[1] = 0L;
2630 tr->nodep[num]->ranges[2] = countsites(op,data)-1;
2631 if (op->fc) {
2632 tr->nodep[num]->coal[1] = 0L;
2633 tr->nodep[num]->coal[2] = countsites(op,data)-1;
2634 }
2635 break;
2636 default:
2637 fprintf(ERRFILE,"ERROR:allocate_tip: can't get here!\n");
2638 exit(-1);
2639 }
2640
2641 } /* allocate_tip */
2642
2643
2644 /*******************************************************************
2645 * allocate_interior() allocates and initializes an interior node. */
allocate_interior(option_struct * op,data_fmt * data,tree * tr,long num)2646 void allocate_interior(option_struct *op, data_fmt *data, tree *tr, long num)
2647 {
2648
2649 if (freenodes == NULL) tr->nodep[num] = allocate_nodelet(3,'c');
2650 else {tr->nodep[num] = freenodes; freenodes = freenodes->back;}
2651 tr->nodep[num]->number = tr->nodep[num]->next->number =
2652 tr->nodep[num]->next->next->number = num;
2653
2654 } /* allocate_interior */
2655
2656
2657 /***********************************************************************
2658 * init_creature() initializes an already allocated creature structure */
init_creature(creature * cr,long numhaplotypes)2659 void init_creature(creature *cr, long numhaplotypes)
2660 {
2661
2662 cr->numhaplotypes = numhaplotypes;
2663 cr->numflipsites = 0;
2664 cr->haplotypes = (node **)calloc(numhaplotypes,sizeof(node *));
2665 cr->flipsites = NULL;
2666
2667 } /* init_creature */
2668
2669
2670 /***************************************************************
2671 * treesetup() allocates and initializes the global "curtree". *
2672 * Curtree's tymelist will be handled later. */
treesetup(option_struct * op,data_fmt * data)2673 void treesetup(option_struct *op, data_fmt *data)
2674 {
2675 long i, j, whichtip, numtips, numnodes, numcreatures;
2676 node *p;
2677
2678 numtips = getdata_numtips(op,data);
2679 numnodes = 2 * numtips;
2680 numcreatures = numtips/NUMHAPLOTYPES;
2681 curtree = (tree *)calloc(1,sizeof(tree));
2682 curtree->nodep = (node **)calloc(numnodes,sizeof(node *));
2683
2684 allocate_root(op,data,curtree);
2685
2686 for(i = 1; i <= numtips; i++) allocate_tip(op,data,curtree,i);
2687 for(i = numtips+1; i < numnodes; i++) allocate_interior(op,data,curtree,i);
2688 for(p = freenodes; p != NULL; p = p->back, i++)
2689 p->number = p->next->number = p->next->next->number = i;
2690 nodectr = i;
2691 curtree->likelihood = NEGMAX;
2692 curtree->coalprob = NEGMAX;
2693 curtree->numcoals = numtips-1;
2694 curtree->numrecombs = 0;
2695
2696 if (op->haplotyping) {
2697 curtree->creatures =
2698 (creature *)calloc(numcreatures,sizeof(creature));
2699 for(i = 0, whichtip = 1; i < numcreatures; i++) {
2700 init_creature(&curtree->creatures[i],NUMHAPLOTYPES);
2701 for(j = 0; j < curtree->creatures[i].numhaplotypes; j++)
2702 curtree->creatures[i].haplotypes[j] = curtree->nodep[whichtip++];
2703 }
2704 if (whichtip-1 > numtips)
2705 fprintf(ERRFILE,"\ntreesetup--whichtip too big!\n");
2706 } else curtree->creatures = NULL;
2707
2708 newtymenode(&curtree->tymelist);
2709 curtree->tymelist->branchlist = (node **)calloc(numtips,sizeof(node *));
2710 curtree->tymelist->eventnode = curtree->nodep[1];
2711
2712 } /* treesetup */
2713
2714
2715 /***************************************************************
2716 * end_of_population_free frees the working tree, curtree; and *
2717 * the freenode list plus the sparex array. */
end_of_population_free(option_struct * op,data_fmt * data)2718 void end_of_population_free(option_struct *op, data_fmt *data)
2719 {
2720 dnadata *dna;
2721
2722 dna = data->dnaptr;
2723
2724 free(op->ne_ratio);
2725
2726 switch(op->datatype) {
2727 case 'a':
2728 break;
2729 case 'b':
2730 case 'm':
2731 break;
2732 case 'n':
2733 case 's':
2734 break;
2735 default:
2736 fprintf(ERRFILE,"ERROR:end_of_population_free, can't be here!\n");
2737 exit(-1);
2738 }
2739
2740 } /* end_of_population_free */
2741
2742
2743 /***************************************************
2744 * end_of_locus_free() frees locus specific stuff. */
end_of_locus_free(option_struct * op,data_fmt * data)2745 void end_of_locus_free(option_struct *op, data_fmt *data)
2746 {
2747 long i;
2748 node *p, *q;
2749
2750 free(data->siteptr);
2751
2752 switch(op->datatype) {
2753 case 'a':
2754 break;
2755 case 'b':
2756 case 'm':
2757 break;
2758 case 'n':
2759 case 's':
2760 free(category);
2761 free(alogf->val);
2762 free(alogf);
2763 /* free the working arrays */
2764 free(weightrat);
2765 free(tbl);
2766 free(contribution[0]);
2767 free(contribution);
2768 #if 0 /* pre-heating */
2769 freetree(op,data,curtree);
2770 #endif
2771 for(i = 0; i < op->numtempchains; i++)
2772 freetree(op,data,temptrees[i]);
2773 for(p = freenodes; p != NULL; p = q) {
2774 q = p->back;
2775 freenodelet(op,data,p->next->next);
2776 freenodelet(op,data,p->next);
2777 freenodelet(op,data,p);
2778 }
2779 freenodes = NULL;
2780 /* the sparex arrays must be done after the tree and freenodes
2781 have been freed (or anything else that uses free_x to
2782 free it's xarrays). */
2783 for(i = 0; i < numx; i++) {
2784 free(sparex[i]->s[0][0][0]);
2785 free(sparex[i]->s[0][0]);
2786 free(sparex[i]->s[0]);
2787 free(sparex[i]->s);
2788 free(sparex[i]);
2789 sparex[i] = NULL;
2790 }
2791 numx = 0;
2792 break;
2793 default:
2794 fprintf(ERRFILE,"ERROR:end_of_locus_free, can't be here!\n");
2795 exit(-1);
2796 }
2797
2798 } /* end_of_locus_free */
2799
2800
sitecompare(dnadata * dna,long site1,long site2)2801 boolean sitecompare(dnadata *dna, long site1, long site2)
2802 {
2803 long i;
2804
2805 for(i = 0; i < dna->numseq[population]; i++)
2806 if(dna->seqs[population][locus][i][site1] !=
2807 dna->seqs[population][locus][i][site2])
2808 return FALSE;
2809
2810 return TRUE;
2811 } /* sitecompare */
2812
2813
2814 /*****************************************************************
2815 * makesiteptr creates the siteptr array: *
2816 * if (!siteptr[site]) there is no alias, otherwise potentially *
2817 * use siteptr[site]-1 for the alias site; *
2818 * if (siteptr[site]-1 < 0) then the alias is currently unusable *
2819 * due to recombination. */
makesiteptr(option_struct * op,data_fmt * data)2820 void makesiteptr(option_struct *op, data_fmt *data)
2821 {
2822 long whichsite, lastsite, i, *ptrarray;
2823 boolean found;
2824 dnadata *dna;
2825
2826 dna = data->dnaptr;
2827
2828 if (op->datatype == 'n') lastsite = dna->sites[locus]+NUMINVAR;
2829 else lastsite = dna->sites[locus];
2830
2831 ptrarray = (long *)calloc(lastsite,sizeof(long));
2832 ptrarray[0] = 0;
2833
2834 #if ALIASING
2835 for(whichsite = 1; whichsite < lastsite; whichsite++) {
2836 if (op->datatype == 'n')
2837 if (whichsite >= dna->sites[locus]) {
2838 ptrarray[whichsite] = 0;
2839 continue;
2840 }
2841 found = FALSE;
2842 for(i = whichsite - 1; i >= 0; i--) {
2843 if(sitecompare(dna,i,whichsite)) {
2844 ptrarray[whichsite] = i+1;
2845 found = TRUE;
2846 break;
2847 }
2848 }
2849 if (found) continue;
2850 ptrarray[whichsite] = 0;
2851 }
2852 #endif
2853
2854 #if !ALIASING
2855 for(i = 0; i < dna->sites[locus]; i++) ptrarray[i] = 0;
2856 #endif
2857
2858 data->siteptr = ptrarray;
2859
2860 } /* makesiteptr */
2861
2862
read_line(FILE * source,char * line)2863 void read_line(FILE *source, char *line)
2864 {
2865 char *p;
2866
2867 fgets(line,LINESIZE,source);
2868 while(line[0] == '\n' || line[0] == '#') fgets(line,LINESIZE,source);
2869 if((p = (char *)strchr(line,'\n')) != NULL) *p = '\0';
2870
2871 while(isspace(*line)) *line++;
2872 if(*line == '\0') read_line(source,line);
2873
2874 } /* read_line */
2875
2876
read_header(option_struct * op,long * numpop,long * numloci,long ** numsites,char * title,char * dlm)2877 void read_header(option_struct *op, long *numpop, long *numloci,
2878 long **numsites, char *title, char *dlm)
2879 {
2880 long i;
2881 char *input, *tok, temp;
2882
2883 input = (char *)calloc(LINESIZE,sizeof(char));
2884
2885 read_line(infile,input);
2886
2887 switch(lowercase(input[0])) {
2888 case 'a':
2889 break;
2890 case 'b':
2891 case 'm':
2892 sscanf(input,"%c%ld%ld%c%[^\n]",&temp,numpop,numloci,dlm,title);
2893 if (op->datatypeset)
2894 if (op->datatype != temp) {
2895 fprintf(ERRFILE,"\nYou chose a datatype of %c,",op->datatype);
2896 fprintf(ERRFILE,"but your infile shows a datatype of %c\n",temp);
2897 exit(-1);
2898 }
2899 op->datatype = temp;
2900 numsites = NULL;
2901 break;
2902 case 'n':
2903 if (op->autocorr) {
2904 printf("\nSNP data type is incompatible with autocorrelation");
2905 printf("\n----autocorrelation disabled-----\n");
2906 printf("press <enter> or <return> to continue\n");
2907 fgets(input,LINESIZE,stdin);
2908 fprintf(outfile,"\nSNP data type is incompatible with");
2909 fprintf(outfile," autocorrelation");
2910 fprintf(outfile,"\n----autocorrelation disabled-----\n");
2911 op->autocorr = FALSE;
2912 }
2913 case 's':
2914 sscanf(input,"%c%ld%ld%[^\n]",&temp,numpop,numloci,title);
2915 if (op->datatypeset)
2916 if (op->datatype != temp) {
2917 fprintf(ERRFILE,"\nYou chose a datatype of \"%c\",",op->datatype);
2918 fprintf(ERRFILE," but your infile shows a datatype");
2919 fprintf(ERRFILE," of \"%c\".\n",temp);
2920 exit(-1);
2921 }
2922 op->datatype = temp;
2923 read_line(infile,input);
2924 (*numsites) = (long *)calloc(*numloci,sizeof(long));
2925 for(i = 0; i < *numloci; i++) {
2926 while(isspace((int)*input)) (*input)++;
2927 if(i == 0) tok = strtok(input," ");
2928 else tok = strtok(NULL," ");
2929 (*numsites)[i] = atol(tok);
2930 }
2931 dlm = NULL;
2932 break;
2933 default:
2934 fprintf(ERRFILE,"WARNING--datatype not specified in infile\n");
2935 switch(op->datatype) {
2936 case 'a':
2937 break;
2938 case 'b':
2939 break;
2940 case 'm':
2941 sscanf(input,"%ld%ld%c%[^\n]",numpop,numloci,dlm,title);
2942 break;
2943 case 'n':
2944 case 's':
2945 sscanf(input,"%ld%ld%[^\n]",numpop,numloci,title);
2946 read_line(infile,input);
2947 (*numsites) = (long *)calloc(*numloci,sizeof(long));
2948 for(i = 0; i < *numloci; i++) {
2949 while(isspace((int)*input)) (*input)++;
2950 if(locus == 0) tok = strtok(input," ");
2951 else tok = strtok(NULL," ");
2952 (*numsites)[i] = atol(tok);
2953 }
2954 break;
2955 default:
2956 fprintf(ERRFILE,"ERROR:unknown datatype %c\n",op->datatype);
2957 fprintf(ERRFILE,"only the types a, m, s");
2958 fprintf(ERRFILE,"(electrophoretic alleles,\n");
2959 fprintf(ERRFILE,"microsatellite data, sequence data)");
2960 fprintf(ERRFILE,"are allowed.\n");
2961 exit(-1L);
2962 break;
2963 }
2964 break;
2965 }
2966
2967 free(input);
2968
2969 } /* read_header */
2970
2971
read_popheader(long * numind,char * poptitle)2972 void read_popheader(long *numind, char *poptitle)
2973 {
2974 char *input;
2975
2976 input = (char *)calloc(LINESIZE,sizeof(char));
2977
2978 read_line(infile,input);
2979 sscanf(input,"%ld%[^\n]",numind,poptitle);
2980
2981 free(input);
2982
2983 } /* read_popheader */
2984
2985
getinput(option_struct * op,data_fmt * data)2986 void getinput(option_struct *op, data_fmt *data)
2987 {
2988 long i, numpop, numloci, numind, *numsites;
2989 char *title, *poptitle, dlm;
2990
2991 title = (char *)calloc(LINESIZE,sizeof(char));
2992 poptitle = (char *)calloc(LINESIZE,sizeof(char));
2993
2994 /* setup data structures and read in the first population's
2995 worth of data */
2996 read_header(op,&numpop,&numloci,&numsites,title,&dlm);
2997 read_popheader(&numind,poptitle);
2998 setupdata(data,op->datatype,numpop,numloci,numind,numsites,title);
2999 setpopstuff(data,0L,numind,poptitle,op->datatype);
3000 read_popdata(infile,data,0L,op);
3001 /* "true haplotype" code */
3002 if (TRUEHAP) {
3003 readtruehaps(op,&truehaps,numpop,numloci,numind,numsites);
3004 copyhaps(op,data,&starthaps,numpop,numloci,numind,numsites);
3005 }
3006
3007 /* now read in all other populations' data */
3008 for(i = 1; i < numpop; i++) {
3009 read_popheader(&numind,poptitle);
3010 setpopstuff(data,i,numind,poptitle,op->datatype);
3011 read_popdata(infile,data,i,op);
3012 }
3013
3014
3015 if (op->datatype == 's' || op->datatype == 'n') {
3016 if (op->weights)
3017 inputdnaweights(data->dnaptr->sites[locus],data->dnaptr,op);
3018 free(numsites);
3019 }
3020
3021 free(poptitle);
3022 free(title);
3023
3024 } /* getinput */
3025
3026
getnodata(option_struct * op,data_fmt * data)3027 void getnodata(option_struct *op, data_fmt *data)
3028 {
3029 long i, numpop, numloci, numchains, numind, *numsites;
3030 char *title, *poptitle, dlm;
3031 FILE *kks;
3032
3033 /* debug DEBUG warning WARNING--bogus default settings */
3034 op->datatype = 's';
3035 numpop = 1;
3036 numind = 1;
3037 title = "no data run";
3038 dlm = ' ';
3039 poptitle = "first pop";
3040
3041 kks = fopen("kks","r");
3042 fscanf(kks,"%ld %ld",&numloci,&numchains);
3043 numsites = (long *)calloc(numloci,sizeof(long));
3044
3045 /* debug DEBUG warning WARNING--bogus default settings */
3046 for(i = 0; i < numloci; i++) numsites[i] = 1;
3047
3048 setupdata(data,op->datatype,numpop,numloci,numind,numsites,title);
3049 setpopstuff(data,0L,numind,poptitle,op->datatype);
3050
3051 fseek(kks,0,SEEK_SET);
3052 fclose(kks);
3053
3054 } /* getnodata */
3055
3056
watterson(option_struct * op,data_fmt * data)3057 double watterson(option_struct *op, data_fmt *data)
3058 {
3059 /* estimate theta using method of Watterson */
3060 long i, j, k, kn, numprivate, numsites;
3061 boolean varies, private;
3062 double watter;
3063 dnadata *dna;
3064
3065 dna = data->dnaptr;
3066 numprivate = kn = 0;
3067
3068 for (i = 0; i < dna->sites[locus]; i++) {
3069 varies = FALSE;
3070 private = TRUE;
3071 for (j = 1; j < dna->numseq[population]; j++) {
3072 if (dna->seqs[population][locus][j][i] !=
3073 dna->seqs[population][locus][0][i]) {
3074 if (varies) {
3075 for(k = 1; k < dna->numseq[population]; k++) {
3076 if (dna->seqs[population][locus][k][i] !=
3077 dna->seqs[population][locus][j][i])
3078 private = FALSE;
3079 }
3080 }
3081 varies = TRUE;
3082 }
3083 }
3084 if (varies) kn++;
3085 if (private && varies) numprivate++;
3086 }
3087 watter = 0.0;
3088 if (kn > 0) {
3089 for (i = 1; i < dna->numseq[population]; i++)
3090 watter += 1.0 / i;
3091 numsites = countsites(op,data);
3092 watter = kn / (numsites * watter);
3093 fprintf(outfile,"\nThere are %ld variable sites in the data\n",kn);
3094
3095 if (kn == 1) {
3096 op->holding = 2;
3097 fprintf(outfile,"\nWARNING: There is only 1 variable site in");
3098 fprintf(outfile," this data set.\nRecombination-rate cannot");
3099 fprintf(outfile," be accurately estimated in this case.\n");
3100 fprintf(outfile,"The \"estimate\" of rec-rate has been fixed");
3101 fprintf(outfile," to its starting value.\n\n");
3102
3103 printf("\nWARNING: There is only 1 variable site in");
3104 printf(" this data set.\nRecombination-rate cannot");
3105 printf(" be accurately estimated in this case.\n");
3106 printf("The \"estimate\" of rec-rate has been fixed");
3107 printf(" to its starting value.\n\n");
3108 } else {
3109 if (kn-numprivate < 2) {
3110 op->holding = 2;
3111 fprintf(outfile,"\nWARNING: Too many of the variable sites ");
3112 fprintf(outfile,"are uninformative.\nRecombination-rate ");
3113 fprintf(outfile,"cannot be accurately estimated in this ");
3114 fprintf(outfile,"case.\nThe \"estimate\" of rec-rate has ");
3115 fprintf(outfile,"been fixed to its starting value.\n\n");
3116
3117 printf("\nWARNING: Too many of the variable sites ");
3118 printf("are uninformative.\nRecombination-rate ");
3119 printf("cannot be accurately estimated in this ");
3120 printf("case.\nThe \"estimate\" of rec-rate has ");
3121 printf("been fixed to its starting value.\n\n");
3122 }
3123 }
3124
3125 return watter;
3126 }
3127 fprintf(outfile, "Warning: There are no variable sites");
3128 fprintf(outfile, " in this data set.\n\n");
3129 if (MENU) printf("Warning: There are no variable sites in this data set.\n");
3130 else {
3131 fprintf(simlog, "Warning: There are no variable sites");
3132 fprintf(simlog, " in this data set.\n\n");
3133 }
3134 exit(-1);
3135 } /* watterson */
3136
3137
3138 /**********************************************************************
3139 * invardatacheck() is called to check non-panel SNP data for *
3140 * invariant sites, which are illegal under that model. If it finds *
3141 * any, it changes that site to a datatype of "unknown"; "x" is used. */
invardatacheck(option_struct * op,data_fmt * data)3142 void invardatacheck(option_struct *op, data_fmt *data)
3143 {
3144 long marker, seq, nummarkers, numseq;
3145 char **dna;
3146 boolean varies;
3147
3148 nummarkers = getdata_nummarkers(op,data);
3149 numseq = getdata_numseq(op,data);
3150
3151 dna = data->dnaptr->seqs[population][locus];
3152
3153 for(marker = 0; marker < nummarkers; marker++) {
3154 varies = FALSE;
3155 for(seq = 1; seq < numseq; seq++) {
3156 if(dna[0][marker] != dna[seq][marker]) {
3157 varies = TRUE;
3158 break;
3159 }
3160 }
3161 if (!varies)
3162 for(seq = 0; seq < numseq; seq++) dna[seq][marker] = 'X';
3163 }
3164
3165 } /* invardatacheck */
3166
3167
3168 /*********************************************************************
3169 * buildtymelist() allocates space and initializes all the fields of *
3170 * a tymelist except for failure to initialize the branchlists. *
3171 * Those are handled by finishsetup(). *
3172 * *
3173 * WARNING buildtymelist() currently assumes a non-recombinant tree! */
buildtymelist(tree * tr,option_struct * op,data_fmt * data,node * p)3174 void buildtymelist(tree *tr, option_struct *op, data_fmt *data, node *p)
3175 {
3176 long numentries;
3177 tlist *t, *u;
3178
3179 t = tr->tymelist;
3180
3181 newtymenode(&u);
3182
3183 numentries = getdata_numtips(op,data);
3184 u->branchlist = (node **)calloc(numentries,sizeof(node *));
3185
3186 u->eventnode = p;
3187
3188 while (t != NULL) {
3189 if (u->eventnode->tyme < t->eventnode->tyme) {
3190 u->prev = t->prev;
3191 t->prev = u;
3192 u->succ = t;
3193 u->prev->succ = u;
3194 break;
3195 }
3196 if (t->succ != NULL)
3197 t = t->succ;
3198 else {
3199 t->succ = u;
3200 u->prev = t;
3201 u->succ = NULL;
3202 break;
3203 }
3204 }
3205
3206 } /* buildtymelist */
3207
3208
3209 /* WARNING warning orient assumes a non-recombinant tree */
orient(tree * tr,option_struct * op,data_fmt * data,node * p)3210 void orient(tree *tr, option_struct *op, data_fmt *data, node *p)
3211 {
3212 long lastsite;
3213
3214 lastsite = countsites(op,data)-1;
3215
3216 if (istip(p)) {
3217 return;
3218 }
3219
3220 curtree->nodep[p->number] = p; /* insure that curtree->nodep points
3221 to nodes with info */
3222
3223 /* since p is a top nodelet, it needs to actually store
3224 likelihood information, x is a NULL pointer
3225 in all other non-tip nodelets */
3226 p->top = TRUE;
3227 allocate_x(op,data,p);
3228 if (op->map) {
3229 p->z = NULL;
3230 allocate_z(op,data,p);
3231 }
3232 ranges_Malloc(p,TRUE,1L);
3233 if (op->fc) {
3234 coal_Malloc(p,TRUE,1L);
3235 p->ranges[1] = p->coal[1] = 0;
3236 p->ranges[2] = p->coal[2] = lastsite;
3237 } else {
3238 p->ranges[1] = 0;
3239 p->ranges[2] = lastsite;
3240 }
3241 p->next->top = FALSE;
3242 free_x(op,p->next);
3243 ranges_Malloc(p->next,FALSE,0L);
3244 coal_Malloc(p->next,FALSE,0L);
3245 p->next->next->top = FALSE;
3246 free_x(op,p->next->next);
3247 ranges_Malloc(p->next->next,FALSE,0L);
3248 coal_Malloc(p->next->next,FALSE,0L);
3249
3250 orient(tr,op,data,p->next->back);
3251 orient(tr,op,data,p->next->next->back);
3252
3253 p->tyme = p->next->length + p->next->back->tyme;
3254 p->next->tyme = p->tyme;
3255 p->next->next->tyme = p->tyme;
3256 if (p->number == curtree->root->back->number) {
3257 p->back->top = FALSE;
3258 p->back->tyme = ROOTLENGTH;
3259 }
3260
3261 buildtymelist(tr,op,data,p);
3262
3263 } /* orient */
3264
plumptree(option_struct * op,data_fmt * data,double th0)3265 void plumptree(option_struct *op, data_fmt *data, double th0)
3266 /* change an input tree into a perfect coalescent tree for theta0 */
3267 /* WARNING--doesn't work on a tree with recombinations!!!!! */
3268 {
3269 tlist *t;
3270 long k;
3271 double tyme;
3272
3273 /* we assume that tips are tyme 0 already */
3274 t = curtree->tymelist->succ;
3275
3276 k = getdata_numtips(op,data);
3277 tyme = 0.0;
3278 do {
3279 tyme += th0/(k*(k-1));
3280 if (!istip(t->eventnode)) {
3281 t->eventnode->tyme = tyme;
3282 t->eventnode->next->tyme = tyme;
3283 t->eventnode->next->next->tyme = tyme;
3284 }
3285 k--;
3286 t = t->succ;
3287 } while (t != NULL);
3288
3289 /* now you must call finishsetup and initbranchlist or a disaster
3290 will happen! */
3291 } /* plumptree */
3292
finishsetup(option_struct * op,data_fmt * data,node * p)3293 void finishsetup(option_struct *op, data_fmt *data, node *p)
3294 {
3295 if (istip(p)) {
3296 ltov(op,data,p);
3297 return;
3298 }
3299 ltov(op,data,p);
3300 finishsetup(op,data,p->next->back);
3301 finishsetup(op,data,p->next->next->back);
3302 return;
3303 } /* finishsetup */
3304
initbranchlist(option_struct * op,data_fmt * data)3305 void initbranchlist(option_struct *op, data_fmt *data)
3306 {
3307 tlist *t;
3308 node *p, *q;
3309 long i, j, k, n, numtips;
3310
3311 t = curtree->tymelist;
3312 numtips = n = getdata_numtips(op,data);
3313 t->numbranch = numtips;
3314 for(i = 0; i < numtips; i++) t->branchlist[i] = curtree->nodep[i+1];
3315 t->age = t->succ->eventnode->tyme;
3316 t = t->succ;
3317 /* WARNING assumes initial tree is not recombinant! */
3318 for (i = 0; i < numtips-1; i++) {
3319 /* for each interior node, do... */
3320 n--;
3321 t->numbranch = n;
3322 if (n == 1)
3323 t->age = t->eventnode->tyme + ROOTLENGTH;
3324 else
3325 t->age = t->succ->eventnode->tyme;
3326 p = t->eventnode->next->back;
3327 q = t->eventnode->next->next->back;
3328 k = 0;
3329 for (j = 0; j < t->prev->numbranch ; j++) {
3330 /* for the number of branches above the coalescent node, do...*/
3331 if (t->prev->branchlist[j] != p && t->prev->branchlist[j] != q) {
3332 t->branchlist[k] = t->prev->branchlist[j];
3333 k++;
3334 }
3335 }
3336 t->branchlist[t->numbranch - 1] = t->eventnode;
3337 t = t->succ;
3338 }
3339 } /* initbranchlist */
3340
inittable(option_struct * op,dnadata * dna)3341 void inittable(option_struct *op, dnadata *dna)
3342 {
3343 long i;
3344 tbl = (valrec *)calloc(1,op->categs*sizeof(valrec));
3345 /* Define a lookup table. Precompute values and store them in a table */
3346 for (i = 0; i < op->categs; i++) {
3347 tbl[i].rat_xi = op->rate[i] * dna->xi;
3348 tbl[i].rat_xv = op->rate[i] * dna->xv;
3349 }
3350 } /* inittable */
3351
initweightrat(option_struct * op,data_fmt * data)3352 void initweightrat(option_struct *op, data_fmt *data)
3353 {
3354 /* WARNING non-DNA data is not yet able to do this! */
3355 long i, nummarkers;
3356
3357 nummarkers = getdata_nummarkers(op,data);
3358 weightrat = (double *)calloc(nummarkers,sizeof(double));
3359 sumweightrat = 0.0;
3360 for (i = 0; i < nummarkers; i++) {
3361 weightrat[i] = data->dnaptr->dnaweight[i] * op->rate[category[i]-1];
3362 sumweightrat += weightrat[i];
3363 }
3364 } /* initweightrat */
3365
rec_outtree(node * p,boolean first,FILE ** usefile)3366 void rec_outtree(node *p, boolean first, FILE **usefile)
3367 /* write out a recombinant tree in KYB format; call with
3368 curtree.root->back. Calling program should append a
3369 semicolon. "first" is used to avoid an extra comma
3370 in the tree list and should be TRUE when this is called. */
3371 {
3372 long i;
3373
3374 if(istip(p)) {
3375 if(!first)fprintf(*usefile,",");
3376 else first=FALSE;
3377 fprintf(*usefile,"\"");
3378 for(i=0;i<NMLNGTH;i++) {
3379 if(p->nayme[i]==' ') break;
3380 else putc(p->nayme[i],*usefile);
3381 }
3382 fprintf(*usefile,"\":%f",p->length);
3383 return;
3384 }
3385 if(isrecomb(p)) {
3386 if(!first)fprintf(*usefile,",");
3387 else first=FALSE;
3388 fprintf(*usefile,"%ld",p->number);
3389 fprintf(*usefile,"{%ld-%ld}",p->recstart,p->recend);
3390 fprintf(*usefile,":%f",p->length);
3391 p->updated = !p->updated;
3392 if (p->updated) {
3393 if (p->next->top) rec_outtree(p->next->back, first, usefile);
3394 else rec_outtree(p->next->next->back, first, usefile);
3395 }
3396 return;
3397 } else { /* p is coalescent */
3398 if(!first)fprintf(*usefile,",");
3399 else first=FALSE;
3400 fprintf(*usefile,"*%ld",p->number);
3401 fprintf(*usefile,":%f",p->length);
3402 rec_outtree(p->next->back, first, usefile);
3403 rec_outtree(p->next->next->back, first, usefile);
3404 return;
3405 }
3406 } /* rec_outtree */
3407
treeout(node * p,long s,FILE ** usefile)3408 void treeout(node *p, long s, FILE **usefile)
3409 {
3410 /* write out file with representation of final tree */
3411 long i, n, w;
3412 char c;
3413 double x;
3414
3415 if (istip(p)) {
3416 n = 0;
3417 for (i = 1; i <= NMLNGTH; i++) {
3418 if (p->nayme[i - 1] != ' ')
3419 n = i;
3420 }
3421 for (i = 0; i < n; i++) {
3422 c = p->nayme[i];
3423 if (c == ' ')
3424 c = '_';
3425 putc(c, *usefile);
3426 }
3427 col += n;
3428 } else {
3429 putc('(', *usefile);
3430 col++;
3431 treeout(p->next->back, s, usefile);
3432 putc(',', *usefile);
3433 col++;
3434 if (col > 45) {
3435 putc('\n', *usefile);
3436 col = 0;
3437 }
3438 treeout(p->next->next->back, s, usefile);
3439 putc(')', *usefile);
3440 col++;
3441 }
3442 if (p->v >= 1.0)
3443 x = -1.0;
3444 else
3445 x = lengthof(p);
3446 if (x > 0.0)
3447 w = (long)(0.4343 * log(x));
3448 else if (x == 0.0)
3449 w = 0;
3450 else
3451 w = (long)(0.4343 * log(-x)) + 1;
3452 if (w < 0)
3453 w = 0;
3454 if (p == curtree->root->back)
3455 putc(';', *usefile);
3456 else {
3457 fprintf(*usefile, ":%*.10f", (int)(w + 7), x);
3458 col += w + 8;
3459 }
3460 } /* treeout */
3461
3462
snp_eval_calcrange(option_struct * op,data_fmt * data,tree * tr,boolean first,long start,long finish,long numcategs)3463 double snp_eval_calcrange(option_struct *op, data_fmt *data, tree *tr,
3464 boolean first, long start, long finish, long numcategs)
3465 {
3466 double sumterm, sumterm2, lterm, lterm2, temp;
3467 long i, j, k, slice, snpstart, snpfinish, nummarkers, numinvar, *siteptr;
3468 contribarr term, term2, clai;
3469 node *p;
3470 double *x1;
3471 dnadata *dna;
3472
3473 dna = data->dnaptr;
3474
3475 term = (double *)calloc(numcategs,sizeof(double));
3476 term2 = (double *)calloc(numcategs,sizeof(double));
3477 clai = (double *)calloc(numcategs,sizeof(double));
3478
3479 slice = 0;
3480 temp = 0.0;
3481 p = tr->root->back;
3482 siteptr = data->siteptr;
3483
3484 nummarkers = getdata_nummarkers(op, data);
3485 findsubtreemarkers(op,data,start,finish,&snpstart,&snpfinish);
3486
3487 if (snpstart == FLAGLONG) numinvar = finish-start+1;
3488 else numinvar = (finish-start+1) - (snpfinish-snpstart+1);
3489
3490 sumterm2 = 0.0;
3491 for (j = 0; j < numcategs; j++) { /* compute P(Invar) */
3492 term2[j] = 0.0;
3493 for(k = nummarkers; k < nummarkers+NUMINVAR; k++) {
3494 x1 = p->x->s[k][j][slice];
3495 term2[j] += dna->freqa * x1[baseA] +
3496 dna->freqc * x1[baseC] + dna->freqg * x1[baseG] +
3497 dna->freqt * x1[baseT];
3498 }
3499 sumterm2 += op->probcat[j] * term2[j];
3500 }
3501 lterm2 = log(sumterm2);
3502 for (j = 0; j < numcategs; j++) {
3503 clai[j] = term2[j] / sumterm2;
3504 }
3505 memcpy(contribution[nummarkers], clai, numcategs*sizeof(double));
3506 if (!op->autocorr)
3507 alogf->val[nummarkers] = lterm2;
3508 if (!op->norecsnp) temp += numinvar * lterm2;
3509
3510 if (snpstart != FLAGLONG) { /* that is, if there are SNPs */
3511 for (i = snpstart; i <= snpfinish; i++) {
3512 for (j = 0; j < numcategs; j++) {
3513 x1 = p->x->s[i][j][slice];
3514 term[j] = dna->freqa * x1[baseA] + dna->freqc * x1[baseC] +
3515 dna->freqg * x1[baseG] + dna->freqt * x1[baseT];
3516 if(term[j] == 0) {
3517 fprintf(ERRFILE,"Encountered tree incompatible with data\n");
3518 if(first) {
3519 fprintf(ERRFILE,"starting tree needs to be legal\n");
3520 exit(-1);
3521 }
3522 curtree->likelihood = NEGMAX;
3523 return(-1);
3524 }
3525 }
3526 sumterm = 0.0;
3527 for (j = 0; j < numcategs; j++) {
3528 sumterm += op->probcat[j] * term[j];
3529 }
3530 lterm = log(sumterm);
3531 if (op->norecsnp && !op->full) lterm -= log(1.0-sumterm2);
3532 for (j = 0; j < numcategs; j++) {
3533 clai[j] = ((op->norecsnp) ?
3534 (term[j] / (1.0-term2[j])) / (sumterm / (1.0-sumterm2))
3535 : (term[j] / sumterm));
3536 }
3537 memcpy(contribution[i], clai, numcategs*sizeof(double));
3538 if (!op->autocorr)
3539 alogf->val[i] = lterm;
3540 temp += dna->dnaweight[i] * lterm;
3541 }
3542
3543 #if 0 /* This is probably dead DEAD dead--warning WARNING DEBUG debug */
3544 if (op->datatype == 'n' && op->full) {
3545 temp += log(op->chance_seen);
3546 numunobs = getnumlinks(op,data,start,finish)-(finish-start);
3547 if (numunobs < 0 || (long)numunobs != numunobs) {
3548 fprintf(ERRFILE,"non integral SNP distances with Full model\n");
3549 exit(-1);
3550 }
3551 temp += numunobs * log(1.0 - (op->chance_seen*(1.0-sumterm2)));
3552 }
3553 #endif
3554 }
3555
3556 free(term);
3557 free(term2);
3558 free(clai);
3559
3560 return(temp);
3561
3562 } /* snp_eval_calcrange */
3563
3564
panel_eval_calcrange(option_struct * op,data_fmt * data,tree * tr,boolean first,long start,long finish,long numcategs)3565 double panel_eval_calcrange(option_struct *op, data_fmt *data, tree *tr,
3566 boolean first, long start, long finish, long numcategs)
3567 {
3568 double sumterm, sumterm2, lterm, lterm2, temp, invarterm;
3569 long i, j, k, numunobs, nummarkers, numinvar, snpstart, snpfinish,
3570 *siteptr;
3571 contribarr term, term2, clai;
3572 node *p;
3573 double **x1;
3574 dnadata *dna;
3575
3576 dna = data->dnaptr;
3577
3578 term = (double *)calloc(numcategs,sizeof(double));
3579 term2 = (double *)calloc(numcategs,sizeof(double));
3580 clai = (double *)calloc(numcategs,sizeof(double));
3581
3582 temp = 0.0;
3583 p = tr->root->back;
3584 siteptr = data->siteptr;
3585
3586 /* compute P(INVAR) for each category, used as P(SNP) = 1.0 - P(INVAR) */
3587 nummarkers = getdata_nummarkers(op,data);
3588 findsubtreemarkers(op,data,start,finish,&snpstart,&snpfinish);
3589
3590 if (snpstart == FLAGLONG) numinvar = finish-start+1;
3591 else numinvar = (finish-start+1) - (snpfinish-snpstart+1);
3592
3593 sumterm2 = 0.0;
3594 for (j = 0; j < numcategs; j++) {
3595 x1 = p->x->s[nummarkers][j];
3596 term2[j] = 0.0;
3597 for (k = 1; k < NUMSLICE; k++) {
3598 term2[j] += dna->freqa * x1[k][baseA] +
3599 dna->freqc * x1[k][baseC] + dna->freqg * x1[k][baseG] +
3600 dna->freqt * x1[k][baseT];
3601 }
3602 sumterm2 += op->probcat[j] * term2[j];
3603 }
3604 lterm2 = log(sumterm2);
3605 for (j = 0; j < numcategs; j++) {
3606 clai[j] = term2[j] / sumterm2;
3607 }
3608 memcpy(contribution[nummarkers], clai, numcategs*sizeof(double));
3609 if (!op->autocorr)
3610 alogf->val[nummarkers] = lterm2;
3611 if (!op->norecsnp) temp += numinvar * lterm2;
3612
3613 if (snpstart != FLAGLONG) {
3614 for (i = snpstart; i <= snpfinish; i++) {
3615 for (j = 0; j < numcategs; j++) {
3616 /* compute P(data) for this category */
3617 x1 = p->x->s[i][j];
3618 term[j] = dna->freqa * x1[0][baseA] + dna->freqc * x1[0][baseC] +
3619 dna->freqg * x1[0][baseG] + dna->freqt * x1[0][baseT];
3620 invarterm = 0.0;
3621 /* compute P(data && panel invariant) for this category */
3622 for (k = 1; k < NUMSLICE; k++) {
3623 invarterm += dna->freqa * x1[k][baseA] + dna->freqc *
3624 x1[k][baseC] + dna->freqg * x1[k][baseG] + dna->freqt *
3625 x1[k][baseT];
3626 }
3627 term[j] -= invarterm;
3628 if(term[j] == 0) {
3629 fprintf(ERRFILE,"Encountered tree incompatible with data\n");
3630 if(first) {
3631 fprintf(ERRFILE,"starting tree needs to be legal\n");
3632 exit(-1);
3633 }
3634 curtree->likelihood = NEGMAX;
3635 return(-1);
3636 }
3637 }
3638 sumterm = 0.0;
3639 for (j = 0; j < numcategs; j++) {
3640 sumterm += op->probcat[j] * term[j];
3641 }
3642 lterm = log(sumterm);
3643 if (op->norecsnp && !op->full) lterm -= log(1.0-sumterm2);
3644 for (j = 0; j < numcategs; j++) {
3645 clai[j] = ((op->norecsnp) ?
3646 (term[j] / (1.0 - term2[j])) / (sumterm / (1.0 - sumterm2))
3647 : (term[j] / sumterm));
3648 }
3649 memcpy(contribution[i], clai, numcategs*sizeof(double));
3650 if (!op->autocorr)
3651 alogf->val[i] = lterm;
3652 temp += dna->dnaweight[i] * lterm;
3653 }
3654
3655 if (op->full) {
3656 temp += log(op->chance_seen);
3657 numunobs = (long)(getnumlinks(op,data,start,finish)-(finish-start));
3658 if (numunobs < 0 || (long)numunobs != numunobs) {
3659 fprintf(ERRFILE,"non integral SNP distances with Full model\n");
3660 exit(-1);
3661 }
3662 temp += numunobs * log(1.0 - (op->chance_seen*(1.0-sumterm2)));
3663 }
3664 }
3665
3666 free(term);
3667 free(term2);
3668 free(clai);
3669
3670 return(temp);
3671
3672 } /* panel_eval_calcrange */
3673
3674
eval_calcrange(option_struct * op,data_fmt * data,tree * tr,boolean first,long start,long finish,long numcategs)3675 double eval_calcrange(option_struct *op, data_fmt *data, tree *tr,
3676 boolean first, long start, long finish, long numcategs)
3677 {
3678 double sumterm, lterm, temp;
3679 long i, j, slice, nummarkers, *siteptr;
3680 contribarr term, term2, clai;
3681 node *p;
3682 double *x1;
3683 dnadata *dna;
3684
3685 dna = data->dnaptr;
3686
3687 term = (double *)calloc(numcategs,sizeof(double));
3688 term2 = (double *)calloc(numcategs,sizeof(double));
3689 clai = (double *)calloc(numcategs,sizeof(double));
3690
3691 slice = 0;
3692 temp = 0.0;
3693 p = tr->root->back;
3694 siteptr = data->siteptr;
3695 nummarkers = getdata_nummarkers(op,data);
3696
3697 for (i = start; i <= finish; i++) {
3698 for (j = 0; j < numcategs; j++) {
3699 x1 = p->x->s[i][j][slice];
3700 term[j] = dna->freqa * x1[baseA] + dna->freqc * x1[baseC] +
3701 dna->freqg * x1[baseG] + dna->freqt * x1[baseT];
3702 if(term[j] == 0) {
3703 fprintf(ERRFILE,"Encountered tree incompatible with data\n");
3704 if(first) {
3705 fprintf(ERRFILE,"starting tree needs to be legal\n");
3706 exit(-1);
3707 }
3708 curtree->likelihood = NEGMAX;
3709 return(-1);
3710 }
3711 }
3712 sumterm = 0.0;
3713 for (j = 0; j < numcategs; j++) {
3714 sumterm += op->probcat[j] * term[j];
3715 }
3716 lterm = log(sumterm);
3717 for (j = 0; j < numcategs; j++) {
3718 clai[j] = term[j] / sumterm;
3719 }
3720 memcpy(contribution[i], clai, numcategs*sizeof(double));
3721 if (!op->autocorr)
3722 alogf->val[i] = lterm;
3723 temp += dna->dnaweight[i] * lterm;
3724 }
3725
3726 free(term);
3727 free(term2);
3728 free(clai);
3729
3730 return(temp);
3731
3732 } /* eval_calcrange */
3733
3734
evaluate(option_struct * op,data_fmt * data,tree * tr,double llike)3735 double evaluate(option_struct *op, data_fmt *data, tree *tr,
3736 double llike)
3737 {
3738 double sum2, sumc, one_minus_lambda;
3739 contribarr like, nulike, clai;
3740 long i, j, k, nummarkers;
3741
3742 if (op->categs > 1) {
3743 nummarkers = getdata_nummarkers(op,data);
3744
3745 like = (double *)calloc(op->categs,sizeof(double));
3746 nulike = (double *)calloc(op->categs,sizeof(double));
3747
3748 one_minus_lambda = 1.0 - op->lambda;
3749
3750 for (j = 0; j < op->categs; j++) like[j] = 1.0;
3751 for (i = 0; i < nummarkers; i++) {
3752 sumc = 0.0;
3753 for (k = 1; k <= op->categs; k++)
3754 sumc += op->probcat[k - 1] * like[k - 1];
3755 sumc *= op->lambda;
3756 clai = contribution[i];
3757 for (j = 0; j < op->categs; j++)
3758 nulike[j] = (one_minus_lambda * like[j] + sumc) * clai[j];
3759 memcpy(like, nulike, op->categs*sizeof(double));
3760 }
3761 sum2 = 0.0;
3762 for (i = 0; i < op->categs; i++)
3763 sum2 += op->probcat[i] * like[i];
3764 llike += log(sum2);
3765 tr->likelihood = llike;
3766
3767 free(like);
3768 free(nulike);
3769
3770 return(llike);
3771 } else {
3772 tr->likelihood = llike;
3773 return(llike);
3774 }
3775
3776 } /* evaluate */
3777
3778
snp_evaluate(option_struct * op,data_fmt * data,tree * tr,double llike,long start,long finish)3779 double snp_evaluate(option_struct *op, data_fmt *data, tree *tr,
3780 double llike, long start, long finish)
3781 {
3782 double sum2, sumc;
3783 contribarr like, nulike, clai;
3784 long i, j, snpstart, snpfinish, nummarkers, numinvar;
3785
3786 if (op->categs > 1) {
3787 nummarkers = getdata_nummarkers(op,data);
3788 findsubtreemarkers(op,data,start,finish,&snpstart,&snpfinish);
3789
3790 like = (double *)calloc(op->categs,sizeof(double));
3791 nulike = (double *)calloc(op->categs,sizeof(double));
3792
3793 for (j = 0; j < op->categs; j++) like[j] = 1.0;
3794
3795 if (snpstart == FLAGLONG) numinvar = finish-start+1;
3796 else numinvar = (finish-start+1) - (snpfinish-snpstart+1);
3797
3798 if (snpstart != FLAGLONG) {
3799 for (i = snpstart; i <= snpfinish; i++) {
3800 sumc = 0.0;
3801 for (j = 0; j < op->categs; j++)
3802 sumc += op->probcat[j] * like[j];
3803 clai = contribution[i];
3804 for (j = 0; j < op->categs; j++)
3805 nulike[j] = sumc * clai[j];
3806 memcpy(like, nulike, op->categs*sizeof(double));
3807 }
3808 }
3809
3810 for (i = 0; i < numinvar; i++) { /* invariant sites */
3811 sumc = 0.0;
3812 for (j = 0; j < op->categs; j++)
3813 sumc += op->probcat[j] * like[j];
3814 clai = contribution[nummarkers];
3815 for (j = 0; j < op->categs; j++)
3816 nulike[j] = sumc * clai[j];
3817 memcpy(like, nulike, op->categs*sizeof(double));
3818 }
3819
3820 sum2 = 0.0;
3821 for (j = 0; j < op->categs; j++)
3822 sum2 += op->probcat[j] * like[j];
3823 llike += log(sum2);
3824
3825 free(like);
3826 free(nulike);
3827
3828 tr->likelihood = llike;
3829 return(llike);
3830 } else {
3831 tr->likelihood = llike;
3832 return(llike);
3833 }
3834
3835 } /* snp_evaluate */
3836
3837
nuview_usebranch(data_fmt * data,node * p,long site)3838 boolean nuview_usebranch(data_fmt *data, node *p, long site)
3839 {
3840
3841 if(!p->top) return(FALSE);
3842
3843 return(inrange(p->ranges,site));
3844
3845 } /* nuview_usebranch */
3846
3847
prob_micro(msatdata * ms,double t,long diff)3848 double prob_micro(msatdata *ms, double t, long diff)
3849 {
3850 double **steps = ms->steps;
3851 long stepnum = MICRO_MAXCHANGE;
3852 long k;
3853 double newsum = 0.0, oldsum=0.0;
3854 double logt = log(t);
3855
3856 if(diff>=stepnum)
3857 return newsum;
3858 for (k = diff; k < diff + stepnum; k+=2){
3859 newsum += exp(-t + logt * k - steps[diff][k-diff]);
3860 if(oldsum-newsum < DBL_EPSILON)
3861 break;
3862 oldsum = newsum;
3863 }
3864 return newsum;
3865
3866 } /* prob_micro */
3867
3868
3869 /********************************************************
3870 * findcoal() takes a non-top coalescent nodelet and *
3871 * returns the closest tipwards top coalescent nodelet. *
3872 * It also returns the "v" value between the two nodes. *
3873 * In the case of failure it returns a NULL pointer *
3874 * with v set to FLAGLONG. *
3875 * *
3876 * findcoal assumes that branchlengths are additive. */
findcoal(option_struct * op,data_fmt * data,node * p,double * v)3877 node *findcoal(option_struct *op, data_fmt *data, node *p, double *v)
3878 {
3879 double total;
3880 node *q;
3881
3882 if(!iscoal(p) || p->top) {*v = FLAGLONG; return(NULL);}
3883
3884 for (q = p->back, total = 0; !iscoal(q) && !istip(q); q = findunique(q)->back)
3885 total += lengthof(q);
3886
3887 total += lengthof(q);
3888 *v = findcoal_ltov(op,data,total);
3889
3890 return(q);
3891
3892 } /* findcoal */
3893
3894
nuview_micro(option_struct * op,data_fmt * data,node * p,long indexsite,long startsite,long endsite)3895 void nuview_micro(option_struct *op, data_fmt *data, node *p,
3896 long indexsite, long startsite, long endsite)
3897 {
3898 long i, a, s, smax, margin, diff;
3899 double *xx1, *xx2, *xx3, vv1 = 0.0, vv2 = 0.0, pija1s, pija2s,
3900 lx1, lx2, x3m;
3901 node *q, *r;
3902 boolean qactive, ractive;
3903
3904 smax = MICRO_ALLELEMAX;
3905 margin = MICRO_MAXCHANGE;
3906 x3m = NEGMAX;
3907
3908 xx1 = NULL;
3909 xx2 = NULL;
3910 vv1 = 0.0;
3911 vv2 = 0.0;
3912 xx3 = (double *)calloc(smax,sizeof(double));
3913
3914 q = findcoal(op,data,p->next,&vv1);
3915 r = findcoal(op,data,p->next->next,&vv2);
3916
3917 /* are these sites active and on an upwards branch? */
3918 qactive = nuview_usebranch(data,p->next,indexsite);
3919 ractive = nuview_usebranch(data,p->next->next->back,indexsite);
3920
3921 for(i = startsite; i <= endsite; i++) {
3922 if (qactive) {
3923 xx1 = q->x->a[i];
3924 lx1 = q->lxmax;
3925 } else {
3926 lx1 = 0.0;
3927 }
3928
3929 if (ractive) {
3930 xx2 = r->x->a[i];
3931 lx2 = r->lxmax;
3932 } else {
3933 lx2 = 0.0;
3934 }
3935
3936 for(s = 0; s < smax; s++) {
3937 if (qactive) pija1s = 0.0;
3938 else pija1s = 1.0;
3939 if (ractive) pija2s = 0.0;
3940 else pija2s = 1.0;
3941 for(a = MAX(0,s-margin); a < s + margin && a < smax; a++) {
3942 diff = labs(s-a);
3943 if(xx1[a] > 0 && qactive) {
3944 pija1s += prob_micro(data->msptr,vv1,diff) * xx1[a];
3945 }
3946 if(xx2[a] > 0 && ractive) {
3947 pija2s += prob_micro(data->msptr,vv2,diff) * xx2[a];
3948 }
3949 }
3950 xx3[s] = pija1s * pija2s;
3951 if(xx3[s] > x3m) x3m = xx3[s];
3952 }
3953 if(x3m == 0.0) p->lxmax = NEGMAX;
3954 else {
3955 for(s = 0; s < smax; s++) xx3[s] /= x3m;
3956 p->lxmax = log(x3m) + lx1 + lx2;
3957 }
3958 memcpy(p->x->a[i],xx3,smax*sizeof(double));
3959 }
3960
3961 free(xx3);
3962
3963 } /* nuview_micro */
3964
3965
nuview(option_struct * op,data_fmt * data,node * p,long indexsite,long startsite,long endsite)3966 void nuview(option_struct *op, data_fmt *data, node *p, long indexsite,
3967 long startsite, long endsite)
3968 {
3969 long i;
3970 double lw1 = 0.0, lw2 = 0.0;
3971 node *q, *r;
3972 boolean toobig;
3973
3974 /* set "q" and "r" for use in calcrange(), either to a valid nodelet with
3975 datalikelihoods, or NULL if there is no such nodelet. */
3976 if (nuview_usebranch(data,p->next->back,indexsite)) {
3977 q = findcoal(op,data,p->next,&lw1);
3978 toobig = (exp(lw1) <= 0.0);
3979 } else {q = NULL; toobig = TRUE;}
3980
3981 if (toobig) {
3982 for (i = 0; i < op->categs; i++) {
3983 tbl[i].ww1 = tbl[i].zz1 = 0.0;
3984 tbl[i].ww1zz1 = tbl[i].vv1zz1 = 0.0;
3985 }
3986 } else {
3987 for (i = 0; i < op->categs; i++) {
3988 tbl[i].ww1 = exp(tbl[i].rat_xi * lw1);
3989 tbl[i].zz1 = exp(tbl[i].rat_xv * lw1);
3990 tbl[i].ww1zz1 = tbl[i].ww1 * tbl[i].zz1;
3991 tbl[i].vv1zz1 = (1.0 - tbl[i].ww1) * tbl[i].zz1;
3992 }
3993 }
3994
3995 if (nuview_usebranch(data,p->next->next->back,indexsite)) {
3996 r = findcoal(op,data,p->next->next,&lw2);
3997 toobig = (exp(lw2) <= 0.0);
3998 } else {r = NULL; toobig = TRUE;}
3999
4000 if (toobig) {
4001 for (i = 0; i < op->categs; i++) {
4002 tbl[i].ww2 = tbl[i].zz2 = 0.0;
4003 tbl[i].ww2zz2 = tbl[i].vv2zz2 = 0.0;
4004 }
4005 } else {
4006 for (i = 0; i < op->categs; i++) {
4007 tbl[i].ww2 = exp(tbl[i].rat_xi * lw2);
4008 tbl[i].zz2 = exp(tbl[i].rat_xv * lw2);
4009 tbl[i].ww2zz2 = tbl[i].ww2 * tbl[i].zz2;
4010 tbl[i].vv2zz2 = (1.0 - tbl[i].ww2) * tbl[i].zz2;
4011 }
4012 }
4013
4014 calcrange(op,data,p,q,r,indexsite,startsite,endsite,op->categs);
4015
4016 } /* nuview */
4017
4018
calcrange(option_struct * op,data_fmt * data,node * p,node * q,node * r,long whichtree,long start,long finish,long numcategs)4019 void calcrange(option_struct *op, data_fmt *data, node *p, node * q,
4020 node *r, long whichtree, long start, long finish, long numcategs)
4021 /* whichtree indicates which tree should be used to calculate the
4022 likelihood of this set of sites: pass the number of a site which
4023 has the desired tree. (This is helpful in calculations for the
4024 SNP invariant sites.) */
4025 {
4026 long i, j, k, numslice, *siteptr;
4027 double yy1, yy2, ww1zz1, vv1zz1, ww2zz2, vv2zz2, vv1zz1_sumr1,
4028 vv2zz2_sumr2, vv1zz1_sumy1, vv2zz2_sumy2, sum1, sum2,
4029 sumr1, sumr2, sumy1, sumy2;
4030 boolean qactive, ractive;
4031 dnadata *dna;
4032 double **allones, **xx1, **xx2, **xx3;
4033 long istart, ifinish;
4034
4035 // DEBUG
4036 double temptotal;
4037
4038 dna = data->dnaptr;
4039 numslice = (op->panel) ? NUMSLICE : 1L;
4040 siteptr = data->siteptr;
4041
4042 if (op->datatype == 'n') {
4043 if (start == FLAGLONG) { /* extra sites */
4044 istart = getdata_nummarkers(op,data);
4045 ifinish = istart+NUMINVAR-1;
4046 } else {
4047 findsubtreemarkers(op,data,start,finish,&istart,&ifinish);
4048 if(istart==FLAGLONG) return;
4049 }
4050 } else {
4051 istart = start;
4052 ifinish = finish;
4053 }
4054
4055 /* allocate some working space--this should probably be moved
4056 upstream at some point to save time! */
4057
4058 allones = (double **)calloc(numslice,sizeof(double *));
4059 xx3 = (double **)calloc(numslice,sizeof(double *));
4060 allones[0] = (double *)calloc(numslice*4,sizeof(double));
4061 xx3[0] = (double *)calloc(numslice*4,sizeof(double));
4062 for (i = 0; i < numslice; i++) {
4063 allones[i] = allones[0] + i * 4;
4064 xx3[i] = xx3[0] + i * 4;
4065 for (j = 0; j < 4; j++)
4066 allones[i][j] = 1.0;
4067 }
4068
4069 /* are these sites active and on an upwards branch? */
4070 qactive = (q) ? TRUE : FALSE;
4071 ractive = (r) ? TRUE : FALSE;
4072
4073
4074 for(i = istart; i <= ifinish; i++) {
4075 if(siteptr[i] > 0) { /* is an alias site available? */
4076 memcpy(p->x->s[i][0][0], p->x->s[siteptr[i]-1][0][0],
4077 op->categs*numslice*4L*sizeof(double));
4078 // DEBUG
4079 temptotal = p->x->s[i][0][0][baseA] + p->x->s[i][0][0][baseC] + p->x->s[i][0][0][baseG] + p->x->s[i][0][0][baseT];
4080 if (temptotal == 0.0)
4081 printf("bad likelihood in memcpy!");
4082 //
4083 continue; /* go to next site */
4084 }
4085
4086 for (j = 0; j < numcategs; j++) {
4087 memcpy(xx3[0], allones[0], numslice * 4L * sizeof(double));
4088 if (qactive) {
4089 ww1zz1 = tbl[j].ww1zz1;
4090 vv1zz1 = tbl[j].vv1zz1;
4091 yy1 = 1.0 - tbl[j].zz1;
4092 xx1 = q->x->s[i][j];
4093
4094 for (k=0; k<numslice; k++) {
4095 sum1 = yy1 * (dna->freqa * xx1[k][baseA] +
4096 dna->freqc * xx1[k][baseC] +
4097 dna->freqg * xx1[k][baseG] + dna->freqt * xx1[k][baseT]);
4098 sumr1 = dna->freqar * xx1[k][baseA] + dna->freqgr * xx1[k][baseG];
4099 sumy1 = dna->freqcy * xx1[k][baseC] + dna->freqty * xx1[k][baseT];
4100 vv1zz1_sumr1 = vv1zz1 * sumr1;
4101 vv1zz1_sumy1 = vv1zz1 * sumy1;
4102 xx3[k][baseA] *=
4103 (sum1 + ww1zz1 * xx1[k][baseA] + vv1zz1_sumr1);
4104 xx3[k][baseC] *=
4105 (sum1 + ww1zz1 * xx1[k][baseC] + vv1zz1_sumy1);
4106 xx3[k][baseG] *=
4107 (sum1 + ww1zz1 * xx1[k][baseG] + vv1zz1_sumr1);
4108 xx3[k][baseT] *=
4109 (sum1 + ww1zz1 * xx1[k][baseT] + vv1zz1_sumy1);
4110 // DEBUG
4111 temptotal = xx3[k][baseA] + xx3[k][baseC] + xx3[k][baseG] + xx3[k][baseT];
4112 if (temptotal == 0.0)
4113 printf("bad likelihood in q!");
4114 //
4115 }
4116 }
4117
4118 if (ractive) {
4119 ww2zz2 = tbl[j].ww2zz2;
4120 vv2zz2 = tbl[j].vv2zz2;
4121 yy2 = 1.0 - tbl[j].zz2;
4122 xx2 = r->x->s[i][j];
4123
4124 for (k=0; k<numslice; k++) {
4125 sum2 = yy2 * (dna->freqa * xx2[k][baseA] +
4126 dna->freqc * xx2[k][baseC] +
4127 dna->freqg * xx2[k][baseG] + dna->freqt * xx2[k][baseT]);
4128 sumr2 = dna->freqar * xx2[k][baseA] + dna->freqgr * xx2[k][baseG];
4129 sumy2 = dna->freqcy * xx2[k][baseC] + dna->freqty * xx2[k][baseT];
4130 vv2zz2_sumr2 = vv2zz2 * sumr2;
4131 vv2zz2_sumy2 = vv2zz2 * sumy2;
4132 xx3[k][baseA] *=
4133 (sum2 + ww2zz2 * xx2[k][baseA] + vv2zz2_sumr2);
4134 xx3[k][baseC] *=
4135 (sum2 + ww2zz2 * xx2[k][baseC] + vv2zz2_sumy2);
4136 xx3[k][baseG] *=
4137 (sum2 + ww2zz2 * xx2[k][baseG] + vv2zz2_sumr2);
4138 xx3[k][baseT] *=
4139 (sum2 + ww2zz2 * xx2[k][baseT] + vv2zz2_sumy2);
4140 // DEBUG
4141 temptotal = xx3[k][baseA] + xx3[k][baseC] + xx3[k][baseG] + xx3[k][baseT];
4142 if (temptotal == 0.0)
4143 printf("bad likelihood in r!");
4144 //
4145 }
4146 }
4147
4148 memcpy(p->x->s[i][j][0], xx3[0], numslice * 4L * sizeof(double));
4149 }
4150 }
4151
4152 free(allones[0]);
4153 free(allones);
4154 free(xx3[0]);
4155 free(xx3);
4156
4157 } /* calcrange */
4158
4159
localsmooth(option_struct * op,data_fmt * data,node * p,long indexsite,long startsite,long endsite)4160 void localsmooth(option_struct *op, data_fmt *data, node *p,
4161 long indexsite, long startsite, long endsite)
4162 {
4163 /* never mind if it's a tip */
4164 if (istip(p)) return;
4165
4166 /* recurse left */
4167 if (!p->next->top)
4168 if (!p->next->back->updated)
4169 if (inrange(p->next->back->ranges,indexsite))
4170 localsmooth(op,data,p->next->back,indexsite,startsite,endsite);
4171
4172 /* recurse right */
4173 if (!p->next->next->top)
4174 if (!p->next->next->back->updated)
4175 if (inrange(p->next->next->back->ranges,indexsite))
4176 localsmooth(op,data,p->next->next->back,indexsite,startsite,
4177 endsite);
4178
4179 /* update likelihoods */
4180 if (!p->updated && iscoal(p))
4181 switch(op->datatype) {
4182 case 'a':
4183 break;
4184 case 'b':
4185 case 'm':
4186 nuview_micro(op,data,p,indexsite,startsite,endsite);
4187 break;
4188 case 'n':
4189 case 's':
4190 nuview(op,data,p,indexsite,startsite,endsite);
4191 break;
4192 default:
4193 fprintf(ERRFILE,"localsmooth:unknown datatype %c\n",op->datatype);
4194 fprintf(ERRFILE,"only the types a, m, s");
4195 fprintf(ERRFILE,"(electrophoretic alleles,\n");
4196 fprintf(ERRFILE,"microsatellite data, sequence data)");
4197 fprintf(ERRFILE,"are allowed.\n");
4198 exit(-1);
4199 }
4200
4201 } /* localsmooth */
4202
4203
snpsmooth(option_struct * op,data_fmt * data,tree * tr,long indexsite,long startsite,long endsite)4204 void snpsmooth (option_struct *op, data_fmt *data, tree *tr,
4205 long indexsite, long startsite, long endsite)
4206 {
4207 tlist *t;
4208
4209 for(t = tr->tymelist->succ; t != NULL; t = t->succ)
4210 if (iscoal(t->eventnode))
4211 nuview(op,data,t->eventnode,indexsite,startsite,endsite);
4212
4213 } /* snpsmooth */
4214
4215
localeval(option_struct * op,data_fmt * data,node * p,boolean first)4216 void localeval(option_struct *op, data_fmt *data, node *p, boolean first)
4217 {
4218 long subtree, *subtree_ranges, substart, subend, lastmarker, *siteptr;
4219 double dummy, llike;
4220 dnadata *dna;
4221 msatdata *ms;
4222
4223 dna = data->dnaptr;
4224 ms = data->msptr;
4225
4226 subtree_ranges = (long *)calloc(2*curtree->numrecombs+4,sizeof(long));
4227 subtree_ranges[0] = 1;
4228
4229 siteptr = data->siteptr;
4230
4231 findsubtrees(op,data,curtree->tymelist,subtree_ranges);
4232
4233 llike = 0.0;
4234 for(subtree = 0; subtree < subtree_ranges[0]; subtree++) {
4235 substart = subtree_ranges[2*subtree+1];
4236 subend = subtree_ranges[2*subtree+2];
4237 /* do the regular sites */
4238 localsmooth(op,data,p,substart,substart,subend);
4239 switch(op->datatype) {
4240 case 'a':
4241 break;
4242 case 'b':
4243 case 'm':
4244 /* WARNING this throws away llike, which must be wrong! */
4245 llike += micro_evaluate(op,ms,curtree,substart,subend);
4246 break;
4247 case 'n':
4248 lastmarker = getdata_nummarkers(op,data)-1;
4249 snpsmooth(op,data,curtree,substart,FLAGLONG,FLAGLONG);
4250 if (op->panel)
4251 llike += panel_eval_calcrange(op,data,curtree,first,substart,
4252 subend,op->categs);
4253 else
4254 llike += snp_eval_calcrange(op,data,curtree,first,substart,
4255 subend,op->categs);
4256 dummy = snp_evaluate(op,data,curtree,llike,substart,subend);
4257 break;
4258 case 's':
4259 llike += eval_calcrange(op,data,curtree,first,substart,subend,
4260 op->categs);
4261 dummy = evaluate(op,data,curtree,llike);
4262 break;
4263 default:
4264 fprintf(ERRFILE,"\nlocaleval--impossible datatype %c\n",
4265 op->datatype);
4266 break;
4267 }
4268 }
4269
4270 traverse_unflag(curtree,curtree->nodep[1]);
4271 free(subtree_ranges);
4272
4273 } /* localeval */
4274
4275
4276 /********************************************************************
4277 * micro_evaluate() calculates the tree data likelihood at the root *
4278 * assuming that the tree has been properly "smoothed" first. */
micro_evaluate(option_struct * op,msatdata * ms,tree * tr,long start,long end)4279 double micro_evaluate(option_struct *op, msatdata *ms, tree *tr,
4280 long start, long end)
4281 {
4282 long site, a;
4283 double term;
4284 node *nn;
4285
4286 nn = tr->root->back;
4287 term = 0.0;
4288
4289 for(site = start; site <= end; site++)
4290 for(a = 0; a < MICRO_ALLELEMAX-1; a++)
4291 term += nn->x->a[site][a];
4292
4293 if(term == 0.0) return(NEGMAX);
4294 else return(log(term) + nn->lxmax);
4295
4296 } /* micro_evaluate */
4297
4298
4299 /******************************************************************
4300 * countbranches returns the total number of branches in the tree *
4301 * not including the root. */
countbranches(option_struct * op,data_fmt * data,tree * tr)4302 long countbranches(option_struct *op, data_fmt *data, tree *tr)
4303 {
4304
4305 return(getdata_numtips(op,data) + 2 * tr->numrecombs + tr->numcoals - 1);
4306
4307 } /* countbranches */
4308
4309 /******************************************************************
4310 * testratio() returns TRUE to accept a tree, FALSE to reject it. *
4311 * ratiotype = "d" if called to decide a "drop" *
4312 * "t" if called to decide a "twiddle" *
4313 * "f" if called to decide a "flip" */
testratio(option_struct * op,data_fmt * data,tree * oldtree,tree * newtree,char ratiotype)4314 boolean testratio(option_struct *op, data_fmt *data, tree *oldtree,
4315 tree *newtree, char ratiotype)
4316 {
4317 double test, x, numoldbranches, numnewbranches;
4318
4319 if(newtree->likelihood == NEGMAX)
4320 return FALSE;
4321 #if 0 /* first heating */
4322 test = (newtree->likelihood - oldtree->likelihood) * (1.0/op->temperature[0]);
4323 #endif
4324 test = (newtree->likelihood - oldtree->likelihood) * (1.0/op->ctemp);
4325 switch((int)ratiotype) {
4326 case 'd' :
4327 numoldbranches = (double)countbranches(op,data,oldtree);
4328 numnewbranches = (double)countbranches(op,data,newtree);
4329 test += log(numoldbranches/numnewbranches);
4330 break;
4331 case 't' :
4332 test += log((double)oldtree->numrecombs/(double)newtree->numrecombs);
4333 break;
4334 case 'f' :
4335 break;
4336 default :
4337 fprintf(ERRFILE,"ERROR:testratio encounters unknown type--no Hastings\n");
4338 break;
4339 }
4340
4341 #if 0 /* first heating */
4342 if (op->temperature[0] != 1) { /* heating code */
4343 test += (newtree->coalprob - oldtree->coalprob) *
4344 (1.0/op->temperature[0] - 1.0);
4345 }
4346 #endif
4347
4348 #if 0 /* don't heat this */
4349 test += (newtree->coalprob - oldtree->coalprob) *
4350 (1.0/op->ctemp - 1.0);
4351 #endif
4352
4353 if (test >= 0.0)
4354 return TRUE;
4355 else {
4356 x = log(randum());
4357 if (test >= x)
4358 return TRUE;
4359 else
4360 return FALSE;
4361 }
4362 } /* testratio */
4363
4364
seekch(char c)4365 void seekch(char c) /* use only in reading file intree! */
4366 {
4367 if (gch == c)
4368 return;
4369 do {
4370 if (eoln(intree)) {
4371 fscanf(intree, "%*[^\n]");
4372 getc(intree);
4373 }
4374 gch = getc(intree);
4375 if (gch == '\n')
4376 gch = ' ';
4377 } while (gch != c);
4378 } /* seekch */
4379
getch(char * c)4380 void getch(char *c) /* use only in reading file intree! */
4381 {
4382 /* get next nonblank character */
4383 do {
4384 if (eoln(intree)) {
4385 fscanf(intree, "%*[^\n]");
4386 getc(intree);
4387 }
4388 *c = getc(intree);
4389 if (*c == '\n')
4390 *c = ' ';
4391 } while (*c == ' ');
4392 } /* getch */
4393
processlength(node * p)4394 void processlength(node *p)
4395 {
4396 long digit;
4397 double valyew, divisor;
4398 boolean pointread;
4399
4400 pointread = FALSE;
4401 valyew = 0.0;
4402 divisor = 1.0;
4403 getch(&gch);
4404 digit = gch - '0';
4405 while (((unsigned long)digit <= 9) || gch == '.'){
4406 if (gch == '.')
4407 pointread = TRUE;
4408 else {
4409 valyew = valyew * 10.0 + digit;
4410 if (pointread)
4411 divisor *= 10.0;
4412 }
4413 getch(&gch);
4414 digit = gch - '0';
4415 }
4416 p->length = valyew / divisor;
4417 p->back->length = p->length;
4418 } /* processlength */
4419
addelement(option_struct * op,data_fmt * data,node * p,long * nextnode)4420 void addelement(option_struct *op, data_fmt *data, node *p, long *nextnode)
4421 {
4422 node *q;
4423 long i, n;
4424 boolean found;
4425 char str[NMLNGTH];
4426
4427 getch(&gch);
4428 if (gch == '(') {
4429 (*nextnode)++;
4430 newnode(&q);
4431 q = curtree->nodep[(*nextnode)];
4432 hookup(p, q);
4433 addelement(op,data,q->next,nextnode);
4434 seekch(',');
4435 addelement(op,data,q->next->next, nextnode);
4436 seekch(')');
4437 getch(&gch);
4438 } else {
4439 for (i = 0; i < NMLNGTH; i++)
4440 str[i] = ' ';
4441 n = 1;
4442 do {
4443 if (gch == '_')
4444 gch = ' ';
4445 str[n - 1] = gch;
4446 if (eoln(intree)) {
4447 fscanf(intree, "%*[^\n]");
4448 getc(intree);
4449 }
4450 gch = getc(intree);
4451 if (gch == '\n')
4452 gch = ' ';
4453 n++;
4454 } while (gch != ':' && gch != ',' && gch != ')' && n <= NMLNGTH);
4455 n = 1;
4456 do {
4457 found = TRUE;
4458 for (i = 0; i < NMLNGTH; i++)
4459 found = (found && str[i] == curtree->nodep[n]->nayme[i]);
4460 if (!found)
4461 n++;
4462 } while (!(n > getdata_numseq(op,data) || found));
4463 if (n > getdata_numseq(op,data)) {
4464 printf("Cannot find sequence: ");
4465 for (i = 0; i < NMLNGTH; i++)
4466 putchar(str[i]);
4467 putchar('\n');
4468 }
4469 hookup(curtree->nodep[n], p);
4470 }
4471 if (gch == ':')
4472 processlength(p);
4473 } /* addelement */
4474
treeread(option_struct * op,data_fmt * data)4475 void treeread(option_struct *op, data_fmt *data)
4476 /* WARNING: this only works with sequence data, not
4477 microsatellites, not panel SNPs, not allozymes! */
4478 {
4479 long nextnode;
4480 node *p;
4481
4482 curtree->root = curtree->nodep[rootnum];
4483 getch(&gch);
4484 if (gch == '(') {
4485 nextnode = getdata_numtips(op,data) + 1;
4486 p = curtree->nodep[nextnode];
4487 addelement(op,data,p, &nextnode);
4488 seekch(',');
4489 addelement(op,data,p->next, &nextnode);
4490 hookup(p->next->next, curtree->nodep[rootnum]);
4491 p->next->next->length = ROOTLENGTH;
4492 curtree->nodep[rootnum]->length = p->next->next->length;
4493 ltov(op,data,curtree->nodep[rootnum]);
4494 }
4495 fscanf(intree, "%*[^\n]");
4496 getc(intree);
4497 } /* treeread */
4498
4499
4500 /* DEBUG debug WARNING warning --
4501 actually still used in traitlike.c:traitlike() */
finddnasubtrees(option_struct * op,data_fmt * data,tlist * tstart,long * sranges)4502 void finddnasubtrees(option_struct *op, data_fmt *data,
4503 tlist *tstart, long *sranges)
4504 {
4505 long i, j, numsites, *temp;
4506 tlist *t;
4507
4508 numsites = countsites(op,data);
4509
4510 temp = (long *)calloc(numsites,sizeof(long));
4511
4512 for(t = tstart, i = 1; t != NULL; t = t->succ)
4513 if(isrecomb(t->eventnode))
4514 temp[findlink(t->eventnode)] = 1;
4515
4516 for(i = 0, j = 2, sranges[1] = 0; i < numsites; i++) {
4517 if(temp[i] == 0) continue;
4518 sranges[0]++;
4519 sranges[j] = i;
4520 sranges[j+1] = i+1;
4521 j+=2;
4522 }
4523 sranges[j] = numsites-1;
4524 sranges[j+1] = FLAGLONG;
4525
4526 if (j+1 > 2*sranges[0]+3)
4527 fprintf(ERRFILE,"ERROR:finddnasubtree: j calculated wrong\n");
4528
4529 free(temp);
4530
4531 } /* finddnasubtrees */
4532
4533
findsubtrees_node(option_struct * op,data_fmt * data,tlist * tlast,tree * tr,long ** coal)4534 void findsubtrees_node(option_struct *op, data_fmt *data,
4535 tlist *tlast, tree *tr, long **coal)
4536 {
4537 long i, j, numsites, *temp;
4538 tlist *t;
4539
4540 numsites = countsites(op,data);
4541
4542 temp = (long *)calloc(numsites,sizeof(long));
4543 for(t = tr->tymelist, i = 1; t != tlast->succ; t = t->succ)
4544 if(isrecomb(t->eventnode)) {
4545 j = findlink(t->eventnode);
4546 if (temp[j]) continue;
4547 temp[j] = 1;
4548 i++;
4549 }
4550
4551 init_coal_alloc(coal,i);
4552
4553 for(i = 0, j = 2; i < numsites; i++) {
4554 if (temp[i] == 0) continue;
4555 (*coal)[j] = i;
4556 (*coal)[j+1] = i+1;
4557 j += 2;
4558 }
4559 (*coal)[j] = numsites-1;
4560
4561 free(temp);
4562
4563 } /* findsubtrees_node */
4564
4565
drop_findsubtrees(option_struct * op,data_fmt * data,tree * tr,node * p)4566 void drop_findsubtrees(option_struct *op, data_fmt *data, tree *tr,
4567 node *p)
4568 {
4569 node *q;
4570 tlist *t;
4571
4572 t = gettymenode(tr,p->number);
4573
4574 findsubtrees_node(op,data,t,tr,&(p->coal));
4575
4576 if (isrecomb(p)) {
4577 q = otherdtr(p);
4578 findsubtrees_node(op,data,t,tr,&(q->coal));
4579 }
4580
4581 } /* drop_findsubtrees */
4582
4583
findsubtrees(option_struct * op,data_fmt * data,tlist * tstart,long * sranges)4584 void findsubtrees(option_struct *op, data_fmt *data, tlist *tstart,
4585 long *sranges)
4586 {
4587 long i, j, numsites, *temp;
4588 tlist *t;
4589
4590 numsites = countsites(op,data);
4591
4592 temp = (long *)calloc(numsites,sizeof(long));
4593
4594 for(t = tstart; t != NULL; t = t->succ)
4595 if(isrecomb(t->eventnode))
4596 temp[findlink(t->eventnode)] = 1;
4597
4598 for(i = 0, j = 2, sranges[1] = 0; i < numsites; i++) {
4599 if(temp[i] == 0) continue;
4600 sranges[0]++;
4601 sranges[j] = i;
4602 sranges[j+1] = i+1;
4603 j+=2;
4604 }
4605 sranges[j] = numsites-1;
4606 sranges[j+1] = FLAGLONG;
4607
4608 if (j+1 > 2*sranges[0]+3)
4609 fprintf(ERRFILE,"ERROR:findsubtree: j calculated wrong\n");
4610
4611 free(temp);
4612
4613 } /* findsubtrees */
4614
4615
4616 /***************************************************************
4617 * markertosite() returns the exact "site" that corresponds to *
4618 * the passed marker position. */
markertosite(option_struct * op,data_fmt * data,long marker)4619 long markertosite(option_struct *op, data_fmt *data, long marker)
4620 {
4621
4622 return(getdata_markersite(op,data,marker));
4623
4624 } /* markertosite */
4625
4626
4627 /**********************************************************************
4628 * sitetorightmarker() returns the closest marker to the right of the *
4629 * given site, if that marker exists. If that marker doesn't exist, *
4630 * FLAGLONG is returned. If the site exactly corresponds to a marker *
4631 * then the corresponding marker will be returned. */
sitetorightmarker(option_struct * op,data_fmt * data,long site)4632 long sitetorightmarker(option_struct *op, data_fmt *data, long site)
4633 {
4634 long marker, nummarkers;
4635 double msite;
4636
4637 nummarkers = getdata_nummarkers(op,data);
4638 for(marker = 0; marker < nummarkers; marker++) {
4639 msite = markertosite(op,data,marker);
4640 if (msite >= site) return(marker);
4641 }
4642
4643 return(FLAGLONG);
4644
4645 } /* sitetorightmarker */
4646
4647
4648 /**********************************************************************
4649 * sitetomarker() returns the closest marker to the left of the given *
4650 * site, if that marker exists. If that marker doesn't exist, the *
4651 * closest marker on the right is returned. */
sitetomarker(option_struct * op,data_fmt * data,long site)4652 long sitetomarker(option_struct *op, data_fmt *data, long site)
4653 {
4654 long marker, nummarkers;
4655 double sitecounts;
4656
4657 nummarkers = getdata_nummarkers(op,data);
4658 for(marker = 0, sitecounts = 0.0; marker < nummarkers; marker++) {
4659 sitecounts = data->dnaptr->sitecount[locus][marker];
4660 if (site < sitecounts) return(marker);
4661 }
4662
4663 fprintf(ERRFILE,"ERROR--failure to find marker for site %ld\n\n",
4664 site);
4665 return(FLAGLONG);
4666
4667
4668 } /* sitetomarker */
4669
4670
4671 /**********************************************************
4672 * findsubtreemarkers() gets passed the beginning and end *
4673 * of a subtree (in psuedosites) and returns the position *
4674 * of the first and last markers found in that subtree. *
4675 * Return FLAGLONG in both marker positions if there are *
4676 * no markers present in the subtree. */
findsubtreemarkers(option_struct * op,data_fmt * data,long pstart,long pend,long * mstart,long * mend)4677 void findsubtreemarkers(option_struct *op, data_fmt *data, long pstart,
4678 long pend, long *mstart, long *mend)
4679 {
4680 long nummarkers, marker, site;
4681
4682 nummarkers = getdata_nummarkers(op,data);
4683
4684 for(marker = 0; marker < nummarkers; marker++) {
4685 site = markertosite(op,data,marker);
4686 if (site >= pstart) break;
4687 }
4688
4689 if (site > pend) {
4690 (*mstart) = (*mend) = FLAGLONG;
4691 return;
4692 }
4693
4694 (*mstart) = marker;
4695
4696 for(;marker < nummarkers; marker++) {
4697 site = markertosite(op,data,marker);
4698 if (site > pend) break;
4699 }
4700
4701 (*mend) = marker-1;
4702
4703 } /* findsubtreemarkers */
4704
4705
4706 /**********************************************************
4707 * sameranges() returns TRUE if the ranges are identical, *
4708 * FALSE otherwise. */
sameranges(long * range1,long * range2)4709 boolean sameranges(long *range1, long *range2)
4710 {
4711 long i;
4712
4713 for(i = 0; range1[i] != FLAGLONG && range2[i] != FLAGLONG; i++)
4714 if(range1[i] != range2[i]) return(FALSE);
4715
4716 if (range1[i] != range2[i]) {
4717 printf("ERROR:sameranges bailed on FLAGLONG but not done yet!!! %ld %ld\n",
4718 indecks,apps);
4719 return(FALSE);
4720 }
4721
4722 return(TRUE);
4723
4724 } /* sameranges */
4725
4726
4727 /*************************************************
4728 * copycoal() copies the coal array of a nodelet */
copycoal(node * source,node * target)4729 void copycoal(node *source, node *target)
4730 {
4731
4732 if (source->coal != NULL) {
4733 coal_Malloc(target,TRUE,source->coal[0]);
4734 memcpy(target->coal,source->coal,
4735 (source->coal[0]*2+2)*sizeof(long));
4736 } else coal_Malloc(target,FALSE,0L);
4737
4738 } /* copycoal */
4739
4740
4741 /*****************************************************
4742 * copyranges() copies the ranges array of a nodelet */
copyranges(node * source,node * target)4743 void copyranges(node *source, node *target)
4744 {
4745
4746 if (source->ranges != NULL) {
4747 ranges_Malloc(target,TRUE,source->ranges[0]);
4748 memcpy(target->ranges,source->ranges,
4749 (source->ranges[0]*2+2)*sizeof(long));
4750 } else ranges_Malloc(target,FALSE,0L);
4751
4752 } /* copyranges */
4753
4754
4755 /***************************************************************
4756 * This function copies the likelihood, "x" and "z", arrays of *
4757 * a nodelet. */
copylikes(option_struct * op,data_fmt * data,node * source,node * target)4758 void copylikes(option_struct *op, data_fmt *data, node *source,
4759 node *target)
4760 {
4761 long nummarkers = 0, numslice;
4762
4763 numslice = (op->panel) ? NUMSLICE : 1L;
4764
4765 if (op->map) {
4766 if (source->z != NULL) {
4767 allocate_z(op,data,target);
4768 memcpy(target->z,source->z,NUMTRAIT*sizeof(double));
4769 } else free_z(op,target);
4770 }
4771
4772 if (source->x != NULL) {
4773 allocate_x(op,data,target);
4774 switch (op->datatype) {
4775 case 'a':
4776 break;
4777 case 'b':
4778 case 'm':
4779 memcpy(target->x->a[0],source->x->a[0],
4780 getdata_numloci(op,data)*MICRO_ALLELEMAX*sizeof(double));
4781 break;
4782 case 'n':
4783 nummarkers += NUMINVAR;
4784 case 's':
4785 nummarkers += getdata_nummarkers(op,data);
4786 memcpy(target->x->s[0][0][0],source->x->s[0][0][0],
4787 nummarkers*op->categs*numslice*4L*sizeof(double));
4788 break;
4789 default:
4790 fprintf(ERRFILE,"\ncopylikes--impossible datatype %c\n",
4791 op->datatype);
4792 exit(-1);
4793 break;
4794 }
4795 } else free_x(op,target);
4796
4797 } /* copylikes */
4798
4799
4800 /************************************************************
4801 * copynode copies the "source" node onto the "target" node */
copynode(option_struct * op,data_fmt * data,node * source,node * target)4802 void copynode(option_struct *op, data_fmt *data, node *source,
4803 node *target)
4804 {
4805 long i, nodecount;
4806
4807 if (istip(source)) nodecount = 1;
4808 else nodecount = 3;
4809
4810 for (i = 0; i < nodecount; i++) {
4811 target->type = source->type;
4812 /* NEVER! target->next := source->next; */
4813 target->back = source->back;
4814 target->top = source->top;
4815 /* but NOT target->number := source->number; */
4816 copylikes(op,data,source,target);
4817 copyranges(source,target);
4818 if (op->fc) copycoal(source,target);
4819 if (istip(source)) memcpy(target->nayme,source->nayme,sizeof(naym));
4820 target->v = source->v;
4821 target->lxmax = source->lxmax;
4822 target->tyme = source->tyme;
4823 target->length = source->length;
4824 target->updated = source->updated;
4825 target->recstart = source->recstart;
4826 target->recend = source->recend;
4827 source = source->next;
4828 target = target->next;
4829 }
4830 } /* copynode */
4831
4832 /**********************************************************************
4833 * addnode hooks nodelet "p" to node "q" by free back ptrs in p and q */
addnode(option_struct * op,data_fmt * data,node * p,node * q)4834 void addnode(option_struct *op, data_fmt *data, node *p, node *q)
4835 {
4836
4837 if (p->top) while(q->top && q->back) q = q->next;
4838 else while(!q->top && q->back) q = q->next;
4839
4840 hookup(p,q);
4841 fixlength(op,data,p);
4842
4843 } /* addnode */
4844
4845
4846 /****************************************************************
4847 * getrecnodelet finds which nodelet in node "copy" corresponds *
4848 * to the exact nodelet pointed to by "orig". */
getrecnodelet(node * copy,node * orig)4849 node *getrecnodelet (node *copy, node *orig)
4850 {
4851 long i;
4852 node *p;
4853
4854 p = copy;
4855 for(i = 0; i < 3; i++) {
4856 if ((p->recstart == orig->recstart) && (p->recend == orig->recend))
4857 return(p);
4858 p = p->next;
4859 }
4860
4861 fprintf(ERRFILE,"ERROR:getrecnodelet failed to match\n");
4862 return(NULL);
4863
4864 } /* getrecnodelet */
4865
4866
4867 /**********************************************************
4868 * make_tree_copy copies tree "source" into tree "target" */
make_tree_copy(option_struct * op,data_fmt * data,tree * source,tree * target)4869 void make_tree_copy(option_struct *op, data_fmt *data, tree *source,
4870 tree *target)
4871 {
4872 long i, nodenumber, *tnum, numno, numtips;
4873 tlist *t, *addhere;
4874 node *p, *q, *r;
4875 dnadata *dna;
4876
4877 dna = data->dnaptr;
4878 numtips = getdata_numtips(op,data);
4879 numno = numtips + source->numcoals + source->numrecombs + 1;
4880 tnum = (long *)calloc(1,numno*sizeof(long));
4881
4882 /* make the tips */
4883 target->nodep = (node **)calloc(1,numno*sizeof(node *));
4884 for (i = 0; i < numtips+1; i++) {
4885 allocate_tip(op,data,target,source->nodep[i]->number);
4886 strcpy(target->nodep[i]->nayme,source->nodep[i]->nayme);
4887 if (i) {
4888 target->nodep[i]->top = TRUE;
4889 target->nodep[i]->tyme = 0.0;
4890 } else {
4891 target->nodep[i]->top = FALSE;
4892 target->nodep[i]->tyme = ROOTLENGTH;
4893 }
4894 target->nodep[i]->updated = TRUE;
4895 copyranges(source->nodep[i],target->nodep[i]);
4896 if(op->map) copytraits(source->nodep[i],target->nodep[i]);
4897 if(op->fc) copycoal(source->nodep[i],target->nodep[i]);
4898 copylikes(op,data,source->nodep[i],target->nodep[i]);
4899 tnum[i] = i;
4900 }
4901
4902 /* now construct the first tymelist entry */
4903 newtymenode(&target->tymelist);
4904 target->tymelist->numbranch = numtips;
4905 target->tymelist->branchlist =
4906 (node **)calloc(numtips,sizeof(node *));
4907 for(i = 0; i < numtips; i++)
4908 target->tymelist->branchlist[i] = target->nodep[i+1];
4909 target->tymelist->eventnode = target->nodep[1];
4910 target->tymelist->age = source->root->tyme;
4911
4912 /* now do the rest of the tree and tymelist */
4913 for (t = source->tymelist->succ; t != NULL; t = t->succ) {
4914 nodenumber = t->eventnode->number;
4915 newnode(&p);
4916 p->number = nodenumber;
4917 p->next->number = nodenumber;
4918 p->next->next->number = nodenumber;
4919 target->nodep[nodenumber] = p;
4920 copynode(op,data,t->eventnode,p);
4921 p->back = NULL;
4922 p->next->back = NULL;
4923 p->next->next->back = NULL;
4924 tnum[t->eventnode->number] = nodenumber;
4925 }
4926
4927 addhere = target->tymelist;
4928 for (t = source->tymelist->succ; t != NULL; t = t->succ) {
4929 p = findunique(target->nodep[tnum[t->eventnode->number]]);
4930 q = findunique(t->eventnode);
4931 if (isrecomb(p)) {
4932 r = target->nodep[tnum[q->back->number]];
4933 if (isrecomb(r)) r = getrecnodelet(r,q->back);
4934 addnode(op,data,p,r);
4935 } else {
4936 r = target->nodep[tnum[q->next->back->number]];
4937 if (isrecomb(r)) r = getrecnodelet(r,q->next->back);
4938 addnode(op,data,p->next,r);
4939 r = target->nodep[tnum[q->next->next->back->number]];
4940 if (isrecomb(r)) r = getrecnodelet(r,q->next->next->back);
4941 addnode(op,data,p->next->next,r);
4942 }
4943
4944 insertaftertymelist(addhere,p);
4945 addhere = addhere->succ;
4946 }
4947
4948 addnode(op,data,target->nodep[0],
4949 target->nodep[tnum[source->root->back->number]]);
4950 target->root = target->nodep[0];
4951
4952 free(tnum);
4953
4954 } /* make_tree_copy */
4955
4956
4957 /********************************************************************
4958 * copycreature() copies a single creature to the "target" pointer. *
4959 * The passed tree should be the tree associated with the "target" *
4960 * creature. */
copycreature(option_struct * op,data_fmt * data,creature * source,creature * target,tree * tr)4961 void copycreature(option_struct *op, data_fmt *data, creature *source,
4962 creature *target, tree *tr)
4963 {
4964 long i;
4965
4966 target->numflipsites = source->numflipsites;
4967 target->flipsites = (long *)calloc(target->numflipsites,sizeof(long));
4968 memcpy(target->flipsites,source->flipsites,
4969 (target->numflipsites)*sizeof(long));
4970
4971 target->numhaplotypes = source->numhaplotypes;
4972 target->haplotypes = (node **)calloc(target->numhaplotypes,sizeof(node *));
4973 for(i = 0; i < target->numhaplotypes; i++) {
4974 target->haplotypes[i] = tr->nodep[source->haplotypes[i]->number];
4975 }
4976
4977 } /* copycreature */
4978
4979
4980 /********************************************************
4981 * copycreatures() copies the creatures array of a tree */
copycreatures(option_struct * op,data_fmt * data,tree * source,tree * target)4982 void copycreatures(option_struct *op, data_fmt *data, tree *source,
4983 tree *target)
4984 {
4985 long cr, numcreatures;
4986
4987 if (!op->haplotyping) return;
4988
4989 numcreatures = getdata_numtips(op,data)/NUMHAPLOTYPES;
4990 target->creatures = (creature *)calloc(numcreatures,sizeof(creature));
4991
4992 for(cr = 0; cr < numcreatures; cr++) {
4993 copycreature(op,data,&(source->creatures[cr]),
4994 &(target->creatures[cr]),target);
4995 }
4996
4997 } /* copycreatures */
4998
4999
5000 /**********************************************************************
5001 * copytree makes a copy from scratch of the "source" tree, returning *
5002 * a pointer to the copy that it makes. */
copytree(option_struct * op,data_fmt * data,tree * source)5003 tree *copytree(option_struct *op, data_fmt *data, tree *source)
5004 {
5005 tree *target;
5006
5007 target = (tree *)calloc(1,sizeof(tree));
5008
5009 target->likelihood = source->likelihood;
5010 target->coalprob = source->coalprob;
5011 target->numcoals = source->numcoals;
5012 target->numrecombs = source->numrecombs;
5013 make_tree_copy(op,data,source,target);
5014 copycreatures(op,data,source,target);
5015
5016 return(target);
5017 } /* copytree */
5018
5019
5020 /***************************************************
5021 * freetree frees the "target" tree from memory */
freetree(option_struct * op,data_fmt * data,tree * target)5022 void freetree(option_struct *op, data_fmt *data, tree *target)
5023 {
5024 long i;
5025 tlist *t;
5026
5027 if (op->haplotyping) {
5028 for(i = 0; i < getdata_numtips(op,data)/NUMHAPLOTYPES; i++) {
5029 free(target->creatures[i].haplotypes);
5030 free(target->creatures[i].flipsites);
5031 }
5032 free(target->creatures);
5033 }
5034
5035 t = target->tymelist->succ;
5036 while(t != NULL) {
5037 freenode(t->eventnode);
5038 t = t->succ;
5039 }
5040 freetymelist(target->tymelist);
5041
5042 for (i = 0; i < getdata_numtips(op,data)+1; i++) {
5043 freenodelet(op,data,target->nodep[i]);
5044 }
5045 free(target->nodep);
5046
5047 free(target);
5048
5049 } /* freetree */
5050
5051 /* joinnode and constructtree are used for constructing a rather bad
5052 starting tree if the user doesn't provide one */
joinnode(option_struct * op,data_fmt * data,double length,node * p,node * q)5053 void joinnode(option_struct *op, data_fmt *data, double length,
5054 node *p, node *q)
5055 {
5056 hookup(p,q);
5057 p->length = length;
5058 q->length = length;
5059 ltov(op,data,p);
5060 } /* joinnode */
5061
constructtree(option_struct * op,data_fmt * data,double branchlength)5062 void constructtree(option_struct *op, data_fmt *data, double branchlength)
5063 {
5064 long i, j, numtips, nextnode;
5065 double height;
5066 node *p, *q;
5067
5068 numtips = getdata_numtips(op,data);
5069
5070 curtree->root = curtree->nodep[rootnum];
5071 nextnode = numtips+1;
5072 p = curtree->root;
5073 q = curtree->nodep[nextnode];
5074
5075 p->back = q;
5076 q->back = p;
5077 p->length = ROOTLENGTH;
5078 q->length = ROOTLENGTH;
5079 ltov(op,data,p);
5080
5081 height = (numtips - 1) * branchlength;
5082 p->tyme = ROOTLENGTH + height;
5083 for (i = 1; i < numtips; i++) {
5084 p = curtree->nodep[i];
5085 q = curtree->nodep[nextnode]->next;
5086 joinnode(op,data,height,p,q);
5087 q = q->next;
5088 if (i != numtips-1) {
5089 nextnode++;
5090 p = curtree->nodep[nextnode];
5091 joinnode(op,data,branchlength,p,q);
5092 height -= branchlength;
5093 } else {
5094 p = curtree->nodep[numtips];
5095 joinnode(op,data,height,p,q);
5096 }
5097 for (j = 0; j < 3; j++)
5098 q->tyme = height;
5099 }
5100
5101 } /* constructtree */
5102 /* End bad starting tree construction */
5103
5104
5105 /******************************************************************
5106 * min_theta_calc() calculates the minimum value which we wish to *
5107 * let the mutation-rate parameter, theta, attain before final *
5108 * estimation. */
min_theta_calc(option_struct * op,data_fmt * data,double th)5109 double min_theta_calc(option_struct *op, data_fmt *data, double th)
5110 {
5111 double value;
5112
5113 value = THETAMIN;
5114
5115 return(value);
5116
5117 } /* min_theta_calc */
5118
5119
5120 /***********************************************************
5121 * hasrec_chain() returns TRUE if the passed chain has any *
5122 * recombinations currently sampled, FALSE otherwise. */
hasrec_chain(option_struct * op,long chain)5123 boolean hasrec_chain(option_struct *op, long chain)
5124 {
5125 long i, chaintype, refchain;
5126 treerec *trii;
5127
5128 refchain = REF_CHAIN(chain);
5129 chaintype = TYPE_CHAIN(chain);
5130
5131 for(i = 0; i < op->numout[chaintype]; i++) {
5132 trii = &sum[locus][refchain][i];
5133 if (trii->numrecombs > 0) return(TRUE);
5134 }
5135
5136 return(FALSE);
5137
5138 } /* hasrec_chain */
5139
5140
5141 /***********************************************************************
5142 * chainendcheck() runs at the end of a chain and makes sure the chain *
5143 * had the following desired properties: *
5144 * *
5145 * --at least 1 sampled tree contains 1+ recombinations *
5146 * FAILURE = call addfractrecomb() */
chainendcheck(option_struct * op,data_fmt * data,tree * tr,long chain,boolean locusend)5147 void chainendcheck(option_struct *op, data_fmt *data, tree *tr,
5148 long chain, boolean locusend)
5149 {
5150
5151 if(!locusend && !hasrec_chain(op,chain) && op->holding != 2) {
5152 if (apps == totchains - 1) {
5153 fprintf(outfile,"WARNING--forced a recombination");
5154 fprintf(outfile," which may have affected final result\n");
5155 }
5156 addfractrecomb(op,data,chain,sum);
5157 }
5158
5159 } /* chainendcheck */
5160
5161
5162 /******************************************************************
5163 * rearrange() is the driver for basic rearrangement of a tree, *
5164 * returning TRUE if it successfully rearranged a tree, and FALSE *
5165 * otherwise. */
rearrange(option_struct * op,data_fmt * data,long whichtchain)5166 boolean rearrange(option_struct *op, data_fmt *data, long whichtchain)
5167 {
5168 double chance;
5169 boolean accepted;
5170
5171 curtree = temptrees[whichtchain];
5172 chance = randum();
5173 accepted = FALSE;
5174 op->ctemp = op->temperature[whichtchain];
5175
5176 if (chance < TWIDDLE_PROB) accepted = twiddle(op,data);
5177 else {
5178 chance -= TWIDDLE_PROB;
5179 if (chance < op->happrob && op->haplotyping) {
5180 if (op->hapdrop == 0)
5181 accepted = fliphap(op,data,curtree);
5182 else if (op->hapdrop == 1)
5183 accepted = flipdrop(op,data,curtree,1L);
5184 else
5185 accepted = flipdrop(op,data,curtree,2L);
5186 hap++;
5187 if (accepted && whichtchain == 0) hacc++;
5188 } else {
5189 accepted = makedrop(op,data);
5190 slid++;
5191 if (accepted && whichtchain == 0) slacc++;
5192 }
5193 }
5194 #ifdef MAC
5195 eventloop();
5196 #endif
5197
5198 temptrees[whichtchain] = curtree;
5199
5200 return(accepted);
5201
5202 } /* rearrange */
5203
5204
5205 /**********************************************************************
5206 * temptreeswap() checks to see if a swap of trees between different *
5207 * temperatures is called for, executing the swap if so. *
5208 * *
5209 * it accepts a proposed swap using *
5210 * [ P(Gh|Tl) * P(D|Gh) ] ^ 1/l * [ P(Gl|Th) * P(D|Gl) ] ^ 1/h *
5211 * ----------------------------------------------------------- *
5212 * [ P(Gh|Th) * P(D|Gh) ] ^ 1/h * [ P(Gl|Tl) * P(D|Gl) ] ^ 1/l *
5213 * *
5214 * where Gh = the tree generated at high temp *
5215 * Gl = the tree generated at low temp *
5216 * Tl = the parameter values at the low temp *
5217 * Th = the parameter values at the high temp */
temptreeswap(option_struct * op,data_fmt * data,boolean * changed,long chain)5218 void temptreeswap(option_struct *op, data_fmt *data, boolean *changed,
5219 long chain)
5220 {
5221 long chl, chh;
5222 double chance, num, denom, templ, temph, recl, rech, thetal, thetah;
5223 boolean picked;
5224 tree *trh, *trl;
5225
5226 if (op->numtempchains < 2) return;
5227
5228 /* first pick which two chains to swap */
5229
5230 picked = FALSE;
5231 while(!picked) {
5232 chl = (long)(randum()*op->numtempchains);
5233 chh = (long)(randum()*op->numtempchains);
5234 if (chl == op->numtempchains || chh == op->numtempchains) continue;
5235 if (chh - chl > 0) {
5236 if (chh - chl == 1) picked = TRUE;
5237 } else {
5238 if (chl - chh == 1) picked = TRUE;
5239 }
5240 }
5241
5242 trh = temptrees[chh];
5243 temph = op->temperature[chh];
5244 thetah = theti[locus][chain];
5245 rech = reci[locus][chain];
5246
5247 trl = temptrees[chl];
5248 templ = op->temperature[chl];
5249 thetal = theti[locus][chain];
5250 recl = reci[locus][chain];
5251
5252 num = (coalprob(op,data,trh,thetal,recl)+trh->likelihood)*(1.0/templ) +
5253 (coalprob(op,data,trl,thetah,rech)+trl->likelihood)*(1.0/temph);
5254
5255 denom = (trh->coalprob+trh->likelihood)*(1.0/temph) +
5256 (trl->coalprob+trl->likelihood)*(1.0/templ);
5257
5258 chance = num - denom;
5259
5260 swap++;
5261 if (chance >= log(randum())) {
5262 temptrees[chh] = trl;
5263 temptrees[chl] = trh;
5264 changed[chh] = TRUE;
5265 changed[chl] = TRUE;
5266 swacc++;
5267 }
5268
5269 } /* temptreeswap */
5270
5271
5272 #define PRINTNUM 8 /* the number of digits used for printing
5273 the number of steps done */
5274
maketree(option_struct * op,data_fmt * data)5275 void maketree(option_struct *op, data_fmt *data)
5276 {
5277 long tempchain, incrprog, metout, progout, chaintype, i;
5278 double bestlike;
5279 boolean runend, *changetree, first;
5280 char chainlit[2][6] = {"Short","Long"};
5281 dnadata *dna;
5282 double *traitarray;
5283 long hapscore;
5284
5285 traitarray = NULL;
5286
5287 changetree = (boolean *)calloc(op->numtempchains,sizeof(boolean));
5288
5289 if (op->map)
5290 traitarray = (double *)calloc(countsites(op,data),sizeof(double));
5291
5292 chainlit[0][5] = chainlit[1][5] = '\0';
5293
5294 /* WARNING DEBUG assumes DNA data (and shouldn't)! */
5295 dna = data->dnaptr;
5296
5297 getc(infile);
5298 if(op->haplotyping) {
5299 fprintf(outfile,"Haplotypes being inferred");
5300 if (op->hapdrop == 0) fprintf(outfile," without resimulation\n");
5301 if (op->hapdrop == 1) fprintf(outfile," with single resimulation\n");
5302 if (op->hapdrop == 2) fprintf(outfile," with double resimulation\n");
5303 }
5304 fprintf(outfile,"Watterson estimate of theta is %12.8f\n", watttheta);
5305 #if !DISTRIBUTION
5306 fprintf(outfile,"\n\nIntermediate chains (only)\n");
5307 fprintf(outfile,"chain# theta rec-rate\n");
5308 fprintf(outfile,"-----------------------------\n");
5309 #endif
5310 if (op->usertree)
5311 treeread(op,data);
5312 else {
5313 #if !ALWAYS_REJECT
5314 branch0 = watttheta/getdata_numtips(op,data);
5315 constructtree(op,data,branch0);
5316 #else
5317 constructtree(op,data,0.0);
5318 #endif
5319 }
5320 orient(curtree,op,data,curtree->root->back);
5321 #if !ALWAYS_REJECT
5322 if (op->plump) plumptree(op,data,watttheta);
5323 #endif
5324 finishsetup(op,data,curtree->root->back);
5325 initbranchlist(op,data);
5326 #if !ALWAYS_REJECT
5327 localeval(op,data,curtree->root->back,TRUE);
5328 curtree->coalprob = coalprob(op,data,curtree,theta0,rec0);
5329 #endif
5330 bestlike = NEGMAX;
5331 theti[locus][0] = theta0;
5332 reci[locus][0] = rec0;
5333 runend = FALSE;
5334
5335 /* WARNING DEBUG MARY */
5336 if (TRUEHAP) {
5337 hapscore = hapdist(op,&truehaps,data,curtree->creatures);
5338 printf("Distance from truth%ld\n",hapscore);
5339 fprintf(outfile,"Distance from truth%ld\n",hapscore);
5340 hapscore = hapdist(op,&starthaps,data,curtree->creatures);
5341 fprintf(outfile,"Distance from start%ld\n",hapscore);
5342 }
5343
5344 if(op->map) traitread(curtree, getdata_numseq(op,data));
5345 /**********************************/
5346 /* Begin Hastings-Metropolis loop */
5347 /**********************************/
5348 for (apps = 0; apps < totchains; apps++) {
5349 if (apps >= op->numchains[0]) chaintype = 1;
5350 else chaintype = 0;
5351 if (op->progress) {
5352 printf("%s chain %ld ",chainlit[chaintype],
5353 ((chaintype) ? (apps + 1 - op->numchains[0]) : apps + 1));
5354 fflush(stdout);
5355 }
5356 numdropped = 0;
5357 metout = op->increm[chaintype] - 1 + NUMBURNIN;
5358 /* print a "." to stdout every 10% of the chain(s) finished */
5359 incrprog = (long)(op->steps[chaintype] / 10.0);
5360 progout = incrprog - 1 + NUMBURNIN;
5361 op->numout[chaintype] = 0;
5362 slacc = hacc = swacc = 0;
5363 slid = hap = swap = 0;
5364
5365 /* if apps == 0, start all temperature chains with the same
5366 initial tree */
5367 if (apps == 0) {
5368 for(tempchain = 0; tempchain < op->numtempchains; tempchain++) {
5369 temptrees[tempchain] = copytree(op,data,curtree);
5370 }
5371 /* curtree is no longer needed as a base tree!, it's just a ptr */
5372 freetree(op,data,curtree);
5373 }
5374 for(tempchain = 0; tempchain < op->numtempchains; tempchain++)
5375 changetree[tempchain] = FALSE;
5376 first = TRUE;
5377 for (indecks=0; indecks < op->steps[chaintype]+NUMBURNIN; indecks++) {
5378 #if 0 /* first heating */
5379 /* heating code */
5380 if (max_heat != 1.0) {
5381 if (heating_cnt == heating_interval) {
5382 heating_cnt = 0;
5383 if (op->temperature[0] >= max_heat) direction = -1;
5384 if (op->temperature[0] <= 1.0) direction = 1;
5385 op->temperature[0] += direction * heating_by;
5386 }
5387 heating_cnt++;
5388 }
5389 #endif
5390 col = 0; /* column number, used in treeout */
5391
5392 /* only count acceptances after burn-in */
5393 if (indecks == NUMBURNIN)
5394 hacc = slacc = 0;
5395
5396 for(tempchain = 0; tempchain < op->numtempchains; tempchain++) {
5397 if (rearrange(op,data,tempchain))
5398 changetree[tempchain] = TRUE;
5399 }
5400
5401 temptreeswap(op,data,changetree,apps);
5402
5403
5404 if (apps == totchains - 1) /* end of run? */
5405 runend = TRUE;
5406 if (temptrees[0]->likelihood > bestlike) {
5407 if (ONEBESTREE) {
5408 FClose(bestree);
5409 bestree = fopen("bestree","w+");
5410 fprintf(bestree, "Chain #%2ld (%s) Step:%8ld\n",apps+1,
5411 chainlit[chaintype], indecks+1);
5412 /* debug DEBUG warning WARNING--no rectreeout routine */
5413 /*rec_outtree(temptrees[0]->root->back,TRUE, &bestree);*/
5414 fprintf(bestree, "; [%12.10f]\n", temptrees[0]->likelihood);
5415 bestlike = temptrees[0]->likelihood;
5416 }
5417 else {
5418 fprintf(bestree, "Chain #%2ld (%s) Step:%8ld\n",apps+1,
5419 chainlit[chaintype], indecks+1);
5420 /* debug DEBUG warning WARNING--no rectreeout routine */
5421 /*rec_outtree(temptrees[0]->root->back,TRUE, &bestree); */
5422 fprintf(bestree, "; [%12.10f]\n", temptrees[0]->likelihood);
5423 bestlike = temptrees[0]->likelihood;
5424 }
5425 }
5426 if (indecks == metout) {
5427 if (op->numout[chaintype] == 0)
5428 sametree[locus][0] = FALSE;
5429 else
5430 sametree[locus][op->numout[chaintype]] = !changetree[0];
5431 changetree[0] = FALSE;
5432 #if !ALWAYS_REJECT
5433 #if 0 /* old heating */
5434 if (op->temperature[0] == 1.0) { /* not too heated */
5435 op->numout[chaintype]++;
5436 scoretree(op,data,apps);
5437 if(op->map)
5438 traitlike(op,data,curtree,countsites(op,data),
5439 op->mutrait,op->traitratio, op->pd, traitarray);
5440 }
5441 #endif
5442 op->numout[chaintype]++;
5443 /* set curtree to the coldest tree for scoretree() scoring */
5444 curtree = temptrees[0];
5445 scoretree(op,data,apps);
5446 if(op->map)
5447 traitlike(op,data,curtree,countsites(op,data),
5448 op->mutrait,op->traitratio, op->pd, traitarray);
5449 #endif
5450 metout += op->increm[chaintype];
5451 if (op->treeprint) {
5452 fprintf(treefile,"\nlocus = %ld, chain = %ld, tree = %ld\n",
5453 locus,apps,indecks);
5454 /* debug DEBUG warning WARNING--no rectreeout routine */
5455 /*rec_outtree(temptrees[0]->root->back,TRUE, &treefile);*/
5456 fprintf(treefile,";\n");
5457 }
5458 }
5459 #if 0
5460 if (op->progress && indecks == progout) {
5461 printf(".");
5462 fflush(stdout);
5463 progout += incrprog;
5464 }
5465 #endif
5466 if (op->progress) {
5467 if (!first) for(i = 0; i < PRINTNUM; i++) printf("\b");
5468 else printf(" examined: ");
5469 printf("%*ld",(int)PRINTNUM,indecks-NUMBURNIN+1);
5470 fflush(stdout);
5471 first = FALSE;
5472 }
5473 }
5474 if(runend) {
5475 if(op->map) {
5476 traitprint(countsites(op,data),traitarray,
5477 outfile,op->numout[chaintype]);
5478 #if TRUTHKNOWN
5479 traitresult(op,data,traitarray,outfile,op->numout[chaintype]);
5480 #endif
5481 }
5482 }
5483 printf(" trees");
5484 chainendcheck(op,data,curtree,apps,runend);
5485 rec_estimate(op,data,locus,apps,runend);
5486 if (TRUEHAP) {
5487 hapscore = hapdist(op,&truehaps,data,curtree->creatures);
5488 printf("Distance from truth: %ld\n",hapscore);
5489 fprintf(outfile,"Distance from truth: %ld\n",hapscore);
5490 hapscore = hapdist(op,&starthaps,data,curtree->creatures);
5491 printf("Distance from start: %ld\n",hapscore);
5492 fprintf(outfile,"Distance from start: %ld\n",hapscore);
5493 }
5494
5495 if (op->map && runend) traitsiteplot(op,data,traitarray);
5496 theta0 = theti[locus][apps+1];
5497 rec0 = reci[locus][apps+1];
5498 if (min_theta_calc(op,data,theta0) > theta0 &&
5499 op->holding != 1) {
5500 if (apps == totchains - 2) {
5501 fprintf(outfile,"WARNING--lower bound on estimate of");
5502 fprintf(outfile," theta may have affected final result\n");
5503 }
5504 theta0 = min_theta_calc(op,data,theta0);
5505 theti[locus][apps+1] = theta0;
5506 }
5507 if (op->progress) {
5508 printf("accepted %ld/%ld trees",slacc,slid-NUMBURNIN);
5509 if (numdropped) printf(", dropped %ld",numdropped);
5510 if (swap) printf(", swapped %ld/%ld",swacc,swap);
5511 if (op->haplotyping)
5512 printf("and %ld/%ld haplotype changes\n",hacc,hap);
5513 else printf("\n");
5514 }
5515 if ((apps == totchains-1) && numdropped) {
5516 fprintf(outfile,"%ld trees were dropped from",numdropped);
5517 fprintf(outfile," the final chain\n");
5518 }
5519 }
5520 if(slacc == 0) {
5521 fprintf(outfile,"WARNING--no proposed trees ever accepted\n");
5522 fprintf(ERRFILE,"WARNING--no proposed trees ever accepted\n");
5523 }
5524 free(changetree);
5525 } /* maketree */
5526
5527 /************************************************************
5528 * freenodelet frees all fields of a nodelet and the actual *
5529 * nodelet too. */
freenodelet(option_struct * op,data_fmt * data,node * p)5530 void freenodelet(option_struct *op, data_fmt *data, node *p)
5531 {
5532 free_x(op,p);
5533 if(op->map)free_z(op,p);
5534 if (p->nayme) free(p->nayme);
5535 ranges_Malloc(p,FALSE,0L);
5536 coal_Malloc(p,FALSE,0L);
5537 free(p);
5538 } /* freenodelet */
5539
5540 /*******************************************************************
5541 * finalfree() frees sum, reci, sametree, *
5542 * theti, the options structure, and the data structure. */
finalfree(option_struct * op,data_fmt * data)5543 void finalfree(option_struct *op, data_fmt *data)
5544 {
5545 long i, j, k, chtype, numloci;
5546
5547 numloci = 0;
5548
5549 numloci = getdata_numloci(op,data);
5550
5551 free(reci[0]);
5552 free(reci);
5553 free(theti[0]);
5554 free(theti);
5555 free(sametree[0]);
5556 free(sametree);
5557
5558 for(i = 0; i < numloci; i++)
5559 for(j = 0; j < 1+op->numchains[1]; j++) {
5560 if (j) chtype = 1;
5561 else chtype = 0;
5562 for(k = 0; k < op->numout[chtype]; k++) {
5563 #if GROWTHUSED
5564 free(sum[i][j][k].kk);
5565 free(sum[i][j][k].kend);
5566 free(sum[i][j][k].actives);
5567 #endif
5568 free(sum[i][j][k].eventtype);
5569 free(sum[i][j][k].sitescore);
5570 }
5571 }
5572 free(sum[0][0]);
5573 free(sum[0]);
5574 free(sum);
5575
5576 freedata(data);
5577
5578 free(op->probcat);
5579 free(op->rate);
5580 free(op->temperature);
5581 if (!op->same_ne) free(op->ne_ratio);
5582 if (op->panel) free(op->numpanel);
5583 free(op);
5584
5585 } /* finalfree */
5586
5587
main(int argc,char * argv[])5588 int main(int argc, char *argv[])
5589 { /* Recombine */
5590 long i, numloci, numpop;
5591 data_fmt data;
5592 option_struct *options;
5593 char infilename[100], outfilename[100], logfilename[100],
5594 bestrfilename[100], intrfilename[100], trfilename[100];
5595
5596 #ifdef MAC
5597 char spafilename[100], seedfilename[100];
5598
5599 argv[0] = "Recombine";
5600 #endif
5601
5602 /* Open various filenames. */
5603
5604 openfile(&infile,INFILE,"r",argv[0],infilename);
5605 openfile(&simlog,"simlog","w",argv[0],logfilename);
5606 openfile(&outfile,OUTFILE,"w",argv[0],outfilename);
5607
5608 /* warning WARNING debug DEBUG--initialization handled wrong */
5609 population = 0;
5610
5611 setupoption_struct(&options);
5612 options->ibmpc = IBMCRT;
5613 options->ansi = ANSICRT;
5614 getoptions(options);
5615 for (i = 1; i <= 1000; i++)
5616 clearseed = randum();
5617 if (options->usertree)
5618 openfile(&intree,INTREE,"r",argv[0],intrfilename);
5619 if (options->treeprint)
5620 openfile(&treefile,TREEFILE,"w",argv[0],trfilename);
5621 openfile(&bestree,"bestree","w",argv[0],bestrfilename);
5622
5623 if (options->newdata) getinput(options,&data);
5624 else getnodata(options,&data);
5625
5626 if (options->printdata) printdnadata(options,&data,outfile);
5627
5628 firstinit(options,&data);
5629
5630 if (ALWAYS_ACCEPT) fprintf(outfile,"***ALWAYS ACCEPT MODE!***");
5631 if (ALWAYS_REJECT) fprintf(outfile,"***ALWAYS REJECT MODE!***");
5632
5633 if (options->newdata) {
5634 temptrees = (tree **)calloc(options->numtempchains,sizeof(tree *));
5635 numpop = getdata_numpop(options,&data);
5636 for(population = 0; population < numpop; population++) {
5637 popinit(options,&data);
5638 numloci = getdata_numloci(options,&data);
5639 for (locus = 0; locus < numloci; locus++) {
5640 if (options->progress) printf("Locus %ld\n",locus+1);
5641 fprintf(outfile,"\n---------------------------------------\n");
5642 fprintf(outfile, " Locus %ld\n",locus+1);
5643 fprintf(outfile,"---------------------------------------\n");
5644 if (!options->same_ne)
5645 fprintf(outfile,"Assumed relative Ne = %f\n",
5646 options->ne_ratio[locus]);
5647 if (options->holding) {
5648 if (options->holding == 1)
5649 fprintf(outfile,"Theta was held contant.\n");
5650 else
5651 fprintf(outfile,"Recombination was held constant.\n");
5652 }
5653 locusinit(options,&data);
5654 #if !ALWAYS_REJECT
5655 watttheta = watterson(options,&data);
5656 #else
5657 watttheta = ARBITRARY_THETA;
5658 #endif
5659 if (options->watt) theta0 = watttheta;
5660 maketree(options,&data);
5661 end_of_locus_free(options,&data);
5662 }
5663 locus--;
5664 if (numloci > 1) rec_estimate(options,&data,-1L,totchains-1,TRUE);
5665 end_of_population_free(options,&data);
5666 }
5667 free(temptrees);
5668 } else {
5669 rec_scoreread(options,&data);
5670 numloci = getdata_numloci(options,&data);
5671 if (numloci > 1) rec_estimate(options,&data,-1L,totchains-1,TRUE);
5672 else rec_estimate(options,&data,0L,totchains-1,TRUE);
5673 }
5674
5675 finalfree(options,&data);
5676
5677 FClose(infile);
5678 FClose(outfile);
5679 FClose(treefile);
5680 FClose(bestree);
5681 FClose(seedfile);
5682 FClose(simlog);
5683 FClose(spacefile);
5684
5685 #ifdef MAC
5686 strcpy(seedfilename,"seedfile");
5687 strcpy(spafilename,"spacefile");
5688 fixmacfile(outfilename);
5689 fixmacfile(infilename);
5690 fixmacfile(bestrfilename);
5691 fixmacfile(logfilename);
5692 fixmacfile(intrfilename);
5693 fixmacfile(seedfilename);
5694 fixmacfile(trfilename);
5695 fixmacfile(spafilename);
5696 #endif
5697
5698 printf("PROGRAM DONE\n");
5699 exit(0);
5700
5701 } /* recombine */
5702
eof(FILE * f)5703 int eof(FILE *f)
5704 {
5705 register int ch;
5706
5707 if (feof(f))
5708 return 1;
5709 if (f == stdin)
5710 return 0;
5711 ch = getc(f);
5712 if (ch == EOF)
5713 return 1;
5714 ungetc(ch, f);
5715 return 0;
5716 } /* eof */
5717
eoln(FILE * f)5718 int eoln(FILE *f)
5719 {
5720 register int ch;
5721
5722 ch = getc(f);
5723 if (ch == EOF)
5724 return 1;
5725 ungetc(ch, f);
5726 return (ch == '\n');
5727 } /* eoln */
5728