1 #define programName "fastDNAml"
2 #define programVersion "1.2.2"
3 #define programVersionInt 10202
4 #define programDate "January 3, 2000"
5 #define programDateInt 20000103
6
7 /* fastDNAml, a program for estimation of phylogenetic trees from sequences.
8 * Copyright (C) 1998, 1999, 2000 by Gary J. Olsen
9 *
10 * This program is free software; you may redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the Free
12 * Software Foundation; either version 2 of the License, or (at your option)
13 * any later version.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
17 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 * for more details.
19 *
20 * You should have received a copy of the GNU General Public License along
21 * with this program; if not, write to the Free Software Foundation, Inc.,
22 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
23 *
24 * For any other enquiries write to Gary J. Olsen, Department of Microbiology,
25 * University of Illinois, Urbana, IL 61801, USA
26 *
27 * Or send E-mail to gary@phylo.life.uiuc.edu
28 *
29 *
30 * fastDNAml is based in part on the program dnaml by Joseph Felsenstein.
31 *
32 * Copyright notice from dnaml:
33 *
34 * version 3.3. (c) Copyright 1986, 1990 by the University of Washington
35 * and Joseph Felsenstein. Written by Joseph Felsenstein. Permission is
36 * granted to copy and use this program provided no fee is charged for it
37 * and provided that this copyright notice is not removed.
38 *
39 *
40 * When publishing work that based on results from fastDNAml please cite:
41 *
42 * Felsenstein, J. 1981. Evolutionary trees from DNA sequences:
43 * A maximum likelihood approach. J. Mol. Evol. 17: 368-376.
44 *
45 * and
46 *
47 * Olsen, G. J., Matsuda, H., Hagstrom, R., and Overbeek, R. 1994.
48 * fastDNAml: A tool for construction of phylogenetic trees of DNA
49 * sequences using maximum likelihood. Comput. Appl. Biosci. 10: 41-48.
50 */
51
52 /* Conversion to C and changes in sequential code by Gary Olsen, 1991-1994
53 *
54 * p4 version by Hideo Matsuda and Ross Overbeek, 1991-1993
55 */
56
57 /*
58 * 1.0 March 14, 1992
59 * Initial "release" version
60 *
61 * 1.0.1 March 18, 1992
62 * Add ntaxa to tree comments
63 * Set minimum branch length on reading tree
64 * Add blanks around operators in treeString (for prolog parsing)
65 * Add program version to treeString comments
66 *
67 * 1.0.2 April 6, 1992
68 * Improved option line diagnostics
69 * Improved auxiliary line diagnostics
70 * Removed some trailing blanks from output
71 *
72 * 1.0.3 April 6, 1992
73 * Checkpoint trees that do not need any optimization
74 * Print restart tree likelihood before optimizing
75 * Fix treefile option so that it really toggles
76 *
77 * 1.0.4 July 13, 1992
78 * Add support for tree fact (instead of true Newick tree) in
79 * processTreeCom, treeReadLen, str_processTreeCom and
80 * str_treeReadLen
81 * Use bit operations in randum
82 * Correct error in bootstrap mask used with weighting mask
83 *
84 * 1.0.5 August 22, 1992
85 * Fix reading of underscore as first nonblank character in name
86 * Add strchr and strstr functions to source code
87 * Add output treefile name to message "Tree also written ..."
88 *
89 * 1.0.6 November 20, 1992
90 * Change (! nsites) test in setupTopol to (nsites == 0) for MIPS R4000
91 * Add vectorizing compiler directives for CRAY
92 * Include updates and corrections to parallel code from H. Matsuda
93 *
94 * 1.0.7 March 25, 1993
95 * Remove translation of underlines in taxon names
96 *
97 * 1.0.8 April 30, 1993
98 * Remove version number from fastDNAml.h file name
99 *
100 * 1.0.9 August 12, 1993
101 * Version was never released.
102 * Redefine treefile formats and default:
103 * 0 None
104 * 1 Newick
105 * 2 Prolog
106 * 3 PHYLIP (Default)
107 * Remove quote marks and comment from PHYLIP treefile format.
108 *
109 * 1.1.0 September 3-5, 1993
110 * Arrays of size maxpatterns moved from stack to heap (mallocs) in
111 * evaluateslope, makenewz, and cmpBestTrees.
112 * Correct [maxsites] to [maxpatterns] in temporary array definitions
113 * in Vectorize code of newview and evaluate. (These should also
114 * get converted to use malloc() at some point.)
115 * Change randum to use 12 bit segments, not 6. Change its seed
116 * parameter to long *.
117 * Remove the code that took the absolute value of random seeds.
118 * Correct integer divide artifact in setting default transition/
119 * transversion parameter values.
120 * When transition/transversion ratio is "reset", change to small
121 * value, not the program default.
122 * Report the "reset" transition/transversion ratio in the output.
123 * Move global data into analdef, rawDNA, and crunchedDNA structures.
124 * Change names of routines white and digit to whitechar and digitchar.
125 * Convert y[] to yType, which is normally char, but is int if the
126 * Vectorize flag is set.
127 * Split option line reading out of getoptions routine.
128 *
129 * 1.1.1 September 30, 1994
130 * Incorporate changes made in 1.0.A (Feb. 11, 1994):
131 * Remove processing of quotation marks within comments.
132 * Break label finding into copy to string and find tip.
133 * Generalize tree reading to read trees when names are and are not
134 * already known.
135 * Remove absolute value from randum seed reading.
136 * Include integer version number and program date.
137 * Remove maxsite, maxpatterns and maxsp limitations.
138 * Incorporate code for retaining multiple trees.
139 * Activate code for Hasegawa & Kishino test of tree differences.
140 * Make quick add the default, with Q turning it off.
141 * Make emperical frequencies the option with F turning it off.
142 * Allow a residue frequency option line anywhere in the options.
143 * Increase string length passed to treeString (should be length
144 * checked, but ...)
145 * Introduce (Sept.30) and fix (Oct. 26) bug in bootstrap sampling.
146 * Fix error when user frequencies are last line and start with F.
147 *
148 * 1.2 September 5, 1997
149 * Move likelihood components into structure.
150 * Change rawDNA to rawdata.
151 * Change crunchedDNA to cruncheddata.
152 * Recast the likelihoods per site into an array of stuctures,
153 * where each stucture (likelivector) includes the likelihoods
154 * of each residue type at the site, and a magnitude scale
155 * factor (exp). This requires changing the space allocation,
156 * newview, makenewz, evaluate, and sigma.
157 * Change code of newview to rescale likelihoods up by 2**256 when
158 * the largest value falls below 2**-256. This should solve
159 * floating point underflow for all practical sized trees.
160 * No changes are necessary in makenewz or sigma, since only
161 * relative likelihoods are necessary.
162 *
163 * 1.2.1 March 9, 1998
164 * Convert likelihood adjustment factor (2**256) to a constant.
165 * Fix vectorized calculation of likelihood (error introduced in 1.2)
166 *
167 * 1.2.2 December 23, 1998
168 * General code clean-up.
169 * Convert to function definitions with parameter type lists
170 *
171 * 1.2.2 January 3, 2000
172 * Add copyright and license information
173 * Make this the current release version
174 */
175
176 #ifdef Master
177 # undef Master
178 # define Master 1
179 # define Slave 0
180 # define Sequential 0
181 #else
182 # ifdef Slave
183 # undef Slave
184 # define Master 0
185 # define Slave 1
186 # define Sequential 0
187 # else
188 # ifdef Sequential
189 # undef Sequential
190 # endif
191 # define Master 0
192 # define Slave 0
193 # define Sequential 1
194 # endif
195 #endif
196
197 #ifdef CRAY
198 # define Vectorize
199 #endif
200
201 #ifdef Vectorize
202 # define maxpatterns 10000 /* maximum number of different site patterns */
203 #endif
204
205 #include <stdio.h>
206 #include <math.h>
207 #include "fastDNAml.h" /* Requires version 1.2 */
208
209 #if Master || Slave
210 # include "p4.h"
211 # include "comm_link.h"
212 #endif
213
214 /* Global variables */
215
216 xarray *usedxtip, *freextip;
217
218 #if Sequential /* Use standard input */
219 # undef DNAML_STEP
220 # define DNAML_STEP 0
221 # define INFILE stdin
222 #endif
223
224 #if Master
225 # define MAX_SEND_AHEAD 400
226 char *best_tr_recv = NULL; /* these are used for flow control */
227 double best_lk_recv;
228 int send_ahead = 0; /* number of outstanding sends */
229
230 # ifdef DNAML_STEP
231 # define DNAML_STEP 1
232 # endif
233 # define INFILE Seqf
234 # define OUTFILE Outf
235 FILE *INFILE, *OUTFILE;
236 comm_block comm_slave;
237 #endif
238
239 #if Slave
240 # undef DNAML_STEP
241 # define DNAML_STEP 0
242 # define INFILE Seqf
243 # define OUTFILE Outf
244 FILE *INFILE, *OUTFILE;
245 comm_block comm_master;
246 #endif
247
248 #if Debug
249 FILE *debug;
250 #endif
251
252 #if DNAML_STEP
253 int begin_step_time, end_step_time;
254 # define REPORT_ADD_SPECS p4_send(DNAML_ADD_SPECS, DNAML_HOST_ID, NULL, 0)
255 # define REPORT_SEND_TREE p4_send(DNAML_SEND_TREE, DNAML_HOST_ID, NULL, 0)
256 # define REPORT_RECV_TREE p4_send(DNAML_RECV_TREE, DNAML_HOST_ID, NULL, 0)
257 # define REPORT_STEP_TIME \
258 {\
259 char send_buf[80]; \
260 end_step_time = p4_clock(); \
261 (void) sprintf(send_buf, "%d", end_step_time-begin_step_time); \
262 p4_send(DNAML_STEP_TIME, DNAML_HOST_ID, send_buf,strlen(send_buf)+1); \
263 begin_step_time = end_step_time; \
264 }
265 #else
266 # define REPORT_ADD_SPECS
267 # define REPORT_SEND_TREE
268 # define REPORT_RECV_TREE
269 # define REPORT_STEP_TIME
270 #endif
271
272 /*=======================================================================*/
273 /* PROGRAM */
274 /*=======================================================================*/
275 /* Best tree handling for dnaml */
276 /*=======================================================================*/
277
278 /* Tip value comparisons
279 *
280 * Use void pointers to hide type from other routines. Only tipValPtr and
281 * cmpTipVal need to be changed to alter the nature of the values compared
282 * (e.g., names instead of node numbers).
283 *
284 * cmpTipVal(tipValPtr(nodeptr p), tipValPtr(nodeptr q)) == -1, 0 or 1.
285 *
286 * This provides direct comparison of tip values (for example, for
287 * definition of tr->start).
288 */
289
290
tipValPtr(nodeptr p)291 void *tipValPtr (nodeptr p)
292 { return (void *) & p->number;
293 }
294
295
cmpTipVal(void * v1,void * v2)296 int cmpTipVal (void *v1, void *v2)
297 { /* cmpTipVal */
298 int i1, i2;
299
300 i1 = *((int *) v1);
301 i2 = *((int *) v2);
302 return (i1 < i2) ? -1 : ((i1 == i2) ? 0 : 1);
303 } /* cmpTipVal */
304
305
306 /* These are the only routines that need to UNDERSTAND topologies */
307
308
setupTopol(int maxtips,int nsites)309 topol *setupTopol (int maxtips, int nsites)
310 { /* setupTopol */
311 topol *tpl;
312
313 if (! (tpl = (topol *) Malloc(sizeof(topol))) ||
314 ! (tpl->links = (connptr) Malloc((2*maxtips-3) * sizeof(connect))) ||
315 (nsites && ! (tpl->log_f
316 = (double *) Malloc(nsites * sizeof(double))))) {
317 printf("ERROR: Unable to get topology memory");
318 tpl = (topol *) NULL;
319 }
320
321 else {
322 if (nsites == 0) tpl->log_f = (double *) NULL;
323 tpl->likelihood = unlikely;
324 tpl->start = (node *) NULL;
325 tpl->nextlink = 0;
326 tpl->ntips = 0;
327 tpl->nextnode = 0;
328 tpl->opt_level = 0; /* degree of branch swapping explored */
329 tpl->scrNum = 0; /* position in sorted list of scores */
330 tpl->tplNum = 0; /* position in sorted list of trees */
331 tpl->log_f_valid = 0; /* log_f value sites */
332 tpl->prelabeled = TRUE;
333 tpl->smoothed = FALSE; /* branch optimization converged? */
334 }
335
336 return tpl;
337 } /* setupTopol */
338
339
freeTopol(topol * tpl)340 void freeTopol (topol *tpl)
341 { /* freeTopol */
342 Free(tpl->links);
343 if (tpl->log_f) Free(tpl->log_f);
344 Free(tpl);
345 } /* freeTopol */
346
347
saveSubtree(nodeptr p,topol * tpl)348 int saveSubtree (nodeptr p, topol *tpl)
349 /* Save a subtree in a standard order so that earlier branches
350 * from a node contain lower value tips than do second branches from
351 * the node. This code works with arbitrary furcations in the tree.
352 */
353 { /* saveSubtree */
354 connptr r, r0;
355 nodeptr q, s;
356 int t, t0, t1;
357
358 r0 = tpl->links;
359 r = r0 + (tpl->nextlink)++;
360 r->p = p;
361 r->q = q = p->back;
362 r->z = p->z;
363 r->descend = 0; /* No children (yet) */
364
365 if (q->tip) {
366 r->valptr = tipValPtr(q); /* Assign value */
367 }
368
369 else { /* Internal node, look at children */
370 s = q->next; /* First child */
371 do {
372 t = saveSubtree(s, tpl); /* Generate child's subtree */
373
374 t0 = 0; /* Merge child into list */
375 t1 = r->descend;
376 while (t1 && (cmpTipVal(r0[t1].valptr, r0[t].valptr) < 0)) {
377 t0 = t1;
378 t1 = r0[t1].sibling;
379 }
380 if (t0) r0[t0].sibling = t; else r->descend = t;
381 r0[t].sibling = t1;
382
383 s = s->next; /* Next child */
384 } while (s != q);
385
386 r->valptr = r0[r->descend].valptr; /* Inherit first child's value */
387 } /* End of internal node processing */
388
389 return r - r0;
390 } /* saveSubtree */
391
392
minSubtreeTip(nodeptr p0)393 nodeptr minSubtreeTip (nodeptr p0)
394 { /* minTreeTip */
395 nodeptr minTip, p, testTip;
396
397 if (p0->tip) return p0;
398
399 p = p0->next;
400 minTip = minSubtreeTip(p->back);
401 while ((p = p->next) != p0) {
402 testTip = minSubtreeTip(p->back);
403 if (cmpTipVal(tipValPtr(testTip), tipValPtr(minTip)) < 0)
404 minTip = testTip;
405 }
406 return minTip;
407 } /* minTreeTip */
408
409
minTreeTip(nodeptr p)410 nodeptr minTreeTip (nodeptr p)
411 { /* minTreeTip */
412 nodeptr minp, minpb;
413
414 minp = minSubtreeTip(p);
415 minpb = minSubtreeTip(p->back);
416 return cmpTipVal(tipValPtr(minp), tipValPtr(minpb)) < 0 ? minp : minpb;
417 } /* minTreeTip */
418
419
saveTree(tree * tr,topol * tpl)420 void saveTree (tree *tr, topol *tpl)
421 /* Save a tree topology in a standard order so that first branches
422 * from a node contain lower value tips than do second branches from
423 * the node. The root tip should have the lowest value of all.
424 */
425 { /* saveTree */
426 connptr r;
427 double *tr_log_f, *tpl_log_f;
428 int i;
429
430 tpl->nextlink = 0; /* Reset link pointer */
431 r = tpl->links + saveSubtree(minTreeTip(tr->start), tpl); /* Save tree */
432 r->sibling = 0;
433
434 tpl->likelihood = tr->likelihood;
435 tpl->start = tr->start;
436 tpl->ntips = tr->ntips;
437 tpl->nextnode = tr->nextnode;
438 tpl->opt_level = tr->opt_level;
439 tpl->prelabeled = tr->prelabeled;
440 tpl->smoothed = tr->smoothed;
441
442 if (tpl_log_f = tpl->log_f) {
443 tr_log_f = tr->log_f;
444 i = tpl->log_f_valid = tr->log_f_valid;
445 while (--i >= 0) *tpl_log_f++ = *tr_log_f++;
446 }
447 else {
448 tpl->log_f_valid = 0;
449 }
450 } /* saveTree */
451
452
copyTopol(topol * tpl1,topol * tpl2)453 void copyTopol (topol *tpl1, topol *tpl2)
454 { /* copyTopol */
455 connptr r1, r2, r10, r20;
456 double *tpl1_log_f, *tpl2_log_f;
457 int i;
458
459 r10 = tpl1->links;
460 r20 = tpl2->links;
461 tpl2->nextlink = tpl1->nextlink;
462
463 r1 = r10;
464 r2 = r20;
465 i = 2 * tpl1->ntips - 3;
466 while (--i >= 0) {
467 r2->z = r1->z;
468 r2->p = r1->p;
469 r2->q = r1->q;
470 r2->valptr = r1->valptr;
471 r2->descend = r1->descend;
472 r2->sibling = r1->sibling;
473 r1++;
474 r2++;
475 }
476
477 if (tpl1->log_f_valid && tpl2->log_f) {
478 tpl1_log_f = tpl1->log_f;
479 tpl2_log_f = tpl2->log_f;
480 tpl2->log_f_valid = i = tpl1->log_f_valid;
481 while (--i >= 0) *tpl2_log_f++ = *tpl1_log_f++;
482 }
483 else {
484 tpl2->log_f_valid = 0;
485 }
486
487 tpl2->likelihood = tpl1->likelihood;
488 tpl2->start = tpl1->start;
489 tpl2->ntips = tpl1->ntips;
490 tpl2->nextnode = tpl1->nextnode;
491 tpl2->opt_level = tpl1->opt_level;
492 tpl2->prelabeled = tpl1->prelabeled;
493 tpl2->scrNum = tpl1->scrNum;
494 tpl2->tplNum = tpl1->tplNum;
495 tpl2->smoothed = tpl1->smoothed;
496 } /* copyTopol */
497
498
restoreTree(topol * tpl,tree * tr)499 boolean restoreTree (topol *tpl, tree *tr)
500 { /* restoreTree */
501 void hookup();
502 boolean initrav();
503
504 connptr r;
505 nodeptr p, p0;
506 double *tr_log_f, *tpl_log_f;
507 int i;
508
509 /* Clear existing connections */
510
511 for (i = 1; i <= 2*(tr->mxtips) - 2; i++) { /* Uses p = p->next at tip */
512 p0 = p = tr->nodep[i];
513 do {
514 p->back = (nodeptr) NULL;
515 p = p->next;
516 } while (p != p0);
517 }
518
519 /* Copy connections from topology */
520
521 for (r = tpl->links, i = 0; i < tpl->nextlink; r++, i++) {
522 hookup(r->p, r->q, r->z);
523 }
524
525 tr->likelihood = tpl->likelihood;
526 tr->start = tpl->start;
527 tr->ntips = tpl->ntips;
528 tr->nextnode = tpl->nextnode;
529 tr->opt_level = tpl->opt_level;
530 tr->prelabeled = tpl->prelabeled;
531 tr->smoothed = tpl->smoothed;
532
533 if (tpl_log_f = tpl->log_f) {
534 tr_log_f = tr->log_f;
535 i = tr->log_f_valid = tpl->log_f_valid;
536 while (--i >= 0) *tr_log_f++ = *tpl_log_f++;
537 }
538 else {
539 tr->log_f_valid = 0;
540 }
541
542 return (initrav(tr, tr->start) && initrav(tr, tr->start->back));
543 } /* restoreTree */
544
545
initBestTree(bestlist * bt,int newkeep,int numsp,int sites)546 int initBestTree (bestlist *bt, int newkeep, int numsp, int sites)
547 { /* initBestTree */
548 int i, nlogf;
549
550
551 bt->nkeep = 0;
552
553 if (bt->ninit <= 0) {
554 if (! (bt->start = setupTopol(numsp, sites))) return 0;
555 bt->ninit = -1;
556 bt->nvalid = 0;
557 bt->numtrees = 0;
558 bt->best = unlikely;
559 bt->improved = FALSE;
560 bt->byScore = (topol **) Malloc((newkeep+1) * sizeof(topol *));
561 bt->byTopol = (topol **) Malloc((newkeep+1) * sizeof(topol *));
562 if (! bt->byScore || ! bt->byTopol) {
563 fprintf(stderr, "initBestTree: Malloc failure\n");
564 return 0;
565 }
566 }
567 else if (ABS(newkeep) > bt->ninit) {
568 if (newkeep < 0) newkeep = -(bt->ninit);
569 else newkeep = bt->ninit;
570 }
571
572 if (newkeep < 1) { /* Use negative newkeep to clear list */
573 newkeep = -newkeep;
574 if (newkeep < 1) newkeep = 1;
575 bt->nvalid = 0;
576 bt->best = unlikely;
577 }
578
579 if (bt->nvalid >= newkeep) {
580 bt->nvalid = newkeep;
581 bt->worst = bt->byScore[newkeep]->likelihood;
582 }
583 else {
584 bt->worst = unlikely;
585 }
586
587 for (i = bt->ninit + 1; i <= newkeep; i++) {
588 nlogf = (i <= maxlogf) ? sites : 0;
589 if (! (bt->byScore[i] = setupTopol(numsp, nlogf))) break;
590 bt->byTopol[i] = bt->byScore[i];
591 bt->ninit = i;
592 }
593
594 return (bt->nkeep = MIN(newkeep, bt->ninit));
595 } /* initBestTree */
596
597
598
resetBestTree(bestlist * bt)599 int resetBestTree (bestlist *bt)
600 { /* resetBestTree */
601 bt->best = unlikely;
602 bt->worst = unlikely;
603 bt->nvalid = 0;
604 bt->improved = FALSE;
605 } /* resetBestTree */
606
607
freeBestTree(bestlist * bt)608 boolean freeBestTree(bestlist *bt)
609 { /* freeBestTree */
610 while (bt->ninit >= 0) freeTopol(bt->byScore[(bt->ninit)--]);
611 freeTopol(bt->start);
612 return TRUE;
613 } /* freeBestTree */
614
615
616 /* Compare two trees, assuming that each is in standard order. Return
617 * -1 if first preceeds second, 0 if they are identical, or +1 if first
618 * follows second in standard order. Lower number tips preceed higher
619 * number tips. A tip preceeds a corresponding internal node. Internal
620 * nodes are ranked by their lowest number tip.
621 */
622
cmpSubtopol(connptr p10,connptr p1,connptr p20,connptr p2)623 int cmpSubtopol (connptr p10, connptr p1, connptr p20, connptr p2)
624 { /* cmpSubtopol */
625 connptr p1d, p2d;
626 int cmp;
627
628 if (! p1->descend && ! p2->descend) /* Two tips */
629 return cmpTipVal(p1->valptr, p2->valptr);
630
631 if (! p1->descend) return -1; /* p1 = tip, p2 = node */
632 if (! p2->descend) return 1; /* p2 = tip, p1 = node */
633
634 p1d = p10 + p1->descend;
635 p2d = p20 + p2->descend;
636 while (1) { /* Two nodes */
637 if (cmp = cmpSubtopol(p10, p1d, p20, p2d)) return cmp; /* Subtrees */
638 if (! p1d->sibling && ! p2d->sibling) return 0; /* Lists done */
639 if (! p1d->sibling) return -1; /* One done, other not */
640 if (! p2d->sibling) return 1; /* One done, other not */
641 p1d = p10 + p1d->sibling; /* Neither done */
642 p2d = p20 + p2d->sibling;
643 }
644 } /* cmpSubtopol */
645
646
647
cmpTopol(void * tpl1,void * tpl2)648 int cmpTopol (void *tpl1, void *tpl2)
649 { /* cmpTopol */
650 connptr r1, r2;
651 int cmp;
652
653 r1 = ((topol *) tpl1)->links;
654 r2 = ((topol *) tpl2)->links;
655 cmp = cmpTipVal(tipValPtr(r1->p), tipValPtr(r2->p));
656 if (cmp) return cmp;
657 return cmpSubtopol(r1, r1, r2, r2);
658 } /* cmpTopol */
659
660
661
cmpTplScore(void * tpl1,void * tpl2)662 int cmpTplScore (void *tpl1, void *tpl2)
663 { /* cmpTplScore */
664 double l1, l2;
665
666 l1 = ((topol *) tpl1)->likelihood;
667 l2 = ((topol *) tpl2)->likelihood;
668 return (l1 > l2) ? -1 : ((l1 == l2) ? 0 : 1);
669 } /* cmpTplScore */
670
671
672
673 /* Find an item in a sorted list of n items. If the item is in the list,
674 * return its index. If it is not in the list, return the negative of the
675 * position into which it should be inserted.
676 */
677
findInList(void * item,void * list[],int n,int (* cmpFunc)())678 int findInList (void *item, void *list[], int n, int (* cmpFunc)())
679 { /* findInList */
680 int mid, hi, lo, cmp;
681
682 if (n < 1) return -1; /* No match; first index */
683
684 lo = 1;
685 mid = 0;
686 hi = n;
687 while (lo < hi) {
688 mid = (lo + hi) >> 1;
689 cmp = (* cmpFunc)(item, list[mid-1]);
690 if (cmp) {
691 if (cmp < 0) hi = mid;
692 else lo = mid + 1;
693 }
694 else return mid; /* Exact match */
695 }
696
697 if (lo != mid) {
698 cmp = (* cmpFunc)(item, list[lo-1]);
699 if (cmp == 0) return lo;
700 }
701 if (cmp > 0) lo++; /* Result of step = 0 test */
702 return -lo;
703 } /* findInList */
704
705
706
findTreeInList(bestlist * bt,tree * tr)707 int findTreeInList (bestlist *bt, tree *tr)
708 { /* findTreeInList */
709 topol *tpl;
710
711 tpl = bt->byScore[0];
712 saveTree(tr, tpl);
713 return findInList((void *) tpl, (void **) (& (bt->byTopol[1])),
714 bt->nvalid, cmpTopol);
715 } /* findTreeInList */
716
717
saveBestTree(bestlist * bt,tree * tr)718 int saveBestTree (bestlist *bt, tree *tr)
719 { /* saveBestTree */
720 double *tr_log_f, *tpl_log_f;
721 topol *tpl, *reuse;
722 int tplNum, scrNum, reuseScrNum, reuseTplNum, i, oldValid, newValid;
723
724 tplNum = findTreeInList(bt, tr);
725 tpl = bt->byScore[0];
726 oldValid = newValid = bt->nvalid;
727
728 if (tplNum > 0) { /* Topology is in list */
729 reuse = bt->byTopol[tplNum]; /* Matching topol */
730 reuseScrNum = reuse->scrNum;
731 reuseTplNum = reuse->tplNum;
732 }
733 /* Good enough to keep? */
734 else if (tr->likelihood < bt->worst) return 0;
735
736 else { /* Topology is not in list */
737 tplNum = -tplNum; /* Add to list (not replace) */
738 if (newValid < bt->nkeep) bt->nvalid = ++newValid;
739 reuseScrNum = newValid; /* Take worst tree */
740 reuse = bt->byScore[reuseScrNum];
741 reuseTplNum = (newValid > oldValid) ? newValid : reuse->tplNum;
742 if (tr->likelihood > bt->start->likelihood) bt->improved = TRUE;
743 }
744
745 scrNum = findInList((void *) tpl, (void **) (& (bt->byScore[1])),
746 oldValid, cmpTplScore);
747 scrNum = ABS(scrNum);
748
749 if (scrNum < reuseScrNum)
750 for (i = reuseScrNum; i > scrNum; i--)
751 (bt->byScore[i] = bt->byScore[i-1])->scrNum = i;
752
753 else if (scrNum > reuseScrNum) {
754 scrNum--;
755 for (i = reuseScrNum; i < scrNum; i++)
756 (bt->byScore[i] = bt->byScore[i+1])->scrNum = i;
757 }
758
759 if (tplNum < reuseTplNum)
760 for (i = reuseTplNum; i > tplNum; i--)
761 (bt->byTopol[i] = bt->byTopol[i-1])->tplNum = i;
762
763 else if (tplNum > reuseTplNum) {
764 tplNum--;
765 for (i = reuseTplNum; i < tplNum; i++)
766 (bt->byTopol[i] = bt->byTopol[i+1])->tplNum = i;
767 }
768
769 if (tpl_log_f = tpl->log_f) {
770 tr_log_f = tr->log_f;
771 i = tpl->log_f_valid = tr->log_f_valid;
772 while (--i >= 0) *tpl_log_f++ = *tr_log_f++;
773 }
774 else {
775 tpl->log_f_valid = 0;
776 }
777
778 tpl->scrNum = scrNum;
779 tpl->tplNum = tplNum;
780 bt->byTopol[tplNum] = bt->byScore[scrNum] = tpl;
781 bt->byScore[0] = reuse;
782
783 if (scrNum == 1) bt->best = tr->likelihood;
784 if (newValid == bt->nkeep) bt->worst = bt->byScore[newValid]->likelihood;
785
786 return scrNum;
787 } /* saveBestTree */
788
789
startOpt(bestlist * bt,tree * tr)790 int startOpt (bestlist *bt, tree *tr)
791 { /* startOpt */
792 int scrNum;
793
794 scrNum = saveBestTree(bt, tr);
795 copyTopol(bt->byScore[scrNum], bt->start);
796 bt->improved = FALSE;
797 return scrNum;
798 } /* startOpt */
799
800
setOptLevel(bestlist * bt,int opt_level)801 int setOptLevel (bestlist *bt, int opt_level)
802 { /* setOptLevel */
803 int tplNum, scrNum;
804
805 tplNum = findInList((void *) bt->start, (void **) (&(bt->byTopol[1])),
806 bt->nvalid, cmpTopol);
807 if (tplNum > 0) {
808 bt->byTopol[tplNum]->opt_level = opt_level;
809 scrNum = bt->byTopol[tplNum]->scrNum;
810 }
811 else {
812 scrNum = 0;
813 }
814
815 return scrNum;
816 } /* setOptLevel */
817
818
recallBestTree(bestlist * bt,int rank,tree * tr)819 int recallBestTree (bestlist *bt, int rank, tree *tr)
820 { /* recallBestTree */
821 if (rank < 1) rank = 1;
822 if (rank > bt->nvalid) rank = bt->nvalid;
823 if (rank > 0) if (! restoreTree(bt->byScore[rank], tr)) return FALSE;
824 return rank;
825 } /* recallBestTree */
826
827
828 /*=======================================================================*/
829 /* End of best tree routines */
830 /*=======================================================================*/
831
832
833 #if 0
834 void hang(char *msg)
835 { printf("Hanging around: %s\n", msg);
836 while(1);
837 }
838 #endif
839
840
getnums(rawdata * rdta)841 boolean getnums (rawdata *rdta)
842 /* input number of species, number of sites */
843 { /* getnums */
844 printf("\n%s, version %s, %s,\nCopyright (C) 1998, 1999, 2000 by Gary J. Olsen\n\n",
845 programName,
846 programVersion,
847 programDate);
848 printf("Based in part on Joseph Felsenstein's\n\n");
849 printf(" Nucleic acid sequence Maximum Likelihood method, version 3.3\n\n\n");
850
851 if (fscanf(INFILE, "%d %d", & rdta->numsp, & rdta->sites) != 2) {
852 printf("ERROR: Problem reading number of species and sites\n");
853 return FALSE;
854 }
855 printf("%d Species, %d Sites\n\n", rdta->numsp, rdta->sites);
856
857 if (rdta->numsp < 4) {
858 printf("TOO FEW SPECIES\n");
859 return FALSE;
860 }
861
862 if (rdta->sites < 1) {
863 printf("TOO FEW SITES\n");
864 return FALSE;
865 }
866
867 return TRUE;
868 } /* getnums */
869
870
digitchar(int ch)871 boolean digitchar (int ch) {return (ch >= '0' && ch <= '9'); }
872
873
whitechar(int ch)874 boolean whitechar (int ch)
875 { return (ch == ' ' || ch == '\n' || ch == '\t');
876 }
877
878
uppercase(int * chptr)879 void uppercase (int *chptr)
880 /* convert character to upper case -- either ASCII or EBCDIC */
881 { /* uppercase */
882 int ch;
883
884 ch = *chptr;
885 if ((ch >= 'a' && ch <= 'i') || (ch >= 'j' && ch <= 'r')
886 || (ch >= 's' && ch <= 'z'))
887 *chptr = ch + 'A' - 'a';
888 } /* uppercase */
889
890
base36(int ch)891 int base36 (int ch)
892 { /* base36 */
893 if (ch >= '0' && ch <= '9') return (ch - '0');
894 else if (ch >= 'A' && ch <= 'I') return (ch - 'A' + 10);
895 else if (ch >= 'J' && ch <= 'R') return (ch - 'J' + 19);
896 else if (ch >= 'S' && ch <= 'Z') return (ch - 'S' + 28);
897 else if (ch >= 'a' && ch <= 'i') return (ch - 'a' + 10);
898 else if (ch >= 'j' && ch <= 'r') return (ch - 'j' + 19);
899 else if (ch >= 's' && ch <= 'z') return (ch - 's' + 28);
900 else return -1;
901 } /* base36 */
902
903
itobase36(int i)904 int itobase36 (int i)
905 { /* itobase36 */
906 if (i < 0) return '?';
907 else if (i < 10) return (i + '0');
908 else if (i < 19) return (i - 10 + 'A');
909 else if (i < 28) return (i - 19 + 'J');
910 else if (i < 36) return (i - 28 + 'S');
911 else return '?';
912 } /* itobase36 */
913
914
findch(int c)915 int findch (int c)
916 { /* findch */
917 int ch;
918
919 while ((ch = getc(INFILE)) != EOF && ch != c) ;
920 return ch;
921 } /* findch */
922
923
924 #if Master || Slave
str_findch(char ** treestrp,int c)925 int str_findch (char **treestrp, int c)
926 { /* str_findch */
927 int ch;
928
929 while ((ch = *(*treestrp)++) != NULL && ch != c) ;
930 return ch;
931 } /* str_findch */
932 #endif
933
934
inputboot(analdef * adef)935 boolean inputboot(analdef *adef)
936 /* read the bootstrap auxilliary info */
937 { /* inputboot */
938 if (! adef->boot) {
939 printf("ERROR: Unexpected Bootstrap auxiliary data line\n");
940 return FALSE;
941 }
942 else if (fscanf(INFILE, "%ld", & adef->boot) != 1 ||
943 findch('\n') == EOF) {
944 printf("ERROR: Problem reading boostrap random seed value\n");
945 return FALSE;
946 }
947
948 return TRUE;
949 } /* inputboot */
950
951
inputcategories(rawdata * rdta)952 boolean inputcategories (rawdata *rdta)
953 /* read the category rates and the categories for each site */
954 { /* inputcategories */
955 int i, j, ch, ci;
956
957 if (rdta->categs >= 0) {
958 printf("ERROR: Unexpected Categories auxiliary data line\n");
959 return FALSE;
960 }
961 if (fscanf(INFILE, "%d", & rdta->categs) != 1) {
962 printf("ERROR: Problem reading number of rate categories\n");
963 return FALSE;
964 }
965 if (rdta->categs < 1 || rdta->categs > maxcategories) {
966 printf("ERROR: Bad number of categories: %d\n", rdta->categs);
967 printf("Must be in range 1 - %d\n", maxcategories);
968 return FALSE;
969 }
970
971 for (j = 1; j <= rdta->categs
972 && fscanf(INFILE, "%lf", &(rdta->catrat[j])) == 1; j++) ;
973
974 if ((j <= rdta->categs) || (findch('\n') == EOF)) {
975 printf("ERROR: Problem reading rate values\n");
976 return FALSE;
977 }
978
979 for (i = 1; i <= nmlngth; i++) (void) getc(INFILE);
980
981 i = 1;
982 while (i <= rdta->sites) {
983 ch = getc(INFILE);
984 ci = base36(ch);
985 if (ci >= 0 && ci <= rdta->categs)
986 rdta->sitecat[i++] = ci;
987 else if (! whitechar(ch)) {
988 printf("ERROR: Bad category character (%c) at site %d\n", ch, i);
989 return FALSE;
990 }
991 }
992
993 if (findch('\n') == EOF) { /* skip to end of line */
994 printf("ERROR: Missing newline at end of category data\n");
995 return FALSE;
996 }
997
998 return TRUE;
999 } /* inputcategories */
1000
1001
inputextra(analdef * adef)1002 boolean inputextra (analdef *adef)
1003 { /* inputextra */
1004 if (fscanf(INFILE,"%d", & adef->extra) != 1 ||
1005 findch('\n') == EOF) {
1006 printf("ERROR: Problem reading extra info value\n");
1007 return FALSE;
1008 }
1009
1010 return TRUE;
1011 } /* inputextra */
1012
1013
inputfreqs(rawdata * rdta)1014 boolean inputfreqs (rawdata *rdta)
1015 { /* inputfreqs */
1016 if (fscanf(INFILE, "%lf%lf%lf%lf",
1017 & rdta->freqa, & rdta->freqc,
1018 & rdta->freqg, & rdta->freqt) != 4 ||
1019 findch('\n') == EOF) {
1020 printf("ERROR: Problem reading user base frequencies data\n");
1021 return FALSE;
1022 }
1023
1024 rdta->freqread = TRUE;
1025 return TRUE;
1026 } /* inputfreqs */
1027
1028
inputglobal(tree * tr)1029 boolean inputglobal (tree *tr)
1030 /* input the global option information */
1031 { /* inputglobal */
1032 int ch;
1033
1034 if (tr->global != -2) {
1035 printf("ERROR: Unexpected Global auxiliary data line\n");
1036 return FALSE;
1037 }
1038 if (fscanf(INFILE, "%d", &(tr->global)) != 1) {
1039 printf("ERROR: Problem reading rearrangement region size\n");
1040 return FALSE;
1041 }
1042 if (tr->global < 0) {
1043 printf("WARNING: Global region size too small;\n");
1044 printf(" value reset to local\n\n");
1045 tr->global = 1;
1046 }
1047 else if (tr->global == 0) tr->partswap = 0;
1048 else if (tr->global > tr->mxtips - 3) {
1049 tr->global = tr->mxtips - 3;
1050 }
1051
1052 while ((ch = getc(INFILE)) != '\n') { /* Scan for second value */
1053 if (! whitechar(ch)) {
1054 if (ch != EOF) (void) ungetc(ch, INFILE);
1055 if (ch == EOF || fscanf(INFILE, "%d", &(tr->partswap)) != 1
1056 || findch('\n') == EOF) {
1057 printf("ERROR: Problem reading insert swap region size\n");
1058 return FALSE;
1059 }
1060 else if (tr->partswap < 0) tr->partswap = 1;
1061 else if (tr->partswap > tr->mxtips - 3) {
1062 tr->partswap = tr->mxtips - 3;
1063 }
1064
1065 if (tr->partswap > tr->global) tr->global = tr->partswap;
1066 break; /* Break while loop */
1067 }
1068 }
1069
1070 return TRUE;
1071 } /* inputglobal */
1072
1073
inputjumble(analdef * adef)1074 boolean inputjumble (analdef *adef)
1075 { /* inputjumble */
1076 if (! adef->jumble) {
1077 printf("ERROR: Unexpected Jumble auxiliary data line\n");
1078 return FALSE;
1079 }
1080 else if (fscanf(INFILE, "%ld", & adef->jumble) != 1 ||
1081 findch('\n') == EOF) {
1082 printf("ERROR: Problem reading jumble random seed value\n");
1083 return FALSE;
1084 }
1085 else if (adef->jumble == 0) {
1086 printf("WARNING: Jumble random number seed is zero\n\n");
1087 }
1088
1089 return TRUE;
1090 } /* inputjumble */
1091
1092
inputkeep(analdef * adef)1093 boolean inputkeep (analdef *adef)
1094 { /* inputkeep */
1095 if (fscanf(INFILE, "%d", & adef->nkeep) != 1 ||
1096 findch('\n') == EOF || adef->nkeep < 1) {
1097 printf("ERROR: Problem reading number of kept trees\n");
1098 return FALSE;
1099 }
1100
1101 return TRUE;
1102 } /* inputkeep */
1103
1104
inputoutgroup(analdef * adef,tree * tr)1105 boolean inputoutgroup (analdef *adef, tree *tr)
1106 { /* inputoutgroup */
1107 if (! adef->root || tr->outgr > 0) {
1108 printf("ERROR: Unexpected Outgroup auxiliary data line\n");
1109 return FALSE;
1110 }
1111 else if (fscanf(INFILE, "%d", &(tr->outgr)) != 1 ||
1112 findch('\n') == EOF) {
1113 printf("ERROR: Problem reading outgroup number\n");
1114 return FALSE;
1115 }
1116 else if ((tr->outgr < 1) || (tr->outgr > tr->mxtips)) {
1117 printf("ERROR: Bad outgroup: '%d'\n", tr->outgr);
1118 return FALSE;
1119 }
1120
1121 return TRUE;
1122 } /* inputoutgroup */
1123
1124
inputratio(rawdata * rdta)1125 boolean inputratio (rawdata *rdta)
1126 { /* inputratio */
1127 if (rdta->ttratio >= 0.0) {
1128 printf("ERROR: Unexpected Transition/transversion auxiliary data\n");
1129 return FALSE;
1130 }
1131 else if (fscanf(INFILE,"%lf", & rdta->ttratio)!=1 ||
1132 findch('\n') == EOF) {
1133 printf("ERROR: Problem reading transition/transversion ratio\n");
1134 return FALSE;
1135 }
1136
1137 return TRUE;
1138 } /* inputratio */
1139
1140
1141 /* Y 0 is treeNone (no tree)
1142 Y 1 is treeNewick
1143 Y 2 is treeProlog
1144 Y 3 is treePHYLIP
1145 */
1146
inputtreeopt(analdef * adef)1147 boolean inputtreeopt (analdef *adef)
1148 { /* inputtreeopt */
1149 if (! adef->trout) {
1150 printf("ERROR: Unexpected Treefile auxiliary data\n");
1151 return FALSE;
1152 }
1153 else if (fscanf(INFILE,"%d", & adef->trout) != 1 ||
1154 findch('\n') == EOF) {
1155 printf("ERROR: Problem reading output tree-type number\n");
1156 return FALSE;
1157 }
1158 else if ((adef->trout < 0) || (adef->trout > treeMaxType)) {
1159 printf("ERROR: Bad output tree-type number: '%d'\n", adef->trout);
1160 return FALSE;
1161 }
1162
1163 return TRUE;
1164 } /* inputtreeopt */
1165
1166
inputweights(analdef * adef,rawdata * rdta,cruncheddata * cdta)1167 boolean inputweights (analdef *adef, rawdata *rdta, cruncheddata *cdta)
1168 /* input the character weights 0, 1, 2 ... 9, A, B, ... Y, Z */
1169 { /* inputweights */
1170 int i, ch, wi;
1171
1172 if (! adef->userwgt || cdta->wgtsum > 0) {
1173 printf("ERROR: Unexpected Weights auxiliary data\n");
1174 return FALSE;
1175 }
1176
1177 for (i = 2; i <= nmlngth; i++) (void) getc(INFILE);
1178 cdta->wgtsum = 0;
1179 i = 1;
1180 while (i <= rdta->sites) {
1181 ch = getc(INFILE);
1182 wi = base36(ch);
1183 if (wi >= 0)
1184 cdta->wgtsum += rdta->wgt[i++] = wi;
1185 else if (! whitechar(ch)) {
1186 printf("ERROR: Bad weight character: '%c'", ch);
1187 printf(" Weights in dnaml must be a digit or a letter.\n");
1188 return FALSE;
1189 }
1190 }
1191
1192 if (findch('\n') == EOF) { /* skip to end of line */
1193 printf("ERROR: Missing newline at end of weight data\n");
1194 return FALSE;
1195 }
1196
1197 return TRUE;
1198 } /* inputweights */
1199
1200
getoptions(analdef * adef,rawdata * rdta,cruncheddata * cdta,tree * tr)1201 boolean getoptions (analdef *adef, rawdata *rdta, cruncheddata *cdta, tree *tr)
1202 { /* getoptions */
1203 int ch, i, extranum;
1204
1205 adef->boot = 0; /* Don't bootstrap column weights */
1206 adef->empf = TRUE; /* Use empirical base frequencies */
1207 adef->extra = 0; /* No extra runtime info unless requested */
1208 adef->interleaved = TRUE; /* By default, data format is interleaved */
1209 adef->jumble = FALSE; /* Use random addition sequence */
1210 adef->nkeep = 0; /* Keep only the one best tree */
1211 adef->prdata = FALSE; /* Don't echo data to output stream */
1212 adef->qadd = TRUE; /* Smooth branches globally in add */
1213 adef->restart = FALSE; /* Restart from user tree */
1214 adef->root = FALSE; /* User-defined outgroup rooting */
1215 adef->trout = treeDefType; /* Output tree file */
1216 adef->trprint = TRUE; /* Print tree to output stream */
1217 rdta->categs = 0; /* No rate categories */
1218 rdta->catrat[1] = 1.0; /* Rate values */
1219 rdta->freqread = FALSE; /* User-defined frequencies not read yet */
1220 rdta->ttratio = 2.0; /* Transition/transversion rate ratio */
1221 tr->global = -1; /* Default search locale for optimum */
1222 tr->mxtips = rdta->numsp;
1223 tr->outgr = 1; /* Outgroup number */
1224 tr->partswap = 1; /* Default to swap locally after insert */
1225 tr->userlen = FALSE; /* User-supplied branch lengths */
1226 adef->usertree = FALSE; /* User-defined tree topologies */
1227 adef->userwgt = FALSE; /* User-defined position weights */
1228 extranum = 0;
1229
1230 while ((ch = getc(INFILE)) != '\n' && ch != EOF) {
1231 uppercase(& ch);
1232 switch (ch) {
1233 case '1' : adef->prdata = ! adef->prdata; break;
1234 case '3' : adef->trprint = ! adef->trprint; break;
1235 case '4' : adef->trout = treeDefType - adef->trout; break;
1236 case 'B' : adef->boot = 1; extranum++; break;
1237 case 'C' : rdta->categs = -1; extranum++; break;
1238 case 'E' : adef->extra = -1; break;
1239 case 'F' : adef->empf = ! adef->empf; break;
1240 case 'G' : tr->global = -2; break;
1241 case 'I' : adef->interleaved = ! adef->interleaved; break;
1242 case 'J' : adef->jumble = 1; extranum++; break;
1243 case 'K' : extranum++; break;
1244 case 'L' : tr->userlen = TRUE; break;
1245 case 'O' : adef->root = TRUE; tr->outgr = 0; extranum++; break;
1246 case 'Q' : adef->qadd = FALSE; break;
1247 case 'R' : adef->restart = TRUE; break;
1248 case 'T' : rdta->ttratio = -1.0; extranum++; break;
1249 case 'U' : adef->usertree = TRUE; break;
1250 case 'W' : adef->userwgt = TRUE; cdta->wgtsum = 0; extranum++; break;
1251 case 'Y' : adef->trout = treeDefType - adef->trout; break;
1252 case ' ' : break;
1253 case '\t': break;
1254 default :
1255 printf("ERROR: Bad option character: '%c'\n", ch);
1256 return FALSE;
1257 }
1258 }
1259
1260 if (ch == EOF) {
1261 printf("ERROR: End-of-file in options list\n");
1262 return FALSE;
1263 }
1264
1265 if (adef->usertree && adef->restart) {
1266 printf("ERROR: The restart and user-tree options conflict:\n");
1267 printf(" Restart adds rest of taxa to a starting tree;\n");
1268 printf(" User-tree does not add any taxa.\n\n");
1269 return FALSE;
1270 }
1271
1272 if (adef->usertree && adef->jumble) {
1273 printf("WARNING: The jumble and user-tree options conflict:\n");
1274 printf(" Jumble adds taxa to a tree in random order;\n");
1275 printf(" User-tree does not use taxa addition.\n");
1276 printf(" Jumble option cancelled for this run.\n\n");
1277 adef->jumble = FALSE;
1278 }
1279
1280 if (tr->userlen && tr->global != -1) {
1281 printf("ERROR: The global and user-lengths options conflict:\n");
1282 printf(" Global optimizes a starting tree;\n");
1283 printf(" User-lengths constrain the starting tree.\n\n");
1284 return FALSE;
1285 }
1286
1287 if (tr->userlen && ! adef->usertree) {
1288 printf("WARNING: User lengths required user tree option.\n");
1289 printf(" User-tree option set for this run.\n\n");
1290 adef->usertree = TRUE;
1291 }
1292
1293 rdta->wgt = (int *) Malloc((rdta->sites + 1) * sizeof(int));
1294 rdta->wgt2 = (int *) Malloc((rdta->sites + 1) * sizeof(int));
1295 rdta->sitecat = (int *) Malloc((rdta->sites + 1) * sizeof(int));
1296 cdta->alias = (int *) Malloc((rdta->sites + 1) * sizeof(int));
1297 cdta->aliaswgt = (int *) Malloc((rdta->sites + 1) * sizeof(int));
1298 cdta->patcat = (int *) Malloc((rdta->sites + 1) * sizeof(int));
1299 cdta->patrat = (double *) Malloc((rdta->sites + 1) * sizeof(double));
1300 cdta->wr = (double *) Malloc((rdta->sites + 1) * sizeof(double));
1301 cdta->wr2 = (double *) Malloc((rdta->sites + 1) * sizeof(double));
1302 if ( ! rdta->wgt || ! rdta->wgt2 || ! rdta->sitecat || ! cdta->alias
1303 || ! cdta->aliaswgt || ! cdta->patcat || ! cdta->patrat
1304 || ! cdta->wr || ! cdta->wr2) {
1305 fprintf(stderr, "getoptions: Malloc failure\n");
1306 return 0;
1307 }
1308
1309 /* process lines with auxiliary data */
1310
1311 while (extranum--) {
1312 ch = getc(INFILE);
1313 uppercase(& ch);
1314 switch (ch) {
1315 case 'B': if (! inputboot(adef)) return FALSE; break;
1316 case 'C': if (! inputcategories(rdta)) return FALSE; break;
1317 case 'E': if (! inputextra(adef)) return FALSE; extranum++; break;
1318 case 'F': if (! inputfreqs(rdta)) return FALSE; break;
1319 case 'G': if (! inputglobal(tr)) return FALSE; extranum++; break;
1320 case 'J': if (! inputjumble(adef)) return FALSE; break;
1321 case 'K': if (! inputkeep(adef)) return FALSE; break;
1322 case 'O': if (! inputoutgroup(adef, tr)) return FALSE; break;
1323 case 'T': if (! inputratio(rdta)) return FALSE; break;
1324 case 'W': if (! inputweights(adef, rdta, cdta)) return FALSE; break;
1325 case 'Y': if (! inputtreeopt(adef)) return FALSE; extranum++; break;
1326
1327 default:
1328 printf("ERROR: Auxiliary options line starts with '%c'\n", ch);
1329 return FALSE;
1330 }
1331 }
1332
1333 if (! adef->userwgt) {
1334 for (i = 1; i <= rdta->sites; i++) rdta->wgt[i] = 1;
1335 cdta->wgtsum = rdta->sites;
1336 }
1337
1338 if (adef->userwgt && cdta->wgtsum < 1) {
1339 printf("ERROR: Missing or bad user-supplied weight data.\n");
1340 return FALSE;
1341 }
1342
1343 if (adef->boot) {
1344 printf("Bootstrap random number seed = %ld\n\n", adef->boot);
1345 }
1346
1347 if (adef->jumble) {
1348 printf("Jumble random number seed = %ld\n\n", adef->jumble);
1349 }
1350
1351 if (adef->qadd) {
1352 printf("Quick add (only local branches initially optimized) in effect\n\n");
1353 }
1354
1355 if (rdta->categs > 0) {
1356 printf("Site category Rate of change\n\n");
1357 for (i = 1; i <= rdta->categs; i++)
1358 printf(" %c%13.3f\n", itobase36(i), rdta->catrat[i]);
1359 putchar('\n');
1360 for (i = 1; i <= rdta->sites; i++) {
1361 if ((rdta->wgt[i] > 0) && (rdta->sitecat[i] < 1)) {
1362 printf("ERROR: Bad category (%c) at site %d\n",
1363 itobase36(rdta->sitecat[i]), i);
1364 return FALSE;
1365 }
1366 }
1367 }
1368 else if (rdta->categs < 0) {
1369 printf("ERROR: Category auxiliary data missing from input\n");
1370 return FALSE;
1371 }
1372 else { /* rdta->categs == 0 */
1373 for (i = 1; i <= rdta->sites; i++) rdta->sitecat[i] = 1;
1374 rdta->categs = 1;
1375 }
1376
1377 if (tr->outgr < 1) {
1378 printf("ERROR: Outgroup auxiliary data missing from input\n");
1379 return FALSE;
1380 }
1381
1382 if (rdta->ttratio < 0.0) {
1383 printf("ERROR: Transition/transversion auxiliary data missing from input\n");
1384 return FALSE;
1385 }
1386
1387 if (tr->global < 0) {
1388 if (tr->global == -2) tr->global = tr->mxtips - 3; /* Default global */
1389 else tr->global = adef->usertree ? 0 : 1;/* No global */
1390 }
1391
1392 if (adef->restart) {
1393 printf("Restart option in effect. ");
1394 printf("Sequence addition will start from appended tree.\n\n");
1395 }
1396
1397 if (adef->usertree && ! tr->global) {
1398 printf("User-supplied tree topology%swill be used.\n\n",
1399 tr->userlen ? " and branch lengths " : " ");
1400 }
1401 else {
1402 if (! adef->usertree) {
1403 printf("Rearrangements of partial trees may cross %d %s.\n",
1404 tr->partswap, tr->partswap == 1 ? "branch" : "branches");
1405 }
1406 printf("Rearrangements of full tree may cross %d %s.\n\n",
1407 tr->global, tr->global == 1 ? "branch" : "branches");
1408 }
1409
1410 if (! adef->usertree && adef->nkeep == 0) adef->nkeep = 1;
1411
1412 return TRUE;
1413 } /* getoptions */
1414
1415
getbasefreqs(rawdata * rdta)1416 boolean getbasefreqs (rawdata *rdta)
1417 { /* getbasefreqs */
1418 int ch;
1419
1420 if (rdta->freqread) return TRUE;
1421
1422 ch = getc(INFILE);
1423 if (! ((ch == 'F') || (ch == 'f'))) (void) ungetc(ch, INFILE);
1424
1425 if (fscanf(INFILE, "%lf%lf%lf%lf",
1426 & rdta->freqa, & rdta->freqc,
1427 & rdta->freqg, & rdta->freqt) != 4 ||
1428 findch('\n') == EOF) {
1429 printf("ERROR: Problem reading user base frequencies\n");
1430 return FALSE;
1431 }
1432
1433 return TRUE;
1434 } /* getbasefreqs */
1435
1436
getyspace(rawdata * rdta)1437 boolean getyspace (rawdata *rdta)
1438 { /* getyspace */
1439 long size;
1440 int i;
1441 yType *y0;
1442
1443 if (! (rdta->y = (yType **) Malloc((rdta->numsp + 1) * sizeof(yType *)))) {
1444 printf("ERROR: Unable to obtain space for data array pointers\n");
1445 return FALSE;
1446 }
1447
1448 size = 4 * (rdta->sites / 4 + 1);
1449 if (! (y0 = (yType *) Malloc((rdta->numsp + 1) * size * sizeof(yType)))) {
1450 printf("ERROR: Unable to obtain space for data array\n");
1451 return FALSE;
1452 }
1453
1454 for (i = 0; i <= rdta->numsp; i++) {
1455 rdta->y[i] = y0;
1456 y0 += size;
1457 }
1458
1459 return TRUE;
1460 } /* getyspace */
1461
1462
setupTree(tree * tr,int nsites)1463 boolean setupTree (tree *tr, int nsites)
1464 { /* setupTree */
1465 nodeptr p0, p, q;
1466 int i, j, tips, inter;
1467
1468 tips = tr->mxtips;
1469 inter = tr->mxtips - 1;
1470
1471 if (!(p0 = (nodeptr) Malloc((tips + 3*inter) * sizeof(node)))) {
1472 printf("ERROR: Unable to obtain sufficient tree memory\n");
1473 return FALSE;
1474 }
1475
1476 if (!(tr->nodep = (nodeptr *) Malloc((2*tr->mxtips) * sizeof(nodeptr)))) {
1477 printf("ERROR: Unable to obtain sufficient tree memory, too\n");
1478 return FALSE;
1479 }
1480
1481 tr->nodep[0] = (node *) NULL; /* Use as 1-based array */
1482
1483 for (i = 1; i <= tips; i++) { /* Set-up tips */
1484 p = p0++;
1485 p->x = (xarray *) NULL;
1486 p->tip = (yType *) NULL;
1487 p->number = i;
1488 p->next = p;
1489 p->back = (node *) NULL;
1490
1491 tr->nodep[i] = p;
1492 }
1493
1494 for (i = tips + 1; i <= tips + inter; i++) { /* Internal nodes */
1495 q = (node *) NULL;
1496 for (j = 1; j <= 3; j++) {
1497 p = p0++;
1498 p->x = (xarray *) NULL;
1499 p->tip = (yType *) NULL;
1500 p->number = i;
1501 p->next = q;
1502 p->back = (node *) NULL;
1503 q = p;
1504 }
1505 p->next->next->next = p;
1506 tr->nodep[i] = p;
1507 }
1508
1509 tr->likelihood = unlikely;
1510 tr->start = (node *) NULL;
1511 tr->outgrnode = tr->nodep[tr->outgr];
1512 tr->ntips = 0;
1513 tr->nextnode = 0;
1514 tr->opt_level = 0;
1515 tr->prelabeled = TRUE;
1516 tr->smoothed = FALSE;
1517 tr->log_f_valid = 0;
1518
1519 tr->log_f = (double *) Malloc(nsites * sizeof(double));
1520 if (! tr->log_f) {
1521 printf("ERROR: Unable to obtain sufficient tree memory, trey\n");
1522 return FALSE;
1523 }
1524
1525 return TRUE;
1526 } /* setupTree */
1527
1528
freeTreeNode(nodeptr p)1529 void freeTreeNode (nodeptr p) /* Free tree node (sector) associated data */
1530 { /* freeTreeNode */
1531 if (p) {
1532 if (p->x) {
1533 if (p->x->lv) Free(p->x->lv);
1534 Free(p->x);
1535 }
1536 }
1537 } /* freeTreeNode */
1538
1539
freeTree(tree * tr)1540 void freeTree (tree *tr)
1541 { /* freeTree */
1542 nodeptr p, q;
1543 int i, tips, inter;
1544
1545 tips = tr->mxtips;
1546 inter = tr->mxtips - 1;
1547
1548 for (i = 1; i <= tips; i++) freeTreeNode(tr->nodep[i]);
1549
1550 for (i = tips + 1; i <= tips + inter; i++) {
1551 if (p = tr->nodep[i]) {
1552 if (q = p->next) {
1553 freeTreeNode(q->next);
1554 freeTreeNode(q);
1555 }
1556 freeTreeNode(p);
1557 }
1558 }
1559
1560 Free(tr->nodep[1]); /* Free the actual nodes */
1561 } /* freeTree */
1562
1563
getdata(analdef * adef,rawdata * rdta,tree * tr)1564 boolean getdata (analdef *adef, rawdata *rdta, tree *tr)
1565 /* read sequences */
1566 { /* getdata */
1567 int i, j, k, l, basesread, basesnew, ch;
1568 int meaning[256]; /* meaning of input characters */
1569 char *nameptr;
1570 boolean allread, firstpass;
1571
1572 for (i = 0; i <= 255; i++) meaning[i] = 0;
1573 meaning['A'] = 1;
1574 meaning['B'] = 14;
1575 meaning['C'] = 2;
1576 meaning['D'] = 13;
1577 meaning['G'] = 4;
1578 meaning['H'] = 11;
1579 meaning['K'] = 12;
1580 meaning['M'] = 3;
1581 meaning['N'] = 15;
1582 meaning['O'] = 15;
1583 meaning['R'] = 5;
1584 meaning['S'] = 6;
1585 meaning['T'] = 8;
1586 meaning['U'] = 8;
1587 meaning['V'] = 7;
1588 meaning['W'] = 9;
1589 meaning['X'] = 15;
1590 meaning['Y'] = 10;
1591 meaning['?'] = 15;
1592 meaning['-'] = 15;
1593
1594 basesread = basesnew = 0;
1595
1596 allread = FALSE;
1597 firstpass = TRUE;
1598 ch = ' ';
1599
1600 while (! allread) {
1601 for (i = 1; i <= tr->mxtips; i++) { /* Read data line */
1602
1603 if (firstpass) { /* Read species names */
1604 j = 1;
1605 while (whitechar(ch = getc(INFILE))) { /* Skip blank lines */
1606 if (ch == '\n') j = 1; else j++;
1607 }
1608
1609 if (j > nmlngth) {
1610 printf("ERROR: Blank name for species %d; ", i);
1611 printf("check number of species,\n");
1612 printf(" number of sites, and interleave option.\n");
1613 return FALSE;
1614 }
1615
1616 nameptr = tr->nodep[i]->name;
1617 for (k = 1; k < j; k++) *nameptr++ = ' ';
1618
1619 while (ch != '\n' && ch != EOF) {
1620 if (whitechar(ch)) ch = ' ';
1621 *nameptr++ = ch;
1622 if (++j > nmlngth) break;
1623 ch = getc(INFILE);
1624 }
1625
1626 while (*(--nameptr) == ' ') ; /* remove trailing blanks */
1627 *(++nameptr) = '\0'; /* add null termination */
1628
1629 if (ch == EOF) {
1630 printf("ERROR: End-of-file in name of species %d\n", i);
1631 return FALSE;
1632 }
1633 } /* if (firstpass) */
1634
1635 j = basesread;
1636 while ((j < rdta->sites)
1637 && ((ch = getc(INFILE)) != EOF)
1638 && ((! adef->interleaved) || (ch != '\n'))) {
1639 uppercase(& ch);
1640 if (meaning[ch] || ch == '.') {
1641 j++;
1642 if (ch == '.') {
1643 if (i != 1) ch = rdta->y[1][j];
1644 else {
1645 printf("ERROR: Dot (.) found at site %d of sequence 1\n", j);
1646 return FALSE;
1647 }
1648 }
1649 rdta->y[i][j] = ch;
1650 }
1651 else if (whitechar(ch) || digitchar(ch)) ;
1652 else {
1653 printf("ERROR: Bad base (%c) at site %d of sequence %d\n",
1654 ch, j, i);
1655 return FALSE;
1656 }
1657 }
1658
1659 if (ch == EOF) {
1660 printf("ERROR: End-of-file at site %d of sequence %d\n", j, i);
1661 return FALSE;
1662 }
1663
1664 if (! firstpass && (j == basesread)) i--; /* no data on line */
1665 else if (i == 1) basesnew = j;
1666 else if (j != basesnew) {
1667 printf("ERROR: Sequences out of alignment\n");
1668 printf("%d (instead of %d) residues read in sequence %d\n",
1669 j - basesread, basesnew - basesread, i);
1670 return FALSE;
1671 }
1672
1673 while (ch != '\n' && ch != EOF) ch = getc(INFILE); /* flush line */
1674 } /* next sequence */
1675 firstpass = FALSE;
1676 basesread = basesnew;
1677 allread = (basesread >= rdta->sites);
1678 }
1679
1680 /* Print listing of sequence alignment */
1681
1682 if (adef->prdata) {
1683 j = nmlngth - 5 + ((rdta->sites + ((rdta->sites - 1)/10))/2);
1684 if (j < nmlngth - 1) j = nmlngth - 1;
1685 if (j > 37) j = 37;
1686 printf("Name"); for (i=1;i<=j;i++) putchar(' '); printf("Sequences\n");
1687 printf("----"); for (i=1;i<=j;i++) putchar(' '); printf("---------\n");
1688 putchar('\n');
1689
1690 for (i = 1; i <= rdta->sites; i += 60) {
1691 l = i + 59;
1692 if (l > rdta->sites) l = rdta->sites;
1693
1694 if (adef->userwgt) {
1695 printf("Weights ");
1696 for (j = 11; j <= nmlngth+3; j++) putchar(' ');
1697 for (k = i; k <= l; k++) {
1698 putchar(itobase36(rdta->wgt[k]));
1699 if (((k % 10) == 0) && (k < l)) putchar(' ');
1700 }
1701 putchar('\n');
1702 }
1703
1704 if (rdta->categs > 1) {
1705 printf("Categories");
1706 for (j = 11; j <= nmlngth+3; j++) putchar(' ');
1707 for (k = i; k <= l; k++) {
1708 putchar(itobase36(rdta->sitecat[k]));
1709 if (((k % 10) == 0) && (k < l)) putchar(' ');
1710 }
1711 putchar('\n');
1712 }
1713
1714 for (j = 1; j <= tr->mxtips; j++) {
1715 nameptr = tr->nodep[j]->name;
1716 k = nmlngth+3;
1717 while (ch = *nameptr++) {putchar(ch); k--;}
1718 while (--k >= 0) putchar(' ');
1719
1720 for (k = i; k <= l; k++) {
1721 ch = rdta->y[j][k];
1722 if ((j > 1) && (ch == rdta->y[1][k])) ch = '.';
1723 putchar(ch);
1724 if (((k % 10) == 0) && (k < l)) putchar(' ');
1725 }
1726 putchar('\n');
1727 }
1728 putchar('\n');
1729 }
1730 }
1731
1732 for (j = 1; j <= tr->mxtips; j++) /* Convert characters to meanings */
1733 for (i = 1; i <= rdta->sites; i++) {
1734 rdta->y[j][i] = meaning[rdta->y[j][i]];
1735 }
1736
1737 return TRUE;
1738 } /* getdata */
1739
1740
getntrees(analdef * adef)1741 boolean getntrees (analdef *adef)
1742 { /* getntrees */
1743
1744 if (fscanf(INFILE, "%d", &(adef->numutrees)) != 1 || findch('\n') == EOF) {
1745 printf("ERROR: Problem reading number of user trees\n");
1746 return FALSE;
1747 }
1748
1749 if (adef->nkeep == 0) adef->nkeep = adef->numutrees;
1750
1751 return TRUE;
1752 } /* getntrees */
1753
1754
getinput(analdef * adef,rawdata * rdta,cruncheddata * cdta,tree * tr)1755 boolean getinput (analdef *adef, rawdata *rdta, cruncheddata *cdta, tree *tr)
1756 { /* getinput */
1757 if (! getnums(rdta)) return FALSE;
1758 if (! getoptions(adef, rdta, cdta, tr)) return FALSE;
1759 if (! adef->empf && ! getbasefreqs(rdta)) return FALSE;
1760 if (! getyspace(rdta)) return FALSE;
1761 if (! setupTree(tr, rdta->sites)) return FALSE;
1762 if (! getdata(adef, rdta, tr)) return FALSE;
1763 if (adef->usertree && ! getntrees(adef)) return FALSE;
1764
1765 return TRUE;
1766 } /* getinput */
1767
1768
makeboot(analdef * adef,rawdata * rdta,cruncheddata * cdta)1769 void makeboot (analdef *adef, rawdata *rdta, cruncheddata *cdta)
1770 { /* makeboot */
1771 int i, j, nonzero;
1772 double randum();
1773
1774 nonzero = 0;
1775 for (i = 1; i <= rdta->sites; i++) if (rdta->wgt[i] > 0) nonzero++;
1776
1777 for (j = 1; j <= nonzero; j++) cdta->aliaswgt[j] = 0;
1778 for (j = 1; j <= nonzero; j++)
1779 cdta->aliaswgt[(int) (nonzero*randum(& adef->boot)) + 1]++;
1780
1781 j = 0;
1782 cdta->wgtsum = 0;
1783 for (i = 1; i <= rdta->sites; i++) {
1784 if (rdta->wgt[i] > 0)
1785 cdta->wgtsum += (rdta->wgt2[i] = rdta->wgt[i] * cdta->aliaswgt[++j]);
1786 else
1787 rdta->wgt2[i] = 0;
1788 }
1789 } /* makeboot */
1790
1791
sitesort(rawdata * rdta,cruncheddata * cdta)1792 void sitesort (rawdata *rdta, cruncheddata *cdta)
1793 /* Shell sort keeping sites with identical residues and weights in
1794 * the original order (i.e., a stable sort).
1795 * The index created in cdta->alias is 1 based. The
1796 * sitecombcrunch routine packs it to a 0 based index.
1797 */
1798 { /* sitesort */
1799 int gap, i, j, jj, jg, k, n, nsp;
1800 int *index, *category;
1801 boolean flip, tied;
1802 yType **data;
1803
1804 index = cdta->alias;
1805 category = rdta->sitecat;
1806 data = rdta->y;
1807 n = rdta->sites;
1808 nsp = rdta->numsp;
1809
1810 for (gap = n / 2; gap > 0; gap /= 2) {
1811 for (i = gap + 1; i <= n; i++) {
1812 j = i - gap;
1813
1814 do {
1815 jj = index[j];
1816 jg = index[j+gap];
1817 flip = (category[jj] > category[jg]);
1818 tied = (category[jj] == category[jg]);
1819 for (k = 1; (k <= nsp) && tied; k++) {
1820 flip = (data[k][jj] > data[k][jg]);
1821 tied = (data[k][jj] == data[k][jg]);
1822 }
1823 if (flip) {
1824 index[j] = jg;
1825 index[j+gap] = jj;
1826 j -= gap;
1827 }
1828 } while (flip && (j > 0));
1829
1830 } /* for (i ... */
1831 } /* for (gap ... */
1832 } /* sitesort */
1833
1834
sitecombcrunch(rawdata * rdta,cruncheddata * cdta)1835 void sitecombcrunch (rawdata *rdta, cruncheddata *cdta)
1836 /* combine sites that have identical patterns (and nonzero weight) */
1837 { /* sitecombcrunch */
1838 int i, sitei, j, sitej, k;
1839 boolean tied;
1840
1841 i = 0;
1842 cdta->alias[0] = cdta->alias[1];
1843 cdta->aliaswgt[0] = 0;
1844
1845 for (j = 1; j <= rdta->sites; j++) {
1846 sitei = cdta->alias[i];
1847 sitej = cdta->alias[j];
1848 tied = (rdta->sitecat[sitei] == rdta->sitecat[sitej]);
1849
1850 for (k = 1; tied && (k <= rdta->numsp); k++)
1851 tied = (rdta->y[k][sitei] == rdta->y[k][sitej]);
1852
1853 if (tied) {
1854 cdta->aliaswgt[i] += rdta->wgt2[sitej];
1855 }
1856 else {
1857 if (cdta->aliaswgt[i] > 0) i++;
1858 cdta->aliaswgt[i] = rdta->wgt2[sitej];
1859 cdta->alias[i] = sitej;
1860 }
1861 }
1862
1863 cdta->endsite = i;
1864 if (cdta->aliaswgt[i] > 0) cdta->endsite++;
1865 } /* sitecombcrunch */
1866
1867
makeweights(analdef * adef,rawdata * rdta,cruncheddata * cdta)1868 boolean makeweights (analdef *adef, rawdata *rdta, cruncheddata *cdta)
1869 /* make up weights vector to avoid duplicate computations */
1870 { /* makeweights */
1871 int i;
1872
1873 if (adef->boot) makeboot(adef, rdta, cdta);
1874 else for (i = 1; i <= rdta->sites; i++) rdta->wgt2[i] = rdta->wgt[i];
1875
1876 for (i = 1; i <= rdta->sites; i++) cdta->alias[i] = i;
1877 sitesort(rdta, cdta);
1878 sitecombcrunch(rdta, cdta);
1879
1880 printf("Total weight of positions in analysis = %d\n", cdta->wgtsum);
1881 printf("There are %d distinct data patterns (columns)\n\n", cdta->endsite);
1882
1883 return TRUE;
1884 } /* makeweights */
1885
1886
makevalues(rawdata * rdta,cruncheddata * cdta)1887 boolean makevalues (rawdata *rdta, cruncheddata *cdta)
1888 /* set up fractional likelihoods at tips */
1889 { /* makevalues */
1890 double temp, wtemp;
1891 int i, j;
1892
1893 for (i = 1; i <= rdta->numsp; i++) { /* Pack and move tip data */
1894 for (j = 0; j < cdta->endsite; j++) {
1895 rdta->y[i-1][j] = rdta->y[i][cdta->alias[j]];
1896 }
1897 }
1898
1899 for (j = 0; j < cdta->endsite; j++) {
1900 cdta->patcat[j] = i = rdta->sitecat[cdta->alias[j]];
1901 cdta->patrat[j] = temp = rdta->catrat[i];
1902 cdta->wr[j] = wtemp = temp * cdta->aliaswgt[j];
1903 cdta->wr2[j] = temp * wtemp;
1904 }
1905
1906 return TRUE;
1907 } /* makevalues */
1908
1909
empiricalfreqs(rawdata * rdta,cruncheddata * cdta)1910 boolean empiricalfreqs (rawdata *rdta, cruncheddata *cdta)
1911 /* Get empirical base frequencies from the data */
1912 { /* empiricalfreqs */
1913 double sum, suma, sumc, sumg, sumt, wj, fa, fc, fg, ft;
1914 int i, j, k, code;
1915 yType *yptr;
1916
1917 rdta->freqa = 0.25;
1918 rdta->freqc = 0.25;
1919 rdta->freqg = 0.25;
1920 rdta->freqt = 0.25;
1921 for (k = 1; k <= 8; k++) {
1922 suma = 0.0;
1923 sumc = 0.0;
1924 sumg = 0.0;
1925 sumt = 0.0;
1926 for (i = 0; i < rdta->numsp; i++) {
1927 yptr = rdta->y[i];
1928 for (j = 0; j < cdta->endsite; j++) {
1929 code = *yptr++;
1930 fa = rdta->freqa * ( code & 1);
1931 fc = rdta->freqc * ((code >> 1) & 1);
1932 fg = rdta->freqg * ((code >> 2) & 1);
1933 ft = rdta->freqt * ((code >> 3) & 1);
1934 wj = cdta->aliaswgt[j] / (fa + fc + fg + ft);
1935 suma += wj * fa;
1936 sumc += wj * fc;
1937 sumg += wj * fg;
1938 sumt += wj * ft;
1939 }
1940 }
1941 sum = suma + sumc + sumg + sumt;
1942 rdta->freqa = suma / sum;
1943 rdta->freqc = sumc / sum;
1944 rdta->freqg = sumg / sum;
1945 rdta->freqt = sumt / sum;
1946 }
1947
1948 return TRUE;
1949 } /* empiricalfreqs */
1950
1951
reportfreqs(analdef * adef,rawdata * rdta)1952 void reportfreqs (analdef *adef, rawdata *rdta)
1953 { /* reportfreqs */
1954 double suma, sumb;
1955
1956 if (adef->empf) printf("Empirical ");
1957 printf("Base Frequencies:\n\n");
1958 printf(" A %10.5f\n", rdta->freqa);
1959 printf(" C %10.5f\n", rdta->freqc);
1960 printf(" G %10.5f\n", rdta->freqg);
1961 printf(" T(U) %10.5f\n\n", rdta->freqt);
1962 rdta->freqr = rdta->freqa + rdta->freqg;
1963 rdta->invfreqr = 1.0/rdta->freqr;
1964 rdta->freqar = rdta->freqa * rdta->invfreqr;
1965 rdta->freqgr = rdta->freqg * rdta->invfreqr;
1966 rdta->freqy = rdta->freqc + rdta->freqt;
1967 rdta->invfreqy = 1.0/rdta->freqy;
1968 rdta->freqcy = rdta->freqc * rdta->invfreqy;
1969 rdta->freqty = rdta->freqt * rdta->invfreqy;
1970 printf("Transition/transversion ratio = %10.6f\n\n", rdta->ttratio);
1971 suma = rdta->ttratio*rdta->freqr*rdta->freqy
1972 - (rdta->freqa*rdta->freqg + rdta->freqc*rdta->freqt);
1973 sumb = rdta->freqa*rdta->freqgr + rdta->freqc*rdta->freqty;
1974 rdta->xi = suma/(suma+sumb);
1975 rdta->xv = 1.0 - rdta->xi;
1976 if (rdta->xi <= 0.0) {
1977 printf("WARNING: This transition/transversion ratio\n");
1978 printf(" is impossible with these base frequencies!\n");
1979 printf("Transition/transversion parameter reset\n\n");
1980 rdta->xi = 0.000001;
1981 rdta->xv = 1.0 - rdta->xi;
1982 rdta->ttratio = (sumb * rdta->xi / rdta->xv
1983 + rdta->freqa * rdta->freqg
1984 + rdta->freqc * rdta->freqt)
1985 / (rdta->freqr * rdta->freqy);
1986 printf("Transition/transversion ratio = %10.6f\n\n", rdta->ttratio);
1987 }
1988 printf("(Transition/transversion parameter = %10.6f)\n\n",
1989 rdta->xi/rdta->xv);
1990 rdta->fracchange = 2.0 * rdta->xi * (rdta->freqa * rdta->freqgr
1991 + rdta->freqc * rdta->freqty)
1992 + rdta->xv * (1.0 - rdta->freqa * rdta->freqa
1993 - rdta->freqc * rdta->freqc
1994 - rdta->freqg * rdta->freqg
1995 - rdta->freqt * rdta->freqt);
1996 } /* reportfreqs */
1997
1998
linkdata2tree(rawdata * rdta,cruncheddata * cdta,tree * tr)1999 boolean linkdata2tree (rawdata *rdta, cruncheddata *cdta, tree *tr)
2000 /* Link data array to the tree tips */
2001 { /* linkdata2tree */
2002 int i;
2003
2004 for (i = 1; i <= tr->mxtips; i++) { /* Associate data with tips */
2005 tr->nodep[i]->tip = &(rdta->y[i-1][0]);
2006 }
2007
2008 tr->rdta = rdta;
2009 tr->cdta = cdta;
2010 return TRUE;
2011 } /* linkdata2tree */
2012
2013
setupxarray(int npat)2014 xarray *setupxarray (int npat)
2015 { /* setupxarray */
2016 xarray *x;
2017 likelivector *data;
2018
2019 x = (xarray *) Malloc(sizeof(xarray));
2020 if (x) {
2021 data = (likelivector *) Malloc(npat * sizeof(likelivector));
2022 if (data) {
2023 x->lv = data;
2024 x->prev = x->next = x;
2025 x->owner = (node *) NULL;
2026 }
2027 else {
2028 Free(x);
2029 return (xarray *) NULL;
2030 }
2031 }
2032 return x;
2033 } /* setupxarray */
2034
2035
linkxarray(int req,int min,int npat,xarray ** freexptr,xarray ** usedxptr)2036 boolean linkxarray (int req, int min, int npat,
2037 xarray **freexptr, xarray **usedxptr)
2038 /* Link a set of xarrays */
2039 { /* linkxarray */
2040 xarray *first, *prev, *x;
2041 int i;
2042
2043 first = prev = (xarray *) NULL;
2044 i = 0;
2045
2046 do {
2047 x = setupxarray(npat);
2048 if (x) {
2049 if (! first) first = x;
2050 else {
2051 prev->next = x;
2052 x->prev = prev;
2053 }
2054 prev = x;
2055 i++;
2056 }
2057 else {
2058 printf("ERROR: Failure to get requested xarray memory\n");
2059 if (i < min) return FALSE;
2060 }
2061 } while ((i < req) && x);
2062
2063 if (first) {
2064 first->prev = prev;
2065 prev->next = first;
2066 }
2067
2068 *freexptr = first;
2069 *usedxptr = (xarray *) NULL;
2070
2071 return TRUE;
2072 } /* linkxarray */
2073
2074
setupnodex(tree * tr)2075 boolean setupnodex (tree *tr)
2076 { /* setupnodex */
2077 nodeptr p;
2078 int i;
2079
2080 for (i = tr->mxtips + 1; (i <= 2*(tr->mxtips) - 2); i++) {
2081 p = tr->nodep[i];
2082 if (! (p->x = setupxarray(tr->cdta->endsite))) {
2083 printf("ERROR: Failure to get internal node xarray memory\n");
2084 return FALSE;
2085 }
2086 }
2087
2088 return TRUE;
2089 } /* setupnodex */
2090
2091
getxtip(nodeptr p)2092 xarray *getxtip (nodeptr p)
2093 { /* getxtip */
2094 xarray *new;
2095 boolean splice;
2096
2097 if (! p) return (xarray *) NULL;
2098
2099 splice = FALSE;
2100
2101 if (p->x) { /* array is there; move to tail of list */
2102 new = p->x;
2103 if (new == new->prev) ; /* linked to self; leave it */
2104 else if (new == usedxtip) usedxtip = usedxtip->next; /* at head */
2105 else if (new == usedxtip->prev) ; /* already at tail */
2106 else { /* move to tail of list */
2107 new->prev->next = new->next;
2108 new->next->prev = new->prev;
2109 splice = TRUE;
2110 }
2111 }
2112
2113 else if (freextip) { /* take from unused list */
2114 p->x = new = freextip;
2115 new->owner = p;
2116 if (new->prev != new) { /* not only member of freelist */
2117 new->prev->next = new->next;
2118 new->next->prev = new->prev;
2119 freextip = new->next;
2120 }
2121 else
2122 freextip = (xarray *) NULL;
2123
2124 splice = TRUE;
2125 }
2126
2127 else if (usedxtip) { /* take from head of used list */
2128 usedxtip->owner->x = (xarray *) NULL;
2129 p->x = new = usedxtip;
2130 new->owner = p;
2131 usedxtip = usedxtip->next;
2132 }
2133
2134 else {
2135 printf("ERROR: Unable to locate memory for tip %d.\n", p->number);
2136 return (xarray *) NULL;
2137 }
2138
2139 if (splice) {
2140 if (usedxtip) { /* list is not empty */
2141 usedxtip->prev->next = new;
2142 new->prev = usedxtip->prev;
2143 usedxtip->prev = new;
2144 new->next = usedxtip;
2145 }
2146 else
2147 usedxtip = new->prev = new->next = new;
2148 }
2149
2150 return new;
2151 } /* getxtip */
2152
2153
getxnode(nodeptr p)2154 xarray *getxnode (nodeptr p)
2155 /* Ensure that internal node p has memory */
2156 { /* getxnode */
2157 nodeptr s;
2158
2159 if (! (p->x)) { /* Move likelihood array on this node to sector p */
2160 if ((s = p->next)->x || (s = s->next)->x) {
2161 p->x = s->x;
2162 s->x = (xarray *) NULL;
2163 }
2164 else {
2165 printf("ERROR: Unable to locate memory at node %d.\n", p->number);
2166 exit(1);
2167 }
2168 }
2169 return p->x;
2170 } /* getxnode */
2171
2172
newview(tree * tr,nodeptr p)2173 boolean newview (tree *tr, nodeptr p) /* Update likelihoods at node */
2174 { /* newview */
2175 double zq, lzq, xvlzq, zr, lzr, xvlzr;
2176 nodeptr q, r;
2177 likelivector *lp, *lq, *lr;
2178 int i;
2179
2180 if (p->tip) { /* Make sure that data are at tip */
2181 likelivector *l;
2182 int code;
2183 yType *yptr;
2184
2185 if (p->x) return TRUE; /* They are already there */
2186
2187 if (! getxtip(p)) return FALSE; /* They are not, so get memory */
2188 l = p->x->lv; /* Pointer to first likelihood vector value */
2189 yptr = p->tip; /* Pointer to first nucleotide datum */
2190 for (i = 0; i < tr->cdta->endsite; i++) {
2191 code = *yptr++;
2192 l->a = code & 1;
2193 l->c = (code >> 1) & 1;
2194 l->g = (code >> 2) & 1;
2195 l->t = (code >> 3) & 1;
2196 l->exp = 0;
2197 l++;
2198 }
2199 return TRUE;
2200 }
2201
2202 /* Internal node needs update */
2203
2204 q = p->next->back;
2205 r = p->next->next->back;
2206
2207 while ((! p->x) || (! q->x) || (! r->x)) {
2208 if (! q->x) if (! newview(tr, q)) return FALSE;
2209 if (! r->x) if (! newview(tr, r)) return FALSE;
2210 if (! p->x) if (! getxnode(p)) return FALSE;
2211 }
2212
2213 lp = p->x->lv;
2214
2215 lq = q->x->lv;
2216 zq = q->z;
2217 lzq = (zq > zmin) ? log(zq) : log(zmin);
2218 xvlzq = tr->rdta->xv * lzq;
2219
2220 lr = r->x->lv;
2221 zr = r->z;
2222 lzr = (zr > zmin) ? log(zr) : log(zmin);
2223 xvlzr = tr->rdta->xv * lzr;
2224
2225 { double zzqtable[maxcategories+1], zvqtable[maxcategories+1],
2226 zzrtable[maxcategories+1], zvrtable[maxcategories+1],
2227 *zzqptr, *zvqptr, *zzrptr, *zvrptr, *rptr;
2228 double fxqr, fxqy, fxqn, sumaq, sumgq, sumcq, sumtq,
2229 fxrr, fxry, fxrn, ki, tempi, tempj;
2230 int *cptr;
2231
2232 # ifdef Vectorize
2233 double zzq[maxpatterns], zvq[maxpatterns],
2234 zzr[maxpatterns], zvr[maxpatterns];
2235 int cat;
2236 # else
2237 double zzq, zvq, zzr, zvr;
2238 int cat;
2239 # endif
2240
2241 rptr = &(tr->rdta->catrat[1]);
2242 zzqptr = &(zzqtable[1]);
2243 zvqptr = &(zvqtable[1]);
2244 zzrptr = &(zzrtable[1]);
2245 zvrptr = &(zvrtable[1]);
2246
2247 # ifdef Vectorize
2248 # pragma IVDEP
2249 # endif
2250
2251 for (i = 1; i <= tr->rdta->categs; i++) { /* exps for each category */
2252 ki = *rptr++;
2253 *zzqptr++ = exp(ki * lzq);
2254 *zvqptr++ = exp(ki * xvlzq);
2255 *zzrptr++ = exp(ki * lzr);
2256 *zvrptr++ = exp(ki * xvlzr);
2257 }
2258
2259 cptr = &(tr->cdta->patcat[0]);
2260
2261 # ifdef Vectorize
2262 # pragma IVDEP
2263 for (i = 0; i < tr->cdta->endsite; i++) {
2264 cat = *cptr++;
2265 zzq[i] = zzqtable[cat];
2266 zvq[i] = zvqtable[cat];
2267 zzr[i] = zzrtable[cat];
2268 zvr[i] = zvrtable[cat];
2269 }
2270
2271 # pragma IVDEP
2272 for (i = 0; i < tr->cdta->endsite; i++) {
2273 fxqr = tr->rdta->freqa * lq->a + tr->rdta->freqg * lq->g;
2274 fxqy = tr->rdta->freqc * lq->c + tr->rdta->freqt * lq->t;
2275 fxqn = fxqr + fxqy;
2276 tempi = fxqr * tr->rdta->invfreqr;
2277 tempj = zvq[i] * (tempi-fxqn) + fxqn;
2278 sumaq = zzq[i] * (lq->a - tempi) + tempj;
2279 sumgq = zzq[i] * (lq->g - tempi) + tempj;
2280 tempi = fxqy * tr->rdta->invfreqy;
2281 tempj = zvq[i] * (tempi-fxqn) + fxqn;
2282 sumcq = zzq[i] * (lq->c - tempi) + tempj;
2283 sumtq = zzq[i] * (lq->t - tempi) + tempj;
2284
2285 fxrr = tr->rdta->freqa * lr->a + tr->rdta->freqg * lr->g;
2286 fxry = tr->rdta->freqc * lr->c + tr->rdta->freqt * lr->t;
2287 fxrn = fxrr + fxry;
2288 tempi = fxrr * tr->rdta->invfreqr;
2289 tempj = zvr[i] * (tempi-fxrn) + fxrn;
2290 lp->a = sumaq * (zzr[i] * (lr->a - tempi) + tempj);
2291 lp->g = sumgq * (zzr[i] * (lr->g - tempi) + tempj);
2292 tempi = fxry * tr->rdta->invfreqy;
2293 tempj = zvr[i] * (tempi-fxrn) + fxrn;
2294 lp->c = sumcq * (zzr[i] * (lr->c - tempi) + tempj);
2295 lp->t = sumtq * (zzr[i] * (lr->t - tempi) + tempj);
2296 lp->exp = lq->exp + lr->exp;
2297
2298 if (lp->a < minlikelihood && lp->g < minlikelihood
2299 && lp->c < minlikelihood && lp->t < minlikelihood) {
2300 lp->a *= twotothe256;
2301 lp->g *= twotothe256;
2302 lp->c *= twotothe256;
2303 lp->t *= twotothe256;
2304 lp->exp += 1;
2305 }
2306 lp++;
2307 lq++;
2308 lr++;
2309 }
2310
2311 # else /* Not Vectorize */
2312 for (i = 0; i < tr->cdta->endsite; i++) {
2313 cat = *cptr++;
2314 zzq = zzqtable[cat];
2315 zvq = zvqtable[cat];
2316 fxqr = tr->rdta->freqa * lq->a + tr->rdta->freqg * lq->g;
2317 fxqy = tr->rdta->freqc * lq->c + tr->rdta->freqt * lq->t;
2318 fxqn = fxqr + fxqy;
2319 tempi = fxqr * tr->rdta->invfreqr;
2320 tempj = zvq * (tempi-fxqn) + fxqn;
2321 sumaq = zzq * (lq->a - tempi) + tempj;
2322 sumgq = zzq * (lq->g - tempi) + tempj;
2323 tempi = fxqy * tr->rdta->invfreqy;
2324 tempj = zvq * (tempi-fxqn) + fxqn;
2325 sumcq = zzq * (lq->c - tempi) + tempj;
2326 sumtq = zzq * (lq->t - tempi) + tempj;
2327
2328 zzr = zzrtable[cat];
2329 zvr = zvrtable[cat];
2330 fxrr = tr->rdta->freqa * lr->a + tr->rdta->freqg * lr->g;
2331 fxry = tr->rdta->freqc * lr->c + tr->rdta->freqt * lr->t;
2332 fxrn = fxrr + fxry;
2333 tempi = fxrr * tr->rdta->invfreqr;
2334 tempj = zvr * (tempi-fxrn) + fxrn;
2335 lp->a = sumaq * (zzr * (lr->a - tempi) + tempj);
2336 lp->g = sumgq * (zzr * (lr->g - tempi) + tempj);
2337 tempi = fxry * tr->rdta->invfreqy;
2338 tempj = zvr * (tempi-fxrn) + fxrn;
2339 lp->c = sumcq * (zzr * (lr->c - tempi) + tempj);
2340 lp->t = sumtq * (zzr * (lr->t - tempi) + tempj);
2341 lp->exp = lq->exp + lr->exp;
2342
2343 if (lp->a < minlikelihood && lp->g < minlikelihood
2344 && lp->c < minlikelihood && lp->t < minlikelihood) {
2345 lp->a *= twotothe256;
2346 lp->g *= twotothe256;
2347 lp->c *= twotothe256;
2348 lp->t *= twotothe256;
2349 lp->exp += 1;
2350 }
2351 lp++;
2352 lq++;
2353 lr++;
2354 }
2355 # endif /* Vectorize or not */
2356
2357 return TRUE;
2358 }
2359 } /* newview */
2360
2361
evaluate(tree * tr,nodeptr p)2362 double evaluate (tree *tr, nodeptr p)
2363 { /* evaluate */
2364 double sum, z, lz, xvlz,
2365 ki, fxpa, fxpc, fxpg, fxpt, fxpr, fxpy, fxqr, fxqy,
2366 suma, sumb, sumc, term;
2367
2368 # ifdef Vectorize
2369 double zz[maxpatterns], zv[maxpatterns];
2370 # else
2371 double zz, zv;
2372 # endif
2373
2374 double zztable[maxcategories+1], zvtable[maxcategories+1],
2375 *zzptr, *zvptr;
2376 double *log_f, *rptr;
2377 likelivector *lp, *lq;
2378 nodeptr q;
2379 int cat, *cptr, i, *wptr;
2380
2381 q = p->back;
2382 while ((! p->x) || (! q->x)) {
2383 if (! (p->x)) if (! newview(tr, p)) return badEval;
2384 if (! (q->x)) if (! newview(tr, q)) return badEval;
2385 }
2386
2387 lp = p->x->lv;
2388 lq = q->x->lv;
2389
2390 z = p->z;
2391 if (z < zmin) z = zmin;
2392 lz = log(z);
2393 xvlz = tr->rdta->xv * lz;
2394
2395 rptr = &(tr->rdta->catrat[1]);
2396 zzptr = &(zztable[1]);
2397 zvptr = &(zvtable[1]);
2398
2399 # ifdef Vectorize
2400 # pragma IVDEP
2401 # endif
2402
2403 for (i = 1; i <= tr->rdta->categs; i++) {
2404 ki = *rptr++;
2405 *zzptr++ = exp(ki * lz);
2406 *zvptr++ = exp(ki * xvlz);
2407 }
2408
2409 wptr = &(tr->cdta->aliaswgt[0]);
2410 cptr = &(tr->cdta->patcat[0]);
2411 log_f = tr->log_f;
2412 tr->log_f_valid = tr->cdta->endsite;
2413 sum = 0.0;
2414
2415 # ifdef Vectorize
2416 # pragma IVDEP
2417 for (i = 0; i < tr->cdta->endsite; i++) {
2418 cat = *cptr++;
2419 zz[i] = zztable[cat];
2420 zv[i] = zvtable[cat];
2421 }
2422
2423 # pragma IVDEP
2424 for (i = 0; i < tr->cdta->endsite; i++) {
2425 fxpa = tr->rdta->freqa * lp->a;
2426 fxpg = tr->rdta->freqg * lp->g;
2427 fxpc = tr->rdta->freqc * lp->c;
2428 fxpt = tr->rdta->freqt * lp->t;
2429 suma = fxpa * lq->a + fxpc * lq->c + fxpg * lq->g + fxpt * lq->t;
2430 fxqr = tr->rdta->freqa * lq->a + tr->rdta->freqg * lq->g;
2431 fxqy = tr->rdta->freqc * lq->c + tr->rdta->freqt * lq->t;
2432 fxpr = fxpa + fxpg;
2433 fxpy = fxpc + fxpt;
2434 sumc = (fxpr + fxpy) * (fxqr + fxqy);
2435 sumb = fxpr * fxqr * tr->rdta->invfreqr + fxpy * fxqy * tr->rdta->invfreqy;
2436 suma -= sumb;
2437 sumb -= sumc;
2438 term = log(zz[i] * suma + zv[i] * sumb + sumc) + (lp->exp + lq->exp)*log(minlikelihood);
2439 sum += *wptr++ * term;
2440 *log_f++ = term;
2441 lp++;
2442 lq++;
2443 }
2444
2445 # else /* Not Vectorize */
2446 for (i = 0; i < tr->cdta->endsite; i++) {
2447 cat = *cptr++;
2448 zz = zztable[cat];
2449 zv = zvtable[cat];
2450 fxpa = tr->rdta->freqa * lp->a;
2451 fxpg = tr->rdta->freqg * lp->g;
2452 fxpc = tr->rdta->freqc * lp->c;
2453 fxpt = tr->rdta->freqt * lp->t;
2454 suma = fxpa * lq->a + fxpc * lq->c + fxpg * lq->g + fxpt * lq->t;
2455 fxqr = tr->rdta->freqa * lq->a + tr->rdta->freqg * lq->g;
2456 fxqy = tr->rdta->freqc * lq->c + tr->rdta->freqt * lq->t;
2457 fxpr = fxpa + fxpg;
2458 fxpy = fxpc + fxpt;
2459 sumc = (fxpr + fxpy) * (fxqr + fxqy);
2460 sumb = fxpr * fxqr * tr->rdta->invfreqr + fxpy * fxqy * tr->rdta->invfreqy;
2461 suma -= sumb;
2462 sumb -= sumc;
2463 term = log(zz * suma + zv * sumb + sumc) + (lp->exp + lq->exp)*log(minlikelihood);
2464 /* printf("evaluate: %le\n", term); */
2465 sum += *wptr++ * term;
2466 *log_f++ = term;
2467 lp++;
2468 lq++;
2469 }
2470 # endif /* Vectorize or not */
2471
2472 tr->likelihood = sum;
2473 return sum;
2474 } /* evaluate */
2475
2476
makenewz(tree * tr,nodeptr p,nodeptr q,double z0,int maxiter)2477 double makenewz (tree *tr, nodeptr p, nodeptr q, double z0, int maxiter)
2478 { /* makenewz */
2479 likelivector *lp, *lq;
2480 double *abi, *bci, *sumci, *abptr, *bcptr, *sumcptr;
2481 double dlnLidlz, dlnLdlz, d2lnLdlz2, z, zprev, zstep, lz, xvlz,
2482 ki, suma, sumb, sumc, ab, bc, inv_Li, t1, t2,
2483 fx1a, fx1c, fx1g, fx1t, fx1r, fx1y, fx2r, fx2y;
2484 double zztable[maxcategories+1], zvtable[maxcategories+1],
2485 *zzptr, *zvptr;
2486 double *rptr, *wrptr, *wr2ptr;
2487 int cat, *cptr, i, curvatOK;
2488
2489
2490 while ((! p->x) || (! q->x)) {
2491 if (! (p->x)) if (! newview(tr, p)) return badZ;
2492 if (! (q->x)) if (! newview(tr, q)) return badZ;
2493 }
2494
2495 lp = p->x->lv;
2496 lq = q->x->lv;
2497
2498 { unsigned scratch_size;
2499 scratch_size = sizeof(double) * tr->cdta->endsite;
2500 if ((abi = (double *) Malloc(scratch_size)) &&
2501 (bci = (double *) Malloc(scratch_size)) &&
2502 (sumci = (double *) Malloc(scratch_size))) ;
2503 else {
2504 printf("ERROR: makenewz unable to obtain space for arrays\n");
2505 return badZ;
2506 }
2507 }
2508 abptr = abi;
2509 bcptr = bci;
2510 sumcptr = sumci;
2511
2512 # ifdef Vectorize
2513 # pragma IVDEP
2514 # endif
2515
2516 for (i = 0; i < tr->cdta->endsite; i++) {
2517 fx1a = tr->rdta->freqa * lp->a;
2518 fx1g = tr->rdta->freqg * lp->g;
2519 fx1c = tr->rdta->freqc * lp->c;
2520 fx1t = tr->rdta->freqt * lp->t;
2521 suma = fx1a * lq->a + fx1c * lq->c + fx1g * lq->g + fx1t * lq->t;
2522 fx2r = tr->rdta->freqa * lq->a + tr->rdta->freqg * lq->g;
2523 fx2y = tr->rdta->freqc * lq->c + tr->rdta->freqt * lq->t;
2524 fx1r = fx1a + fx1g;
2525 fx1y = fx1c + fx1t;
2526 *sumcptr++ = sumc = (fx1r + fx1y) * (fx2r + fx2y);
2527 sumb = fx1r * fx2r * tr->rdta->invfreqr + fx1y * fx2y * tr->rdta->invfreqy;
2528 *abptr++ = suma - sumb;
2529 *bcptr++ = sumb - sumc;
2530 lp++;
2531 lq++;
2532 }
2533
2534 z = z0;
2535 do {
2536 zprev = z;
2537 zstep = (1.0 - zmax) * z + zmin;
2538 curvatOK = FALSE;
2539
2540 do {
2541 if (z < zmin) z = zmin;
2542 else if (z > zmax) z = zmax;
2543
2544 lz = log(z);
2545 xvlz = tr->rdta->xv * lz;
2546 rptr = &(tr->rdta->catrat[1]);
2547 zzptr = &(zztable[1]);
2548 zvptr = &(zvtable[1]);
2549
2550 # ifdef Vectorize
2551 # pragma IVDEP
2552 # endif
2553
2554 for (i = 1; i <= tr->rdta->categs; i++) {
2555 ki = *rptr++;
2556 *zzptr++ = exp(ki * lz);
2557 *zvptr++ = exp(ki * xvlz);
2558 }
2559
2560 abptr = abi;
2561 bcptr = bci;
2562 sumcptr = sumci;
2563 cptr = &(tr->cdta->patcat[0]);
2564 wrptr = &(tr->cdta->wr[0]);
2565 wr2ptr = &(tr->cdta->wr2[0]);
2566 dlnLdlz = 0.0; /* = d(ln(likelihood))/d(lz) */
2567 d2lnLdlz2 = 0.0; /* = d2(ln(likelihood))/d(lz)2 */
2568
2569 # ifdef Vectorize
2570 # pragma IVDEP
2571 # endif
2572
2573 for (i = 0; i < tr->cdta->endsite; i++) {
2574 cat = *cptr++; /* ratecategory(i) */
2575 ab = *abptr++ * zztable[cat];
2576 bc = *bcptr++ * zvtable[cat];
2577 sumc = *sumcptr++;
2578 inv_Li = 1.0/(ab + bc + sumc);
2579 t1 = ab * inv_Li;
2580 t2 = tr->rdta->xv * bc * inv_Li;
2581 dlnLidlz = t1 + t2;
2582 dlnLdlz += *wrptr++ * dlnLidlz;
2583 d2lnLdlz2 += *wr2ptr++ * (t1 + tr->rdta->xv * t2 - dlnLidlz * dlnLidlz);
2584 }
2585
2586 if ((d2lnLdlz2 >= 0.0) && (z < zmax))
2587 zprev = z = 0.37 * z + 0.63; /* Bad curvature, shorten branch */
2588 else
2589 curvatOK = TRUE;
2590
2591 } while (! curvatOK);
2592
2593 if (d2lnLdlz2 < 0.0) {
2594 z *= exp(-dlnLdlz / d2lnLdlz2);
2595 if (z < zmin) z = zmin;
2596 if (z > 0.25 * zprev + 0.75) /* Limit steps toward z = 1.0 */
2597 z = 0.25 * zprev + 0.75;
2598 }
2599
2600 if (z > zmax) z = zmax;
2601
2602 } while ((--maxiter > 0) && (ABS(z - zprev) > zstep));
2603
2604 Free(abi);
2605 Free(bci);
2606 Free(sumci);
2607
2608 /* printf("makenewz: %le\n", z); */
2609
2610 return z;
2611 } /* makenewz */
2612
2613
update(tree * tr,nodeptr p)2614 boolean update (tree *tr, nodeptr p)
2615 { /* update */
2616 nodeptr q;
2617 double z0, z;
2618
2619 q = p->back;
2620 z0 = q->z;
2621 if ((z = makenewz(tr, p, q, z0, newzpercycle)) == badZ) return FALSE;
2622 p->z = q->z = z;
2623 if (ABS(z - z0) > deltaz) tr->smoothed = FALSE;
2624 return TRUE;
2625 } /* update */
2626
2627
smooth(tree * tr,nodeptr p)2628 boolean smooth (tree *tr, nodeptr p)
2629 { /* smooth */
2630 nodeptr q;
2631
2632 if (! update(tr, p)) return FALSE; /* Adjust branch */
2633 if (! p->tip) { /* Adjust descendants */
2634 q = p->next;
2635 while (q != p) {
2636 if (! smooth(tr, q->back)) return FALSE;
2637 q = q->next;
2638 }
2639
2640 # if ReturnSmoothedView
2641 if (! newview(tr, p)) return FALSE;
2642 # endif
2643 }
2644
2645 return TRUE;
2646 } /* smooth */
2647
2648
smoothTree(tree * tr,int maxtimes)2649 boolean smoothTree (tree *tr, int maxtimes)
2650 { /* smoothTree */
2651 nodeptr p, q;
2652
2653 p = tr->start;
2654
2655 while (--maxtimes >= 0) {
2656 tr->smoothed = TRUE;
2657 if (! smooth(tr, p->back)) return FALSE;
2658 if (! p->tip) {
2659 q = p->next;
2660 while (q != p) {
2661 if (! smooth(tr, q->back)) return FALSE;
2662 q = q->next;
2663 }
2664 }
2665 if (tr->smoothed) break;
2666 }
2667
2668 return TRUE;
2669 } /* smoothTree */
2670
2671
localSmooth(tree * tr,nodeptr p,int maxtimes)2672 boolean localSmooth (tree *tr, nodeptr p, int maxtimes)
2673 { /* localSmooth -- Smooth branches around p */
2674 nodeptr q;
2675
2676 if (p->tip) return FALSE; /* Should be an error */
2677
2678 while (--maxtimes >= 0) {
2679 tr->smoothed = TRUE;
2680 q = p;
2681 do {
2682 if (! update(tr, q)) return FALSE;
2683 q = q->next;
2684 } while (q != p);
2685 if (tr->smoothed) break;
2686 }
2687
2688 tr->smoothed = FALSE; /* Only smooth locally */
2689 return TRUE;
2690 } /* localSmooth */
2691
2692
hookup(nodeptr p,nodeptr q,double z)2693 void hookup (nodeptr p, nodeptr q, double z)
2694 { /* hookup */
2695 p->back = q;
2696 q->back = p;
2697 p->z = q->z = z;
2698 } /* hookup */
2699
2700
2701 /* Insert node p into branch q <-> q->back */
2702
insert(tree * tr,nodeptr p,nodeptr q,boolean glob)2703 boolean insert (tree *tr, nodeptr p, nodeptr q, boolean glob)
2704 /* glob -- Smooth tree globally? */
2705
2706 /* q
2707 /.
2708 add/ .
2709 / .
2710 pn .
2711 s ---- p .remove
2712 pnn .
2713 \ .
2714 add\ .
2715 \. pn = p->next;
2716 r pnn = p->next->next;
2717 */
2718
2719 { /* insert */
2720 nodeptr r, s;
2721
2722 r = q->back;
2723 s = p->back;
2724
2725 # if BestInsertAverage && ! Master
2726 { double zqr, zqs, zrs, lzqr, lzqs, lzrs, lzsum, lzq, lzr, lzs, lzmax;
2727
2728 if ((zqr = makenewz(tr, q, r, q->z, iterations)) == badZ) return FALSE;
2729 if ((zqs = makenewz(tr, q, s, defaultz, iterations)) == badZ) return FALSE;
2730 if ((zrs = makenewz(tr, r, s, defaultz, iterations)) == badZ) return FALSE;
2731
2732 lzqr = (zqr > zmin) ? log(zqr) : log(zmin); /* long branches */
2733 lzqs = (zqs > zmin) ? log(zqs) : log(zmin);
2734 lzrs = (zrs > zmin) ? log(zrs) : log(zmin);
2735 lzsum = 0.5 * (lzqr + lzqs + lzrs);
2736
2737 lzq = lzsum - lzrs;
2738 lzr = lzsum - lzqs;
2739 lzs = lzsum - lzqr;
2740 lzmax = log(zmax);
2741
2742 if (lzq > lzmax) {lzq = lzmax; lzr = lzqr; lzs = lzqs;} /* short */
2743 else if (lzr > lzmax) {lzr = lzmax; lzq = lzqr; lzs = lzrs;}
2744 else if (lzs > lzmax) {lzs = lzmax; lzq = lzqs; lzr = lzrs;}
2745
2746 hookup(p->next, q, exp(lzq));
2747 hookup(p->next->next, r, exp(lzr));
2748 hookup(p, s, exp(lzs));
2749 }
2750
2751 # else
2752 { double z;
2753 z = sqrt(q->z);
2754 hookup(p->next, q, z);
2755 hookup(p->next->next, r, z);
2756 }
2757
2758 # endif
2759
2760 if (! newview(tr, p)) return FALSE; /* So that p is valid at update */
2761 tr->opt_level = 0;
2762
2763 # if ! Master /* Smoothings are done by slave */
2764 if (glob) { /* Smooth whole tree */
2765 if (! smoothTree(tr, smoothings)) return FALSE;
2766 }
2767 else { /* Smooth locale of p */
2768 if (! localSmooth(tr, p, smoothings)) return FALSE;
2769 }
2770
2771 # else
2772 tr->likelihood = unlikely;
2773 # endif
2774 return TRUE;
2775 } /* insert */
2776
2777
removeNode(tree * tr,nodeptr p)2778 nodeptr removeNode (tree *tr, nodeptr p)
2779
2780 /* q
2781 .|
2782 remove. |
2783 . |
2784 pn |
2785 s ---- p |add
2786 pnn |
2787 . |
2788 remove. |
2789 .| pn = p->next;
2790 r pnn = p->next->next;
2791 */
2792
2793 /* remove p and return where it was */
2794 { /* removeNode */
2795 double zqr;
2796 nodeptr q, r;
2797
2798 q = p->next->back;
2799 r = p->next->next->back;
2800 zqr = q->z * r->z;
2801 # if ! Master
2802 if ((zqr = makenewz(tr, q, r, zqr, iterations)) == badZ) return (node *) NULL;
2803 # endif
2804 hookup(q, r, zqr);
2805
2806 p->next->next->back = p->next->back = (node *) NULL;
2807 return q;
2808 } /* removeNode */
2809
2810
initrav(tree * tr,nodeptr p)2811 boolean initrav (tree *tr, nodeptr p)
2812 { /* initrav */
2813 nodeptr q;
2814
2815 if (! p->tip) {
2816 q = p->next;
2817 do {
2818 if (! initrav(tr, q->back)) return FALSE;
2819 q = q->next;
2820 } while (q != p);
2821 if (! newview(tr, p)) return FALSE;
2822 }
2823
2824 return TRUE;
2825 } /* initrav */
2826
2827
buildNewTip(tree * tr,nodeptr p)2828 nodeptr buildNewTip (tree *tr, nodeptr p)
2829 { /* buildNewTip */
2830 nodeptr q;
2831
2832 q = tr->nodep[(tr->nextnode)++];
2833 hookup(p, q, defaultz);
2834 return q;
2835 } /* buildNewTip */
2836
2837
buildSimpleTree(tree * tr,int ip,int iq,int ir)2838 boolean buildSimpleTree (tree *tr, int ip, int iq, int ir)
2839 { /* buildSimpleTree */
2840 /* p, q and r are tips meeting at s */
2841 nodeptr p, s;
2842 int i;
2843
2844 i = MIN(ip, iq);
2845 if (ir < i) i = ir;
2846 tr->start = tr->nodep[i];
2847 tr->ntips = 3;
2848 p = tr->nodep[ip];
2849 hookup(p, tr->nodep[iq], defaultz);
2850 s = buildNewTip(tr, tr->nodep[ir]);
2851
2852 return insert(tr, s, p, FALSE); /* Smoothing is local to s */
2853 } /* buildSimpleTree */
2854
2855
strchr(char * str,int chr)2856 char * strchr (char *str, int chr)
2857 { /* strchr */
2858 int c;
2859
2860 while (c = *str) {if (c == chr) return str; str++;}
2861 return (char *) NULL;
2862 } /* strchr */
2863
2864
strstr(char * str1,char * str2)2865 char * strstr (char *str1, char *str2)
2866 { /* strstr */
2867 char *s1, *s2;
2868 int c;
2869
2870 while (*(s1 = str1)) {
2871 s2 = str2;
2872 do {
2873 if (! (c = *s2++)) return str1;
2874 }
2875 while (*s1++ == c);
2876 str1++;
2877 }
2878 return (char *) NULL;
2879 } /* strstr */
2880
2881
readKeyValue(char * string,char * key,char * format,void * value)2882 boolean readKeyValue (char *string, char *key, char *format, void *value)
2883 { /* readKeyValue */
2884
2885 if (! (string = strstr(string, key))) return FALSE;
2886 string += strlen(key);
2887 if (! (string = strchr(string, '='))) return FALSE;
2888 string++;
2889 return sscanf(string, format, value); /* 1 if read, otherwise 0 */
2890 } /* readKeyValue */
2891
2892
2893 #if Master || Slave
2894
str_readTreeLikelihood(char * treestr)2895 double str_readTreeLikelihood (char *treestr)
2896 { /* str_readTreeLikelihood */
2897 double lk1;
2898 char *com, *com_end;
2899 boolean readKeyValue();
2900
2901 if ((com = strchr(treestr, '[')) && (com < strchr(treestr, '('))
2902 && (com_end = strchr(com, ']'))) {
2903 com++;
2904 *com_end = 0;
2905 if (readKeyValue(com, likelihood_key, "%lg", (void *) &(lk1))) {
2906 *com_end = ']';
2907 return lk1;
2908 }
2909 }
2910
2911 fprintf(stderr, "ERROR reading likelihood in receiveTree\n");
2912 return badEval;
2913 } /* str_readTreeLikelihood */
2914
2915
sendTree(comm_block * comm,tree * tr)2916 boolean sendTree (comm_block *comm, tree *tr)
2917 { /* sendTree */
2918 char *treestr;
2919 char *treeString();
2920 # if Master
2921 void sendTreeNum();
2922 # endif
2923
2924 comm->done_flag = tr->likelihood > 0.0;
2925 if (comm->done_flag)
2926 write_comm_msg(comm, NULL);
2927
2928 else {
2929 treestr = (char *) Malloc((tr->ntips * (nmlngth+32)) + 256);
2930 if (! treestr) {
2931 fprintf(stderr, "sendTree: Malloc failure\n");
2932 return 0;
2933 }
2934
2935 # if Master
2936 if (send_ahead >= MAX_SEND_AHEAD) {
2937 double new_likelihood;
2938 int n_to_get;
2939
2940 n_to_get = (send_ahead+1)/2;
2941 sendTreeNum(n_to_get);
2942 send_ahead -= n_to_get;
2943 read_comm_msg(& comm_slave, treestr);
2944 new_likelihood = str_readTreeLikelihood(treestr);
2945 if (new_likelihood == badEval) return FALSE;
2946 if (! best_tr_recv || (new_likelihood > best_lk_recv)) {
2947 if (best_tr_recv) Free(best_tr_recv);
2948 best_tr_recv = Malloc(strlen(treestr) + 1);
2949 strcpy(best_tr_recv, treestr);
2950 best_lk_recv = new_likelihood;
2951 }
2952 }
2953 send_ahead++;
2954 # endif /* End #if Master */
2955
2956 REPORT_SEND_TREE;
2957 (void) treeString(treestr, tr, tr->start->back, 1);
2958 write_comm_msg(comm, treestr);
2959
2960 Free(treestr);
2961 }
2962
2963 return TRUE;
2964 } /* sendTree */
2965
2966
receiveTree(comm_block * comm,tree * tr)2967 boolean receiveTree (comm_block *comm, tree *tr)
2968 { /* receiveTree */
2969 char *treestr;
2970 boolean status;
2971 boolean str_treeReadLen();
2972
2973 treestr = (char *) Malloc((tr->ntips * (nmlngth+32)) + 256);
2974 if (! treestr) {
2975 fprintf(stderr, "receiveTree: Malloc failure\n");
2976 return 0;
2977 }
2978
2979 read_comm_msg(comm, treestr);
2980 if (comm->done_flag) {
2981 tr->likelihood = 1.0;
2982 status = TRUE;
2983 }
2984
2985 else {
2986 # if Master
2987 if (best_tr_recv) {
2988 if (str_readTreeLikelihood(treestr) < best_lk_recv) {
2989 strcpy(treestr, best_tr_recv); /* Overwrite new tree with best */
2990 }
2991 Free(best_tr_recv);
2992 best_tr_recv = NULL;
2993 }
2994 # endif /* End #if Master */
2995
2996 status = str_treeReadLen(treestr, tr);
2997 }
2998
2999 Free(treestr);
3000 return status;
3001 } /* receiveTree */
3002
3003
requestForWork(void)3004 void requestForWork (void)
3005 { /* requestForWork */
3006 p4_send(DNAML_REQUEST, DNAML_DISPATCHER_ID, NULL, 0);
3007 } /* requestForWork */
3008 #endif /* End #if Master || Slave */
3009
3010
3011 #if Master
sendTreeNum(int n_to_get)3012 void sendTreeNum(int n_to_get)
3013 { /* sendTreeNum */
3014 char scr[512];
3015
3016 sprintf(scr, "%d", n_to_get);
3017 p4_send(DNAML_NUM_TREE, DNAML_MERGER_ID, scr, strlen(scr)+1);
3018 } /* sendTreeNum */
3019
3020
getReturnedTrees(tree * tr,bestlist * bt,int n_tree_sent)3021 boolean getReturnedTrees (tree *tr, bestlist *bt, int n_tree_sent)
3022 /* n_tree_sent -- number of trees sent to slaves */
3023 { /* getReturnedTrees */
3024 void sendTreeNum();
3025 boolean receiveTree();
3026
3027 sendTreeNum(send_ahead);
3028 send_ahead = 0;
3029
3030 if (! receiveTree(& comm_slave, tr)) return FALSE;
3031 tr->smoothed = TRUE;
3032 (void) saveBestTree(bt, tr);
3033
3034 return TRUE;
3035 } /* getReturnedTrees */
3036 #endif
3037
3038
cacheZ(tree * tr)3039 void cacheZ (tree *tr)
3040 { /* cacheZ */
3041 nodeptr p;
3042 int nodes;
3043
3044 nodes = tr->mxtips + 3 * (tr->mxtips - 2);
3045 p = tr->nodep[1];
3046 while (nodes-- > 0) {p->z0 = p->z; p++;}
3047 } /* cacheZ */
3048
3049
restoreZ(tree * tr)3050 void restoreZ (tree *tr)
3051 { /* restoreZ */
3052 nodeptr p;
3053 int nodes;
3054
3055 nodes = tr->mxtips + 3 * (tr->mxtips - 2);
3056 p = tr->nodep[1];
3057 while (nodes-- > 0) {p->z = p->z0; p++;}
3058 } /* restoreZ */
3059
3060
testInsert(tree * tr,nodeptr p,nodeptr q,bestlist * bt,boolean fast)3061 boolean testInsert (tree *tr, nodeptr p, nodeptr q, bestlist *bt, boolean fast)
3062 { /* testInsert */
3063 double qz;
3064 nodeptr r;
3065
3066 r = q->back; /* Save original connection */
3067 qz = q->z;
3068 if (! insert(tr, p, q, ! fast)) return FALSE;
3069
3070 # if ! Master
3071 if (evaluate(tr, fast ? p->next->next : tr->start) == badEval) return FALSE;
3072 (void) saveBestTree(bt, tr);
3073 # else /* Master */
3074 tr->likelihood = unlikely;
3075 if (! sendTree(& comm_slave, tr)) return FALSE;
3076 # endif
3077
3078 /* remove p from this branch */
3079
3080 hookup(q, r, qz);
3081 p->next->next->back = p->next->back = (nodeptr) NULL;
3082 if (! fast) { /* With fast add, other values are still OK */
3083 restoreZ(tr); /* Restore branch lengths */
3084 # if ! Master /* Regenerate x values */
3085 if (! initrav(tr, p->back)) return FALSE;
3086 if (! initrav(tr, q)) return FALSE;
3087 if (! initrav(tr, r)) return FALSE;
3088 # endif
3089 }
3090
3091 return TRUE;
3092 } /* testInsert */
3093
3094
addTraverse(tree * tr,nodeptr p,nodeptr q,int mintrav,int maxtrav,bestlist * bt,boolean fast)3095 int addTraverse (tree *tr, nodeptr p, nodeptr q,
3096 int mintrav, int maxtrav, bestlist *bt, boolean fast)
3097 { /* addTraverse */
3098 int tested, newtested;
3099
3100 tested = 0;
3101 if (--mintrav <= 0) { /* Moved minimum distance? */
3102 if (! testInsert(tr, p, q, bt, fast)) return badRear;
3103 tested++;
3104 }
3105
3106 if ((! q->tip) && (--maxtrav > 0)) { /* Continue traverse? */
3107 newtested = addTraverse(tr, p, q->next->back,
3108 mintrav, maxtrav, bt, fast);
3109 if (newtested == badRear) return badRear;
3110 tested += newtested;
3111 newtested = addTraverse(tr, p, q->next->next->back,
3112 mintrav, maxtrav, bt, fast);
3113 if (newtested == badRear) return badRear;
3114 tested += newtested;
3115 }
3116
3117 return tested;
3118 } /* addTraverse */
3119
3120
rearrange(tree * tr,nodeptr p,int mintrav,int maxtrav,bestlist * bt)3121 int rearrange (tree *tr, nodeptr p, int mintrav, int maxtrav, bestlist *bt)
3122 /* rearranges the tree, globally or locally */
3123 { /* rearrange */
3124 double p1z, p2z, q1z, q2z;
3125 nodeptr p1, p2, q, q1, q2;
3126 int tested, mintrav2, newtested;
3127
3128 tested = 0;
3129 if (maxtrav < 1 || mintrav > maxtrav) return tested;
3130
3131 /* Moving subtree forward in tree. */
3132
3133 if (! p->tip) {
3134 p1 = p->next->back;
3135 p2 = p->next->next->back;
3136 if (! p1->tip || ! p2->tip) {
3137 p1z = p1->z;
3138 p2z = p2->z;
3139 if (! removeNode(tr, p)) return badRear;
3140 cacheZ(tr);
3141 if (! p1->tip) {
3142 newtested = addTraverse(tr, p, p1->next->back,
3143 mintrav, maxtrav, bt, FALSE);
3144 if (newtested == badRear) return badRear;
3145 tested += newtested;
3146 newtested = addTraverse(tr, p, p1->next->next->back,
3147 mintrav, maxtrav, bt, FALSE);
3148 if (newtested == badRear) return badRear;
3149 tested += newtested;
3150 }
3151
3152 if (! p2->tip) {
3153 newtested = addTraverse(tr, p, p2->next->back,
3154 mintrav, maxtrav, bt, FALSE);
3155 if (newtested == badRear) return badRear;
3156 tested += newtested;
3157 newtested = addTraverse(tr, p, p2->next->next->back,
3158 mintrav, maxtrav, bt, FALSE);
3159 if (newtested == badRear) return badRear;
3160 tested += newtested;
3161 }
3162
3163 hookup(p->next, p1, p1z); /* Restore original tree */
3164 hookup(p->next->next, p2, p2z);
3165 if (! (initrav(tr, tr->start)
3166 && initrav(tr, tr->start->back))) return badRear;
3167 }
3168 } /* if (! p->tip) */
3169
3170 /* Moving subtree backward in tree. Minimum move is 2 to avoid duplicates */
3171
3172 q = p->back;
3173 if (! q->tip && maxtrav > 1) {
3174 q1 = q->next->back;
3175 q2 = q->next->next->back;
3176 if (! q1->tip && (!q1->next->back->tip || !q1->next->next->back->tip) ||
3177 ! q2->tip && (!q2->next->back->tip || !q2->next->next->back->tip)) {
3178 q1z = q1->z;
3179 q2z = q2->z;
3180 if (! removeNode(tr, q)) return badRear;
3181 cacheZ(tr);
3182 mintrav2 = mintrav > 2 ? mintrav : 2;
3183
3184 if (! q1->tip) {
3185 newtested = addTraverse(tr, q, q1->next->back,
3186 mintrav2 , maxtrav, bt, FALSE);
3187 if (newtested == badRear) return badRear;
3188 tested += newtested;
3189 newtested = addTraverse(tr, q, q1->next->next->back,
3190 mintrav2 , maxtrav, bt, FALSE);
3191 if (newtested == badRear) return badRear;
3192 tested += newtested;
3193 }
3194
3195 if (! q2->tip) {
3196 newtested = addTraverse(tr, q, q2->next->back,
3197 mintrav2 , maxtrav, bt, FALSE);
3198 if (newtested == badRear) return badRear;
3199 tested += newtested;
3200 newtested = addTraverse(tr, q, q2->next->next->back,
3201 mintrav2 , maxtrav, bt, FALSE);
3202 if (newtested == badRear) return badRear;
3203 tested += newtested;
3204 }
3205
3206 hookup(q->next, q1, q1z); /* Restore original tree */
3207 hookup(q->next->next, q2, q2z);
3208 if (! (initrav(tr, tr->start)
3209 && initrav(tr, tr->start->back))) return badRear;
3210 }
3211 } /* if (! q->tip && maxtrav > 1) */
3212
3213 /* Move other subtrees */
3214
3215 if (! p->tip) {
3216 newtested = rearrange(tr, p->next->back, mintrav, maxtrav, bt);
3217 if (newtested == badRear) return badRear;
3218 tested += newtested;
3219 newtested = rearrange(tr, p->next->next->back, mintrav, maxtrav, bt);
3220 if (newtested == badRear) return badRear;
3221 tested += newtested;
3222 }
3223
3224 return tested;
3225 } /* rearrange */
3226
3227
fopen_pid(char * filenm,char * mode,char * name_pid)3228 FILE *fopen_pid (char *filenm, char *mode, char *name_pid)
3229 { /* fopen_pid */
3230
3231 (void) sprintf(name_pid, "%s.%d", filenm, getpid());
3232 return fopen(name_pid, mode);
3233 } /* fopen_pid */
3234
3235
3236 #if DeleteCheckpointFile
unlink_pid(char * filenm)3237 void unlink_pid (char *filenm)
3238 { /* unlink_pid */
3239 char scr[512];
3240
3241 (void) sprintf(scr, "%s.%d", filenm, getpid());
3242 unlink(scr);
3243 } /* unlink_pid */
3244 #endif
3245
3246
writeCheckpoint(tree * tr)3247 void writeCheckpoint (tree *tr)
3248 { /* writeCheckpoint */
3249 char filename[128];
3250 FILE *checkpointf;
3251 void treeOut();
3252
3253 checkpointf = fopen_pid(checkpointname, "a", filename);
3254 if (checkpointf) {
3255 treeOut(checkpointf, tr, treeNewick);
3256 (void) fclose(checkpointf);
3257 }
3258 } /* writeCheckpoint */
3259
3260
findAnyTip(nodeptr p)3261 node * findAnyTip(nodeptr p)
3262 { /* findAnyTip */
3263 return p->tip ? p : findAnyTip(p->next->back);
3264 } /* findAnyTip */
3265
3266
optimize(tree * tr,int maxtrav,bestlist * bt)3267 boolean optimize (tree *tr, int maxtrav, bestlist *bt)
3268 { /* optimize */
3269 nodeptr p;
3270 int mintrav, tested;
3271
3272 if (tr->ntips < 4) return TRUE;
3273
3274 writeCheckpoint(tr); /* checkpoint the starting tree */
3275
3276 if (maxtrav > tr->ntips - 3) maxtrav = tr->ntips - 3;
3277 if (maxtrav <= tr->opt_level) return TRUE;
3278
3279 printf(" Doing %s rearrangements\n",
3280 (maxtrav == 1) ? "local" :
3281 (maxtrav < tr->ntips - 3) ? "regional" : "global");
3282
3283 /* loop while tree gets better */
3284
3285 do {
3286 (void) startOpt(bt, tr);
3287 mintrav = tr->opt_level + 1;
3288
3289 /* rearrange must start from a tip or it will miss some trees */
3290
3291 p = findAnyTip(tr->start);
3292 tested = rearrange(tr, p->back, mintrav, maxtrav, bt);
3293 if (tested == badRear) return FALSE;
3294
3295 # if Master
3296 if (! getReturnedTrees(tr, bt, tested)) return FALSE;
3297 # endif
3298
3299 bt->numtrees += tested;
3300 (void) setOptLevel(bt, maxtrav);
3301 if (! recallBestTree(bt, 1, tr)) return FALSE; /* recover best tree */
3302
3303 printf(" Tested %d alternative trees\n", tested);
3304 if (bt->improved) {
3305 printf(" Ln Likelihood =%14.5f\n", tr->likelihood);
3306 }
3307
3308 writeCheckpoint(tr); /* checkpoint the new tree */
3309 } while (maxtrav > tr->opt_level);
3310
3311 return TRUE;
3312 } /* optimize */
3313
3314
coordinates(tree * tr,nodeptr p,double lengthsum,drawdata * tdptr)3315 void coordinates (tree *tr, nodeptr p, double lengthsum, drawdata *tdptr)
3316 { /* coordinates */
3317 /* establishes coordinates of nodes */
3318 double x, z;
3319 nodeptr q, first, last;
3320
3321 if (p->tip) {
3322 p->xcoord = NINT(over * lengthsum);
3323 p->ymax = p->ymin = p->ycoord = tdptr->tipy;
3324 tdptr->tipy += down;
3325 if (lengthsum > tdptr->tipmax) tdptr->tipmax = lengthsum;
3326 }
3327
3328 else {
3329 q = p->next;
3330 do {
3331 z = q->z;
3332 if (z < zmin) z = zmin;
3333 x = lengthsum - tr->rdta->fracchange * log(z);
3334 coordinates(tr, q->back, x, tdptr);
3335 q = q->next;
3336 } while (p == tr->start->back ? q != p->next : q != p);
3337
3338 first = p->next->back;
3339 q = p;
3340 while (q->next != p) q = q->next;
3341 last = q->back;
3342 p->xcoord = NINT(over * lengthsum);
3343 p->ycoord = (first->ycoord + last->ycoord)/2;
3344 p->ymin = first->ymin;
3345 p->ymax = last->ymax;
3346 }
3347 } /* coordinates */
3348
3349
drawline(tree * tr,int i,double scale)3350 void drawline (tree *tr, int i, double scale)
3351 /* draws one row of the tree diagram by moving up tree */
3352 /* Modified to handle 1000 taxa, October 16, 1991 */
3353 { /* drawline */
3354 nodeptr p, q, r, first, last;
3355 int n, j, k, l, extra;
3356 boolean done;
3357
3358 p = q = tr->start->back;
3359 extra = 0;
3360
3361 if (i == p->ycoord) {
3362 k = q->number - tr->mxtips;
3363 for (j = k; j < 1000; j *= 10) putchar('-');
3364 printf("%d", k);
3365 extra = 1;
3366 }
3367 else printf(" ");
3368
3369 do {
3370 if (! p->tip) {
3371 r = p->next;
3372 done = FALSE;
3373 do {
3374 if ((i >= r->back->ymin) && (i <= r->back->ymax)) {
3375 q = r->back;
3376 done = TRUE;
3377 }
3378 r = r->next;
3379 } while (! done && (p == tr->start->back ? r != p->next : r != p));
3380
3381 first = p->next->back;
3382 r = p;
3383 while (r->next != p) r = r->next;
3384 last = r->back;
3385 if (p == tr->start->back) last = p->back;
3386 }
3387
3388 done = (p->tip) || (p == q);
3389 n = NINT(scale*(q->xcoord - p->xcoord));
3390 if ((n < 3) && (! q->tip)) n = 3;
3391 n -= extra;
3392 extra = 0;
3393
3394 if ((q->ycoord == i) && (! done)) {
3395 if (p->ycoord != q->ycoord) putchar('+');
3396 else putchar('-');
3397
3398 if (! q->tip) {
3399 k = q->number - tr->mxtips;
3400 l = n - 3;
3401 for (j = k; j < 100; j *= 10) l++;
3402 for (j = 1; j <= l; j++) putchar('-');
3403 printf("%d", k);
3404 extra = 1;
3405 }
3406 else for (j = 1; j <= n-1; j++) putchar('-');
3407 }
3408
3409 else if (! p->tip) {
3410 if ((last->ycoord > i) && (first->ycoord < i) && (i != p->ycoord)) {
3411 putchar('!');
3412 for (j = 1; j <= n-1; j++) putchar(' ');
3413 }
3414 else for (j = 1; j <= n; j++) putchar(' ');
3415 }
3416
3417 else
3418 for (j = 1; j <= n; j++) putchar(' ');
3419
3420 p = q;
3421 } while (! done);
3422
3423 if ((p->ycoord == i) && p->tip) {
3424 printf(" %s", p->name);
3425 }
3426
3427 putchar('\n');
3428 } /* drawline */
3429
3430
printTree(tree * tr,analdef * adef)3431 void printTree (tree *tr, analdef *adef)
3432 /* prints out diagram of the tree */
3433 { /* printTree */
3434 drawdata tipdata;
3435 double scale;
3436 int i, imax;
3437
3438 if (adef->trprint) {
3439 putchar('\n');
3440 tipdata.tipy = 1;
3441 tipdata.tipmax = 0.0;
3442 coordinates(tr, tr->start->back, (double) 0.0, & tipdata);
3443 scale = 1.0 / tipdata.tipmax;
3444 imax = tipdata.tipy - down;
3445 for (i = 1; i <= imax; i++) drawline(tr, i, scale);
3446 printf("\nRemember: ");
3447 if (adef->root) printf("(although rooted by outgroup) ");
3448 printf("this is an unrooted tree!\n\n");
3449 }
3450 } /* printTree */
3451
3452
sigma(tree * tr,nodeptr p,double * sumlrptr)3453 double sigma (tree *tr, nodeptr p, double *sumlrptr)
3454 /* compute standard deviation */
3455 { /* sigma */
3456 likelivector *lp, *lq;
3457 double slope, sum, sumlr, z, zv, zz, lz,
3458 rat, suma, sumb, sumc, d2, d, li, temp, abzz, bczv, t3,
3459 fxpa, fxpc, fxpg, fxpt, fxpr, fxpy, fxqr, fxqy, w;
3460 double *rptr;
3461 nodeptr q;
3462 int i, *wptr;
3463
3464 q = p->back;
3465 while ((! p->x) || (! q->x)) {
3466 if (! (p->x)) if (! newview(tr, p)) return -1.0;
3467 if (! (q->x)) if (! newview(tr, q)) return -1.0;
3468 }
3469
3470 lp = p->x->lv;
3471 lq = q->x->lv;
3472
3473 z = p->z;
3474 if (z < zmin) z = zmin;
3475 lz = log(z);
3476
3477 wptr = &(tr->cdta->aliaswgt[0]);
3478 rptr = &(tr->cdta->patrat[0]);
3479 sum = sumlr = slope = 0.0;
3480
3481 # ifdef Vectorize
3482 # pragma IVDEP
3483 # endif
3484
3485 for (i = 0; i < tr->cdta->endsite; i++) {
3486 rat = *rptr++;
3487 zz = exp(rat * lz);
3488 zv = exp(rat * tr->rdta->xv * lz);
3489
3490 fxpa = tr->rdta->freqa * lp->a;
3491 fxpg = tr->rdta->freqg * lp->g;
3492 fxpc = tr->rdta->freqc * lp->c;
3493 fxpt = tr->rdta->freqt * lp->t;
3494 fxpr = fxpa + fxpg;
3495 fxpy = fxpc + fxpt;
3496 suma = fxpa * lq->a + fxpc * lq->c + fxpg * lq->g + fxpt * lq->t;
3497 fxqr = tr->rdta->freqa * lq->a + tr->rdta->freqg * lq->g;
3498 fxqy = tr->rdta->freqc * lq->c + tr->rdta->freqt * lq->t;
3499 sumc = (fxpr + fxpy) * (fxqr + fxqy);
3500 sumb = fxpr * fxqr * tr->rdta->invfreqr + fxpy * fxqy * tr->rdta->invfreqy;
3501 abzz = zz * (suma - sumb);
3502 bczv = zv * (sumb - sumc);
3503 li = sumc + abzz + bczv;
3504 t3 = tr->rdta->xv * bczv;
3505 d = abzz + t3;
3506 d2 = rat * (abzz*(rat-1.0) + t3*(rat * tr->rdta->xv - 1.0));
3507
3508 temp = rat * d / li;
3509 w = *wptr++;
3510 slope += w * temp;
3511 sum += w * (temp * temp - d2/li);
3512 sumlr += w * log(li / (suma + 1.0E-300));
3513 lp++;
3514 lq++;
3515 }
3516
3517 *sumlrptr = sumlr;
3518 return (sum > 1.0E-300) ? z*(-slope + sqrt(slope*slope + 3.841*sum))/sum
3519 : 1.0;
3520 } /* sigma */
3521
3522
describe(tree * tr,nodeptr p)3523 void describe (tree *tr, nodeptr p)
3524 /* print out information for one branch */
3525 { /* describe */
3526 double z, s, sumlr;
3527 nodeptr q;
3528 char *nameptr;
3529 int k, ch;
3530
3531 q = p->back;
3532 printf("%4d ", q->number - tr->mxtips);
3533 if (p->tip) {
3534 nameptr = p->name;
3535 k = nmlngth;
3536 while (ch = *nameptr++) {putchar(ch); k--;}
3537 while (--k >= 0) putchar(' ');
3538 }
3539 else {
3540 printf("%4d", p->number - tr->mxtips);
3541 for (k = 4; k < nmlngth; k++) putchar(' ');
3542 }
3543
3544 z = q->z;
3545 if (z <= zmin) printf(" infinity");
3546 else printf("%15.5f", -log(z) * tr->rdta->fracchange);
3547
3548 s = sigma(tr, q, & sumlr);
3549 printf(" (");
3550 if (z + s >= zmax) printf(" zero");
3551 else printf("%9.5f", (double) -log(z + s) * tr->rdta->fracchange);
3552 putchar(',');
3553 if (z - s <= zmin) printf(" infinity");
3554 else printf("%12.5f", (double) -log(z - s) * tr->rdta->fracchange);
3555 putchar(')');
3556
3557 if (sumlr > 2.995 ) printf(" **");
3558 else if (sumlr > 1.9205) printf(" *");
3559 putchar('\n');
3560
3561 if (! p->tip) {
3562 describe(tr, p->next->back);
3563 describe(tr, p->next->next->back);
3564 }
3565 } /* describe */
3566
3567
summarize(tree * tr)3568 void summarize (tree *tr)
3569 /* print out branch length information and node numbers */
3570 { /* summarize */
3571 printf("Ln Likelihood =%14.5f\n", tr->likelihood);
3572 putchar('\n');
3573 printf(" Between And Length");
3574 printf(" Approx. Confidence Limits\n");
3575 printf(" ------- --- ------");
3576 printf(" ------- ---------- ------\n");
3577
3578 describe(tr, tr->start->back->next->back);
3579 describe(tr, tr->start->back->next->next->back);
3580 describe(tr, tr->start);
3581 putchar('\n');
3582 printf(" * = significantly positive, P < 0.05\n");
3583 printf(" ** = significantly positive, P < 0.01\n\n\n");
3584 } /* summarize */
3585
3586
3587 /*=========== This is a problem if tr->start->back is a tip! ===========*/
3588 /* All routines should be contrived so that tr->start->back is not a tip */
3589
treeString(char * treestr,tree * tr,nodeptr p,int form)3590 char *treeString (char *treestr, tree *tr, nodeptr p, int form)
3591 /* write string with representation of tree */
3592 /* form == 1 -> Newick tree */
3593 /* form == 2 -> Prolog fact */
3594 /* form == 3 -> PHYLIP tree */
3595 { /* treeString */
3596 double x, z;
3597 char *nameptr;
3598 int c;
3599
3600 if (p == tr->start->back) {
3601 if (form != treePHYLIP) {
3602 if (form == treeProlog) {
3603 (void) sprintf(treestr, "phylip_tree(");
3604 while (*treestr) treestr++; /* move pointer to null */
3605 }
3606
3607 (void) sprintf(treestr, "[&&%s: version = '%s'",
3608 programName, programVersion);
3609 while (*treestr) treestr++;
3610
3611 (void) sprintf(treestr, ", %s = %15.13g",
3612 likelihood_key, tr->likelihood);
3613 while (*treestr) treestr++;
3614
3615 (void) sprintf(treestr, ", %s = %d", ntaxa_key, tr->ntips);
3616 while (*treestr) treestr++;
3617
3618 (void) sprintf(treestr,", %s = %d", opt_level_key, tr->opt_level);
3619 while (*treestr) treestr++;
3620
3621 (void) sprintf(treestr, ", %s = %d", smoothed_key, tr->smoothed);
3622 while (*treestr) treestr++;
3623
3624 (void) sprintf(treestr, "]%s", form == treeProlog ? ", " : " ");
3625 while (*treestr) treestr++;
3626 }
3627 }
3628
3629 if (p->tip) {
3630 if (form != treePHYLIP) *treestr++ = '\'';
3631 nameptr = p->name;
3632 while (c = *nameptr++) {
3633 if (form != treePHYLIP) {if (c == '\'') *treestr++ = '\'';}
3634 else if (c == ' ') {c = '_';}
3635 *treestr++ = c;
3636 }
3637 if (form != treePHYLIP) *treestr++ = '\'';
3638 }
3639
3640 else {
3641 *treestr++ = '(';
3642 treestr = treeString(treestr, tr, p->next->back, form);
3643 *treestr++ = ',';
3644 treestr = treeString(treestr, tr, p->next->next->back, form);
3645 if (p == tr->start->back) {
3646 *treestr++ = ',';
3647 treestr = treeString(treestr, tr, p->back, form);
3648 }
3649 *treestr++ = ')';
3650 }
3651
3652 if (p == tr->start->back) {
3653 (void) sprintf(treestr, ":0.0%s\n", (form != treeProlog) ? ";" : ").");
3654 }
3655 else {
3656 z = p->z;
3657 if (z < zmin) z = zmin;
3658 x = -log(z) * tr->rdta->fracchange;
3659 (void) sprintf(treestr, ": %8.6f", x); /* prolog needs the space */
3660 }
3661
3662 while (*treestr) treestr++; /* move pointer up to null termination */
3663 return treestr;
3664 } /* treeString */
3665
3666
treeOut(FILE * treefile,tree * tr,int form)3667 void treeOut (FILE *treefile, tree *tr, int form)
3668 /* write out file with representation of final tree */
3669 { /* treeOut */
3670 int c;
3671 char *cptr, *treestr;
3672
3673 treestr = (char *) Malloc((tr->ntips * (nmlngth+32)) + 256);
3674 if (! treestr) {
3675 fprintf(stderr, "treeOut: Malloc failure\n");
3676 exit(1);
3677 }
3678
3679 (void) treeString(treestr, tr, tr->start->back, form);
3680 cptr = treestr;
3681 while (c = *cptr++) putc(c, treefile);
3682
3683 Free(treestr);
3684 } /* treeOut */
3685
3686
3687 /*=======================================================================*/
3688 /* Read a tree from a file */
3689 /*=======================================================================*/
3690
3691
3692 /* 1.0.A Processing of quotation marks in comment removed
3693 */
3694
treeFinishCom(FILE * fp,char ** strp)3695 int treeFinishCom (FILE *fp, char **strp)
3696 { /* treeFinishCom */
3697 int ch;
3698
3699 while ((ch = getc(fp)) != EOF && ch != ']') {
3700 if (strp != NULL) *(*strp)++ = ch; /* save character */
3701 if (ch == '[') { /* nested comment; find its end */
3702 if ((ch = treeFinishCom(fp, strp)) == EOF) break;
3703 if (strp != NULL) *(*strp)++ = ch; /* save closing ] */
3704 }
3705 }
3706
3707 if (strp != NULL) **strp = '\0'; /* terminate string */
3708 return ch;
3709 } /* treeFinishCom */
3710
3711
treeGetCh(FILE * fp)3712 int treeGetCh (FILE *fp) /* get next nonblank, noncomment character */
3713 { /* treeGetCh */
3714 int ch;
3715
3716 while ((ch = getc(fp)) != EOF) {
3717 if (whitechar(ch)) ;
3718 else if (ch == '[') { /* comment; find its end */
3719 if ((ch = treeFinishCom(fp, (char **) NULL)) == EOF) break;
3720 }
3721 else break;
3722 }
3723
3724 return ch;
3725 } /* treeGetCh */
3726
3727
treeLabelEnd(int ch)3728 boolean treeLabelEnd (int ch)
3729 { /* treeLabelEnd */
3730 switch (ch) {
3731 case EOF: case '\0': case '\t': case '\n': case ' ':
3732 case ':': case ',': case '(': case ')': case '[':
3733 case ';':
3734 return TRUE;
3735 default:
3736 break;
3737 }
3738 return FALSE;
3739 } /* treeLabelEnd */
3740
3741
treeGetLabel(FILE * fp,char * lblPtr,int maxlen)3742 boolean treeGetLabel (FILE *fp, char *lblPtr, int maxlen)
3743 { /* treeGetLabel */
3744 int ch;
3745 boolean done, quoted, lblfound;
3746
3747 if (--maxlen < 0) lblPtr = (char *) NULL; /* reserves space for '\0' */
3748 else if (lblPtr == NULL) maxlen = 0;
3749
3750 ch = getc(fp);
3751 done = treeLabelEnd(ch);
3752
3753 lblfound = ! done;
3754 quoted = (ch == '\'');
3755 if (quoted && ! done) {ch = getc(fp); done = (ch == EOF);}
3756
3757 while (! done) {
3758 if (quoted) {
3759 if (ch == '\'') {ch = getc(fp); if (ch != '\'') break;}
3760 }
3761
3762 else if (treeLabelEnd(ch)) break;
3763
3764 else if (ch == '_') ch = ' '; /* unquoted _ goes to space */
3765
3766 if (--maxlen >= 0) *lblPtr++ = ch;
3767 ch = getc(fp);
3768 if (ch == EOF) break;
3769 }
3770
3771 if (ch != EOF) (void) ungetc(ch, fp);
3772
3773 if (lblPtr != NULL) *lblPtr = '\0';
3774
3775 return lblfound;
3776 } /* treeGetLabel */
3777
3778
treeFlushLabel(FILE * fp)3779 boolean treeFlushLabel (FILE *fp)
3780 { /* treeFlushLabel */
3781 return treeGetLabel(fp, (char *) NULL, (int) 0);
3782 } /* treeFlushLabel */
3783
3784
treeFindTipByLabel(char * str,tree * tr)3785 int treeFindTipByLabel (char *str, tree *tr)
3786 /* str -- label string pointer */
3787 { /* treeFindTipByLabel */
3788 nodeptr q;
3789 char *nameptr;
3790 int ch, i, n;
3791 boolean found;
3792
3793 for (n = 1; n <= tr->mxtips; n++) {
3794 q = tr->nodep[n];
3795 if (! (q->back)) { /* Only consider unused tips */
3796 i = 0;
3797 nameptr = q->name;
3798 while ((found = (str[i++] == (ch = *nameptr++))) && ch) ;
3799 if (found) return n;
3800 }
3801 }
3802
3803 printf("ERROR: Cannot find tree species: %s\n", str);
3804
3805 return 0;
3806 } /* treeFindTipByLabel */
3807
3808
treeFindTipName(FILE * fp,tree * tr)3809 int treeFindTipName (FILE *fp, tree *tr)
3810 { /* treeFindTipName */
3811 char *nameptr, str[nmlngth+2];
3812 int n;
3813
3814 if (tr->prelabeled) {
3815 if (treeGetLabel(fp, str, nmlngth+2))
3816 n = treeFindTipByLabel(str, tr);
3817 else
3818 n = 0;
3819 }
3820
3821 else if (tr->ntips < tr->mxtips) {
3822 n = tr->ntips + 1;
3823 nameptr = tr->nodep[n]->name;
3824 if (! treeGetLabel(fp, nameptr, nmlngth+1)) n = 0;
3825 }
3826
3827 else {
3828 n = 0;
3829 }
3830
3831 return n;
3832 } /* treeFindTipName */
3833
3834
treeEchoContext(FILE * fp1,FILE * fp2,int n)3835 void treeEchoContext (FILE *fp1, FILE *fp2, int n)
3836 { /* treeEchoContext */
3837 int ch;
3838 boolean waswhite;
3839
3840 waswhite = TRUE;
3841
3842 while (n > 0 && ((ch = getc(fp1)) != EOF)) {
3843 if (whitechar(ch)) {
3844 ch = waswhite ? '\0' : ' ';
3845 waswhite = TRUE;
3846 }
3847 else {
3848 waswhite = FALSE;
3849 }
3850
3851 if (ch > '\0') {putc(ch, fp2); n--;}
3852 }
3853 } /* treeEchoContext */
3854
3855
treeProcessLength(FILE * fp,double * dptr)3856 boolean treeProcessLength (FILE *fp, double *dptr)
3857 { /* treeProcessLength */
3858 int ch;
3859
3860 if ((ch = treeGetCh(fp)) == EOF) return FALSE; /* Skip comments */
3861 (void) ungetc(ch, fp);
3862
3863 if (fscanf(fp, "%lf", dptr) != 1) {
3864 printf("ERROR: treeProcessLength: Problem reading branch length\n");
3865 treeEchoContext(fp, stdout, 40);
3866 printf("\n");
3867 return FALSE;
3868 }
3869
3870 return TRUE;
3871 } /* treeProcessLength */
3872
3873
treeFlushLen(FILE * fp)3874 boolean treeFlushLen (FILE *fp)
3875 { /* treeFlushLen */
3876 double dummy;
3877 int ch;
3878
3879 if ((ch = treeGetCh(fp)) == ':') return treeProcessLength(fp, & dummy);
3880
3881 if (ch != EOF) (void) ungetc(ch, fp);
3882 return TRUE;
3883 } /* treeFlushLen */
3884
3885
treeNeedCh(FILE * fp,int c1,char * where)3886 boolean treeNeedCh (FILE *fp, int c1, char *where)
3887 { /* treeNeedCh */
3888 int c2;
3889
3890 if ((c2 = treeGetCh(fp)) == c1) return TRUE;
3891
3892 printf("ERROR: Expecting '%c' %s tree; found:", c1, where);
3893 if (c2 == EOF) {
3894 printf("End-of-File");
3895 }
3896 else {
3897 ungetc(c2, fp);
3898 treeEchoContext(fp, stdout, 40);
3899 }
3900 putchar('\n');
3901 return FALSE;
3902 } /* treeNeedCh */
3903
3904
addElementLen(FILE * fp,tree * tr,nodeptr p)3905 boolean addElementLen (FILE *fp, tree *tr, nodeptr p)
3906 { /* addElementLen */
3907 double z, branch;
3908 nodeptr q;
3909 int n, ch;
3910
3911 if ((ch = treeGetCh(fp)) == '(') { /* A new internal node */
3912 n = (tr->nextnode)++;
3913 if (n > 2*(tr->mxtips) - 2) {
3914 if (tr->rooted || n > 2*(tr->mxtips) - 1) {
3915 printf("ERROR: Too many internal nodes. Is tree rooted?\n");
3916 printf(" Deepest splitting should be a trifurcation.\n");
3917 return FALSE;
3918 }
3919 else {
3920 tr->rooted = TRUE;
3921 }
3922 }
3923 q = tr->nodep[n];
3924 if (! addElementLen(fp, tr, q->next)) return FALSE;
3925 if (! treeNeedCh(fp, ',', "in")) return FALSE;
3926 if (! addElementLen(fp, tr, q->next->next)) return FALSE;
3927 if (! treeNeedCh(fp, ')', "in")) return FALSE;
3928 (void) treeFlushLabel(fp);
3929 }
3930
3931 else { /* A new tip */
3932 ungetc(ch, fp);
3933 if ((n = treeFindTipName(fp, tr)) <= 0) return FALSE;
3934 q = tr->nodep[n];
3935 if (tr->start->number > n) tr->start = q;
3936 (tr->ntips)++;
3937 } /* End of tip processing */
3938
3939 if (tr->userlen) {
3940 if (! treeNeedCh(fp, ':', "in")) return FALSE;
3941 if (! treeProcessLength(fp, & branch)) return FALSE;
3942 z = exp(-branch / tr->rdta->fracchange);
3943 if (z > zmax) z = zmax;
3944 hookup(p, q, z);
3945 }
3946 else {
3947 if (! treeFlushLen(fp)) return FALSE;
3948 hookup(p, q, defaultz);
3949 }
3950
3951 return TRUE;
3952 } /* addElementLen */
3953
3954
saveTreeCom(char ** comstrp)3955 int saveTreeCom (char **comstrp)
3956 { /* saveTreeCom */
3957 int ch;
3958 boolean inquote;
3959
3960 inquote = FALSE;
3961 while ((ch = getc(INFILE)) != EOF && (inquote || ch != ']')) {
3962 *(*comstrp)++ = ch; /* save character */
3963 if (ch == '[' && ! inquote) { /* comment; find its end */
3964 if ((ch = saveTreeCom(comstrp)) == EOF) break;
3965 *(*comstrp)++ = ch; /* add ] */
3966 }
3967 else if (ch == '\'') inquote = ! inquote; /* start or end of quote */
3968 }
3969
3970 return ch;
3971 } /* saveTreeCom */
3972
3973
processTreeCom(FILE * fp,tree * tr)3974 boolean processTreeCom (FILE *fp, tree *tr)
3975 { /* processTreeCom */
3976 int text_started, functor_read, com_open;
3977
3978 /* Accept prefatory "phylip_tree(" or "pseudoNewick(" */
3979
3980 functor_read = text_started = 0;
3981 (void) fscanf(fp, " p%nhylip_tree(%n", & text_started, & functor_read);
3982 if (text_started && ! functor_read) {
3983 (void) fscanf(fp, "seudoNewick(%n", & functor_read);
3984 if (! functor_read) {
3985 printf("Start of tree 'p...' not understood.\n");
3986 return FALSE;
3987 }
3988 }
3989
3990 com_open = 0;
3991 (void) fscanf(fp, " [%n", & com_open);
3992
3993 if (com_open) { /* comment; read it */
3994 char com[1024], *com_end;
3995
3996 com_end = com;
3997 if (treeFinishCom(fp, & com_end) == EOF) { /* omits enclosing []s */
3998 printf("Missing end of tree comment\n");
3999 return FALSE;
4000 }
4001
4002 (void) readKeyValue(com, likelihood_key, "%lg",
4003 (void *) &(tr->likelihood));
4004 (void) readKeyValue(com, opt_level_key, "%d",
4005 (void *) &(tr->opt_level));
4006 (void) readKeyValue(com, smoothed_key, "%d",
4007 (void *) &(tr->smoothed));
4008
4009 if (functor_read) (void) fscanf(fp, " ,"); /* remove trailing comma */
4010 }
4011
4012 return (functor_read > 0);
4013 } /* processTreeCom */
4014
4015
uprootTree(tree * tr,nodeptr p)4016 nodeptr uprootTree (tree *tr, nodeptr p)
4017 { /* uprootTree */
4018 nodeptr q, r, s, start;
4019 int n;
4020
4021 if (p->tip || p->back) {
4022 printf("ERROR: Unable to uproot tree.\n");
4023 printf(" Inappropriate node marked for removal.\n");
4024 return (nodeptr) NULL;
4025 }
4026
4027 n = --(tr->nextnode); /* last internal node added */
4028 if (n != tr->mxtips + tr->ntips - 1) {
4029 printf("ERROR: Unable to uproot tree. Inconsistent\n");
4030 printf(" number of tips and nodes for rooted tree.\n");
4031 return (nodeptr) NULL;
4032 }
4033
4034 q = p->next->back; /* remove p from tree */
4035 r = p->next->next->back;
4036 hookup(q, r, tr->userlen ? (q->z * r->z) : defaultz);
4037
4038 start = (r->tip || (! q->tip)) ? r : r->next->next->back;
4039
4040 if (tr->ntips > 2 && p->number != n) {
4041 q = tr->nodep[n]; /* transfer last node's conections to p */
4042 r = q->next;
4043 s = q->next->next;
4044 hookup(p, q->back, q->z); /* move connections to p */
4045 hookup(p->next, r->back, r->z);
4046 hookup(p->next->next, s->back, s->z);
4047 if (start->number == q->number) start = start->back->back;
4048 q->back = r->back = s->back = (nodeptr) NULL;
4049 }
4050 else {
4051 p->back = p->next->back = p->next->next->back = (nodeptr) NULL;
4052 }
4053
4054 tr->rooted = FALSE;
4055 return start;
4056 } /* uprootTree */
4057
4058
treeReadLen(FILE * fp,tree * tr)4059 boolean treeReadLen (FILE *fp, tree *tr)
4060 { /* treeReadLen */
4061 nodeptr p;
4062 int i, ch;
4063 boolean is_fact;
4064
4065 for (i = 1; i <= tr->mxtips; i++) tr->nodep[i]->back = (node *) NULL;
4066 tr->start = tr->nodep[tr->mxtips];
4067 tr->ntips = 0;
4068 tr->nextnode = tr->mxtips + 1;
4069 tr->opt_level = 0;
4070 tr->log_f_valid = 0;
4071 tr->smoothed = FALSE;
4072 tr->rooted = FALSE;
4073
4074 is_fact = processTreeCom(fp, tr);
4075
4076 p = tr->nodep[(tr->nextnode)++];
4077 if (! treeNeedCh(fp, '(', "at start of")) return FALSE;
4078 if (! addElementLen(fp, tr, p)) return FALSE;
4079 if (! treeNeedCh(fp, ',', "in")) return FALSE;
4080 if (! addElementLen(fp, tr, p->next)) return FALSE;
4081 if (! tr->rooted) {
4082 if ((ch = treeGetCh(fp)) == ',') { /* An unrooted format */
4083 if (! addElementLen(fp, tr, p->next->next)) return FALSE;
4084 }
4085 else { /* A rooted format */
4086 tr->rooted = TRUE;
4087 if (ch != EOF) (void) ungetc(ch, fp);
4088 }
4089 }
4090 else {
4091 p->next->next->back = (nodeptr) NULL;
4092 }
4093 if (! treeNeedCh(fp, ')', "in")) return FALSE;
4094 (void) treeFlushLabel(fp);
4095 if (! treeFlushLen(fp)) return FALSE;
4096 if (is_fact) {
4097 if (! treeNeedCh(fp, ')', "at end of")) return FALSE;
4098 if (! treeNeedCh(fp, '.', "at end of")) return FALSE;
4099 }
4100 else {
4101 if (! treeNeedCh(fp, ';', "at end of")) return FALSE;
4102 }
4103
4104 if (tr->rooted) {
4105 p->next->next->back = (nodeptr) NULL;
4106 tr->start = uprootTree(tr, p->next->next);
4107 if (! tr->start) return FALSE;
4108 }
4109 else {
4110 tr->start = p->next->next->back; /* This is start used by treeString */
4111 }
4112
4113 return (initrav(tr, tr->start) && initrav(tr, tr->start->back));
4114 } /* treeReadLen */
4115
4116
4117 /*=======================================================================*/
4118 /* Read a tree from a string */
4119 /*=======================================================================*/
4120
4121
4122 #if Master || Slave
str_treeFinishCom(char ** treestrp,char ** strp)4123 int str_treeFinishCom (char **treestrp, char **strp)
4124 /* treestrp -- tree string pointer */
4125 /* strp -- comment string pointer */
4126 { /* str_treeFinishCom */
4127 int ch;
4128
4129 while ((ch = *(*treestrp)++) != NULL && ch != ']') {
4130 if (strp != NULL) *(*strp)++ = ch; /* save character */
4131 if (ch == '[') { /* nested comment; find its end */
4132 if ((ch = str_treeFinishCom(treestrp)) == NULL) break;
4133 if (strp != NULL) *(*strp)++ = ch; /* save closing ] */
4134 }
4135 }
4136 if (strp != NULL) **strp = '\0'; /* terminate string */
4137 return ch;
4138 } /* str_treeFinishCom */
4139
4140
str_treeGetCh(char ** treestrp)4141 int str_treeGetCh (char **treestrp)
4142 /* get next nonblank, noncomment character */
4143 { /* str_treeGetCh */
4144 int ch;
4145
4146 while ((ch = *(*treestrp)++) != NULL) {
4147 if (whitechar(ch)) ;
4148 else if (ch == '[') { /* comment; find its end */
4149 if ((ch = str_treeFinishCom(treestrp, (char *) NULL)) == NULL) break;
4150 }
4151 else break;
4152 }
4153
4154 return ch;
4155 } /* str_treeGetCh */
4156
4157
str_treeGetLabel(char ** treestrp,char * lblPtr,int maxlen)4158 boolean str_treeGetLabel (char **treestrp, char *lblPtr, int maxlen)
4159 { /* str_treeGetLabel */
4160 int ch;
4161 boolean done, quoted, lblfound;
4162
4163 if (--maxlen < 0) lblPtr = (char *) NULL; /* reserves space for '\0' */
4164 else if (lblPtr == NULL) maxlen = 0;
4165
4166 ch = *(*treestrp)++;
4167 done = treeLabelEnd(ch);
4168
4169 lblfound = ! done;
4170 quoted = (ch == '\'');
4171 if (quoted && ! done) {ch = *(*treestrp)++; done = (ch == '\0');}
4172
4173 while (! done) {
4174 if (quoted) {
4175 if (ch == '\'') {ch = *(*treestrp)++; if (ch != '\'') break;}
4176 }
4177
4178 else if (treeLabelEnd(ch)) break;
4179
4180 else if (ch == '_') ch = ' '; /* unquoted _ goes to space */
4181
4182 if (--maxlen >= 0) *lblPtr++ = ch;
4183 ch = *(*treestrp)++;
4184 if (ch == '\0') break;
4185 }
4186
4187 (*treestrp)--;
4188
4189 if (lblPtr != NULL) *lblPtr = '\0';
4190
4191 return lblfound;
4192 } /* str_treeGetLabel */
4193
4194
str_treeFlushLabel(char ** treestrp)4195 boolean str_treeFlushLabel (char **treestrp)
4196 { /* str_treeFlushLabel */
4197 return str_treeGetLabel(treestrp, (char *) NULL, (int) 0);
4198 } /* str_treeFlushLabel */
4199
4200
str_treeFindTipName(char ** treestrp,tree * tr)4201 int str_treeFindTipName (char **treestrp, tree *tr)
4202 { /* str_treeFindTipName */
4203 nodeptr q;
4204 char *nameptr, str[nmlngth+2];
4205 int ch, i, n;
4206
4207 if (tr->prelabeled) {
4208 if (str_treeGetLabel(treestrp, str, nmlngth+2))
4209 n = treeFindTipByLabel(str, tr);
4210 else
4211 n = 0;
4212 }
4213
4214 else if (tr->ntips < tr->mxtips) {
4215 n = tr->ntips + 1;
4216 nameptr = tr->nodep[n]->name;
4217 if (! str_treeGetLabel(treestrp, nameptr, nmlngth+1)) n = 0;
4218 }
4219
4220 else {
4221 n = 0;
4222 }
4223
4224 return n;
4225 } /* str_treeFindTipName */
4226
4227
str_treeProcessLength(char ** treestrp,double * dptr)4228 boolean str_treeProcessLength (char **treestrp, double *dptr)
4229 { /* str_treeProcessLength */
4230 int used;
4231
4232 if(! str_treeGetCh(treestrp)) return FALSE; /* Skip comments */
4233 (*treestrp)--;
4234
4235 if (sscanf(*treestrp, "%lf%n", dptr, & used) != 1) {
4236 printf("ERROR: str_treeProcessLength: Problem reading branch length\n");
4237 printf("%40s\n", *treestrp);
4238 *dptr = 0.0;
4239 return FALSE;
4240 }
4241 else {
4242 *treestrp += used;
4243 }
4244
4245 return TRUE;
4246 } /* str_treeProcessLength */
4247
4248
str_treeFlushLen(char ** treestrp)4249 boolean str_treeFlushLen (char **treestrp)
4250 { /* str_treeFlushLen */
4251 int ch;
4252
4253 if ((ch = str_treeGetCh(treestrp)) == ':')
4254 return str_treeProcessLength(treestrp, (double *) NULL);
4255 else {
4256 (*treestrp)--;
4257 return TRUE;
4258 }
4259 } /* str_treeFlushLen */
4260
4261
str_treeNeedCh(char ** treestrp,int c1,char * where)4262 boolean str_treeNeedCh (char **treestrp, int c1, char *where)
4263 { /* str_treeNeedCh */
4264 int c2, i;
4265
4266 if ((c2 = str_treeGetCh(treestrp)) == c1) return TRUE;
4267
4268 printf("ERROR: Missing '%c' %s tree; ", c1, where);
4269 if (c2 == '\0')
4270 printf("end-of-string");
4271 else {
4272 putchar('"');
4273 for (i = 24; i-- && (c2 != '\0'); c2 = *(*treestrp)++) putchar(c2);
4274 putchar('"');
4275 }
4276
4277 printf(" found instead\n");
4278 return FALSE;
4279 } /* str_treeNeedCh */
4280
4281
str_addElementLen(char ** treestrp,tree * tr,nodeptr p)4282 boolean str_addElementLen (char **treestrp, tree *tr, nodeptr p)
4283 { /* str_addElementLen */
4284 double z, branch;
4285 nodeptr q;
4286 int n, ch;
4287
4288 if ((ch = str_treeGetCh(treestrp)) == '(') { /* A new internal node */
4289 n = (tr->nextnode)++;
4290 if (n > 2*(tr->mxtips) - 2) {
4291 if (tr->rooted || n > 2*(tr->mxtips) - 1) {
4292 printf("ERROR: too many internal nodes. Is tree rooted?\n");
4293 printf("Deepest splitting should be a trifurcation.\n");
4294 return FALSE;
4295 }
4296 else {
4297 tr->rooted = TRUE;
4298 }
4299 }
4300 q = tr->nodep[n];
4301 if (! str_addElementLen(treestrp, tr, q->next)) return FALSE;
4302 if (! str_treeNeedCh(treestrp, ',', "in")) return FALSE;
4303 if (! str_addElementLen(treestrp, tr, q->next->next)) return FALSE;
4304 if (! str_treeNeedCh(treestrp, ')', "in")) return FALSE;
4305 if (! str_treeFlushLabel(treestrp)) return FALSE;
4306 }
4307
4308 else { /* A new tip */
4309 n = str_treeFindTipName(treestrp, tr, ch);
4310 if (n <= 0) return FALSE;
4311 q = tr->nodep[n];
4312 if (tr->start->number > n) tr->start = q;
4313 (tr->ntips)++;
4314 } /* End of tip processing */
4315
4316 /* Master and Slave always use lengths */
4317
4318 if (! str_treeNeedCh(treestrp, ':', "in")) return FALSE;
4319 if (! str_treeProcessLength(treestrp, & branch)) return FALSE;
4320 z = exp(-branch / tr->rdta->fracchange);
4321 if (z > zmax) z = zmax;
4322 hookup(p, q, z);
4323
4324 return TRUE;
4325 } /* str_addElementLen */
4326
4327
str_processTreeCom(tree * tr,char ** treestrp)4328 boolean str_processTreeCom(tree *tr, char **treestrp)
4329 { /* str_processTreeCom */
4330 char *com, *com_end;
4331 int text_started, functor_read, com_open;
4332
4333 com = *treestrp;
4334
4335 functor_read = text_started = 0;
4336 sscanf(com, " p%nhylip_tree(%n", & text_started, & functor_read);
4337 if (functor_read) {
4338 com += functor_read;
4339 }
4340 else if (text_started) {
4341 com += text_started;
4342 sscanf(com, "seudoNewick(%n", & functor_read);
4343 if (! functor_read) {
4344 printf("Start of tree 'p...' not understood.\n");
4345 return FALSE;
4346 }
4347 else {
4348 com += functor_read;
4349 }
4350 }
4351
4352 com_open = 0;
4353 sscanf(com, " [%n", & com_open);
4354 com += com_open;
4355
4356 if (com_open) { /* comment; read it */
4357 if (!(com_end = strchr(com, ']'))) {
4358 printf("Missing end of tree comment.\n");
4359 return FALSE;
4360 }
4361
4362 *com_end = 0;
4363 (void) readKeyValue(com, likelihood_key, "%lg",
4364 (void *) &(tr->likelihood));
4365 (void) readKeyValue(com, opt_level_key, "%d",
4366 (void *) &(tr->opt_level));
4367 (void) readKeyValue(com, smoothed_key, "%d",
4368 (void *) &(tr->smoothed));
4369 *com_end = ']';
4370 com_end++;
4371
4372 if (functor_read) { /* remove trailing comma */
4373 text_started = 0;
4374 sscanf(com_end, " ,%n", & text_started);
4375 com_end += text_started;
4376 }
4377
4378 *treestrp = com_end;
4379 }
4380
4381 return (functor_read > 0);
4382 } /* str_processTreeCom */
4383
4384
str_treeReadLen(char * treestr,tree * tr)4385 boolean str_treeReadLen (char *treestr, tree *tr)
4386 /* read string with representation of tree */
4387 { /* str_treeReadLen */
4388 nodeptr p;
4389 int i;
4390 boolean is_fact, found;
4391
4392 for (i = 1; i <= tr->mxtips; i++) tr->nodep[i]->back = (node *) NULL;
4393 tr->start = tr->nodep[tr->mxtips];
4394 tr->ntips = 0;
4395 tr->nextnode = tr->mxtips + 1;
4396 tr->opt_level = 0;
4397 tr->log_f_valid = 0;
4398 tr->smoothed = Master;
4399 tr->rooted = FALSE;
4400
4401 is_fact = str_processTreeCom(tr, & treestr);
4402
4403 p = tr->nodep[(tr->nextnode)++];
4404 if (! str_treeNeedCh(& treestr, '(', "at start of")) return FALSE;
4405 if (! str_addElementLen(& treestr, tr, p)) return FALSE;
4406 if (! str_treeNeedCh(& treestr, ',', "in")) return FALSE;
4407 if (! str_addElementLen(& treestr, tr, p->next)) return FALSE;
4408 if (! tr->rooted) {
4409 if (str_treeGetCh(& treestr) == ',') { /* An unrooted format */
4410 if (! str_addElementLen(& treestr, tr, p->next->next)) return FALSE;
4411 }
4412 else { /* A rooted format */
4413 p->next->next->back = (nodeptr) NULL;
4414 tr->rooted = TRUE;
4415 treestr--;
4416 }
4417 }
4418 if (! str_treeNeedCh(& treestr, ')', "in")) return FALSE;
4419 if (! str_treeFlushLabel(& treestr)) return FALSE;
4420 if (! str_treeFlushLen(& treestr)) return FALSE;
4421 if (is_fact) {
4422 if (! str_treeNeedCh(& treestr, ')', "at end of")) return FALSE;
4423 if (! str_treeNeedCh(& treestr, '.', "at end of")) return FALSE;
4424 }
4425 else {
4426 if (! str_treeNeedCh(& treestr, ';', "at end of")) return FALSE;
4427 }
4428
4429 if (tr->rooted) if (! uprootTree(tr, p->next->next)) return FALSE;
4430 tr->start = p->next->next->back; /* This is start used by treeString */
4431
4432 return (initrav(tr, tr->start) && initrav(tr, tr->start->back));
4433 } /* str_treeReadLen */
4434 #endif
4435
4436
treeEvaluate(tree * tr,bestlist * bt)4437 boolean treeEvaluate (tree *tr, bestlist *bt) /* Evaluate a user tree */
4438 { /* treeEvaluate */
4439
4440 if (Slave || ! tr->userlen) {
4441 if (! smoothTree(tr, 4 * smoothings)) return FALSE;
4442 }
4443
4444 if (evaluate(tr, tr->start) == badEval) return FALSE;
4445
4446 # if ! Slave
4447 (void) saveBestTree(bt, tr);
4448 # endif
4449 return TRUE;
4450 } /* treeEvaluate */
4451
4452
4453 #if Master || Slave
freopen_pid(char * filenm,char * mode,FILE * stream)4454 FILE *freopen_pid (char *filenm, char *mode, FILE *stream)
4455 { /* freopen_pid */
4456 char scr[512];
4457
4458 (void) sprintf(scr, "%s.%d", filenm, getpid());
4459 return freopen(scr, mode, stream);
4460 } /* freopen_pid */
4461 #endif
4462
4463
showBestTrees(bestlist * bt,tree * tr,analdef * adef,FILE * treefile)4464 boolean showBestTrees (bestlist *bt, tree *tr, analdef *adef, FILE *treefile)
4465 { /* showBestTrees */
4466 int rank;
4467
4468 for (rank = 1; rank <= bt->nvalid; rank++) {
4469 if (rank > 1) {
4470 if (rank != recallBestTree(bt, rank, tr)) break;
4471 }
4472 if (evaluate(tr, tr->start) == badEval) return FALSE;
4473 if (tr->outgrnode->back) tr->start = tr->outgrnode;
4474 printTree(tr, adef);
4475 summarize(tr);
4476 if (treefile) treeOut(treefile, tr, adef->trout);
4477 }
4478
4479 return TRUE;
4480 } /* showBestTrees */
4481
4482
cmpBestTrees(bestlist * bt,tree * tr)4483 boolean cmpBestTrees (bestlist *bt, tree *tr)
4484 { /* cmpBestTrees */
4485 double sum, sum2, sd, temp, wtemp, bestscore;
4486 double *log_f0, *log_f0_ptr; /* Save a copy of best log_f */
4487 double *log_f_ptr;
4488 int i, j, num, besttips;
4489
4490 num = bt->nvalid;
4491 if ((num <= 1) || (tr->cdta->wgtsum <= 1)) return TRUE;
4492
4493 if (! (log_f0 = (double *) Malloc(sizeof(double) * tr->cdta->endsite))) {
4494 printf("ERROR: cmpBestTrees unable to obtain space for log_f0\n");
4495 return FALSE;
4496 }
4497
4498 printf("Tree Ln L Diff Ln L Its S.D.");
4499 printf(" Significantly worse?\n\n");
4500
4501 for (i = 1; i <= num; i++) {
4502 if (i != recallBestTree(bt, i, tr)) break;
4503 if (! (tr->log_f_valid)) {
4504 if (evaluate(tr, tr->start) == badEval) return FALSE;
4505 }
4506
4507 printf("%3d%14.5f", i, tr->likelihood);
4508 if (i == 1) {
4509 printf(" <------ best\n");
4510 besttips = tr->ntips;
4511 bestscore = tr->likelihood;
4512 log_f0_ptr = log_f0;
4513 log_f_ptr = tr->log_f;
4514 for (j = 0; j < tr->cdta->endsite; j++) *log_f0_ptr++ = *log_f_ptr++;
4515 }
4516 else if (tr->ntips != besttips)
4517 printf(" (different number of species)\n");
4518 else {
4519 sum = sum2 = 0.0;
4520 log_f0_ptr = log_f0;
4521 log_f_ptr = tr->log_f;
4522 for (j = 0; j < tr->cdta->endsite; j++) {
4523 temp = *log_f0_ptr++ - *log_f_ptr++;
4524 wtemp = tr->cdta->aliaswgt[j] * temp;
4525 sum += wtemp;
4526 sum2 += wtemp * temp;
4527 }
4528 sd = sqrt( tr->cdta->wgtsum * (sum2 - sum*sum / tr->cdta->wgtsum)
4529 / (tr->cdta->wgtsum - 1) );
4530 printf("%14.5f%14.4f", tr->likelihood - bestscore, sd);
4531 printf(" %s\n", (sum > 1.95996 * sd) ? "Yes" : " No");
4532 }
4533 }
4534
4535 Free(log_f0);
4536 printf("\n\n");
4537
4538 return TRUE;
4539 } /* cmpBestTrees */
4540
4541
makeUserTree(tree * tr,bestlist * bt,analdef * adef)4542 boolean makeUserTree (tree *tr, bestlist *bt, analdef *adef)
4543 { /* makeUserTree */
4544 char filename[128];
4545 FILE *treefile;
4546 int nusertrees, which;
4547
4548 nusertrees = adef->numutrees;
4549
4550 printf("User-defined %s:\n\n", (nusertrees == 1) ? "tree" : "trees");
4551
4552 treefile = adef->trout ? fopen_pid("treefile", "w", filename) : (FILE *) NULL;
4553
4554 for (which = 1; which <= nusertrees; which++) {
4555 if (! treeReadLen(INFILE, tr)) return FALSE;
4556 if (! treeEvaluate(tr, bt)) return FALSE;
4557 if (tr->global <= 0) {
4558 if (tr->outgrnode->back) tr->start = tr->outgrnode;
4559 printTree(tr, adef);
4560 summarize(tr);
4561 if (treefile) treeOut(treefile, tr, adef->trout);
4562 }
4563 else {
4564 printf("%6d: Ln Likelihood =%14.5f\n", which, tr->likelihood);
4565 }
4566 }
4567
4568 if (tr->global > 0) {
4569 putchar('\n');
4570 if (! recallBestTree(bt, 1, tr)) return FALSE;
4571 printf(" Ln Likelihood =%14.5f\n", tr->likelihood);
4572 if (! optimize(tr, tr->global, bt)) return FALSE;
4573 if (tr->outgrnode->back) tr->start = tr->outgrnode;
4574 printTree(tr, adef);
4575 summarize(tr);
4576 if (treefile) treeOut(treefile, tr, adef->trout);
4577 }
4578
4579 if (treefile) {
4580 (void) fclose(treefile);
4581 printf("Tree also written to %s\n", filename);
4582 }
4583
4584 putchar('\n');
4585
4586 (void) cmpBestTrees(bt, tr);
4587 return TRUE;
4588 } /* makeUserTree */
4589
4590
4591 #if Slave
slaveTreeEvaluate(tree * tr,bestlist * bt)4592 boolean slaveTreeEvaluate (tree *tr, bestlist *bt)
4593 { /* slaveTreeEvaluate */
4594 boolean done;
4595
4596 do {
4597 requestForWork();
4598 if (! receiveTree(& comm_master, tr)) return FALSE;
4599 done = tr->likelihood > 0.0;
4600 if (! done) {
4601 if (! treeEvaluate(tr, bt)) return FALSE;
4602 if (! sendTree(& comm_master, tr)) return FALSE;
4603 }
4604 } while (! done);
4605
4606 return TRUE;
4607 } /* slaveTreeEvaluate */
4608 #endif
4609
4610
randum(long * seed)4611 double randum (long *seed)
4612 /* random number generator, modified to use 12 bit chunks */
4613 { /* randum */
4614 long sum, mult0, mult1, seed0, seed1, seed2, newseed0, newseed1, newseed2;
4615
4616 mult0 = 1549;
4617 seed0 = *seed & 4095;
4618 sum = mult0 * seed0;
4619 newseed0 = sum & 4095;
4620 sum >>= 12;
4621 seed1 = (*seed >> 12) & 4095;
4622 mult1 = 406;
4623 sum += mult0 * seed1 + mult1 * seed0;
4624 newseed1 = sum & 4095;
4625 sum >>= 12;
4626 seed2 = (*seed >> 24) & 255;
4627 sum += mult0 * seed2 + mult1 * seed1;
4628 newseed2 = sum & 255;
4629
4630 *seed = newseed2 << 24 | newseed1 << 12 | newseed0;
4631 return 0.00390625 * (newseed2
4632 + 0.000244140625 * (newseed1
4633 + 0.000244140625 * newseed0));
4634 } /* randum */
4635
4636
makeDenovoTree(tree * tr,bestlist * bt,analdef * adef)4637 boolean makeDenovoTree (tree *tr, bestlist *bt, analdef *adef)
4638 { /* makeDenovoTree */
4639 char filename[128];
4640 FILE *treefile;
4641 nodeptr p;
4642 int *enterorder; /* random entry order */
4643 int i, j, k, nextsp, newsp, maxtrav, tested;
4644
4645 double randum();
4646
4647
4648 enterorder = (int *) Malloc(sizeof(int) * (tr->mxtips + 1));
4649 if (! enterorder) {
4650 fprintf(stderr, "makeDenovoTree: Malloc failure for enterorder\n");
4651 return 0;
4652 }
4653
4654 if (adef->restart) {
4655 printf("Restarting from tree with the following sequences:\n");
4656 tr->userlen = TRUE;
4657 if (! treeReadLen(INFILE, tr)) return FALSE;
4658 if (! smoothTree(tr, smoothings)) return FALSE;
4659 if (evaluate(tr, tr->start) == badEval) return FALSE;
4660 if (saveBestTree(bt, tr) < 1) return FALSE;
4661
4662 for (i = 1, j = tr->ntips; i <= tr->mxtips; i++) { /* find loose tips */
4663 if (! tr->nodep[i]->back) {
4664 enterorder[++j] = i;
4665 }
4666 else {
4667 printf(" %s\n", tr->nodep[i]->name);
4668
4669 # if Master
4670 if (i>3) REPORT_ADD_SPECS;
4671 # endif
4672 }
4673 }
4674 putchar('\n');
4675 }
4676
4677 else { /* start from scratch */
4678 tr->ntips = 0;
4679 for (i = 1; i <= tr->mxtips; i++) enterorder[i] = i;
4680 }
4681
4682 if (adef->jumble) for (i = tr->ntips + 1; i <= tr->mxtips; i++) {
4683 j = randum(&(adef->jumble))*(tr->mxtips - tr->ntips) + tr->ntips + 1;
4684 k = enterorder[j];
4685 enterorder[j] = enterorder[i];
4686 enterorder[i] = k;
4687 }
4688
4689 bt->numtrees = 1;
4690 if (tr->ntips < tr->mxtips) printf("Adding species:\n");
4691
4692 if (tr->ntips == 0) {
4693 for (i = 1; i <= 3; i++) {
4694 printf(" %s\n", tr->nodep[enterorder[i]]->name);
4695 }
4696 tr->nextnode = tr->mxtips + 1;
4697 if (! buildSimpleTree(tr, enterorder[1], enterorder[2], enterorder[3]))
4698 return FALSE;
4699 }
4700
4701 while (tr->ntips < tr->mxtips || tr->opt_level < tr->global) {
4702 maxtrav = (tr->ntips == tr->mxtips) ? tr->global : tr->partswap;
4703 if (maxtrav > tr->ntips - 3) maxtrav = tr->ntips - 3;
4704
4705 if (tr->opt_level >= maxtrav) {
4706 nextsp = ++(tr->ntips);
4707 newsp = enterorder[nextsp];
4708 p = tr->nodep[newsp];
4709 printf(" %s\n", p->name);
4710
4711 # if Master
4712 if (nextsp % DNAML_STEP_TIME_COUNT == 1) {
4713 REPORT_STEP_TIME;
4714 }
4715 REPORT_ADD_SPECS;
4716 # endif
4717
4718 (void) buildNewTip(tr, p);
4719
4720 resetBestTree(bt);
4721 cacheZ(tr);
4722 tested = addTraverse(tr, p->back, findAnyTip(tr->start)->back,
4723 1, tr->ntips - 2, bt, adef->qadd);
4724 if (tested == badRear) return FALSE;
4725 bt->numtrees += tested;
4726
4727 # if Master
4728 getReturnedTrees(tr, bt, tested);
4729 # endif
4730
4731 printf(" Tested %d alternative trees\n", tested);
4732
4733 (void) recallBestTree(bt, 1, tr);
4734 if (! tr->smoothed) {
4735 if (! smoothTree(tr, smoothings)) return FALSE;
4736 if (evaluate(tr, tr->start) == badEval) return FALSE;
4737 (void) saveBestTree(bt, tr);
4738 }
4739
4740 if (tr->ntips == 4) tr->opt_level = 1; /* All 4 taxon trees done */
4741 maxtrav = (tr->ntips == tr->mxtips) ? tr->global : tr->partswap;
4742 if (maxtrav > tr->ntips - 3) maxtrav = tr->ntips - 3;
4743 }
4744
4745 printf(" Ln Likelihood =%14.5f\n", tr->likelihood);
4746 if (! optimize(tr, maxtrav, bt)) return FALSE;
4747 }
4748
4749 printf("\nExamined %d %s\n", bt->numtrees,
4750 bt->numtrees != 1 ? "trees" : "tree");
4751
4752 treefile = adef->trout ? fopen_pid("treefile", "w", filename) : (FILE *) NULL;
4753 (void) showBestTrees(bt, tr, adef, treefile);
4754 if (treefile) {
4755 (void) fclose(treefile);
4756 printf("Tree also written to %s\n\n", filename);
4757 }
4758
4759 (void) cmpBestTrees(bt, tr);
4760
4761 # if DeleteCheckpointFile
4762 unlink_pid(checkpointname);
4763 # endif
4764
4765 Free(enterorder);
4766
4767 return TRUE;
4768 } /* makeDenovoTree */
4769
4770 /*==========================================================================*/
4771 /* "main" routine */
4772 /*==========================================================================*/
4773
4774 #if Sequential
main()4775 main ()
4776 #else
4777 slave ()
4778 #endif
4779 { /* DNA Maximum Likelihood */
4780 # if Master
4781 int starttime, inputtime, endtime;
4782 # endif
4783
4784 # if Master || Slave
4785 int my_id, nprocs, type, from, sz;
4786 char *msg;
4787 # endif
4788
4789 analdef *adef;
4790 rawdata *rdta;
4791 cruncheddata *cdta;
4792 tree *tr; /* current tree */
4793 bestlist *bt; /* topology of best found tree */
4794
4795
4796 # if Debug
4797 {
4798 char debugfilename[128];
4799 debug = fopen_pid("dnaml_debug", "w", debugfilename);
4800 }
4801 # endif
4802
4803 # if Master
4804 starttime = p4_clock();
4805 nprocs = p4_num_total_slaves();
4806
4807 if ((OUTFILE = freopen_pid("master.out", "w", stdout)) == NULL) {
4808 fprintf(stderr, "Could not open output file\n");
4809 exit(1);
4810 }
4811
4812 /* Receive input file name from host */
4813 type = DNAML_FILE_NAME;
4814 from = DNAML_HOST_ID;
4815 msg = NULL;
4816 p4_recv(& type, & from, & msg, & sz);
4817 if ((INFILE = fopen(msg, "r")) == NULL) {
4818 fprintf(stderr, "master could not open input file %s\n", msg);
4819 exit(1);
4820 }
4821 p4_msg_free(msg);
4822
4823 open_link(& comm_slave);
4824 # endif
4825
4826 # if DNAML_STEP
4827 begin_step_time = starttime;
4828 # endif
4829
4830 # if Slave
4831 my_id = p4_get_my_id();
4832 nprocs = p4_num_total_slaves();
4833
4834 /* Receive input file name from host */
4835 type = DNAML_FILE_NAME;
4836 from = DNAML_HOST_ID;
4837 msg = NULL;
4838 p4_recv(& type, & from, & msg, & sz);
4839 if ((INFILE = fopen(msg, "r")) == NULL) {
4840 fprintf(stderr, "slave could not open input file %s\n",msg);
4841 exit(1);
4842 }
4843 p4_msg_free(msg);
4844
4845 # ifdef P4DEBUG
4846 if ((OUTFILE = freopen_pid("slave.out", "w", stdout)) == NULL) {
4847 fprintf(stderr, "Could not open output file\n");
4848 exit(1);
4849 }
4850 # else
4851 if ((OUTFILE = freopen("/dev/null", "w", stdout)) == NULL) {
4852 fprintf(stderr, "Could not open output file\n");
4853 exit(1);
4854 }
4855 # endif
4856
4857 open_link(& comm_master);
4858 # endif
4859
4860
4861 /* Get data structure memory */
4862
4863 if (! (adef = (analdef *) Malloc(sizeof(analdef)))) {
4864 printf("ERROR: Unable to get memory for analysis definition\n\n");
4865 return 1;
4866 }
4867
4868 if (! (rdta = (rawdata *) Malloc(sizeof(rawdata)))) {
4869 printf("ERROR: Unable to get memory for raw DNA\n\n");
4870 return 1;
4871 }
4872
4873 if (! (cdta = (cruncheddata *) Malloc(sizeof(cruncheddata)))) {
4874 printf("ERROR: Unable to get memory for crunched DNA\n\n");
4875 return 1;
4876 }
4877
4878 if ((tr = (tree *) Malloc(sizeof(tree))) &&
4879 (bt = (bestlist *) Malloc(sizeof(bestlist)))) ;
4880 else {
4881 printf("ERROR: Unable to get memory for trees\n\n");
4882 return 1;
4883 }
4884 bt->ninit = 0;
4885
4886 if (! getinput(adef, rdta, cdta, tr)) return 1;
4887
4888 # if Master
4889 inputtime = p4_clock();
4890 printf("Input time %d milliseconds\n", inputtime - starttime);
4891 REPORT_STEP_TIME;
4892 # endif
4893
4894 # if Slave
4895 (void) fclose(INFILE);
4896 # endif
4897
4898 /* The material below would be a loop over jumbles and/or boots */
4899
4900 if (! makeweights(adef, rdta, cdta)) return 1;
4901 if (! makevalues(rdta, cdta)) return 1;
4902 if (adef->empf && ! empiricalfreqs(rdta, cdta)) return 1;
4903 reportfreqs(adef, rdta);
4904 if (! linkdata2tree(rdta, cdta, tr)) return 1;
4905
4906 if (! linkxarray(3, 3, cdta->endsite, & freextip, & usedxtip)) return 1;
4907 if (! setupnodex(tr)) return 1;
4908
4909 # if Slave
4910 if (! slaveTreeEvaluate(tr, bt)) return 1;
4911 # else
4912 if (! initBestTree(bt, adef->nkeep, tr->mxtips, tr->cdta->endsite)) return 1;
4913 if (! adef->usertree) {
4914 if (! makeDenovoTree(tr, bt, adef)) return 1;
4915 }
4916 else {
4917 if (! makeUserTree(tr, bt, adef)) return 1;
4918 }
4919 if (! freeBestTree(bt)) return 1;
4920 # endif
4921
4922 /* Endpoint for jumble and/or boot loop */
4923
4924 # if Master
4925 tr->likelihood = 1.0; /* terminate slaves */
4926 (void) sendTree(& comm_slave, tr);
4927 # endif
4928
4929 freeTree(tr);
4930
4931 # if Master
4932 close_link(& comm_slave);
4933 (void) fclose(INFILE);
4934
4935 REPORT_STEP_TIME;
4936 endtime = p4_clock();
4937 printf("Execution time %d milliseconds\n", endtime - inputtime);
4938 (void) fclose(OUTFILE);
4939 # endif
4940
4941 # if Slave
4942 close_link(& comm_master);
4943 (void) fclose(OUTFILE);
4944 # endif
4945
4946 # if Debug
4947 (void) fclose(debug);
4948 # endif
4949
4950 # if Master || Slave
4951 p4_send(DNAML_DONE, DNAML_HOST_ID, NULL, 0);
4952 # else
4953 return 0;
4954 # endif
4955 } /* DNA Maximum Likelihood */
4956