1 #define programName        "fastDNAml"
2 #define programVersion     "1.2.2"
3 #define programVersionInt   10202
4 #define programDate        "January 3, 2000"
5 #define programDateInt      20000103
6 
7 /*  fastDNAml, a program for estimation of phylogenetic trees from sequences.
8  *  Copyright (C) 1998, 1999, 2000 by Gary J. Olsen
9  *
10  *  This program is free software; you may redistribute it and/or modify it
11  *  under the terms of the GNU General Public License as published by the Free
12  *  Software Foundation; either version 2 of the License, or (at your option)
13  *  any later version.
14  *
15  *  This program is distributed in the hope that it will be useful, but
16  *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
17  *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
18  *  for more details.
19  *
20  *  You should have received a copy of the GNU General Public License along
21  *  with this program; if not, write to the Free Software Foundation, Inc.,
22  *  59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
23  *
24  *  For any other enquiries write to Gary J. Olsen, Department of Microbiology,
25  *  University of Illinois, Urbana, IL  61801, USA
26  *
27  *  Or send E-mail to gary@phylo.life.uiuc.edu
28  *
29  *
30  *  fastDNAml is based in part on the program dnaml by Joseph Felsenstein.
31  *
32  *  Copyright notice from dnaml:
33  *
34  *     version 3.3. (c) Copyright 1986, 1990 by the University of Washington
35  *     and Joseph Felsenstein.  Written by Joseph Felsenstein.  Permission is
36  *     granted to copy and use this program provided no fee is charged for it
37  *     and provided that this copyright notice is not removed.
38  *
39  *
40  *  When publishing work that based on results from fastDNAml please cite:
41  *
42  *  Felsenstein, J.  1981.  Evolutionary trees from DNA sequences:
43  *  A maximum likelihood approach.  J. Mol. Evol. 17: 368-376.
44  *
45  *  and
46  *
47  *  Olsen, G. J., Matsuda, H., Hagstrom, R., and Overbeek, R.  1994.
48  *  fastDNAml:  A tool for construction of phylogenetic trees of DNA
49  *  sequences using maximum likelihood.  Comput. Appl. Biosci. 10: 41-48.
50  */
51 
52 /*  Conversion to C and changes in sequential code by Gary Olsen, 1991-1994
53  *
54  *  p4 version by Hideo Matsuda and Ross Overbeek, 1991-1993
55  */
56 
57 /*
58  *  1.0    March 14, 1992
59  *         Initial "release" version
60  *
61  *  1.0.1  March 18, 1992
62  *         Add ntaxa to tree comments
63  *         Set minimum branch length on reading tree
64  *         Add blanks around operators in treeString (for prolog parsing)
65  *         Add program version to treeString comments
66  *
67  *  1.0.2  April 6, 1992
68  *         Improved option line diagnostics
69  *         Improved auxiliary line diagnostics
70  *         Removed some trailing blanks from output
71  *
72  *  1.0.3  April 6, 1992
73  *         Checkpoint trees that do not need any optimization
74  *         Print restart tree likelihood before optimizing
75  *         Fix treefile option so that it really toggles
76  *
77  *  1.0.4  July 13, 1992
78  *         Add support for tree fact (instead of true Newick tree) in
79  *            processTreeCom, treeReadLen, str_processTreeCom and
80  *            str_treeReadLen
81  *         Use bit operations in randum
82  *         Correct error in bootstrap mask used with weighting mask
83  *
84  *  1.0.5  August 22, 1992
85  *         Fix reading of underscore as first nonblank character in name
86  *         Add strchr and strstr functions to source code
87  *         Add output treefile name to message "Tree also written ..."
88  *
89  *  1.0.6  November 20, 1992
90  *         Change (! nsites) test in setupTopol to (nsites == 0) for MIPS R4000
91  *         Add vectorizing compiler directives for CRAY
92  *         Include updates and corrections to parallel code from H. Matsuda
93  *
94  *  1.0.7  March 25, 1993
95  *         Remove translation of underlines in taxon names
96  *
97  *  1.0.8  April 30, 1993
98  *         Remove version number from fastDNAml.h file name
99  *
100  *  1.0.9  August 12, 1993
101  *         Version was never released.
102  *         Redefine treefile formats and default:
103  *             0  None
104  *             1  Newick
105  *             2  Prolog
106  *             3  PHYLIP (Default)
107  *         Remove quote marks and comment from PHYLIP treefile format.
108  *
109  *  1.1.0  September 3-5, 1993
110  *         Arrays of size maxpatterns moved from stack to heap (mallocs) in
111  *            evaluateslope, makenewz, and cmpBestTrees.
112  *         Correct [maxsites] to [maxpatterns] in temporary array definitions
113  *            in Vectorize code of newview and evaluate.  (These should also
114  *            get converted to use malloc() at some point.)
115  *         Change randum to use 12 bit segments, not 6.  Change its seed
116  *            parameter to long *.
117  *         Remove the code that took the absolute value of random seeds.
118  *         Correct integer divide artifact in setting default transition/
119  *            transversion parameter values.
120  *         When transition/transversion ratio is "reset", change to small
121  *            value, not the program default.
122  *         Report the "reset" transition/transversion ratio in the output.
123  *         Move global data into analdef, rawDNA, and crunchedDNA structures.
124  *         Change names of routines white and digit to whitechar and digitchar.
125  *         Convert y[] to yType, which is normally char, but is int if the
126  *            Vectorize flag is set.
127  *         Split option line reading out of getoptions routine.
128  *
129  *  1.1.1  September 30, 1994
130  *         Incorporate changes made in 1.0.A (Feb. 11, 1994):
131  *            Remove processing of quotation marks within comments.
132  *            Break label finding into copy to string and find tip.
133  *            Generalize tree reading to read trees when names are and are not
134  *               already known.
135  *            Remove absolute value from randum seed reading.
136  *         Include integer version number and program date.
137  *         Remove maxsite, maxpatterns and maxsp limitations.
138  *         Incorporate code for retaining multiple trees.
139  *         Activate code for Hasegawa & Kishino test of tree differences.
140  *         Make quick add the default, with Q turning it off.
141  *         Make emperical frequencies the option with F turning it off.
142  *         Allow a residue frequency option line anywhere in the options.
143  *         Increase string length passed to treeString (should be length
144  *               checked, but ...)
145  *         Introduce (Sept.30) and fix (Oct. 26) bug in bootstrap sampling.
146  *         Fix error when user frequencies are last line and start with F.
147  *
148  *  1.2    September 5, 1997
149  *         Move likelihood components into structure.
150  *         Change rawDNA to rawdata.
151  *         Change crunchedDNA to cruncheddata.
152  *         Recast the likelihoods per site into an array of stuctures,
153  *               where each stucture (likelivector) includes the likelihoods
154  *               of each residue type at the site, and a magnitude scale
155  *               factor (exp).  This requires changing the space allocation,
156  *               newview, makenewz, evaluate, and sigma.
157  *         Change code of newview to rescale likelihoods up by 2**256 when
158  *               the largest value falls below 2**-256.  This should solve
159  *               floating point underflow for all practical sized trees.
160  *               No changes are necessary in makenewz or sigma, since only
161  *               relative likelihoods are necessary.
162  *
163  *  1.2.1  March 9, 1998
164  *         Convert likelihood adjustment factor (2**256) to a constant.
165  *         Fix vectorized calculation of likelihood (error introduced in 1.2)
166  *
167  *  1.2.2  December 23, 1998
168  *         General code clean-up.
169  *         Convert to function definitions with parameter type lists
170  *
171  *  1.2.2  January 3, 2000
172  *         Add copyright and license information
173  *         Make this the current release version
174  */
175 
176 #ifdef Master
177 #  undef  Master
178 #  define Master     1
179 #  define Slave      0
180 #  define Sequential 0
181 #else
182 #  ifdef Slave
183 #    undef Slave
184 #    define Master     0
185 #    define Slave      1
186 #    define Sequential 0
187 #  else
188 #    ifdef Sequential
189 #      undef Sequential
190 #    endif
191 #    define Master     0
192 #    define Slave      0
193 #    define Sequential 1
194 #  endif
195 #endif
196 
197 #ifdef  CRAY
198 #  define  Vectorize
199 #endif
200 
201 #ifdef  Vectorize
202 #  define maxpatterns  10000  /* maximum number of different site patterns */
203 #endif
204 
205 #include <stdio.h>
206 #include <math.h>
207 #include "fastDNAml.h"  /*  Requires version 1.2  */
208 
209 #if Master || Slave
210 #  include "p4.h"
211 #  include "comm_link.h"
212 #endif
213 
214 /*  Global variables */
215 
216 xarray       *usedxtip, *freextip;
217 
218 #if Sequential     /*  Use standard input */
219 #  undef   DNAML_STEP
220 #  define  DNAML_STEP  0
221 #  define  INFILE  stdin
222 #endif
223 
224 #if Master
225 #  define MAX_SEND_AHEAD 400
226   char   *best_tr_recv = NULL;     /* these are used for flow control */
227   double  best_lk_recv;
228   int     send_ahead = 0;          /* number of outstanding sends */
229 
230 #  ifdef DNAML_STEP
231 #    define  DNAML_STEP  1
232 #  endif
233 #  define  INFILE   Seqf
234 #  define  OUTFILE  Outf
235   FILE  *INFILE, *OUTFILE;
236   comm_block comm_slave;
237 #endif
238 
239 #if Slave
240 #  undef   DNAML_STEP
241 #  define  DNAML_STEP  0
242 #  define  INFILE   Seqf
243 #  define  OUTFILE  Outf
244   FILE  *INFILE, *OUTFILE;
245   comm_block comm_master;
246 #endif
247 
248 #if Debug
249   FILE *debug;
250 #endif
251 
252 #if DNAML_STEP
253   int begin_step_time, end_step_time;
254 #  define  REPORT_ADD_SPECS  p4_send(DNAML_ADD_SPECS, DNAML_HOST_ID, NULL, 0)
255 #  define  REPORT_SEND_TREE  p4_send(DNAML_SEND_TREE, DNAML_HOST_ID, NULL, 0)
256 #  define  REPORT_RECV_TREE  p4_send(DNAML_RECV_TREE, DNAML_HOST_ID, NULL, 0)
257 #  define  REPORT_STEP_TIME \
258    {\
259        char send_buf[80]; \
260        end_step_time = p4_clock(); \
261        (void) sprintf(send_buf, "%d", end_step_time-begin_step_time); \
262        p4_send(DNAML_STEP_TIME, DNAML_HOST_ID, send_buf,strlen(send_buf)+1); \
263        begin_step_time = end_step_time; \
264    }
265 #else
266 #  define  REPORT_ADD_SPECS
267 #  define  REPORT_SEND_TREE
268 #  define  REPORT_RECV_TREE
269 #  define  REPORT_STEP_TIME
270 #endif
271 
272 /*=======================================================================*/
273 /*                              PROGRAM                                  */
274 /*=======================================================================*/
275 /*                    Best tree handling for dnaml                       */
276 /*=======================================================================*/
277 
278 /*  Tip value comparisons
279  *
280  *  Use void pointers to hide type from other routines.  Only tipValPtr and
281  *  cmpTipVal need to be changed to alter the nature of the values compared
282  *  (e.g., names instead of node numbers).
283  *
284  *    cmpTipVal(tipValPtr(nodeptr p), tipValPtr(nodeptr q)) == -1, 0 or 1.
285  *
286  *  This provides direct comparison of tip values (for example, for
287  *  definition of tr->start).
288  */
289 
290 
tipValPtr(nodeptr p)291 void  *tipValPtr (nodeptr p)
292   { return  (void *) & p->number;
293     }
294 
295 
cmpTipVal(void * v1,void * v2)296 int  cmpTipVal (void *v1, void *v2)
297   { /* cmpTipVal */
298     int  i1, i2;
299 
300     i1 = *((int *) v1);
301     i2 = *((int *) v2);
302     return  (i1 < i2) ? -1 : ((i1 == i2) ? 0 : 1);
303   } /* cmpTipVal */
304 
305 
306 /*  These are the only routines that need to UNDERSTAND topologies */
307 
308 
setupTopol(int maxtips,int nsites)309 topol  *setupTopol (int maxtips, int nsites)
310   { /* setupTopol */
311     topol   *tpl;
312 
313     if (! (tpl = (topol *) Malloc(sizeof(topol))) ||
314         ! (tpl->links = (connptr) Malloc((2*maxtips-3) * sizeof(connect))) ||
315         (nsites && ! (tpl->log_f
316                 = (double *) Malloc(nsites * sizeof(double))))) {
317       printf("ERROR: Unable to get topology memory");
318       tpl = (topol *) NULL;
319       }
320 
321     else {
322       if (nsites == 0)  tpl->log_f = (double *) NULL;
323       tpl->likelihood  = unlikely;
324       tpl->start       = (node *) NULL;
325       tpl->nextlink    = 0;
326       tpl->ntips       = 0;
327       tpl->nextnode    = 0;
328       tpl->opt_level   = 0;     /* degree of branch swapping explored */
329       tpl->scrNum      = 0;     /* position in sorted list of scores */
330       tpl->tplNum      = 0;     /* position in sorted list of trees */
331       tpl->log_f_valid = 0;     /* log_f value sites */
332       tpl->prelabeled  = TRUE;
333       tpl->smoothed    = FALSE; /* branch optimization converged? */
334       }
335 
336     return  tpl;
337   } /* setupTopol */
338 
339 
freeTopol(topol * tpl)340 void  freeTopol (topol *tpl)
341   { /* freeTopol */
342     Free(tpl->links);
343     if (tpl->log_f)  Free(tpl->log_f);
344     Free(tpl);
345   } /* freeTopol */
346 
347 
saveSubtree(nodeptr p,topol * tpl)348 int  saveSubtree (nodeptr p, topol *tpl)
349     /*  Save a subtree in a standard order so that earlier branches
350      *  from a node contain lower value tips than do second branches from
351      *  the node.  This code works with arbitrary furcations in the tree.
352      */
353   { /* saveSubtree */
354     connptr  r, r0;
355     nodeptr  q, s;
356     int      t, t0, t1;
357 
358     r0 = tpl->links;
359     r = r0 + (tpl->nextlink)++;
360     r->p = p;
361     r->q = q = p->back;
362     r->z = p->z;
363     r->descend = 0;                     /* No children (yet) */
364 
365     if (q->tip) {
366       r->valptr = tipValPtr(q);         /* Assign value */
367       }
368 
369     else {                              /* Internal node, look at children */
370       s = q->next;                      /* First child */
371       do {
372         t = saveSubtree(s, tpl);        /* Generate child's subtree */
373 
374         t0 = 0;                         /* Merge child into list */
375         t1 = r->descend;
376         while (t1 && (cmpTipVal(r0[t1].valptr, r0[t].valptr) < 0)) {
377           t0 = t1;
378           t1 = r0[t1].sibling;
379           }
380         if (t0) r0[t0].sibling = t;  else  r->descend = t;
381         r0[t].sibling = t1;
382 
383         s = s->next;                    /* Next child */
384         } while (s != q);
385 
386       r->valptr = r0[r->descend].valptr;   /* Inherit first child's value */
387       }                                 /* End of internal node processing */
388 
389     return  r - r0;
390   } /* saveSubtree */
391 
392 
minSubtreeTip(nodeptr p0)393 nodeptr  minSubtreeTip (nodeptr  p0)
394   { /* minTreeTip */
395     nodeptr  minTip, p, testTip;
396 
397     if (p0->tip) return p0;
398 
399     p = p0->next;
400     minTip = minSubtreeTip(p->back);
401     while ((p = p->next) != p0) {
402       testTip = minSubtreeTip(p->back);
403       if (cmpTipVal(tipValPtr(testTip), tipValPtr(minTip)) < 0)
404         minTip = testTip;
405       }
406     return minTip;
407   } /* minTreeTip */
408 
409 
minTreeTip(nodeptr p)410 nodeptr  minTreeTip (nodeptr  p)
411   { /* minTreeTip */
412     nodeptr  minp, minpb;
413 
414     minp  = minSubtreeTip(p);
415     minpb = minSubtreeTip(p->back);
416     return cmpTipVal(tipValPtr(minp), tipValPtr(minpb)) < 0 ? minp : minpb;
417   } /* minTreeTip */
418 
419 
saveTree(tree * tr,topol * tpl)420 void saveTree (tree *tr, topol *tpl)
421     /*  Save a tree topology in a standard order so that first branches
422      *  from a node contain lower value tips than do second branches from
423      *  the node.  The root tip should have the lowest value of all.
424      */
425   { /* saveTree */
426     connptr  r;
427     double  *tr_log_f, *tpl_log_f;
428     int  i;
429 
430     tpl->nextlink = 0;                             /* Reset link pointer */
431     r = tpl->links + saveSubtree(minTreeTip(tr->start), tpl);  /* Save tree */
432     r->sibling = 0;
433 
434     tpl->likelihood = tr->likelihood;
435     tpl->start      = tr->start;
436     tpl->ntips      = tr->ntips;
437     tpl->nextnode   = tr->nextnode;
438     tpl->opt_level  = tr->opt_level;
439     tpl->prelabeled = tr->prelabeled;
440     tpl->smoothed   = tr->smoothed;
441 
442     if (tpl_log_f = tpl->log_f) {
443       tr_log_f  = tr->log_f;
444       i = tpl->log_f_valid = tr->log_f_valid;
445       while (--i >= 0)  *tpl_log_f++ = *tr_log_f++;
446       }
447     else {
448       tpl->log_f_valid = 0;
449       }
450   } /* saveTree */
451 
452 
copyTopol(topol * tpl1,topol * tpl2)453 void copyTopol (topol *tpl1, topol *tpl2)
454   { /* copyTopol */
455     connptr  r1, r2, r10, r20;
456     double  *tpl1_log_f, *tpl2_log_f;
457     int  i;
458 
459     r10 = tpl1->links;
460     r20 = tpl2->links;
461     tpl2->nextlink = tpl1->nextlink;
462 
463     r1 = r10;
464     r2 = r20;
465     i = 2 * tpl1->ntips - 3;
466     while (--i >= 0) {
467       r2->z = r1->z;
468       r2->p = r1->p;
469       r2->q = r1->q;
470       r2->valptr = r1->valptr;
471       r2->descend = r1->descend;
472       r2->sibling = r1->sibling;
473       r1++;
474       r2++;
475       }
476 
477     if (tpl1->log_f_valid && tpl2->log_f) {
478       tpl1_log_f = tpl1->log_f;
479       tpl2_log_f = tpl2->log_f;
480       tpl2->log_f_valid = i = tpl1->log_f_valid;
481       while (--i >= 0)  *tpl2_log_f++ = *tpl1_log_f++;
482       }
483     else {
484       tpl2->log_f_valid = 0;
485       }
486 
487     tpl2->likelihood = tpl1->likelihood;
488     tpl2->start      = tpl1->start;
489     tpl2->ntips      = tpl1->ntips;
490     tpl2->nextnode   = tpl1->nextnode;
491     tpl2->opt_level  = tpl1->opt_level;
492     tpl2->prelabeled = tpl1->prelabeled;
493     tpl2->scrNum     = tpl1->scrNum;
494     tpl2->tplNum     = tpl1->tplNum;
495     tpl2->smoothed   = tpl1->smoothed;
496   } /* copyTopol */
497 
498 
restoreTree(topol * tpl,tree * tr)499 boolean restoreTree (topol *tpl, tree *tr)
500   { /* restoreTree */
501     void  hookup();
502     boolean  initrav();
503 
504     connptr  r;
505     nodeptr  p, p0;
506     double  *tr_log_f, *tpl_log_f;
507     int  i;
508 
509 /*  Clear existing connections */
510 
511     for (i = 1; i <= 2*(tr->mxtips) - 2; i++) {  /* Uses p = p->next at tip */
512       p0 = p = tr->nodep[i];
513       do {
514         p->back = (nodeptr) NULL;
515         p = p->next;
516         } while (p != p0);
517       }
518 
519 /*  Copy connections from topology */
520 
521     for (r = tpl->links, i = 0; i < tpl->nextlink; r++, i++) {
522       hookup(r->p, r->q, r->z);
523       }
524 
525     tr->likelihood = tpl->likelihood;
526     tr->start      = tpl->start;
527     tr->ntips      = tpl->ntips;
528     tr->nextnode   = tpl->nextnode;
529     tr->opt_level  = tpl->opt_level;
530     tr->prelabeled = tpl->prelabeled;
531     tr->smoothed   = tpl->smoothed;
532 
533     if (tpl_log_f = tpl->log_f) {
534       tr_log_f = tr->log_f;
535       i = tr->log_f_valid = tpl->log_f_valid;
536       while (--i >= 0)  *tr_log_f++ = *tpl_log_f++;
537       }
538     else {
539       tr->log_f_valid = 0;
540       }
541 
542     return (initrav(tr, tr->start) && initrav(tr, tr->start->back));
543   } /* restoreTree */
544 
545 
initBestTree(bestlist * bt,int newkeep,int numsp,int sites)546 int initBestTree (bestlist *bt, int newkeep, int numsp, int sites)
547   { /* initBestTree */
548     int  i, nlogf;
549 
550 
551     bt->nkeep = 0;
552 
553     if (bt->ninit <= 0) {
554       if (! (bt->start = setupTopol(numsp, sites)))  return  0;
555       bt->ninit = -1;
556       bt->nvalid = 0;
557       bt->numtrees = 0;
558       bt->best = unlikely;
559       bt->improved = FALSE;
560       bt->byScore = (topol **) Malloc((newkeep+1) * sizeof(topol *));
561       bt->byTopol = (topol **) Malloc((newkeep+1) * sizeof(topol *));
562       if (! bt->byScore || ! bt->byTopol) {
563         fprintf(stderr, "initBestTree: Malloc failure\n");
564         return 0;
565         }
566       }
567     else if (ABS(newkeep) > bt->ninit) {
568       if (newkeep <  0) newkeep = -(bt->ninit);
569       else newkeep = bt->ninit;
570       }
571 
572     if (newkeep < 1) {    /*  Use negative newkeep to clear list  */
573       newkeep = -newkeep;
574       if (newkeep < 1) newkeep = 1;
575       bt->nvalid = 0;
576       bt->best = unlikely;
577       }
578 
579     if (bt->nvalid >= newkeep) {
580       bt->nvalid = newkeep;
581       bt->worst = bt->byScore[newkeep]->likelihood;
582       }
583     else {
584       bt->worst = unlikely;
585       }
586 
587     for (i = bt->ninit + 1; i <= newkeep; i++) {
588       nlogf = (i <= maxlogf) ? sites : 0;
589       if (! (bt->byScore[i] = setupTopol(numsp, nlogf)))  break;
590       bt->byTopol[i] = bt->byScore[i];
591       bt->ninit = i;
592       }
593 
594     return  (bt->nkeep = MIN(newkeep, bt->ninit));
595   } /* initBestTree */
596 
597 
598 
resetBestTree(bestlist * bt)599 int resetBestTree (bestlist *bt)
600   { /* resetBestTree */
601     bt->best     = unlikely;
602     bt->worst    = unlikely;
603     bt->nvalid   = 0;
604     bt->improved = FALSE;
605   } /* resetBestTree */
606 
607 
freeBestTree(bestlist * bt)608 boolean  freeBestTree(bestlist *bt)
609   { /* freeBestTree */
610     while (bt->ninit >= 0)  freeTopol(bt->byScore[(bt->ninit)--]);
611     freeTopol(bt->start);
612     return TRUE;
613   } /* freeBestTree */
614 
615 
616 /*  Compare two trees, assuming that each is in standard order.  Return
617  *  -1 if first preceeds second, 0 if they are identical, or +1 if first
618  *  follows second in standard order.  Lower number tips preceed higher
619  *  number tips.  A tip preceeds a corresponding internal node.  Internal
620  *  nodes are ranked by their lowest number tip.
621  */
622 
cmpSubtopol(connptr p10,connptr p1,connptr p20,connptr p2)623 int  cmpSubtopol (connptr p10, connptr p1, connptr p20, connptr p2)
624   { /* cmpSubtopol */
625     connptr  p1d, p2d;
626     int  cmp;
627 
628     if (! p1->descend && ! p2->descend)          /* Two tips */
629       return cmpTipVal(p1->valptr, p2->valptr);
630 
631     if (! p1->descend) return -1;                /* p1 = tip, p2 = node */
632     if (! p2->descend) return  1;                /* p2 = tip, p1 = node */
633 
634     p1d = p10 + p1->descend;
635     p2d = p20 + p2->descend;
636     while (1) {                                  /* Two nodes */
637       if (cmp = cmpSubtopol(p10, p1d, p20, p2d))  return cmp; /* Subtrees */
638       if (! p1d->sibling && ! p2d->sibling)  return  0; /* Lists done */
639       if (! p1d->sibling) return -1;             /* One done, other not */
640       if (! p2d->sibling) return  1;             /* One done, other not */
641       p1d = p10 + p1d->sibling;                  /* Neither done */
642       p2d = p20 + p2d->sibling;
643       }
644   } /* cmpSubtopol */
645 
646 
647 
cmpTopol(void * tpl1,void * tpl2)648 int  cmpTopol (void *tpl1, void *tpl2)
649   { /* cmpTopol */
650     connptr  r1, r2;
651     int      cmp;
652 
653     r1 = ((topol *) tpl1)->links;
654     r2 = ((topol *) tpl2)->links;
655     cmp = cmpTipVal(tipValPtr(r1->p), tipValPtr(r2->p));
656     if (cmp) return cmp;
657     return  cmpSubtopol(r1, r1, r2, r2);
658   } /* cmpTopol */
659 
660 
661 
cmpTplScore(void * tpl1,void * tpl2)662 int  cmpTplScore (void *tpl1, void *tpl2)
663   { /* cmpTplScore */
664     double  l1, l2;
665 
666     l1 = ((topol *) tpl1)->likelihood;
667     l2 = ((topol *) tpl2)->likelihood;
668     return  (l1 > l2) ? -1 : ((l1 == l2) ? 0 : 1);
669   } /* cmpTplScore */
670 
671 
672 
673 /*  Find an item in a sorted list of n items.  If the item is in the list,
674  *  return its index.  If it is not in the list, return the negative of the
675  *  position into which it should be inserted.
676  */
677 
findInList(void * item,void * list[],int n,int (* cmpFunc)())678 int  findInList (void *item, void *list[], int n, int (* cmpFunc)())
679   { /* findInList */
680     int  mid, hi, lo, cmp;
681 
682     if (n < 1) return  -1;                    /*  No match; first index  */
683 
684     lo = 1;
685     mid = 0;
686     hi = n;
687     while (lo < hi) {
688       mid = (lo + hi) >> 1;
689       cmp = (* cmpFunc)(item, list[mid-1]);
690       if (cmp) {
691         if (cmp < 0) hi = mid;
692         else lo = mid + 1;
693         }
694       else  return  mid;                        /*  Exact match  */
695       }
696 
697     if (lo != mid) {
698        cmp = (* cmpFunc)(item, list[lo-1]);
699        if (cmp == 0) return lo;
700        }
701     if (cmp > 0) lo++;                         /*  Result of step = 0 test  */
702     return  -lo;
703   } /* findInList */
704 
705 
706 
findTreeInList(bestlist * bt,tree * tr)707 int  findTreeInList (bestlist *bt, tree *tr)
708   { /* findTreeInList */
709     topol  *tpl;
710 
711     tpl = bt->byScore[0];
712     saveTree(tr, tpl);
713     return  findInList((void *) tpl, (void **) (& (bt->byTopol[1])),
714                        bt->nvalid, cmpTopol);
715   } /* findTreeInList */
716 
717 
saveBestTree(bestlist * bt,tree * tr)718 int  saveBestTree (bestlist *bt, tree *tr)
719   { /* saveBestTree */
720     double *tr_log_f, *tpl_log_f;
721     topol  *tpl, *reuse;
722     int  tplNum, scrNum, reuseScrNum, reuseTplNum, i, oldValid, newValid;
723 
724     tplNum = findTreeInList(bt, tr);
725     tpl = bt->byScore[0];
726     oldValid = newValid = bt->nvalid;
727 
728     if (tplNum > 0) {                      /* Topology is in list  */
729       reuse = bt->byTopol[tplNum];         /* Matching topol  */
730       reuseScrNum = reuse->scrNum;
731       reuseTplNum = reuse->tplNum;
732       }
733                                            /* Good enough to keep? */
734     else if (tr->likelihood < bt->worst)  return 0;
735 
736     else {                                 /* Topology is not in list */
737       tplNum = -tplNum;                    /* Add to list (not replace) */
738       if (newValid < bt->nkeep) bt->nvalid = ++newValid;
739       reuseScrNum = newValid;              /* Take worst tree */
740       reuse = bt->byScore[reuseScrNum];
741       reuseTplNum = (newValid > oldValid) ? newValid : reuse->tplNum;
742       if (tr->likelihood > bt->start->likelihood) bt->improved = TRUE;
743       }
744 
745     scrNum = findInList((void *) tpl, (void **) (& (bt->byScore[1])),
746                          oldValid, cmpTplScore);
747     scrNum = ABS(scrNum);
748 
749     if (scrNum < reuseScrNum)
750       for (i = reuseScrNum; i > scrNum; i--)
751         (bt->byScore[i] = bt->byScore[i-1])->scrNum = i;
752 
753     else if (scrNum > reuseScrNum) {
754       scrNum--;
755       for (i = reuseScrNum; i < scrNum; i++)
756         (bt->byScore[i] = bt->byScore[i+1])->scrNum = i;
757       }
758 
759     if (tplNum < reuseTplNum)
760       for (i = reuseTplNum; i > tplNum; i--)
761         (bt->byTopol[i] = bt->byTopol[i-1])->tplNum = i;
762 
763     else if (tplNum > reuseTplNum) {
764       tplNum--;
765       for (i = reuseTplNum; i < tplNum; i++)
766         (bt->byTopol[i] = bt->byTopol[i+1])->tplNum = i;
767       }
768 
769     if (tpl_log_f = tpl->log_f) {
770       tr_log_f  = tr->log_f;
771       i = tpl->log_f_valid = tr->log_f_valid;
772       while (--i >= 0)  *tpl_log_f++ = *tr_log_f++;
773       }
774     else {
775       tpl->log_f_valid = 0;
776       }
777 
778     tpl->scrNum = scrNum;
779     tpl->tplNum = tplNum;
780     bt->byTopol[tplNum] = bt->byScore[scrNum] = tpl;
781     bt->byScore[0] = reuse;
782 
783     if (scrNum == 1)  bt->best = tr->likelihood;
784     if (newValid == bt->nkeep) bt->worst = bt->byScore[newValid]->likelihood;
785 
786     return  scrNum;
787   } /* saveBestTree */
788 
789 
startOpt(bestlist * bt,tree * tr)790 int  startOpt (bestlist *bt, tree *tr)
791   { /* startOpt */
792     int  scrNum;
793 
794     scrNum = saveBestTree(bt, tr);
795     copyTopol(bt->byScore[scrNum], bt->start);
796     bt->improved = FALSE;
797     return  scrNum;
798   } /* startOpt */
799 
800 
setOptLevel(bestlist * bt,int opt_level)801 int  setOptLevel (bestlist *bt, int opt_level)
802   { /* setOptLevel */
803     int  tplNum, scrNum;
804 
805     tplNum = findInList((void *) bt->start, (void **) (&(bt->byTopol[1])),
806                         bt->nvalid, cmpTopol);
807     if (tplNum > 0) {
808       bt->byTopol[tplNum]->opt_level = opt_level;
809       scrNum = bt->byTopol[tplNum]->scrNum;
810       }
811     else {
812       scrNum = 0;
813       }
814 
815     return  scrNum;
816   } /* setOptLevel */
817 
818 
recallBestTree(bestlist * bt,int rank,tree * tr)819 int  recallBestTree (bestlist *bt, int rank, tree *tr)
820   { /* recallBestTree */
821     if (rank < 1)  rank = 1;
822     if (rank > bt->nvalid)  rank = bt->nvalid;
823     if (rank > 0)  if (! restoreTree(bt->byScore[rank], tr)) return FALSE;
824     return  rank;
825   } /* recallBestTree */
826 
827 
828 /*=======================================================================*/
829 /*                       End of best tree routines                       */
830 /*=======================================================================*/
831 
832 
833 #if 0
834   void hang(char *msg)
835    { printf("Hanging around: %s\n", msg);
836      while(1);
837      }
838 #endif
839 
840 
getnums(rawdata * rdta)841 boolean getnums (rawdata *rdta)
842     /* input number of species, number of sites */
843   { /* getnums */
844     printf("\n%s, version %s, %s,\nCopyright (C) 1998, 1999, 2000 by Gary J. Olsen\n\n",
845             programName,
846             programVersion,
847             programDate);
848     printf("Based in part on Joseph Felsenstein's\n\n");
849     printf("   Nucleic acid sequence Maximum Likelihood method, version 3.3\n\n\n");
850 
851     if (fscanf(INFILE, "%d %d", & rdta->numsp, & rdta->sites) != 2) {
852       printf("ERROR: Problem reading number of species and sites\n");
853       return FALSE;
854       }
855     printf("%d Species, %d Sites\n\n", rdta->numsp, rdta->sites);
856 
857     if (rdta->numsp < 4) {
858       printf("TOO FEW SPECIES\n");
859       return FALSE;
860       }
861 
862     if (rdta->sites < 1) {
863       printf("TOO FEW SITES\n");
864       return FALSE;
865       }
866 
867     return TRUE;
868   } /* getnums */
869 
870 
digitchar(int ch)871 boolean digitchar (int ch) {return (ch >= '0' && ch <= '9'); }
872 
873 
whitechar(int ch)874 boolean whitechar (int ch)
875    { return (ch == ' ' || ch == '\n' || ch == '\t');
876      }
877 
878 
uppercase(int * chptr)879 void uppercase (int *chptr)
880     /* convert character to upper case -- either ASCII or EBCDIC */
881   { /* uppercase */
882     int  ch;
883 
884     ch = *chptr;
885     if ((ch >= 'a' && ch <= 'i') || (ch >= 'j' && ch <= 'r')
886                                  || (ch >= 's' && ch <= 'z'))
887       *chptr = ch + 'A' - 'a';
888   } /* uppercase */
889 
890 
base36(int ch)891 int base36 (int ch)
892   { /* base36 */
893     if      (ch >= '0' && ch <= '9') return (ch - '0');
894     else if (ch >= 'A' && ch <= 'I') return (ch - 'A' + 10);
895     else if (ch >= 'J' && ch <= 'R') return (ch - 'J' + 19);
896     else if (ch >= 'S' && ch <= 'Z') return (ch - 'S' + 28);
897     else if (ch >= 'a' && ch <= 'i') return (ch - 'a' + 10);
898     else if (ch >= 'j' && ch <= 'r') return (ch - 'j' + 19);
899     else if (ch >= 's' && ch <= 'z') return (ch - 's' + 28);
900     else return -1;
901   } /* base36 */
902 
903 
itobase36(int i)904 int itobase36 (int i)
905   { /* itobase36 */
906     if      (i <  0) return '?';
907     else if (i < 10) return (i      + '0');
908     else if (i < 19) return (i - 10 + 'A');
909     else if (i < 28) return (i - 19 + 'J');
910     else if (i < 36) return (i - 28 + 'S');
911     else return '?';
912   } /* itobase36 */
913 
914 
findch(int c)915 int findch (int c)
916   { /* findch */
917     int ch;
918 
919     while ((ch = getc(INFILE)) != EOF && ch != c) ;
920     return  ch;
921   } /* findch */
922 
923 
924 #if Master || Slave
str_findch(char ** treestrp,int c)925 int str_findch (char **treestrp, int c)
926   { /* str_findch */
927     int ch;
928 
929     while ((ch = *(*treestrp)++) != NULL && ch != c) ;
930     return  ch;
931   } /* str_findch */
932 #endif
933 
934 
inputboot(analdef * adef)935 boolean inputboot(analdef *adef)
936     /* read the bootstrap auxilliary info */
937   { /* inputboot */
938     if (! adef->boot) {
939       printf("ERROR: Unexpected Bootstrap auxiliary data line\n");
940       return FALSE;
941       }
942     else if (fscanf(INFILE, "%ld", & adef->boot) != 1 ||
943              findch('\n') == EOF) {
944       printf("ERROR: Problem reading boostrap random seed value\n");
945       return FALSE;
946       }
947 
948     return TRUE;
949   } /* inputboot */
950 
951 
inputcategories(rawdata * rdta)952 boolean inputcategories (rawdata *rdta)
953     /* read the category rates and the categories for each site */
954   { /* inputcategories */
955     int  i, j, ch, ci;
956 
957     if (rdta->categs >= 0) {
958       printf("ERROR: Unexpected Categories auxiliary data line\n");
959       return FALSE;
960       }
961     if (fscanf(INFILE, "%d", & rdta->categs) != 1) {
962       printf("ERROR: Problem reading number of rate categories\n");
963       return FALSE;
964       }
965     if (rdta->categs < 1 || rdta->categs > maxcategories) {
966       printf("ERROR: Bad number of categories: %d\n", rdta->categs);
967       printf("Must be in range 1 - %d\n", maxcategories);
968       return FALSE;
969       }
970 
971     for (j = 1; j <= rdta->categs
972            && fscanf(INFILE, "%lf", &(rdta->catrat[j])) == 1; j++) ;
973 
974     if ((j <= rdta->categs) || (findch('\n') == EOF)) {
975       printf("ERROR: Problem reading rate values\n");
976       return FALSE;
977       }
978 
979     for (i = 1; i <= nmlngth; i++)  (void) getc(INFILE);
980 
981     i = 1;
982     while (i <= rdta->sites) {
983       ch = getc(INFILE);
984       ci = base36(ch);
985       if (ci >= 0 && ci <= rdta->categs)
986         rdta->sitecat[i++] = ci;
987       else if (! whitechar(ch)) {
988         printf("ERROR: Bad category character (%c) at site %d\n", ch, i);
989         return FALSE;
990         }
991       }
992 
993     if (findch('\n') == EOF) {      /* skip to end of line */
994       printf("ERROR: Missing newline at end of category data\n");
995       return FALSE;
996       }
997 
998     return TRUE;
999   } /* inputcategories */
1000 
1001 
inputextra(analdef * adef)1002 boolean inputextra (analdef *adef)
1003   { /* inputextra */
1004     if (fscanf(INFILE,"%d", & adef->extra) != 1 ||
1005         findch('\n') == EOF) {
1006       printf("ERROR: Problem reading extra info value\n");
1007       return FALSE;
1008       }
1009 
1010     return TRUE;
1011   } /* inputextra */
1012 
1013 
inputfreqs(rawdata * rdta)1014 boolean inputfreqs (rawdata *rdta)
1015   { /* inputfreqs */
1016     if (fscanf(INFILE, "%lf%lf%lf%lf",
1017                & rdta->freqa, & rdta->freqc,
1018                & rdta->freqg, & rdta->freqt) != 4 ||
1019         findch('\n') == EOF) {
1020       printf("ERROR: Problem reading user base frequencies data\n");
1021       return  FALSE;
1022       }
1023 
1024     rdta->freqread = TRUE;
1025     return TRUE;
1026   } /* inputfreqs */
1027 
1028 
inputglobal(tree * tr)1029 boolean inputglobal (tree *tr)
1030     /* input the global option information */
1031   { /* inputglobal */
1032     int  ch;
1033 
1034     if (tr->global != -2) {
1035       printf("ERROR: Unexpected Global auxiliary data line\n");
1036       return FALSE;
1037       }
1038     if (fscanf(INFILE, "%d", &(tr->global)) != 1) {
1039       printf("ERROR: Problem reading rearrangement region size\n");
1040       return FALSE;
1041       }
1042     if (tr->global < 0) {
1043       printf("WARNING: Global region size too small;\n");
1044       printf("         value reset to local\n\n");
1045       tr->global = 1;
1046       }
1047     else if (tr->global == 0)  tr->partswap = 0;
1048     else if (tr->global > tr->mxtips - 3) {
1049       tr->global = tr->mxtips - 3;
1050       }
1051 
1052     while ((ch = getc(INFILE)) != '\n') {  /* Scan for second value */
1053       if (! whitechar(ch)) {
1054         if (ch != EOF)  (void) ungetc(ch, INFILE);
1055         if (ch == EOF || fscanf(INFILE, "%d", &(tr->partswap)) != 1
1056                       || findch('\n') == EOF) {
1057           printf("ERROR: Problem reading insert swap region size\n");
1058           return FALSE;
1059           }
1060         else if (tr->partswap < 0)  tr->partswap = 1;
1061         else if (tr->partswap > tr->mxtips - 3) {
1062           tr->partswap = tr->mxtips - 3;
1063           }
1064 
1065         if (tr->partswap > tr->global)  tr->global = tr->partswap;
1066         break;   /*  Break while loop */
1067         }
1068       }
1069 
1070     return TRUE;
1071   } /* inputglobal */
1072 
1073 
inputjumble(analdef * adef)1074 boolean inputjumble (analdef *adef)
1075   { /* inputjumble */
1076     if (! adef->jumble) {
1077       printf("ERROR: Unexpected Jumble auxiliary data line\n");
1078       return FALSE;
1079       }
1080     else if (fscanf(INFILE, "%ld", & adef->jumble) != 1 ||
1081              findch('\n') == EOF) {
1082       printf("ERROR: Problem reading jumble random seed value\n");
1083       return FALSE;
1084       }
1085     else if (adef->jumble == 0) {
1086       printf("WARNING: Jumble random number seed is zero\n\n");
1087       }
1088 
1089     return TRUE;
1090   } /* inputjumble */
1091 
1092 
inputkeep(analdef * adef)1093 boolean inputkeep (analdef *adef)
1094   { /* inputkeep */
1095     if (fscanf(INFILE, "%d", & adef->nkeep) != 1 ||
1096         findch('\n') == EOF || adef->nkeep < 1) {
1097       printf("ERROR: Problem reading number of kept trees\n");
1098       return FALSE;
1099       }
1100 
1101     return TRUE;
1102   } /* inputkeep */
1103 
1104 
inputoutgroup(analdef * adef,tree * tr)1105 boolean inputoutgroup (analdef *adef, tree *tr)
1106   { /* inputoutgroup */
1107     if (! adef->root || tr->outgr > 0) {
1108       printf("ERROR: Unexpected Outgroup auxiliary data line\n");
1109       return FALSE;
1110       }
1111     else if (fscanf(INFILE, "%d", &(tr->outgr)) != 1 ||
1112              findch('\n') == EOF) {
1113       printf("ERROR: Problem reading outgroup number\n");
1114       return FALSE;
1115       }
1116     else if ((tr->outgr < 1) || (tr->outgr > tr->mxtips)) {
1117       printf("ERROR: Bad outgroup: '%d'\n", tr->outgr);
1118       return FALSE;
1119       }
1120 
1121     return TRUE;
1122   } /* inputoutgroup */
1123 
1124 
inputratio(rawdata * rdta)1125 boolean inputratio (rawdata *rdta)
1126   { /* inputratio */
1127     if (rdta->ttratio >= 0.0) {
1128       printf("ERROR: Unexpected Transition/transversion auxiliary data\n");
1129       return FALSE;
1130       }
1131     else if (fscanf(INFILE,"%lf", & rdta->ttratio)!=1 ||
1132              findch('\n') == EOF) {
1133       printf("ERROR: Problem reading transition/transversion ratio\n");
1134       return FALSE;
1135       }
1136 
1137     return TRUE;
1138   } /* inputratio */
1139 
1140 
1141 /*  Y 0 is treeNone   (no tree)
1142     Y 1 is treeNewick
1143     Y 2 is treeProlog
1144     Y 3 is treePHYLIP
1145  */
1146 
inputtreeopt(analdef * adef)1147 boolean inputtreeopt (analdef *adef)
1148   { /* inputtreeopt */
1149     if (! adef->trout) {
1150       printf("ERROR: Unexpected Treefile auxiliary data\n");
1151       return FALSE;
1152       }
1153     else if (fscanf(INFILE,"%d", & adef->trout) != 1 ||
1154              findch('\n') == EOF) {
1155       printf("ERROR: Problem reading output tree-type number\n");
1156       return FALSE;
1157       }
1158     else if ((adef->trout < 0) || (adef->trout > treeMaxType)) {
1159       printf("ERROR: Bad output tree-type number: '%d'\n", adef->trout);
1160       return FALSE;
1161       }
1162 
1163     return TRUE;
1164   } /* inputtreeopt */
1165 
1166 
inputweights(analdef * adef,rawdata * rdta,cruncheddata * cdta)1167 boolean inputweights (analdef *adef, rawdata *rdta, cruncheddata *cdta)
1168     /* input the character weights 0, 1, 2 ... 9, A, B, ... Y, Z */
1169   { /* inputweights */
1170     int i, ch, wi;
1171 
1172     if (! adef->userwgt || cdta->wgtsum > 0) {
1173       printf("ERROR: Unexpected Weights auxiliary data\n");
1174       return FALSE;
1175       }
1176 
1177     for (i = 2; i <= nmlngth; i++)  (void) getc(INFILE);
1178     cdta->wgtsum = 0;
1179     i = 1;
1180     while (i <= rdta->sites) {
1181       ch = getc(INFILE);
1182       wi = base36(ch);
1183       if (wi >= 0)
1184         cdta->wgtsum += rdta->wgt[i++] = wi;
1185       else if (! whitechar(ch)) {
1186         printf("ERROR: Bad weight character: '%c'", ch);
1187         printf("       Weights in dnaml must be a digit or a letter.\n");
1188         return FALSE;
1189         }
1190       }
1191 
1192     if (findch('\n') == EOF) {      /* skip to end of line */
1193       printf("ERROR: Missing newline at end of weight data\n");
1194       return FALSE;
1195       }
1196 
1197     return TRUE;
1198   } /* inputweights */
1199 
1200 
getoptions(analdef * adef,rawdata * rdta,cruncheddata * cdta,tree * tr)1201 boolean getoptions (analdef *adef, rawdata *rdta, cruncheddata *cdta, tree *tr)
1202   { /* getoptions */
1203     int     ch, i, extranum;
1204 
1205     adef->boot    =           0;  /* Don't bootstrap column weights */
1206     adef->empf    =        TRUE;  /* Use empirical base frequencies */
1207     adef->extra   =           0;  /* No extra runtime info unless requested */
1208     adef->interleaved =    TRUE;  /* By default, data format is interleaved */
1209     adef->jumble  =       FALSE;  /* Use random addition sequence */
1210     adef->nkeep   =           0;  /* Keep only the one best tree */
1211     adef->prdata  =       FALSE;  /* Don't echo data to output stream */
1212     adef->qadd    =        TRUE;  /* Smooth branches globally in add */
1213     adef->restart =       FALSE;  /* Restart from user tree */
1214     adef->root    =       FALSE;  /* User-defined outgroup rooting */
1215     adef->trout   = treeDefType;  /* Output tree file */
1216     adef->trprint =        TRUE;  /* Print tree to output stream */
1217     rdta->categs  =           0;  /* No rate categories */
1218     rdta->catrat[1] =       1.0;  /* Rate values */
1219     rdta->freqread =      FALSE;  /* User-defined frequencies not read yet */
1220     rdta->ttratio =         2.0;  /* Transition/transversion rate ratio */
1221     tr->global    =          -1;  /* Default search locale for optimum */
1222     tr->mxtips    = rdta->numsp;
1223     tr->outgr     =           1;  /* Outgroup number */
1224     tr->partswap  =           1;  /* Default to swap locally after insert */
1225     tr->userlen   =       FALSE;  /* User-supplied branch lengths */
1226     adef->usertree =      FALSE;  /* User-defined tree topologies */
1227     adef->userwgt =       FALSE;  /* User-defined position weights */
1228     extranum      =           0;
1229 
1230     while ((ch = getc(INFILE)) != '\n' && ch != EOF) {
1231       uppercase(& ch);
1232       switch (ch) {
1233           case '1' : adef->prdata  = ! adef->prdata; break;
1234           case '3' : adef->trprint = ! adef->trprint; break;
1235           case '4' : adef->trout   = treeDefType - adef->trout; break;
1236           case 'B' : adef->boot    =  1; extranum++; break;
1237           case 'C' : rdta->categs  = -1; extranum++; break;
1238           case 'E' : adef->extra   = -1; break;
1239           case 'F' : adef->empf    = ! adef->empf; break;
1240           case 'G' : tr->global    = -2; break;
1241           case 'I' : adef->interleaved = ! adef->interleaved; break;
1242           case 'J' : adef->jumble  = 1; extranum++; break;
1243           case 'K' : extranum++; break;
1244           case 'L' : tr->userlen   = TRUE; break;
1245           case 'O' : adef->root    = TRUE; tr->outgr = 0; extranum++; break;
1246           case 'Q' : adef->qadd    = FALSE; break;
1247           case 'R' : adef->restart = TRUE; break;
1248           case 'T' : rdta->ttratio = -1.0; extranum++; break;
1249           case 'U' : adef->usertree = TRUE; break;
1250           case 'W' : adef->userwgt = TRUE; cdta->wgtsum = 0; extranum++; break;
1251           case 'Y' : adef->trout   = treeDefType - adef->trout; break;
1252           case ' ' : break;
1253           case '\t': break;
1254           default  :
1255               printf("ERROR: Bad option character: '%c'\n", ch);
1256               return FALSE;
1257           }
1258       }
1259 
1260     if (ch == EOF) {
1261       printf("ERROR: End-of-file in options list\n");
1262       return FALSE;
1263       }
1264 
1265     if (adef->usertree && adef->restart) {
1266       printf("ERROR:  The restart and user-tree options conflict:\n");
1267       printf("        Restart adds rest of taxa to a starting tree;\n");
1268       printf("        User-tree does not add any taxa.\n\n");
1269       return FALSE;
1270       }
1271 
1272     if (adef->usertree && adef->jumble) {
1273       printf("WARNING:  The jumble and user-tree options conflict:\n");
1274       printf("          Jumble adds taxa to a tree in random order;\n");
1275       printf("          User-tree does not use taxa addition.\n");
1276       printf("          Jumble option cancelled for this run.\n\n");
1277       adef->jumble = FALSE;
1278       }
1279 
1280     if (tr->userlen && tr->global != -1) {
1281       printf("ERROR:  The global and user-lengths options conflict:\n");
1282       printf("        Global optimizes a starting tree;\n");
1283       printf("        User-lengths constrain the starting tree.\n\n");
1284       return FALSE;
1285       }
1286 
1287     if (tr->userlen && ! adef->usertree) {
1288       printf("WARNING:  User lengths required user tree option.\n");
1289       printf("          User-tree option set for this run.\n\n");
1290       adef->usertree = TRUE;
1291       }
1292 
1293     rdta->wgt      = (int *)    Malloc((rdta->sites + 1) * sizeof(int));
1294     rdta->wgt2     = (int *)    Malloc((rdta->sites + 1) * sizeof(int));
1295     rdta->sitecat  = (int *)    Malloc((rdta->sites + 1) * sizeof(int));
1296     cdta->alias    = (int *)    Malloc((rdta->sites + 1) * sizeof(int));
1297     cdta->aliaswgt = (int *)    Malloc((rdta->sites + 1) * sizeof(int));
1298     cdta->patcat   = (int *)    Malloc((rdta->sites + 1) * sizeof(int));
1299     cdta->patrat   = (double *) Malloc((rdta->sites + 1) * sizeof(double));
1300     cdta->wr       = (double *) Malloc((rdta->sites + 1) * sizeof(double));
1301     cdta->wr2      = (double *) Malloc((rdta->sites + 1) * sizeof(double));
1302     if ( ! rdta->wgt || ! rdta->wgt2     || ! rdta->sitecat || ! cdta->alias
1303                      || ! cdta->aliaswgt || ! cdta->patcat  || ! cdta->patrat
1304                      || ! cdta->wr       || ! cdta->wr2) {
1305       fprintf(stderr, "getoptions: Malloc failure\n");
1306       return 0;
1307       }
1308 
1309     /*  process lines with auxiliary data */
1310 
1311     while (extranum--) {
1312       ch = getc(INFILE);
1313       uppercase(& ch);
1314       switch (ch) {
1315         case 'B':  if (! inputboot(adef)) return FALSE; break;
1316         case 'C':  if (! inputcategories(rdta)) return FALSE; break;
1317         case 'E':  if (! inputextra(adef)) return FALSE; extranum++; break;
1318         case 'F':  if (! inputfreqs(rdta)) return FALSE; break;
1319         case 'G':  if (! inputglobal(tr)) return FALSE; extranum++; break;
1320         case 'J':  if (! inputjumble(adef)) return FALSE; break;
1321         case 'K':  if (! inputkeep(adef)) return FALSE; break;
1322         case 'O':  if (! inputoutgroup(adef, tr)) return FALSE; break;
1323         case 'T':  if (! inputratio(rdta)) return FALSE; break;
1324         case 'W':  if (! inputweights(adef, rdta, cdta)) return FALSE; break;
1325         case 'Y':  if (! inputtreeopt(adef)) return FALSE; extranum++; break;
1326 
1327         default:
1328             printf("ERROR: Auxiliary options line starts with '%c'\n", ch);
1329             return FALSE;
1330         }
1331       }
1332 
1333     if (! adef->userwgt) {
1334       for (i = 1; i <= rdta->sites; i++) rdta->wgt[i] = 1;
1335       cdta->wgtsum = rdta->sites;
1336       }
1337 
1338     if (adef->userwgt && cdta->wgtsum < 1) {
1339       printf("ERROR:  Missing or bad user-supplied weight data.\n");
1340       return FALSE;
1341       }
1342 
1343     if (adef->boot) {
1344       printf("Bootstrap random number seed = %ld\n\n", adef->boot);
1345       }
1346 
1347     if (adef->jumble) {
1348       printf("Jumble random number seed = %ld\n\n", adef->jumble);
1349       }
1350 
1351     if (adef->qadd) {
1352       printf("Quick add (only local branches initially optimized) in effect\n\n");
1353       }
1354 
1355     if (rdta->categs > 0) {
1356       printf("Site category   Rate of change\n\n");
1357       for (i = 1; i <= rdta->categs; i++)
1358         printf("           %c%13.3f\n", itobase36(i), rdta->catrat[i]);
1359       putchar('\n');
1360       for (i = 1; i <= rdta->sites; i++) {
1361         if ((rdta->wgt[i] > 0) && (rdta->sitecat[i] < 1)) {
1362           printf("ERROR: Bad category (%c) at site %d\n",
1363                   itobase36(rdta->sitecat[i]), i);
1364           return FALSE;
1365           }
1366         }
1367       }
1368     else if (rdta->categs < 0) {
1369       printf("ERROR: Category auxiliary data missing from input\n");
1370       return FALSE;
1371       }
1372     else {                                        /* rdta->categs == 0 */
1373       for (i = 1; i <= rdta->sites; i++) rdta->sitecat[i] = 1;
1374       rdta->categs = 1;
1375       }
1376 
1377     if (tr->outgr < 1) {
1378       printf("ERROR: Outgroup auxiliary data missing from input\n");
1379       return FALSE;
1380       }
1381 
1382     if (rdta->ttratio < 0.0) {
1383       printf("ERROR: Transition/transversion auxiliary data missing from input\n");
1384       return FALSE;
1385       }
1386 
1387     if (tr->global < 0) {
1388       if (tr->global == -2) tr->global = tr->mxtips - 3;  /* Default global */
1389       else                  tr->global = adef->usertree ? 0 : 1;/* No global */
1390       }
1391 
1392     if (adef->restart) {
1393       printf("Restart option in effect.  ");
1394       printf("Sequence addition will start from appended tree.\n\n");
1395       }
1396 
1397     if (adef->usertree && ! tr->global) {
1398       printf("User-supplied tree topology%swill be used.\n\n",
1399         tr->userlen ? " and branch lengths " : " ");
1400       }
1401     else {
1402       if (! adef->usertree) {
1403         printf("Rearrangements of partial trees may cross %d %s.\n",
1404                tr->partswap, tr->partswap == 1 ? "branch" : "branches");
1405         }
1406       printf("Rearrangements of full tree may cross %d %s.\n\n",
1407              tr->global, tr->global == 1 ? "branch" : "branches");
1408       }
1409 
1410     if (! adef->usertree && adef->nkeep == 0) adef->nkeep = 1;
1411 
1412     return TRUE;
1413   } /* getoptions */
1414 
1415 
getbasefreqs(rawdata * rdta)1416 boolean getbasefreqs (rawdata *rdta)
1417   { /* getbasefreqs */
1418     int  ch;
1419 
1420     if (rdta->freqread) return TRUE;
1421 
1422     ch = getc(INFILE);
1423     if (! ((ch == 'F') || (ch == 'f')))  (void) ungetc(ch, INFILE);
1424 
1425     if (fscanf(INFILE, "%lf%lf%lf%lf",
1426                & rdta->freqa, & rdta->freqc,
1427                & rdta->freqg, & rdta->freqt) != 4 ||
1428         findch('\n') == EOF) {
1429       printf("ERROR: Problem reading user base frequencies\n");
1430       return  FALSE;
1431       }
1432 
1433     return TRUE;
1434   } /* getbasefreqs */
1435 
1436 
getyspace(rawdata * rdta)1437 boolean getyspace (rawdata *rdta)
1438   { /* getyspace */
1439     long   size;
1440     int    i;
1441     yType *y0;
1442 
1443     if (! (rdta->y = (yType **) Malloc((rdta->numsp + 1) * sizeof(yType *)))) {
1444       printf("ERROR: Unable to obtain space for data array pointers\n");
1445       return  FALSE;
1446       }
1447 
1448     size = 4 * (rdta->sites / 4 + 1);
1449     if (! (y0 = (yType *) Malloc((rdta->numsp + 1) * size * sizeof(yType)))) {
1450       printf("ERROR: Unable to obtain space for data array\n");
1451       return  FALSE;
1452       }
1453 
1454     for (i = 0; i <= rdta->numsp; i++) {
1455       rdta->y[i] = y0;
1456       y0 += size;
1457       }
1458 
1459     return  TRUE;
1460   } /* getyspace */
1461 
1462 
setupTree(tree * tr,int nsites)1463 boolean setupTree (tree *tr, int nsites)
1464   { /* setupTree */
1465     nodeptr  p0, p, q;
1466     int      i, j, tips, inter;
1467 
1468     tips  = tr->mxtips;
1469     inter = tr->mxtips - 1;
1470 
1471     if (!(p0 = (nodeptr) Malloc((tips + 3*inter) * sizeof(node)))) {
1472       printf("ERROR: Unable to obtain sufficient tree memory\n");
1473       return  FALSE;
1474       }
1475 
1476     if (!(tr->nodep = (nodeptr *) Malloc((2*tr->mxtips) * sizeof(nodeptr)))) {
1477       printf("ERROR: Unable to obtain sufficient tree memory, too\n");
1478       return  FALSE;
1479       }
1480 
1481     tr->nodep[0] = (node *) NULL;    /* Use as 1-based array */
1482 
1483     for (i = 1; i <= tips; i++) {    /* Set-up tips */
1484       p = p0++;
1485       p->x      = (xarray *) NULL;
1486       p->tip    = (yType *) NULL;
1487       p->number = i;
1488       p->next   = p;
1489       p->back   = (node *) NULL;
1490 
1491       tr->nodep[i] = p;
1492       }
1493 
1494     for (i = tips + 1; i <= tips + inter; i++) { /* Internal nodes */
1495       q = (node *) NULL;
1496       for (j = 1; j <= 3; j++) {
1497         p = p0++;
1498         p->x      = (xarray *) NULL;
1499         p->tip    = (yType *) NULL;
1500         p->number = i;
1501         p->next   = q;
1502         p->back   = (node *) NULL;
1503         q = p;
1504         }
1505       p->next->next->next = p;
1506       tr->nodep[i] = p;
1507       }
1508 
1509     tr->likelihood  = unlikely;
1510     tr->start       = (node *) NULL;
1511     tr->outgrnode   = tr->nodep[tr->outgr];
1512     tr->ntips       = 0;
1513     tr->nextnode    = 0;
1514     tr->opt_level   = 0;
1515     tr->prelabeled  = TRUE;
1516     tr->smoothed    = FALSE;
1517     tr->log_f_valid = 0;
1518 
1519     tr->log_f = (double *) Malloc(nsites * sizeof(double));
1520     if (! tr->log_f) {
1521       printf("ERROR: Unable to obtain sufficient tree memory, trey\n");
1522       return  FALSE;
1523       }
1524 
1525     return TRUE;
1526   } /* setupTree */
1527 
1528 
freeTreeNode(nodeptr p)1529 void freeTreeNode (nodeptr p)   /* Free tree node (sector) associated data */
1530   { /* freeTreeNode */
1531     if (p) {
1532       if (p->x) {
1533         if (p->x->lv) Free(p->x->lv);
1534         Free(p->x);
1535         }
1536       }
1537   } /* freeTreeNode */
1538 
1539 
freeTree(tree * tr)1540 void freeTree (tree *tr)
1541   { /* freeTree */
1542     nodeptr  p, q;
1543     int  i, tips, inter;
1544 
1545     tips  = tr->mxtips;
1546     inter = tr->mxtips - 1;
1547 
1548     for (i = 1; i <= tips; i++) freeTreeNode(tr->nodep[i]);
1549 
1550     for (i = tips + 1; i <= tips + inter; i++) {
1551       if (p = tr->nodep[i]) {
1552         if (q = p->next) {
1553           freeTreeNode(q->next);
1554           freeTreeNode(q);
1555           }
1556         freeTreeNode(p);
1557         }
1558       }
1559 
1560     Free(tr->nodep[1]);       /* Free the actual nodes */
1561   } /* freeTree */
1562 
1563 
getdata(analdef * adef,rawdata * rdta,tree * tr)1564 boolean getdata (analdef *adef, rawdata *rdta, tree *tr)
1565     /* read sequences */
1566   { /* getdata */
1567     int   i, j, k, l, basesread, basesnew, ch;
1568     int   meaning[256];          /*  meaning of input characters */
1569     char *nameptr;
1570     boolean  allread, firstpass;
1571 
1572     for (i = 0; i <= 255; i++) meaning[i] = 0;
1573     meaning['A'] =  1;
1574     meaning['B'] = 14;
1575     meaning['C'] =  2;
1576     meaning['D'] = 13;
1577     meaning['G'] =  4;
1578     meaning['H'] = 11;
1579     meaning['K'] = 12;
1580     meaning['M'] =  3;
1581     meaning['N'] = 15;
1582     meaning['O'] = 15;
1583     meaning['R'] =  5;
1584     meaning['S'] =  6;
1585     meaning['T'] =  8;
1586     meaning['U'] =  8;
1587     meaning['V'] =  7;
1588     meaning['W'] =  9;
1589     meaning['X'] = 15;
1590     meaning['Y'] = 10;
1591     meaning['?'] = 15;
1592     meaning['-'] = 15;
1593 
1594     basesread = basesnew = 0;
1595 
1596     allread = FALSE;
1597     firstpass = TRUE;
1598     ch = ' ';
1599 
1600     while (! allread) {
1601       for (i = 1; i <= tr->mxtips; i++) {     /*  Read data line */
1602 
1603         if (firstpass) {                      /*  Read species names */
1604           j = 1;
1605           while (whitechar(ch = getc(INFILE))) {  /*  Skip blank lines */
1606             if (ch == '\n')  j = 1;  else  j++;
1607             }
1608 
1609           if (j > nmlngth) {
1610             printf("ERROR: Blank name for species %d; ", i);
1611             printf("check number of species,\n");
1612             printf("       number of sites, and interleave option.\n");
1613             return  FALSE;
1614             }
1615 
1616           nameptr = tr->nodep[i]->name;
1617           for (k = 1; k < j; k++)  *nameptr++ = ' ';
1618 
1619           while (ch != '\n' && ch != EOF) {
1620             if (whitechar(ch))  ch = ' ';
1621             *nameptr++ = ch;
1622             if (++j > nmlngth) break;
1623             ch = getc(INFILE);
1624             }
1625 
1626           while (*(--nameptr) == ' ') ;          /*  remove trailing blanks */
1627           *(++nameptr) = '\0';                   /*  add null termination */
1628 
1629           if (ch == EOF) {
1630             printf("ERROR: End-of-file in name of species %d\n", i);
1631             return  FALSE;
1632             }
1633           }    /* if (firstpass) */
1634 
1635         j = basesread;
1636         while ((j < rdta->sites)
1637                && ((ch = getc(INFILE)) != EOF)
1638                && ((! adef->interleaved) || (ch != '\n'))) {
1639           uppercase(& ch);
1640           if (meaning[ch] || ch == '.') {
1641             j++;
1642             if (ch == '.') {
1643               if (i != 1) ch = rdta->y[1][j];
1644               else {
1645                 printf("ERROR: Dot (.) found at site %d of sequence 1\n", j);
1646                 return  FALSE;
1647                 }
1648               }
1649             rdta->y[i][j] = ch;
1650             }
1651           else if (whitechar(ch) || digitchar(ch)) ;
1652           else {
1653             printf("ERROR: Bad base (%c) at site %d of sequence %d\n",
1654                     ch, j, i);
1655             return  FALSE;
1656             }
1657           }
1658 
1659         if (ch == EOF) {
1660           printf("ERROR: End-of-file at site %d of sequence %d\n", j, i);
1661           return  FALSE;
1662           }
1663 
1664         if (! firstpass && (j == basesread)) i--;        /* no data on line */
1665         else if (i == 1) basesnew = j;
1666         else if (j != basesnew) {
1667           printf("ERROR: Sequences out of alignment\n");
1668           printf("%d (instead of %d) residues read in sequence %d\n",
1669                   j - basesread, basesnew - basesread, i);
1670           return  FALSE;
1671           }
1672 
1673         while (ch != '\n' && ch != EOF) ch = getc(INFILE);  /* flush line */
1674         }                                                  /* next sequence */
1675       firstpass = FALSE;
1676       basesread = basesnew;
1677       allread = (basesread >= rdta->sites);
1678       }
1679 
1680     /*  Print listing of sequence alignment */
1681 
1682     if (adef->prdata) {
1683       j = nmlngth - 5 + ((rdta->sites + ((rdta->sites - 1)/10))/2);
1684       if (j < nmlngth - 1) j = nmlngth - 1;
1685       if (j > 37) j = 37;
1686       printf("Name"); for (i=1;i<=j;i++) putchar(' '); printf("Sequences\n");
1687       printf("----"); for (i=1;i<=j;i++) putchar(' '); printf("---------\n");
1688       putchar('\n');
1689 
1690       for (i = 1; i <= rdta->sites; i += 60) {
1691         l = i + 59;
1692         if (l > rdta->sites) l = rdta->sites;
1693 
1694         if (adef->userwgt) {
1695           printf("Weights   ");
1696           for (j = 11; j <= nmlngth+3; j++) putchar(' ');
1697           for (k = i; k <= l; k++) {
1698             putchar(itobase36(rdta->wgt[k]));
1699             if (((k % 10) == 0) && (k < l)) putchar(' ');
1700             }
1701           putchar('\n');
1702           }
1703 
1704         if (rdta->categs > 1) {
1705           printf("Categories");
1706           for (j = 11; j <= nmlngth+3; j++) putchar(' ');
1707           for (k = i; k <= l; k++) {
1708             putchar(itobase36(rdta->sitecat[k]));
1709             if (((k % 10) == 0) && (k < l)) putchar(' ');
1710             }
1711           putchar('\n');
1712           }
1713 
1714         for (j = 1; j <= tr->mxtips; j++) {
1715           nameptr = tr->nodep[j]->name;
1716           k = nmlngth+3;
1717           while (ch = *nameptr++) {putchar(ch); k--;}
1718           while (--k >= 0) putchar(' ');
1719 
1720           for (k = i; k <= l; k++) {
1721             ch = rdta->y[j][k];
1722             if ((j > 1) && (ch == rdta->y[1][k])) ch = '.';
1723             putchar(ch);
1724             if (((k % 10) == 0) && (k < l)) putchar(' ');
1725             }
1726           putchar('\n');
1727           }
1728         putchar('\n');
1729         }
1730       }
1731 
1732     for (j = 1; j <= tr->mxtips; j++)    /* Convert characters to meanings */
1733       for (i = 1; i <= rdta->sites; i++) {
1734         rdta->y[j][i] = meaning[rdta->y[j][i]];
1735         }
1736 
1737     return  TRUE;
1738   } /* getdata */
1739 
1740 
getntrees(analdef * adef)1741 boolean  getntrees (analdef *adef)
1742   { /* getntrees */
1743 
1744     if (fscanf(INFILE, "%d", &(adef->numutrees)) != 1 || findch('\n') == EOF) {
1745       printf("ERROR: Problem reading number of user trees\n");
1746       return  FALSE;
1747       }
1748 
1749     if (adef->nkeep == 0) adef->nkeep = adef->numutrees;
1750 
1751     return  TRUE;
1752   } /* getntrees */
1753 
1754 
getinput(analdef * adef,rawdata * rdta,cruncheddata * cdta,tree * tr)1755 boolean getinput (analdef *adef, rawdata *rdta, cruncheddata *cdta, tree *tr)
1756   { /* getinput */
1757     if (! getnums(rdta))                       return FALSE;
1758     if (! getoptions(adef, rdta, cdta, tr))    return FALSE;
1759     if (! adef->empf && ! getbasefreqs(rdta))  return FALSE;
1760     if (! getyspace(rdta))                     return FALSE;
1761     if (! setupTree(tr, rdta->sites))          return FALSE;
1762     if (! getdata(adef, rdta, tr))             return FALSE;
1763     if (adef->usertree && ! getntrees(adef))   return FALSE;
1764 
1765     return TRUE;
1766   } /* getinput */
1767 
1768 
makeboot(analdef * adef,rawdata * rdta,cruncheddata * cdta)1769 void makeboot (analdef *adef, rawdata *rdta, cruncheddata *cdta)
1770   { /* makeboot */
1771     int  i, j, nonzero;
1772     double  randum();
1773 
1774     nonzero = 0;
1775     for (i = 1; i <= rdta->sites; i++)  if (rdta->wgt[i] > 0) nonzero++;
1776 
1777     for (j = 1; j <= nonzero; j++) cdta->aliaswgt[j] = 0;
1778     for (j = 1; j <= nonzero; j++)
1779       cdta->aliaswgt[(int) (nonzero*randum(& adef->boot)) + 1]++;
1780 
1781     j = 0;
1782     cdta->wgtsum = 0;
1783     for (i = 1; i <= rdta->sites; i++) {
1784       if (rdta->wgt[i] > 0)
1785         cdta->wgtsum += (rdta->wgt2[i] = rdta->wgt[i] * cdta->aliaswgt[++j]);
1786       else
1787         rdta->wgt2[i] = 0;
1788       }
1789   } /* makeboot */
1790 
1791 
sitesort(rawdata * rdta,cruncheddata * cdta)1792 void sitesort (rawdata *rdta, cruncheddata *cdta)
1793     /* Shell sort keeping sites with identical residues and weights in
1794      * the original order (i.e., a stable sort).
1795      * The index created in cdta->alias is 1 based.  The
1796      * sitecombcrunch routine packs it to a 0 based index.
1797      */
1798   { /* sitesort */
1799     int  gap, i, j, jj, jg, k, n, nsp;
1800     int  *index, *category;
1801     boolean  flip, tied;
1802     yType  **data;
1803 
1804     index    = cdta->alias;
1805     category = rdta->sitecat;
1806     data     = rdta->y;
1807     n        = rdta->sites;
1808     nsp      = rdta->numsp;
1809 
1810     for (gap = n / 2; gap > 0; gap /= 2) {
1811       for (i = gap + 1; i <= n; i++) {
1812         j = i - gap;
1813 
1814         do {
1815           jj = index[j];
1816           jg = index[j+gap];
1817           flip = (category[jj] >  category[jg]);
1818           tied = (category[jj] == category[jg]);
1819           for (k = 1; (k <= nsp) && tied; k++) {
1820             flip = (data[k][jj] >  data[k][jg]);
1821             tied = (data[k][jj] == data[k][jg]);
1822             }
1823           if (flip) {
1824             index[j]     = jg;
1825             index[j+gap] = jj;
1826             j -= gap;
1827             }
1828           } while (flip && (j > 0));
1829 
1830         }  /* for (i ...   */
1831       }    /* for (gap ... */
1832   } /* sitesort */
1833 
1834 
sitecombcrunch(rawdata * rdta,cruncheddata * cdta)1835 void sitecombcrunch (rawdata *rdta, cruncheddata *cdta)
1836     /* combine sites that have identical patterns (and nonzero weight) */
1837   { /* sitecombcrunch */
1838     int  i, sitei, j, sitej, k;
1839     boolean  tied;
1840 
1841     i = 0;
1842     cdta->alias[0] = cdta->alias[1];
1843     cdta->aliaswgt[0] = 0;
1844 
1845     for (j = 1; j <= rdta->sites; j++) {
1846       sitei = cdta->alias[i];
1847       sitej = cdta->alias[j];
1848       tied = (rdta->sitecat[sitei] == rdta->sitecat[sitej]);
1849 
1850       for (k = 1; tied && (k <= rdta->numsp); k++)
1851         tied = (rdta->y[k][sitei] == rdta->y[k][sitej]);
1852 
1853       if (tied) {
1854         cdta->aliaswgt[i] += rdta->wgt2[sitej];
1855         }
1856       else {
1857         if (cdta->aliaswgt[i] > 0) i++;
1858         cdta->aliaswgt[i] = rdta->wgt2[sitej];
1859         cdta->alias[i] = sitej;
1860         }
1861       }
1862 
1863     cdta->endsite = i;
1864     if (cdta->aliaswgt[i] > 0) cdta->endsite++;
1865   } /* sitecombcrunch */
1866 
1867 
makeweights(analdef * adef,rawdata * rdta,cruncheddata * cdta)1868 boolean makeweights (analdef *adef, rawdata *rdta, cruncheddata *cdta)
1869     /* make up weights vector to avoid duplicate computations */
1870   { /* makeweights */
1871     int  i;
1872 
1873     if (adef->boot)  makeboot(adef, rdta, cdta);
1874     else  for (i = 1; i <= rdta->sites; i++)  rdta->wgt2[i] = rdta->wgt[i];
1875 
1876     for (i = 1; i <= rdta->sites; i++)  cdta->alias[i] = i;
1877     sitesort(rdta, cdta);
1878     sitecombcrunch(rdta, cdta);
1879 
1880     printf("Total weight of positions in analysis = %d\n", cdta->wgtsum);
1881     printf("There are %d distinct data patterns (columns)\n\n", cdta->endsite);
1882 
1883     return TRUE;
1884   } /* makeweights */
1885 
1886 
makevalues(rawdata * rdta,cruncheddata * cdta)1887 boolean makevalues (rawdata *rdta, cruncheddata *cdta)
1888     /* set up fractional likelihoods at tips */
1889   { /* makevalues */
1890     double  temp, wtemp;
1891     int  i, j;
1892 
1893     for (i = 1; i <= rdta->numsp; i++) {    /* Pack and move tip data */
1894       for (j = 0; j < cdta->endsite; j++) {
1895         rdta->y[i-1][j] = rdta->y[i][cdta->alias[j]];
1896         }
1897       }
1898 
1899     for (j = 0; j < cdta->endsite; j++) {
1900       cdta->patcat[j] = i = rdta->sitecat[cdta->alias[j]];
1901       cdta->patrat[j] = temp = rdta->catrat[i];
1902       cdta->wr[j]  = wtemp = temp * cdta->aliaswgt[j];
1903       cdta->wr2[j] = temp * wtemp;
1904       }
1905 
1906     return TRUE;
1907   } /* makevalues */
1908 
1909 
empiricalfreqs(rawdata * rdta,cruncheddata * cdta)1910 boolean empiricalfreqs (rawdata *rdta, cruncheddata *cdta)
1911     /* Get empirical base frequencies from the data */
1912   { /* empiricalfreqs */
1913     double  sum, suma, sumc, sumg, sumt, wj, fa, fc, fg, ft;
1914     int     i, j, k, code;
1915     yType  *yptr;
1916 
1917     rdta->freqa = 0.25;
1918     rdta->freqc = 0.25;
1919     rdta->freqg = 0.25;
1920     rdta->freqt = 0.25;
1921     for (k = 1; k <= 8; k++) {
1922       suma = 0.0;
1923       sumc = 0.0;
1924       sumg = 0.0;
1925       sumt = 0.0;
1926       for (i = 0; i < rdta->numsp; i++) {
1927         yptr = rdta->y[i];
1928         for (j = 0; j < cdta->endsite; j++) {
1929           code = *yptr++;
1930           fa = rdta->freqa * ( code       & 1);
1931           fc = rdta->freqc * ((code >> 1) & 1);
1932           fg = rdta->freqg * ((code >> 2) & 1);
1933           ft = rdta->freqt * ((code >> 3) & 1);
1934           wj = cdta->aliaswgt[j] / (fa + fc + fg + ft);
1935           suma += wj * fa;
1936           sumc += wj * fc;
1937           sumg += wj * fg;
1938           sumt += wj * ft;
1939           }
1940         }
1941       sum = suma + sumc + sumg + sumt;
1942       rdta->freqa = suma / sum;
1943       rdta->freqc = sumc / sum;
1944       rdta->freqg = sumg / sum;
1945       rdta->freqt = sumt / sum;
1946       }
1947 
1948     return TRUE;
1949   } /* empiricalfreqs */
1950 
1951 
reportfreqs(analdef * adef,rawdata * rdta)1952 void reportfreqs (analdef *adef, rawdata *rdta)
1953   { /* reportfreqs */
1954     double  suma, sumb;
1955 
1956     if (adef->empf) printf("Empirical ");
1957     printf("Base Frequencies:\n\n");
1958     printf("   A    %10.5f\n",   rdta->freqa);
1959     printf("   C    %10.5f\n",   rdta->freqc);
1960     printf("   G    %10.5f\n",   rdta->freqg);
1961     printf("  T(U)  %10.5f\n\n", rdta->freqt);
1962     rdta->freqr = rdta->freqa + rdta->freqg;
1963     rdta->invfreqr = 1.0/rdta->freqr;
1964     rdta->freqar = rdta->freqa * rdta->invfreqr;
1965     rdta->freqgr = rdta->freqg * rdta->invfreqr;
1966     rdta->freqy = rdta->freqc + rdta->freqt;
1967     rdta->invfreqy = 1.0/rdta->freqy;
1968     rdta->freqcy = rdta->freqc * rdta->invfreqy;
1969     rdta->freqty = rdta->freqt * rdta->invfreqy;
1970     printf("Transition/transversion ratio = %10.6f\n\n", rdta->ttratio);
1971     suma = rdta->ttratio*rdta->freqr*rdta->freqy
1972          - (rdta->freqa*rdta->freqg + rdta->freqc*rdta->freqt);
1973     sumb = rdta->freqa*rdta->freqgr + rdta->freqc*rdta->freqty;
1974     rdta->xi = suma/(suma+sumb);
1975     rdta->xv = 1.0 - rdta->xi;
1976     if (rdta->xi <= 0.0) {
1977       printf("WARNING: This transition/transversion ratio\n");
1978       printf("         is impossible with these base frequencies!\n");
1979       printf("Transition/transversion parameter reset\n\n");
1980       rdta->xi = 0.000001;
1981       rdta->xv = 1.0 - rdta->xi;
1982       rdta->ttratio = (sumb * rdta->xi / rdta->xv
1983                        + rdta->freqa * rdta->freqg
1984                        + rdta->freqc * rdta->freqt)
1985                     / (rdta->freqr * rdta->freqy);
1986       printf("Transition/transversion ratio = %10.6f\n\n", rdta->ttratio);
1987       }
1988     printf("(Transition/transversion parameter = %10.6f)\n\n",
1989             rdta->xi/rdta->xv);
1990     rdta->fracchange = 2.0 * rdta->xi * (rdta->freqa * rdta->freqgr
1991                                        + rdta->freqc * rdta->freqty)
1992                      + rdta->xv * (1.0 - rdta->freqa * rdta->freqa
1993                                        - rdta->freqc * rdta->freqc
1994                                        - rdta->freqg * rdta->freqg
1995                                        - rdta->freqt * rdta->freqt);
1996   } /* reportfreqs */
1997 
1998 
linkdata2tree(rawdata * rdta,cruncheddata * cdta,tree * tr)1999 boolean linkdata2tree (rawdata *rdta, cruncheddata *cdta, tree *tr)
2000     /* Link data array to the tree tips */
2001   { /* linkdata2tree */
2002     int  i;
2003 
2004     for (i = 1; i <= tr->mxtips; i++) {    /* Associate data with tips */
2005       tr->nodep[i]->tip = &(rdta->y[i-1][0]);
2006       }
2007 
2008     tr->rdta       = rdta;
2009     tr->cdta       = cdta;
2010     return TRUE;
2011   } /* linkdata2tree */
2012 
2013 
setupxarray(int npat)2014 xarray *setupxarray (int npat)
2015   { /* setupxarray */
2016     xarray        *x;
2017     likelivector  *data;
2018 
2019     x = (xarray *) Malloc(sizeof(xarray));
2020     if (x) {
2021       data = (likelivector *) Malloc(npat * sizeof(likelivector));
2022       if (data) {
2023         x->lv = data;
2024         x->prev = x->next = x;
2025         x->owner = (node *) NULL;
2026         }
2027       else {
2028         Free(x);
2029         return (xarray *) NULL;
2030         }
2031       }
2032     return x;
2033   } /* setupxarray */
2034 
2035 
linkxarray(int req,int min,int npat,xarray ** freexptr,xarray ** usedxptr)2036 boolean linkxarray (int req, int min, int npat,
2037                     xarray **freexptr, xarray **usedxptr)
2038     /*  Link a set of xarrays */
2039   { /* linkxarray */
2040     xarray  *first, *prev, *x;
2041     int  i;
2042 
2043     first = prev = (xarray *) NULL;
2044     i = 0;
2045 
2046     do {
2047       x = setupxarray(npat);
2048       if (x) {
2049         if (! first) first = x;
2050         else {
2051           prev->next = x;
2052           x->prev = prev;
2053           }
2054         prev = x;
2055         i++;
2056         }
2057       else {
2058         printf("ERROR: Failure to get requested xarray memory\n");
2059         if (i < min)  return  FALSE;
2060         }
2061       } while ((i < req) && x);
2062 
2063     if (first) {
2064       first->prev = prev;
2065       prev->next = first;
2066       }
2067 
2068     *freexptr = first;
2069     *usedxptr = (xarray *) NULL;
2070 
2071     return  TRUE;
2072   } /* linkxarray */
2073 
2074 
setupnodex(tree * tr)2075 boolean setupnodex (tree *tr)
2076   { /* setupnodex */
2077     nodeptr  p;
2078     int  i;
2079 
2080     for (i = tr->mxtips + 1; (i <= 2*(tr->mxtips) - 2); i++) {
2081       p = tr->nodep[i];
2082       if (! (p->x = setupxarray(tr->cdta->endsite))) {
2083         printf("ERROR: Failure to get internal node xarray memory\n");
2084         return  FALSE;
2085         }
2086       }
2087 
2088     return  TRUE;
2089   } /* setupnodex */
2090 
2091 
getxtip(nodeptr p)2092 xarray *getxtip (nodeptr p)
2093   { /* getxtip */
2094     xarray  *new;
2095     boolean  splice;
2096 
2097     if (! p) return (xarray *) NULL;
2098 
2099     splice = FALSE;
2100 
2101     if (p->x) {                  /* array is there; move to tail of list */
2102       new = p->x;
2103       if (new == new->prev) ;             /* linked to self; leave it */
2104       else if (new == usedxtip) usedxtip = usedxtip->next; /* at head */
2105       else if (new == usedxtip->prev) ;   /* already at tail */
2106       else {                              /* move to tail of list */
2107         new->prev->next = new->next;
2108         new->next->prev = new->prev;
2109         splice = TRUE;
2110         }
2111       }
2112 
2113     else if (freextip) {                 /* take from unused list */
2114       p->x = new = freextip;
2115       new->owner = p;
2116       if (new->prev != new) {            /* not only member of freelist */
2117         new->prev->next = new->next;
2118         new->next->prev = new->prev;
2119         freextip = new->next;
2120         }
2121       else
2122         freextip = (xarray *) NULL;
2123 
2124       splice = TRUE;
2125       }
2126 
2127     else if (usedxtip) {                 /* take from head of used list */
2128       usedxtip->owner->x = (xarray *) NULL;
2129       p->x = new = usedxtip;
2130       new->owner = p;
2131       usedxtip = usedxtip->next;
2132       }
2133 
2134     else {
2135       printf("ERROR: Unable to locate memory for tip %d.\n", p->number);
2136       return  (xarray *) NULL;
2137       }
2138 
2139     if (splice) {
2140       if (usedxtip) {                  /* list is not empty */
2141         usedxtip->prev->next = new;
2142         new->prev = usedxtip->prev;
2143         usedxtip->prev = new;
2144         new->next = usedxtip;
2145         }
2146       else
2147         usedxtip = new->prev = new->next = new;
2148       }
2149 
2150     return  new;
2151   } /* getxtip */
2152 
2153 
getxnode(nodeptr p)2154 xarray *getxnode (nodeptr p)
2155     /* Ensure that internal node p has memory */
2156   { /* getxnode */
2157     nodeptr  s;
2158 
2159     if (! (p->x)) {  /*  Move likelihood array on this node to sector p */
2160       if ((s = p->next)->x || (s = s->next)->x) {
2161         p->x = s->x;
2162         s->x = (xarray *) NULL;
2163         }
2164       else {
2165         printf("ERROR: Unable to locate memory at node %d.\n", p->number);
2166         exit(1);
2167         }
2168       }
2169     return  p->x;
2170   } /* getxnode */
2171 
2172 
newview(tree * tr,nodeptr p)2173 boolean newview (tree *tr, nodeptr p)        /*  Update likelihoods at node */
2174   { /* newview */
2175     double   zq, lzq, xvlzq, zr, lzr, xvlzr;
2176     nodeptr  q, r;
2177     likelivector *lp, *lq, *lr;
2178     int  i;
2179 
2180     if (p->tip) {             /*  Make sure that data are at tip */
2181       likelivector *l;
2182       int           code;
2183       yType        *yptr;
2184 
2185       if (p->x) return TRUE;  /*  They are already there */
2186 
2187       if (! getxtip(p)) return FALSE; /*  They are not, so get memory */
2188       l = p->x->lv;           /*  Pointer to first likelihood vector value */
2189       yptr = p->tip;          /*  Pointer to first nucleotide datum */
2190       for (i = 0; i < tr->cdta->endsite; i++) {
2191         code = *yptr++;
2192         l->a =  code       & 1;
2193         l->c = (code >> 1) & 1;
2194         l->g = (code >> 2) & 1;
2195         l->t = (code >> 3) & 1;
2196         l->exp = 0;
2197         l++;
2198         }
2199       return TRUE;
2200       }
2201 
2202 /*  Internal node needs update */
2203 
2204     q = p->next->back;
2205     r = p->next->next->back;
2206 
2207     while ((! p->x) || (! q->x) || (! r->x)) {
2208       if (! q->x) if (! newview(tr, q))  return FALSE;
2209       if (! r->x) if (! newview(tr, r))  return FALSE;
2210       if (! p->x) if (! getxnode(p)) return FALSE;
2211       }
2212 
2213     lp = p->x->lv;
2214 
2215     lq = q->x->lv;
2216     zq = q->z;
2217     lzq = (zq > zmin) ? log(zq) : log(zmin);
2218     xvlzq = tr->rdta->xv * lzq;
2219 
2220     lr = r->x->lv;
2221     zr = r->z;
2222     lzr = (zr > zmin) ? log(zr) : log(zmin);
2223     xvlzr = tr->rdta->xv * lzr;
2224 
2225     { double  zzqtable[maxcategories+1], zvqtable[maxcategories+1],
2226               zzrtable[maxcategories+1], zvrtable[maxcategories+1],
2227              *zzqptr, *zvqptr, *zzrptr, *zvrptr, *rptr;
2228       double  fxqr, fxqy, fxqn, sumaq, sumgq, sumcq, sumtq,
2229               fxrr, fxry, fxrn, ki, tempi, tempj;
2230       int  *cptr;
2231 
2232 #     ifdef Vectorize
2233         double zzq[maxpatterns], zvq[maxpatterns],
2234                zzr[maxpatterns], zvr[maxpatterns];
2235         int  cat;
2236 #     else
2237         double zzq, zvq, zzr, zvr;
2238         int cat;
2239 #     endif
2240 
2241       rptr   = &(tr->rdta->catrat[1]);
2242       zzqptr = &(zzqtable[1]);
2243       zvqptr = &(zvqtable[1]);
2244       zzrptr = &(zzrtable[1]);
2245       zvrptr = &(zvrtable[1]);
2246 
2247 #     ifdef Vectorize
2248 #       pragma IVDEP
2249 #     endif
2250 
2251       for (i = 1; i <= tr->rdta->categs; i++) {   /* exps for each category */
2252         ki = *rptr++;
2253         *zzqptr++ = exp(ki *   lzq);
2254         *zvqptr++ = exp(ki * xvlzq);
2255         *zzrptr++ = exp(ki *   lzr);
2256         *zvrptr++ = exp(ki * xvlzr);
2257         }
2258 
2259       cptr = &(tr->cdta->patcat[0]);
2260 
2261 #     ifdef Vectorize
2262 #       pragma IVDEP
2263         for (i = 0; i < tr->cdta->endsite; i++) {
2264           cat = *cptr++;
2265           zzq[i] = zzqtable[cat];
2266           zvq[i] = zvqtable[cat];
2267           zzr[i] = zzrtable[cat];
2268           zvr[i] = zvrtable[cat];
2269         }
2270 
2271 #       pragma IVDEP
2272         for (i = 0; i < tr->cdta->endsite; i++) {
2273           fxqr = tr->rdta->freqa * lq->a + tr->rdta->freqg * lq->g;
2274           fxqy = tr->rdta->freqc * lq->c + tr->rdta->freqt * lq->t;
2275           fxqn = fxqr + fxqy;
2276           tempi = fxqr * tr->rdta->invfreqr;
2277           tempj = zvq[i] * (tempi-fxqn) + fxqn;
2278           sumaq = zzq[i] * (lq->a - tempi) + tempj;
2279           sumgq = zzq[i] * (lq->g - tempi) + tempj;
2280           tempi = fxqy * tr->rdta->invfreqy;
2281           tempj = zvq[i] * (tempi-fxqn) + fxqn;
2282           sumcq = zzq[i] * (lq->c - tempi) + tempj;
2283           sumtq = zzq[i] * (lq->t - tempi) + tempj;
2284 
2285           fxrr = tr->rdta->freqa * lr->a + tr->rdta->freqg * lr->g;
2286           fxry = tr->rdta->freqc * lr->c + tr->rdta->freqt * lr->t;
2287           fxrn = fxrr + fxry;
2288           tempi = fxrr * tr->rdta->invfreqr;
2289           tempj = zvr[i] * (tempi-fxrn) + fxrn;
2290           lp->a = sumaq * (zzr[i] * (lr->a - tempi) + tempj);
2291           lp->g = sumgq * (zzr[i] * (lr->g - tempi) + tempj);
2292           tempi = fxry * tr->rdta->invfreqy;
2293           tempj = zvr[i] * (tempi-fxrn) + fxrn;
2294           lp->c = sumcq * (zzr[i] * (lr->c - tempi) + tempj);
2295           lp->t = sumtq * (zzr[i] * (lr->t - tempi) + tempj);
2296           lp->exp = lq->exp + lr->exp;
2297 
2298           if (lp->a < minlikelihood && lp->g < minlikelihood
2299            && lp->c < minlikelihood && lp->t < minlikelihood) {
2300             lp->a   *= twotothe256;
2301             lp->g   *= twotothe256;
2302             lp->c   *= twotothe256;
2303             lp->t   *= twotothe256;
2304             lp->exp += 1;
2305             }
2306           lp++;
2307           lq++;
2308           lr++;
2309           }
2310 
2311 #     else  /*  Not Vectorize  */
2312         for (i = 0; i < tr->cdta->endsite; i++) {
2313           cat = *cptr++;
2314           zzq = zzqtable[cat];
2315           zvq = zvqtable[cat];
2316           fxqr = tr->rdta->freqa * lq->a + tr->rdta->freqg * lq->g;
2317           fxqy = tr->rdta->freqc * lq->c + tr->rdta->freqt * lq->t;
2318           fxqn = fxqr + fxqy;
2319           tempi = fxqr * tr->rdta->invfreqr;
2320           tempj = zvq * (tempi-fxqn) + fxqn;
2321           sumaq = zzq * (lq->a - tempi) + tempj;
2322           sumgq = zzq * (lq->g - tempi) + tempj;
2323           tempi = fxqy * tr->rdta->invfreqy;
2324           tempj = zvq * (tempi-fxqn) + fxqn;
2325           sumcq = zzq * (lq->c - tempi) + tempj;
2326           sumtq = zzq * (lq->t - tempi) + tempj;
2327 
2328           zzr = zzrtable[cat];
2329           zvr = zvrtable[cat];
2330           fxrr = tr->rdta->freqa * lr->a + tr->rdta->freqg * lr->g;
2331           fxry = tr->rdta->freqc * lr->c + tr->rdta->freqt * lr->t;
2332           fxrn = fxrr + fxry;
2333           tempi = fxrr * tr->rdta->invfreqr;
2334           tempj = zvr * (tempi-fxrn) + fxrn;
2335           lp->a = sumaq * (zzr * (lr->a - tempi) + tempj);
2336           lp->g = sumgq * (zzr * (lr->g - tempi) + tempj);
2337           tempi = fxry * tr->rdta->invfreqy;
2338           tempj = zvr * (tempi-fxrn) + fxrn;
2339           lp->c = sumcq * (zzr * (lr->c - tempi) + tempj);
2340           lp->t = sumtq * (zzr * (lr->t - tempi) + tempj);
2341           lp->exp = lq->exp + lr->exp;
2342 
2343           if (lp->a < minlikelihood && lp->g < minlikelihood
2344            && lp->c < minlikelihood && lp->t < minlikelihood) {
2345             lp->a   *= twotothe256;
2346             lp->g   *= twotothe256;
2347             lp->c   *= twotothe256;
2348             lp->t   *= twotothe256;
2349             lp->exp += 1;
2350             }
2351           lp++;
2352           lq++;
2353           lr++;
2354           }
2355 #     endif  /*  Vectorize or not  */
2356 
2357       return TRUE;
2358       }
2359   } /* newview */
2360 
2361 
evaluate(tree * tr,nodeptr p)2362 double evaluate (tree *tr, nodeptr p)
2363   { /* evaluate */
2364     double   sum, z, lz, xvlz,
2365              ki, fxpa, fxpc, fxpg, fxpt, fxpr, fxpy, fxqr, fxqy,
2366              suma, sumb, sumc, term;
2367 
2368 #   ifdef Vectorize
2369        double   zz[maxpatterns], zv[maxpatterns];
2370 #   else
2371        double   zz, zv;
2372 #   endif
2373 
2374     double        zztable[maxcategories+1], zvtable[maxcategories+1],
2375                  *zzptr, *zvptr;
2376     double       *log_f, *rptr;
2377     likelivector *lp, *lq;
2378     nodeptr       q;
2379     int           cat, *cptr, i, *wptr;
2380 
2381     q = p->back;
2382     while ((! p->x) || (! q->x)) {
2383       if (! (p->x)) if (! newview(tr, p)) return badEval;
2384       if (! (q->x)) if (! newview(tr, q)) return badEval;
2385       }
2386 
2387     lp = p->x->lv;
2388     lq = q->x->lv;
2389 
2390     z = p->z;
2391     if (z < zmin) z = zmin;
2392     lz = log(z);
2393     xvlz = tr->rdta->xv * lz;
2394 
2395     rptr  = &(tr->rdta->catrat[1]);
2396     zzptr = &(zztable[1]);
2397     zvptr = &(zvtable[1]);
2398 
2399 #   ifdef Vectorize
2400 #     pragma IVDEP
2401 #   endif
2402 
2403     for (i = 1; i <= tr->rdta->categs; i++) {
2404       ki = *rptr++;
2405       *zzptr++ = exp(ki *   lz);
2406       *zvptr++ = exp(ki * xvlz);
2407       }
2408 
2409     wptr = &(tr->cdta->aliaswgt[0]);
2410     cptr = &(tr->cdta->patcat[0]);
2411     log_f = tr->log_f;
2412     tr->log_f_valid = tr->cdta->endsite;
2413     sum = 0.0;
2414 
2415 #   ifdef Vectorize
2416 #     pragma IVDEP
2417       for (i = 0; i < tr->cdta->endsite; i++) {
2418         cat   = *cptr++;
2419         zz[i] = zztable[cat];
2420         zv[i] = zvtable[cat];
2421       }
2422 
2423 #     pragma IVDEP
2424       for (i = 0; i < tr->cdta->endsite; i++) {
2425         fxpa = tr->rdta->freqa * lp->a;
2426         fxpg = tr->rdta->freqg * lp->g;
2427         fxpc = tr->rdta->freqc * lp->c;
2428         fxpt = tr->rdta->freqt * lp->t;
2429         suma = fxpa * lq->a + fxpc * lq->c + fxpg * lq->g + fxpt * lq->t;
2430         fxqr = tr->rdta->freqa * lq->a + tr->rdta->freqg * lq->g;
2431         fxqy = tr->rdta->freqc * lq->c + tr->rdta->freqt * lq->t;
2432         fxpr = fxpa + fxpg;
2433         fxpy = fxpc + fxpt;
2434         sumc = (fxpr + fxpy) * (fxqr + fxqy);
2435         sumb = fxpr * fxqr * tr->rdta->invfreqr + fxpy * fxqy * tr->rdta->invfreqy;
2436         suma -= sumb;
2437         sumb -= sumc;
2438         term = log(zz[i] * suma + zv[i] * sumb + sumc) + (lp->exp + lq->exp)*log(minlikelihood);
2439         sum += *wptr++ * term;
2440         *log_f++ = term;
2441         lp++;
2442         lq++;
2443         }
2444 
2445 #   else  /*  Not Vectorize  */
2446       for (i = 0; i < tr->cdta->endsite; i++) {
2447         cat  = *cptr++;
2448         zz   = zztable[cat];
2449         zv   = zvtable[cat];
2450         fxpa = tr->rdta->freqa * lp->a;
2451         fxpg = tr->rdta->freqg * lp->g;
2452         fxpc = tr->rdta->freqc * lp->c;
2453         fxpt = tr->rdta->freqt * lp->t;
2454         suma = fxpa * lq->a + fxpc * lq->c + fxpg * lq->g + fxpt * lq->t;
2455         fxqr = tr->rdta->freqa * lq->a + tr->rdta->freqg * lq->g;
2456         fxqy = tr->rdta->freqc * lq->c + tr->rdta->freqt * lq->t;
2457         fxpr = fxpa + fxpg;
2458         fxpy = fxpc + fxpt;
2459         sumc = (fxpr + fxpy) * (fxqr + fxqy);
2460         sumb = fxpr * fxqr * tr->rdta->invfreqr + fxpy * fxqy * tr->rdta->invfreqy;
2461         suma -= sumb;
2462         sumb -= sumc;
2463         term = log(zz * suma + zv * sumb + sumc) + (lp->exp + lq->exp)*log(minlikelihood);
2464 /* printf("evaluate: %le\n", term); */
2465         sum += *wptr++ * term;
2466         *log_f++ = term;
2467         lp++;
2468         lq++;
2469         }
2470 #   endif  /*  Vectorize or not  */
2471 
2472     tr->likelihood = sum;
2473     return  sum;
2474   } /* evaluate */
2475 
2476 
makenewz(tree * tr,nodeptr p,nodeptr q,double z0,int maxiter)2477 double makenewz (tree *tr, nodeptr p, nodeptr q, double z0, int maxiter)
2478   { /* makenewz */
2479     likelivector *lp, *lq;
2480     double  *abi, *bci, *sumci, *abptr, *bcptr, *sumcptr;
2481     double   dlnLidlz, dlnLdlz, d2lnLdlz2, z, zprev, zstep, lz, xvlz,
2482              ki, suma, sumb, sumc, ab, bc, inv_Li, t1, t2,
2483              fx1a, fx1c, fx1g, fx1t, fx1r, fx1y, fx2r, fx2y;
2484     double   zztable[maxcategories+1], zvtable[maxcategories+1],
2485             *zzptr, *zvptr;
2486     double  *rptr, *wrptr, *wr2ptr;
2487     int      cat, *cptr, i, curvatOK;
2488 
2489 
2490     while ((! p->x) || (! q->x)) {
2491       if (! (p->x)) if (! newview(tr, p)) return badZ;
2492       if (! (q->x)) if (! newview(tr, q)) return badZ;
2493       }
2494 
2495     lp = p->x->lv;
2496     lq = q->x->lv;
2497 
2498     { unsigned scratch_size;
2499       scratch_size = sizeof(double) * tr->cdta->endsite;
2500       if ((abi   = (double *) Malloc(scratch_size)) &&
2501           (bci   = (double *) Malloc(scratch_size)) &&
2502           (sumci = (double *) Malloc(scratch_size))) ;
2503       else {
2504         printf("ERROR: makenewz unable to obtain space for arrays\n");
2505         return badZ;
2506         }
2507       }
2508     abptr   = abi;
2509     bcptr   = bci;
2510     sumcptr = sumci;
2511 
2512 #   ifdef Vectorize
2513 #     pragma IVDEP
2514 #   endif
2515 
2516     for (i = 0; i < tr->cdta->endsite; i++) {
2517       fx1a = tr->rdta->freqa * lp->a;
2518       fx1g = tr->rdta->freqg * lp->g;
2519       fx1c = tr->rdta->freqc * lp->c;
2520       fx1t = tr->rdta->freqt * lp->t;
2521       suma = fx1a * lq->a + fx1c * lq->c + fx1g * lq->g + fx1t * lq->t;
2522       fx2r = tr->rdta->freqa * lq->a + tr->rdta->freqg * lq->g;
2523       fx2y = tr->rdta->freqc * lq->c + tr->rdta->freqt * lq->t;
2524       fx1r = fx1a + fx1g;
2525       fx1y = fx1c + fx1t;
2526       *sumcptr++ = sumc = (fx1r + fx1y) * (fx2r + fx2y);
2527       sumb       = fx1r * fx2r * tr->rdta->invfreqr + fx1y * fx2y * tr->rdta->invfreqy;
2528       *abptr++   = suma - sumb;
2529       *bcptr++   = sumb - sumc;
2530       lp++;
2531       lq++;
2532       }
2533 
2534     z = z0;
2535     do {
2536       zprev = z;
2537       zstep = (1.0 - zmax) * z + zmin;
2538       curvatOK = FALSE;
2539 
2540       do {
2541         if (z < zmin) z = zmin;
2542         else if (z > zmax) z = zmax;
2543 
2544         lz    = log(z);
2545         xvlz  = tr->rdta->xv * lz;
2546         rptr  = &(tr->rdta->catrat[1]);
2547         zzptr = &(zztable[1]);
2548         zvptr = &(zvtable[1]);
2549 
2550 #       ifdef Vectorize
2551 #         pragma IVDEP
2552 #       endif
2553 
2554         for (i = 1; i <= tr->rdta->categs; i++) {
2555           ki = *rptr++;
2556           *zzptr++ = exp(ki *   lz);
2557           *zvptr++ = exp(ki * xvlz);
2558           }
2559 
2560         abptr   = abi;
2561         bcptr   = bci;
2562         sumcptr = sumci;
2563         cptr    = &(tr->cdta->patcat[0]);
2564         wrptr   = &(tr->cdta->wr[0]);
2565         wr2ptr  = &(tr->cdta->wr2[0]);
2566         dlnLdlz = 0.0;                 /*  = d(ln(likelihood))/d(lz) */
2567         d2lnLdlz2 = 0.0;               /*  = d2(ln(likelihood))/d(lz)2 */
2568 
2569 #       ifdef Vectorize
2570 #         pragma IVDEP
2571 #       endif
2572 
2573         for (i = 0; i < tr->cdta->endsite; i++) {
2574           cat    = *cptr++;              /*  ratecategory(i) */
2575           ab     = *abptr++ * zztable[cat];
2576           bc     = *bcptr++ * zvtable[cat];
2577           sumc   = *sumcptr++;
2578           inv_Li = 1.0/(ab + bc + sumc);
2579           t1     = ab * inv_Li;
2580           t2     = tr->rdta->xv * bc * inv_Li;
2581           dlnLidlz   = t1 + t2;
2582           dlnLdlz   += *wrptr++  * dlnLidlz;
2583           d2lnLdlz2 += *wr2ptr++ * (t1 + tr->rdta->xv * t2 - dlnLidlz * dlnLidlz);
2584           }
2585 
2586         if ((d2lnLdlz2 >= 0.0) && (z < zmax))
2587           zprev = z = 0.37 * z + 0.63;  /*  Bad curvature, shorten branch */
2588         else
2589           curvatOK = TRUE;
2590 
2591         } while (! curvatOK);
2592 
2593       if (d2lnLdlz2 < 0.0) {
2594         z *= exp(-dlnLdlz / d2lnLdlz2);
2595         if (z < zmin) z = zmin;
2596         if (z > 0.25 * zprev + 0.75)    /*  Limit steps toward z = 1.0 */
2597           z = 0.25 * zprev + 0.75;
2598         }
2599 
2600       if (z > zmax) z = zmax;
2601 
2602       } while ((--maxiter > 0) && (ABS(z - zprev) > zstep));
2603 
2604     Free(abi);
2605     Free(bci);
2606     Free(sumci);
2607 
2608 /* printf("makenewz: %le\n", z); */
2609 
2610     return  z;
2611   } /* makenewz */
2612 
2613 
update(tree * tr,nodeptr p)2614 boolean update (tree *tr, nodeptr p)
2615   { /* update */
2616     nodeptr  q;
2617     double   z0, z;
2618 
2619     q = p->back;
2620     z0 = q->z;
2621     if ((z = makenewz(tr, p, q, z0, newzpercycle)) == badZ) return FALSE;
2622     p->z = q->z = z;
2623     if (ABS(z - z0) > deltaz)  tr->smoothed = FALSE;
2624     return TRUE;
2625   } /* update */
2626 
2627 
smooth(tree * tr,nodeptr p)2628 boolean smooth (tree *tr, nodeptr p)
2629   { /* smooth */
2630     nodeptr  q;
2631 
2632     if (! update(tr, p))               return FALSE; /*  Adjust branch */
2633     if (! p->tip) {                                  /*  Adjust descendants */
2634         q = p->next;
2635         while (q != p) {
2636           if (! smooth(tr, q->back))   return FALSE;
2637           q = q->next;
2638           }
2639 
2640 #     if ReturnSmoothedView
2641         if (! newview(tr, p)) return FALSE;
2642 #     endif
2643       }
2644 
2645     return TRUE;
2646   } /* smooth */
2647 
2648 
smoothTree(tree * tr,int maxtimes)2649 boolean smoothTree (tree *tr, int maxtimes)
2650   { /* smoothTree */
2651     nodeptr  p, q;
2652 
2653     p = tr->start;
2654 
2655     while (--maxtimes >= 0) {
2656       tr->smoothed = TRUE;
2657       if (! smooth(tr, p->back))       return FALSE;
2658       if (! p->tip) {
2659         q = p->next;
2660         while (q != p) {
2661           if (! smooth(tr, q->back))   return FALSE;
2662           q = q->next;
2663           }
2664         }
2665       if (tr->smoothed)  break;
2666       }
2667 
2668     return TRUE;
2669   } /* smoothTree */
2670 
2671 
localSmooth(tree * tr,nodeptr p,int maxtimes)2672 boolean localSmooth (tree *tr, nodeptr p, int maxtimes)
2673   { /* localSmooth -- Smooth branches around p */
2674     nodeptr  q;
2675 
2676     if (p->tip) return FALSE;            /* Should be an error */
2677 
2678     while (--maxtimes >= 0) {
2679       tr->smoothed = TRUE;
2680       q = p;
2681       do {
2682         if (! update(tr, q)) return FALSE;
2683         q = q->next;
2684         } while (q != p);
2685       if (tr->smoothed)  break;
2686       }
2687 
2688     tr->smoothed = FALSE;             /* Only smooth locally */
2689     return TRUE;
2690   } /* localSmooth */
2691 
2692 
hookup(nodeptr p,nodeptr q,double z)2693 void hookup (nodeptr p, nodeptr q, double z)
2694   { /* hookup */
2695     p->back = q;
2696     q->back = p;
2697     p->z = q->z = z;
2698   } /* hookup */
2699 
2700 
2701 /* Insert node p into branch q <-> q->back */
2702 
insert(tree * tr,nodeptr p,nodeptr q,boolean glob)2703 boolean insert (tree *tr, nodeptr p, nodeptr q, boolean glob)
2704 /* glob -- Smooth tree globally? */
2705 
2706 /*                q
2707                  /.
2708              add/ .
2709                /  .
2710              pn   .
2711     s ---- p      .remove
2712              pnn  .
2713                \  .
2714              add\ .
2715                  \.      pn  = p->next;
2716                   r      pnn = p->next->next;
2717  */
2718 
2719   { /* insert */
2720     nodeptr  r, s;
2721 
2722     r = q->back;
2723     s = p->back;
2724 
2725 #   if BestInsertAverage && ! Master
2726     { double  zqr, zqs, zrs, lzqr, lzqs, lzrs, lzsum, lzq, lzr, lzs, lzmax;
2727 
2728       if ((zqr = makenewz(tr, q, r, q->z,     iterations)) == badZ) return FALSE;
2729       if ((zqs = makenewz(tr, q, s, defaultz, iterations)) == badZ) return FALSE;
2730       if ((zrs = makenewz(tr, r, s, defaultz, iterations)) == badZ) return FALSE;
2731 
2732       lzqr = (zqr > zmin) ? log(zqr) : log(zmin);  /* long branches */
2733       lzqs = (zqs > zmin) ? log(zqs) : log(zmin);
2734       lzrs = (zrs > zmin) ? log(zrs) : log(zmin);
2735       lzsum = 0.5 * (lzqr + lzqs + lzrs);
2736 
2737       lzq = lzsum - lzrs;
2738       lzr = lzsum - lzqs;
2739       lzs = lzsum - lzqr;
2740       lzmax = log(zmax);
2741 
2742       if      (lzq > lzmax) {lzq = lzmax; lzr = lzqr; lzs = lzqs;} /* short */
2743       else if (lzr > lzmax) {lzr = lzmax; lzq = lzqr; lzs = lzrs;}
2744       else if (lzs > lzmax) {lzs = lzmax; lzq = lzqs; lzr = lzrs;}
2745 
2746       hookup(p->next,       q, exp(lzq));
2747       hookup(p->next->next, r, exp(lzr));
2748       hookup(p,             s, exp(lzs));
2749       }
2750 
2751 #   else
2752     { double  z;
2753       z = sqrt(q->z);
2754       hookup(p->next,       q, z);
2755       hookup(p->next->next, r, z);
2756       }
2757 
2758 #   endif
2759 
2760     if (! newview(tr, p)) return FALSE;   /*  So that p is valid at update */
2761     tr->opt_level = 0;
2762 
2763 #   if ! Master         /*  Smoothings are done by slave */
2764       if (glob) {                                    /* Smooth whole tree */
2765         if (! smoothTree(tr, smoothings)) return FALSE;
2766         }
2767       else {                                         /* Smooth locale of p */
2768         if (! localSmooth(tr, p, smoothings)) return FALSE;
2769         }
2770 
2771 #   else
2772       tr->likelihood = unlikely;
2773 #   endif
2774     return  TRUE;
2775   } /* insert */
2776 
2777 
removeNode(tree * tr,nodeptr p)2778 nodeptr  removeNode (tree *tr, nodeptr p)
2779 
2780 /*                q
2781                  .|
2782           remove. |
2783                .  |
2784              pn   |
2785     s ---- p      |add
2786              pnn  |
2787                .  |
2788           remove. |
2789                  .|      pn  = p->next;
2790                   r      pnn = p->next->next;
2791  */
2792 
2793     /* remove p and return where it was */
2794   { /* removeNode */
2795     double   zqr;
2796     nodeptr  q, r;
2797 
2798     q = p->next->back;
2799     r = p->next->next->back;
2800     zqr = q->z * r->z;
2801 #   if ! Master
2802       if ((zqr = makenewz(tr, q, r, zqr, iterations)) == badZ) return (node *) NULL;
2803 #   endif
2804     hookup(q, r, zqr);
2805 
2806     p->next->next->back = p->next->back = (node *) NULL;
2807     return  q;
2808   } /* removeNode */
2809 
2810 
initrav(tree * tr,nodeptr p)2811 boolean initrav (tree *tr, nodeptr p)
2812   { /* initrav */
2813     nodeptr  q;
2814 
2815     if (! p->tip) {
2816       q = p->next;
2817       do {
2818         if (! initrav(tr, q->back))  return FALSE;
2819         q = q->next;
2820         } while (q != p);
2821       if (! newview(tr, p))         return FALSE;
2822       }
2823 
2824     return TRUE;
2825   } /* initrav */
2826 
2827 
buildNewTip(tree * tr,nodeptr p)2828 nodeptr buildNewTip (tree *tr, nodeptr p)
2829   { /* buildNewTip */
2830     nodeptr  q;
2831 
2832     q = tr->nodep[(tr->nextnode)++];
2833     hookup(p, q, defaultz);
2834     return  q;
2835   } /* buildNewTip */
2836 
2837 
buildSimpleTree(tree * tr,int ip,int iq,int ir)2838 boolean buildSimpleTree (tree *tr, int ip, int iq, int ir)
2839   { /* buildSimpleTree */
2840     /* p, q and r are tips meeting at s */
2841     nodeptr  p, s;
2842     int  i;
2843 
2844     i = MIN(ip, iq);
2845     if (ir < i)  i = ir;
2846     tr->start = tr->nodep[i];
2847     tr->ntips = 3;
2848     p = tr->nodep[ip];
2849     hookup(p, tr->nodep[iq], defaultz);
2850     s = buildNewTip(tr, tr->nodep[ir]);
2851 
2852     return insert(tr, s, p, FALSE);  /* Smoothing is local to s */
2853   } /* buildSimpleTree */
2854 
2855 
strchr(char * str,int chr)2856 char * strchr (char *str, int chr)
2857  { /* strchr */
2858     int  c;
2859 
2860     while (c = *str)  {if (c == chr) return str; str++;}
2861     return  (char *) NULL;
2862  } /* strchr */
2863 
2864 
strstr(char * str1,char * str2)2865 char * strstr (char *str1, char *str2)
2866  { /* strstr */
2867     char *s1, *s2;
2868     int  c;
2869 
2870     while (*(s1 = str1)) {
2871       s2 = str2;
2872       do {
2873         if (! (c = *s2++))  return str1;
2874         }
2875         while (*s1++ == c);
2876       str1++;
2877       }
2878     return  (char *) NULL;
2879  } /* strstr */
2880 
2881 
readKeyValue(char * string,char * key,char * format,void * value)2882 boolean readKeyValue (char *string, char *key, char *format, void *value)
2883   { /* readKeyValue */
2884 
2885     if (! (string = strstr(string, key)))  return FALSE;
2886     string += strlen(key);
2887     if (! (string = strchr(string, '=')))  return FALSE;
2888     string++;
2889     return  sscanf(string, format, value);  /* 1 if read, otherwise 0 */
2890   } /* readKeyValue */
2891 
2892 
2893 #if Master || Slave
2894 
str_readTreeLikelihood(char * treestr)2895 double str_readTreeLikelihood (char *treestr)
2896   { /* str_readTreeLikelihood */
2897     double lk1;
2898     char    *com, *com_end;
2899     boolean  readKeyValue();
2900 
2901     if ((com = strchr(treestr, '[')) && (com < strchr(treestr, '('))
2902                                      && (com_end = strchr(com, ']'))) {
2903       com++;
2904       *com_end = 0;
2905       if (readKeyValue(com, likelihood_key, "%lg", (void *) &(lk1))) {
2906         *com_end = ']';
2907         return lk1;
2908         }
2909       }
2910 
2911     fprintf(stderr, "ERROR reading likelihood in receiveTree\n");
2912     return  badEval;
2913   } /* str_readTreeLikelihood */
2914 
2915 
sendTree(comm_block * comm,tree * tr)2916 boolean sendTree (comm_block *comm, tree *tr)
2917   { /* sendTree */
2918     char  *treestr;
2919     char  *treeString();
2920 #   if Master
2921       void sendTreeNum();
2922 #   endif
2923 
2924     comm->done_flag = tr->likelihood > 0.0;
2925     if (comm->done_flag)
2926       write_comm_msg(comm, NULL);
2927 
2928     else {
2929       treestr = (char *) Malloc((tr->ntips * (nmlngth+32)) + 256);
2930       if (! treestr) {
2931         fprintf(stderr, "sendTree: Malloc failure\n");
2932         return 0;
2933         }
2934 
2935 #     if Master
2936         if (send_ahead >= MAX_SEND_AHEAD) {
2937           double new_likelihood;
2938           int  n_to_get;
2939 
2940           n_to_get = (send_ahead+1)/2;
2941           sendTreeNum(n_to_get);
2942           send_ahead -= n_to_get;
2943           read_comm_msg(& comm_slave, treestr);
2944           new_likelihood = str_readTreeLikelihood(treestr);
2945           if (new_likelihood == badEval)  return FALSE;
2946           if (! best_tr_recv || (new_likelihood > best_lk_recv)) {
2947             if (best_tr_recv)  Free(best_tr_recv);
2948             best_tr_recv = Malloc(strlen(treestr) + 1);
2949             strcpy(best_tr_recv, treestr);
2950             best_lk_recv = new_likelihood;
2951             }
2952           }
2953         send_ahead++;
2954 #     endif           /*  End #if Master  */
2955 
2956       REPORT_SEND_TREE;
2957       (void) treeString(treestr, tr, tr->start->back, 1);
2958       write_comm_msg(comm, treestr);
2959 
2960       Free(treestr);
2961       }
2962 
2963     return TRUE;
2964   } /* sendTree */
2965 
2966 
receiveTree(comm_block * comm,tree * tr)2967 boolean  receiveTree (comm_block *comm, tree *tr)
2968   { /* receiveTree */
2969     char   *treestr;
2970     boolean status;
2971     boolean str_treeReadLen();
2972 
2973     treestr = (char *) Malloc((tr->ntips * (nmlngth+32)) + 256);
2974     if (! treestr) {
2975       fprintf(stderr, "receiveTree: Malloc failure\n");
2976       return 0;
2977       }
2978 
2979     read_comm_msg(comm, treestr);
2980     if (comm->done_flag) {
2981       tr->likelihood = 1.0;
2982       status = TRUE;
2983       }
2984 
2985     else {
2986 #     if Master
2987         if (best_tr_recv) {
2988           if (str_readTreeLikelihood(treestr) < best_lk_recv) {
2989             strcpy(treestr, best_tr_recv);  /* Overwrite new tree with best */
2990             }
2991           Free(best_tr_recv);
2992           best_tr_recv = NULL;
2993           }
2994 #     endif           /*  End #if Master  */
2995 
2996       status = str_treeReadLen(treestr, tr);
2997       }
2998 
2999     Free(treestr);
3000     return status;
3001   } /* receiveTree */
3002 
3003 
requestForWork(void)3004 void requestForWork (void)
3005   { /* requestForWork */
3006     p4_send(DNAML_REQUEST, DNAML_DISPATCHER_ID, NULL, 0);
3007   } /* requestForWork */
3008 #endif                  /* End #if Master || Slave  */
3009 
3010 
3011 #if Master
sendTreeNum(int n_to_get)3012 void sendTreeNum(int n_to_get)
3013   { /* sendTreeNum */
3014     char scr[512];
3015 
3016     sprintf(scr, "%d", n_to_get);
3017     p4_send(DNAML_NUM_TREE, DNAML_MERGER_ID, scr, strlen(scr)+1);
3018   } /* sendTreeNum */
3019 
3020 
getReturnedTrees(tree * tr,bestlist * bt,int n_tree_sent)3021 boolean  getReturnedTrees (tree *tr, bestlist *bt, int n_tree_sent)
3022  /* n_tree_sent -- number of trees sent to slaves */
3023   { /* getReturnedTrees */
3024     void sendTreeNum();
3025     boolean receiveTree();
3026 
3027     sendTreeNum(send_ahead);
3028     send_ahead = 0;
3029 
3030     if (! receiveTree(& comm_slave, tr))  return FALSE;
3031     tr->smoothed = TRUE;
3032     (void) saveBestTree(bt, tr);
3033 
3034     return TRUE;
3035   } /* getReturnedTrees */
3036 #endif
3037 
3038 
cacheZ(tree * tr)3039 void cacheZ (tree *tr)
3040   { /* cacheZ */
3041     nodeptr  p;
3042     int  nodes;
3043 
3044     nodes = tr->mxtips  +  3 * (tr->mxtips - 2);
3045     p = tr->nodep[1];
3046     while (nodes-- > 0) {p->z0 = p->z; p++;}
3047   } /* cacheZ */
3048 
3049 
restoreZ(tree * tr)3050 void restoreZ (tree *tr)
3051   { /* restoreZ */
3052     nodeptr  p;
3053     int  nodes;
3054 
3055     nodes = tr->mxtips  +  3 * (tr->mxtips - 2);
3056     p = tr->nodep[1];
3057     while (nodes-- > 0) {p->z = p->z0; p++;}
3058   } /* restoreZ */
3059 
3060 
testInsert(tree * tr,nodeptr p,nodeptr q,bestlist * bt,boolean fast)3061 boolean testInsert (tree *tr, nodeptr p, nodeptr q, bestlist *bt, boolean  fast)
3062   { /* testInsert */
3063     double  qz;
3064     nodeptr  r;
3065 
3066     r = q->back;             /* Save original connection */
3067     qz = q->z;
3068     if (! insert(tr, p, q, ! fast)) return FALSE;
3069 
3070 #   if ! Master
3071       if (evaluate(tr, fast ? p->next->next : tr->start) == badEval) return FALSE;
3072       (void) saveBestTree(bt, tr);
3073 #   else  /* Master */
3074       tr->likelihood = unlikely;
3075       if (! sendTree(& comm_slave, tr))  return FALSE;
3076 #   endif
3077 
3078     /* remove p from this branch */
3079 
3080     hookup(q, r, qz);
3081     p->next->next->back = p->next->back = (nodeptr) NULL;
3082     if (! fast) {            /* With fast add, other values are still OK */
3083       restoreZ(tr);          /*   Restore branch lengths */
3084 #     if ! Master            /*   Regenerate x values */
3085         if (! initrav(tr, p->back))  return FALSE;
3086         if (! initrav(tr, q))        return FALSE;
3087         if (! initrav(tr, r))        return FALSE;
3088 #     endif
3089       }
3090 
3091     return TRUE;
3092   } /* testInsert */
3093 
3094 
addTraverse(tree * tr,nodeptr p,nodeptr q,int mintrav,int maxtrav,bestlist * bt,boolean fast)3095 int addTraverse (tree *tr, nodeptr p, nodeptr q,
3096                  int mintrav, int maxtrav, bestlist *bt, boolean fast)
3097   { /* addTraverse */
3098     int  tested, newtested;
3099 
3100     tested = 0;
3101     if (--mintrav <= 0) {           /* Moved minimum distance? */
3102       if (! testInsert(tr, p, q, bt, fast))  return badRear;
3103       tested++;
3104       }
3105 
3106     if ((! q->tip) && (--maxtrav > 0)) {    /* Continue traverse? */
3107       newtested = addTraverse(tr, p, q->next->back,
3108                               mintrav, maxtrav, bt, fast);
3109       if (newtested == badRear) return badRear;
3110       tested += newtested;
3111       newtested = addTraverse(tr, p, q->next->next->back,
3112                               mintrav, maxtrav, bt, fast);
3113       if (newtested == badRear) return badRear;
3114       tested += newtested;
3115       }
3116 
3117     return tested;
3118   } /* addTraverse */
3119 
3120 
rearrange(tree * tr,nodeptr p,int mintrav,int maxtrav,bestlist * bt)3121 int  rearrange (tree *tr, nodeptr p, int mintrav, int maxtrav, bestlist *bt)
3122     /* rearranges the tree, globally or locally */
3123   { /* rearrange */
3124     double   p1z, p2z, q1z, q2z;
3125     nodeptr  p1, p2, q, q1, q2;
3126     int      tested, mintrav2, newtested;
3127 
3128     tested = 0;
3129     if (maxtrav < 1 || mintrav > maxtrav)  return tested;
3130 
3131 /* Moving subtree forward in tree. */
3132 
3133     if (! p->tip) {
3134       p1 = p->next->back;
3135       p2 = p->next->next->back;
3136       if (! p1->tip || ! p2->tip) {
3137         p1z = p1->z;
3138         p2z = p2->z;
3139         if (! removeNode(tr, p)) return badRear;
3140         cacheZ(tr);
3141         if (! p1->tip) {
3142           newtested = addTraverse(tr, p, p1->next->back,
3143                                   mintrav, maxtrav, bt, FALSE);
3144           if (newtested == badRear) return badRear;
3145           tested += newtested;
3146           newtested = addTraverse(tr, p, p1->next->next->back,
3147                                   mintrav, maxtrav, bt, FALSE);
3148           if (newtested == badRear) return badRear;
3149           tested += newtested;
3150           }
3151 
3152         if (! p2->tip) {
3153           newtested = addTraverse(tr, p, p2->next->back,
3154                                   mintrav, maxtrav, bt, FALSE);
3155           if (newtested == badRear) return badRear;
3156           tested += newtested;
3157           newtested = addTraverse(tr, p, p2->next->next->back,
3158                                   mintrav, maxtrav, bt, FALSE);
3159           if (newtested == badRear) return badRear;
3160           tested += newtested;
3161           }
3162 
3163         hookup(p->next,       p1, p1z);  /*  Restore original tree */
3164         hookup(p->next->next, p2, p2z);
3165         if (! (initrav(tr, tr->start)
3166             && initrav(tr, tr->start->back))) return badRear;
3167         }
3168       }   /* if (! p->tip) */
3169 
3170 /* Moving subtree backward in tree.  Minimum move is 2 to avoid duplicates */
3171 
3172     q = p->back;
3173     if (! q->tip && maxtrav > 1) {
3174       q1 = q->next->back;
3175       q2 = q->next->next->back;
3176       if (! q1->tip && (!q1->next->back->tip || !q1->next->next->back->tip) ||
3177           ! q2->tip && (!q2->next->back->tip || !q2->next->next->back->tip)) {
3178         q1z = q1->z;
3179         q2z = q2->z;
3180         if (! removeNode(tr, q)) return badRear;
3181         cacheZ(tr);
3182         mintrav2 = mintrav > 2 ? mintrav : 2;
3183 
3184         if (! q1->tip) {
3185           newtested = addTraverse(tr, q, q1->next->back,
3186                                   mintrav2 , maxtrav, bt, FALSE);
3187           if (newtested == badRear) return badRear;
3188           tested += newtested;
3189           newtested = addTraverse(tr, q, q1->next->next->back,
3190                                   mintrav2 , maxtrav, bt, FALSE);
3191           if (newtested == badRear) return badRear;
3192           tested += newtested;
3193           }
3194 
3195         if (! q2->tip) {
3196           newtested = addTraverse(tr, q, q2->next->back,
3197                                   mintrav2 , maxtrav, bt, FALSE);
3198           if (newtested == badRear) return badRear;
3199           tested += newtested;
3200           newtested = addTraverse(tr, q, q2->next->next->back,
3201                                   mintrav2 , maxtrav, bt, FALSE);
3202           if (newtested == badRear) return badRear;
3203           tested += newtested;
3204           }
3205 
3206         hookup(q->next,       q1, q1z);  /*  Restore original tree */
3207         hookup(q->next->next, q2, q2z);
3208         if (! (initrav(tr, tr->start)
3209             && initrav(tr, tr->start->back))) return badRear;
3210         }
3211       }   /* if (! q->tip && maxtrav > 1) */
3212 
3213 /* Move other subtrees */
3214 
3215     if (! p->tip) {
3216       newtested = rearrange(tr, p->next->back,       mintrav, maxtrav, bt);
3217       if (newtested == badRear) return badRear;
3218       tested += newtested;
3219       newtested = rearrange(tr, p->next->next->back, mintrav, maxtrav, bt);
3220       if (newtested == badRear) return badRear;
3221       tested += newtested;
3222       }
3223 
3224     return  tested;
3225   } /* rearrange */
3226 
3227 
fopen_pid(char * filenm,char * mode,char * name_pid)3228 FILE *fopen_pid (char *filenm, char *mode, char *name_pid)
3229   { /* fopen_pid */
3230 
3231     (void) sprintf(name_pid, "%s.%d", filenm, getpid());
3232     return  fopen(name_pid, mode);
3233   } /* fopen_pid */
3234 
3235 
3236 #if DeleteCheckpointFile
unlink_pid(char * filenm)3237 void  unlink_pid (char *filenm)
3238   { /* unlink_pid */
3239     char scr[512];
3240 
3241     (void) sprintf(scr, "%s.%d", filenm, getpid());
3242     unlink(scr);
3243   } /* unlink_pid */
3244 #endif
3245 
3246 
writeCheckpoint(tree * tr)3247 void  writeCheckpoint (tree *tr)
3248   { /* writeCheckpoint */
3249     char   filename[128];
3250     FILE  *checkpointf;
3251     void   treeOut();
3252 
3253     checkpointf = fopen_pid(checkpointname, "a", filename);
3254     if (checkpointf) {
3255       treeOut(checkpointf, tr, treeNewick);
3256       (void) fclose(checkpointf);
3257       }
3258   } /* writeCheckpoint */
3259 
3260 
findAnyTip(nodeptr p)3261 node * findAnyTip(nodeptr p)
3262   { /* findAnyTip */
3263     return  p->tip ? p : findAnyTip(p->next->back);
3264   } /* findAnyTip */
3265 
3266 
optimize(tree * tr,int maxtrav,bestlist * bt)3267 boolean  optimize (tree *tr, int maxtrav, bestlist *bt)
3268   { /* optimize */
3269     nodeptr  p;
3270     int    mintrav, tested;
3271 
3272     if (tr->ntips < 4)  return  TRUE;
3273 
3274     writeCheckpoint(tr);                    /* checkpoint the starting tree */
3275 
3276     if (maxtrav > tr->ntips - 3)  maxtrav = tr->ntips - 3;
3277     if (maxtrav <= tr->opt_level)  return  TRUE;
3278 
3279     printf("      Doing %s rearrangements\n",
3280              (maxtrav == 1)            ? "local" :
3281              (maxtrav < tr->ntips - 3) ? "regional" : "global");
3282 
3283     /* loop while tree gets better  */
3284 
3285     do {
3286       (void) startOpt(bt, tr);
3287       mintrav = tr->opt_level + 1;
3288 
3289       /* rearrange must start from a tip or it will miss some trees */
3290 
3291       p = findAnyTip(tr->start);
3292       tested = rearrange(tr, p->back, mintrav, maxtrav, bt);
3293       if (tested == badRear)  return FALSE;
3294 
3295 #     if Master
3296         if (! getReturnedTrees(tr, bt, tested)) return FALSE;
3297 #     endif
3298 
3299       bt->numtrees += tested;
3300       (void) setOptLevel(bt, maxtrav);
3301       if (! recallBestTree(bt, 1, tr)) return FALSE;   /* recover best tree */
3302 
3303       printf("      Tested %d alternative trees\n", tested);
3304       if (bt->improved) {
3305         printf("      Ln Likelihood =%14.5f\n", tr->likelihood);
3306         }
3307 
3308       writeCheckpoint(tr);                  /* checkpoint the new tree */
3309       } while (maxtrav > tr->opt_level);
3310 
3311     return TRUE;
3312   } /* optimize */
3313 
3314 
coordinates(tree * tr,nodeptr p,double lengthsum,drawdata * tdptr)3315 void coordinates (tree *tr, nodeptr p, double lengthsum, drawdata *tdptr)
3316   { /* coordinates */
3317     /* establishes coordinates of nodes */
3318     double  x, z;
3319     nodeptr  q, first, last;
3320 
3321     if (p->tip) {
3322       p->xcoord = NINT(over * lengthsum);
3323       p->ymax = p->ymin = p->ycoord = tdptr->tipy;
3324       tdptr->tipy += down;
3325       if (lengthsum > tdptr->tipmax) tdptr->tipmax = lengthsum;
3326       }
3327 
3328     else {
3329       q = p->next;
3330       do {
3331         z = q->z;
3332         if (z < zmin) z = zmin;
3333         x = lengthsum - tr->rdta->fracchange * log(z);
3334         coordinates(tr, q->back, x, tdptr);
3335         q = q->next;
3336         } while (p == tr->start->back ? q != p->next : q != p);
3337 
3338       first = p->next->back;
3339       q = p;
3340       while (q->next != p) q = q->next;
3341       last = q->back;
3342       p->xcoord = NINT(over * lengthsum);
3343       p->ycoord = (first->ycoord + last->ycoord)/2;
3344       p->ymin = first->ymin;
3345       p->ymax = last->ymax;
3346       }
3347   } /* coordinates */
3348 
3349 
drawline(tree * tr,int i,double scale)3350 void drawline (tree *tr, int i, double scale)
3351     /* draws one row of the tree diagram by moving up tree */
3352     /* Modified to handle 1000 taxa, October 16, 1991 */
3353   { /* drawline */
3354     nodeptr  p, q, r, first, last;
3355     int  n, j, k, l, extra;
3356     boolean  done;
3357 
3358     p = q = tr->start->back;
3359     extra = 0;
3360 
3361     if (i == p->ycoord) {
3362       k = q->number - tr->mxtips;
3363       for (j = k; j < 1000; j *= 10) putchar('-');
3364       printf("%d", k);
3365       extra = 1;
3366       }
3367     else printf("   ");
3368 
3369     do {
3370       if (! p->tip) {
3371         r = p->next;
3372         done = FALSE;
3373         do {
3374           if ((i >= r->back->ymin) && (i <= r->back->ymax)) {
3375             q = r->back;
3376             done = TRUE;
3377             }
3378           r = r->next;
3379           } while (! done && (p == tr->start->back ? r != p->next : r != p));
3380 
3381         first = p->next->back;
3382         r = p;
3383         while (r->next != p) r = r->next;
3384         last = r->back;
3385         if (p == tr->start->back) last = p->back;
3386         }
3387 
3388       done = (p->tip) || (p == q);
3389       n = NINT(scale*(q->xcoord - p->xcoord));
3390       if ((n < 3) && (! q->tip)) n = 3;
3391       n -= extra;
3392       extra = 0;
3393 
3394       if ((q->ycoord == i) && (! done)) {
3395         if (p->ycoord != q->ycoord) putchar('+');
3396         else                        putchar('-');
3397 
3398         if (! q->tip) {
3399           k = q->number - tr->mxtips;
3400           l = n - 3;
3401           for (j = k; j < 100; j *= 10)  l++;
3402           for (j = 1; j <= l; j++) putchar('-');
3403           printf("%d", k);
3404           extra = 1;
3405           }
3406         else for (j = 1; j <= n-1; j++) putchar('-');
3407         }
3408 
3409       else if (! p->tip) {
3410         if ((last->ycoord > i) && (first->ycoord < i) && (i != p->ycoord)) {
3411           putchar('!');
3412           for (j = 1; j <= n-1; j++) putchar(' ');
3413           }
3414         else for (j = 1; j <= n; j++) putchar(' ');
3415         }
3416 
3417       else
3418         for (j = 1; j <= n; j++) putchar(' ');
3419 
3420       p = q;
3421       } while (! done);
3422 
3423     if ((p->ycoord == i) && p->tip) {
3424       printf(" %s", p->name);
3425       }
3426 
3427     putchar('\n');
3428   } /* drawline */
3429 
3430 
printTree(tree * tr,analdef * adef)3431 void printTree (tree *tr, analdef *adef)
3432     /* prints out diagram of the tree */
3433   { /* printTree */
3434     drawdata  tipdata;
3435     double  scale;
3436     int  i, imax;
3437 
3438     if (adef->trprint) {
3439       putchar('\n');
3440       tipdata.tipy = 1;
3441       tipdata.tipmax = 0.0;
3442       coordinates(tr, tr->start->back, (double) 0.0, & tipdata);
3443       scale = 1.0 / tipdata.tipmax;
3444       imax = tipdata.tipy - down;
3445       for (i = 1; i <= imax; i++)  drawline(tr, i, scale);
3446       printf("\nRemember: ");
3447       if (adef->root) printf("(although rooted by outgroup) ");
3448       printf("this is an unrooted tree!\n\n");
3449       }
3450   } /* printTree */
3451 
3452 
sigma(tree * tr,nodeptr p,double * sumlrptr)3453 double sigma (tree *tr, nodeptr p, double *sumlrptr)
3454     /* compute standard deviation */
3455   { /* sigma */
3456     likelivector *lp, *lq;
3457     double  slope, sum, sumlr, z, zv, zz, lz,
3458             rat, suma, sumb, sumc, d2, d, li, temp, abzz, bczv, t3,
3459             fxpa, fxpc, fxpg, fxpt, fxpr, fxpy, fxqr, fxqy, w;
3460     double  *rptr;
3461     nodeptr  q;
3462     int  i, *wptr;
3463 
3464     q = p->back;
3465     while ((! p->x) || (! q->x)) {
3466       if (! (p->x)) if (! newview(tr, p)) return -1.0;
3467       if (! (q->x)) if (! newview(tr, q)) return -1.0;
3468       }
3469 
3470     lp = p->x->lv;
3471     lq = q->x->lv;
3472 
3473     z = p->z;
3474     if (z < zmin) z = zmin;
3475     lz = log(z);
3476 
3477     wptr = &(tr->cdta->aliaswgt[0]);
3478     rptr = &(tr->cdta->patrat[0]);
3479     sum = sumlr = slope = 0.0;
3480 
3481 #   ifdef Vectorize
3482 #     pragma IVDEP
3483 #   endif
3484 
3485     for (i = 0; i < tr->cdta->endsite; i++) {
3486       rat  = *rptr++;
3487       zz   = exp(rat                * lz);
3488       zv   = exp(rat * tr->rdta->xv * lz);
3489 
3490       fxpa = tr->rdta->freqa * lp->a;
3491       fxpg = tr->rdta->freqg * lp->g;
3492       fxpc = tr->rdta->freqc * lp->c;
3493       fxpt = tr->rdta->freqt * lp->t;
3494       fxpr = fxpa + fxpg;
3495       fxpy = fxpc + fxpt;
3496       suma = fxpa * lq->a + fxpc * lq->c + fxpg * lq->g + fxpt * lq->t;
3497       fxqr = tr->rdta->freqa * lq->a + tr->rdta->freqg * lq->g;
3498       fxqy = tr->rdta->freqc * lq->c + tr->rdta->freqt * lq->t;
3499       sumc = (fxpr + fxpy) * (fxqr + fxqy);
3500       sumb = fxpr * fxqr * tr->rdta->invfreqr + fxpy * fxqy * tr->rdta->invfreqy;
3501       abzz = zz * (suma - sumb);
3502       bczv = zv * (sumb - sumc);
3503       li = sumc + abzz + bczv;
3504       t3 = tr->rdta->xv * bczv;
3505       d  = abzz + t3;
3506       d2 = rat * (abzz*(rat-1.0) + t3*(rat * tr->rdta->xv - 1.0));
3507 
3508       temp = rat * d / li;
3509       w = *wptr++;
3510       slope += w *  temp;
3511       sum   += w * (temp * temp - d2/li);
3512       sumlr += w * log(li / (suma + 1.0E-300));
3513       lp++;
3514       lq++;
3515       }
3516 
3517     *sumlrptr = sumlr;
3518     return (sum > 1.0E-300) ? z*(-slope + sqrt(slope*slope + 3.841*sum))/sum
3519                             : 1.0;
3520   } /* sigma */
3521 
3522 
describe(tree * tr,nodeptr p)3523 void describe (tree *tr, nodeptr p)
3524     /* print out information for one branch */
3525   { /* describe */
3526     double   z, s, sumlr;
3527     nodeptr  q;
3528     char    *nameptr;
3529     int      k, ch;
3530 
3531     q = p->back;
3532     printf("%4d          ", q->number - tr->mxtips);
3533     if (p->tip) {
3534       nameptr = p->name;
3535       k = nmlngth;
3536       while (ch = *nameptr++) {putchar(ch); k--;}
3537       while (--k >= 0) putchar(' ');
3538       }
3539     else {
3540       printf("%4d", p->number - tr->mxtips);
3541       for (k = 4; k < nmlngth; k++) putchar(' ');
3542       }
3543 
3544     z = q->z;
3545     if (z <= zmin) printf("    infinity");
3546     else printf("%15.5f", -log(z) * tr->rdta->fracchange);
3547 
3548     s = sigma(tr, q, & sumlr);
3549     printf("     (");
3550     if (z + s >= zmax) printf("     zero");
3551     else printf("%9.5f", (double) -log(z + s) * tr->rdta->fracchange);
3552     putchar(',');
3553     if (z - s <= zmin) printf("    infinity");
3554     else printf("%12.5f", (double) -log(z - s) * tr->rdta->fracchange);
3555     putchar(')');
3556 
3557     if      (sumlr > 2.995 ) printf(" **");
3558     else if (sumlr > 1.9205) printf(" *");
3559     putchar('\n');
3560 
3561     if (! p->tip) {
3562       describe(tr, p->next->back);
3563       describe(tr, p->next->next->back);
3564       }
3565   } /* describe */
3566 
3567 
summarize(tree * tr)3568 void summarize (tree *tr)
3569     /* print out branch length information and node numbers */
3570   { /* summarize */
3571     printf("Ln Likelihood =%14.5f\n", tr->likelihood);
3572     putchar('\n');
3573     printf(" Between        And             Length");
3574     printf("      Approx. Confidence Limits\n");
3575     printf(" -------        ---             ------");
3576     printf("      ------- ---------- ------\n");
3577 
3578     describe(tr, tr->start->back->next->back);
3579     describe(tr, tr->start->back->next->next->back);
3580     describe(tr, tr->start);
3581     putchar('\n');
3582     printf("     *  = significantly positive, P < 0.05\n");
3583     printf("     ** = significantly positive, P < 0.01\n\n\n");
3584   } /* summarize */
3585 
3586 
3587 /*===========  This is a problem if tr->start->back is a tip!  ===========*/
3588 /*  All routines should be contrived so that tr->start->back is not a tip */
3589 
treeString(char * treestr,tree * tr,nodeptr p,int form)3590 char *treeString (char *treestr, tree *tr, nodeptr p, int form)
3591     /* write string with representation of tree */
3592     /*   form == 1 -> Newick tree */
3593     /*   form == 2 -> Prolog fact */
3594     /*   form == 3 -> PHYLIP tree */
3595   { /* treeString */
3596     double  x, z;
3597     char  *nameptr;
3598     int    c;
3599 
3600     if (p == tr->start->back) {
3601       if (form != treePHYLIP) {
3602         if (form == treeProlog) {
3603           (void) sprintf(treestr, "phylip_tree(");
3604           while (*treestr) treestr++;            /* move pointer to null */
3605           }
3606 
3607         (void) sprintf(treestr, "[&&%s: version = '%s'",
3608                                  programName, programVersion);
3609         while (*treestr) treestr++;
3610 
3611         (void) sprintf(treestr, ", %s = %15.13g",
3612                                  likelihood_key, tr->likelihood);
3613         while (*treestr) treestr++;
3614 
3615         (void) sprintf(treestr, ", %s = %d", ntaxa_key, tr->ntips);
3616         while (*treestr) treestr++;
3617 
3618         (void) sprintf(treestr,", %s = %d", opt_level_key, tr->opt_level);
3619         while (*treestr) treestr++;
3620 
3621         (void) sprintf(treestr, ", %s = %d", smoothed_key, tr->smoothed);
3622         while (*treestr) treestr++;
3623 
3624         (void) sprintf(treestr, "]%s", form == treeProlog ? ", " : " ");
3625         while (*treestr) treestr++;
3626         }
3627       }
3628 
3629     if (p->tip) {
3630       if (form != treePHYLIP) *treestr++ = '\'';
3631       nameptr = p->name;
3632       while (c = *nameptr++) {
3633         if (form != treePHYLIP) {if (c == '\'') *treestr++ = '\'';}
3634         else if (c == ' ') {c = '_';}
3635         *treestr++ = c;
3636         }
3637       if (form != treePHYLIP) *treestr++ = '\'';
3638       }
3639 
3640     else {
3641       *treestr++ = '(';
3642       treestr = treeString(treestr, tr, p->next->back, form);
3643       *treestr++ = ',';
3644       treestr = treeString(treestr, tr, p->next->next->back, form);
3645       if (p == tr->start->back) {
3646         *treestr++ = ',';
3647         treestr = treeString(treestr, tr, p->back, form);
3648         }
3649       *treestr++ = ')';
3650       }
3651 
3652     if (p == tr->start->back) {
3653       (void) sprintf(treestr, ":0.0%s\n", (form != treeProlog) ? ";" : ").");
3654       }
3655     else {
3656       z = p->z;
3657       if (z < zmin) z = zmin;
3658       x = -log(z) * tr->rdta->fracchange;
3659       (void) sprintf(treestr, ": %8.6f", x);  /* prolog needs the space */
3660       }
3661 
3662     while (*treestr) treestr++;     /* move pointer up to null termination */
3663     return  treestr;
3664   } /* treeString */
3665 
3666 
treeOut(FILE * treefile,tree * tr,int form)3667 void treeOut (FILE *treefile, tree *tr, int form)
3668     /* write out file with representation of final tree */
3669   { /* treeOut */
3670     int    c;
3671     char  *cptr, *treestr;
3672 
3673     treestr = (char *) Malloc((tr->ntips * (nmlngth+32)) + 256);
3674     if (! treestr) {
3675       fprintf(stderr, "treeOut: Malloc failure\n");
3676       exit(1);
3677       }
3678 
3679     (void) treeString(treestr, tr, tr->start->back, form);
3680     cptr = treestr;
3681     while (c = *cptr++) putc(c, treefile);
3682 
3683     Free(treestr);
3684   } /* treeOut */
3685 
3686 
3687 /*=======================================================================*/
3688 /*                         Read a tree from a file                       */
3689 /*=======================================================================*/
3690 
3691 
3692 /*  1.0.A  Processing of quotation marks in comment removed
3693  */
3694 
treeFinishCom(FILE * fp,char ** strp)3695 int treeFinishCom (FILE *fp, char **strp)
3696   { /* treeFinishCom */
3697     int  ch;
3698 
3699     while ((ch = getc(fp)) != EOF && ch != ']') {
3700       if (strp != NULL) *(*strp)++ = ch;    /* save character  */
3701       if (ch == '[') {                      /* nested comment; find its end */
3702         if ((ch = treeFinishCom(fp, strp)) == EOF)  break;
3703         if (strp != NULL) *(*strp)++ = ch;  /* save closing ]  */
3704         }
3705       }
3706 
3707     if (strp != NULL) **strp = '\0';        /* terminate string  */
3708     return  ch;
3709   } /* treeFinishCom */
3710 
3711 
treeGetCh(FILE * fp)3712 int treeGetCh (FILE *fp)         /* get next nonblank, noncomment character */
3713   { /* treeGetCh */
3714     int  ch;
3715 
3716     while ((ch = getc(fp)) != EOF) {
3717       if (whitechar(ch)) ;
3718       else if (ch == '[') {                   /* comment; find its end */
3719         if ((ch = treeFinishCom(fp, (char **) NULL)) == EOF)  break;
3720         }
3721       else  break;
3722       }
3723 
3724     return  ch;
3725   } /* treeGetCh */
3726 
3727 
treeLabelEnd(int ch)3728 boolean  treeLabelEnd (int ch)
3729   { /* treeLabelEnd */
3730     switch (ch) {
3731         case EOF:  case '\0':  case '\t':  case '\n':  case ' ':
3732         case ':':  case ',':   case '(':   case ')':   case '[':
3733         case ';':
3734           return TRUE;
3735         default:
3736           break;
3737         }
3738     return FALSE;
3739   } /* treeLabelEnd */
3740 
3741 
treeGetLabel(FILE * fp,char * lblPtr,int maxlen)3742 boolean  treeGetLabel (FILE *fp, char *lblPtr, int maxlen)
3743   { /* treeGetLabel */
3744     int      ch;
3745     boolean  done, quoted, lblfound;
3746 
3747     if (--maxlen < 0) lblPtr = (char *) NULL;  /* reserves space for '\0' */
3748     else if (lblPtr == NULL) maxlen = 0;
3749 
3750     ch = getc(fp);
3751     done = treeLabelEnd(ch);
3752 
3753     lblfound = ! done;
3754     quoted = (ch == '\'');
3755     if (quoted && ! done) {ch = getc(fp); done = (ch == EOF);}
3756 
3757     while (! done) {
3758       if (quoted) {
3759         if (ch == '\'') {ch = getc(fp); if (ch != '\'') break;}
3760         }
3761 
3762       else if (treeLabelEnd(ch)) break;
3763 
3764       else if (ch == '_') ch = ' ';  /* unquoted _ goes to space */
3765 
3766       if (--maxlen >= 0) *lblPtr++ = ch;
3767       ch = getc(fp);
3768       if (ch == EOF) break;
3769       }
3770 
3771     if (ch != EOF)  (void) ungetc(ch, fp);
3772 
3773     if (lblPtr != NULL) *lblPtr = '\0';
3774 
3775     return lblfound;
3776   } /* treeGetLabel */
3777 
3778 
treeFlushLabel(FILE * fp)3779 boolean  treeFlushLabel (FILE *fp)
3780   { /* treeFlushLabel */
3781     return  treeGetLabel(fp, (char *) NULL, (int) 0);
3782   } /* treeFlushLabel */
3783 
3784 
treeFindTipByLabel(char * str,tree * tr)3785 int  treeFindTipByLabel (char  *str, tree *tr)
3786                      /*  str -- label string pointer */
3787   { /* treeFindTipByLabel */
3788     nodeptr  q;
3789     char    *nameptr;
3790     int      ch, i, n;
3791     boolean  found;
3792 
3793     for (n = 1; n <= tr->mxtips; n++) {
3794       q = tr->nodep[n];
3795       if (! (q->back)) {          /*  Only consider unused tips */
3796         i = 0;
3797         nameptr = q->name;
3798         while ((found = (str[i++] == (ch = *nameptr++))) && ch) ;
3799         if (found) return n;
3800         }
3801       }
3802 
3803     printf("ERROR: Cannot find tree species: %s\n", str);
3804 
3805     return  0;
3806   } /* treeFindTipByLabel */
3807 
3808 
treeFindTipName(FILE * fp,tree * tr)3809 int  treeFindTipName (FILE *fp, tree *tr)
3810   { /* treeFindTipName */
3811     char    *nameptr, str[nmlngth+2];
3812     int      n;
3813 
3814     if (tr->prelabeled) {
3815       if (treeGetLabel(fp, str, nmlngth+2))
3816         n = treeFindTipByLabel(str, tr);
3817       else
3818         n = 0;
3819       }
3820 
3821     else if (tr->ntips < tr->mxtips) {
3822       n = tr->ntips + 1;
3823       nameptr = tr->nodep[n]->name;
3824       if (! treeGetLabel(fp, nameptr, nmlngth+1)) n = 0;
3825       }
3826 
3827     else {
3828       n = 0;
3829       }
3830 
3831     return  n;
3832   } /* treeFindTipName */
3833 
3834 
treeEchoContext(FILE * fp1,FILE * fp2,int n)3835 void  treeEchoContext (FILE *fp1, FILE *fp2, int n)
3836  { /* treeEchoContext */
3837    int      ch;
3838    boolean  waswhite;
3839 
3840    waswhite = TRUE;
3841 
3842    while (n > 0 && ((ch = getc(fp1)) != EOF)) {
3843      if (whitechar(ch)) {
3844        ch = waswhite ? '\0' : ' ';
3845        waswhite = TRUE;
3846        }
3847      else {
3848        waswhite = FALSE;
3849        }
3850 
3851      if (ch > '\0') {putc(ch, fp2); n--;}
3852      }
3853  } /* treeEchoContext */
3854 
3855 
treeProcessLength(FILE * fp,double * dptr)3856 boolean treeProcessLength (FILE *fp, double *dptr)
3857   { /* treeProcessLength */
3858     int  ch;
3859 
3860     if ((ch = treeGetCh(fp)) == EOF)  return FALSE;    /*  Skip comments */
3861     (void) ungetc(ch, fp);
3862 
3863     if (fscanf(fp, "%lf", dptr) != 1) {
3864       printf("ERROR: treeProcessLength: Problem reading branch length\n");
3865       treeEchoContext(fp, stdout, 40);
3866       printf("\n");
3867       return  FALSE;
3868       }
3869 
3870     return  TRUE;
3871   } /* treeProcessLength */
3872 
3873 
treeFlushLen(FILE * fp)3874 boolean  treeFlushLen (FILE  *fp)
3875   { /* treeFlushLen */
3876     double  dummy;
3877     int     ch;
3878 
3879     if ((ch = treeGetCh(fp)) == ':') return treeProcessLength(fp, & dummy);
3880 
3881     if (ch != EOF) (void) ungetc(ch, fp);
3882     return TRUE;
3883   } /* treeFlushLen */
3884 
3885 
treeNeedCh(FILE * fp,int c1,char * where)3886 boolean  treeNeedCh (FILE *fp, int c1, char *where)
3887   { /* treeNeedCh */
3888     int  c2;
3889 
3890     if ((c2 = treeGetCh(fp)) == c1)  return TRUE;
3891 
3892     printf("ERROR: Expecting '%c' %s tree; found:", c1, where);
3893     if (c2 == EOF) {
3894       printf("End-of-File");
3895       }
3896     else {
3897       ungetc(c2, fp);
3898       treeEchoContext(fp, stdout, 40);
3899       }
3900     putchar('\n');
3901     return FALSE;
3902   } /* treeNeedCh */
3903 
3904 
addElementLen(FILE * fp,tree * tr,nodeptr p)3905 boolean  addElementLen (FILE *fp, tree *tr, nodeptr p)
3906   { /* addElementLen */
3907     double   z, branch;
3908     nodeptr  q;
3909     int      n, ch;
3910 
3911     if ((ch = treeGetCh(fp)) == '(') {     /*  A new internal node */
3912       n = (tr->nextnode)++;
3913       if (n > 2*(tr->mxtips) - 2) {
3914         if (tr->rooted || n > 2*(tr->mxtips) - 1) {
3915           printf("ERROR: Too many internal nodes.  Is tree rooted?\n");
3916           printf("       Deepest splitting should be a trifurcation.\n");
3917           return FALSE;
3918           }
3919         else {
3920           tr->rooted = TRUE;
3921           }
3922         }
3923       q = tr->nodep[n];
3924       if (! addElementLen(fp, tr, q->next))        return FALSE;
3925       if (! treeNeedCh(fp, ',', "in"))             return FALSE;
3926       if (! addElementLen(fp, tr, q->next->next))  return FALSE;
3927       if (! treeNeedCh(fp, ')', "in"))             return FALSE;
3928       (void) treeFlushLabel(fp);
3929       }
3930 
3931     else {                               /*  A new tip */
3932       ungetc(ch, fp);
3933       if ((n = treeFindTipName(fp, tr)) <= 0)          return FALSE;
3934       q = tr->nodep[n];
3935       if (tr->start->number > n)  tr->start = q;
3936       (tr->ntips)++;
3937       }                                  /* End of tip processing */
3938 
3939     if (tr->userlen) {
3940       if (! treeNeedCh(fp, ':', "in"))             return FALSE;
3941       if (! treeProcessLength(fp, & branch))       return FALSE;
3942       z = exp(-branch / tr->rdta->fracchange);
3943       if (z > zmax)  z = zmax;
3944       hookup(p, q, z);
3945       }
3946     else {
3947       if (! treeFlushLen(fp))                       return FALSE;
3948       hookup(p, q, defaultz);
3949       }
3950 
3951     return TRUE;
3952   } /* addElementLen */
3953 
3954 
saveTreeCom(char ** comstrp)3955 int saveTreeCom (char  **comstrp)
3956   { /* saveTreeCom */
3957     int  ch;
3958     boolean  inquote;
3959 
3960     inquote = FALSE;
3961     while ((ch = getc(INFILE)) != EOF && (inquote || ch != ']')) {
3962       *(*comstrp)++ = ch;                        /* save character  */
3963       if (ch == '[' && ! inquote) {              /* comment; find its end */
3964         if ((ch = saveTreeCom(comstrp)) == EOF)  break;
3965         *(*comstrp)++ = ch;                      /* add ] */
3966         }
3967       else if (ch == '\'') inquote = ! inquote;  /* start or end of quote */
3968       }
3969 
3970     return  ch;
3971   } /* saveTreeCom */
3972 
3973 
processTreeCom(FILE * fp,tree * tr)3974 boolean processTreeCom (FILE *fp, tree *tr)
3975   { /* processTreeCom */
3976     int   text_started, functor_read, com_open;
3977 
3978     /*  Accept prefatory "phylip_tree(" or "pseudoNewick("  */
3979 
3980     functor_read = text_started = 0;
3981     (void) fscanf(fp, " p%nhylip_tree(%n", & text_started, & functor_read);
3982     if (text_started && ! functor_read) {
3983       (void) fscanf(fp, "seudoNewick(%n", & functor_read);
3984       if (! functor_read) {
3985         printf("Start of tree 'p...' not understood.\n");
3986         return FALSE;
3987         }
3988       }
3989 
3990     com_open = 0;
3991     (void) fscanf(fp, " [%n", & com_open);
3992 
3993     if (com_open) {                                  /* comment; read it */
3994       char  com[1024], *com_end;
3995 
3996       com_end = com;
3997       if (treeFinishCom(fp, & com_end) == EOF) {     /* omits enclosing []s */
3998         printf("Missing end of tree comment\n");
3999         return FALSE;
4000         }
4001 
4002       (void) readKeyValue(com, likelihood_key, "%lg",
4003                                (void *) &(tr->likelihood));
4004       (void) readKeyValue(com, opt_level_key,  "%d",
4005                                (void *) &(tr->opt_level));
4006       (void) readKeyValue(com, smoothed_key,   "%d",
4007                                (void *) &(tr->smoothed));
4008 
4009       if (functor_read) (void) fscanf(fp, " ,");   /* remove trailing comma */
4010       }
4011 
4012     return (functor_read > 0);
4013   } /* processTreeCom */
4014 
4015 
uprootTree(tree * tr,nodeptr p)4016 nodeptr uprootTree (tree *tr, nodeptr p)
4017   { /* uprootTree */
4018     nodeptr  q, r, s, start;
4019     int      n;
4020 
4021     if (p->tip || p->back) {
4022       printf("ERROR: Unable to uproot tree.\n");
4023       printf("       Inappropriate node marked for removal.\n");
4024       return (nodeptr) NULL;
4025       }
4026 
4027     n = --(tr->nextnode);               /* last internal node added */
4028     if (n != tr->mxtips + tr->ntips - 1) {
4029       printf("ERROR: Unable to uproot tree.  Inconsistent\n");
4030       printf("       number of tips and nodes for rooted tree.\n");
4031       return (nodeptr) NULL;
4032       }
4033 
4034     q = p->next->back;                  /* remove p from tree */
4035     r = p->next->next->back;
4036     hookup(q, r, tr->userlen ? (q->z * r->z) : defaultz);
4037 
4038     start = (r->tip || (! q->tip)) ? r : r->next->next->back;
4039 
4040     if (tr->ntips > 2 && p->number != n) {
4041       q = tr->nodep[n];            /* transfer last node's conections to p */
4042       r = q->next;
4043       s = q->next->next;
4044       hookup(p,             q->back, q->z);   /* move connections to p */
4045       hookup(p->next,       r->back, r->z);
4046       hookup(p->next->next, s->back, s->z);
4047       if (start->number == q->number) start = start->back->back;
4048       q->back = r->back = s->back = (nodeptr) NULL;
4049       }
4050     else {
4051       p->back = p->next->back = p->next->next->back = (nodeptr) NULL;
4052       }
4053 
4054     tr->rooted = FALSE;
4055     return  start;
4056   } /* uprootTree */
4057 
4058 
treeReadLen(FILE * fp,tree * tr)4059 boolean treeReadLen (FILE *fp, tree *tr)
4060   { /* treeReadLen */
4061     nodeptr  p;
4062     int      i, ch;
4063     boolean  is_fact;
4064 
4065     for (i = 1; i <= tr->mxtips; i++) tr->nodep[i]->back = (node *) NULL;
4066     tr->start       = tr->nodep[tr->mxtips];
4067     tr->ntips       = 0;
4068     tr->nextnode    = tr->mxtips + 1;
4069     tr->opt_level   = 0;
4070     tr->log_f_valid = 0;
4071     tr->smoothed    = FALSE;
4072     tr->rooted      = FALSE;
4073 
4074     is_fact = processTreeCom(fp, tr);
4075 
4076     p = tr->nodep[(tr->nextnode)++];
4077     if (! treeNeedCh(fp, '(', "at start of"))       return FALSE;
4078     if (! addElementLen(fp, tr, p))                 return FALSE;
4079     if (! treeNeedCh(fp, ',', "in"))                return FALSE;
4080     if (! addElementLen(fp, tr, p->next))           return FALSE;
4081     if (! tr->rooted) {
4082       if ((ch = treeGetCh(fp)) == ',') {        /*  An unrooted format */
4083         if (! addElementLen(fp, tr, p->next->next)) return FALSE;
4084         }
4085       else {                                    /*  A rooted format */
4086         tr->rooted = TRUE;
4087         if (ch != EOF)  (void) ungetc(ch, fp);
4088         }
4089       }
4090     else {
4091       p->next->next->back = (nodeptr) NULL;
4092       }
4093     if (! treeNeedCh(fp, ')', "in"))                return FALSE;
4094     (void) treeFlushLabel(fp);
4095     if (! treeFlushLen(fp))                         return FALSE;
4096     if (is_fact) {
4097       if (! treeNeedCh(fp, ')', "at end of"))       return FALSE;
4098       if (! treeNeedCh(fp, '.', "at end of"))       return FALSE;
4099       }
4100     else {
4101       if (! treeNeedCh(fp, ';', "at end of"))       return FALSE;
4102       }
4103 
4104     if (tr->rooted) {
4105       p->next->next->back = (nodeptr) NULL;
4106       tr->start = uprootTree(tr, p->next->next);
4107       if (! tr->start)                              return FALSE;
4108       }
4109     else {
4110       tr->start = p->next->next->back;  /* This is start used by treeString */
4111       }
4112 
4113     return  (initrav(tr, tr->start) && initrav(tr, tr->start->back));
4114   } /* treeReadLen */
4115 
4116 
4117 /*=======================================================================*/
4118 /*                        Read a tree from a string                      */
4119 /*=======================================================================*/
4120 
4121 
4122 #if Master || Slave
str_treeFinishCom(char ** treestrp,char ** strp)4123 int str_treeFinishCom (char **treestrp, char **strp)
4124                       /* treestrp -- tree string pointer */
4125                       /* strp -- comment string pointer */
4126   { /* str_treeFinishCom */
4127     int  ch;
4128 
4129     while ((ch = *(*treestrp)++) != NULL && ch != ']') {
4130       if (strp != NULL) *(*strp)++ = ch;    /* save character  */
4131       if (ch == '[') {                      /* nested comment; find its end */
4132         if ((ch = str_treeFinishCom(treestrp)) == NULL)  break;
4133         if (strp != NULL) *(*strp)++ = ch;  /* save closing ]  */
4134         }
4135       }
4136     if (strp != NULL) **strp = '\0';        /* terminate string  */
4137     return  ch;
4138   } /* str_treeFinishCom */
4139 
4140 
str_treeGetCh(char ** treestrp)4141 int str_treeGetCh (char **treestrp)
4142     /* get next nonblank, noncomment character */
4143   { /* str_treeGetCh */
4144     int  ch;
4145 
4146     while ((ch = *(*treestrp)++) != NULL) {
4147       if (whitechar(ch)) ;
4148       else if (ch == '[') {                  /* comment; find its end */
4149         if ((ch = str_treeFinishCom(treestrp, (char *) NULL)) == NULL)  break;
4150         }
4151       else  break;
4152       }
4153 
4154     return  ch;
4155   } /* str_treeGetCh */
4156 
4157 
str_treeGetLabel(char ** treestrp,char * lblPtr,int maxlen)4158 boolean  str_treeGetLabel (char **treestrp, char *lblPtr, int maxlen)
4159   { /* str_treeGetLabel */
4160     int      ch;
4161     boolean  done, quoted, lblfound;
4162 
4163     if (--maxlen < 0) lblPtr = (char *) NULL;  /* reserves space for '\0' */
4164     else if (lblPtr == NULL) maxlen = 0;
4165 
4166     ch = *(*treestrp)++;
4167     done = treeLabelEnd(ch);
4168 
4169     lblfound = ! done;
4170     quoted = (ch == '\'');
4171     if (quoted && ! done) {ch = *(*treestrp)++; done = (ch == '\0');}
4172 
4173     while (! done) {
4174       if (quoted) {
4175         if (ch == '\'') {ch = *(*treestrp)++; if (ch != '\'') break;}
4176         }
4177 
4178       else if (treeLabelEnd(ch)) break;
4179 
4180       else if (ch == '_') ch = ' ';  /* unquoted _ goes to space */
4181 
4182       if (--maxlen >= 0) *lblPtr++ = ch;
4183       ch = *(*treestrp)++;
4184       if (ch == '\0') break;
4185       }
4186 
4187     (*treestrp)--;
4188 
4189     if (lblPtr != NULL) *lblPtr = '\0';
4190 
4191     return lblfound;
4192   } /* str_treeGetLabel */
4193 
4194 
str_treeFlushLabel(char ** treestrp)4195 boolean  str_treeFlushLabel (char **treestrp)
4196   { /* str_treeFlushLabel */
4197     return  str_treeGetLabel(treestrp, (char *) NULL, (int) 0);
4198   } /* str_treeFlushLabel */
4199 
4200 
str_treeFindTipName(char ** treestrp,tree * tr)4201 int  str_treeFindTipName (char **treestrp, tree *tr)
4202   { /* str_treeFindTipName */
4203     nodeptr  q;
4204     char    *nameptr, str[nmlngth+2];
4205     int      ch, i, n;
4206 
4207     if (tr->prelabeled) {
4208       if (str_treeGetLabel(treestrp, str, nmlngth+2))
4209         n = treeFindTipByLabel(str, tr);
4210       else
4211         n = 0;
4212       }
4213 
4214     else if (tr->ntips < tr->mxtips) {
4215       n = tr->ntips + 1;
4216       nameptr = tr->nodep[n]->name;
4217       if (! str_treeGetLabel(treestrp, nameptr, nmlngth+1)) n = 0;
4218       }
4219 
4220     else {
4221       n = 0;
4222       }
4223 
4224     return  n;
4225   } /* str_treeFindTipName */
4226 
4227 
str_treeProcessLength(char ** treestrp,double * dptr)4228 boolean str_treeProcessLength (char **treestrp, double *dptr)
4229   { /* str_treeProcessLength */
4230     int     used;
4231 
4232     if(! str_treeGetCh(treestrp))  return FALSE;    /*  Skip comments */
4233     (*treestrp)--;
4234 
4235     if (sscanf(*treestrp, "%lf%n", dptr, & used) != 1) {
4236       printf("ERROR: str_treeProcessLength: Problem reading branch length\n");
4237       printf("%40s\n", *treestrp);
4238       *dptr = 0.0;
4239       return FALSE;
4240       }
4241     else {
4242       *treestrp += used;
4243       }
4244 
4245     return  TRUE;
4246   } /* str_treeProcessLength */
4247 
4248 
str_treeFlushLen(char ** treestrp)4249 boolean  str_treeFlushLen (char **treestrp)
4250   { /* str_treeFlushLen */
4251     int  ch;
4252 
4253     if ((ch = str_treeGetCh(treestrp)) == ':')
4254       return str_treeProcessLength(treestrp, (double *) NULL);
4255     else {
4256       (*treestrp)--;
4257       return TRUE;
4258       }
4259   } /* str_treeFlushLen */
4260 
4261 
str_treeNeedCh(char ** treestrp,int c1,char * where)4262 boolean  str_treeNeedCh (char **treestrp, int c1, char *where)
4263   { /* str_treeNeedCh */
4264     int  c2, i;
4265 
4266     if ((c2 = str_treeGetCh(treestrp)) == c1)  return TRUE;
4267 
4268     printf("ERROR: Missing '%c' %s tree; ", c1, where);
4269     if (c2 == '\0')
4270       printf("end-of-string");
4271     else {
4272       putchar('"');
4273       for (i = 24; i-- && (c2 != '\0'); c2 = *(*treestrp)++)  putchar(c2);
4274       putchar('"');
4275       }
4276 
4277     printf(" found instead\n");
4278     return FALSE;
4279   } /* str_treeNeedCh */
4280 
4281 
str_addElementLen(char ** treestrp,tree * tr,nodeptr p)4282 boolean  str_addElementLen (char **treestrp, tree *tr, nodeptr p)
4283   { /* str_addElementLen */
4284     double   z, branch;
4285     nodeptr  q;
4286     int      n, ch;
4287 
4288     if ((ch = str_treeGetCh(treestrp)) == '(') { /*  A new internal node */
4289       n = (tr->nextnode)++;
4290       if (n > 2*(tr->mxtips) - 2) {
4291         if (tr->rooted || n > 2*(tr->mxtips) - 1) {
4292           printf("ERROR: too many internal nodes.  Is tree rooted?\n");
4293           printf("Deepest splitting should be a trifurcation.\n");
4294           return  FALSE;
4295           }
4296         else {
4297           tr->rooted = TRUE;
4298           }
4299         }
4300       q = tr->nodep[n];
4301       if (! str_addElementLen(treestrp, tr, q->next))          return FALSE;
4302       if (! str_treeNeedCh(treestrp, ',', "in"))               return FALSE;
4303       if (! str_addElementLen(treestrp, tr, q->next->next))    return FALSE;
4304       if (! str_treeNeedCh(treestrp, ')', "in"))               return FALSE;
4305       if (! str_treeFlushLabel(treestrp))                      return FALSE;
4306       }
4307 
4308     else {                           /*  A new tip */
4309       n = str_treeFindTipName(treestrp, tr, ch);
4310       if (n <= 0) return FALSE;
4311       q = tr->nodep[n];
4312       if (tr->start->number > n)  tr->start = q;
4313       (tr->ntips)++;
4314       }                              /* End of tip processing */
4315 
4316     /*  Master and Slave always use lengths */
4317 
4318     if (! str_treeNeedCh(treestrp, ':', "in"))                 return FALSE;
4319     if (! str_treeProcessLength(treestrp, & branch))           return FALSE;
4320     z = exp(-branch / tr->rdta->fracchange);
4321     if (z > zmax)  z = zmax;
4322     hookup(p, q, z);
4323 
4324     return  TRUE;
4325   } /* str_addElementLen */
4326 
4327 
str_processTreeCom(tree * tr,char ** treestrp)4328 boolean str_processTreeCom(tree *tr, char **treestrp)
4329   { /* str_processTreeCom */
4330     char  *com, *com_end;
4331     int  text_started, functor_read, com_open;
4332 
4333     com = *treestrp;
4334 
4335     functor_read = text_started = 0;
4336     sscanf(com, " p%nhylip_tree(%n", & text_started, & functor_read);
4337     if (functor_read) {
4338       com += functor_read;
4339       }
4340     else if (text_started) {
4341       com += text_started;
4342       sscanf(com, "seudoNewick(%n", & functor_read);
4343       if (! functor_read) {
4344         printf("Start of tree 'p...' not understood.\n");
4345         return  FALSE;
4346         }
4347       else {
4348         com += functor_read;
4349         }
4350       }
4351 
4352     com_open = 0;
4353     sscanf(com, " [%n", & com_open);
4354     com += com_open;
4355 
4356     if (com_open) {                              /* comment; read it */
4357 	if (!(com_end = strchr(com, ']'))) {
4358         printf("Missing end of tree comment.\n");
4359         return  FALSE;
4360         }
4361 
4362       *com_end = 0;
4363       (void) readKeyValue(com, likelihood_key, "%lg",
4364                                (void *) &(tr->likelihood));
4365       (void) readKeyValue(com, opt_level_key,  "%d",
4366                                (void *) &(tr->opt_level));
4367       (void) readKeyValue(com, smoothed_key,   "%d",
4368                                (void *) &(tr->smoothed));
4369       *com_end = ']';
4370       com_end++;
4371 
4372       if (functor_read) {                          /* remove trailing comma */
4373         text_started = 0;
4374         sscanf(com_end, " ,%n", & text_started);
4375         com_end += text_started;
4376         }
4377 
4378       *treestrp = com_end;
4379       }
4380 
4381     return (functor_read > 0);
4382   } /* str_processTreeCom */
4383 
4384 
str_treeReadLen(char * treestr,tree * tr)4385 boolean str_treeReadLen (char *treestr, tree *tr)
4386     /* read string with representation of tree */
4387   { /* str_treeReadLen */
4388     nodeptr  p;
4389     int  i;
4390     boolean  is_fact, found;
4391 
4392     for (i = 1; i <= tr->mxtips; i++) tr->nodep[i]->back = (node *) NULL;
4393     tr->start       = tr->nodep[tr->mxtips];
4394     tr->ntips       = 0;
4395     tr->nextnode    = tr->mxtips + 1;
4396     tr->opt_level   = 0;
4397     tr->log_f_valid = 0;
4398     tr->smoothed    = Master;
4399     tr->rooted      = FALSE;
4400 
4401     is_fact = str_processTreeCom(tr, & treestr);
4402 
4403     p = tr->nodep[(tr->nextnode)++];
4404     if (! str_treeNeedCh(& treestr, '(', "at start of"))       return FALSE;
4405     if (! str_addElementLen(& treestr, tr, p))                 return FALSE;
4406     if (! str_treeNeedCh(& treestr, ',', "in"))                return FALSE;
4407     if (! str_addElementLen(& treestr, tr, p->next))           return FALSE;
4408     if (! tr->rooted) {
4409       if (str_treeGetCh(& treestr) == ',') {        /*  An unrooted format */
4410         if (! str_addElementLen(& treestr, tr, p->next->next)) return FALSE;
4411         }
4412       else {                                       /*  A rooted format */
4413         p->next->next->back = (nodeptr) NULL;
4414         tr->rooted = TRUE;
4415         treestr--;
4416         }
4417       }
4418     if (! str_treeNeedCh(& treestr, ')', "in"))                return FALSE;
4419     if (! str_treeFlushLabel(& treestr))                       return FALSE;
4420     if (! str_treeFlushLen(& treestr))                         return FALSE;
4421     if (is_fact) {
4422       if (! str_treeNeedCh(& treestr, ')', "at end of"))       return FALSE;
4423       if (! str_treeNeedCh(& treestr, '.', "at end of"))       return FALSE;
4424       }
4425     else {
4426       if (! str_treeNeedCh(& treestr, ';', "at end of"))       return FALSE;
4427       }
4428 
4429     if (tr->rooted)  if (! uprootTree(tr, p->next->next))     return FALSE;
4430     tr->start = p->next->next->back;  /* This is start used by treeString */
4431 
4432     return  (initrav(tr, tr->start) && initrav(tr, tr->start->back));
4433   } /* str_treeReadLen */
4434 #endif
4435 
4436 
treeEvaluate(tree * tr,bestlist * bt)4437 boolean treeEvaluate (tree *tr, bestlist *bt)       /* Evaluate a user tree */
4438   { /* treeEvaluate */
4439 
4440     if (Slave || ! tr->userlen) {
4441       if (! smoothTree(tr, 4 * smoothings)) return FALSE;
4442       }
4443 
4444     if (evaluate(tr, tr->start) == badEval)  return FALSE;
4445 
4446 #   if ! Slave
4447       (void) saveBestTree(bt, tr);
4448 #   endif
4449     return TRUE;
4450   } /* treeEvaluate */
4451 
4452 
4453 #if Master || Slave
freopen_pid(char * filenm,char * mode,FILE * stream)4454 FILE *freopen_pid (char *filenm, char *mode, FILE *stream)
4455   { /* freopen_pid */
4456     char scr[512];
4457 
4458     (void) sprintf(scr, "%s.%d", filenm, getpid());
4459     return  freopen(scr, mode, stream);
4460   } /* freopen_pid */
4461 #endif
4462 
4463 
showBestTrees(bestlist * bt,tree * tr,analdef * adef,FILE * treefile)4464 boolean  showBestTrees (bestlist *bt, tree *tr, analdef *adef, FILE *treefile)
4465   { /* showBestTrees */
4466     int     rank;
4467 
4468     for (rank = 1; rank <= bt->nvalid; rank++) {
4469       if (rank > 1) {
4470         if (rank != recallBestTree(bt, rank, tr))  break;
4471         }
4472       if (evaluate(tr, tr->start) == badEval) return FALSE;
4473       if (tr->outgrnode->back)  tr->start = tr->outgrnode;
4474       printTree(tr, adef);
4475       summarize(tr);
4476       if (treefile)  treeOut(treefile, tr, adef->trout);
4477       }
4478 
4479     return TRUE;
4480   } /* showBestTrees */
4481 
4482 
cmpBestTrees(bestlist * bt,tree * tr)4483 boolean cmpBestTrees (bestlist *bt, tree *tr)
4484   { /* cmpBestTrees */
4485     double  sum, sum2, sd, temp, wtemp, bestscore;
4486     double *log_f0, *log_f0_ptr;      /* Save a copy of best log_f */
4487     double *log_f_ptr;
4488     int     i, j, num, besttips;
4489 
4490     num = bt->nvalid;
4491     if ((num <= 1) || (tr->cdta->wgtsum <= 1)) return TRUE;
4492 
4493     if (! (log_f0 = (double *) Malloc(sizeof(double) * tr->cdta->endsite))) {
4494       printf("ERROR: cmpBestTrees unable to obtain space for log_f0\n");
4495       return FALSE;
4496       }
4497 
4498     printf("Tree      Ln L        Diff Ln L       Its S.D.");
4499     printf("   Significantly worse?\n\n");
4500 
4501     for (i = 1; i <= num; i++) {
4502       if (i != recallBestTree(bt, i, tr))  break;
4503       if (! (tr->log_f_valid))  {
4504         if (evaluate(tr, tr->start) == badEval) return FALSE;
4505         }
4506 
4507       printf("%3d%14.5f", i, tr->likelihood);
4508       if (i == 1) {
4509         printf("  <------ best\n");
4510         besttips = tr->ntips;
4511         bestscore = tr->likelihood;
4512         log_f0_ptr = log_f0;
4513         log_f_ptr  = tr->log_f;
4514         for (j = 0; j < tr->cdta->endsite; j++)  *log_f0_ptr++ = *log_f_ptr++;
4515         }
4516       else if (tr->ntips != besttips)
4517         printf("  (different number of species)\n");
4518       else {
4519         sum = sum2 = 0.0;
4520         log_f0_ptr = log_f0;
4521         log_f_ptr  = tr->log_f;
4522         for (j = 0; j < tr->cdta->endsite; j++) {
4523           temp  = *log_f0_ptr++ - *log_f_ptr++;
4524           wtemp = tr->cdta->aliaswgt[j] * temp;
4525           sum  += wtemp;
4526           sum2 += wtemp * temp;
4527           }
4528         sd = sqrt( tr->cdta->wgtsum * (sum2 - sum*sum / tr->cdta->wgtsum)
4529                                     / (tr->cdta->wgtsum - 1) );
4530         printf("%14.5f%14.4f", tr->likelihood - bestscore, sd);
4531         printf("           %s\n", (sum > 1.95996 * sd) ? "Yes" : " No");
4532         }
4533       }
4534 
4535     Free(log_f0);
4536     printf("\n\n");
4537 
4538     return TRUE;
4539   } /* cmpBestTrees */
4540 
4541 
makeUserTree(tree * tr,bestlist * bt,analdef * adef)4542 boolean  makeUserTree (tree *tr, bestlist *bt, analdef *adef)
4543   { /* makeUserTree */
4544     char   filename[128];
4545     FILE  *treefile;
4546     int    nusertrees, which;
4547 
4548     nusertrees = adef->numutrees;
4549 
4550     printf("User-defined %s:\n\n", (nusertrees == 1) ? "tree" : "trees");
4551 
4552     treefile = adef->trout ? fopen_pid("treefile", "w", filename) : (FILE *) NULL;
4553 
4554     for (which = 1; which <= nusertrees; which++) {
4555       if (! treeReadLen(INFILE, tr)) return FALSE;
4556       if (! treeEvaluate(tr, bt))    return FALSE;
4557       if (tr->global <= 0) {
4558         if (tr->outgrnode->back)  tr->start = tr->outgrnode;
4559         printTree(tr, adef);
4560         summarize(tr);
4561         if (treefile)  treeOut(treefile, tr, adef->trout);
4562         }
4563       else {
4564         printf("%6d:  Ln Likelihood =%14.5f\n", which, tr->likelihood);
4565         }
4566       }
4567 
4568     if (tr->global > 0) {
4569       putchar('\n');
4570       if (! recallBestTree(bt, 1, tr))  return FALSE;
4571       printf("      Ln Likelihood =%14.5f\n", tr->likelihood);
4572       if (! optimize(tr, tr->global, bt))  return FALSE;
4573       if (tr->outgrnode->back)  tr->start = tr->outgrnode;
4574       printTree(tr, adef);
4575       summarize(tr);
4576       if (treefile)  treeOut(treefile, tr, adef->trout);
4577       }
4578 
4579     if (treefile) {
4580       (void) fclose(treefile);
4581       printf("Tree also written to %s\n", filename);
4582       }
4583 
4584     putchar('\n');
4585 
4586     (void) cmpBestTrees(bt, tr);
4587     return TRUE;
4588   } /* makeUserTree */
4589 
4590 
4591 #if Slave
slaveTreeEvaluate(tree * tr,bestlist * bt)4592 boolean slaveTreeEvaluate (tree *tr, bestlist *bt)
4593   { /* slaveTreeEvaluate */
4594     boolean done;
4595 
4596     do {
4597        requestForWork();
4598        if (! receiveTree(& comm_master, tr))        return FALSE;
4599        done = tr->likelihood > 0.0;
4600        if (! done) {
4601          if (! treeEvaluate(tr, bt))                return FALSE;
4602          if (! sendTree(& comm_master, tr))         return FALSE;
4603          }
4604        } while (! done);
4605 
4606     return TRUE;
4607   } /* slaveTreeEvaluate */
4608 #endif
4609 
4610 
randum(long * seed)4611 double randum (long  *seed)
4612     /* random number generator, modified to use 12 bit chunks */
4613   { /* randum */
4614     long  sum, mult0, mult1, seed0, seed1, seed2, newseed0, newseed1, newseed2;
4615 
4616     mult0 = 1549;
4617     seed0 = *seed & 4095;
4618     sum  = mult0 * seed0;
4619     newseed0 = sum & 4095;
4620     sum >>= 12;
4621     seed1 = (*seed >> 12) & 4095;
4622     mult1 =  406;
4623     sum += mult0 * seed1 + mult1 * seed0;
4624     newseed1 = sum & 4095;
4625     sum >>= 12;
4626     seed2 = (*seed >> 24) & 255;
4627     sum += mult0 * seed2 + mult1 * seed1;
4628     newseed2 = sum & 255;
4629 
4630     *seed = newseed2 << 24 | newseed1 << 12 | newseed0;
4631     return  0.00390625 * (newseed2
4632                           + 0.000244140625 * (newseed1
4633                                               + 0.000244140625 * newseed0));
4634   } /* randum */
4635 
4636 
makeDenovoTree(tree * tr,bestlist * bt,analdef * adef)4637 boolean makeDenovoTree (tree *tr, bestlist *bt, analdef *adef)
4638   { /* makeDenovoTree */
4639     char   filename[128];
4640     FILE  *treefile;
4641     nodeptr  p;
4642     int  *enterorder;      /*  random entry order */
4643     int  i, j, k, nextsp, newsp, maxtrav, tested;
4644 
4645     double randum();
4646 
4647 
4648     enterorder = (int *) Malloc(sizeof(int) * (tr->mxtips + 1));
4649     if (! enterorder) {
4650        fprintf(stderr, "makeDenovoTree: Malloc failure for enterorder\n");
4651        return 0;
4652        }
4653 
4654     if (adef->restart) {
4655       printf("Restarting from tree with the following sequences:\n");
4656       tr->userlen = TRUE;
4657       if (! treeReadLen(INFILE, tr))          return FALSE;
4658       if (! smoothTree(tr, smoothings))       return FALSE;
4659       if (evaluate(tr, tr->start) == badEval) return FALSE;
4660       if (saveBestTree(bt, tr) < 1)           return FALSE;
4661 
4662       for (i = 1, j = tr->ntips; i <= tr->mxtips; i++) { /* find loose tips */
4663         if (! tr->nodep[i]->back) {
4664           enterorder[++j] = i;
4665           }
4666         else {
4667           printf("   %s\n", tr->nodep[i]->name);
4668 
4669 #         if Master
4670             if (i>3) REPORT_ADD_SPECS;
4671 #         endif
4672           }
4673         }
4674       putchar('\n');
4675       }
4676 
4677     else {                                           /* start from scratch */
4678       tr->ntips = 0;
4679       for (i = 1; i <= tr->mxtips; i++) enterorder[i] = i;
4680       }
4681 
4682     if (adef->jumble) for (i = tr->ntips + 1; i <= tr->mxtips; i++) {
4683       j = randum(&(adef->jumble))*(tr->mxtips - tr->ntips) + tr->ntips + 1;
4684       k = enterorder[j];
4685       enterorder[j] = enterorder[i];
4686       enterorder[i] = k;
4687       }
4688 
4689     bt->numtrees = 1;
4690     if (tr->ntips < tr->mxtips)  printf("Adding species:\n");
4691 
4692     if (tr->ntips == 0) {
4693       for (i = 1; i <= 3; i++) {
4694         printf("   %s\n", tr->nodep[enterorder[i]]->name);
4695         }
4696       tr->nextnode = tr->mxtips + 1;
4697       if (! buildSimpleTree(tr, enterorder[1], enterorder[2], enterorder[3]))
4698         return FALSE;
4699       }
4700 
4701     while (tr->ntips < tr->mxtips || tr->opt_level < tr->global) {
4702       maxtrav = (tr->ntips == tr->mxtips) ? tr->global : tr->partswap;
4703       if (maxtrav > tr->ntips - 3)  maxtrav = tr->ntips - 3;
4704 
4705       if (tr->opt_level >= maxtrav) {
4706         nextsp = ++(tr->ntips);
4707         newsp = enterorder[nextsp];
4708         p = tr->nodep[newsp];
4709         printf("   %s\n", p->name);
4710 
4711 #       if Master
4712           if (nextsp % DNAML_STEP_TIME_COUNT == 1) {
4713             REPORT_STEP_TIME;
4714             }
4715           REPORT_ADD_SPECS;
4716 #       endif
4717 
4718         (void) buildNewTip(tr, p);
4719 
4720         resetBestTree(bt);
4721         cacheZ(tr);
4722         tested = addTraverse(tr, p->back, findAnyTip(tr->start)->back,
4723                              1, tr->ntips - 2, bt, adef->qadd);
4724         if (tested == badRear) return FALSE;
4725         bt->numtrees += tested;
4726 
4727 #       if Master
4728           getReturnedTrees(tr, bt, tested);
4729 #       endif
4730 
4731         printf("      Tested %d alternative trees\n", tested);
4732 
4733         (void) recallBestTree(bt, 1, tr);
4734         if (! tr->smoothed) {
4735           if (! smoothTree(tr, smoothings))        return FALSE;
4736           if (evaluate(tr, tr->start) == badEval)  return FALSE;
4737           (void) saveBestTree(bt, tr);
4738           }
4739 
4740         if (tr->ntips == 4)  tr->opt_level = 1;  /* All 4 taxon trees done */
4741         maxtrav = (tr->ntips == tr->mxtips) ? tr->global : tr->partswap;
4742         if (maxtrav > tr->ntips - 3)  maxtrav = tr->ntips - 3;
4743         }
4744 
4745       printf("      Ln Likelihood =%14.5f\n", tr->likelihood);
4746       if (! optimize(tr, maxtrav, bt)) return FALSE;
4747       }
4748 
4749     printf("\nExamined %d %s\n", bt->numtrees,
4750                                  bt->numtrees != 1 ? "trees" : "tree");
4751 
4752     treefile = adef->trout ? fopen_pid("treefile", "w", filename) : (FILE *) NULL;
4753     (void) showBestTrees(bt, tr, adef, treefile);
4754     if (treefile) {
4755       (void) fclose(treefile);
4756       printf("Tree also written to %s\n\n", filename);
4757       }
4758 
4759     (void) cmpBestTrees(bt, tr);
4760 
4761 #   if DeleteCheckpointFile
4762       unlink_pid(checkpointname);
4763 #   endif
4764 
4765     Free(enterorder);
4766 
4767     return TRUE;
4768   } /* makeDenovoTree */
4769 
4770 /*==========================================================================*/
4771 /*                             "main" routine                               */
4772 /*==========================================================================*/
4773 
4774 #if Sequential
main()4775   main ()
4776 #else
4777   slave ()
4778 #endif
4779   { /* DNA Maximum Likelihood */
4780 #   if Master
4781       int starttime, inputtime, endtime;
4782 #   endif
4783 
4784 #   if Master || Slave
4785       int my_id, nprocs, type, from, sz;
4786       char *msg;
4787 #   endif
4788 
4789     analdef      *adef;
4790     rawdata      *rdta;
4791     cruncheddata  *cdta;
4792     tree         *tr;    /*  current tree */
4793     bestlist     *bt;    /*  topology of best found tree */
4794 
4795 
4796 #   if Debug
4797       {
4798         char debugfilename[128];
4799         debug = fopen_pid("dnaml_debug", "w", debugfilename);
4800         }
4801 #   endif
4802 
4803 #   if Master
4804       starttime = p4_clock();
4805       nprocs = p4_num_total_slaves();
4806 
4807       if ((OUTFILE = freopen_pid("master.out", "w", stdout)) == NULL) {
4808         fprintf(stderr, "Could not open output file\n");
4809         exit(1);
4810         }
4811 
4812       /* Receive input file name from host */
4813       type = DNAML_FILE_NAME;
4814       from = DNAML_HOST_ID;
4815       msg  = NULL;
4816       p4_recv(& type, & from, & msg, & sz);
4817       if ((INFILE = fopen(msg, "r")) == NULL) {
4818         fprintf(stderr, "master could not open input file %s\n", msg);
4819         exit(1);
4820         }
4821       p4_msg_free(msg);
4822 
4823       open_link(& comm_slave);
4824 #   endif
4825 
4826 #  if DNAML_STEP
4827       begin_step_time = starttime;
4828 #  endif
4829 
4830 #   if Slave
4831       my_id = p4_get_my_id();
4832       nprocs = p4_num_total_slaves();
4833 
4834       /* Receive input file name from host */
4835       type = DNAML_FILE_NAME;
4836       from = DNAML_HOST_ID;
4837       msg  = NULL;
4838       p4_recv(& type, & from, & msg, & sz);
4839       if ((INFILE = fopen(msg, "r")) == NULL) {
4840         fprintf(stderr, "slave could not open input file %s\n",msg);
4841         exit(1);
4842         }
4843       p4_msg_free(msg);
4844 
4845 #     ifdef P4DEBUG
4846         if ((OUTFILE = freopen_pid("slave.out", "w", stdout)) == NULL) {
4847           fprintf(stderr, "Could not open output file\n");
4848           exit(1);
4849           }
4850 #     else
4851         if ((OUTFILE = freopen("/dev/null", "w", stdout)) == NULL) {
4852           fprintf(stderr, "Could not open output file\n");
4853           exit(1);
4854           }
4855 #     endif
4856 
4857       open_link(& comm_master);
4858 #   endif
4859 
4860 
4861 /*  Get data structure memory  */
4862 
4863     if (! (adef = (analdef *) Malloc(sizeof(analdef)))) {
4864       printf("ERROR: Unable to get memory for analysis definition\n\n");
4865       return 1;
4866       }
4867 
4868     if (! (rdta = (rawdata *) Malloc(sizeof(rawdata)))) {
4869       printf("ERROR: Unable to get memory for raw DNA\n\n");
4870       return 1;
4871       }
4872 
4873     if (! (cdta = (cruncheddata *) Malloc(sizeof(cruncheddata)))) {
4874       printf("ERROR: Unable to get memory for crunched DNA\n\n");
4875       return 1;
4876       }
4877 
4878     if ((tr = (tree *)     Malloc(sizeof(tree))) &&
4879         (bt = (bestlist *) Malloc(sizeof(bestlist)))) ;
4880     else {
4881       printf("ERROR: Unable to get memory for trees\n\n");
4882       return 1;
4883       }
4884     bt->ninit = 0;
4885 
4886     if (! getinput(adef, rdta, cdta, tr))                            return 1;
4887 
4888 #   if Master
4889       inputtime = p4_clock();
4890       printf("Input time %d milliseconds\n", inputtime - starttime);
4891       REPORT_STEP_TIME;
4892 #   endif
4893 
4894 #   if Slave
4895       (void) fclose(INFILE);
4896 #   endif
4897 
4898 /*  The material below would be a loop over jumbles and/or boots */
4899 
4900     if (! makeweights(adef, rdta, cdta))                             return 1;
4901     if (! makevalues(rdta, cdta))                                    return 1;
4902     if (adef->empf && ! empiricalfreqs(rdta, cdta))                  return 1;
4903     reportfreqs(adef, rdta);
4904     if (! linkdata2tree(rdta, cdta, tr))                             return 1;
4905 
4906     if (! linkxarray(3, 3, cdta->endsite, & freextip, & usedxtip))   return 1;
4907     if (! setupnodex(tr))                                            return 1;
4908 
4909 #   if Slave
4910       if (! slaveTreeEvaluate(tr, bt))                               return 1;
4911 #   else
4912       if (! initBestTree(bt, adef->nkeep, tr->mxtips, tr->cdta->endsite))    return 1;
4913       if (! adef->usertree) {
4914         if (! makeDenovoTree(tr, bt, adef))                          return 1;
4915         }
4916       else {
4917         if (! makeUserTree(tr, bt, adef))                            return 1;
4918         }
4919       if (! freeBestTree(bt))                                        return 1;
4920 #   endif
4921 
4922 /*  Endpoint for jumble and/or boot loop */
4923 
4924 #   if Master
4925       tr->likelihood = 1.0;             /* terminate slaves */
4926       (void) sendTree(& comm_slave, tr);
4927 #   endif
4928 
4929     freeTree(tr);
4930 
4931 #   if Master
4932       close_link(& comm_slave);
4933       (void) fclose(INFILE);
4934 
4935       REPORT_STEP_TIME;
4936       endtime = p4_clock();
4937       printf("Execution time %d milliseconds\n", endtime - inputtime);
4938       (void) fclose(OUTFILE);
4939 #   endif
4940 
4941 #   if Slave
4942       close_link(& comm_master);
4943       (void) fclose(OUTFILE);
4944 #   endif
4945 
4946 #   if Debug
4947       (void) fclose(debug);
4948 #   endif
4949 
4950 #   if Master || Slave
4951       p4_send(DNAML_DONE, DNAML_HOST_ID, NULL, 0);
4952 #   else
4953       return 0;
4954 #   endif
4955   } /* DNA Maximum Likelihood */
4956