1 #include "phylip.h"
2 #include "seq.h"
3 #include <unistd.h>
4 #include <setjmp.h>
5
6 /*
7 derived from file dnapars.c of PHYLIP version 3.696 with the following copyright:
8
9 version 3.696.
10 Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe.
11
12 Copyright (c) 1993-2014, Joseph Felsenstein
13 All rights reserved.
14
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 1. Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 2. Redistributions in binary form must reproduce the above copyright notice,
22 this list of conditions and the following disclaimer in the documentation
23 and/or other materials provided with the distribution.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 #define MAXNUMTREES 1000000 /* bigger than number of user trees can be */
39 static jmp_buf lngjmp_env;
40 typedef enum {more_thorough, less_thorough, rearrange_best_tree} dnapars_S_option;
41
42 /* function prototypes */
43 extern int tree_build_interrupted;
44 extern char *create_tmp_filename_from_C(void);
45 extern FILE *fl_fopen_from_C(const char *fname, const char *mode);
46 extern int fl_unlink_from_C(const char*fname);
47 extern void awake_from_C(void);
48 void padtosize(char *pname, char *name, int length);
49 static void getoptions(int, dnapars_S_option );
50 static void allocrest(void);
51 static void reallocchars(void);
52 static void doinit(int, int, int, char*, dnapars_S_option);
53 void makeweights(void);
54 static void doinput(char** seq, char** seqname);
55 void initdnaparsnode(node **, node **, node *, long, long, long *,
56 long *, initops, pointarray, pointarray, Char *, Char *, FILE *);
57 static void evaluate(node *);
58 static void tryadd(node *, node *, node *);
59 static void addpreorder(node *, node *, node *);
60 void trydescendants(node *, node *, node *, node *, boolean);
61
62 void trylocal(node *, node *);
63 void trylocal2(node *, node *, node *);
64 static void tryrearr(node *, boolean *);
65 static void repreorder(node *, boolean *);
66 static void rearrange(node **);
67 static void describe(void);
68 void dnapars_coordinates(node *, double, long *, double *);
69 //void dnapars_printree(void);
70 void globrearrange(void);
71 void grandrearr(void);
72
73 static void maketree(char*);
74 void freerest(void);
75 void load_tree(long treei);
76
77 /* function prototypes */
78
79
80 static Char infilename[FNMLNGTH], outfilename[FNMLNGTH], intreename[FNMLNGTH], *outtreename,
81 weightfilename[FNMLNGTH];
82 char basechar[32]="ACMGRSVTWYHKDBNO???????????????";
83 static node *root;
84 static long chars, col, msets, ith, njumble, jumb, maxtrees;
85 /* chars = number of sites in actual sequences */
86 static long inseed, inseed0;
87 static double threshold;
88 static boolean jumble, usertree, thresh, weights, thorough, rearrfirst,
89 trout, progress, stepbox, ancseq, mulsets, justwts, firstset, mulf,
90 multf;
91 steptr oldweight;
92 static longer seed;
93 static pointarray treenode; /* pointers to all nodes in tree */
94 static long *enterorder;
95 long *zeros;
96
97 /* local variables for Pascal maketree, propagated globally for C version: */
98
99 static long minwhich;
100 static double like, minsteps, bestyet, bestlike, bstlike2;
101 static boolean lastrearr, recompute;
102 static double nsteps[maxuser];
103 static long **fsteps;
104 static node *there, *oldnufork;
105 static long *place;
106 static bestelm *bestrees;
107 static long *threshwt;
108 baseptr nothing;
109 static gbases *garbage;
110 static node *temp, *temp1, *temp2, *tempsum, *temprm, *tempadd, *tempf, *tmp, *tmp1,
111 *tmp2, *tmp3, *tmprm, *tmpadd;
112 static boolean *names;
113 node *grbg;
114 static char *progname;
115
116
getoptions(int arg_maxtrees,dnapars_S_option s_option)117 static void getoptions(int arg_maxtrees, dnapars_S_option s_option)
118 {
119 /* interactively set options */
120 //long loopcount, loopcount2;
121 //Char ch, ch2;
122
123 //fprintf(outfile, "\nDNA parsimony algorithm, version %s\n\n",VERSION);
124 jumble = false;
125 njumble = 1;
126 outgrno = 1;
127 outgropt = false;
128 thresh = false;
129 //thorough = true;
130 transvp = false;
131 //rearrfirst = false;
132 if (s_option == more_thorough) { thorough = true; rearrfirst = false; }
133 else if (s_option == less_thorough) {thorough = false; rearrfirst = false; }
134 else {thorough = false; rearrfirst = true; }
135 maxtrees = arg_maxtrees;
136 trout = true;
137 usertree = false;
138 weights = false;
139 mulsets = false;
140 printdata = false;
141 progress = false;
142 treeprint = false;
143 stepbox = false;
144 ancseq = false;
145 dotdiff = true;
146 interleaved = true;
147 /*loopcount = 0;
148
149 for (;;) {
150 cleerhome();
151 printf("\nDNA parsimony algorithm, version %s\n\n",VERSION);
152 printf("Setting for this run:\n");
153 printf(" U Search for best tree? %s\n",
154 (usertree ? "No, use user trees in input file" : "Yes"));
155 if (!usertree) {
156 printf(" S Search option? ");
157 if (thorough)
158 printf("More thorough search\n");
159 else if (rearrfirst)
160 printf("Rearrange on one best tree\n");
161 else
162 printf("Less thorough\n");
163 printf(" V Number of trees to save? %ld\n", maxtrees);
164 printf(" J Randomize input order of sequences?");
165 if (jumble)
166 printf(" Yes (seed =%8ld,%3ld times)\n", inseed0, njumble);
167 else
168 printf(" No. Use input order\n");
169 }
170 printf(" O Outgroup root?");
171 if (outgropt)
172 printf(" Yes, at sequence number%3ld\n", outgrno);
173 else
174 printf(" No, use as outgroup species%3ld\n", outgrno);
175 printf(" T Use Threshold parsimony?");
176 if (thresh)
177 printf(" Yes, count steps up to%4.1f per site\n", threshold);
178 else
179 printf(" No, use ordinary parsimony\n");
180 printf(" N Use Transversion parsimony?");
181 if (transvp)
182 printf(" Yes, count only transversions\n");
183 else
184 printf(" No, count all steps\n");
185 printf(" W Sites weighted? %s\n",
186 (weights ? "Yes" : "No"));
187 printf(" M Analyze multiple data sets?");
188 if (mulsets)
189 printf(" Yes, %2ld %s\n", msets,
190 (justwts ? "sets of weights" : "data sets"));
191 else
192 printf(" No\n");
193 printf(" I Input sequences interleaved? %s\n",
194 (interleaved ? "Yes" : "No, sequential"));
195 printf(" 0 Terminal type (IBM PC, ANSI, none)? %s\n",
196 ibmpc ? "IBM PC" : ansi ? "ANSI" : "(none)");
197 printf(" 1 Print out the data at start of run %s\n",
198 (printdata ? "Yes" : "No"));
199 printf(" 2 Print indications of progress of run %s\n",
200 progress ? "Yes" : "No");
201 printf(" 3 Print out tree %s\n",
202 treeprint ? "Yes" : "No");
203 printf(" 4 Print out steps in each site %s\n",
204 stepbox ? "Yes" : "No");
205 printf(" 5 Print sequences at all nodes of tree %s\n",
206 ancseq ? "Yes" : "No");
207 if (ancseq || printdata)
208 printf(" . Use dot-differencing to display them %s\n",
209 dotdiff ? "Yes" : "No");
210 printf(" 6 Write out trees onto tree file? %s\n",
211 trout ? "Yes" : "No");
212 printf("\n Y to accept these or type the letter for one to change\n");
213 #ifdef WIN32
214 phyFillScreenColor();
215 #endif
216 fflush(stdout);
217 scanf("%c%*[^\n]", &ch);
218 getchar();
219 if (ch == '\n')
220 ch = ' ';
221 uppercase(&ch);
222 if (ch == 'Y')
223 break;
224 if (((!usertree) && (strchr("WSVJOTNUMI12345.60", ch) != NULL))
225 || (usertree && ((strchr("WSVOTNUMI12345.60", ch) != NULL)))){
226 switch (ch) {
227
228 case 'J':
229 jumble = !jumble;
230 if (jumble)
231 initjumble(&inseed, &inseed0, seed, &njumble);
232 else njumble = 1;
233 break;
234
235 case 'O':
236 outgropt = !outgropt;
237 if (outgropt)
238 initoutgroup(&outgrno, spp);
239 break;
240
241 case 'T':
242 thresh = !thresh;
243 if (thresh)
244 initthreshold(&threshold);
245 break;
246
247 case 'N':
248 transvp = !transvp;
249 break;
250
251 case 'W':
252 weights = !weights;
253 break;
254
255 case 'M':
256 mulsets = !mulsets;
257 if (mulsets) {
258 printf("Multiple data sets or multiple weights?");
259 loopcount2 = 0;
260 do {
261 printf(" (type D or W)\n");
262 #ifdef WIN32
263 phyFillScreenColor();
264 #endif
265 fflush(stdout);
266 scanf("%c%*[^\n]", &ch2);
267 getchar();
268 if (ch2 == '\n')
269 ch2 = ' ';
270 uppercase(&ch2);
271 countup(&loopcount2, 10);
272 } while ((ch2 != 'W') && (ch2 != 'D'));
273 justwts = (ch2 == 'W');
274 if (justwts)
275 justweights(&msets);
276 else
277 initdatasets(&msets);
278 if (!jumble) {
279 jumble = true;
280 initjumble(&inseed, &inseed0, seed, &njumble);
281 }
282 }
283 break;
284
285 case 'U':
286 usertree = !usertree;
287 break;
288
289 case 'S':
290 thorough = !thorough;
291 if (!thorough) {
292 printf("Rearrange on just one best tree?");
293 loopcount2 = 0;
294 do {
295 printf(" (type Y or N)\n");
296 #ifdef WIN32
297 phyFillScreenColor();
298 #endif
299 fflush(stdout);
300 scanf("%c%*[^\n]", &ch2);
301 getchar();
302 if (ch2 == '\n')
303 ch2 = ' ';
304 uppercase(&ch2);
305 countup(&loopcount2, 10);
306 } while ((ch2 != 'Y') && (ch2 != 'N'));
307 rearrfirst = (ch2 == 'Y');
308 }
309 break;
310
311 case 'V':
312 loopcount2 = 0;
313 do {
314 printf("type the number of trees to save\n");
315 #ifdef WIN32
316 phyFillScreenColor();
317 #endif
318 fflush(stdout);
319 scanf("%ld%*[^\n]", &maxtrees);
320 if (maxtrees > MAXNUMTREES)
321 maxtrees = MAXNUMTREES;
322 getchar();
323 countup(&loopcount2, 10);
324 } while (maxtrees < 1);
325 break;
326
327 case 'I':
328 interleaved = !interleaved;
329 break;
330
331 case '0':
332 initterminal(&ibmpc, &ansi);
333 break;
334
335 case '1':
336 printdata = !printdata;
337 break;
338
339 case '2':
340 progress = !progress;
341 break;
342
343 case '3':
344 treeprint = !treeprint;
345 break;
346
347 case '4':
348 stepbox = !stepbox;
349 break;
350
351 case '5':
352 ancseq = !ancseq;
353 break;
354
355 case '.':
356 dotdiff = !dotdiff;
357 break;
358
359 case '6':
360 trout = !trout;
361 break;
362 }
363 } else
364 printf("Not a possible option!\n");
365 countup(&loopcount, 100);
366 }
367 if (transvp)
368 fprintf(outfile, "Transversion parsimony\n\n");*/
369 } /* getoptions */
370
371
allocrest()372 static void allocrest()
373 {
374 long i;
375
376 y = (Char **)Malloc(spp*sizeof(Char *));
377 for (i = 0; i < spp; i++)
378 y[i] = (Char *)Malloc(chars*sizeof(Char));
379 bestrees = (bestelm *)Malloc(maxtrees*sizeof(bestelm));
380 for (i = 1; i <= maxtrees; i++)
381 bestrees[i - 1].btree = (long *)Malloc(nonodes*sizeof(long));
382 //nayme = (naym *)Malloc(spp*sizeof(naym));
383 enterorder = (long *)Malloc(spp*sizeof(long));
384 place = (long *)Malloc(nonodes*sizeof(long));
385 weight = (long *)Malloc(chars*sizeof(long));
386 oldweight = (long *)Malloc(chars*sizeof(long));
387 alias = (long *)Malloc(chars*sizeof(long));
388 ally = (long *)Malloc(chars*sizeof(long));
389 location = (long *)Malloc(chars*sizeof(long));
390 } /* allocrest */
391
392
doinit(int myspp,int mychars,int arg_maxtrees,char * toevaluate,dnapars_S_option s_option)393 static void doinit(int myspp, int mychars, int arg_maxtrees, char* toevaluate, dnapars_S_option s_option)
394 {
395 /* initializes variables */
396
397 //inputnumbers(&spp, &chars, &nonodes, 1);
398 spp = myspp;
399 chars = mychars;
400 nonodes = (spp * 2 - 1);
401 getoptions(arg_maxtrees, s_option);
402 usertree = (toevaluate != NULL);
403 /*if (printdata)
404 fprintf(outfile, "%2ld species, %3ld sites\n\n", spp, chars);*/
405 alloctree(&treenode, nonodes, usertree);
406 } /* doinit */
407
408
makeweights()409 void makeweights()
410 {
411 /* make up weights vector to avoid duplicate computations */
412 long i;
413
414 for (i = 1; i <= chars; i++) {
415 alias[i - 1] = i;
416 oldweight[i - 1] = weight[i - 1];
417 ally[i - 1] = i;
418 }
419 sitesort(chars, weight);
420 sitecombine(chars);
421 sitescrunch(chars);
422 endsite = 0;
423 for (i = 1; i <= chars; i++) {
424 if (ally[i - 1] == i)
425 endsite++;
426 }
427 for (i = 1; i <= endsite; i++)
428 location[alias[i - 1] - 1] = i;
429 if (!thresh)
430 threshold = spp;
431 threshwt = (long *)Malloc(endsite*sizeof(long));
432 for (i = 0; i < endsite; i++) {
433 weight[i] *= 10;
434 threshwt[i] = (long)(threshold * weight[i] + 0.5);
435 }
436 zeros = (long *)Malloc(endsite*sizeof(long));
437 for (i = 0; i < endsite; i++)
438 zeros[i] = 0;
439 } /* makeweights */
440
441
doinput(char ** seq,char ** seqname)442 static void doinput(char** seq, char** seqname)
443 {
444 /* reads the input data */
445 if (justwts) {
446 if (firstset) {
447 //inputdata(chars);
448 y = seq;
449 nayme = seqname;
450 }
451 /*for (i = 0; i < chars; i++)
452 weight[i] = 1;*/
453 //inputweights(chars, weight, &weights);
454 } else {
455 if (!firstset){
456 //samenumsp(&chars, ith);
457 reallocchars();
458 }
459 //inputdata(chars);
460 y = seq;
461 nayme = seqname;
462
463 /*for (i = 0; i < chars; i++)
464 weight[i] = 1;*/
465 /*if (weights) {
466 inputweights(chars, weight, &weights);
467 if (printdata)
468 printweights(outfile, 0, chars, weight, "Sites");
469 }*/
470 }
471 makeweights();
472 makevalues(treenode, zeros, usertree);
473 if (!usertree) {
474 allocnode(&temp, zeros, endsite);
475 allocnode(&temp1, zeros, endsite);
476 allocnode(&temp2, zeros, endsite);
477 allocnode(&tempsum, zeros, endsite);
478 allocnode(&temprm, zeros, endsite);
479 allocnode(&tempadd, zeros, endsite);
480 allocnode(&tempf, zeros, endsite);
481 allocnode(&tmp, zeros, endsite);
482 allocnode(&tmp1, zeros, endsite);
483 allocnode(&tmp2, zeros, endsite);
484 allocnode(&tmp3, zeros, endsite);
485 allocnode(&tmprm, zeros, endsite);
486 allocnode(&tmpadd, zeros, endsite);
487 }
488 } /* doinput */
489
490
initdnaparsnode(node ** p,node ** grbg,node * q,long len,long nodei,long * ntips,long * parens,initops whichinit,pointarray treenode,pointarray nodep,Char * str,Char * ch,FILE * intree)491 void initdnaparsnode(node **p, node **grbg, node *q, long len, long nodei,
492 long *ntips, long *parens, initops whichinit,
493 pointarray treenode, pointarray nodep, Char *str,
494 Char *ch, FILE *intree)
495 {
496 /* initializes a node */
497 boolean minusread;
498 double valyew, divisor;
499
500 switch (whichinit) {
501 case bottom:
502 gnutreenode(grbg, p, nodei, endsite, zeros);
503 treenode[nodei - 1] = *p;
504 break;
505 case nonbottom:
506 gnutreenode(grbg, p, nodei, endsite, zeros);
507 break;
508 case tip:
509 match_names_to_data (str, treenode, p, spp);
510 break;
511 case length: /* if there is a length, read it and discard value */
512 processlength(&valyew, &divisor, ch, &minusread, intree, parens);
513 break;
514 default: /*cases hslength,hsnolength,treewt,unittrwt,iter,*/
515 break;
516 }
517 } /* initdnaparsnode */
518
519
evaluate(node * r)520 static void evaluate(node *r)
521 {
522 /* determines the number of steps needed for a tree. this is
523 the minimum number of steps needed to evolve sequences on
524 this tree */
525 long i, steps;
526 long term;
527 double sum;
528
529 sum = 0.0;
530 for (i = 0; i < endsite; i++) {
531 steps = r->numsteps[i];
532 if ((long)steps <= threshwt[i])
533 term = steps;
534 else
535 term = threshwt[i];
536 sum += (double)term;
537 if (usertree && which <= maxuser)
538 fsteps[which - 1][i] = term;
539 }
540 if (usertree && which <= maxuser) {
541 nsteps[which - 1] = sum;
542 if (which == 1) {
543 minwhich = 1;
544 minsteps = sum;
545 } else if (sum < minsteps) {
546 minwhich = which;
547 minsteps = sum;
548 }
549 }
550 like = -sum;
551 } /* evaluate */
552
553
tryadd(node * p,node * item,node * nufork)554 static void tryadd(node *p, node *item, node *nufork)
555 {
556 /* temporarily adds one fork and one tip to the tree.
557 if the location where they are added yields greater
558 "likelihood" than other locations tested up to that
559 time, then keeps that location as there */
560 long pos;
561 double belowsum, parentsum;
562 boolean found, collapse, changethere, trysave;
563
564 if (!p->tip) {
565 memcpy(temp->base, p->base, endsite*sizeof(long));
566 memcpy(temp->numsteps, p->numsteps, endsite*sizeof(long));
567 memcpy(temp->numnuc, p->numnuc, endsite*sizeof(nucarray));
568 temp->numdesc = p->numdesc + 1;
569 if (p->back) {
570 multifillin(temp, tempadd, 1);
571 sumnsteps2(tempsum, temp, p->back, 0, endsite, threshwt);
572 } else {
573 multisumnsteps(temp, tempadd, 0, endsite, threshwt);
574 tempsum->sumsteps = temp->sumsteps;
575 }
576 if (tempsum->sumsteps <= -bestyet) {
577 if (p->back)
578 sumnsteps2(tempsum, temp, p->back, endsite+1, endsite, threshwt);
579 else {
580 multisumnsteps(temp, temp1, endsite+1, endsite, threshwt);
581 tempsum->sumsteps = temp->sumsteps;
582 }
583 }
584 p->sumsteps = tempsum->sumsteps;
585 }
586 if (p == root)
587 sumnsteps2(temp, item, p, 0, endsite, threshwt);
588 else {
589 sumnsteps(temp1, item, p, 0, endsite);
590 sumnsteps2(temp, temp1, p->back, 0, endsite, threshwt);
591 }
592 if (temp->sumsteps <= -bestyet) {
593 if (p == root)
594 sumnsteps2(temp, item, p, endsite+1, endsite, threshwt);
595 else {
596 sumnsteps(temp1, item, p, endsite+1, endsite);
597 sumnsteps2(temp, temp1, p->back, endsite+1, endsite, threshwt);
598 }
599 }
600 belowsum = temp->sumsteps;
601 multf = false;
602 like = -belowsum;
603 if (!p->tip && belowsum >= p->sumsteps) {
604 multf = true;
605 like = -p->sumsteps;
606 }
607 trysave = true;
608 if (!multf && p != root) {
609 parentsum = treenode[p->back->index - 1]->sumsteps;
610 if (belowsum >= parentsum)
611 trysave = false;
612 }
613 if (lastrearr) {
614 changethere = true;
615 if (like >= bstlike2 && trysave) {
616 if (like > bstlike2)
617 found = false;
618 else {
619 addnsave(p, item, nufork, &root, &grbg, multf, treenode, place, zeros);
620 pos = 0;
621 findtree(&found, &pos, nextree, place, bestrees);
622 }
623 if (!found) {
624 collapse = collapsible(item, p, temp, temp1, temp2, tempsum, temprm,
625 tmpadd, multf, root, zeros, treenode);
626 if (!thorough)
627 changethere = !collapse;
628 if (thorough || !collapse || like > bstlike2 || (nextree == 1)) {
629 if (like > bstlike2) {
630 addnsave(p, item, nufork, &root, &grbg, multf, treenode,
631 place, zeros);
632 bestlike = bstlike2 = like;
633 addbestever(&pos, &nextree, maxtrees, collapse, place, bestrees);
634 } else
635 addtiedtree(pos, &nextree, maxtrees, collapse, place, bestrees);
636 }
637 }
638 }
639 if (like >= bestyet) {
640 if (like > bstlike2)
641 bstlike2 = like;
642 if (changethere && trysave) {
643 bestyet = like;
644 there = p;
645 mulf = multf;
646 }
647 }
648 } else if ((like > bestyet) || (like >= bestyet && trysave)) {
649 bestyet = like;
650 there = p;
651 mulf = multf;
652 }
653 } /* tryadd */
654
655
addpreorder(node * p,node * item,node * nufork)656 static void addpreorder(node *p, node *item, node *nufork)
657 {
658 /* traverses a n-ary tree, calling function tryadd
659 at a node before calling tryadd at its descendants */
660 node *q;
661
662 if (p == NULL)
663 return;
664 tryadd(p, item, nufork);
665 if (!p->tip) {
666 q = p->next;
667 while (q != p) {
668 addpreorder(q->back, item, nufork);
669 q = q->next;
670 }
671 }
672 } /* addpreorder */
673
674
trydescendants(node * item,node * forknode,node * parent,node * parentback,boolean trybelow)675 void trydescendants(node *item, node *forknode, node *parent,
676 node *parentback, boolean trybelow)
677 {
678 /* tries rearrangements at parent and below parent's descendants */
679 node *q, *tempblw;
680 boolean bestever=0, belowbetter, multf=0, saved, trysave;
681 double parentsum=0, belowsum;
682
683 memcpy(temp->base, parent->base, endsite*sizeof(long));
684 memcpy(temp->numsteps, parent->numsteps, endsite*sizeof(long));
685 memcpy(temp->numnuc, parent->numnuc, endsite*sizeof(nucarray));
686 temp->numdesc = parent->numdesc + 1;
687 multifillin(temp, tempadd, 1);
688 sumnsteps2(tempsum, parentback, temp, 0, endsite, threshwt);
689 belowbetter = true;
690 if (lastrearr) {
691 parentsum = tempsum->sumsteps;
692 if (-tempsum->sumsteps >= bstlike2) {
693 belowbetter = false;
694 bestever = false;
695 multf = true;
696 if (-tempsum->sumsteps > bstlike2)
697 bestever = true;
698 savelocrearr(item, forknode, parent, tmp, tmp1, tmp2, tmp3, tmprm,
699 tmpadd, &root, maxtrees, &nextree, multf, bestever,
700 &saved, place, bestrees, treenode, &grbg, zeros);
701 if (saved) {
702 like = bstlike2 = -tempsum->sumsteps;
703 there = parent;
704 mulf = true;
705 }
706 }
707 } else if (-tempsum->sumsteps >= like) {
708 there = parent;
709 mulf = true;
710 like = -tempsum->sumsteps;
711 }
712 if (trybelow) {
713 sumnsteps(temp, parent, tempadd, 0, endsite);
714 sumnsteps2(tempsum, temp, parentback, 0, endsite, threshwt);
715 if (lastrearr) {
716 belowsum = tempsum->sumsteps;
717 if (-tempsum->sumsteps >= bstlike2 && belowbetter &&
718 (forknode->numdesc > 2 ||
719 (forknode->numdesc == 2 &&
720 parent->back->index != forknode->index))) {
721 trysave = false;
722 memcpy(temp->base, parentback->base, endsite*sizeof(long));
723 memcpy(temp->numsteps, parentback->numsteps, endsite*sizeof(long));
724 memcpy(temp->numnuc, parentback->numnuc, endsite*sizeof(nucarray));
725 temp->numdesc = parentback->numdesc + 1;
726 multifillin(temp, tempadd, 1);
727 sumnsteps2(tempsum, parent, temp, 0, endsite, threshwt);
728 if (-tempsum->sumsteps < bstlike2) {
729 multf = false;
730 bestever = false;
731 trysave = true;
732 }
733 if (-belowsum > bstlike2) {
734 multf = false;
735 bestever = true;
736 trysave = true;
737 }
738 if (trysave) {
739 if (treenode[parent->index - 1] != parent)
740 tempblw = parent->back;
741 else
742 tempblw = parent;
743 savelocrearr(item, forknode, tempblw, tmp, tmp1, tmp2, tmp3, tmprm,
744 tmpadd, &root, maxtrees, &nextree, multf, bestever,
745 &saved, place, bestrees, treenode, &grbg, zeros);
746 if (saved) {
747 like = bstlike2 = -belowsum;
748 there = tempblw;
749 mulf = false;
750 }
751 }
752 }
753 } else if (-tempsum->sumsteps > like) {
754 like = -tempsum->sumsteps;
755 if (-tempsum->sumsteps > bestyet) {
756 if (treenode[parent->index - 1] != parent)
757 tempblw = parent->back;
758 else
759 tempblw = parent;
760 there = tempblw;
761 mulf = false;
762 }
763 }
764 }
765 q = parent->next;
766 while (q != parent) {
767 if (q->back && q->back != item) {
768 memcpy(temp1->base, q->base, endsite*sizeof(long));
769 memcpy(temp1->numsteps, q->numsteps, endsite*sizeof(long));
770 memcpy(temp1->numnuc, q->numnuc, endsite*sizeof(nucarray));
771 temp1->numdesc = q->numdesc;
772 multifillin(temp1, parentback, 0);
773 if (lastrearr)
774 belowbetter = (-parentsum < bstlike2);
775 if (!q->back->tip) {
776 memcpy(temp->base, q->back->base, endsite*sizeof(long));
777 memcpy(temp->numsteps, q->back->numsteps, endsite*sizeof(long));
778 memcpy(temp->numnuc, q->back->numnuc, endsite*sizeof(nucarray));
779 temp->numdesc = q->back->numdesc + 1;
780 multifillin(temp, tempadd, 1);
781 sumnsteps2(tempsum, temp1, temp, 0, endsite, threshwt);
782 if (lastrearr) {
783 if (-tempsum->sumsteps >= bstlike2) {
784 belowbetter = false;
785 bestever = false;
786 multf = true;
787 if (-tempsum->sumsteps > bstlike2)
788 bestever = true;
789 savelocrearr(item, forknode, q->back, tmp, tmp1, tmp2, tmp3, tmprm,
790 tmpadd, &root, maxtrees, &nextree, multf, bestever,
791 &saved, place, bestrees, treenode, &grbg, zeros);
792 if (saved) {
793 like = bstlike2 = -tempsum->sumsteps;
794 there = q->back;
795 mulf = true;
796 }
797 }
798 } else if (-tempsum->sumsteps >= like) {
799 like = -tempsum->sumsteps;
800 there = q->back;
801 mulf = true;
802 }
803 }
804 sumnsteps(temp, q->back, tempadd, 0, endsite);
805 sumnsteps2(tempsum, temp, temp1, 0, endsite, threshwt);
806 if (lastrearr) {
807 if (-tempsum->sumsteps >= bstlike2) {
808 trysave = false;
809 multf = false;
810 if (belowbetter) {
811 bestever = false;
812 trysave = true;
813 }
814 if (-tempsum->sumsteps > bstlike2) {
815 bestever = true;
816 trysave = true;
817 }
818 if (trysave) {
819 if (treenode[q->back->index - 1] != q->back)
820 tempblw = q;
821 else
822 tempblw = q->back;
823 savelocrearr(item, forknode, tempblw, tmp, tmp1, tmp2, tmp3, tmprm,
824 tmpadd, &root, maxtrees, &nextree, multf, bestever,
825 &saved, place, bestrees, treenode, &grbg, zeros);
826 if (saved) {
827 like = bstlike2 = -tempsum->sumsteps;
828 there = tempblw;
829 mulf = false;
830 }
831 }
832 }
833 } else if (-tempsum->sumsteps > like) {
834 like = -tempsum->sumsteps;
835 if (-tempsum->sumsteps > bestyet) {
836 if (treenode[q->back->index - 1] != q->back)
837 tempblw = q;
838 else
839 tempblw = q->back;
840 there = tempblw;
841 mulf = false;
842 }
843 }
844 }
845 q = q->next;
846 }
847 } /* trydescendants */
848
849
trylocal(node * item,node * forknode)850 void trylocal(node *item, node *forknode)
851 {
852 /* rearranges below forknode, below descendants of forknode when
853 there are more than 2 descendants, then unroots the back of
854 forknode and rearranges on its descendants */
855 node *q;
856 boolean bestever, multf, saved;
857
858 memcpy(temprm->base, zeros, endsite*sizeof(long));
859 memcpy(temprm->numsteps, zeros, endsite*sizeof(long));
860 memcpy(temprm->oldbase, item->base, endsite*sizeof(long));
861 memcpy(temprm->oldnumsteps, item->numsteps, endsite*sizeof(long));
862 memcpy(tempf->base, forknode->base, endsite*sizeof(long));
863 memcpy(tempf->numsteps, forknode->numsteps, endsite*sizeof(long));
864 memcpy(tempf->numnuc, forknode->numnuc, endsite*sizeof(nucarray));
865 tempf->numdesc = forknode->numdesc - 1;
866 multifillin(tempf, temprm, -1);
867 if (!forknode->back) {
868 sumnsteps2(tempsum, tempf, tempadd, 0, endsite, threshwt);
869 if (lastrearr) {
870 if (-tempsum->sumsteps > bstlike2) {
871 bestever = true;
872 multf = false;
873 savelocrearr(item, forknode, forknode, tmp, tmp1, tmp2, tmp3, tmprm,
874 tmpadd, &root, maxtrees, &nextree, multf, bestever,
875 &saved, place, bestrees, treenode, &grbg, zeros);
876 if (saved) {
877 like = bstlike2 = -tempsum->sumsteps;
878 there = forknode;
879 mulf = false;
880 }
881 }
882 } else if (-tempsum->sumsteps > like) {
883 like = -tempsum->sumsteps;
884 if (-tempsum->sumsteps > bestyet) {
885 there = forknode;
886 mulf = false;
887 }
888 }
889 } else {
890 sumnsteps(temp, tempf, tempadd, 0, endsite);
891 sumnsteps2(tempsum, temp, forknode->back, 0, endsite, threshwt);
892 if (lastrearr) {
893 if (-tempsum->sumsteps > bstlike2) {
894 bestever = true;
895 multf = false;
896 savelocrearr(item, forknode, forknode, tmp, tmp1, tmp2, tmp3, tmprm,
897 tmpadd, &root, maxtrees, &nextree, multf, bestever,
898 &saved, place, bestrees, treenode, &grbg, zeros);
899 if (saved) {
900 like = bstlike2 = -tempsum->sumsteps;
901 there = forknode;
902 mulf = false;
903 }
904 }
905 } else if (-tempsum->sumsteps > like) {
906 like = -tempsum->sumsteps;
907 if (-tempsum->sumsteps > bestyet) {
908 there = forknode;
909 mulf = false;
910 }
911 }
912 trydescendants(item, forknode, forknode->back, tempf, false);
913 }
914 q = forknode->next;
915 while (q != forknode) {
916 if (q->back != item) {
917 memcpy(temp2->base, q->base, endsite*sizeof(long));
918 memcpy(temp2->numsteps, q->numsteps, endsite*sizeof(long));
919 memcpy(temp2->numnuc, q->numnuc, endsite*sizeof(nucarray));
920 temp2->numdesc = q->numdesc - 1;
921 multifillin(temp2, temprm, -1);
922 if (!q->back->tip) {
923 trydescendants(item, forknode, q->back, temp2, true);
924 } else {
925 sumnsteps(temp1, q->back, tempadd, 0, endsite);
926 sumnsteps2(tempsum, temp1, temp2, 0, endsite, threshwt);
927 if (lastrearr) {
928 if (-tempsum->sumsteps > bstlike2) {
929 multf = false;
930 bestever = true;
931 savelocrearr(item, forknode, q->back, tmp, tmp1, tmp2, tmp3,
932 tmprm, tmpadd, &root, maxtrees, &nextree, multf,
933 bestever, &saved, place, bestrees, treenode,
934 &grbg, zeros);
935 if (saved) {
936 like = bstlike2 = -tempsum->sumsteps;
937 there = q->back;
938 mulf = false;
939 }
940 }
941 } else if ((-tempsum->sumsteps) > like) {
942 like = -tempsum->sumsteps;
943 if (-tempsum->sumsteps > bestyet) {
944 there = q->back;
945 mulf = false;
946 }
947 }
948 }
949 }
950 q = q->next;
951 }
952 } /* trylocal */
953
954
trylocal2(node * item,node * forknode,node * other)955 void trylocal2(node *item, node *forknode, node *other)
956 {
957 /* rearranges below forknode, below descendants of forknode when
958 there are more than 2 descendants, then unroots the back of
959 forknode and rearranges on its descendants. Used if forknode
960 has binary descendants */
961 node *q;
962 boolean bestever=0, multf, saved, belowbetter, trysave;
963
964 memcpy(tempf->base, other->base, endsite*sizeof(long));
965 memcpy(tempf->numsteps, other->numsteps, endsite*sizeof(long));
966 memcpy(tempf->oldbase, forknode->base, endsite*sizeof(long));
967 memcpy(tempf->oldnumsteps, forknode->numsteps, endsite*sizeof(long));
968 tempf->numdesc = other->numdesc;
969 if (forknode->back)
970 trydescendants(item, forknode, forknode->back, tempf, false);
971 if (!other->tip) {
972 memcpy(temp->base, other->base, endsite*sizeof(long));
973 memcpy(temp->numsteps, other->numsteps, endsite*sizeof(long));
974 memcpy(temp->numnuc, other->numnuc, endsite*sizeof(nucarray));
975 temp->numdesc = other->numdesc + 1;
976 multifillin(temp, tempadd, 1);
977 if (forknode->back)
978 sumnsteps2(tempsum, forknode->back, temp, 0, endsite, threshwt);
979 else
980 sumnsteps2(tempsum, NULL, temp, 0, endsite, threshwt);
981 belowbetter = true;
982 if (lastrearr) {
983 if (-tempsum->sumsteps >= bstlike2) {
984 belowbetter = false;
985 bestever = false;
986 multf = true;
987 if (-tempsum->sumsteps > bstlike2)
988 bestever = true;
989 savelocrearr(item, forknode, other, tmp, tmp1, tmp2, tmp3, tmprm,
990 tmpadd, &root, maxtrees, &nextree, multf, bestever,
991 &saved, place, bestrees, treenode, &grbg, zeros);
992 if (saved) {
993 like = bstlike2 = -tempsum->sumsteps;
994 there = other;
995 mulf = true;
996 }
997 }
998 } else if (-tempsum->sumsteps >= like) {
999 there = other;
1000 mulf = true;
1001 like = -tempsum->sumsteps;
1002 }
1003 if (forknode->back) {
1004 memcpy(temprm->base, forknode->back->base, endsite*sizeof(long));
1005 memcpy(temprm->numsteps, forknode->back->numsteps, endsite*sizeof(long));
1006 } else {
1007 memcpy(temprm->base, zeros, endsite*sizeof(long));
1008 memcpy(temprm->numsteps, zeros, endsite*sizeof(long));
1009 }
1010 memcpy(temprm->oldbase, other->back->base, endsite*sizeof(long));
1011 memcpy(temprm->oldnumsteps, other->back->numsteps, endsite*sizeof(long));
1012 q = other->next;
1013 while (q != other) {
1014 memcpy(temp2->base, q->base, endsite*sizeof(long));
1015 memcpy(temp2->numsteps, q->numsteps, endsite*sizeof(long));
1016 memcpy(temp2->numnuc, q->numnuc, endsite*sizeof(nucarray));
1017 if (forknode->back) {
1018 temp2->numdesc = q->numdesc;
1019 multifillin(temp2, temprm, 0);
1020 } else {
1021 temp2->numdesc = q->numdesc - 1;
1022 multifillin(temp2, temprm, -1);
1023 }
1024 if (!q->back->tip)
1025 trydescendants(item, forknode, q->back, temp2, true);
1026 else {
1027 sumnsteps(temp1, q->back, tempadd, 0, endsite);
1028 sumnsteps2(tempsum, temp1, temp2, 0, endsite, threshwt);
1029 if (lastrearr) {
1030 if (-tempsum->sumsteps >= bstlike2) {
1031 trysave = false;
1032 multf = false;
1033 if (belowbetter) {
1034 bestever = false;
1035 trysave = true;
1036 }
1037 if (-tempsum->sumsteps > bstlike2) {
1038 bestever = true;
1039 trysave = true;
1040 }
1041 if (trysave) {
1042 savelocrearr(item, forknode, q->back, tmp, tmp1, tmp2, tmp3,
1043 tmprm, tmpadd, &root, maxtrees, &nextree, multf,
1044 bestever, &saved, place, bestrees, treenode,
1045 &grbg, zeros);
1046 if (saved) {
1047 like = bstlike2 = -tempsum->sumsteps;
1048 there = q->back;
1049 mulf = false;
1050 }
1051 }
1052 }
1053 } else if (-tempsum->sumsteps > like) {
1054 like = -tempsum->sumsteps;
1055 if (-tempsum->sumsteps > bestyet) {
1056 there = q->back;
1057 mulf = false;
1058 }
1059 }
1060 }
1061 q = q->next;
1062 }
1063 }
1064 } /* trylocal2 */
1065
1066
tryrearr(node * p,boolean * success)1067 static void tryrearr(node *p, boolean *success)
1068 {
1069 /* evaluates one rearrangement of the tree.
1070 if the new tree has greater "likelihood" than the old
1071 one sets success = TRUE and keeps the new tree.
1072 otherwise, restores the old tree */
1073 node *forknode, *newfork, *other, *oldthere;
1074 double oldlike;
1075 boolean oldmulf;
1076
1077 if (p->back == NULL)
1078 return;
1079 forknode = treenode[p->back->index - 1];
1080 if (!forknode->back && forknode->numdesc <= 2 && alltips(forknode, p))
1081 return;
1082 oldlike = bestyet;
1083 like = -10.0 * spp * chars;
1084 memcpy(tempadd->base, p->base, endsite*sizeof(long));
1085 memcpy(tempadd->numsteps, p->numsteps, endsite*sizeof(long));
1086 memcpy(tempadd->oldbase, zeros, endsite*sizeof(long));
1087 memcpy(tempadd->oldnumsteps, zeros, endsite*sizeof(long));
1088 if (forknode->numdesc > 2) {
1089 oldthere = there = forknode;
1090 oldmulf = mulf = true;
1091 trylocal(p, forknode);
1092 } else {
1093 findbelow(&other, p, forknode);
1094 oldthere = there = other;
1095 oldmulf = mulf = false;
1096 trylocal2(p, forknode, other);
1097 }
1098 if ((like <= oldlike) || (there == oldthere && mulf == oldmulf))
1099 return;
1100 recompute = true;
1101 re_move(p, &forknode, &root, recompute, treenode, &grbg, zeros);
1102 if (mulf)
1103 add(there, p, NULL, &root, recompute, treenode, &grbg, zeros);
1104 else {
1105 if (forknode->numdesc > 0)
1106 getnufork(&newfork, &grbg, treenode, zeros);
1107 else
1108 newfork = forknode;
1109 add(there, p, newfork, &root, recompute, treenode, &grbg, zeros);
1110 }
1111 if (like > oldlike + LIKE_EPSILON) {
1112 *success = true;
1113 bestyet = like;
1114 }
1115 } /* tryrearr */
1116
1117
repreorder(node * p,boolean * success)1118 static void repreorder(node *p, boolean *success)
1119 {
1120 /* traverses a binary tree, calling PROCEDURE tryrearr
1121 at a node before calling tryrearr at its descendants */
1122 node *q, *this;
1123
1124 if (p == NULL)
1125 return;
1126 if (!p->visited) {
1127 tryrearr(p, success);
1128 p->visited = true;
1129 }
1130 if (!p->tip) {
1131 q = p;
1132 while (q->next != p) {
1133 this = q->next->back;
1134 repreorder(q->next->back,success);
1135 if (q->next->back == this)
1136 q = q->next;
1137 }
1138 }
1139 } /* repreorder */
1140
1141
rearrange(node ** r)1142 static void rearrange(node **r)
1143 {
1144 /* traverses the tree (preorder), finding any local
1145 rearrangement which decreases the number of steps.
1146 if traversal succeeds in increasing the tree's
1147 "likelihood", PROCEDURE rearrange runs traversal again */
1148 boolean success=true;
1149
1150 while (success) {
1151 success = false;
1152 clearvisited(treenode);
1153 repreorder(*r, &success);
1154 }
1155 } /* rearrange */
1156
1157
describe()1158 void describe()
1159 {
1160 /* prints ancestors, steps and table of numbers of steps in
1161 each site */
1162
1163 /* if (treeprint) {
1164 fprintf(outfile, "\nrequires a total of %10.3f\n", like / -10.0);
1165 fprintf(outfile, "\n between and length\n");
1166 fprintf(outfile, " ------- --- ------\n");
1167 printbranchlengths(root);
1168 }
1169 if (stepbox)
1170 writesteps(chars, weights, oldweight, root);
1171 if (ancseq) {
1172 hypstates(chars, root, treenode, &garbage, basechar);
1173 putc('\n', outfile);
1174 }
1175 putc('\n', outfile);*/
1176 if (trout) {
1177 col = 0;
1178 treeout3(root, nextree, &col, root);
1179 }
1180 } /* describe */
1181
1182
dnapars_coordinates(node * p,double lengthsum,long * tipy,double * tipmax)1183 void dnapars_coordinates(node *p, double lengthsum, long *tipy,
1184 double *tipmax)
1185 {
1186 /* establishes coordinates of nodes */
1187 node *q, *first, *last;
1188 double xx;
1189
1190 if (p == NULL)
1191 return;
1192 if (p->tip) {
1193 p->xcoord = (long)(over * lengthsum + 0.5);
1194 p->ycoord = (*tipy);
1195 p->ymin = (*tipy);
1196 p->ymax = (*tipy);
1197 (*tipy) += down;
1198 if (lengthsum > (*tipmax))
1199 (*tipmax) = lengthsum;
1200 return;
1201 }
1202 q = p->next;
1203 do {
1204 xx = q->v;
1205 if (xx > 100.0)
1206 xx = 100.0;
1207 dnapars_coordinates(q->back, lengthsum + xx, tipy,tipmax);
1208 q = q->next;
1209 } while (p != q);
1210 first = p->next->back;
1211 q = p;
1212 while (q->next != p)
1213 q = q->next;
1214 last = q->back;
1215 p->xcoord = (long)(over * lengthsum + 0.5);
1216 if ((p == root) || count_sibs(p) > 2)
1217 p->ycoord = p->next->next->back->ycoord;
1218 else
1219 p->ycoord = (first->ycoord + last->ycoord) / 2;
1220 p->ymin = first->ymin;
1221 p->ymax = last->ymax;
1222 } /* dnapars_coordinates */
1223
1224
1225 /*void dnapars_printree()
1226 {
1227 * prints out diagram of the tree2 *
1228 long tipy;
1229 double scale, tipmax;
1230 long i;
1231
1232 if (!treeprint)
1233 return;
1234 putc('\n', outfile);
1235 tipy = 1;
1236 tipmax = 0.0;
1237 dnapars_coordinates(root, 0.0, &tipy, &tipmax);
1238 scale = 1.0 / (long)(tipmax + 1.000);
1239 for (i = 1; i <= (tipy - down); i++)
1240 drawline3(i, scale, root);
1241 putc('\n', outfile);
1242 } * dnapars_printree *
1243 */
1244
globrearrange()1245 void globrearrange()
1246 {
1247 /* does global rearrangements */
1248 long j;
1249 double gotlike;
1250 boolean frommulti;
1251 node *item, *nufork;
1252
1253 recompute = true;
1254 do {
1255 //printf(" ");
1256 gotlike = bestlike = bstlike2; /* order matters here ! */
1257 for (j = 0; j < nonodes; j++) {
1258 bestyet = -10.0 * spp * chars;
1259 if (j < spp)
1260 item = treenode[enterorder[j] -1];
1261 else
1262 item = treenode[j];
1263
1264 if ((item != root) &&
1265 ((j < spp) || ((j >= spp) && (item->numdesc > 0))) &&
1266 !((item->back->index == root->index) && (root->numdesc == 2)
1267 && alltips(root, item))) {
1268 re_move(item, &nufork, &root, recompute, treenode, &grbg, zeros);
1269 frommulti = (nufork->numdesc > 0);
1270 clearcollapse(treenode);
1271 there = root;
1272 memcpy(tempadd->base, item->base, endsite*sizeof(long));
1273 memcpy(tempadd->numsteps, item->numsteps, endsite*sizeof(long));
1274 memcpy(tempadd->oldbase, zeros, endsite*sizeof(long));
1275 memcpy(tempadd->oldnumsteps, zeros, endsite*sizeof(long));
1276 if (frommulti){
1277 oldnufork = nufork;
1278 getnufork(&nufork, &grbg, treenode, zeros);
1279 }
1280 addpreorder(root, item, nufork);
1281 if (frommulti)
1282 oldnufork = NULL;
1283 if (!mulf)
1284 add(there, item, nufork, &root, recompute, treenode, &grbg, zeros);
1285 else
1286 add(there, item, NULL, &root, recompute, treenode, &grbg, zeros);
1287 }
1288 /*if (progress) {
1289 if (j % ((nonodes / 72) + 1) == 0)
1290 putchar('.');
1291 fflush(stdout);
1292 }*/
1293 if (tree_build_interrupted) {
1294 longjmp(lngjmp_env, 0);
1295 }
1296 }
1297 /*if (progress) {
1298 putchar('\n');
1299 }*/
1300 } while (bestlike > gotlike);
1301 } /* globrearrange */
1302
1303
load_tree(long treei)1304 void load_tree(long treei)
1305 { /* restores a tree from bestrees */
1306 long j, nextnode;
1307 boolean recompute = false;
1308 node *dummy;
1309
1310 for (j = spp - 1; j >= 1; j--)
1311 re_move(treenode[j], &dummy, &root, recompute, treenode, &grbg,
1312 zeros);
1313 root = treenode[0];
1314 recompute = true;
1315 add(treenode[0], treenode[1], treenode[spp], &root, recompute,
1316 treenode, &grbg, zeros);
1317 nextnode = spp + 2;
1318 for (j = 3; j <= spp; j++) {
1319 if (bestrees[treei].btree[j - 1] > 0)
1320 add(treenode[bestrees[treei].btree[j - 1] - 1], treenode[j - 1],
1321 treenode[nextnode++ - 1], &root, recompute, treenode, &grbg,
1322 zeros);
1323 else
1324 add(treenode[treenode[-bestrees[treei].btree[j-1]-1]->back->index-1],
1325 treenode[j - 1], NULL, &root, recompute, treenode, &grbg,
1326 zeros);
1327 }
1328 }
1329
1330
grandrearr()1331 void grandrearr()
1332 {
1333 /* calls global rearrangement on best trees */
1334 long treei;
1335 boolean done;
1336
1337 done = false;
1338 do {
1339 treei = findunrearranged(bestrees, nextree, true);
1340 if (treei < 0)
1341 done = true;
1342 else
1343 bestrees[treei].gloreange = true;
1344
1345 if (!done) {
1346 load_tree(treei);
1347 globrearrange();
1348 done = rearrfirst;
1349 }
1350 } while (!done);
1351 } /* grandrearr */
1352
1353
1354
maketree(char * toevaluate)1355 static void maketree(char *toevaluate)
1356 {
1357 /* constructs a binary tree from the pointers in treenode.
1358 adds each node at location which yields highest "likelihood"
1359 then rearranges the tree for greatest "likelihood" */
1360 long i, j, numtrees, nextnode;
1361 boolean done, firsttree, goteof, haslengths;
1362 node *item, *nufork, *dummy;
1363 pointarray nodep;
1364
1365 numtrees = 0;
1366
1367 if (setjmp(lngjmp_env)) goto way_out;
1368
1369 if (!usertree) {
1370 for (i = 1; i <= spp; i++)
1371 enterorder[i - 1] = i;
1372 if (jumble)
1373 randumize(seed, enterorder);
1374 recompute = true;
1375 root = treenode[enterorder[0] - 1];
1376 add(treenode[enterorder[0] - 1], treenode[enterorder[1] - 1],
1377 treenode[spp], &root, recompute, treenode, &grbg, zeros);
1378 /*if (progress) {
1379 printf("Adding species:\n");
1380 writename(0, 2, enterorder);
1381 }*/
1382 lastrearr = false;
1383 oldnufork = NULL;
1384 for (i = 3; i <= spp; i++) {
1385 bestyet = -10.0 * spp * chars;
1386 item = treenode[enterorder[i - 1] - 1];
1387 getnufork(&nufork, &grbg, treenode, zeros);
1388 there = root;
1389 memcpy(tempadd->base, item->base, endsite*sizeof(long));
1390 memcpy(tempadd->numsteps, item->numsteps, endsite*sizeof(long));
1391 memcpy(tempadd->oldbase, zeros, endsite*sizeof(long));
1392 memcpy(tempadd->oldnumsteps, zeros, endsite*sizeof(long));
1393 addpreorder(root, item, nufork);
1394 if (!mulf)
1395 add(there, item, nufork, &root, recompute, treenode, &grbg, zeros);
1396 else
1397 add(there, item, NULL, &root, recompute, treenode, &grbg, zeros);
1398 like = bestyet;
1399 rearrange(&root);
1400 /*if (progress) {
1401 writename(i - 1, 1, enterorder);
1402 }*/
1403 if (tree_build_interrupted) {
1404 longjmp(lngjmp_env, 0);
1405 }
1406 lastrearr = (i == spp);
1407 if (lastrearr) {
1408 bestlike = bestyet;
1409 if (jumb == 1) {
1410 bstlike2 = bestlike;
1411 nextree = 1;
1412 initbestrees(bestrees, maxtrees, true);
1413 initbestrees(bestrees, maxtrees, false);
1414 }
1415 globrearrange();
1416 }
1417 }
1418 done = false;
1419 while (!done && findunrearranged(bestrees, nextree, true) >= 0) {
1420 grandrearr();
1421 done = rearrfirst;
1422 }
1423 recompute = false;
1424 for (i = spp - 1; i >= 1; i--)
1425 re_move(treenode[i], &dummy, &root, recompute, treenode, &grbg, zeros);
1426 if (jumb == njumble) {
1427 collapsebestrees(&root, &grbg, treenode, bestrees, place, zeros,
1428 chars, recompute, progress);
1429 /*if (treeprint) {
1430 putc('\n', outfile);
1431 if (nextree == 2)
1432 fprintf(outfile, "One most parsimonious tree found:\n");
1433 else
1434 fprintf(outfile, "%6ld trees in all found\n", nextree - 1);
1435 }*/
1436 if (nextree > maxtrees + 1) {
1437 /*if (treeprint)
1438 fprintf(outfile, "here are the first %4ld of them\n", (long)maxtrees);*/
1439 nextree = maxtrees + 1;
1440 }
1441 /*if (treeprint)
1442 putc('\n', outfile);*/
1443 for (i = 0; i <= (nextree - 2); i++) {
1444 root = treenode[0];
1445 add(treenode[0], treenode[1], treenode[spp], &root, recompute,
1446 treenode, &grbg, zeros);
1447 nextnode = spp + 2;
1448 for (j = 3; j <= spp; j++) {
1449 if (bestrees[i].btree[j - 1] > 0)
1450 add(treenode[bestrees[i].btree[j - 1] - 1], treenode[j - 1],
1451 treenode[nextnode++ - 1], &root, recompute, treenode, &grbg,
1452 zeros);
1453 else
1454 add(treenode[treenode[-bestrees[i].btree[j - 1]-1]->back->index-1],
1455 treenode[j - 1], NULL, &root, recompute, treenode, &grbg, zeros);
1456 }
1457 reroot(treenode[outgrno - 1], root);
1458 postorder(root);
1459 evaluate(root);
1460 treelength(root, chars, treenode);
1461 //dnapars_printree();
1462 describe();
1463 for (j = 1; j < spp; j++)
1464 re_move(treenode[j], &dummy, &root, recompute, treenode,
1465 &grbg, zeros);
1466 }
1467 }
1468 } else {
1469 /* Open in binary: ftell() is broken for UNIX line-endings under WIN32 */
1470 //openfile(&intree,INTREE,"input tree", "rb",progname,intreename);
1471 //numtrees = countsemic(&intree);
1472 numtrees = 1;
1473 /*if (numtrees > 2)
1474 initseed(&inseed, &inseed0, seed);
1475 if (numtrees > MAXNUMTREES) {
1476 printf("\nERROR: number of input trees is read incorrectly from %s\n",
1477 intreename);
1478 exxit(-1);
1479 }*/
1480 /*if (treeprint) {
1481 fprintf(outfile, "User-defined tree");
1482 if (numtrees > 1)
1483 putc('s', outfile);
1484 fprintf(outfile, ":\n");
1485 }*/
1486 fsteps = (long **)Malloc(maxuser*sizeof(long *));
1487 for (j = 1; j <= maxuser; j++)
1488 fsteps[j - 1] = (long *)Malloc(endsite*sizeof(long));
1489 /*if (trout)
1490 fprintf(outtree, "%ld\n", numtrees);*/
1491 nodep = NULL;
1492 which = 1;
1493 while (which <= numtrees) {
1494 char *tmpfname;
1495 firsttree = true;
1496 nextnode = 0;
1497 haslengths = true;
1498 tmpfname = strdup(create_tmp_filename_from_C());
1499 intree = fl_fopen_from_C(tmpfname, "rb+");
1500 fwrite(toevaluate, strlen(toevaluate), 1, intree);
1501 fflush(intree);
1502 fseek(intree, SEEK_SET, 0);
1503 treeread(intree, &root, treenode, &goteof, &firsttree, nodep, &nextnode,
1504 &haslengths, &grbg, initdnaparsnode,false,nonodes);
1505 fclose(intree);
1506 fl_unlink_from_C(tmpfname);
1507 free(tmpfname);
1508 /*if (treeprint)
1509 fprintf(outfile, "\n\n");*/
1510 if (outgropt)
1511 reroot(treenode[outgrno - 1], root);
1512 postorder(root);
1513 evaluate(root);
1514 treelength(root, chars, treenode);
1515 //dnapars_printree();
1516 nextree = 2;
1517 describe();
1518 if (which < numtrees)
1519 gdispose(root, &grbg, treenode);
1520 which++;
1521 }
1522 //FClose(intree);
1523 //putc('\n', outfile);
1524 if (numtrees > 1 && chars > 1 )
1525 standev(chars, numtrees, minwhich, minsteps, nsteps, fsteps, seed);
1526 for (j = 1; j <= maxuser; j++)
1527 free(fsteps[j - 1]);
1528 free(fsteps);
1529 }
1530 /*if (jumb == njumble) {
1531 if (progress) {
1532 printf("\nOutput written to file \"%s\"\n", outfilename);
1533 if (trout) {
1534 printf("\nTree");
1535 if ((usertree && numtrees > 1) || (!usertree && nextree != 2))
1536 printf("s");
1537 printf(" also written onto file \"%s\"\n", outtreename);
1538 }
1539 }
1540 }*/
1541 return;
1542 way_out:
1543 if (usertree) {
1544 for (j = 1; j <= maxuser; j++) free(fsteps[j - 1]);
1545 free(fsteps);
1546 }
1547 trout = false;
1548 jumb = njumble + 1;
1549 jumble = false;
1550 } /* maketree */
1551
1552
reallocchars()1553 static void reallocchars()
1554 { /* The amount of chars can change between runs
1555 this function reallocates all the variables
1556 whose size depends on the amount of chars */
1557 long i;
1558
1559 for (i=0; i < spp; i++){
1560 free(y[i]);
1561 y[i] = (Char *)Malloc(chars*sizeof(Char));
1562 }
1563 free(weight);
1564 free(oldweight);
1565 free(alias);
1566 free(ally);
1567 free(location);
1568
1569 weight = (long *)Malloc(chars*sizeof(long));
1570 oldweight = (long *)Malloc(chars*sizeof(long));
1571 alias = (long *)Malloc(chars*sizeof(long));
1572 ally = (long *)Malloc(chars*sizeof(long));
1573 location = (long *)Malloc(chars*sizeof(long));
1574 }
1575
1576
freerest()1577 void freerest()
1578 { /* free variables that are allocated each data set */
1579 long i;
1580
1581 if (!usertree) {
1582 freenode(&temp);
1583 freenode(&temp1);
1584 freenode(&temp2);
1585 freenode(&tempsum);
1586 freenode(&temprm);
1587 freenode(&tempadd);
1588 freenode(&tempf);
1589 freenode(&tmp);
1590 freenode(&tmp1);
1591 freenode(&tmp2);
1592 freenode(&tmp3);
1593 freenode(&tmprm);
1594 freenode(&tmpadd);
1595 }
1596 /*for (i = 0; i < spp; i++)
1597 free(y[i]);
1598 free(y);*/
1599 for (i = 1; i <= maxtrees; i++)
1600 free(bestrees[i - 1].btree);
1601 free(bestrees);
1602 //free(nayme);
1603 free(enterorder);
1604 free(place);
1605 free(weight);
1606 free(oldweight);
1607 free(alias);
1608 free(ally);
1609 free(location);
1610 freegrbg(&grbg);
1611 if (ancseq)
1612 freegarbage(&garbage);
1613 free(threshwt);
1614 free(zeros);
1615 freenodes(nonodes, treenode);
1616 } /* freerest */
1617
1618
1619
dnapars(char ** seq,char ** seqname,int notu,int njumbles,int * pjumble_no,int * steps,char * toevaluate,int arg_maxtrees,int * bt_weights,dnapars_S_option s_option)1620 char* dnapars(char** seq, char** seqname, int notu, int njumbles, int *pjumble_no, int *steps, char* toevaluate, int arg_maxtrees,
1621 int *bt_weights, dnapars_S_option s_option)
1622 { /* DNA parsimony by uphill search */
1623
1624 /* reads in spp, chars, and the data. Then calls maketree to
1625 construct the tree */
1626 progname = "Dnapars";
1627 //openfile(&infile,INFILE,"input file", "r",argv[0],infilename);
1628 //openfile(&outfile,OUTFILE,"output file", "w",argv[0],outfilename);
1629 //outfile = stdout;
1630 ibmpc = IBMCRT;
1631 ansi = ANSICRT;
1632 msets = 1;
1633 firstset = true;
1634 garbage = NULL;
1635 grbg = NULL;
1636 doinit(notu, strlen(seq[0]), arg_maxtrees, toevaluate, s_option);
1637 if (!toevaluate && njumbles > 0) {
1638 njumble = njumbles;
1639 jumble = true;
1640 }
1641 /*if (weights || justwts)
1642 openfile(&weightfile,WEIGHTFILE,"weights file","r",argv[0],weightfilename);*/
1643 if (trout) {
1644 outtreename = strdup(create_tmp_filename_from_C()); //openfile(&outtree,OUTTREE,"output tree file", "w",argv[0],outtreename);
1645 outtree = fl_fopen_from_C(outtreename, "wb");
1646 }
1647 for (ith = 1; ith <= msets; ith++) {
1648 if (!(justwts && !firstset))
1649 allocrest();
1650 /*if (msets > 1 && !justwts) {
1651 fprintf(outfile, "\nData set # %ld:\n\n", ith);
1652 if (progress)
1653 printf("\nData set # %ld:\n\n", ith);
1654 }*/
1655 int i;
1656 if (bt_weights != NULL) for (i = 0; i < chars; i++) weight[i] = bt_weights[i];
1657 else for (i = 0; i < chars; i++) weight[i] = 1;
1658 doinput(seq, seqname);
1659 if (ith == 1)
1660 firstset = false;
1661 for (jumb = 1; jumb <= njumble; jumb++) {
1662 maketree(toevaluate);
1663 if (jumble && pjumble_no) {
1664 *steps = (int)(bestlike / -10);
1665 *pjumble_no = jumb;
1666 awake_from_C();
1667 }
1668 }
1669 if (!justwts)
1670 freerest();
1671 }
1672 freetree(nonodes, treenode);
1673 //FClose(infile);
1674 //FClose(outfile);
1675 /*if (weights || justwts)
1676 FClose(weightfile);*/
1677 char *tree = NULL;
1678 fclose(outtree);
1679 if (trout) {
1680 outtree = fl_fopen_from_C(outtreename, "rb"); // read tree from tmp file and return it as a char*
1681 fseek(outtree, 0, SEEK_END);
1682 long L=ftell(outtree);
1683 tree = (char*)malloc(L+1);
1684 fseek(outtree, 0, SEEK_SET);
1685 fread(tree, L, 1, outtree);
1686 tree[L] = 0;
1687 fclose(outtree);
1688 *steps = (int)(like / -10.);
1689 }
1690 fl_unlink_from_C(outtreename);
1691 free(outtreename);
1692 /*if (usertree)
1693 FClose(intree);*/
1694 return tree;
1695 } /* DNA parsimony by uphill search */
1696
1697