1 #include "phylip.h"
2 #include "seq.h"
3 #include <unistd.h>
4 #include <setjmp.h>
5
6 /*
7 derived from file protpars.c of PHYLIP version 3.696 with the following copyright:
8
9 version 3.696.
10 Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe.
11
12 Copyright (c) 1993-2014, Joseph Felsenstein.
13 All rights reserved.
14
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 1. Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 2. Redistributions in binary form must reproduce the above copyright notice,
22 this list of conditions and the following disclaimer in the documentation
23 and/or other materials provided with the distribution.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 #define maxtrees 100 /* maximum number of tied trees stored */
39 //static int maxtrees;
40 static int nmlngth;
41
42 static jmp_buf lngjmp_env;
43
44 typedef enum {
45 universal, ciliate, mito, vertmito, flymito, yeastmito
46 } codetype;
47
48 /* nodes will form a binary tree */
49
50 typedef struct gseq {
51 seqptr seq;
52 struct gseq *next;
53 } gseq;
54
55 /* function prototypes */
56 extern int tree_build_interrupted;
57 extern char *create_tmp_filename_from_C(void);
58 extern FILE *fl_fopen_from_C(const char *fname, const char *mode);
59 extern int fl_unlink_from_C(const char*fname);
60 void padtosize(char *pname, char *name, int length);
61 //void protgnu(gseq **);
62 //void protchuck(gseq *);
63 void code(void);
64 void setup(void);
65 static void getoptions(void);
66 void protalloctree(void);
67 static void protfreetree();
68 static void allocrest(void);
69 static void doinit(int, int, char*);
70 //void protinputdata(void);
71
72 void protmakevalues(void);
73 static void doinput(char** seq, char** seqname);
74 void protfillin(node *, node *, node *);
75 void protpreorder(node *);
76 void protadd(node *, node *, node *);
77 void protre_move(node **, node **);
78 static void evaluate(node *);
79 void protpostorder(node *);
80 void protreroot(node *);
81 void protsavetraverse(node *, long *, boolean *);
82
83 void protsavetree(long *, boolean *);
84 static void tryadd(node *, node **, node **);
85 static void addpreorder(node *, node *, node *);
86 static void tryrearr(node *, boolean *);
87 static void repreorder(node *, boolean *);
88 static void rearrange(node **);
89 void protgetch(Char *);
90 void protaddelement(node **, long *, long *, boolean *);
91 void prottreeread(void);
92 void protancestset(long *, long *, long *, long *, long *);
93
94 void prothyprint(long , long , boolean *, node *, boolean *, boolean *);
95 void prothyptrav(node *, sitearray *, long, long, long *, boolean *,
96 sitearray);
97 //void prothypstates(long *);
98 static void describe(void);
99 static void maketree(char*);
100 void reallocnode(node* p);
101 static void reallocchars(void);
102 extern void awake_from_C(void);
103 /* function prototypes */
104
105
106
107 static Char infilename[FNMLNGTH], outfilename[FNMLNGTH], intreename[FNMLNGTH], *outtreename, weightfilename[FNMLNGTH];
108 static node *root;
109 static long chars, col, msets, ith, njumble, jumb;
110 /* chars = number of sites in actual sequences */
111 static long inseed, inseed0;
112 static boolean jumble, usertree, weights, thresh, trout, progress, stepbox,
113 justwts, ancseq, mulsets, firstset;
114 codetype whichcode;
115 long fullset, fulldel;
116 static pointarray treenode; /* pointers to all nodes in tree */
117 static double threshold;
118 static steptr threshwt;
119 static longer seed;
120 static long *enterorder;
121 sitearray translate[(long)quest - (long)ala + 1];
122 aas trans[4][4][4];
123 static long **fsteps;
124 static bestelm *bestrees;
125 boolean dummy;
126 static gseq *garbage;
127 static node *temp, *temp1;
128 Char ch;
129 aas tmpa;
130 static char *progname;
131
132 /* Local variables for maketree, propagated globally for c version: */
133 static long minwhich;
134 static double like, bestyet, bestlike, minsteps, bstlike2;
135 static boolean lastrearr, recompute;
136 static node *there;
137 static double nsteps[maxuser];
138 static long *place;
139 static boolean *names;
140
141
142 /*void protgnu(gseq **p)
143 {
144 * this and the following are do-it-yourself garbage collectors.
145 Make a new node or pull one off the garbage list *
146 if (garbage != NULL) {
147 *p = garbage;
148 free((*p)->seq);
149 (*p)->seq = (seqptr)Malloc(chars*sizeof(sitearray));
150 garbage = garbage->next;
151 } else {
152 *p = (gseq *)Malloc(sizeof(gseq));
153 (*p)->seq = (seqptr)Malloc(chars*sizeof(sitearray));
154 }
155 (*p)->next = NULL;
156 } * protgnu *
157
158
159 void protchuck(gseq *p)
160 {
161 * collect garbage on p -- put it on front of garbage list *
162 p->next = garbage;
163 garbage = p;
164 } * protchuck */
165
166
code()167 void code()
168 {
169 /* make up table of the code 1 = u, 2 = c, 3 = a, 4 = g */
170 trans[0][0][0] = phe;
171 trans[0][0][1] = phe;
172 trans[0][0][2] = leu;
173 trans[0][0][3] = leu;
174 trans[0][1][0] = ser1;
175 trans[0][1][1] = ser1;
176 trans[0][1][2] = ser1;
177 trans[0][1][3] = ser1;
178 trans[0][2][0] = tyr;
179 trans[0][2][1] = tyr;
180 trans[0][2][2] = stop;
181 trans[0][2][3] = stop;
182 trans[0][3][0] = cys;
183 trans[0][3][1] = cys;
184 trans[0][3][2] = stop;
185 trans[0][3][3] = trp;
186 trans[1][0][0] = leu;
187 trans[1][0][1] = leu;
188 trans[1][0][2] = leu;
189 trans[1][0][3] = leu;
190 trans[1][1][0] = pro;
191 trans[1][1][1] = pro;
192 trans[1][1][2] = pro;
193 trans[1][1][3] = pro;
194 trans[1][2][0] = his;
195 trans[1][2][1] = his;
196 trans[1][2][2] = gln;
197 trans[1][2][3] = gln;
198 trans[1][3][0] = arg;
199 trans[1][3][1] = arg;
200 trans[1][3][2] = arg;
201 trans[1][3][3] = arg;
202 trans[2][0][0] = ileu;
203 trans[2][0][1] = ileu;
204 trans[2][0][2] = ileu;
205 trans[2][0][3] = met;
206 trans[2][1][0] = thr;
207 trans[2][1][1] = thr;
208 trans[2][1][2] = thr;
209 trans[2][1][3] = thr;
210 trans[2][2][0] = asn;
211 trans[2][2][1] = asn;
212 trans[2][2][2] = lys;
213 trans[2][2][3] = lys;
214 trans[2][3][0] = ser2;
215 trans[2][3][1] = ser2;
216 trans[2][3][2] = arg;
217 trans[2][3][3] = arg;
218 trans[3][0][0] = val;
219 trans[3][0][1] = val;
220 trans[3][0][2] = val;
221 trans[3][0][3] = val;
222 trans[3][1][0] = ala;
223 trans[3][1][1] = ala;
224 trans[3][1][2] = ala;
225 trans[3][1][3] = ala;
226 trans[3][2][0] = asp;
227 trans[3][2][1] = asp;
228 trans[3][2][2] = glu;
229 trans[3][2][3] = glu;
230 trans[3][3][0] = gly;
231 trans[3][3][1] = gly;
232 trans[3][3][2] = gly;
233 trans[3][3][3] = gly;
234 if (whichcode == mito)
235 trans[0][3][2] = trp;
236 if (whichcode == vertmito) {
237 trans[0][3][2] = trp;
238 trans[2][3][2] = stop;
239 trans[2][3][3] = stop;
240 trans[2][0][2] = met;
241 }
242 if (whichcode == flymito) {
243 trans[0][3][2] = trp;
244 trans[2][0][2] = met;
245 trans[2][3][2] = ser2;
246 }
247 if (whichcode == yeastmito) {
248 trans[0][3][2] = trp;
249 trans[1][0][2] = thr;
250 trans[2][0][2] = met;
251 }
252 } /* code */
253
254
setup()255 void setup()
256 {
257 /* set up set table to get aasets from aas */
258 aas a, b;
259 long i, j, k, l, s;
260
261 for (a = ala; (long)a <= (long)stop; a = (aas)((long)a + 1)) {
262 translate[(long)a - (long)ala][0] = 1L << ((long)a);
263 translate[(long)a - (long)ala][1] = 1L << ((long)a);
264 }
265 for (i = 0; i <= 3; i++) {
266 for (j = 0; j <= 3; j++) {
267 for (k = 0; k <= 3; k++) {
268 for (l = 0; l <= 3; l++) {
269 translate[(long)trans[i][j][k]][1] |= (1L << (long)trans[l][j][k]);
270 translate[(long)trans[i][j][k]][1] |= (1L << (long)trans[i][l][k]);
271 translate[(long)trans[i][j][k]][1] |= (1L << (long)trans[i][j][l]);
272 }
273 }
274 }
275 }
276 translate[(long)del - (long)ala][1] = 1L << ((long)del);
277 fulldel = (1L << ((long)stop + 1)) - (1L << ((long)ala));
278 fullset = fulldel & (~(1L << ((long)del)));
279 translate[(long)asx - (long)ala][0]
280 = (1L << ((long)asn)) | (1L << ((long)asp));
281 translate[(long)glx - (long)ala][0]
282 = (1L << ((long)gln)) | (1L << ((long)glu));
283 translate[(long)ser - (long)ala][0]
284 = (1L << ((long)ser1)) | (1L << ((long)ser2));
285 translate[(long)unk - (long)ala][0] = fullset;
286 translate[(long)quest - (long)ala][0] = fulldel;
287 translate[(long)asx - (long)ala][1] = translate[(long)asn - (long)ala][1]
288 | translate[(long)asp - (long)ala][1];
289 translate[(long)glx - (long)ala][1] = translate[(long)gln - (long)ala][1]
290 | translate[(long)glu - (long)ala][1];
291 translate[(long)ser - (long)ala][1] = translate[(long)ser1 - (long)ala][1]
292 | translate[(long)ser2 - (long)ala][1];
293 translate[(long)unk - (long)ala][1] = fullset;
294 translate[(long)quest - (long)ala][1] = fulldel;
295 for (a = ala; (long)a <= (long)quest; a = (aas)((long)a + 1)) {
296 s = 0;
297 for (b = ala; (long)b <= (long)stop; b = (aas)((long)b + 1)) {
298 if (((1L << ((long)b)) & translate[(long)a - (long)ala][1]) != 0)
299 s |= translate[(long)b - (long)ala][1];
300 }
301 translate[(long)a - (long)ala][2] = s;
302 }
303 } /* setup */
304
305
getoptions()306 static void getoptions()
307 {
308 /* interactively set options */
309 //long loopcount, loopcount2;
310 //Char ch, ch2;
311
312 //fprintf(outfile, "\nProtein parsimony algorithm, version %s\n\n",VERSION);
313 //putchar('\n');
314 jumble = false;
315 njumble = 1;
316 outgrno = 1;
317 outgropt = false;
318 thresh = false;
319 trout = true;
320 usertree = false;
321 weights = false;
322 whichcode = universal;
323 printdata = false;
324 progress = false;
325 treeprint = false;
326 stepbox = false;
327 ancseq = false;
328 dotdiff = true;
329 interleaved = true;
330 /*loopcount = 0;
331 for (;;) {
332 cleerhome();
333 printf("\nProtein parsimony algorithm, version %s\n\n",VERSION);
334 printf("Setting for this run:\n");
335 printf(" U Search for best tree? %s\n",
336 (usertree ? "No, use user trees in input file" : "Yes"));
337 if (!usertree) {
338 printf(" J Randomize input order of sequences?");
339 if (jumble)
340 printf(" Yes (seed =%8ld,%3ld times)\n", inseed0, njumble);
341 else
342 printf(" No. Use input order\n");
343 }
344 printf(" O Outgroup root?");
345 if (outgropt)
346 printf(" Yes, at sequence number%3ld\n", outgrno);
347 else
348 printf(" No, use as outgroup species%3ld\n", outgrno);
349 printf(" T Use Threshold parsimony?");
350 if (thresh)
351 printf(" Yes, count steps up to%4.1f per site\n", threshold);
352 else
353 printf(" No, use ordinary parsimony\n");
354 printf(" C Use which genetic code? %s\n",
355 (whichcode == universal) ? "Universal" :
356 (whichcode == ciliate) ? "Ciliate" :
357 (whichcode == mito) ? "Universal mitochondrial" :
358 (whichcode == vertmito) ? "Vertebrate mitochondrial" :
359 (whichcode == flymito) ? "Fly mitochondrial" :
360 (whichcode == yeastmito) ? "Yeast mitochondrial" : "");
361 printf(" W Sites weighted? %s\n",
362 (weights ? "Yes" : "No"));
363 printf(" M Analyze multiple data sets?");
364 if (mulsets)
365 printf(" Yes, %2ld %s\n", msets,
366 (justwts ? "sets of weights" : "data sets"));
367 else
368 printf(" No\n");
369 printf(" I Input sequences interleaved? %s\n",
370 (interleaved ? "Yes" : "No, sequential"));
371 printf(" 0 Terminal type (IBM PC, ANSI, none)? %s\n",
372 (ibmpc ? "IBM PC" : ansi ? "ANSI" : "(none)"));
373 printf(" 1 Print out the data at start of run %s\n",
374 (printdata ? "Yes" : "No"));
375 printf(" 2 Print indications of progress of run %s\n",
376 (progress ? "Yes" : "No"));
377 printf(" 3 Print out tree %s\n",
378 (treeprint ? "Yes" : "No"));
379 printf(" 4 Print out steps in each site %s\n",
380 (stepbox ? "Yes" : "No"));
381 printf(" 5 Print sequences at all nodes of tree %s\n",
382 (ancseq ? "Yes" : "No"));
383 if (ancseq || printdata)
384 printf(" . Use dot-differencing to display them %s\n",
385 dotdiff ? "Yes" : "No");
386 printf(" 6 Write out trees onto tree file? %s\n",
387 (trout ? "Yes" : "No"));
388 if(weights && justwts){
389 printf(
390 "WARNING: W option and Multiple Weights options are both on. ");
391 printf(
392 "The W menu option is unnecessary and has no additional effect. \n");
393 }
394 printf(
395 "\nAre these settings correct? (type Y or the letter for one to change)\n");
396 fflush(stdout);
397 scanf("%c%*[^\n]", &ch);
398 getchar();
399 uppercase(&ch);
400 if (ch == 'Y')
401 break;
402 if (((!usertree) && (strchr("WCJOTUMI12345.60", ch) != NULL))
403 || (usertree && ((strchr("WCOTUMI12345.60", ch) != NULL)))){
404 switch (ch) {
405
406 case 'J':
407 jumble = !jumble;
408 if (jumble)
409 initjumble(&inseed, &inseed0, seed, &njumble);
410 else njumble = 1;
411 break;
412
413 case 'W':
414 weights = !weights;
415 break;
416
417 case 'O':
418 outgropt = !outgropt;
419 if (outgropt)
420 initoutgroup(&outgrno, spp);
421 else outgrno = 1;
422 break;
423
424 case 'T':
425 thresh = !thresh;
426 if (thresh)
427 initthreshold(&threshold);
428 break;
429
430 case 'C':
431 printf("\nWhich genetic code?\n");
432 printf(" type for\n\n");
433 printf(" U Universal\n");
434 printf(" M Mitochondrial\n");
435 printf(" V Vertebrate mitochondrial\n");
436 printf(" F Fly mitochondrial\n");
437 printf(" Y Yeast mitochondrial\n\n");
438 loopcount2 = 0;
439 do {
440 printf("type U, M, V, F, or Y\n");
441 fflush(stdout);
442 scanf("%c%*[^\n]", &ch);
443 getchar();
444 if (ch == '\n')
445 ch = ' ';
446 uppercase(&ch);
447 countup(&loopcount2, 10);
448 } while (ch != 'U' && ch != 'M' && ch != 'V'
449 && ch != 'F' && ch != 'Y');
450 switch (ch) {
451
452 case 'U':
453 whichcode = universal;
454 break;
455
456 case 'M':
457 whichcode = mito;
458 break;
459
460 case 'V':
461 whichcode = vertmito;
462 break;
463
464 case 'F':
465 whichcode = flymito;
466 break;
467
468 case 'Y':
469 whichcode = yeastmito;
470 break;
471 }
472 break;
473
474 case 'M':
475 mulsets = !mulsets;
476 if (mulsets){
477 printf("Multiple data sets or multiple weights?");
478 loopcount2 = 0;
479 do {
480 printf(" (type D or W)\n");
481 #ifdef WIN32
482 phyFillScreenColor();
483 #endif
484 fflush(stdout);
485 scanf("%c%*[^\n]", &ch2);
486 getchar();
487 if (ch2 == '\n')
488 ch2 = ' ';
489 uppercase(&ch2);
490 countup(&loopcount2, 10);
491 } while ((ch2 != 'W') && (ch2 != 'D'));
492 justwts = (ch2 == 'W');
493 if (justwts)
494 justweights(&msets);
495 else
496 initdatasets(&msets);
497 if (!jumble) {
498 jumble = true;
499 initjumble(&inseed, &inseed0, seed, &njumble);
500 }
501 }
502 break;
503
504 case 'I':
505 interleaved = !interleaved;
506 break;
507
508 case 'U':
509 usertree = !usertree;
510 break;
511
512 case '0':
513 initterminal(&ibmpc, &ansi);
514 break;
515
516 case '1':
517 printdata = !printdata;
518 break;
519
520 case '2':
521 progress = !progress;
522 break;
523
524 case '3':
525 treeprint = !treeprint;
526 break;
527
528 case '4':
529 stepbox = !stepbox;
530 break;
531
532 case '5':
533 ancseq = !ancseq;
534 break;
535
536 case '.':
537 dotdiff = !dotdiff;
538 break;
539
540 case '6':
541 trout = !trout;
542 break;
543 }
544 } else
545 printf("Not a possible option!\n");
546 countup(&loopcount, 100);
547 }*/
548 } /* getoptions */
549
550
protalloctree()551 void protalloctree()
552 { /* allocate treenode dynamically */
553 long i, j;
554 node *p, *q;
555
556 treenode = (pointarray)Malloc(nonodes*sizeof(node *));
557 for (i = 0; i < (spp); i++) {
558 treenode[i] = (node *)Malloc(sizeof(node));
559 treenode[i]->numsteps = (steptr)Malloc(chars*sizeof(long));
560 treenode[i]->siteset = (seqptr)Malloc(chars*sizeof(sitearray));
561 treenode[i]->seq = (aas *)Malloc(chars*sizeof(aas));
562 }
563 for (i = spp; i < (nonodes); i++) {
564 q = NULL;
565 for (j = 1; j <= 3; j++) {
566 p = (node *)Malloc(sizeof(node));
567 p->numsteps = (steptr)Malloc(chars*sizeof(long));
568 p->siteset = (seqptr)Malloc(chars*sizeof(sitearray));
569 p->seq = (aas *)Malloc(chars*sizeof(aas));
570 p->next = q;
571 q = p;
572 }
573 p->next->next->next = p;
574 treenode[i] = p;
575 }
576 } /* protalloctree */
577
578
protfreetree()579 static void protfreetree()
580 {
581 int i;
582 node *p;
583 for (i = 0; i < nonodes; i++) {
584 p = treenode[i];
585 free(p->numsteps);
586 free(p->siteset);
587 free(p->seq);
588 free(p);
589 }
590 free(treenode);
591
592 for (i = 1; i <= maxtrees; i++) free(bestrees[i-1].btree);
593 free(bestrees);
594 free(enterorder);
595 free(place);
596 free(weight);
597 free(threshwt);
598 free(temp->numsteps);
599 free(temp->siteset);
600 free(temp->seq);
601 free(temp);
602 free(temp1->numsteps);
603 free(temp1->siteset);
604 free(temp1->seq);
605 free(temp1);
606 if (usertree) {
607 for (i = 0; i < maxuser; i++) {
608 free(fsteps[i]);
609 }
610 free(fsteps);
611 }
612 }
613
614
reallocnode(node * p)615 void reallocnode(node* p)
616 {
617 free(p->numsteps);
618 free(p->siteset);
619 free(p->seq);
620 p->numsteps = (steptr)Malloc(chars*sizeof(long));
621 p->siteset = (seqptr)Malloc(chars*sizeof(sitearray));
622 p->seq = (aas *)Malloc(chars*sizeof(aas));
623 }
624
625
reallocchars(void)626 static void reallocchars(void)
627 { /* reallocates variables that are dependand on the number of chars
628 * do we need to reallocate the garbage list too? */
629 long i;
630 node *p;
631
632 if (usertree)
633 for (i = 0; i < maxuser; i++) {
634 free(fsteps[i]);
635 fsteps[i] = (long *)Malloc(chars*sizeof(long));
636 }
637
638 for (i = 0; i < nonodes; i++) {
639 reallocnode(treenode[i]);
640 if (i >= spp) {
641 p=treenode[i]->next;
642 while (p != treenode[i]) {
643 reallocnode(p);
644 p = p->next;
645 }
646 }
647 }
648
649 free(weight);
650 free(threshwt);
651 free(temp->numsteps);
652 free(temp->siteset);
653 free(temp->seq);
654 free(temp1->numsteps);
655 free(temp1->siteset);
656 free(temp1->seq);
657
658 weight = (steptr)Malloc(chars*sizeof(long));
659 threshwt = (steptr)Malloc(chars*sizeof(long));
660 temp->numsteps = (steptr)Malloc(chars*sizeof(long));
661 temp->siteset = (seqptr)Malloc(chars*sizeof(sitearray));
662 temp->seq = (aas *)Malloc(chars*sizeof(aas));
663 temp1->numsteps = (steptr)Malloc(chars*sizeof(long));
664 temp1->siteset = (seqptr)Malloc(chars*sizeof(sitearray));
665 temp1->seq = (aas *)Malloc(chars*sizeof(aas));
666 }
667
668
allocrest()669 static void allocrest()
670 { /* allocate remaining global arrays and variables dynamically */
671 long i;
672
673 if (usertree) {
674 fsteps = (long **)Malloc(maxuser*sizeof(long *));
675 for (i = 0; i < maxuser; i++)
676 fsteps[i] = (long *)Malloc(chars*sizeof(long));
677 }
678 bestrees = (bestelm *)Malloc(maxtrees*sizeof(bestelm));
679 for (i = 1; i <= maxtrees; i++)
680 bestrees[i - 1].btree = (long *)Malloc(spp*sizeof(long));
681 //nayme = (naym *)Malloc(spp*sizeof(naym));
682 enterorder = (long *)Malloc(spp*sizeof(long));
683 place = (long *)Malloc(nonodes*sizeof(long));
684 weight = (steptr)Malloc(chars*sizeof(long));
685 threshwt = (steptr)Malloc(chars*sizeof(long));
686 temp = (node *)Malloc(sizeof(node));
687 temp->numsteps = (steptr)Malloc(chars*sizeof(long));
688 temp->siteset = (seqptr)Malloc(chars*sizeof(sitearray));
689 temp->seq = (aas *)Malloc(chars*sizeof(aas));
690 temp1 = (node *)Malloc(sizeof(node));
691 temp1->numsteps = (steptr)Malloc(chars*sizeof(long));
692 temp1->siteset = (seqptr)Malloc(chars*sizeof(sitearray));
693 temp1->seq = (aas *)Malloc(chars*sizeof(aas));
694 } /* allocrest */
695
696
doinit(int myspp,int mychars,char * toevaluate)697 static void doinit(int myspp, int mychars, char* toevaluate)
698 {
699 /* initializes variables */
700
701 //inputnumbers(&spp, &chars, &nonodes, 1);
702 spp = myspp;
703 chars = mychars;
704 nonodes = (spp * 2 - 1);
705 getoptions();
706 usertree = (toevaluate != NULL);
707 /*if (printdata)
708 fprintf(outfile, "%2ld species, %3ld sites\n\n", spp, chars);*/
709 protalloctree();
710 allocrest();
711 } /* doinit*/
712
713
714 /*void protinputdata()
715 {
716 * input the names and sequences for each species *
717 long i, j, k, l, aasread, aasnew = 0;
718 Char charstate;
719 boolean allread, done;
720 aas aa; * temporary amino acid for input *
721
722 if (printdata)
723 headings(chars, "Sequences", "---------");
724 aasread = 0;
725 allread = false;
726 while (!(allread)) {
727 * eat white space -- if the separator line has spaces on it*
728 do {
729 charstate = gettc(infile);
730 } while (charstate == ' ' || charstate == '\t');
731 ungetc(charstate, infile);
732 if (eoln(infile)) {
733 scan_eoln(infile);
734 }
735 i = 1;
736 while (i <= spp) {
737 if ((interleaved && aasread == 0) || !interleaved)
738 initname(i - 1);
739 j = interleaved ? aasread : 0;
740 done = false;
741 while (!done && !eoff(infile)) {
742 if (interleaved)
743 done = true;
744 while (j < chars && !(eoln(infile) || eoff(infile))) {
745 charstate = gettc(infile);
746 if (charstate == '\n' || charstate == '\t')
747 charstate = ' ';
748 if (charstate == ' ' || (charstate >= '0' && charstate <= '9'))
749 continue;
750 uppercase(&charstate);
751 if ((!isalpha(charstate) && charstate != '?' &&
752 charstate != '-' && charstate != '*') || charstate == 'J' ||
753 charstate == 'O' || charstate == 'U') {
754 printf("WARNING -- BAD AMINO ACID:%c",charstate);
755 printf(" AT POSITION%5ld OF SPECIES %3ld\n",j,i);
756 exxit(-1);
757 }
758 j++;
759 aa = (charstate == 'A') ? ala :
760 (charstate == 'B') ? asx :
761 (charstate == 'C') ? cys :
762 (charstate == 'D') ? asp :
763 (charstate == 'E') ? glu :
764 (charstate == 'F') ? phe :
765 (charstate == 'G') ? gly : aa;
766 aa = (charstate == 'H') ? his :
767 (charstate == 'I') ? ileu :
768 (charstate == 'K') ? lys :
769 (charstate == 'L') ? leu :
770 (charstate == 'M') ? met :
771 (charstate == 'N') ? asn :
772 (charstate == 'P') ? pro :
773 (charstate == 'Q') ? gln :
774 (charstate == 'R') ? arg : aa;
775 aa = (charstate == 'S') ? ser :
776 (charstate == 'T') ? thr :
777 (charstate == 'V') ? val :
778 (charstate == 'W') ? trp :
779 (charstate == 'X') ? unk :
780 (charstate == 'Y') ? tyr :
781 (charstate == 'Z') ? glx :
782 (charstate == '*') ? stop :
783 (charstate == '?') ? quest:
784 (charstate == '-') ? del : aa;
785
786 treenode[i - 1]->seq[j - 1] = aa;
787 memcpy(treenode[i - 1]->siteset[j - 1],
788 translate[(long)aa - (long)ala], sizeof(sitearray));
789 }
790 if (interleaved)
791 continue;
792 if (j < chars)
793 scan_eoln(infile);
794 else if (j == chars)
795 done = true;
796 }
797 if (interleaved && i == 1)
798 aasnew = j;
799 scan_eoln(infile);
800 if ((interleaved && j != aasnew) || ((!interleaved) && j != chars)){
801 printf("ERROR: SEQUENCES OUT OF ALIGNMENT\n");
802 exxit(-1);}
803 i++;
804 }
805 if (interleaved) {
806 aasread = aasnew;
807 allread = (aasread == chars);
808 } else
809 allread = (i > spp);
810 }
811 if (printdata) {
812 for (i = 1; i <= ((chars - 1) / 60 + 1); i++) {
813 for (j = 1; j <= (spp); j++) {
814 for (k = 0; k < nmlngth; k++)
815 putc(nayme[j - 1][k], outfile);
816 fprintf(outfile, " ");
817 l = i * 60;
818 if (l > chars)
819 l = chars;
820 for (k = (i - 1) * 60 + 1; k <= l; k++) {
821 if (j > 1 && treenode[j - 1]->seq[k - 1] == treenode[0]->seq[k - 1])
822 charstate = '.';
823 else {
824 tmpa = treenode[j-1]->seq[k-1];
825 charstate = (tmpa == ala) ? 'A' :
826 (tmpa == asx) ? 'B' :
827 (tmpa == cys) ? 'C' :
828 (tmpa == asp) ? 'D' :
829 (tmpa == glu) ? 'E' :
830 (tmpa == phe) ? 'F' :
831 (tmpa == gly) ? 'G' :
832 (tmpa == his) ? 'H' :
833 (tmpa ==ileu) ? 'I' :
834 (tmpa == lys) ? 'K' :
835 (tmpa == leu) ? 'L' : charstate;
836 charstate = (tmpa == met) ? 'M' :
837 (tmpa == asn) ? 'N' :
838 (tmpa == pro) ? 'P' :
839 (tmpa == gln) ? 'Q' :
840 (tmpa == arg) ? 'R' :
841 (tmpa == ser) ? 'S' :
842 (tmpa ==ser1) ? 'S' :
843 (tmpa ==ser2) ? 'S' : charstate;
844 charstate = (tmpa == thr) ? 'T' :
845 (tmpa == val) ? 'V' :
846 (tmpa == trp) ? 'W' :
847 (tmpa == unk) ? 'X' :
848 (tmpa == tyr) ? 'Y' :
849 (tmpa == glx) ? 'Z' :
850 (tmpa == del) ? '-' :
851 (tmpa ==stop) ? '*' :
852 (tmpa==quest) ? '?' : charstate;
853 }
854 putc(charstate, outfile);
855 if (k % 10 == 0 && k % 60 != 0)
856 putc(' ', outfile);
857 }
858 putc('\n', outfile);
859 }
860 putc('\n', outfile);
861 }
862 putc('\n', outfile);
863 }
864 putc('\n', outfile);
865 } * protinputdata */
866
867
convertprotseq(char ** seq)868 static void convertprotseq(char **seq)
869 {
870 aas aa;
871 char charstate;
872 int i, j;
873 for (i = 1; i <= spp; i++) {
874 for (j = 1; j <= chars; j++) {
875 aa = unk;
876 charstate = seq[i-1][j-1];
877 aa = (charstate == 'A') ? ala :
878 (charstate == 'B') ? asx :
879 (charstate == 'C') ? cys :
880 (charstate == 'D') ? asp :
881 (charstate == 'E') ? glu :
882 (charstate == 'F') ? phe :
883 (charstate == 'G') ? gly : aa;
884 aa = (charstate == 'H') ? his :
885 (charstate == 'I') ? ileu :
886 (charstate == 'K') ? lys :
887 (charstate == 'L') ? leu :
888 (charstate == 'M') ? met :
889 (charstate == 'N') ? asn :
890 (charstate == 'P') ? pro :
891 (charstate == 'Q') ? gln :
892 (charstate == 'R') ? arg : aa;
893 aa = (charstate == 'S') ? ser :
894 (charstate == 'T') ? thr :
895 (charstate == 'V') ? val :
896 (charstate == 'W') ? trp :
897 (charstate == 'X') ? unk :
898 (charstate == 'Y') ? tyr :
899 (charstate == 'Z') ? glx :
900 (charstate == '*') ? stop :
901 (charstate == '?') ? quest:
902 (charstate == '-') ? del : aa;
903
904 treenode[i - 1]->seq[j - 1] = aa;
905 memcpy(treenode[i - 1]->siteset[j - 1],
906 translate[(long)aa - (long)ala], sizeof(sitearray));
907 }
908 }
909 }
910
911
protmakevalues()912 void protmakevalues()
913 {
914 /* set up fractional likelihoods at tips */
915 long i, j;
916 node *p;
917
918 for (i = 1; i <= nonodes; i++) {
919 treenode[i - 1]->back = NULL;
920 treenode[i - 1]->tip = (i <= spp);
921 treenode[i - 1]->index = i;
922 for (j = 0; j < (chars); j++)
923 treenode[i - 1]->numsteps[j] = 0;
924 if (i > spp) {
925 p = treenode[i - 1]->next;
926 while (p != treenode[i - 1]) {
927 p->back = NULL;
928 p->tip = false;
929 p->index = i;
930 for (j = 0; j < (chars); j++)
931 p->numsteps[j] = 0;
932 p = p->next;
933 }
934 }
935 }
936 } /* protmakevalues */
937
938
doinput(char ** seq,char ** seqname)939 static void doinput(char** seq, char** seqname)
940 {
941 /* reads the input data */
942 long i;
943
944 if (justwts) {
945 if (firstset) {
946 //protinputdata();
947 convertprotseq(seq);
948 nayme = seqname;
949 }
950 /*for (i = 0; i < chars; i++)
951 weight[i] = 1;
952 inputweights(chars, weight, &weights);
953 if (justwts) {
954 fprintf(outfile, "\n\nWeights set # %ld:\n\n", ith);
955 if (progress)
956 printf("\nWeights set # %ld:\n\n", ith);
957 }
958 if (printdata)
959 printweights(outfile, 0, chars, weight, "Sites");*/
960 } else {
961 if (!firstset){
962 //samenumsp(&chars, ith);
963 reallocchars();
964 }
965 /*for (i = 0; i < chars; i++)
966 weight[i] = 1;
967 if (weights) {
968 inputweights(chars, weight, &weights);
969 }
970 if (weights)
971 printweights(outfile, 0, chars, weight, "Sites");*/
972 //protinputdata();
973 convertprotseq(seq);
974 nayme = seqname;
975 }
976 if(!thresh)
977 threshold = spp * 3.0;
978 for (i = 0 ; i < (chars) ; i++){
979 weight[i]*=10;
980 threshwt[i] = (long)(threshold * weight[i] + 0.5);
981 }
982 nmlngth = 0;
983 for (i = 0; i < spp; i++) {
984 if(strlen(seqname[i]) > nmlngth) nmlngth = strlen(seqname[i]);
985 }
986
987 protmakevalues();
988 } /* doinput */
989
990
protfillin(node * p,node * left,node * rt)991 void protfillin(node *p, node *left, node *rt)
992 {
993 /* sets up for each node in the tree the aa set for site m
994 at that point and counts the changes. The program
995 spends much of its time in this function */
996 boolean counted, done;
997 aas aa;
998 long s = 0;
999 sitearray ls, rs, qs;
1000 long i, j, m, n;
1001
1002 for (m = 0; m < chars; m++) {
1003 if (left != NULL)
1004 memcpy(ls, left->siteset[m], sizeof(sitearray));
1005 if (rt != NULL)
1006 memcpy(rs, rt->siteset[m], sizeof(sitearray));
1007 if (left == NULL) {
1008 n = rt->numsteps[m];
1009 memcpy(qs, rs, sizeof(sitearray));
1010 }
1011 else if (rt == NULL) {
1012 n = left->numsteps[m];
1013 memcpy(qs, ls, sizeof(sitearray));
1014 }
1015 else {
1016 n = left->numsteps[m] + rt->numsteps[m];
1017 if ((ls[0] == rs[0]) && (ls[1] == rs[1]) && (ls[2] == rs[2])) {
1018 qs[0] = ls[0];
1019 qs[1] = ls[1];
1020 qs[2] = ls[2];
1021 }
1022 else {
1023 counted = false;
1024 for (i = 0; (!counted) && (i <= 3); i++) {
1025 switch (i) {
1026
1027 case 0:
1028 s = ls[0] & rs[0];
1029 break;
1030
1031 case 1:
1032 s = (ls[0] & rs[1]) | (ls[1] & rs[0]);
1033 break;
1034
1035 case 2:
1036 s = (ls[0] & rs[2]) | (ls[1] & rs[1]) | (ls[2] & rs[0]);
1037 break;
1038
1039 case 3:
1040 s = ls[0] | (ls[1] & rs[2]) | (ls[2] & rs[1]) | rs[0];
1041 break;
1042
1043 }
1044 if (s != 0) {
1045 qs[0] = s;
1046 counted = true;
1047 } else
1048 n += weight[m];
1049 }
1050 switch (i) {
1051 case 1:
1052 qs[1] = qs[0] | (ls[0] & rs[1]) | (ls[1] & rs[0]);
1053 qs[2] = qs[1] | (ls[0] & rs[2]) | (ls[1] & rs[1]) | (ls[2] & rs[0]);
1054 break;
1055 case 2:
1056 qs[1] = qs[0] | (ls[0] & rs[2]) | (ls[1] & rs[1]) | (ls[2] & rs[0]);
1057 qs[2] = qs[1] | ls[0] | (ls[1] & rs[2]) | (ls[2] & rs[1]) | rs[0];
1058 break;
1059 case 3:
1060 qs[1] = qs[0] | ls[0] | (ls[1] & rs[2]) | (ls[2] & rs[1]) | rs[0];
1061 qs[2] = qs[1] | ls[1] | (ls[2] & rs[2]) | rs[1];
1062 break;
1063 case 4:
1064 qs[1] = qs[0] | ls[1] | (ls[2] & rs[2]) | rs[1];
1065 qs[2] = qs[1] | ls[2] | rs[2];
1066 break;
1067 }
1068 for (aa = ala; (long)aa <= (long)stop; aa = (aas)((long)aa + 1)) {
1069 done = false;
1070 for (i = 0; (!done) && (i <= 1); i++) {
1071 if (((1L << ((long)aa)) & qs[i]) != 0) {
1072 for (j = i+1; j <= 2; j++)
1073 qs[j] |= translate[(long)aa - (long)ala][j-i];
1074 done = true;
1075 }
1076 }
1077 }
1078 }
1079 }
1080 p->numsteps[m] = n;
1081 memcpy(p->siteset[m], qs, sizeof(sitearray));
1082 }
1083 } /* protfillin */
1084
1085
protpreorder(node * p)1086 void protpreorder(node *p)
1087 {
1088 /* recompute number of steps in preorder taking both ancestoral and
1089 descendent steps into account */
1090 if (p != NULL && !p->tip) {
1091 protfillin (p->next, p->next->next->back, p->back);
1092 protfillin (p->next->next, p->back, p->next->back);
1093 protpreorder (p->next->back);
1094 protpreorder (p->next->next->back);
1095 }
1096 } /* protpreorder */
1097
1098
protadd(node * below,node * newtip,node * newfork)1099 void protadd(node *below, node *newtip, node *newfork)
1100 {
1101 /* inserts the nodes newfork and its left descendant, newtip,
1102 to the tree. below becomes newfork's right descendant */
1103
1104 if (below != treenode[below->index - 1])
1105 below = treenode[below->index - 1];
1106 if (below->back != NULL)
1107 below->back->back = newfork;
1108 newfork->back = below->back;
1109 below->back = newfork->next->next;
1110 newfork->next->next->back = below;
1111 newfork->next->back = newtip;
1112 newtip->back = newfork->next;
1113 if (root == below)
1114 root = newfork;
1115 root->back = NULL;
1116
1117 if (recompute) {
1118 protfillin (newfork, newfork->next->back, newfork->next->next->back);
1119 protpreorder(newfork);
1120 if (newfork != root)
1121 protpreorder(newfork->back);
1122 }
1123 } /* protadd */
1124
1125
protre_move(node ** item,node ** fork)1126 void protre_move(node **item, node **fork)
1127 {
1128 /* removes nodes item and its ancestor, fork, from the tree.
1129 the new descendant of fork's ancestor is made to be
1130 fork's second descendant (other than item). Also
1131 returns pointers to the deleted nodes, item and fork */
1132 node *p, *q, *other;
1133
1134 if ((*item)->back == NULL) {
1135 *fork = NULL;
1136 return;
1137 }
1138 *fork = treenode[(*item)->back->index - 1];
1139 if ((*item) == (*fork)->next->back)
1140 other = (*fork)->next->next->back;
1141 else other = (*fork)->next->back;
1142 if (root == *fork)
1143 root = other;
1144 p = (*item)->back->next->back;
1145 q = (*item)->back->next->next->back;
1146 if (p != NULL) p->back = q;
1147 if (q != NULL) q->back = p;
1148 (*fork)->back = NULL;
1149 p = (*fork)->next;
1150 do {
1151 p->back = NULL;
1152 p = p->next;
1153 } while (p != (*fork));
1154 (*item)->back = NULL;
1155 if (recompute) {
1156 protpreorder(other);
1157 if (other != root) protpreorder(other->back);
1158 }
1159 } /* protre_move */
1160
1161
evaluate(node * r)1162 static void evaluate(node *r)
1163 {
1164 /* determines the number of steps needed for a tree. this is the
1165 minimum number of steps needed to evolve sequences on this tree */
1166 long i, steps, term;
1167 double sum;
1168
1169 sum = 0.0;
1170 for (i = 0; i < (chars); i++) {
1171 steps = r->numsteps[i];
1172 if (steps <= threshwt[i])
1173 term = steps;
1174 else
1175 term = threshwt[i];
1176 sum += term;
1177 if (usertree && which <= maxuser)
1178 fsteps[which - 1][i] = term;
1179 }
1180 if (usertree && which <= maxuser) {
1181 nsteps[which - 1] = sum;
1182 if (which == 1) {
1183 minwhich = 1;
1184 minsteps = sum;
1185 } else if (sum < minsteps) {
1186 minwhich = which;
1187 minsteps = sum;
1188 }
1189 }
1190 like = -sum;
1191 } /* evaluate */
1192
1193
protpostorder(node * p)1194 void protpostorder(node *p)
1195 {
1196 /* traverses a binary tree, calling PROCEDURE fillin at a
1197 node's descendants before calling fillin at the node */
1198 if (p->tip)
1199 return;
1200 protpostorder(p->next->back);
1201 protpostorder(p->next->next->back);
1202 protfillin(p, p->next->back, p->next->next->back);
1203 } /* protpostorder */
1204
1205
protreroot(node * outgroup)1206 void protreroot(node *outgroup)
1207 {
1208 /* reorients tree, putting outgroup in desired position. */
1209 node *p, *q;
1210
1211 if (outgroup->back->index == root->index)
1212 return;
1213 p = root->next;
1214 q = root->next->next;
1215 p->back->back = q->back;
1216 q->back->back = p->back;
1217 p->back = outgroup;
1218 q->back = outgroup->back;
1219 outgroup->back->back = q;
1220 outgroup->back = p;
1221 } /* protreroot */
1222
1223
protsavetraverse(node * p,long * pos,boolean * found)1224 void protsavetraverse(node *p, long *pos, boolean *found)
1225 {
1226 /* sets BOOLEANs that indicate which way is down */
1227 p->bottom = true;
1228 if (p->tip)
1229 return;
1230 p->next->bottom = false;
1231 protsavetraverse(p->next->back, pos,found);
1232 p->next->next->bottom = false;
1233 protsavetraverse(p->next->next->back, pos,found);
1234 } /* protsavetraverse */
1235
1236
protsavetree(long * pos,boolean * found)1237 void protsavetree(long *pos, boolean *found)
1238 {
1239 /* record in place where each species has to be
1240 added to reconstruct this tree */
1241 long i, j;
1242 node *p;
1243 boolean done;
1244
1245 protreroot(treenode[outgrno - 1]);
1246 protsavetraverse(root, pos,found);
1247 for (i = 0; i < (nonodes); i++)
1248 place[i] = 0;
1249 place[root->index - 1] = 1;
1250 for (i = 1; i <= (spp); i++) {
1251 p = treenode[i - 1];
1252 while (place[p->index - 1] == 0) {
1253 place[p->index - 1] = i;
1254 while (!p->bottom)
1255 p = p->next;
1256 p = p->back;
1257 }
1258 if (i > 1) {
1259 place[i - 1] = place[p->index - 1];
1260 j = place[p->index - 1];
1261 done = false;
1262 while (!done) {
1263 place[p->index - 1] = spp + i - 1;
1264 while (!p->bottom)
1265 p = p->next;
1266 p = p->back;
1267 done = (p == NULL);
1268 if (!done)
1269 done = (place[p->index - 1] != j);
1270 }
1271 }
1272 }
1273 } /* protsavetree */
1274
1275
tryadd(node * p,node ** item,node ** nufork)1276 static void tryadd(node *p, node **item, node **nufork)
1277 {
1278 /* temporarily adds one fork and one tip to the tree.
1279 if the location where they are added yields greater
1280 "likelihood" than other locations tested up to that
1281 time, then keeps that location as there */
1282 long pos;
1283 boolean found;
1284 node *rute, *q;
1285
1286 if (p == root)
1287 protfillin(temp, *item, p);
1288 else {
1289 protfillin(temp1, *item, p);
1290 protfillin(temp, temp1, p->back);
1291 }
1292 evaluate(temp);
1293 if (lastrearr) {
1294 if (like < bestlike) {
1295 if ((*item) == (*nufork)->next->next->back) {
1296 q = (*nufork)->next;
1297 (*nufork)->next = (*nufork)->next->next;
1298 (*nufork)->next->next = q;
1299 q->next = (*nufork);
1300 }
1301 }
1302 else if (like >= bstlike2) {
1303 recompute = false;
1304 protadd(p, (*item), (*nufork));
1305 rute = root->next->back;
1306 protsavetree(&pos,&found);
1307 protreroot(rute);
1308 if (like > bstlike2) {
1309 bestlike = bstlike2 = like;
1310 pos = 1;
1311 nextree = 1;
1312 addtree(pos, &nextree, dummy, place, bestrees);
1313 } else {
1314 pos = 0;
1315 findtree(&found, &pos, nextree, place, bestrees);
1316 if (!found) {
1317 if (nextree <= maxtrees)
1318 addtree(pos, &nextree, dummy, place, bestrees);
1319 }
1320 }
1321 protre_move (item, nufork);
1322 recompute = true;
1323 }
1324 }
1325 if (like >= bestyet) {
1326 bestyet = like;
1327 there = p;
1328 }
1329 } /* tryadd */
1330
1331
addpreorder(node * p,node * item,node * nufork)1332 static void addpreorder(node *p, node *item, node *nufork)
1333 {
1334 /* traverses a binary tree, calling PROCEDURE tryadd
1335 at a node before calling tryadd at its descendants */
1336
1337 if (p == NULL)
1338 return;
1339 tryadd(p, &item,&nufork);
1340 if (!p->tip) {
1341 addpreorder(p->next->back, item, nufork);
1342 addpreorder(p->next->next->back, item, nufork);
1343 }
1344 } /* addpreorder */
1345
1346
tryrearr(node * p,boolean * success)1347 static void tryrearr(node *p, boolean *success)
1348 {
1349 /* evaluates one rearrangement of the tree.
1350 if the new tree has greater "likelihood" than the old
1351 one sets success := TRUE and keeps the new tree.
1352 otherwise, restores the old tree */
1353 node *frombelow, *whereto, *forknode, *q;
1354 double oldlike;
1355
1356 if (p->back == NULL)
1357 return;
1358 forknode = treenode[p->back->index - 1];
1359 if (forknode->back == NULL)
1360 return;
1361 oldlike = bestyet;
1362 if (p->back->next->next == forknode)
1363 frombelow = forknode->next->next->back;
1364 else
1365 frombelow = forknode->next->back;
1366 whereto = treenode[forknode->back->index - 1];
1367 if (whereto->next->back == forknode)
1368 q = whereto->next->next->back;
1369 else
1370 q = whereto->next->back;
1371 protfillin(temp1, frombelow, q);
1372 protfillin(temp, temp1, p);
1373 protfillin(temp1, temp, whereto->back);
1374 evaluate(temp1);
1375 if (like - oldlike < LIKE_EPSILON) {
1376 if (p == forknode->next->next->back) {
1377 q = forknode->next;
1378 forknode->next = forknode->next->next;
1379 forknode->next->next = q;
1380 q->next = forknode;
1381 }
1382 }
1383 else {
1384 recompute = false;
1385 protre_move(&p, &forknode);
1386 protfillin(whereto, whereto->next->back, whereto->next->next->back);
1387 recompute = true;
1388 protadd(whereto, p, forknode);
1389 *success = true;
1390 bestyet = like;
1391 }
1392 } /* tryrearr */
1393
1394
repreorder(node * p,boolean * success)1395 static void repreorder(node *p, boolean *success)
1396 {
1397 /* traverses a binary tree, calling PROCEDURE tryrearr
1398 at a node before calling tryrearr at its descendants */
1399 if (p == NULL)
1400 return;
1401 tryrearr(p,success);
1402 if (!p->tip) {
1403 repreorder(p->next->back,success);
1404 repreorder(p->next->next->back,success);
1405 }
1406 } /* repreorder */
1407
1408
rearrange(node ** r)1409 static void rearrange(node **r)
1410 {
1411 /* traverses the tree (preorder), finding any local
1412 rearrangement which decreases the number of steps.
1413 if traversal succeeds in increasing the tree's
1414 "likelihood", PROCEDURE rearrange runs traversal again */
1415 boolean success = true;
1416 while (success) {
1417 success = false;
1418 repreorder(*r, &success);
1419 }
1420 } /* rearrange */
1421
1422
protgetch(Char * c)1423 void protgetch(Char *c)
1424 {
1425 /* get next nonblank character */
1426 do {
1427 if (eoln(intree))
1428 scan_eoln(intree);
1429 *c = gettc(intree);
1430 if (*c == '\n' || *c == '\t')
1431 *c = ' ';
1432 } while (!(*c != ' ' || eoff(intree)));
1433 } /* protgetch */
1434
1435
protaddelement(node ** p,long * nextnode,long * lparens,boolean * names)1436 void protaddelement(node **p,long *nextnode,long *lparens,boolean *names)
1437 {
1438 /* recursive procedure adds nodes to user-defined tree */
1439 node *q;
1440 long i, n;
1441 boolean found;
1442 Char str[nmlngth + 1];
1443
1444 protgetch(&ch);
1445
1446 if (ch == '(' ) {
1447 if ((*lparens) >= spp - 1) {
1448 printf("\nERROR IN USER TREE: TOO MANY LEFT PARENTHESES\n");
1449 exxit(-1);
1450 }
1451 (*nextnode)++;
1452 (*lparens)++;
1453 q = treenode[(*nextnode) - 1];
1454 protaddelement(&q->next->back, nextnode,lparens,names);
1455 q->next->back->back = q->next;
1456 findch(',', &ch, which);
1457 protaddelement(&q->next->next->back, nextnode,lparens,names);
1458 q->next->next->back->back = q->next->next;
1459 findch(')', &ch, which);
1460 *p = q;
1461 return;
1462 }
1463 /*for (i = 0; i < nmlngth; i++)
1464 str[i] = ' ';*/
1465 n = 1;
1466 do {
1467 /*if (ch == '_')
1468 ch = ' ';*/
1469 str[n - 1] = ch;
1470 if (eoln(intree))
1471 scan_eoln(intree);
1472 ch = gettc(intree);
1473 n++;
1474 } while (ch != ',' && ch != ')' && ch != ':' && n <= nmlngth);
1475 str[n-1] = 0;
1476 n = 1;
1477 do {
1478 int l = strlen(nayme[n-1]);
1479 found = true;
1480 for (i = 0; i <= l; i++)
1481 found = (found && ((str[i] == nayme[n - 1][i]) /*||
1482 ((nayme[n - 1][i] == '_') && (str[i] == ' '))*/ ));
1483 if (found) {
1484 if (names[n - 1] == false) {
1485 *p = treenode[n - 1];
1486 names[n - 1] = true;
1487 } else {
1488 printf("\nERROR IN USER TREE: DUPLICATE NAME FOUND -- ");
1489 for (i = 0; i < l; i++)
1490 putchar(nayme[n - 1][i]);
1491 putchar('\n');
1492 exxit(-1);
1493 }
1494 } else
1495 n++;
1496 } while (!(n > spp || found));
1497 if (n <= spp)
1498 return;
1499 printf("CANNOT FIND SPECIES: %s\n", str);
1500 } /* protaddelement */
1501
1502
prottreeread()1503 void prottreeread()
1504 {
1505 /* read in user-defined tree and set it up */
1506 long nextnode, lparens, i;
1507
1508 root = treenode[spp];
1509 nextnode = spp;
1510 root->back = NULL;
1511 names = (boolean *)Malloc(spp*sizeof(boolean));
1512 for (i = 0; i < (spp); i++)
1513 names[i] = false;
1514 lparens = 0;
1515 protaddelement(&root, &nextnode,&lparens,names);
1516 if (ch == '[') {
1517 do
1518 ch = gettc(intree);
1519 while (ch != ']');
1520 ch = gettc(intree);
1521 }
1522 findch(';', &ch, which);
1523 scan_eoln(intree);
1524 free(names);
1525 } /* prottreeread */
1526
1527
protancestset(long * a,long * b,long * c,long * d,long * k)1528 void protancestset(long *a, long *b, long *c, long *d, long *k)
1529 {
1530 /* sets up the aa set array. */
1531 aas aa;
1532 long s, sa, sb;
1533 long i, j, m, n;
1534 boolean counted;
1535
1536 counted = false;
1537 *k = 0;
1538 for (i = 0; i <= 5; i++) {
1539 if (*k < 3) {
1540 s = 0;
1541 if (i > 3)
1542 n = i - 3;
1543 else
1544 n = 0;
1545 for (j = n; j <= (i - n); j++) {
1546 if (j < 3)
1547 sa = a[j];
1548 else
1549 sa = fullset;
1550 for (m = n; m <= (i - j - n); m++) {
1551 if (m < 3)
1552 sb = sa & b[m];
1553 else
1554 sb = sa;
1555 if (i - j - m < 3)
1556 sb &= c[i - j - m];
1557 s |= sb;
1558 }
1559 }
1560 if (counted || s != 0) {
1561 d[*k] = s;
1562 (*k)++;
1563 counted = true;
1564 }
1565 }
1566 }
1567 for (i = 0; i <= 1; i++) {
1568 for (aa = ala; (long)aa <= (long)stop; aa = (aas)((long)aa + 1)) {
1569 if (((1L << ((long)aa)) & d[i]) != 0) {
1570 for (j = i + 1; j <= 2; j++)
1571 d[j] |= translate[(long)aa - (long)ala][j - i];
1572 }
1573 }
1574 }
1575 } /* protancestset */
1576
1577
1578 /*void prothyprint(long b1, long b2, boolean *bottom, node *r,
1579 boolean *nonzero, boolean *maybe)
1580 {
1581 * print out states in sites b1 through b2 at node *
1582 long i;
1583 boolean dot;
1584 Char ch = 0;
1585 aas aa;
1586
1587 if (*bottom) {
1588 if (!outgropt)
1589 fprintf(outfile, " ");
1590 else
1591 fprintf(outfile, "root ");
1592 } else
1593 fprintf(outfile, "%3ld ", r->back->index - spp);
1594 if (r->tip) {
1595 for (i = 0; i < nmlngth; i++)
1596 putc(nayme[r->index - 1][i], outfile);
1597 } else
1598 fprintf(outfile, "%4ld ", r->index - spp);
1599 if (*bottom)
1600 fprintf(outfile, " ");
1601 else if (*nonzero)
1602 fprintf(outfile, " yes ");
1603 else if (*maybe)
1604 fprintf(outfile, " maybe ");
1605 else
1606 fprintf(outfile, " no ");
1607 for (i = b1 - 1; i < b2; i++) {
1608 aa = r->seq[i];
1609 switch (aa) {
1610
1611 case ala:
1612 ch = 'A';
1613 break;
1614
1615 case asx:
1616 ch = 'B';
1617 break;
1618
1619 case cys:
1620 ch = 'C';
1621 break;
1622
1623 case asp:
1624 ch = 'D';
1625 break;
1626
1627 case glu:
1628 ch = 'E';
1629 break;
1630
1631 case phe:
1632 ch = 'F';
1633 break;
1634
1635 case gly:
1636 ch = 'G';
1637 break;
1638
1639 case his:
1640 ch = 'H';
1641 break;
1642
1643 case ileu:
1644 ch = 'I';
1645 break;
1646
1647 case lys:
1648 ch = 'K';
1649 break;
1650
1651 case leu:
1652 ch = 'L';
1653 break;
1654
1655 case met:
1656 ch = 'M';
1657 break;
1658
1659 case asn:
1660 ch = 'N';
1661 break;
1662
1663 case pro:
1664 ch = 'P';
1665 break;
1666
1667 case gln:
1668 ch = 'Q';
1669 break;
1670
1671 case arg:
1672 ch = 'R';
1673 break;
1674
1675 case ser:
1676 ch = 'S';
1677 break;
1678
1679 case ser1:
1680 ch = 'S';
1681 break;
1682
1683 case ser2:
1684 ch = 'S';
1685 break;
1686
1687 case thr:
1688 ch = 'T';
1689 break;
1690
1691 case trp:
1692 ch = 'W';
1693 break;
1694
1695 case tyr:
1696 ch = 'Y';
1697 break;
1698
1699 case val:
1700 ch = 'V';
1701 break;
1702
1703 case glx:
1704 ch = 'Z';
1705 break;
1706
1707 case del:
1708 ch = '-';
1709 break;
1710
1711 case stop:
1712 ch = '*';
1713 break;
1714
1715 case unk:
1716 ch = 'X';
1717 break;
1718
1719 case quest:
1720 ch = '?';
1721 break;
1722 }
1723 if (!(*bottom) && dotdiff)
1724 dot = (r->siteset[i] [0] == treenode[r->back->index - 1]->siteset[i][0]
1725 || ((r->siteset[i][0] &
1726 (~((1L << ((long)ser1)) | (1L << ((long)ser2)) |
1727 (1L << ((long)ser))))) == 0 &&
1728 (treenode[r->back->index - 1]->siteset[i] [0] &
1729 (~((1L << ((long)ser1)) | (1L << ((long)ser2)) |
1730 (1L << ((long)ser))))) == 0));
1731 else
1732 dot = false;
1733 if (dot)
1734 putc('.', outfile);
1735 else
1736 putc(ch, outfile);
1737 if ((i + 1) % 10 == 0)
1738 putc(' ', outfile);
1739 }
1740 putc('\n', outfile);
1741 } * prothyprint *
1742
1743
1744 void prothyptrav(node *r, sitearray *hypset, long b1, long b2, long *k,
1745 boolean *bottom, sitearray nothing)
1746 {
1747 boolean maybe, nonzero;
1748 long i;
1749 aas aa;
1750 long anc = 0, hset;
1751 gseq *ancset, *temparray;
1752
1753 protgnu(&ancset);
1754 protgnu(&temparray);
1755 maybe = false;
1756 nonzero = false;
1757 for (i = b1 - 1; i < b2; i++) {
1758 if (!r->tip) {
1759 protancestset(hypset[i], r->next->back->siteset[i],
1760 r->next->next->back->siteset[i], temparray->seq[i], k);
1761 memcpy(r->siteset[i], temparray->seq[i], sizeof(sitearray));
1762 }
1763 if (!(*bottom))
1764 anc = treenode[r->back->index - 1]->siteset[i][0];
1765 if (!r->tip) {
1766 hset = r->siteset[i][0];
1767 r->seq[i] = quest;
1768 for (aa = ala; (long)aa <= (long)stop; aa = (aas)((long)aa + 1)) {
1769 if (hset == 1L << ((long)aa))
1770 r->seq[i] = aa;
1771 }
1772 if (hset == ((1L << ((long)asn)) | (1L << ((long)asp))))
1773 r->seq[i] = asx;
1774 if (hset == ((1L << ((long)gln)) | (1L << ((long)gly))))
1775 r->seq[i] = glx;
1776 if (hset == ((1L << ((long)ser1)) | (1L << ((long)ser2))))
1777 r->seq[i] = ser;
1778 if (hset == fullset)
1779 r->seq[i] = unk;
1780 }
1781 nonzero = (nonzero || (r->siteset[i][0] & anc) == 0);
1782 maybe = (maybe || r->siteset[i][0] != anc);
1783 }
1784 prothyprint(b1, b2,bottom,r,&nonzero,&maybe);
1785 *bottom = false;
1786 if (!r->tip) {
1787 memcpy(temparray->seq, r->next->back->siteset, chars*sizeof(sitearray));
1788 for (i = b1 - 1; i < b2; i++)
1789 protancestset(hypset[i], r->next->next->back->siteset[i], nothing,
1790 ancset->seq[i], k);
1791 prothyptrav(r->next->back, ancset->seq, b1, b2,k,bottom,nothing );
1792 for (i = b1 - 1; i < b2; i++)
1793 protancestset(hypset[i], temparray->seq[i], nothing, ancset->seq[i],k);
1794 prothyptrav(r->next->next->back, ancset->seq, b1, b2, k,bottom,nothing);
1795 }
1796 protchuck(temparray);
1797 protchuck(ancset);
1798 } * prothyptrav *
1799
1800
1801 void prothypstates(long *k)
1802 {
1803 * fill in and describe states at interior nodes *
1804 boolean bottom;
1805 sitearray nothing;
1806 long i, n;
1807 seqptr hypset;
1808
1809 fprintf(outfile, "\nFrom To Any Steps? State at upper node\n");
1810 fprintf(outfile, " ");
1811 fprintf(outfile, "( . means same as in the node below it on tree)\n\n");
1812 memcpy(nothing, translate[(long)quest - (long)ala], sizeof(sitearray));
1813 hypset = (seqptr)Malloc(chars*sizeof(sitearray));
1814 for (i = 0; i < (chars); i++)
1815 memcpy(hypset[i], nothing, sizeof(sitearray));
1816 bottom = true;
1817 for (i = 1; i <= ((chars - 1) / 40 + 1); i++) {
1818 putc('\n', outfile);
1819 n = i * 40;
1820 if (n > chars)
1821 n = chars;
1822 bottom = true;
1823 prothyptrav(root, hypset, i * 40 - 39, n, k,&bottom,nothing);
1824 }
1825 free(hypset);
1826 } * prothypstates */
1827
1828
describe()1829 static void describe()
1830 {
1831 /* prints ancestors, steps and table of numbers of steps in
1832 each site */
1833 /*long i,j,k;
1834
1835 if (treeprint)
1836 fprintf(outfile, "\nrequires a total of %10.3f\n", like / -10);
1837 if (stepbox) {
1838 putc('\n', outfile);
1839 if (weights)
1840 fprintf(outfile, "weighted ");
1841 fprintf(outfile, "steps in each position:\n");
1842 fprintf(outfile, " ");
1843 for (i = 0; i <= 9; i++)
1844 fprintf(outfile, "%4ld", i);
1845 fprintf(outfile, "\n *-----------------------------------------\n");
1846 for (i = 0; i <= (chars / 10); i++) {
1847 fprintf(outfile, "%5ld", i * 10);
1848 putc('!', outfile);
1849 for (j = 0; j <= 9; j++) {
1850 k = i * 10 + j;
1851 if (k == 0 || k > chars)
1852 fprintf(outfile, " ");
1853 else
1854 fprintf(outfile, "%4ld", root->numsteps[k - 1] / 10);
1855 }
1856 putc('\n', outfile);
1857 }
1858 }
1859 if (ancseq) {
1860 prothypstates(&k);
1861 putc('\n', outfile);
1862 }
1863 putc('\n', outfile);*/
1864 if (trout) {
1865 col = 0;
1866 treeout(root, nextree, &col, root);
1867 }
1868 } /* describe */
1869
1870
maketree(char * toevaluate)1871 static void maketree(char *toevaluate)
1872 {
1873 /* constructs a binary tree from the pointers in treenode.
1874 adds each node at location which yields highest "likelihood"
1875 then rearranges the tree for greatest "likelihood" */
1876 long i, j, numtrees;
1877 double gotlike;
1878 node *item, *nufork, *dummy;
1879
1880 if (setjmp(lngjmp_env)) {
1881 trout = false;
1882 goto way_out;
1883 }
1884
1885 if (!usertree) {
1886 for (i = 1; i <= (spp); i++)
1887 enterorder[i - 1] = i;
1888 if (jumble)
1889 randumize(seed, enterorder);
1890 root = treenode[enterorder[0] - 1];
1891 recompute = true;
1892 protadd(treenode[enterorder[0] - 1], treenode[enterorder[1] - 1],
1893 treenode[spp]);
1894 /*if (progress) {
1895 printf("\nAdding species:\n");
1896 writename(0, 2, enterorder);
1897 }*/
1898 lastrearr = false;
1899 for (i = 3; i <= (spp); i++) {
1900 bestyet = -30.0*spp*chars;
1901 there = root;
1902 item = treenode[enterorder[i - 1] - 1];
1903 nufork = treenode[spp + i - 2];
1904 addpreorder(root, item, nufork);
1905 protadd(there, item, nufork);
1906 like = bestyet;
1907 rearrange(&root);
1908 /*if (progress)
1909 writename(i - 1, 1, enterorder);*/
1910 lastrearr = (i == spp);
1911 if (lastrearr) {
1912 /*if (progress) {
1913 printf("\nDoing global rearrangements\n");
1914 printf(" !");
1915 for (j = 1; j <= nonodes; j++)
1916 if ( j % (( nonodes / 72 ) + 1 ) == 0 )
1917 putchar('-');
1918 printf("!\n");
1919 }*/
1920 bestlike = bestyet;
1921 if (jumb == 1) {
1922 bstlike2 = bestlike = -30.0*spp*chars;
1923 nextree = 1;
1924 }
1925 do {
1926 /*if (progress)
1927 printf(" ");*/
1928 gotlike = bestlike;
1929 for (j = 0; j < (nonodes); j++) {
1930 //bestyet = -30.0*spp*chars;
1931 item = treenode[j];
1932 if (item != root) {
1933 bestyet = -30.0*spp*chars;
1934 nufork = treenode[treenode[j]->back->index - 1];
1935 protre_move(&item, &nufork);
1936 there = root;
1937 addpreorder(root, item, nufork);
1938 protadd(there, item, nufork);
1939 }
1940 /*if (progress) {
1941 if ( j % (( nonodes / 72 ) + 1 ) == 0 )
1942 putchar('.');
1943 fflush(stdout);
1944 }*/
1945 if (tree_build_interrupted) {
1946 longjmp(lngjmp_env, 0);
1947 }
1948 }
1949 /*if (progress)
1950 putchar('\n');*/
1951 } while (bestlike > gotlike);
1952 }
1953 }
1954 /*if (progress)
1955 putchar('\n');*/
1956 for (i = spp - 1; i >= 1; i--)
1957 protre_move(&treenode[i], &dummy);
1958 if (jumb == njumble) {
1959 /*if (treeprint) {
1960 putc('\n', outfile);
1961 if (nextree == 2)
1962 fprintf(outfile, "One most parsimonious tree found:\n");
1963 else
1964 fprintf(outfile, "%6ld trees in all found\n", nextree - 1);
1965 }*/
1966 if (nextree > maxtrees + 1) {
1967 /*if (treeprint)
1968 fprintf(outfile, "here are the first%4ld of them\n", (long)maxtrees);*/
1969 nextree = maxtrees + 1;
1970 }
1971 /*if (treeprint)
1972 putc('\n', outfile);*/
1973 recompute = false;
1974 for (i = 0; i <= (nextree - 2); i++) {
1975 root = treenode[0];
1976 protadd(treenode[0], treenode[1], treenode[spp]);
1977 for (j = 3; j <= (spp); j++)
1978 protadd(treenode[bestrees[i].btree[j - 1] - 1], treenode[j - 1],
1979 treenode[spp + j - 2]);
1980 protreroot(treenode[outgrno - 1]);
1981 protpostorder(root);
1982 evaluate(root);
1983 //printree(root, 1.0);
1984 describe();
1985 for (j = 1; j < (spp); j++)
1986 protre_move(&treenode[j], &dummy);
1987 }
1988 }
1989 } else {
1990 /* Open in binary: ftell() is broken for UNIX line-endings under WIN32 */
1991 //openfile(&intree,INTREE,"input tree file", "rb",progname,intreename);
1992 //numtrees = countsemic(&intree);
1993 numtrees = 1;
1994 /*if (numtrees > 2)
1995 initseed(&inseed, &inseed0, seed);*/
1996 /*if (treeprint) {
1997 fprintf(outfile, "User-defined tree");
1998 if (numtrees > 1)
1999 putc('s', outfile);
2000 fprintf(outfile, ":\n\n\n\n");
2001 }*/
2002 which = 1;
2003 while (which <= numtrees) {
2004 char *tmpfname = strdup(create_tmp_filename_from_C());
2005 intree = fl_fopen_from_C(tmpfname, "rb+");
2006 fwrite(toevaluate, strlen(toevaluate), 1, intree);
2007 fflush(intree);
2008 fseek(intree, SEEK_SET, 0);
2009 prottreeread();
2010 fclose(intree);
2011 fl_unlink_from_C(tmpfname);
2012 free(tmpfname);
2013 if (outgropt)
2014 protreroot(treenode[outgrno - 1]);
2015 protpostorder(root);
2016 evaluate(root);
2017 //printree(root, 1.0);
2018 describe();
2019 which++;
2020 }
2021 /*printf("\n");
2022 FClose(intree);
2023 putc('\n', outfile);*/
2024 if (numtrees > 1 && chars > 1 )
2025 standev(chars, numtrees, minwhich, minsteps, nsteps, fsteps, seed);
2026 }
2027 /*if (jumb == njumble && progress) {
2028 printf("Output written to file \"%s\"\n", outfilename);
2029 if (trout)
2030 printf("\nTrees also written onto file \"%s\"\n", outtreename);
2031 }*/
2032 way_out:
2033 return;
2034 } /* maketree */
2035
2036
protpars(char ** seq,char ** seqname,int notu,int njumbles,int * pjumble_no,int * steps,char * toevaluate,int arg_maxtrees,int * bt_weights,int unused)2037 char* protpars(char** seq, char** seqname, int notu, int njumbles, int *pjumble_no, int *steps, char* toevaluate, int arg_maxtrees/*unused*/, int *bt_weights, int unused)
2038 { /* Protein parsimony by uphill search */
2039 //init(argc,argv);
2040 progname = "Protpars";
2041 //openfile(&infile,INFILE,"input file", "r",argv[0],infilename);
2042 //openfile(&outfile,OUTFILE,"output file", "w",argv[0],outfilename);
2043
2044 ibmpc = IBMCRT;
2045 ansi = ANSICRT;
2046 garbage = NULL;
2047 mulsets = false;
2048 msets = 1;
2049 firstset = true;
2050 code();
2051 setup();
2052 doinit(notu, strlen(seq[0]), toevaluate);
2053 if (!toevaluate && njumbles > 0) {
2054 njumble = njumbles;
2055 jumble = true;
2056 }
2057 /*if (weights || justwts)
2058 openfile(&weightfile,WEIGHTFILE,"weights file","r",argv[0],weightfilename);*/
2059 if (trout) {
2060 outtreename = strdup(create_tmp_filename_from_C()); //openfile(&outtree,OUTTREE,"output tree file", "w",argv[0],outtreename);
2061 outtree = fl_fopen_from_C(outtreename, "wb");
2062 }
2063 for (ith = 1; ith <= msets; ith++) {
2064 int i;
2065 if (bt_weights != NULL) for (i = 0; i < chars; i++) weight[i] = bt_weights[i];
2066 else for (i = 0; i < chars; i++) weight[i] = 1;
2067 doinput(seq, seqname);
2068 if (ith == 1)
2069 firstset = false;
2070 /*if (msets > 1 && !justwts) {
2071 fprintf(outfile, "Data set # %ld:\n\n",ith);
2072 if (progress)
2073 printf("Data set # %ld:\n\n",ith);
2074 }*/
2075 for (jumb = 1; jumb <= njumble; jumb++) {
2076 maketree(toevaluate);
2077 if (jumble && pjumble_no) {
2078 *steps = (int)(bestyet / -10);
2079 *pjumble_no = jumb;
2080 awake_from_C();
2081 }
2082 }
2083 protfreetree();
2084 }
2085 /*FClose(infile);
2086 FClose(outfile);
2087 FClose(outtree);
2088 printf("\nDone.\n\n");*/
2089 char *tree = NULL;
2090 FClose(outtree);
2091 if (trout) {
2092 outtree = fl_fopen_from_C(outtreename, "rb"); // read tree from tmp file and return it as a char*
2093 fseek(outtree, 0, SEEK_END);
2094 long L=ftell(outtree);
2095 tree = (char*)malloc(L+1);
2096 fseek(outtree, 0, SEEK_SET);
2097 fread(tree, L, 1, outtree);
2098 tree[L] = 0;
2099 fclose(outtree);
2100 *steps = (int)(like / -10);
2101 }
2102 fl_unlink_from_C(outtreename);
2103 free(outtreename);
2104 return tree;
2105 } /* Protein parsimony by uphill search */
2106