1 #include <U2Core/disable-warnings.h>
2 U2_DISABLE_WARNINGS
3
4 #include "phylip.h"
5 #include "cons.h"
6
7 int tree_pairing;
8
9 extern Phylip_Char outfilename[FNMLNGTH], intreename[FNMLNGTH], intree2name[FNMLNGTH], outtreename[FNMLNGTH];
10 node *root;
11
12 long numopts, outgrno_cons, col, setsz;
13 long maxgrp; /* max. no. of groups in all trees found */
14
15 boolean trout, firsttree, noroot, outgropt_cons, didreroot, prntsets,
16 treeprint_cons, goteof, strict, mr=false, mre=false,
17 ml=false; /* initialized all false for Treedist */
18 extern boolean progress;
19 pointarray nodep_cons;
20 pointarray treenode;
21 group_type **grouping, **grping2, **group2;/* to store groups found */
22 double *lengths, *lengths2;
23 long **order, **order2, lasti;
24 group_type *fullset;
25 node *grbg;
26 long tipy;
27
28 double **timesseen, **tmseen2, **times2 ;
29 double *tchange2;
30 double trweight, ntrees, mlfrac;
31
32 /* prototypes */
33 void censor(void);
34 boolean compatible(long, long);
35 void elimboth(long);
36 void enterpartition (group_type*, long*);
37 void reorient(node* n);
38
39 /* begin hash table code */
40
41
42 hashtype hashp;
43
44
45 /**
46 * namesGetBucket - return the bucket for a given name
47 */
namesGetBucket(plotstring searchname)48 long namesGetBucket(plotstring searchname) {
49 long i;
50 long sum = 0;
51
52 for (i = 0; (i < MAXNCH) && (searchname[i] != '\0'); i++) {
53 sum += searchname[i];
54 }
55 return (sum % NUM_BUCKETS);
56 }
57
58
59 /**
60 * namesAdd - add a name to the hash table
61 *
62 * The argument is added at the head of the appropriate linked list. No
63 * checking is done for duplicates. The caller can call
64 * namesSearch to check for an existing name prior to calling
65 * namesAdd.
66 */
namesAdd(plotstring addname)67 void namesAdd(plotstring addname) {
68 long bucket = namesGetBucket(addname);
69 namenode *hp, *temp;
70
71 temp = hashp[bucket];
72 hashp[bucket] = (namenode *)Malloc(sizeof(namenode));
73 hp = hashp[bucket];
74 strcpy(hp->naym, addname);
75 hp->next = temp;
76 hp->hitCount = 0;
77 }
78
79 /**
80 * namesSearch - search for a name in the hash table
81 *
82 * Return true if the name is found, else false.
83 */
namesSearch(plotstring searchname)84 boolean namesSearch(plotstring searchname) {
85 long i = namesGetBucket(searchname);
86 namenode *p;
87
88 p = hashp[i];
89 if (p == NULL) {
90 return false;
91 }
92 do {
93 if (strcmp(searchname,p->naym) == 0) {
94 p->hitCount++;
95 return true;
96 }
97 p = p->next;
98 } while (p != NULL);
99
100 return false;
101 }
102
103 /**
104 * Go through hash table and check that the hit count on all entries is one.
105 * If it is zero, then a species was missed, if it is two, then there is a
106 * duplicate species.
107 */
108
namesCheckTable(void)109 void namesCheckTable(void) {
110 namenode *p;
111 long i;
112
113 for (i=0; i< NUM_BUCKETS; i++) {
114 p = hashp[i];
115 while (p != NULL){
116 if(p->hitCount >1){
117 printf("\n\nERROR in user tree: duplicate name found: ");
118 puts(p->naym);
119 printf("\n\n");
120 exxit(-1);
121 } else if(p->hitCount == 0){
122 printf("\n\nERROR in user tree: name %s not found\n\n\n",
123 p->naym);
124 exxit(-1);
125 }
126 p->hitCount = 0;
127 p = p->next;
128 }
129 }
130 }
131
132 /**
133 * namesClearTable - empty names out of the table and
134 * return allocated memory
135 */
namesClearTable(void)136 void namesClearTable(void) {
137 long i;
138 namenode *p, *temp;
139
140 for (i=0; i< NUM_BUCKETS; i++) {
141 p = hashp[i];
142 if (p != NULL) {
143 do {
144 temp = p;
145 p = p->next;
146 free(temp);
147 } while (p != NULL);
148 hashp[i] = NULL;
149 }
150 }
151 }
152 /* end hash table code */
153
consens_starter(const char * filename,double fraction,bool _strict,bool _mre,bool _mr,bool _m1)154 void consens_starter( const char* filename, double fraction, bool _strict, bool _mre, bool _mr, bool _m1 )
155 {
156 /* Local variables added by Dan F. */
157 pattern_elm ***pattern_array;
158 long trees_in = 0;
159 long i, j;
160 long tip_count = 0;
161 node *p, *q;
162 intree = fopen(filename, "rb");
163 if(!intree){
164 exxit(-1);
165 }
166
167 /* Initial settings */
168 ibmpc = IBMCRT;
169 ansi = ANSICRT;
170 didreroot = false;
171 firsttree = true;
172 spp = 0 ;
173 col = 0 ;
174 /* This is needed so functions in cons.c work */
175 tree_pairing = NO_PAIRING ;
176
177 strict = _strict;
178 mr = _mr;
179 mre = _mre;
180 ml = _m1;
181 mlfrac = fraction;
182 noroot = true;
183 numopts = 0;
184 outgrno_cons = 1;
185 outgropt_cons = false;
186 trout = false;
187 prntsets = true;
188 progress = false;
189 treeprint_cons = false;
190
191 ntrees = 0.0;
192 maxgrp = 32767; /* initial size of set hash table */
193 lasti = -1;
194
195 trees_in = countsemic(&intree);
196 countcomma(&intree,&tip_count);
197 tip_count++; /* countcomma does a raw comma count, tips is one greater */
198
199
200 /* Read the tree file and put together grouping, order, and timesseen */
201 read_groups (&pattern_array, trees_in, tip_count, intree);
202 /* Compute the consensus tree. */
203
204 nodep_cons = (pointarray)Malloc(2*(1+spp)*sizeof(node *));
205 for (i = 0; i < spp; i++) {
206 nodep_cons[i] = (node *)Malloc(sizeof(node));
207 for (j = 0; j < MAXNCH; j++)
208 nodep_cons[i]->nayme[j] = '\0';
209 strncpy(nodep_cons[i]->nayme, nayme[i], MAXNCH);
210 }
211 for (i = spp; i < 2*(1+spp); i++)
212 nodep_cons[i] = NULL;
213 consensus(pattern_array, trees_in);
214 printf("\n");
215
216 /*if (progress)
217 printf("Output written to file \"%s\"\n\n", outfilename);
218 for (i = 0; i < spp; i++)
219 free(nodep_cons[i]);
220 for (i = spp; i < 2*(1 + spp); i++) {
221 if (nodep_cons[i] != NULL) {
222 p = nodep_cons[i]->next;
223 do {
224 q = p->next;
225 free(p);
226 p = q;
227 } while (p != nodep_cons[i]);
228 free(p);
229 }
230 }
231 free(nodep_cons);
232 FClose(intree);*/
233
234 #ifdef MAC
235 fixmacfile(outfilename);
236
237 #endif
238 printf("Done.\n\n");
239 }
240
consens_free_res()241 void consens_free_res(){
242 node *p, *q;
243 for (int i = 0; i < spp; i++)
244 free(nodep_cons[i]);
245 for (int i = spp; i < 2*(1 + spp); i++) {
246 if (nodep_cons[i] != NULL) {
247 p = nodep_cons[i]->next;
248 do {
249 q = p->next;
250 free(p);
251 p = q;
252 } while (p != nodep_cons[i]);
253 free(p);
254 }
255 }
256 free(nodep_cons);
257 FClose(intree);
258
259 #ifdef MAC
260 fixmacfile(outfilename);
261
262 #endif
263 printf("Done.\n\n");
264 }
265
266
initconsnode(node ** p,node ** grbg,node * q,long len,long nodei,long * ntips,long * parens,initops whichinit,pointarray treenode,pointarray nodep,Phylip_Char * str,Phylip_Char * ch,FILE * intree)267 void initconsnode(node **p, node **grbg, node *q, long len, long nodei,
268 long *ntips, long *parens, initops whichinit,
269 pointarray treenode, pointarray nodep, Phylip_Char *str,
270 Phylip_Char *ch, FILE *intree)
271 {
272 /* initializes a node */
273 long i;
274 Phylip_Char c;
275 boolean minusread;
276 double valyew, divisor, fracchange;
277
278 switch (whichinit) {
279 case bottom:
280 gnu(grbg, p);
281 (*p)->index = nodei;
282 (*p)->tip = false;
283 for (i=0; i<MAXNCH; i++)
284 (*p)->nayme[i] = '\0';
285 nodep[(*p)->index - 1] = (*p);
286 (*p)->v = 0;
287 break;
288 case nonbottom:
289 gnu(grbg, p);
290 (*p)->index = nodei;
291 (*p)->v = 0;
292 break;
293 case tip:
294 (*ntips)++;
295 gnu(grbg, p);
296 nodep[(*ntips) - 1] = *p;
297 setupnode(*p, *ntips);
298 (*p)->tip = true;
299 strncpy ((*p)->nayme, str, MAXNCH);
300 if (firsttree && prntsets) {
301 // fprintf(outfile, " %ld. ", *ntips);
302 //for (i = 0; i < len; i++)
303 // putc(str[i], outfile);
304 // putc('\n', outfile);
305 //if ((*ntips > 0) && (((*ntips) % 10) == 0))
306 // putc('\n', outfile);
307 }
308 (*p)->v = 0;
309 break;
310 case length:
311 processlength(&valyew, &divisor, ch, &minusread, intree, parens);
312 fracchange = 1.0;
313 (*p)->v = valyew / divisor / fracchange;
314 break;
315 case treewt:
316 if (!eoln(intree)) {
317 if (fscanf(intree, "%lf", &trweight) == 1) {
318 getch(ch, parens, intree);
319 if (*ch != ']') {
320 printf("\n\nERROR: Missing right square bracket\n\n");
321 exxit(-1);
322 } else {
323 getch(ch, parens, intree);
324 if (*ch != ';') {
325 printf("\n\nERROR: Missing semicolon after square brackets\n\n");
326 exxit(-1);
327 }
328 }
329 }
330 else {
331 printf("\n\nERROR: Expecting tree weight in last comment field\n\n");
332 exxit(-1);
333 }
334 }
335 break;
336 case unittrwt:
337 /* This comes not only when setting trweight but also at the end of
338 * any tree. The following code saves the current position in a
339 * file and reads to a new line. If there is a new line then we're
340 * at the end of tree, otherwise warn the user. This function should
341 * really leave the file alone, so once we're done with 'intree'
342 * we seek the position back so that it doesn't look like we did
343 * anything */
344 trweight = 1.0 ;
345 i = ftell (intree);
346 c = ' ';
347 while (c == ' ') {
348 if (eoff(intree)) {
349 fseek(intree,i,SEEK_SET);
350 return;
351 }
352 c = gettc(intree);
353 }
354 fseek(intree,i,SEEK_SET);
355 if ( c != '\n' && c!= '\r')
356 printf("WARNING: Tree weight set to 1.0\n");
357 if ( c == '\r' )
358 if ( (c == gettc(intree)) != '\n')
359 ungetc(c, intree);
360 break;
361 case hsnolength:
362 (*p)->v = -1; /* signal value that a length is missing */
363 break;
364 default: /* cases hslength, iter, hsnolength */
365 break; /* should there be an error message here?*/
366 }
367 } /* initconsnode */
368
369
censor(void)370 void censor(void)
371 {
372 /* delete groups that are too rare to be in the consensus tree */
373 long i;
374
375 i = 1;
376 do {
377 if (timesseen[i-1])
378 if (!(mre || (mr && (2*(*timesseen[i-1]) > ntrees))
379 || (ml && ((*timesseen[i-1]) >= int(mlfrac*ntrees+0.5)))
380 || (strict && ((*timesseen[i-1]) == ntrees)))) {
381 free(grouping[i - 1]);
382 free(timesseen[i - 1]);
383 grouping[i - 1] = NULL;
384 timesseen[i - 1] = NULL;
385 }
386 i++;
387 } while (i < maxgrp);
388 } /* censor */
389
390
compress(long * n)391 void compress(long *n)
392 {
393 /* push all the nonempty subsets to the front end of their array */
394 long i, j;
395
396 i = 1;
397 j = 1;
398 do {
399 while (grouping[i - 1] != NULL)
400 i++;
401 if (j <= i)
402 j = i + 1;
403 while ((grouping[j - 1] == NULL) && (j < maxgrp))
404 j++;
405 if (j < maxgrp) {
406 grouping[i - 1] = (group_type *)Malloc(setsz * sizeof(group_type));
407 timesseen[i - 1] = (double *)Malloc(sizeof(double));
408 memcpy(grouping[i - 1], grouping[j - 1], setsz * sizeof(group_type));
409 *timesseen[i - 1] = *timesseen[j - 1];
410 free(grouping[j - 1]);
411 free(timesseen[j - 1]);
412 grouping[j - 1] = NULL;
413 timesseen[j - 1] = NULL;
414 }
415 } while (j != maxgrp);
416 (*n) = i - 1;
417 } /* compress */
418
419
sort(long n)420 void sort(long n)
421 {
422 /* Shell sort keeping grouping, timesseen in same order */
423 long gap, i, j;
424 group_type *stemp;
425 double rtemp;
426
427 gap = n / 2;
428 stemp = (group_type *)Malloc(setsz * sizeof(group_type));
429 while (gap > 0) {
430 for (i = gap + 1; i <= n; i++) {
431 j = i - gap;
432 while (j > 0) {
433 if (*timesseen[j - 1] < *timesseen[j + gap - 1]) {
434 memcpy(stemp, grouping[j - 1], setsz * sizeof(group_type));
435 memcpy(grouping[j - 1], grouping[j + gap - 1], setsz * sizeof(group_type));
436 memcpy(grouping[j + gap - 1], stemp, setsz * sizeof(group_type));
437 rtemp = *timesseen[j - 1];
438 *timesseen[j - 1] = *timesseen[j + gap - 1];
439 *timesseen[j + gap - 1] = rtemp;
440 }
441 j -= gap;
442 }
443 }
444 gap /= 2;
445 }
446 free(stemp);
447 } /* sort */
448
449
compatible(long i,long j)450 boolean compatible(long i, long j)
451 {
452 /* are groups i and j compatible? */
453 boolean comp;
454 long k;
455
456 comp = true;
457 for (k = 0; k < setsz; k++)
458 if ((grouping[i][k] & grouping[j][k]) != 0)
459 comp = false;
460 if (!comp) {
461 comp = true;
462 for (k = 0; k < setsz; k++)
463 if ((grouping[i][k] & ~grouping[j][k]) != 0)
464 comp = false;
465 if (!comp) {
466 comp = true;
467 for (k = 0; k < setsz; k++)
468 if ((grouping[j][k] & ~grouping[i][k]) != 0)
469 comp = false;
470 if (!comp) {
471 comp = noroot;
472 if (comp) {
473 for (k = 0; k < setsz; k++)
474 if ((fullset[k] & ~grouping[i][k] & ~grouping[j][k]) != 0)
475 comp = false;
476 }
477 }
478 }
479 }
480 return comp;
481 } /* compatible */
482
483
eliminate(long * n,long * n2)484 void eliminate(long *n, long *n2)
485 {
486 /* eliminate groups incompatible with preceding ones */
487 long i, j, k;
488 boolean comp;
489
490 for (i = 2; i <= (*n); i++) {
491 comp = true;
492 for (j = 0; comp && (j <= i - 2); j++) {
493 if ((timesseen[j] != NULL) && *timesseen[j] > 0) {
494 comp = compatible(i-1,j);
495 if (!comp) {
496 (*n2)++;
497 times2[(*n2) - 1] = (double *)Malloc(sizeof(double));
498 group2[(*n2) - 1] = (group_type *)Malloc(setsz * sizeof(group_type));
499 *times2[(*n2) - 1] = *timesseen[i - 1];
500 memcpy(group2[(*n2) - 1], grouping[i - 1], setsz * sizeof(group_type));
501 *timesseen[i - 1] = 0.0;
502 for (k = 0; k < setsz; k++)
503 grouping[i - 1][k] = 0;
504 }
505 }
506 }
507 if (*timesseen[i - 1] == 0.0) {
508 free(grouping[i - 1]);
509 free(timesseen[i - 1]);
510 timesseen[i - 1] = NULL;
511 grouping[i - 1] = NULL;
512 }
513 }
514 } /* eliminate */
515
516
printset(long n)517 void printset(long n)
518 {
519 /* print out the n sets of species */
520 long i, j, k, size;
521 boolean noneprinted;
522
523 printf("\nSet (species in order) ");
524 for (i = 1; i <= spp - 25; i++)
525 putchar(' ');
526 printf(" How many times out of %7.2f\n\n", ntrees);
527 noneprinted = true;
528 for (i = 0; i < n; i++) {
529 if ((timesseen[i] != NULL) && (*timesseen[i] > 0)) {
530 size = 0;
531 k = 0;
532 for (j = 1; j <= spp; j++) {
533 if (j == ((k+1)*SETBITS+1)) k++;
534 if (((1L << (j - 1 - k*SETBITS)) & grouping[i][k]) != 0)
535 size++;
536 }
537 if (size != 1 && !(noroot && size >= (spp-1))) {
538 noneprinted = false;
539 k = 0;
540 for (j = 1; j <= spp; j++) {
541 if (j == ((k+1)*SETBITS+1)) k++;
542 if (((1L << (j - 1 - k*SETBITS)) & grouping[i][k]) != 0)
543 putchar('*');
544 else
545 putchar('.');
546 if (j % 10 == 0)
547 putchar(' ');
548 }
549 for (j = 1; j <= 23 - spp; j++)
550 putchar(' ');
551 printf(" %5.2f\n", *timesseen[i]);
552 }
553 }
554 }
555 if (noneprinted)
556 printf(" NONE\n");
557 } /* printset */
558
559
bigsubset(group_type * st,long n)560 void bigsubset(group_type *st, long n)
561 {
562 /* Find a maximal subset of st among the n groupings,
563 to be the set at the base of the tree. */
564 long i, j;
565 group_type *su;
566 boolean max, same;
567
568 su = (group_type *)Malloc(setsz * sizeof(group_type));
569 for (i = 0; i < setsz; i++)
570 su[i] = 0;
571 for (i = 0; i < n; i++) {
572 max = true;
573 for (j = 0; j < setsz; j++)
574 if ((grouping[i][j] & ~st[j]) != 0)
575 max = false;
576 if (max) {
577 same = true;
578 for (j = 0; j < setsz; j++)
579 if (grouping[i][j] != st[j])
580 same = false;
581 max = !same;
582 }
583 if (max) {
584 for (j = 0; j < setsz; j ++)
585 if ((su[j] & ~grouping[i][j]) != 0)
586 max = false;
587 if (max) {
588 same = true;
589 for (j = 0; j < setsz; j ++)
590 if (su[j] != grouping[i][j])
591 same = false;
592 max = !same;
593 }
594 if (max)
595 memcpy(su, grouping[i], setsz * sizeof(group_type));
596 }
597 }
598 memcpy(st, su, setsz * sizeof(group_type));
599 free(su);
600 } /* bigsubset */
601
602
recontraverse(node ** p,group_type * st,long n,long * nextnode)603 void recontraverse(node **p, group_type *st, long n, long *nextnode)
604 {
605 /* traverse to add next node to consensus tree */
606 long i, j = 0, k = 0, l = 0;
607
608 boolean found, same = 0, zero, zero2;
609 group_type *tempset, *st2;
610 node *q, *r;
611
612 for (i = 1; i <= spp; i++) { /* count species in set */
613 if (i == ((l+1)*SETBITS+1)) l++;
614 if (((1L << (i - 1 - l*SETBITS)) & st[l]) != 0) {
615 k++; /* k is the number of species in the set */
616 j = i; /* j is set to last species in the set */
617 }
618 }
619 if (k == 1) { /* if only 1, set up that tip */
620 *p = nodep_cons[j - 1];
621 (*p)->tip = true;
622 (*p)->index = j;
623 return;
624 }
625 gnu(&grbg, p); /* otherwise make interior node */
626 (*p)->tip = false;
627 (*p)->index = *nextnode;
628 nodep_cons[*nextnode - 1] = *p;
629 (*nextnode)++;
630 (*p)->deltav = 0.0;
631 for (i = 0; i < n; i++) { /* go through all sets */
632 same = true; /* to find one which is this one */
633 for (j = 0; j < setsz; j++)
634 if (grouping[i][j] != st[j])
635 same = false;
636 if (same)
637 (*p)->deltav = *timesseen[i];
638 }
639 tempset = (group_type *)Malloc(setsz * sizeof(group_type));
640 memcpy(tempset, st, setsz * sizeof(group_type));
641 q = *p;
642 st2 = (group_type *)Malloc(setsz * sizeof(group_type));
643 memcpy(st2, st, setsz * sizeof(group_type));
644 zero = true; /* having made two copies of the set ... */
645 for (j = 0; j < setsz; j++) /* see if they are empty */
646 if (tempset[j] != 0)
647 zero = false;
648 if (!zero)
649 bigsubset(tempset, n); /* find biggest set within it */
650 zero = zero2 = false; /* ... tempset is that subset */
651 while (!zero && !zero2) {
652 zero = zero2 = true;
653 for (j = 0; j < setsz; j++) {
654 if (st2[j] != 0)
655 zero = false;
656 if (tempset[j] != 0)
657 zero2 = false;
658 }
659 if (!zero && !zero2) {
660 gnu(&grbg, &q->next);
661 q->next->index = q->index;
662 q = q->next;
663 q->tip = false;
664 r = *p;
665 recontraverse(&q->back, tempset, n, nextnode); /* put it on tree */
666 *p = r;
667 q->back->back = q;
668 for (j = 0; j < setsz; j++)
669 st2[j] &= ~tempset[j]; /* remove that subset from the set */
670 memcpy(tempset, st2, setsz * sizeof(group_type)); /* that becomes set */
671 found = false;
672 i = 1;
673 while (!found && i <= n) {
674 if (grouping[i - 1] != 0) {
675 same = true;
676 for (j = 0; j < setsz; j++)
677 if (grouping[i - 1][j] != tempset[j])
678 same = false;
679 }
680 if ((grouping[i - 1] != 0) && same)
681 found = true;
682 else
683 i++;
684 }
685 zero = true;
686 for (j = 0; j < setsz; j++)
687 if (tempset[j] != 0)
688 zero = false;
689 if (!zero && !found)
690 bigsubset(tempset, n);
691 }
692 }
693 q->next = *p;
694 free(tempset);
695 free(st2);
696 } /* recontraverse */
697
698
reconstruct(long n)699 void reconstruct(long n)
700 {
701 /* reconstruct tree from the subsets */
702 long nextnode;
703 group_type *s;
704
705 nextnode = spp + 1;
706 s = (group_type *)Malloc(setsz * sizeof(group_type));
707 memcpy(s, fullset, setsz * sizeof(group_type));
708 recontraverse(&root, s, n, &nextnode);
709 free(s);
710 } /* reconstruct */
711
712
coordinates(node * p,long * tipy)713 void coordinates(node *p, long *tipy)
714 {
715 /* establishes coordinates of nodes */
716 node *q, *first, *last;
717 long maxx;
718
719 if (p->tip) {
720 p->xcoord = 0;
721 p->ycoord = *tipy;
722 p->ymin = *tipy;
723 p->ymax = *tipy;
724 (*tipy) += down;
725 return;
726 }
727 q = p->next;
728 maxx = 0;
729 while (q != p) {
730 coordinates(q->back, tipy);
731 if (!q->back->tip) {
732 if (q->back->xcoord > maxx)
733 maxx = q->back->xcoord;
734 }
735 q = q->next;
736 }
737 first = p->next->back;
738 q = p;
739 while (q->next != p)
740 q = q->next;
741 last = q->back;
742 p->xcoord = maxx + OVER_C;
743 p->ycoord = (long)((first->ycoord + last->ycoord) / 2);
744 p->ymin = first->ymin;
745 p->ymax = last->ymax;
746 } /* coordinates */
747
748
drawline(long i)749 void drawline(long i)
750 {
751 /* draws one row of the tree diagram by moving up tree */
752 node *p, *q;
753 long n, j;
754 boolean extra, done, trif;
755 node *r, *first = NULL, *last = NULL;
756 boolean found;
757
758 p = root;
759 q = root;
760 fprintf(outfile, " ");
761 extra = false;
762 trif = false;
763 do {
764 if (!p->tip) {
765 found = false;
766 r = p->next;
767 while (r != p && !found) {
768 if (i >= r->back->ymin && i <= r->back->ymax) {
769 q = r->back;
770 found = true;
771 } else
772 r = r->next;
773 }
774 first = p->next->back;
775 r = p;
776 while (r->next != p)
777 r = r->next;
778 last = r->back;
779 }
780 done = (p->tip || p == q);
781 n = p->xcoord - q->xcoord;
782 if (extra) {
783 n--;
784 extra = false;
785 }
786 if (q->ycoord == i && !done) {
787 if (trif)
788 putc('-', outfile);
789 else
790 putc('+', outfile);
791 trif = false;
792 if (!q->tip) {
793 for (j = 1; j <= n - 8; j++)
794 putc('-', outfile);
795 if (noroot && (root->next->next->next == root) &&
796 (((root->next->back == q) && root->next->next->back->tip)
797 || ((root->next->next->back == q) && root->next->back->tip)))
798 fprintf(outfile, "-------|");
799 else {
800 if (!strict) { /* write number of times seen */
801 if (q->deltav >= 10000)
802 fprintf(outfile, "-%5.0f-|", (double)q->deltav);
803 else if (q->deltav >= 1000)
804 fprintf(outfile, "--%4.0f-|", (double)q->deltav);
805 else if (q->deltav >= 100)
806 fprintf(outfile, "-%5.1f-|", (double)q->deltav);
807 else if (q->deltav >= 10)
808 fprintf(outfile, "--%4.1f-|", (double)q->deltav);
809 else
810 fprintf(outfile, "--%4.2f-|", (double)q->deltav);
811 } else
812 fprintf(outfile, "-------|");
813 }
814 extra = true;
815 trif = true;
816 } else {
817 for (j = 1; j < n; j++)
818 putc('-', outfile);
819 }
820 } else if (!p->tip && last->ycoord > i && first->ycoord < i &&
821 (i != p->ycoord || p == root)) {
822 putc('|', outfile);
823 for (j = 1; j < n; j++)
824 putc(' ', outfile);
825 } else {
826 for (j = 1; j <= n; j++)
827 putc(' ', outfile);
828 if (trif)
829 trif = false;
830 }
831 if (q != p)
832 p = q;
833 } while (!done);
834 if (p->ycoord == i && p->tip) {
835 for (j = 0; (j < MAXNCH) && (p->nayme[j] != '\0'); j++)
836 putc(p->nayme[j], outfile);
837 }
838 putc('\n', outfile);
839 } /* drawline */
840
841
printree()842 void printree()
843 {
844 /* prints out diagram of the tree */
845 long i;
846 long tipy;
847
848 if (treeprint_cons) {
849 fprintf(outfile, "\nCONSENSUS TREE:\n");
850 if (mr || mre || ml) {
851 if (noroot) {
852 fprintf(outfile, "the numbers on the branches indicate the number\n");
853 fprintf(outfile, "of times the partition of the species into the two sets\n");
854 fprintf(outfile, "which are separated by that branch occurred\n");
855 } else {
856 fprintf(outfile, "the numbers forks indicate the number\n");
857 fprintf(outfile, "of times the group consisting of the species\n");
858 fprintf(outfile, "which are to the right of that fork occurred\n");
859 }
860 fprintf(outfile, "among the trees, out of %6.2f trees\n", ntrees);
861 if (ntrees <= 1.001)
862 fprintf(outfile, "(trees had fractional weights)\n");
863 }
864 tipy = 1;
865 coordinates(root, &tipy);
866 putc('\n', outfile);
867 for (i = 1; i <= tipy - down; i++)
868 drawline(i);
869 putc('\n', outfile);
870 }
871 if (noroot) {
872 printf("\n remember:");
873 if (didreroot)
874 printf(" (though rerooted by outgroup)");
875 printf(" this is an unrooted tree!\n");
876 }
877 putchar('\n');
878 } /* printree */
879
880
enterpartition(group_type * s1,long * n)881 void enterpartition (group_type *s1, long *n)
882 {
883 /* try to put this partition in list of partitions. If implied by others,
884 don't bother. If others implied by it, replace them. If this one
885 vacuous because only one element in s1, forget it */
886 long i, j;
887 boolean found;
888
889 /* this stuff all to be rewritten but left here so pieces can be used */
890 found = false;
891 for (i = 0; i < (*n); i++) { /* go through looking whether it is there */
892 found = true;
893 for (j = 0; j < setsz; j++) { /* check both parts of partition */
894 found = found && (grouping[i][j] == s1[j]);
895 found = found && (group2[i][j] == (fullset[j] & (~s1[j])));
896 }
897 if (found)
898 break;
899 }
900 if (!found) { /* if not, add it to the slot after the end,
901 which must be empty */
902 grouping[i] = (group_type *)Malloc(setsz * sizeof(group_type));
903 timesseen[i] = (double *)Malloc(sizeof(double));
904 group2[i] = (group_type *)Malloc(setsz * sizeof(group_type));
905 for (j = 0; j < setsz; j++)
906 grouping[i][j] = s1[j];
907 *timesseen[i] = 1;
908 (*n)++;
909 }
910 } /* enterpartition */
911
912
elimboth(long n)913 void elimboth(long n)
914 {
915 /* for Adams case: eliminate pairs of groups incompatible with each other */
916 long i, j;
917 boolean comp;
918
919 for (i = 0; i < n-1; i++) {
920 for (j = i+1; j < n; j++) {
921 comp = compatible(i,j);
922 if (!comp) {
923 *timesseen[i] = 0.0;
924 *timesseen[j] = 0.0;
925 }
926 }
927 if (*timesseen[i] == 0.0) {
928 free(grouping[i]);
929 free(timesseen[i]);
930 timesseen[i] = NULL;
931 grouping[i] = NULL;
932 }
933 }
934 if (*timesseen[n-1] == 0.0) {
935 free(grouping[n-1]);
936 free(timesseen[n-1]);
937 timesseen[n-1] = NULL;
938 grouping[n-1] = NULL;
939 }
940 } /* elimboth */
941
942
consensus(pattern_elm *** pattern_array,long trees_in)943 void consensus(pattern_elm ***pattern_array, long trees_in)
944 {
945 long i, n, n2, tipy;
946
947 group2 = (group_type **) Malloc(maxgrp*sizeof(group_type *));
948 for (i = 0; i < maxgrp; i++)
949 group2[i] = NULL;
950 times2 = (double **)Malloc(maxgrp*sizeof(double *));
951 for (i = 0; i < maxgrp; i++)
952 times2[i] = NULL;
953 n2 = 0;
954 censor(); /* drop groups that are too rare */
955 compress(&n); /* push everybody to front of array */
956 if (!strict) { /* drop those incompatible, if any */
957 sort(n);
958 eliminate(&n, &n2);
959 compress(&n);
960 }
961 reconstruct(n);
962 tipy = 1;
963 coordinates(root, &tipy);
964 if (prntsets) {
965 printf("\nSets included in the consensus tree\n");
966 printset(n);
967 for (i = 0; i < n2; i++) {
968 if (!grouping[i]) {
969 grouping[i] = (group_type *)Malloc(setsz * sizeof(group_type));
970 timesseen[i] = (double *)Malloc(sizeof(double));
971 }
972 memcpy(grouping[i], group2[i], setsz * sizeof(group_type));
973 *timesseen[i] = *times2[i];
974 }
975 n = n2;
976 printf("\n\nSets NOT included in consensus tree:");
977 if (n2 == 0)
978 printf(" NONE\n");
979 else {
980 putchar('\n');
981 printset(n);
982 }
983 }
984 putchar('\n');
985 if (strict)
986 printf("\nStrict consensus tree\n");
987 if (mre)
988 printf("\nExtended majority rule consensus tree\n");
989 if (ml) {
990 printf("\nM consensus tree (l = %4.2f)\n", mlfrac);
991 printf(" l\n");
992 }
993 if (mr)
994 printf("\nMajority rule consensus tree\n");
995 printree();
996 free(nayme);
997 for (i = 0; i < maxgrp; i++)
998 free(grouping[i]);
999 free(grouping);
1000 for (i = 0; i < maxgrp; i++)
1001 free(order[i]);
1002 free(order);
1003 for (i = 0; i < maxgrp; i++)
1004 if (timesseen[i] != NULL)
1005 free(timesseen[i]);
1006 free(timesseen);
1007 } /* consensus */
1008
1009
rehash()1010 void rehash()
1011 {
1012 group_type *s;
1013 long i, j;
1014 double temp, ss, smult;
1015 boolean done;
1016
1017 long old_maxgrp = maxgrp;
1018 long new_maxgrp = maxgrp*2;
1019
1020 tmseen2 = (double **)Malloc(new_maxgrp*sizeof(double *));
1021 grping2 = (group_type **)Malloc(new_maxgrp*sizeof(group_type *));
1022 order2 = (long **)Malloc(new_maxgrp*sizeof(long *));
1023 lengths2 = (double *)Malloc(new_maxgrp*sizeof(double));
1024 tchange2 = (double *)Malloc(new_maxgrp*sizeof(double));
1025
1026 for (i = 0; i < new_maxgrp; i++)
1027 {
1028 tmseen2[i] = NULL;
1029 grping2[i] = NULL;
1030 order2[i] = NULL;
1031 lengths2[i] = 0.0;
1032 tchange2[i] = 0.0;
1033 }
1034
1035
1036 smult = (sqrt(5.0) - 1) / 2;
1037 s = (group_type *)Malloc(setsz * sizeof(group_type));
1038
1039 for (i = 0; i < old_maxgrp; i++) {
1040 long old_index = *order[i];
1041 long new_index = -1;
1042 memcpy(s, grouping[old_index], setsz * sizeof(group_type));
1043 ss = 0.0;
1044 for (j = 0; j < setsz; j++)
1045 ss += s[j] * smult; /* pow(2, SETBITS*j)*/;
1046 temp = ss;
1047 new_index = (long)(new_maxgrp * (temp - floor(temp)));
1048 done = false;
1049 while (!done) {
1050 if (!grping2[new_index])
1051 {
1052
1053 grping2[new_index] = (group_type *)Malloc(setsz * sizeof(group_type));
1054 memcpy(grping2[new_index], grouping[old_index], setsz * sizeof(group_type));
1055
1056 order2[i] = (long *)Malloc(sizeof(long));
1057 *order2[i] = new_index;
1058
1059 tmseen2[new_index] = (double *)Malloc(sizeof(double));
1060 *tmseen2[new_index] = *timesseen[old_index];
1061
1062 lengths2[new_index] = lengths[old_index];
1063
1064 free(grouping[old_index]);
1065 free(timesseen[old_index]);
1066 free(order[i]);
1067
1068 grouping[old_index] = NULL;
1069 timesseen[old_index] = NULL;
1070 order[i] = NULL;
1071
1072 done = true; /* successfully found place for this item */
1073
1074 } else {
1075 new_index++;
1076 if (new_index >= new_maxgrp) new_index -= new_maxgrp;
1077 }
1078 }
1079 }
1080
1081 free(lengths);
1082 free(timesseen);
1083 free(grouping);
1084 free(order);
1085
1086 free(s);
1087
1088 timesseen = tmseen2;
1089 grouping = grping2;
1090 lengths = lengths2;
1091 order = order2;
1092
1093 maxgrp = new_maxgrp;
1094
1095 } /* rehash */
1096
1097
enternodeset(node * r)1098 void enternodeset(node* r)
1099 { /* enter a set of species into the hash table */
1100 long i, j, start;
1101 double ss, n;
1102 boolean done, same;
1103 double times ;
1104 group_type *s;
1105
1106 s = r->nodeset;
1107
1108
1109 /* do not enter full sets */
1110 same = true;
1111 for (i = 0; i < setsz; i++)
1112 if (s[i] != fullset[i])
1113 same = false;
1114 if (same)
1115 return;
1116
1117 times = trweight;
1118 ss = 0.0; /* compute the hashcode for the set */
1119 n = ((sqrt(5.0) - 1.0) / 2.0); /* use an irrational multiplier */
1120 for (i = 0; i < setsz; i++)
1121 ss += s[i] * n;
1122 i = (long)(maxgrp * (ss - floor(ss))) + 1; /* use fractional part of code */
1123 start = i;
1124 done = false; /* go through seeing if it is there */
1125
1126 while (!done) {
1127 if (grouping[i - 1]) { /* ... i.e. if group is absent, or */
1128 same = false; /* (will be false if timesseen = 0) */
1129 if (!(timesseen[i-1] == 0)) { /* ... if timesseen = 0 */
1130 same = true;
1131 for (j = 0; j < setsz; j++) {
1132 if (s[j] != grouping[i - 1][j])
1133 same = false;
1134 }
1135 }
1136 else { /* if group is present but timessen = 0 */
1137 for (j = 0; j < setsz; j++) /* replace by correct group */
1138 grouping[i - 1][j] = s[j];
1139 *timesseen[i - 1] = 1;
1140 }
1141 }
1142 if (grouping[i - 1] && same) { /* if it is there, increment timesseen */
1143 *timesseen[i - 1] += times;
1144 lengths[i - 1] = nodep_cons[r->index - 1]->v;
1145 done = true;
1146 } else if (!grouping[i - 1]) { /* if not there and slot empty ... */
1147 grouping[i - 1] = (group_type *)Malloc(setsz * sizeof(group_type));
1148 lasti++;
1149 order[lasti] = (long *)Malloc(sizeof(long));
1150 timesseen[i - 1] = (double *)Malloc(sizeof(double));
1151 memcpy(grouping[i - 1], s, setsz * sizeof(group_type));
1152 *timesseen[i - 1] = times;
1153 *order[lasti] = i - 1;
1154 done = true;
1155 lengths[i - 1] = nodep_cons[r->index -1]->v;
1156 } else { /* otherwise look to put it in next slot ... */
1157 i++;
1158 if (i > maxgrp) i -= maxgrp;
1159 }
1160 if (!done && i == start) { /* if no place to put it, expand hash table */
1161
1162 rehash();
1163
1164 done = true;
1165 enternodeset(r); /* calls this procedure again, but now there
1166 should be space */
1167 }
1168 }
1169 } /* enternodeset */
1170
1171
1172 /* recursively crawls through tree, setting nodeset values to be the
1173 * bitwise OR of bits from downstream nodes
1174 */
accumulate(node * r)1175 void accumulate(node *r)
1176 {
1177 node *q;
1178 long i;
1179
1180 /* zero out nodeset values. since we are re-using tree nodes,
1181 * the malloc only happens the first time we encounter a node. */
1182 if (!r->nodeset)
1183 {
1184 r->nodeset = (group_type *)Malloc(setsz * sizeof(group_type));
1185 }
1186 for (i = 0; i < setsz; i++)
1187 {
1188 r->nodeset[i] = 0L;
1189 }
1190
1191 if (r->tip) {
1192 /* tip nodes should have a single bit set corresponding to index-1 */
1193 i = (r->index-1) / (long)SETBITS;
1194 r->nodeset[i] = 1L << (r->index - 1 - i*SETBITS);
1195 }
1196 else {
1197 /* for loop should not visit r->back -- we've likely come from there */
1198 for (q = r->next; q != r; q = q->next) {
1199
1200 /* recursive call to this function */
1201 accumulate(q->back);
1202
1203 /* bitwise OR of bits from downstream nodes */
1204 for (i = 0; i < setsz; i++)
1205 r->nodeset[i] |= q->back->nodeset[i];
1206 }
1207 }
1208
1209 if ((!r->tip && (r->next->next != r)) || r->tip)
1210 enternodeset(r);
1211 } /* accumulate */
1212
1213
dupname2(Phylip_Char * name,node * p,node * thisV)1214 void dupname2(Phylip_Char *name, node *p, node *thisV)
1215 {
1216 /* search for a duplicate name recursively */
1217 node *q;
1218
1219 if (p->tip) {
1220 if (p != thisV) {
1221 if (namesSearch(p->nayme)) {
1222 printf("\n\nERROR in user tree: duplicate name found: ");
1223 puts(p->nayme);
1224 printf("\n\n");
1225 exxit(-1);
1226 } else {
1227 namesAdd(p->nayme);
1228 }
1229 }
1230 } else {
1231 q = p;
1232 while (p->next != q) {
1233 dupname2(name, p->next->back, thisV);
1234 p = p->next;
1235 }
1236 }
1237 } /* dupname2 */
1238
1239
dupname(node * p)1240 void dupname(node *p)
1241 {
1242 /* Recursively searches tree, starting at p, to verify that
1243 * each tip name occurs only once. When called with root as
1244 * its argument, at final recusive exit, all tip names should
1245 * be in the hash "hashp".
1246 */
1247 node *q;
1248
1249 if (p->tip) {
1250 if (namesSearch(p->nayme)) {
1251 printf("\n\nERROR in user tree: duplicate name found: ");
1252 puts(p->nayme);
1253 printf("\n\n");
1254 exxit(-1);
1255 } else {
1256 namesAdd(p->nayme);
1257 }
1258 } else {
1259 q = p;
1260 while (p->next != q) {
1261 dupname(p->next->back);
1262 p = p->next;
1263 }
1264 }
1265 } /* dupname */
1266
1267
missingnameRecurs(node * p)1268 void missingnameRecurs(node *p)
1269 {
1270 /* search for missing names in first tree */
1271 node *q;
1272
1273 if (p->tip) {
1274 if (!namesSearch(p->nayme)) {
1275 printf("\n\nERROR in user tree: name %s not found in first tree\n\n\n",
1276 p->nayme);
1277 exxit(-1);
1278 }
1279 } else {
1280 q = p;
1281 while (p->next != q) {
1282 missingnameRecurs(p->next->back);
1283 p = p->next;
1284 }
1285 }
1286 } /* missingnameRecurs */
1287
1288 /**
1289 * wrapper for recursive missingname function
1290 */
missingname(node * p)1291 void missingname(node *p){
1292 missingnameRecurs(p);
1293 namesCheckTable();
1294 } /* missingname */
1295
1296
gdispose(node * p)1297 void gdispose(node *p)
1298 {
1299 /* go through tree throwing away nodes */
1300 node *q, *r;
1301
1302 if (p->tip) {
1303 chuck(&grbg, p);
1304 return;
1305 }
1306 q = p->next;
1307 while (q != p) {
1308 gdispose(q->back);
1309 r = q;
1310 q = q->next;
1311 chuck(&grbg, r);
1312 }
1313 chuck(&grbg, p);
1314 } /* gdispose */
1315
1316
initreenode(node * p)1317 void initreenode(node *p)
1318 {
1319 /* traverse tree and assign species names to tip nodes */
1320 node *q;
1321
1322 if (p->tip) {
1323 memcpy(nayme[p->index - 1], p->nayme, MAXNCH);
1324 } else {
1325 q = p->next;
1326 while (q && q != p) {
1327 initreenode(q->back);
1328 q = q->next;
1329 }
1330 }
1331 } /* initreenode */
1332
1333
reroot(node * outgroup,long * nextnode)1334 void reroot(node *outgroup, long *nextnode)
1335 {
1336 /* reroots and reorients tree, placing root at outgroup */
1337 long i;
1338 node *p, *q;
1339 double newv;
1340
1341 /* count root's children & find last */
1342 p = root;
1343 i = 0;
1344 while (p->next != root) {
1345 p = p->next;
1346 i++;
1347 }
1348 if (i == 2) {
1349 /* 2 children: */
1350 q = root->next;
1351
1352 newv = q->back->v + p->back->v;
1353
1354 /* if outgroup is already here, just move
1355 * its length to the other branch and finish */
1356 if (outgroup == p->back) {
1357 /* flip branch order at root so that outgroup
1358 * is first, just to be consistent */
1359 root->next = p;
1360 p->next = q;
1361 q->next = root;
1362
1363 q->back->v = newv;
1364 p->back->v = 0;
1365 return;
1366 }
1367 if (outgroup == q) {
1368 p->back->v = newv;
1369 q->back->v = 0;
1370 return;
1371 }
1372
1373 /* detach root by linking child nodes */
1374 q->back->back = p->back;
1375 p->back->back = q->back;
1376 p->back->v = newv;
1377 q->back->v = newv;
1378 } else { /* 3+ children */
1379 p->next = root->next; /* join old root nodes */
1380 nodep_cons[root->index-1] = root->next; /* make root->next the primary node */
1381
1382 /* create new root nodes */
1383 gnu(&grbg, &root->next);
1384 q = root->next;
1385 gnu(&grbg, &q->next);
1386 p = q->next;
1387 p->next = root;
1388 q->tip = false;
1389 p->tip = false;
1390 nodep_cons[*nextnode] = root;
1391 (*nextnode)++;
1392 root->index = *nextnode;
1393 root->next->index = root->index;
1394 root->next->next->index = root->index;
1395 }
1396 newv = outgroup->v;
1397 /* root is 3 "floating" nodes */
1398 /* q == root->next */
1399 /* p == root->next->next */
1400
1401 /* attach root at outgroup */
1402 q->back = outgroup;
1403 p->back = outgroup->back;
1404 outgroup->back->back = p;
1405 outgroup->back = q;
1406 outgroup->v = 0;
1407 outgroup->back->v = 0;
1408 root->v = 0;
1409 p->v = newv;
1410 p->back->v = newv;
1411 reorient(root);
1412 } /* reroot */
1413
1414
reorient(node * n)1415 void reorient(node* n) {
1416 node* p;
1417
1418 if ( n->tip ) return;
1419 if ( nodep_cons[n->index - 1] != n ) {
1420 nodep_cons[n->index - 1] = n;
1421 if ( n->back )
1422 n->v = n->back->v;
1423 }
1424
1425 for ( p = n->next ; p != n ; p = p->next)
1426 reorient(p->back);
1427 }
1428
1429
store_pattern(pattern_elm *** pattern_array,int trees_in_file)1430 void store_pattern (pattern_elm ***pattern_array, int trees_in_file)
1431 {
1432 /* put a tree's groups into a pattern array.
1433 Don't forget that when not Adams, grouping[] is not compressed. . . */
1434 long i, total_groups=0, j=0, k;
1435
1436 /* First, find out how many groups exist in the given tree. */
1437 for (i = 0 ; i < maxgrp ; i++)
1438 if ((grouping[i] != NULL) &&
1439 (*timesseen[i] > 0))
1440 /* If this is group exists and is present in the current tree, */
1441 total_groups++ ;
1442
1443 /* Then allocate a space to store the bit patterns. . . */
1444 for (i = 0 ; i < setsz ; i++) {
1445 pattern_array[i][trees_in_file]
1446 = (pattern_elm *) Malloc(sizeof(pattern_elm)) ;
1447 pattern_array[i][trees_in_file]->apattern =
1448 (group_type *) Malloc (total_groups * sizeof (group_type)) ;
1449 pattern_array[i][trees_in_file]->length =
1450 (double *) Malloc (maxgrp * sizeof (double)) ;
1451 for ( j = 0 ; j < maxgrp ; j++ ) {
1452 pattern_array[i][trees_in_file]->length[j] = -1;
1453 }
1454 pattern_array[i][trees_in_file]->patternsize = (long *)Malloc(sizeof(long));
1455 }
1456 j = 0;
1457 /* Then go through groupings again, and copy in each element
1458 appropriately. */
1459 for (i = 0 ; i < maxgrp ; i++)
1460 if (grouping[i] != NULL) {
1461 if (*timesseen[i] > 0) {
1462 for (k = 0 ; k < setsz ; k++)
1463 pattern_array[k][trees_in_file]->apattern[j] = grouping[i][k] ;
1464 pattern_array[0][trees_in_file]->length[j] = lengths[i];
1465 j++ ;
1466
1467 /*
1468 EWFIX.BUG.756
1469
1470 updates timesseen_changes to the current value
1471 pointed to by timesseen
1472
1473 treedist uses this to determine if group i has been seen
1474 by comparing timesseen_changes[i] (the count now) with
1475 timesseen[i] (the count after reading next tree)
1476
1477 We could make treedist more efficient by not keeping
1478 timesseen (and groupings, etc) around, but doing it
1479 this way allows us to share code between treedist and
1480 consense.
1481
1482 */
1483 *timesseen[i] = 0;
1484 }
1485 }
1486 *pattern_array[0][trees_in_file]->patternsize = total_groups;
1487
1488 } /* store_pattern */
1489
1490
samename(naym name1,plotstring name2)1491 boolean samename(naym name1, plotstring name2)
1492 {
1493 return !(strncmp(name1, name2, MAXNCH));
1494 } /* samename */
1495
1496
reordertips()1497 void reordertips()
1498 {
1499 /* Reorders nodep[] and indexing to match species order from first tree */
1500 /* Assumes tree has spp tips and nayme[] has spp elements, and that there is a
1501 * one-to-one mapping between tip names and the names in nayme[].
1502 */
1503
1504 long i, j;
1505 node *t;
1506
1507 for (i = 0; i < spp-1; i++) {
1508 for (j = i + 1; j < spp; j++) {
1509 if (samename(nayme[i], nodep_cons[j]->nayme)) {
1510 /* switch the pointers in
1511 * nodep[] and set index accordingly for each node. */
1512 t = nodep_cons[i];
1513
1514 nodep_cons[i] = nodep_cons[j];
1515 nodep_cons[i]->index = i+1;
1516
1517 nodep_cons[j] = t;
1518 nodep_cons[j]->index = j+1;
1519
1520 break; /* next i */
1521 }
1522 }
1523 }
1524 } /* reordertips */
1525
read_groups(pattern_elm **** pattern_array,long total_trees,long tip_count,FILE * intree)1526 void read_groups (pattern_elm ****pattern_array,
1527 long total_trees, long tip_count, FILE *intree)
1528 {
1529 /* read the trees. Accumulate sets. */
1530 int i, j, k;
1531 boolean haslengths, initial;
1532 long nextnode, trees_read = 0;
1533
1534 /* do allocation first *****************************************/
1535 grouping = (group_type **) Malloc(maxgrp*sizeof(group_type *));
1536 lengths = (double *) Malloc(maxgrp*sizeof(double));
1537
1538 for (i = 0; i < maxgrp; i++)
1539 grouping[i] = NULL;
1540
1541 order = (long **) Malloc(maxgrp*sizeof(long *));
1542 for (i = 0; i < maxgrp; i++)
1543 order[i] = NULL;
1544
1545 timesseen = (double **)Malloc(maxgrp*sizeof(double *));
1546 for (i = 0; i < maxgrp; i++)
1547 timesseen[i] = NULL;
1548
1549 nayme = (naym *)Malloc(tip_count*sizeof(naym));
1550 hashp = (hashtype)Malloc(sizeof(namenode) * NUM_BUCKETS);
1551 for (i=0;i<NUM_BUCKETS;i++) {
1552 hashp[i] = NULL;
1553 }
1554 setsz = (long)ceil((double)tip_count/(double)SETBITS);
1555 if (tree_pairing != NO_PAIRING)
1556 {
1557 /* Now that we know setsz, we can malloc pattern_array
1558 and pattern_array[n] accordingly. */
1559 (*pattern_array) =
1560 (pattern_elm ***)Malloc(setsz * sizeof(pattern_elm **));
1561
1562 /* For this assignment, let's assume that there will be no
1563 more than maxtrees. */
1564 for (j = 0 ; j < setsz ; j++)
1565 {
1566 (*pattern_array)[j] =
1567 (pattern_elm **)Malloc(total_trees * sizeof(pattern_elm *));
1568 for(k = 0 ; k < total_trees ; k++ )
1569 {
1570 (*pattern_array)[j][k] = NULL;
1571 }
1572 }
1573 }
1574
1575 fullset = (group_type *)Malloc(setsz * sizeof(group_type));
1576 for (j = 0; j < setsz; j++)
1577 fullset[j] = 0L;
1578 k = 0;
1579 for (j = 1; j <= tip_count; j++) {
1580 if (j == ((k+1)*SETBITS+1)) k++;
1581 fullset[k] |= 1L << (j - k*SETBITS - 1);
1582 }
1583 /* end allocation **********************************************/
1584
1585 firsttree = true;
1586 grbg = NULL;
1587 initial = true;
1588 while (!eoff(intree)) { /* go till end of input tree file */
1589 for (i = 0; i < maxgrp; i++) {
1590 lengths[i] = -1;
1591 }
1592 goteof = false;
1593 nextnode = 0;
1594 haslengths = true;
1595 allocate_nodep(&nodep_cons, &intree, &spp);
1596 assert(spp == tip_count);
1597 treeread(intree, &root, treenode, &goteof, &firsttree, nodep_cons,
1598 &nextnode, &haslengths, &grbg, initconsnode,true,-1);
1599 if (!initial) {
1600 missingname(root);
1601 reordertips();
1602 } else {
1603 initial = false;
1604 dupname(root);
1605 initreenode(root);
1606 }
1607 if (goteof)
1608 continue;
1609 ntrees += trweight;
1610 if (noroot) {
1611 reroot(nodep_cons[outgrno_cons - 1], &nextnode);
1612 didreroot = outgropt_cons;
1613 }
1614 accumulate(root);
1615 gdispose(root);
1616 for (j = 0; j < 2*(1+spp); j++)
1617 nodep_cons[j] = NULL;
1618 free(nodep_cons);
1619 /* Added by Dan F. */
1620 if (tree_pairing != NO_PAIRING) {
1621 /* If we're computing pairing or need separate tree sets, store the
1622 current pattern as an element of it's trees array. */
1623 store_pattern ((*pattern_array), trees_read) ;
1624 trees_read++ ;
1625 for (i = 0; i < maxgrp; i++)
1626 if (grouping[i] != NULL) {
1627 *timesseen[i] = 0;
1628 }
1629 }
1630 }
1631 } /* read_groups */
1632
clean_up_final()1633 void clean_up_final()
1634 {
1635 long i;
1636 for(i=0;i<maxgrp;i++)
1637 {
1638 if(grouping[i] != NULL) {
1639 free(grouping[i]);
1640 }
1641 if(order[i] != NULL) {
1642 free(order[i]);
1643 }
1644 if(timesseen[i] != NULL) {
1645 free(timesseen[i]);
1646 }
1647 }
1648 free(grouping);
1649 free(nayme);
1650 free(order);
1651 free(timesseen);
1652 free(fullset);
1653 free(lengths);
1654
1655 namesClearTable();
1656 free(hashp);
1657 }
1658