/* * treesort.c * * * Part of TREE-PUZZLE 5.2 (July 2004) * * (c) 2003-2004 by Heiko A. Schmidt, Korbinian Strimmer, and Arndt von Haeseler * (c) 1999-2003 by Heiko A. Schmidt, Korbinian Strimmer, * M. Vingron, and Arndt von Haeseler * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler * * All parts of the source except where indicated are distributed under * the GNU public licence. See http://www.opensource.org for details. * * ($Id$) * */ #define EXTERN extern #include "treesort.h" int YYY=0; /* fprintf(stderr, "YYY: %d (%s:%d)\n", YYY++, __FILE__, __LINE__); */ /*******************************************/ /* tree sorting */ /*******************************************/ /* compute address of the 4 int (sort key) in the 4 int node */ int ct_sortkeyaddr(int addr) { int a, res; a = addr % 4; res = addr - a + 3; return res; } /* ct_sortkeyaddr */ /**********/ /* compute address of the next edge pointer in a 4 int node (0->1->2->0) */ int ct_nextedgeaddr(int addr) { int a, res; a = addr % 4; if ( a == 2 ) { res = addr - 2; } else { res = addr + 1; } return res; } /* ct_nextedgeaddr */ /**********/ /* compute address of 1st edge of a 4 int node from node number */ int ct_1stedge(int node) { int res; res = 4 * node; return res; } /* ct_1stedge */ /**********/ /* compute address of 2nd edge of a 4 int node from node number */ int ct_2ndedge(int node) { int res; res = 4 * node +1; return res; } /* ct_2ndedge */ /**********/ /* compute address of 3rd edge of a 4 int node from node number */ int ct_3rdedge(int node) { int res; res = 4 * node +2; return res; } /* ct_3rdedge */ /**********/ /* check whether node 'node' is a leaf (2nd/3rd edge pointer = -1) */ int ct_isleaf(int node, int *ctree) { return (ctree[ct_3rdedge(node)] < 0); } /* ct_isleaf */ /**********/ /* compute node number of 4 int node from an edge addr. */ int ct_addr2node(int addr) { int a, res; a = addr % 4; res = (int) ((addr - a) / 4); return res; } /* ct_addr2node */ /**********/ /* print graph pointers for checking */ void printctree(int *ctree) { int n; for (n=0; n < 2*Maxspc; n++) { printf("n[%3d] = (%3d.%2d, %3d.%2d, %3d.%2d | %3d)\n", n, (int) ctree[ct_1stedge(n)]/4, (int) ctree[ct_1stedge(n)]%4, (int) ctree[ct_2ndedge(n)]/4, (int) ctree[ct_2ndedge(n)]%4, (int) ctree[ct_3rdedge(n)]/4, (int) ctree[ct_3rdedge(n)]%4, ctree[ct_3rdedge(n)+1]); } printf("\n"); } /* printctree */ /**********/ /* allocate memory for ctree 3 ints pointer plus 1 check byte */ int *initctree(void) { int *snodes; int n; snodes = (int *) calloc((size_t) (4 * 2 * Maxspc), sizeof(int)); if (snodes == NULL) maerror("snodes in copytree"); for (n=0; n<(4 * 2 * Maxspc); n++) { snodes[n]=-1; } return snodes; } /* initctree */ /**********/ /* free memory of a tree for sorting */ void freectree(int **snodes) { free(*snodes); *snodes = NULL; } /* freectree */ #if 0 /**********/ /* trueID (HAS) */ /* copy subtree recursively */ void copyOTU_trueID(int *ctree, /* in/out: tree array struct */ int *ct_nextnode,/* in/out: next free node */ int ct_curredge,/* in: currend edge to add subtree */ int *ct_nextleaf,/* in/out: next free leaf (0-maxspc) */ int ed, /* in: current edge in puzzling tree */ ONEEDGE *edge, /* in: tree topology */ int *edgeofleaf, /* in: external edge list */ int numleaves, /* in: number of leaves */ int *trueID) /* in: permutation vector */ { int i, nextcurredge; /* test whether we are on a leaf */ if (edge[ed].downright == NULL && edge[ed].downleft == NULL) { for (i = 1; i < numleaves; i++) { if (edgeofleaf[i] == ed) { /* i is the leaf of ed */ nextcurredge = ct_1stedge(*ct_nextleaf); ctree[ct_curredge] = nextcurredge; ctree[nextcurredge] = ct_curredge; /* trueID (HAS) */ ctree[ct_sortkeyaddr(nextcurredge)] = trueID[i]; (*ct_nextleaf)++; return; } } } /* we are NOT on a leaf */ nextcurredge = ct_1stedge(*ct_nextnode); ctree[ct_curredge] = nextcurredge; ctree[nextcurredge] = ct_curredge; (*ct_nextnode)++; nextcurredge = ct_nextedgeaddr(nextcurredge); copyOTU_trueID(ctree, ct_nextnode, nextcurredge, ct_nextleaf, edge[ed].downleft->numedge, edge, edgeofleaf, numleaves, trueID); nextcurredge = ct_nextedgeaddr(nextcurredge); copyOTU_trueID(ctree, ct_nextnode, nextcurredge, ct_nextleaf, edge[ed].downright->numedge, edge, edgeofleaf, numleaves, trueID); } /* copyOTU_trueID */ /********/ /* trueID (HAS) */ /* copy treestructure to sorting structure */ void copytree_trueID(int *ctree, /* out: copy for effective sorting */ int *trueID, /* in: permutation vector */ ONEEDGE *edgeset, /* in: intermediate tree topology */ int *edgeofleaf, /* dito. */ int numleaves) /* in: number of leaves */ { int ct_curredge; int ct_nextleaf; int ct_nextnode; ct_nextnode = Maxspc; ct_curredge = ct_1stedge(ct_nextnode); ct_nextleaf = 1; ctree[ct_1stedge(0)] = ct_curredge; ctree[ct_curredge] = ct_1stedge(0); /* trueID (HAS) */ ctree[ct_sortkeyaddr(0)] = trueID[0]; ct_nextnode++; ct_curredge = ct_nextedgeaddr(ct_curredge); copyOTU_trueID(ctree, &ct_nextnode, ct_curredge, &ct_nextleaf, edgeset[edgeofleaf[0]].downleft->numedge, edgeset, edgeofleaf, numleaves, trueID); ct_curredge = ct_nextedgeaddr(ct_curredge); copyOTU_trueID(ctree, &ct_nextnode, ct_curredge, &ct_nextleaf, edgeset[edgeofleaf[0]].downright->numedge, edgeset, edgeofleaf, numleaves, trueID); } /* copytree_trueID */ #endif /**********/ /* copy subtree recursively */ void copyOTU(int *ctree, /* in/out: tree array struct */ int *ct_nextnode,/* in/out: next free node */ int ct_curredge,/* in: currend edge to add subtree */ int *ct_nextleaf,/* in/out: next free leaf (0-maxspc) */ int ed, /* in: current edge in puzzling tree */ ONEEDGE *edge, /* in: tree topology */ int *edgeofleaf, /* in: external edge list */ int numleaves) /* in: number of leaves */ { int nextcurredge; /* test whether we are on a leaf */ if (edge[ed].downright == NULL && edge[ed].downleft == NULL) { nextcurredge = ct_1stedge(*ct_nextleaf); ctree[ct_curredge] = nextcurredge; ctree[nextcurredge] = ct_curredge; ctree[ct_sortkeyaddr(nextcurredge)] = edge[ed].taxon; (*ct_nextleaf)++; return; } /* we are NOT on a leaf */ nextcurredge = ct_1stedge(*ct_nextnode); ctree[ct_curredge] = nextcurredge; ctree[nextcurredge] = ct_curredge; (*ct_nextnode)++; nextcurredge = ct_nextedgeaddr(nextcurredge); copyOTU(ctree, ct_nextnode, nextcurredge, ct_nextleaf, edge[ed].downleft->numedge, edge, edgeofleaf, numleaves); nextcurredge = ct_nextedgeaddr(nextcurredge); copyOTU(ctree, ct_nextnode, nextcurredge, ct_nextleaf, edge[ed].downright->numedge, edge, edgeofleaf, numleaves); } /* copyOTU */ /**********/ /* copy treestructure to sorting structure */ void copytree(int *ctree, /* out: copy for effective sorting */ ONEEDGE *edgeset, /* in: intermediate tree topology */ int *edgeofleaf, /* dito. */ int rootleaf, int numleaves) /* in: number of leaves */ { int ct_curredge; int ct_nextleaf; int ct_nextnode; ct_nextnode = Maxspc; ct_curredge = ct_1stedge(ct_nextnode); ct_nextleaf = 1; ctree[ct_1stedge(0)] = ct_curredge; ctree[ct_curredge] = ct_1stedge(0); ctree[ct_sortkeyaddr(0)] = rootleaf; ct_nextnode++; ct_curredge = ct_nextedgeaddr(ct_curredge); copyOTU(ctree, &ct_nextnode, ct_curredge, &ct_nextleaf, edgeset[edgeofleaf[rootleaf]].downleft->numedge, edgeset, edgeofleaf, numleaves); ct_curredge = ct_nextedgeaddr(ct_curredge); copyOTU(ctree, &ct_nextnode, ct_curredge, &ct_nextleaf, edgeset[edgeofleaf[rootleaf]].downright->numedge, edgeset, edgeofleaf, numleaves); } /* copytree */ /**********/ /* sort subtree from edge recursively by indices */ int sortOTU(int edge, int *ctree) { int key1, key2; int edge1, edge2; int tempedge; /* if leaf, return taxonID */ if (ctree[ct_2ndedge((int) (edge / 4))] < 0) return ctree[ct_sortkeyaddr(edge)]; edge1 = ctree[ct_nextedgeaddr(edge)]; edge2 = ctree[ct_nextedgeaddr(ct_nextedgeaddr(edge))]; /* printf ("visiting [%5d] -> [%5d], [%5d]\n", edge, edge1, edge2); */ /* printf ("visiting [%2d.%2d] -> [%2d.%2d], [%2d.%2d]\n", (int)(edge/4), edge%4, (int)(edge1/4), edge1%4, (int)(edge2/4), edge2%4); */ key1 = sortOTU(edge1, ctree); key2 = sortOTU(edge2, ctree); if (key2 < key1) { tempedge = ctree[ctree[edge1]]; ctree[ctree[edge1]] = ctree[ctree[edge2]]; ctree[ctree[edge2]] = tempedge; tempedge = ctree[edge1]; ctree[edge1] = ctree[edge2]; ctree[edge2] = tempedge; ctree[ct_sortkeyaddr(edge)] = key2; } else { ctree[ct_sortkeyaddr(edge)] = key1; } return ctree[ct_sortkeyaddr(edge)]; } /* sortOTU */ /**********/ /* sort ctree recursively by indices */ int sortctree(int *ctree) { int n, startnode=-1; /* find virtual root node (ID=0) */ for(n=0; n>>>\n"); tmpptr = list; *sortlist = list; while (tmpptr != NULL) { (*tmpptr).sortnext = (*tmpptr).succ; (*tmpptr).sortlast = (*tmpptr).pred; tmpptr = (*tmpptr).succ; } while (xchange > 0) { curr = *sortlist; xchange = 0; if (curr == NULL) fprintf(stderr, "Grrrrrrrrr>>>>\n"); while((*curr).sortnext != NULL) { next = (*curr).sortnext; if ((*curr).count >= (*next).count) curr = (*curr).sortnext; else { if ((*curr).sortlast != NULL) (*((*curr).sortlast)).sortnext = next; if (*sortlist == curr) *sortlist = next; (*next).sortlast = (*curr).sortlast; if ((*next).sortnext != NULL) (*((*next).sortnext)).sortlast = curr; (*curr).sortnext = (*next).sortnext; (*curr).sortlast = next; (*next).sortnext = curr; xchange++; } } } } /* sortbynum */ /**********/ /* print puzzling step tree stuctures for checking */ void printfpstrees(treelistitemtype *list) { char ch; treelistitemtype *tmpptr = NULL; tmpptr = list; ch = '-'; while (tmpptr != NULL) { printf ("%c[%2d] %5d %s\n", ch, (*tmpptr).idx, (*tmpptr).count, (*tmpptr).tree); tmpptr = (*tmpptr).succ; ch = ' '; } } /* printfpstrees */ /**********/ /* print sorted puzzling step tree stucture with names */ void fprintffullpstree(FILE *outf, char *treestr) { int count = 0; int idnum = 0; int n; for(n=0; treestr[n] != '\0'; n++){ while(isdigit((int)treestr[n])){ idnum = (10 * idnum) + ((int)treestr[n]-48); n++; count++; } if (count > 0){ # ifdef USEQUOTES fprintf(outf, "'"); # endif (void)fputid(outf, idnum); # ifdef USEQUOTES fprintf(outf, "'"); # endif count = 0; idnum = 0; } fprintf(outf, "%c", treestr[n]); } } /* fprintffullpstree */ /**********/ /* print sorted puzzling step tree stuctures with names */ void fprintfsortedpstrees(FILE *output, treelistitemtype *list, /* tree list */ int itemnum, /* order number */ int itemsum, /* number of trees */ int comment, /* with statistics, or puzzle report ? */ float cutoff) /* cutoff percentage */ { treelistitemtype *tmpptr = NULL; treelistitemtype *slist = NULL; int num = 1; float percent; if (list == NULL) fprintf(stderr, "Grrrrrrrrr>>>>\n"); sortbynum(list, &slist); tmpptr = slist; while (tmpptr != NULL) { percent = (float)(100.0 * (*tmpptr).count / itemsum); if ((cutoff == 0.0) || (cutoff <= percent)) { if (comment) fprintf (output, "[ %d. %d %.2f %d %d %d ]", num++, (*tmpptr).count, percent, (*tmpptr).id, itemnum, itemsum); else { if (num == 1){ fprintf (output, "\n"); fprintf (output, "The following tree(s) occured in more than %.2f%% of the %d puzzling steps.\n", cutoff, itemsum); fprintf (output, "The trees are orderd descending by the number of occurences.\n"); fprintf (output, "\n"); fprintf (output, "\n occurences ID Phylip tree\n"); } fprintf (output, "%2d. %5d %6.2f%% %5d ", num++, (*tmpptr).count, percent, (*tmpptr).id); } fprintffullpstree(output, (*tmpptr).tree); fprintf (output, "\n"); } tmpptr = (*tmpptr).sortnext; } if (!comment) { fprintf (output, "\n"); switch(num) { case 1: fprintf (output, "There were no tree topologies (out of %d) occuring with a percentage \n>= %.2f%% of the %d puzzling steps.\n", itemnum, cutoff, itemsum); break; case 2: fprintf (output, "There was one tree topology (out of %d) occuring with a percentage \n>= %.2f%%.\n", itemnum, cutoff); break; default: fprintf (output, "There were %d tree topologies (out of %d) occuring with a percentage \n>= %.2f%%.\n", num-1, itemnum, cutoff); break; } fprintf (output, "\n"); fprintf (output, "\n"); } } /* fprintfsortedpstrees */ /**********/ /* print sorted tree topologies for checking */ void printfsortedpstrees(treelistitemtype *list) { treelistitemtype *tmpptr = NULL; treelistitemtype *slist = NULL; sortbynum(list, &slist); tmpptr = slist; while (tmpptr != NULL) { printf ("[%2d] %5d %s\n", (*tmpptr).idx, (*tmpptr).count, (*tmpptr).tree); tmpptr = (*tmpptr).sortnext; } } /* printfsortedpstrees */ /*******************************************/ /* end of tree sorting */ /*******************************************/