/* * pstep-recursive.c * * * Part of TREE-PUZZLE 5.2 (July 2004) * * (c) 2003-2004 by Heiko A. Schmidt, Korbinian Strimmer, and Arndt von Haeseler * (c) 1999-2003 by Heiko A. Schmidt, Korbinian Strimmer, * M. Vingron, and Arndt von Haeseler * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler * * All parts of the source except where indicated are distributed under * the GNU public licence. See http://www.opensource.org for details. * * ($Id$) * */ #ifdef HAVE_CONFIG_H # include #endif #define EXTERN extern #include #include #include"puzzle.h" /* #include"util.h" */ /* #include"pstep.h" */ /******************************************************************************/ /* functions for representing and building puzzling step trees */ /******************************************************************************/ /* initialize tree with the following starting configuration * A(=0) * [up =NULL] * [downleft =1] * [downright=2] * * o * | * | * +-------------+--------------+ * | | * | | * o o * * C(=1) B(=2) * [up =0] [up =0] * [downleft =NULL] [downleft =NULL] * [downright=NULL] [downright=NULL] * * nextedge = 3 * nextleaf = 3 * and set according edge maps * */ void inittree_orig(ONEEDGE_ORIG **edge, /* out: new array of edges */ int **edgeofleaf,/* out: array of external edge ptrs */ int Maxspc, /* in: Number of species (n) */ int Maxbrnch, /* in: Number of branches (2n-3) */ int *nextedge, /* out: next free edge index (=3) */ int *nextleaf) /* out: next free leaf index (=3) */ { int i; ONEEDGE_ORIG *tmpedge; int *tmpedgeofleaf; /* allocate the memory for the whole tree */ /* allocate memory for vector with all the edges of the tree */ tmpedge = (ONEEDGE_ORIG *) calloc((size_t) Maxbrnch, sizeof(ONEEDGE_ORIG) ); if (tmpedge == NULL) maerror("edge in inittree"); *edge = tmpedge; /* allocate memory for vetmpctor with edge numbers of leaves */ tmpedgeofleaf = (int *) calloc((size_t) Maxspc, sizeof(int) ); if (tmpedgeofleaf == NULL) maerror("edgeofleaf in inittree"); *edgeofleaf = tmpedgeofleaf; /* allocate memory for all the edges the edge map */ for (i = 0; i < Maxbrnch; i++) { (tmpedge)[i].edgemap = (int *) calloc((size_t) Maxbrnch, sizeof(int) ); if (tmpedge[i].edgemap == NULL) maerror("edgemap in inittree"); } /* number all edges */ for (i = 0; i < Maxbrnch; i++) tmpedge[i].numedge = i; /* initialize tree */ *nextedge = 3; *nextleaf = 3; /* edge maps */ (tmpedge[0].edgemap)[0] = 0; /* you are on the right edge */ (tmpedge[0].edgemap)[1] = 4; /* go down left for leaf 1 */ (tmpedge[0].edgemap)[2] = 5; /* go down right for leaf 2 */ (tmpedge[1].edgemap)[0] = 1; /* go up for leaf 0 */ (tmpedge[1].edgemap)[1] = 0; /* you are on the right edge */ (tmpedge[1].edgemap)[2] = 3; /* go up/down right for leaf 2 */ (tmpedge[2].edgemap)[0] = 1; /* go up for leaf 0 */ (tmpedge[2].edgemap)[1] = 2; /* go up/down left for leaf 1 */ (tmpedge[2].edgemap)[2] = 0; /* you are on the right edge */ /* interconnection */ tmpedge[0].up = NULL; tmpedge[0].downleft = &tmpedge[1]; tmpedge[0].downright = &tmpedge[2]; tmpedge[1].up = &tmpedge[0]; tmpedge[1].downleft = NULL; tmpedge[1].downright = NULL; tmpedge[2].up = &tmpedge[0]; tmpedge[2].downleft = NULL; tmpedge[2].downright = NULL; /* edges of leaves */ tmpedgeofleaf[0] = 0; tmpedgeofleaf[1] = 1; tmpedgeofleaf[2] = 2; } /* inittree */ /******************/ /* add next leaf on the specified edge */ void addnextleaf_orig(int dockedge, /* insert here */ ONEEDGE_ORIG *edge, /* edge array */ int *edgeofleaf, /* ext. edge idx array */ int Maxspc, /* No. of species */ int Maxbrnch, /* No. of branches */ int *in_nextedge, /* next free edge idx */ int *in_nextleaf) /* next free leaf idx */ { int i; int nextedge; int nextleaf; nextedge=*in_nextedge; nextleaf=*in_nextleaf; if (dockedge >= nextedge) { /* Trying to add leaf nextleaf to nonexisting edge dockedge */ fprintf(STDOUT, "\n\n\nHALT: PLEASE REPORT ERROR F TO DEVELOPERS\n\n\n"); # if PARALLEL PP_Finalize(); # endif exit(1); } if (nextleaf >= Maxspc) { /* Trying to add leaf nextleaf to a tree with Maxspc leaves */ fprintf(STDOUT, "\n\n\nHALT: PLEASE REPORT ERROR G TO DEVELOPERS\n\n\n"); # if PARALLEL PP_Finalize(); # endif exit(1); } /* necessary change in edgeofleaf if dockedge == edgeofleaf[0] */ if (edgeofleaf[0] == dockedge) edgeofleaf[0] = nextedge; /* adding nextedge to the tree */ edge[nextedge].up = edge[dockedge].up; edge[nextedge].downleft = &edge[dockedge]; edge[nextedge].downright = &edge[nextedge+1]; edge[dockedge].up = &edge[nextedge]; if (edge[nextedge].up != NULL) { if ( ((edge[nextedge].up)->downleft) == &edge[dockedge] ) (edge[nextedge].up)->downleft = &edge[nextedge]; else (edge[nextedge].up)->downright = &edge[nextedge]; } /* adding nextedge + 1 to the tree */ edge[nextedge+1].up = &edge[nextedge]; edge[nextedge+1].downleft = NULL; edge[nextedge+1].downright = NULL; edgeofleaf[nextleaf] = nextedge+1; /* the two new edges get info about the old edges */ /* nextedge */ for (i = 0; i < nextedge; i++) { switch ( (edge[dockedge].edgemap)[i] ) { /* down right changes to down left */ case 5: (edge[nextedge].edgemap)[i] = 4; break; /* null changes to down left */ case 0: (edge[nextedge].edgemap)[i] = 4; break; default:(edge[nextedge].edgemap)[i] = (edge[dockedge].edgemap)[i]; break; } } /* nextedge + 1 */ for (i = 0; i < nextedge; i++) { switch ( (edge[dockedge].edgemap)[i] ) { /* up/down left changes to up */ case 2: (edge[nextedge+1].edgemap)[i] = 1; break; /* up/down right changes to up */ case 3: (edge[nextedge+1].edgemap)[i] = 1; break; /* down left changes to up/down left */ case 4: (edge[nextedge+1].edgemap)[i] = 2; break; /* down right changes to up/down left */ case 5: (edge[nextedge+1].edgemap)[i] = 2; break; /* null changes to up/down left */ case 0: (edge[nextedge+1].edgemap)[i] = 2; break; /* up stays up */ default:(edge[nextedge+1].edgemap)[i] = (edge[dockedge].edgemap)[i]; break; } } /* dockedge */ for (i = 0; i < nextedge; i++) { switch ( (edge[dockedge].edgemap)[i] ) { /* up/down right changes to up */ case 3: (edge[dockedge].edgemap)[i] = 1; break; /* up/down left changes to up */ case 2: (edge[dockedge].edgemap)[i] = 1; break; default: break; } } /* all edgemaps are updated for the two new edges */ /* nextedge */ (edge[nextedge].edgemap)[nextedge] = 0; (edge[nextedge].edgemap)[nextedge+1] = 5; /* down right */ /* nextedge + 1 */ (edge[nextedge+1].edgemap)[nextedge] = 1; /* up */ (edge[nextedge+1].edgemap)[nextedge+1] = 0; /* all other edges */ for (i = 0; i < nextedge; i++) { (edge[i].edgemap)[nextedge] = (edge[i].edgemap)[dockedge]; (edge[i].edgemap)[nextedge+1] = (edge[i].edgemap)[dockedge]; } /* an extra for dockedge */ (edge[dockedge].edgemap)[nextedge] = 1; /* up */ (edge[dockedge].edgemap)[nextedge+1] = 3; /* up/down right */ nextleaf++; nextedge = nextedge + 2; *in_nextedge=nextedge; *in_nextleaf=nextleaf; } /* addnextleaf */ /******************/ /* free memory (to be called after inittree) */ void freetree_orig(ONEEDGE_ORIG *edge, /* edge array */ int *edgeofleaf, /* ext. edge idx array */ int Maxspc) /* No. of species */ { int i; for (i = 0; i < 2 * Maxspc - 3; i++) free(edge[i].edgemap); free(edge); free(edgeofleaf); } /* freetree */ /******************/ /* writes OTU sitting on edge ed */ void writeOTU_orig(FILE *outfp, /* output file */ int ed, /* edge to subtree */ ONEEDGE_ORIG *edge, /* edge array */ int *edgeofleaf, /* ext. edge idx array */ int nextedge, /* next free edge idx */ int nextleaf, /* next free leaf idx */ int *column, /* current screen depth */ int *trueID) /* species permutation */ { int i; /* test whether we are on a leaf */ if (edge[ed].downright == NULL && edge[ed].downleft == NULL) { for (i = 1; i < nextleaf; i++) { if (edgeofleaf[i] == ed) { /* i is the leaf of ed */ *column += fputid(outfp, trueID[i]); return; } } } /* we are NOT on a leaf */ fprintf(outfp, "("); (*column)++; writeOTU_orig(outfp, edge[ed].downleft->numedge, edge, edgeofleaf, nextedge, nextleaf, column, trueID); fprintf(outfp, ","); (*column)++; (*column)++; if (*column > 55) { *column = 2; fprintf(outfp, "\n "); } writeOTU_orig(outfp, edge[ed].downright->numedge, edge, edgeofleaf, nextedge, nextleaf, column, trueID); fprintf(outfp, ")"); (*column)++; } /* writeOTU */ /******************/ /* write tree */ void writetree_orig(FILE *outfp, /* output file */ ONEEDGE_ORIG *edge, /* edge array */ int *edgeofleaf, /* ext. edge idx array */ int nextedge, /* next free edge idx */ int nextleaf, /* next free leaf idx */ int *trueID) /* species permutation */ { int column; column = 1; fprintf(outfp, "("); column += fputid(outfp, trueID[0]) + 3; fprintf(outfp, ","); writeOTU_orig(outfp, edge[edgeofleaf[0]].downleft->numedge, edge, edgeofleaf, nextedge, nextleaf, &column, trueID); column++; column++; fprintf(outfp, ","); writeOTU_orig(outfp, edge[edgeofleaf[0]].downright->numedge, edge, edgeofleaf, nextedge, nextleaf, &column, trueID); fprintf(outfp, ");\n"); } /* writetree */ /******************/ /* clear all edgeinfos */ void resetedgeinfo_orig(ONEEDGE_ORIG *edge, /* edge array */ int nextedge) /* next free edge idx */ { int i; for (i = 0; i < nextedge; i++) edge[i].edgeinfo = 0; } /* resetedgeinfo */ /******************/ /* increment all edgeinfo between leaf A and B */ void incrementedgeinfo_orig(int A, /* start leaf of penalty path */ int B, /* start leaf of penalty path */ ONEEDGE_ORIG *edge, /* edge array */ int *edgeofleaf) /* ext. edge idx array */ { int curredge, finaledge, nextstep; if (A == B) return; finaledge = edgeofleaf[B]; curredge = edgeofleaf[A]; edge[curredge].edgeinfo = edge[curredge].edgeinfo + 1; while (curredge != finaledge) { nextstep = (edge[curredge].edgemap)[finaledge]; switch (nextstep) { /* up */ case 1: curredge = (edge[curredge].up)->numedge; break; /* up/down left */ case 2: curredge = ((edge[curredge].up)->downleft)->numedge; break; /* up/down right */ case 3: curredge = ((edge[curredge].up)->downright)->numedge; break; /* down left */ case 4: curredge = (edge[curredge].downleft)->numedge; break; /* down right */ case 5: curredge = (edge[curredge].downright)->numedge; break; } edge[curredge].edgeinfo = edge[curredge].edgeinfo + 1; } } /* incrementedgeinfo */ /******************/ /* checks which edge has the lowest edgeinfo if there are several edges with the same lowest edgeinfo, one of them will be selected randomly */ void minimumedgeinfo_orig(ONEEDGE_ORIG *edge, /* edge array */ int *edgeofleaf, /* ext. edge idx array */ int nextedge, /* next free edge idx */ int nextleaf, /* next free leaf idx */ int *out_minedge, /* minimum edge set */ uli *out_mininfo) /* minumum penalty */ { int i, k, howmany, randomnum; int minedge; /* minimum edge set */ uli mininfo; /* minumum penalty */ howmany = 1; minedge = 0; mininfo = edge[0].edgeinfo; for (i = 1; i < nextedge; i++) if (edge[i].edgeinfo <= mininfo) { if (edge[i].edgeinfo == mininfo) { howmany++; } else { minedge = i; mininfo = edge[i].edgeinfo; howmany = 1; } } if (howmany > 1) { /* draw random edge */ randomnum = randominteger(howmany) + 1; /* 1 to howmany */ i = -1; for (k = 0; k < randomnum; k++) { do { i++; } while (edge[i].edgeinfo != mininfo); minedge = i; } } *out_minedge=minedge; *out_mininfo=mininfo; } /* minimumedgeinfo */ /*************************************************************************/ /* global functions of the puzzling step */ /*************************************************************************/ /* perform one single puzzling step to produce one intermediate tree */ void onepstep( /* PStep (intermediate) tree topol: */ ONEEDGE_ORIG **edge, /* out: new array of edges */ int **edgeofleaf, /* out: array of extern edge ptrs */ unsigned char *quartettopols, /* in: quartetblock with all topols */ int Maxspc, /* in: Number of species (n) */ ivector permutation) /* in: species permutation (trueID) */ { int Maxbrnch = (2*Maxspc)-3; /* Number of branches (2n-3) */ int nextedge; /* next free edge index (=3) */ int nextleaf; /* next free leaf index (=3) */ int minedge; /* insertion edge of minimum penalty */ uli mininfo; /* minimum penalty */ int a, b, c, i; /* quartet leaves, i to be inserted */ int chooseX, chooseY; /* end leaves of penalty path */ /* allocate and initialize new tree topology */ inittree_orig(edge, edgeofleaf, Maxspc, Maxbrnch, &nextedge, &nextleaf); /* adding all other leafs */ for (i = 3; i < Maxspc; i++) { /* clear all edgeinfos */ resetedgeinfo_orig(*edge, nextedge); /* * core of quartet puzzling algorithm */ for (a = 0; a < nextleaf - 2; a++) for (b = a + 1; b < nextleaf - 1; b++) for (c = b + 1; c < nextleaf; c++) { /* check which two leaves out of a,b,c are closer related to each other than to leaf i according to a least squares fit of the continous Bayesian weights to the seven trivial "attractive regions". We assign a score of 1 to all edges on the penalty path between these two leaves chooseX and chooseY */ checkquartet(a, b, c, i, permutation, &chooseX, &chooseY); incrementedgeinfo_orig(chooseX, chooseY, *edge, *edgeofleaf); } /* for all quartets q=(a,b,c,i) */ /* find out which edge has lowest edgeinfo */ minimumedgeinfo_orig(*edge, *edgeofleaf, nextedge, nextleaf, &minedge, &mininfo); /* add the next leaf on minedge */ addnextleaf_orig(minedge, *edge, *edgeofleaf, Maxspc, Maxbrnch, &nextedge, &nextleaf); } /* adding all other leafs */ } /* onepstep */ /*************************************************************************/ /* perform Numtrial single puzzling steps constructing Numtrial intermediate */ /* trees, sort each of them and extract splits for consensus step */ void allpstep(uli Numtrial, /* in: no. psteps to perform */ unsigned char *quartetinfo, /* in: quartetblock with all topols*/ int Maxspc, /* in: Number of species (n) */ int fixedorder_optn) /* in: 'fixed' anchored RNG (debug)*/ { /* misc variables */ ivector trueIDtmp; /* species permutation (trueID) */ uli Currtrial; /* step counter */ /* PStep (intermediate) tree topol: */ ONEEDGE_ORIG *tmpedgeset; /* array of edges */ int *tmpedgeofleaf; /* array of extern edge pointers */ /* for unique sorting of tree topologies */ int *ctree; /* sort tree */ int startnode; /* root in sort tree */ char *trstr; /* phylip tree string */ treelistitemtype *treeitem; /* list entry of tree */ # if PARALLEL cmatrix *bp; /* temporary buffer for splits in slaves */ int n; /* step count for init/free split buffer */ # endif /* PARALLEL */ # if PARALLEL /* add times to arry */ addtimes(GENERAL, &tarr); /* alloc temporary split memory */ bp = (cmatrix *) calloc((size_t) Numtrial, sizeof(void *)); for(n=0; n phylip tree string */ trstr=sprintfctree(ctree, psteptreestrlen); /* add sorted tree to unique tree list */ treeitem = addtree2list(&trstr, 1, &psteptreelist, &psteptreenum, &psteptreesum); # if ! PARALLEL /* output unique tree to trace list, if option set */ /* not done on slave processes */ if((listqptrees == PSTOUT_LIST) || (listqptrees == PSTOUT_LISTORDER)) { /* print: order no/# topol per this id/tree id/sum of unique topologies/sum of trees so far */ fprintf(qptlist, "%ld.\t1\t%d\t%d\t%d\t%d\n", Currtrial + 1, (*treeitem).count, (*treeitem).id, psteptreenum, psteptreesum); } # endif /* ! PARALLEL */ # ifdef VERBOSE1 printf("%s\n", trstr); printfsortedpstrees(psteptreelist); # endif /* free sorting tree structure */ freectree(&ctree); /* free tree before building the next tree */ freetree_orig(tmpedgeset, tmpedgeofleaf, Maxspc); # if ! PARALLEL /* generate message roughly every 15 minutes */ /* check timer */ time(&time2); if ( (time2 - time1) > TIMECHECK_INTERVAL) { double tc2, mintogo, minutes, hours; /* every TIMECHECK_INTERVAL seconds */ /* percentage of completed trees */ if (mflag == 0) { fprintf(STDOUT, "\n"); mflag = 1; } tc2 = 100.0*Currtrial/Numtrial; /* excluded for less runtime, but less accuracy of the 15min */ /* + 100.0*nq/Numquartets/Numtrial; */ mintogo = (100.0-tc2) * (double) (time2-time0)/60.0/tc2; hours = floor(mintogo/60.0); minutes = mintogo - 60.0*hours; fprintf(STDOUT, "%2.2f%%", tc2); fprintf(STDOUT, " completed (remaining"); fprintf(STDOUT, " time: %.0f", hours); fprintf(STDOUT, " hours %.0f", minutes); fprintf(STDOUT, " minutes)\n"); fflush(STDOUT); time1 = time2; } /* check timer */ # endif /* ! PARALLEL */ addtimes(PUZZLING, &tarr); } /* for Numtrials */ # if PARALLEL /* in slaves: send results: splits, trees */ PP_SendSplitsBlock(Maxspc, Numtrial, bp, psteptreenum, psteptreelist); /* free temporary split memory */ for (Currtrial = 0; Currtrial < Numtrial; Currtrial++) { free_cmatrix(bp[Currtrial]); } free(bp); /* sent! Thus, in slave process not needed any more */ freetreelist(&psteptreelist, &psteptreenum, &psteptreesum); # endif /* PARALLEL */ } /* allpstep */