1 /* 2 * puzzle.h 3 * 4 * 5 * Part of TREE-PUZZLE 5.2 (July 2004) 6 * 7 * (c) 2003-2004 by Heiko A. Schmidt, Korbinian Strimmer, and Arndt von Haeseler 8 * (c) 1999-2003 by Heiko A. Schmidt, Korbinian Strimmer, 9 * M. Vingron, and Arndt von Haeseler 10 * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler 11 * 12 * All parts of the source except where indicated are distributed under 13 * the GNU public licence. See http://www.opensource.org for details. 14 * 15 * ($Id$) 16 * 17 */ 18 19 20 #ifndef _PUZZLE_ 21 #define _PUZZLE_ 22 23 #ifndef PACKAGE 24 # define PACKAGE "tree-puzzle" 25 #endif 26 #ifndef VERSION 27 # define VERSION "5.2-generic" 28 #endif 29 #define DATE "July 2004" 30 31 /* prototypes */ 32 #include <stdio.h> 33 #include <stdlib.h> 34 #include <time.h> 35 #include <math.h> 36 #include <ctype.h> 37 #include <string.h> 38 #include <limits.h> 39 #include <float.h> 40 #include "pstep.h" 41 #include "util.h" 42 #include "ml.h" 43 #include "consensus.h" 44 #include "treesort.h" 45 #ifdef PARALLEL 46 # include "ppuzzle.h" 47 #endif 48 49 50 #define STDOUT stdout 51 52 /* time check interval in seconds for ML/puzzling step status (900 = 15min) */ 53 # define TIMECHECK_INTERVAL 900 54 55 /* filenames */ 56 # define FILENAMELENGTH 2048 57 58 /* maximum exp difference, such that 1.0+exp(-TP_MAX_EXP_DIFF) == 1.0 */ 59 # define TP_MAX_EXP_DIFF 40 60 61 # define INFILEDEFAULT "infile" 62 # define OUTFILEDEFAULT "outfile" 63 # define TREEFILEDEFAULT "outtree" 64 # define INTREEDEFAULT "intree" 65 # define DISTANCESDEFAULT "outdist" 66 # define TRIANGLEDEFAULT "outlm.eps" 67 # define UNRESOLVEDDEFAULT "outqlist" 68 # define ALLQUARTDEFAULT "outallquart" 69 # define ALLQUARTLHDEFAULT "outallquartlh" 70 # define SITELHDEFAULT "outsitelh" 71 # define SITELHBDEFAULT "outsitelhb" 72 # define SITERATEDEFAULT "outsiterate" 73 # define SITERATEBDEFAULT "outsiterateb" 74 # define OUTPARAMDEFAULT "outparam" 75 # define SUBSETDEFAULT "insubsetmatr" 76 # define OUTPTLISTDEFAULT "outpstep" 77 # define OUTPTORDERDEFAULT "outptorder" 78 79 # define INFILE infilename 80 # define OUTFILE outfilename 81 # define TREEFILE outtreename 82 # define INTREE intreename 83 # define DISTANCES outdistname 84 # define TRIANGLE outlmname 85 # define UNRESOLVED outqlistname 86 # define ALLQUART outallquartname 87 # define ALLQUARTLH outallquartlhname 88 # define SITELH outsitelhname 89 # define SITELHB outsitelhbname 90 # define SITERATE outsiteratename 91 # define SITERATEB outsiteratebname 92 # define OUTPARAM outparamname 93 # define SUBSET insubsetmatrname 94 # define OUTPTLIST outpstepname 95 # define OUTPTORDER outptordername 96 97 # define FILEPREFIX fileprefixname 98 99 EXTERN char infilename [FILENAMELENGTH]; 100 EXTERN char outfilename [FILENAMELENGTH]; 101 EXTERN char outtreename [FILENAMELENGTH]; 102 EXTERN char intreename [FILENAMELENGTH]; 103 EXTERN char outdistname [FILENAMELENGTH]; 104 EXTERN char outlmname [FILENAMELENGTH]; 105 EXTERN char outqlistname [FILENAMELENGTH]; 106 EXTERN char outallquartname [FILENAMELENGTH]; 107 EXTERN char outallquartlhname [FILENAMELENGTH]; 108 EXTERN char outsitelhname [FILENAMELENGTH]; 109 EXTERN char outsitelhbname [FILENAMELENGTH]; 110 EXTERN char outsiteratename [FILENAMELENGTH]; 111 EXTERN char outsiteratebname [FILENAMELENGTH]; 112 EXTERN char outparamname [FILENAMELENGTH]; 113 EXTERN char insubsetmatrname [FILENAMELENGTH]; 114 EXTERN char outpstepname [FILENAMELENGTH]; 115 EXTERN char outptordername [FILENAMELENGTH]; 116 117 EXTERN char fileprefixname [FILENAMELENGTH]; 118 119 120 #define OUTFILEEXT "puzzle" 121 #define TREEFILEEXT "tree" 122 #define DISTANCESEXT "dist" 123 #define TRIANGLEEXT "eps" 124 #define UNRESOLVEDEXT "qlist" 125 #define ALLQUARTEXT "allquart" 126 #define ALLQUARTLHEXT "allquartlh" 127 #define SITELHEXT "sitelh" 128 #define SITELHBEXT "sitelhb" 129 #define SITERATEEXT "siterate" 130 #define SITERATEBEXT "siterateb" 131 #define OUTPARAMEXT "param" 132 #define SUBSETEXT "subsetmatr" 133 #define OUTPTLISTEXT "pstep" 134 #define OUTPTORDEREXT "ptorder" 135 136 137 /* resetprefix values (xxx) */ 138 #define RESET_NONE 0 139 #define RESET_INFILE 1 140 #define RESET_INTREE 2 141 142 /* setprefix_optn values (xxx) */ 143 #define SETPREFIX_NONE 0 144 #define SETPREFIX_INFILE 1 145 #define SETPREFIX_INTREE 2 146 #define SETPREFIX_RESET 3 147 #define SETPREFIX_PREFIX 4 148 149 /* auto_aamodel/auto_datatype values (xxx) */ 150 #define AUTO_OFF 0 151 #define AUTO_GUESS 1 152 #define AUTO_DEFAULT 2 153 154 155 /* qptlist values (xxx) */ 156 #define PSTOUT_NONE 0 157 #define PSTOUT_ORDER 1 158 #define PSTOUT_LISTORDER 2 159 #define PSTOUT_LIST 3 160 161 /* dtat_optn values (xxx) */ 162 #define NUCLEOTIDE 0 163 #define AMINOACID 1 164 #define BINARY 2 165 166 /* typ_optn values (xxx) */ 167 #define TREERECON_OPTN 0 168 #define LIKMAPING_OPTN 1 169 #define NUMTYPES 2 170 171 /* puzzlemodes (xxx) */ 172 #define QUARTPUZ 0 173 #define USERTREE 1 174 #define CONSENSUS 2 175 #define PAIRDIST 3 176 #define NUMPUZZLEMODES 4 177 178 /* modes for rootsearch (xxx) */ 179 #define ROOT_USER 0 180 #define ROOT_AUTO 1 181 #define ROOT_DISPLAYED 2 182 #define NUMROOTSEARCH 3 183 184 #if 0 185 /* rhetmodes (xxx) Modes of rate heterogeneity */ 186 #define UNIFORMRATE 0 187 #define GAMMARATE 1 188 #define TWORATE 2 189 #define MIXEDRATE 3 190 #endif 191 192 /* defines for types of quartet likelihood computation (xxx) */ 193 #define EXACT 0 194 #define APPROX 1 195 196 /* defines for results of sequence number check (seqnumcheck) */ 197 #define SEQNUM_OK 0 198 #define SEQNUM_TOOFEW 1 199 #define SEQNUM_TOOMANY 2 200 201 202 #if 0 203 typedef struct { 204 uli fullres_pro; 205 uli fullres_con; 206 uli partres_pro; 207 uli partres_con; 208 uli unres; 209 uli missing; 210 uli qsum; 211 } qsupportarr_t; 212 213 EXTERN cmatrix biparts; /* bipartitions of tree of current puzzling step */ 214 EXTERN cmatrix consbiparts; /* bipartitions of majority rule consensus tree */ 215 216 EXTERN int xsize; /* depth of consensus tree picture */ 217 EXTERN ivector consconfid; /* confidence values of majority rule consensus tree */ 218 EXTERN ivector conssizes; /* partition sizes of majority rule consensus tree */ 219 EXTERN ivector xcor; /* x-coordinates of consensus tree nodes */ 220 EXTERN ivector ycor; /* y-coordinates of consensus tree nodes */ 221 EXTERN ivector ycormax; /* maximal y-coordinates of consensus tree nodes */ 222 EXTERN ivector ycormin; /* minimal y-coordinates of consensus tree nodes */ 223 224 /* splits for consensus */ 225 EXTERN int splitlength; /* length of one entry in splitpatterns */ 226 EXTERN uli maxbiparts; /* memory reserved for maxbiparts bipartitions */ 227 EXTERN uli numbiparts; /* number of different bipartitions */ 228 229 EXTERN int *splitsizes; /* size of all different splits of all trees */ 230 EXTERN uli *splitcomptemp; /* temp variable to compress bipartitions coding */ 231 EXTERN uli *splitfreqs; /* frequencies of different splits of all trees */ 232 EXTERN uli *splitpatterns; /* all different splits of all trees */ 233 234 EXTERN qsupportarr_t *qsupportarr; /* quartet support values per split */ 235 236 EXTERN uli consincluded; /* number of included biparts in the consensus tree */ 237 EXTERN uli consfifty; /* number of biparts >= 50% */ 238 EXTERN uli conscongruent; /* number of first incongruent bipart */ 239 #endif 240 241 /* variables */ 242 EXTERN cmatrix Seqchars; /* characters contained in data set */ 243 EXTERN ivector Seqgapchar; /* counter for gaps contained in sequence */ 244 EXTERN ivector Seqotherchar; /* counter for ambiguous contained in sequence */ 245 EXTERN cmatrix treepict; /* picture of consensus tree */ 246 EXTERN double minscore; /* value of edgescore on minedge */ 247 EXTERN double tstvf84; /* F84 transition/transversion ratio */ 248 EXTERN double tstvratio; /* expected transition/transversion ratio */ 249 EXTERN double yrtsratio; /* expected pyrimidine/purine transition ratio */ 250 EXTERN dvector ulkl; /* log L of user trees */ 251 EXTERN dmatrix allsites; /* log L per sites of utrees (numutrees,Numptrn) */ 252 EXTERN dvector ulklc; /* log L of utrees (clock) (numutrees,Numptrn) */ 253 EXTERN dmatrix allsitesc; /* log L per sites of user trees (clock) */ 254 EXTERN FILE *utfp; /* pointer to user tree file */ 255 EXTERN FILE *ofp; /* pointer to output file */ 256 EXTERN FILE *seqfp; /* pointer to sequence input file */ 257 EXTERN FILE *tfp; /* pointer to tree file */ 258 EXTERN FILE *dfp; /* pointer to distance file */ 259 EXTERN FILE *trifp; /* pointer to triangle file */ 260 EXTERN FILE *unresfp; /* pointer to file with unresolved quartets */ 261 EXTERN FILE *tmpfp; /* pointer to temporary file */ 262 EXTERN FILE *qptlist; /* pointer to file with puzzling step trees */ 263 EXTERN FILE *qptorder; /* pointer to file with unique puzzling step trees */ 264 EXTERN int SHcodon; /* whether SH should be applied to 1st, 2nd codon positions */ 265 EXTERN int utree_optn; /* use first user tree for estimation */ 266 EXTERN int listqptrees; /* list puzzling step trees */ 267 EXTERN int approxqp; /* approximate QP quartets */ 268 269 EXTERN int codon_optn; /* declares what positions in a codon should be used */ 270 EXTERN int compclock; /* computation of clocklike branch lengths */ 271 EXTERN int chooseA; /* leaf variable */ 272 EXTERN int chooseB; /* leaf variable */ 273 EXTERN int clustA, clustB, clustC, clustD; /* number of members of LM clusters */ 274 275 EXTERN int Frequ_optn; /* use empirical base frequencies */ 276 277 /* PSTEP_ORIG_H */ 278 EXTERN int Maxbrnch; /* 2*Maxspc - 3 */ 279 280 EXTERN int Maxseqc; /* number of sequence characters per taxum */ 281 EXTERN int mflag; /* flag used for correct printing of runtime messages */ 282 283 EXTERN int numclust; /* number of clusters in LM analysis */ 284 EXTERN int outgroup; /* outgroup */ 285 EXTERN int puzzlemode; /* computation of QP tree and/or ML distances */ 286 EXTERN int rootsearch; /* how location of root is found */ 287 EXTERN int rhetmode; /* model of rate heterogeneity */ 288 EXTERN int seqnumcheck; /* result of sequence number checkheterogeneity */ 289 EXTERN int usebestq_optn; /* use only best quartet topology, no bayesian weights */ 290 EXTERN int printrmatr_optn; /* print rate matrix to screen */ 291 EXTERN int usebranch_optn; /* use branch lengths given in usertree */ 292 EXTERN int show_optn; /* show unresolved quartets */ 293 EXTERN int savequart_optn; /* save memory block which quartets to file */ 294 EXTERN int savequartlh_optn; /* save quartet likelihoods to file */ 295 EXTERN int saveqlhbin_optn; /* save quartet likelihoods binary */ 296 EXTERN int savesitelh_optn; /* save site likelihoods (PHILIP-like) */ 297 EXTERN int savesitelhb_optn; /* save site likelihoods (binary) */ 298 EXTERN int savesiterate_optn; /* save site rates (PHILIP-like) */ 299 EXTERN int savesiterateb_optn; /* save site rates (binary) */ 300 EXTERN int readquart_optn; /* read memory block which quartets from file */ 301 EXTERN int fixedorder_optn; /* use fixed puzzling insert order 1,2,3,...n */ 302 #if 0 303 EXTERN int skipmlbranch_optn;/* skip ml branches in final tree */ 304 #endif 305 EXTERN int conssub50_optn; /* do consensus down to percentage of first incongruence/ambiguity */ 306 EXTERN int qsupport_optn; /* compute quartet support for the splits */ 307 EXTERN int dotreetest_optn; /* compute tree statistics (ELW, KH, SH) */ 308 EXTERN int dotreelh_optn; /* compute lh/branch lengths (QP,consensus) */ 309 EXTERN int setprefix_optn; /* use other FILEPREFIX that INFILENAME */ 310 EXTERN int consensus_optn; /* do consensus tree instead of user trees */ 311 EXTERN int sym_optn; /* symmetrize doublet frequencies */ 312 EXTERN int ytaxcounter; /* counter for establishing y-coordinates of all taxa */ 313 EXTERN int numutrees; /* number of users trees in input tree file */ 314 EXTERN ivector clusterA, clusterB, clusterC, clusterD; /* clusters for LM analysis */ 315 316 EXTERN ivector ycortax; /* y-coordinates of all taxa */ 317 318 EXTERN uli badqs; /* number of bad quartets */ 319 EXTERN uli unresqs; /* number of unresolved quartets, formerly badqs */ 320 EXTERN uli partresqs; /* number of partly resolved quartets */ 321 EXTERN uli fullresqs; /* number of fully resolved quartets */ 322 EXTERN uli missingqs; /* number of fully resolved quartets */ 323 EXTERN ulivector badtaxon; /* involment of each taxon in a bad quartet */ 324 EXTERN ulimatrix qinfomatr; /* sums of quartet topologies per taxon [0]=sum */ 325 EXTERN uli Currtrial; /* counter for puzzling steps */ 326 327 EXTERN uli Numquartets; /* number of quartets */ 328 EXTERN uli Numtrial; /* number of puzzling steps */ 329 EXTERN uli lmqts; /* quartets investigated in LM analysis (0 = ALL) */ 330 331 EXTERN int auto_datatype; /* guess datatype ? */ 332 EXTERN int guessdata_optn; /* guessed datatype */ 333 334 EXTERN int auto_aamodel; /* guess amino acid modell ? */ 335 EXTERN int guessauto_aamodel; /* guessed amino acid modell ? */ 336 EXTERN int guessDayhf_optn; /* guessed Dayhoff model option */ 337 EXTERN int guessJtt_optn; /* guessed JTT model option */ 338 EXTERN int guessblosum62_optn; /* guessed BLOSUM 62 model option */ 339 EXTERN int guessmtrev_optn; /* guessed mtREV model option */ 340 EXTERN int guesscprev_optn; /* guessed cpREV model option */ 341 EXTERN int guessvtmv_optn; /* guessed VT model option */ 342 EXTERN int guesswag_optn; /* guessed WAG model option */ 343 344 /* missing data: for handling subsets */ 345 EXTERN int readsubset_optn; /* guessed WAG model option */ 346 EXTERN int Maxsubset; /* number of subsets */ 347 EXTERN imatrix ss_setovlgraph; /* size of overlap >= 3 between 2 subsets */ 348 EXTERN imatrix ss_setoverlaps; /* size of overlap between 2 subsets */ 349 EXTERN imatrix ss_setovllist; /* list with ovlerlapping subsets */ 350 EXTERN ivector ss_setovllistsize; /* size of list with ovlerlapping subsets */ 351 EXTERN imatrix ss_matrix; /* boolean list: taxon in set? */ 352 EXTERN imatrix ss_list; /* list of taxa in set */ 353 EXTERN ivector ss_listsize; /* size of list with taxa */ 354 355 /* counter variables needed in likelihood mapping analysis */ 356 EXTERN uli ar1, ar2, ar3; 357 EXTERN uli reg1, reg2, reg3, reg4, reg5, reg6, reg7; 358 EXTERN uli reg1l, reg1r, reg2u, reg2d, reg3u, reg3d, 359 reg4u, reg4d, reg5l, reg5r, reg6u, reg6d; 360 EXTERN unsigned char *quartetinfo; /* place where quartets are stored */ 361 EXTERN dvector qweight; /* for use in QP and LM analysis */ 362 EXTERN dvector sqdiff; 363 EXTERN ivector qworder; 364 EXTERN ivector sqorder; 365 366 EXTERN int randseed; 367 EXTERN int psteptreestrlen; 368 369 #if 0 370 typedef struct treelistitemtypedummy { 371 struct treelistitemtypedummy *pred; 372 struct treelistitemtypedummy *succ; 373 struct treelistitemtypedummy *sortnext; 374 struct treelistitemtypedummy *sortlast; 375 char *tree; 376 int count; 377 int id; 378 int idx; 379 } treelistitemtype; 380 381 EXTERN treelistitemtype *psteptreelist; 382 EXTERN treelistitemtype *psteptreesortlist; 383 EXTERN int psteptreenum; 384 EXTERN int psteptreesum; 385 #endif 386 387 388 /* prototypes */ 389 void makeF84model(void); 390 void compnumqts(void); 391 void setoptions(void); 392 int openfiletoread(FILE **, char[], char[]); 393 int openfiletowrite(FILE **, char[], char[]); 394 int openfiletoappend(FILE **, char[], char[]); 395 void closefile(FILE *); 396 void symdoublets(void); 397 void computeexpectations(void); 398 void putdistance(FILE *); 399 void findidenticals(FILE *); 400 double averagedist(int maxspc, dmatrix distanmat, double *meandist, double *mindist, double *maxdist, double *stddevdist, double *vardist); 401 void initps(FILE *); 402 void plotlmpoint(FILE *, double, double); 403 void finishps(FILE *); 404 void makelmpoint(FILE *, double, double, double); 405 void timestamp(FILE *); 406 void writeoutputfile(FILE *, int); 407 408 /* definitions for writing output */ 409 #define WRITEALL 0 410 #define WRITEPARAMS 1 411 #define WRITEREST 2 412 413 void writetimesstat(FILE *ofp); 414 void writecutree(FILE *, int); 415 void starttimer(void); 416 void checktimer(uli); 417 418 /* void estimateparametersnotree(void); * moved to mlparam.c */ 419 /* void estimateparameterstree(void); * moved to mlparam.c */ 420 421 int main(int, char *[]); 422 int ulicmp(const void *, const void *); 423 int intcmp(const void *, const void *); 424 425 void readid(FILE *, int); 426 char readnextcharacter(FILE *, int, int); 427 428 /* skip rest of the line */ 429 void skiprestofline(FILE *ifp, /* input file stream */ 430 int notu, /* taxon number - for error msg. */ 431 int nsite); /* sequence position - for error msg. */ 432 433 /* skip control characters and blanks */ 434 void skipcntrl(FILE *ifp, /* input file stream */ 435 int notu, /* taxon number - for error msg. */ 436 int nsite); /* sequence position - for error msg. */ 437 438 /* read sequences of one data set */ 439 void getseqs(FILE *ifp, /* in: input file stream */ 440 int Maxspc, /* in: number of taxa */ 441 int Maxseqc, /* in: number of sites */ 442 cmatrix *seqch, /* out: alignment matrix */ 443 cmatrix identif); /* io: taxon names (10 char w/o stop) */ 444 445 /* initialize identifer arrays */ 446 void initid(int t, /* in: number of taxa */ 447 cmatrix *identif, /* out: name array w/o end of string */ 448 cmatrix *namestr); /* out: name array with end of string */ 449 450 /* copy undelimited identifer array to '\0' delimited identifer array */ 451 void identif2namestr(int num, /* number of taxa */ 452 cmatrix Identif, /* non-delimited names */ 453 cmatrix Namestr); /* proper delimited names */ 454 455 void fputid10(FILE *, int); 456 int fputid(FILE *, int); 457 458 /* read first line of sequence data set */ 459 void getsizesites(FILE *ifp, /* in: input file stream */ 460 int *Maxspc, /* out: number of taxa */ 461 int *Maxseqc); /* out: number of sites */ 462 463 /* read alignment from file */ 464 void readsequencefile(FILE *seqfp, /* in: sequence input file stream */ 465 int *Maxspc, /* out: number of taxa */ 466 int *Maxseqc, /* out: number of sites */ 467 cmatrix *identif, /* out: name array w/o end of str */ 468 cmatrix *namestr, /* out: name array with end of str */ 469 cmatrix *Seqchars); /* out: alignment matrix */ 470 471 /* read subsets from file */ 472 void readsubsetfile(FILE *seqfp, /* in: sequence input file stream */ 473 int Maxspc, /* in: number of taxa */ 474 cmatrix namestr, /* in: names taxa (seq file) */ 475 int *Maxsubset, /* out: number of subsets */ 476 imatrix *ss_setovlgraph, /* out: size of overlap >= 3 between 2 subsets */ 477 imatrix *ss_setoverlaps, /* out: size of overlap between 2 subsets */ 478 imatrix *ss_setovllist, /* out: list with ovlerlapping subsets */ 479 ivector *ss_setovllistsize, /* out: size of list with ovlerlapping subsets */ 480 imatrix *ss_matrix, /* out: boolean list: taxon in set? */ 481 imatrix *ss_list, /* out: list of taxa in set */ 482 ivector *ss_listsize); /* out: size of list with taxa */ 483 484 /* permute taxon order */ 485 void permutetaxa_ss(int Maxspc, /* in: number of taxa */ 486 ivector permutation, /* permuted taxon order */ 487 cmatrix namestr, /* in: names taxa (seq file) */ 488 int Maxsubset, /* out: number of subsets */ 489 imatrix ss_setovlgraph,/* out: size of overlap >= 3 between 2 subsets */ 490 imatrix ss_setoverlaps,/* out: size of overlap between 2 subsets */ 491 imatrix ss_setovllist, /* out: list with overlapping subsets */ 492 ivector ss_setovllistsize,/* out: size of list with ovlerlapping subsets */ 493 imatrix ss_matrix, /* out: bool list: taxon in set? */ 494 imatrix ss_list, /* out: list of taxa in set */ 495 ivector ss_listsize); /* out: size of list with taxa */ 496 497 /* print subsets */ 498 void fprintfss(FILE *ofp, /* in: output file stream */ 499 int Maxspc, /* in: number of taxa */ 500 int Maxsubset, /* out: number of subsets */ 501 imatrix ss_setovlgraph, /* out: size of overlap >= 3 between 2 subsets */ 502 imatrix ss_setoverlaps, /* out: size of overlap between 2 subsets */ 503 imatrix ss_setovllist, /* out: list with ovlerlapping subsets */ 504 ivector ss_setovllistsize, /* out: size of list with ovlerlapping subsets */ 505 imatrix ss_matrix, /* out: boolean list: taxon in set? */ 506 imatrix ss_list, /* out: list of taxa in set */ 507 ivector ss_listsize); /* out: size of list with taxa */ 508 509 /* check connectedness of subsets */ 510 void checkss(FILE *ofp, /* in: output file stream */ 511 int Maxspc, /* in: number of taxa */ 512 int Maxsubset, /* out: number of subsets */ 513 imatrix ss_setovlgraph, /* out: size of overlap >= 3 between 2 subsets */ 514 imatrix ss_setoverlaps, /* out: size of overlap between 2 subsets */ 515 imatrix ss_setovllist, /* out: list with ovlerlapping subsets */ 516 ivector ss_setovllistsize, /* out: size of list with ovlerlapping subsets */ 517 imatrix ss_matrix, /* out: boolean list: taxon in set? */ 518 imatrix ss_list, /* out: list of taxa in set */ 519 ivector ss_listsize); /* out: size of list with taxa */ 520 521 /* guess data type: NUCLEOTIDE:0, AMINOACID:1, BINARY:2 */ 522 int guessdatatype(cmatrix Seqchars, /* alignment matrix (Maxspc x Maxseqc) */ 523 int Maxspc, /* number of taxa */ 524 int Maxseqc); /* number of sites */ 525 526 void translatedataset(int maxspc, int maxseqc, int *maxsite, cmatrix seqchars, cmatrix *seqchar, ivector *seqgapchar, ivector *seqotherchar); 527 void estimatebasefreqs(void); 528 void guessmodel(void); 529 530 #if 0 531 int *initctree(); 532 void copytree_trueID(int *ctree, /* out: copy for effective sorting */ 533 int *trueID, /* in: permutation vector */ 534 ONEEDGE *edgeset, /* in: intermediate tree topology */ 535 int *edgeofleaf, /* dito. */ 536 int nextleaf); /* in: next free leaf (bound) */ 537 538 void freectree(int **snodes); 539 void printctree(int *ctree); 540 char *sprintfctree(int *ctree, int strlen); 541 void fprintffullpstree(FILE *outf, char *treestr); 542 int printfsortctree(int *ctree); 543 int sortctree(int *ctree); 544 int ct_1stedge(int node); 545 int ct_2ndedge(int node); 546 int ct_3rdedge(int node); 547 548 void printfpstrees(treelistitemtype *list); 549 void printfsortedpstrees(treelistitemtype *list); 550 void fprintfsortedpstrees(FILE *output, treelistitemtype *list, int itemnum, int itemsum, int comment, float cutoff); 551 552 void sortbynum(treelistitemtype *list, treelistitemtype **sortlist); 553 treelistitemtype *addtree2list(char **tree, 554 int numtrees, 555 treelistitemtype **list, 556 int *numitems, 557 int *numsum); 558 void freetreelist(treelistitemtype **list, 559 int *numitems, 560 int *numsum); 561 #endif 562 563 #if 0 564 void resetedgeinfo(void); 565 void incrementedgeinfo(int, int); 566 void minimumedgeinfo(void); 567 #endif 568 569 570 #if 0 571 void initconsensus(void); 572 573 /* recursive function to get bipartitions */ 574 void makepart_trueID(int i, /* */ 575 int curribrnch, /* in: current branch in traversal */ 576 ONEEDGE *edge, /* in: tree topology */ 577 int *edgeofleaf, /* in: ext. edge list */ 578 int *trueID, /* in: permutation list */ 579 cmatrix biparts, /* out: split strings, edge by edge */ 580 int Maxspc); /* in: number of species in tree */ 581 582 583 /* compute bipartitions of current puzzling step tree */ 584 void computebiparts_trueID(ONEEDGE *edge, /* in: tree topology */ 585 int *edgeofleaf, /* in: ext. edge list */ 586 int *trueID, /* in: permutation list */ 587 cmatrix biparts, /* out: splits */ 588 int outgroup, /* in: outgroup of tree */ 589 int Maxspc); /* in: number of taxa in tree */ 590 591 void printsplit(FILE *, uli); 592 593 void makenewsplitentries(cmatrix bip, /* in: split string vector */ 594 int numspl, /* in: no. of new splits */ 595 uli **in_splitpatterns,/* io: known compr splits */ 596 int **in_splitsizes, /* io: kn. split sizes: '.'*/ 597 uli **in_splitfreqs, /* io: kn. split frequences*/ 598 uli *in_numbiparts, /* io: no. of splits so far*/ 599 uli *in_maxbiparts, /* io: reserved memory */ 600 int Maxspc); /* in: no. of species */ 601 602 /* copy bipartition n of all different splitpatterns to consbiparts[k] */ 603 void copysplit(uli n, uli *splitpatterns, int k, cmatrix consbipartsvec); 604 605 void makeconsensus(uli, int); 606 607 /* write node (writeconsensustree) */ 608 void writenode(FILE *treefile, /* in: output stream */ 609 int node, /* current node */ 610 int qsupp_optn, /* 'print quartet support' flag */ 611 qsupportarr_t *qsupparr, /* quartet support values */ 612 int *column); /* current line position */ 613 614 /* write consensus tree */ 615 void writeconsensustree(FILE *treefile, /* in: output stream */ 616 int qsupp_optn, /* 'print quartsupp' flag */ 617 qsupportarr_t *qsupparr); /* quartet support values */ 618 619 void writeconsensustree(FILE *, int, qsupportarr_t *); 620 void nodecoordinates(int); 621 void drawnode(int, int); 622 void plotconsensustree(FILE *); 623 #endif 624 625 unsigned char *callocquartets(int); 626 void freequartets(void); 627 unsigned char readquartet(int, int, int, int); 628 void writequartet(int, int, int, int, unsigned char); 629 void sort3doubles(dvector, ivector); 630 void computeallquartets(void); 631 632 /* check the branching structure between the leaves (not the taxa!) 633 A, B, C, and I (A, B, C, I don't need to be ordered). As a result, 634 the two leaves that are closer related to each other than to leaf I 635 are found in chooseX and chooseY. If the branching structure is 636 not uniquely defined, chooseX and chooseY are chosen randomly 637 from the possible taxa */ 638 void checkquartet(int A, /* quartet taxon ID */ 639 int B, /* dito. */ 640 int C, /* dito. */ 641 int I, /* dito., to be inserted */ 642 int *chooseX, /* (chooseX,chooseY | x,I) */ 643 int *chooseY); /* chooseX+Y are non-neighbors of I */ 644 645 void checkquartet_trueID(int A, /* quartet leaf idx in permutation array */ 646 int B, /* dito. */ 647 int C, /* dito. */ 648 int I, /* dito., to be inserted */ 649 int *trueID, /* permutation array */ 650 int *chooseX, /* (chooseX,chooseY | x,I) */ 651 int *chooseY); /* chooseX+Y are non-neighbors of I */ 652 653 void num2quart(uli qnum, int *a, int *b, int *c, int *d); 654 uli numquarts(int maxspc); 655 uli quart2num (int a, int b, int c, int d); 656 657 void writetpqfheader(int nspec, FILE *ofp, int flag); 658 659 660 /* extracted from main (xxx) */ 661 void compute_quartlklhds(int a, int b, int c, int d, double *d1, double *d2, double *d3, int approx); 662 663 664 /* definitions for timing */ 665 666 #define OVERALL 0 667 #define GENERAL 1 668 #define OPTIONS 2 669 #define PARAMEST 3 670 #define QUARTETS 4 671 #define PUZZLING 5 672 #define TREEEVAL 6 673 674 typedef struct { 675 int currentjob; 676 clock_t tempcpu; 677 clock_t tempfullcpu; 678 clock_t tempcpustart; 679 time_t temptime; 680 time_t tempfulltime; 681 time_t temptimestart; 682 683 clock_t maxcpu; 684 clock_t mincpu; 685 time_t maxtime; 686 time_t mintime; 687 688 double maxcpublock; 689 double mincpublock; 690 double mincputick; 691 double mincputicktime; 692 double maxtimeblock; 693 double mintimeblock; 694 695 double generalcpu; 696 double optionscpu; 697 double paramestcpu; 698 double quartcpu; 699 double quartblockcpu; 700 double quartmaxcpu; 701 double quartmincpu; 702 double puzzcpu; 703 double puzzblockcpu; 704 double puzzmaxcpu; 705 double puzzmincpu; 706 double treecpu; 707 double treeblockcpu; 708 double treemaxcpu; 709 double treemincpu; 710 double cpu; 711 double fullcpu; 712 713 double generaltime; 714 double optionstime; 715 double paramesttime; 716 double quarttime; 717 double quartblocktime; 718 double quartmaxtime; 719 double quartmintime; 720 double puzztime; 721 double puzzblocktime; 722 double puzzmaxtime; 723 double puzzmintime; 724 double treetime; 725 double treeblocktime; 726 double treemaxtime; 727 double treemintime; 728 double time; 729 double fulltime; 730 } timearray_t; 731 732 EXTERN double cputime, walltime; 733 EXTERN double fullcpu, fulltime; 734 EXTERN double fullcputime, fullwalltime; 735 EXTERN double altcputime, altwalltime; 736 EXTERN clock_t cputimestart, cputimestop, cputimedummy; 737 EXTERN time_t walltimestart, walltimestop, walltimedummy; 738 EXTERN clock_t Startcpu; /* start cpu time */ 739 EXTERN clock_t Stopcpu; /* stop cpu time */ 740 EXTERN time_t MLstepStarttime; /* start time */ 741 EXTERN time_t MLstepStoptime; /* stop time */ 742 EXTERN time_t PStepStarttime; /* start time */ 743 EXTERN time_t PStepStoptime; /* stop time */ 744 EXTERN time_t PEstStarttime; /* start time */ 745 EXTERN time_t PEstStoptime; /* stop time */ 746 EXTERN time_t Starttime; /* start time */ 747 EXTERN time_t Stoptime; /* stop time */ 748 EXTERN time_t time0; /* timer variable */ 749 EXTERN time_t time1; /* yet another timer */ 750 EXTERN time_t time2; /* yet another timer */ 751 EXTERN timearray_t tarr; 752 753 void resetqblocktime(timearray_t *ta); 754 void resetpblocktime(timearray_t *ta); 755 void inittimearr(timearray_t *ta); 756 void addtimes(int jobtype, timearray_t *ta); 757 #ifdef TIMEDEBUG 758 void printtimearr(timearray_t *ta); 759 #endif /* TIMEDEBUG */ 760 761 /* compute Bayesian weights from log-lkls d1, d2, d3 */ 762 unsigned char loglkl2weight(int a, 763 int b, 764 int c, 765 int i, 766 double d1, 767 double d2, 768 double d3, 769 int usebestq); 770 771 #if 0 772 #ifdef PARALLEL 773 # include "ppuzzle.h" 774 #endif 775 #endif 776 #endif /* _PUZZLE_ */ 777 778