1 /*
2  * puzzle.h
3  *
4  *
5  * Part of TREE-PUZZLE 5.2 (July 2004)
6  *
7  * (c) 2003-2004 by Heiko A. Schmidt, Korbinian Strimmer, and Arndt von Haeseler
8  * (c) 1999-2003 by Heiko A. Schmidt, Korbinian Strimmer,
9  *                  M. Vingron, and Arndt von Haeseler
10  * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler
11  *
12  * All parts of the source except where indicated are distributed under
13  * the GNU public licence.  See http://www.opensource.org for details.
14  *
15  * ($Id$)
16  *
17  */
18 
19 
20 #ifndef _PUZZLE_
21 #define _PUZZLE_
22 
23 #ifndef PACKAGE
24 #  define PACKAGE    "tree-puzzle"
25 #endif
26 #ifndef VERSION
27 #  define VERSION    "5.2-generic"
28 #endif
29 #define DATE       "July 2004"
30 
31 /* prototypes */
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <time.h>
35 #include <math.h>
36 #include <ctype.h>
37 #include <string.h>
38 #include <limits.h>
39 #include <float.h>
40 #include "pstep.h"
41 #include "util.h"
42 #include "ml.h"
43 #include "consensus.h"
44 #include "treesort.h"
45 #ifdef PARALLEL
46 #  include "ppuzzle.h"
47 #endif
48 
49 
50 #define STDOUT stdout
51 
52 /* time check interval in seconds for ML/puzzling step status (900 = 15min) */
53 #  define TIMECHECK_INTERVAL 900
54 
55 /* filenames */
56 #  define FILENAMELENGTH 2048
57 
58 /* maximum exp difference, such that 1.0+exp(-TP_MAX_EXP_DIFF) == 1.0 */
59 #  define TP_MAX_EXP_DIFF 40
60 
61 #  define INFILEDEFAULT     "infile"
62 #  define OUTFILEDEFAULT    "outfile"
63 #  define TREEFILEDEFAULT   "outtree"
64 #  define INTREEDEFAULT     "intree"
65 #  define DISTANCESDEFAULT  "outdist"
66 #  define TRIANGLEDEFAULT   "outlm.eps"
67 #  define UNRESOLVEDDEFAULT "outqlist"
68 #  define ALLQUARTDEFAULT   "outallquart"
69 #  define ALLQUARTLHDEFAULT "outallquartlh"
70 #  define SITELHDEFAULT     "outsitelh"
71 #  define SITELHBDEFAULT    "outsitelhb"
72 #  define SITERATEDEFAULT   "outsiterate"
73 #  define SITERATEBDEFAULT  "outsiterateb"
74 #  define OUTPARAMDEFAULT   "outparam"
75 #  define SUBSETDEFAULT     "insubsetmatr"
76 #  define OUTPTLISTDEFAULT  "outpstep"
77 #  define OUTPTORDERDEFAULT "outptorder"
78 
79 #  define INFILE     infilename
80 #  define OUTFILE    outfilename
81 #  define TREEFILE   outtreename
82 #  define INTREE     intreename
83 #  define DISTANCES  outdistname
84 #  define TRIANGLE   outlmname
85 #  define UNRESOLVED outqlistname
86 #  define ALLQUART   outallquartname
87 #  define ALLQUARTLH outallquartlhname
88 #  define SITELH     outsitelhname
89 #  define SITELHB    outsitelhbname
90 #  define SITERATE   outsiteratename
91 #  define SITERATEB  outsiteratebname
92 #  define OUTPARAM   outparamname
93 #  define SUBSET     insubsetmatrname
94 #  define OUTPTLIST  outpstepname
95 #  define OUTPTORDER outptordername
96 
97 #  define FILEPREFIX fileprefixname
98 
99 EXTERN char infilename        [FILENAMELENGTH];
100 EXTERN char outfilename       [FILENAMELENGTH];
101 EXTERN char outtreename       [FILENAMELENGTH];
102 EXTERN char intreename        [FILENAMELENGTH];
103 EXTERN char outdistname       [FILENAMELENGTH];
104 EXTERN char outlmname         [FILENAMELENGTH];
105 EXTERN char outqlistname      [FILENAMELENGTH];
106 EXTERN char outallquartname   [FILENAMELENGTH];
107 EXTERN char outallquartlhname [FILENAMELENGTH];
108 EXTERN char outsitelhname     [FILENAMELENGTH];
109 EXTERN char outsitelhbname    [FILENAMELENGTH];
110 EXTERN char outsiteratename   [FILENAMELENGTH];
111 EXTERN char outsiteratebname  [FILENAMELENGTH];
112 EXTERN char outparamname      [FILENAMELENGTH];
113 EXTERN char insubsetmatrname  [FILENAMELENGTH];
114 EXTERN char outpstepname      [FILENAMELENGTH];
115 EXTERN char outptordername    [FILENAMELENGTH];
116 
117 EXTERN char fileprefixname     [FILENAMELENGTH];
118 
119 
120 #define OUTFILEEXT    "puzzle"
121 #define TREEFILEEXT   "tree"
122 #define DISTANCESEXT  "dist"
123 #define TRIANGLEEXT   "eps"
124 #define UNRESOLVEDEXT "qlist"
125 #define ALLQUARTEXT   "allquart"
126 #define ALLQUARTLHEXT "allquartlh"
127 #define SITELHEXT     "sitelh"
128 #define SITELHBEXT    "sitelhb"
129 #define SITERATEEXT   "siterate"
130 #define SITERATEBEXT  "siterateb"
131 #define OUTPARAMEXT   "param"
132 #define SUBSETEXT     "subsetmatr"
133 #define OUTPTLISTEXT  "pstep"
134 #define OUTPTORDEREXT "ptorder"
135 
136 
137 /* resetprefix values  (xxx) */
138 #define RESET_NONE   0
139 #define RESET_INFILE 1
140 #define RESET_INTREE 2
141 
142 /* setprefix_optn values  (xxx) */
143 #define SETPREFIX_NONE   0
144 #define SETPREFIX_INFILE 1
145 #define SETPREFIX_INTREE 2
146 #define SETPREFIX_RESET  3
147 #define SETPREFIX_PREFIX 4
148 
149 /* auto_aamodel/auto_datatype values  (xxx) */
150 #define AUTO_OFF      0
151 #define AUTO_GUESS    1
152 #define AUTO_DEFAULT  2
153 
154 
155 /* qptlist values  (xxx) */
156 #define PSTOUT_NONE      0
157 #define PSTOUT_ORDER     1
158 #define PSTOUT_LISTORDER 2
159 #define PSTOUT_LIST      3
160 
161 /* dtat_optn values  (xxx) */
162 #define NUCLEOTIDE 0
163 #define AMINOACID  1
164 #define BINARY     2
165 
166 /* typ_optn values  (xxx) */
167 #define TREERECON_OPTN 0
168 #define LIKMAPING_OPTN 1
169 #define NUMTYPES 2
170 
171 /* puzzlemodes (xxx) */
172 #define QUARTPUZ 0
173 #define USERTREE 1
174 #define CONSENSUS 2
175 #define PAIRDIST 3
176 #define NUMPUZZLEMODES 4
177 
178 /* modes for rootsearch (xxx) */
179 #define ROOT_USER 0
180 #define ROOT_AUTO 1
181 #define ROOT_DISPLAYED 2
182 #define NUMROOTSEARCH 3
183 
184 #if 0
185 /* rhetmodes (xxx) Modes of rate heterogeneity */
186 #define UNIFORMRATE 0
187 #define GAMMARATE   1
188 #define TWORATE     2
189 #define MIXEDRATE   3
190 #endif
191 
192 /* defines for types of quartet likelihood computation (xxx) */
193 #define EXACT  0
194 #define APPROX 1
195 
196 /* defines for results of sequence number check (seqnumcheck) */
197 #define SEQNUM_OK 0
198 #define SEQNUM_TOOFEW 1
199 #define SEQNUM_TOOMANY 2
200 
201 
202 #if 0
203 	typedef struct {
204 		uli fullres_pro;
205 		uli fullres_con;
206 		uli partres_pro;
207 		uli partres_con;
208 		uli unres;
209 		uli missing;
210 		uli qsum;
211 	} qsupportarr_t;
212 
213 	EXTERN cmatrix biparts;      /* bipartitions of tree of current puzzling step */
214 	EXTERN cmatrix consbiparts;  /* bipartitions of majority rule consensus tree */
215 
216 	EXTERN int xsize;            /* depth of consensus tree picture */
217 	EXTERN ivector consconfid;   /* confidence values of majority rule consensus tree */
218 	EXTERN ivector conssizes;    /* partition sizes of majority rule consensus tree */
219 	EXTERN ivector xcor;         /* x-coordinates of consensus tree nodes */
220 	EXTERN ivector ycor;         /* y-coordinates of consensus tree nodes */
221 	EXTERN ivector ycormax;      /* maximal y-coordinates of consensus tree nodes */
222 	EXTERN ivector ycormin;      /* minimal y-coordinates of consensus tree nodes */
223 
224 	/* splits for consensus */
225 	EXTERN int splitlength;      /* length of one entry in splitpatterns */
226 	EXTERN uli maxbiparts;       /* memory reserved for maxbiparts bipartitions */
227 	EXTERN uli numbiparts;       /* number of different bipartitions */
228 
229 	EXTERN int *splitsizes;      /* size of all different splits of all trees */
230 	EXTERN uli *splitcomptemp;   /* temp variable to compress bipartitions coding */
231 	EXTERN uli *splitfreqs;      /* frequencies of different splits of all trees */
232 	EXTERN uli *splitpatterns;   /* all different splits of all trees */
233 
234 	EXTERN qsupportarr_t *qsupportarr; /* quartet support values per split */
235 
236 	EXTERN uli consincluded;     /* number of included biparts in the consensus tree */
237 	EXTERN uli consfifty;        /* number of biparts >= 50% */
238 	EXTERN uli conscongruent;    /* number of first incongruent bipart */
239 #endif
240 
241 /* variables */
242 EXTERN cmatrix Seqchars;     /* characters contained in data set */
243 EXTERN ivector Seqgapchar;   /* counter for gaps contained in sequence */
244 EXTERN ivector Seqotherchar; /* counter for ambiguous contained in sequence */
245 EXTERN cmatrix treepict;     /* picture of consensus tree */
246 EXTERN double minscore;      /* value of edgescore on minedge */
247 EXTERN double tstvf84;       /* F84 transition/transversion ratio */
248 EXTERN double tstvratio;     /* expected transition/transversion ratio */
249 EXTERN double yrtsratio;     /* expected pyrimidine/purine transition ratio */
250 EXTERN dvector ulkl;         /* log L of user trees */
251 EXTERN dmatrix allsites;     /* log L per sites of utrees (numutrees,Numptrn) */
252 EXTERN dvector ulklc;        /* log L of utrees (clock) (numutrees,Numptrn) */
253 EXTERN dmatrix allsitesc;    /* log L per sites of user trees (clock) */
254 EXTERN FILE *utfp;           /* pointer to user tree file */
255 EXTERN FILE *ofp;            /* pointer to output file */
256 EXTERN FILE *seqfp;          /* pointer to sequence input file */
257 EXTERN FILE *tfp;            /* pointer to tree file */
258 EXTERN FILE *dfp;            /* pointer to distance file */
259 EXTERN FILE *trifp;          /* pointer to triangle file */
260 EXTERN FILE *unresfp;        /* pointer to file with unresolved quartets */
261 EXTERN FILE *tmpfp;          /* pointer to temporary file */
262 EXTERN FILE *qptlist;        /* pointer to file with puzzling step trees */
263 EXTERN FILE *qptorder;       /* pointer to file with unique puzzling step trees */
264 EXTERN int SHcodon;          /* whether SH should be applied to 1st, 2nd codon positions */
265 EXTERN int utree_optn;       /* use first user tree for estimation */
266 EXTERN int listqptrees;      /* list puzzling step trees */
267 EXTERN int approxqp;         /* approximate QP quartets */
268 
269 EXTERN int codon_optn;       /* declares what positions in a codon should be used */
270 EXTERN int compclock;        /* computation of clocklike branch lengths */
271 EXTERN int chooseA;          /* leaf variable */
272 EXTERN int chooseB;          /* leaf variable */
273 EXTERN int clustA, clustB, clustC, clustD; /* number of members of LM clusters */
274 
275 EXTERN int Frequ_optn;       /* use empirical base frequencies */
276 
277 /* PSTEP_ORIG_H */
278 EXTERN int Maxbrnch;         /* 2*Maxspc - 3 */
279 
280 EXTERN int Maxseqc;          /* number of sequence characters per taxum */
281 EXTERN int mflag;            /* flag used for correct printing of runtime messages */
282 
283 EXTERN int numclust;         /* number of clusters in LM analysis */
284 EXTERN int outgroup;         /* outgroup */
285 EXTERN int puzzlemode;       /* computation of QP tree and/or ML distances */
286 EXTERN int rootsearch;       /* how location of root is found */
287 EXTERN int rhetmode;         /* model of rate heterogeneity */
288 EXTERN int seqnumcheck;      /* result of sequence number checkheterogeneity  */
289 EXTERN int usebestq_optn;    /* use only best quartet topology, no bayesian weights */
290 EXTERN int printrmatr_optn;  /* print rate matrix to screen */
291 EXTERN int usebranch_optn;   /* use branch lengths given in usertree */
292 EXTERN int show_optn;        /* show unresolved quartets    */
293 EXTERN int savequart_optn;   /* save memory block which quartets to file */
294 EXTERN int savequartlh_optn; /* save quartet likelihoods to file */
295 EXTERN int saveqlhbin_optn;  /* save quartet likelihoods binary */
296 EXTERN int savesitelh_optn;  /* save site likelihoods (PHILIP-like) */
297 EXTERN int savesitelhb_optn; /* save site likelihoods (binary) */
298 EXTERN int savesiterate_optn;  /* save site rates (PHILIP-like) */
299 EXTERN int savesiterateb_optn; /* save site rates (binary) */
300 EXTERN int readquart_optn;   /* read memory block which quartets from file */
301 EXTERN int fixedorder_optn;  /* use fixed puzzling insert order 1,2,3,...n */
302 #if 0
303 EXTERN int skipmlbranch_optn;/* skip ml branches in final tree */
304 #endif
305 EXTERN int conssub50_optn;   /* do  consensus down to percentage of first incongruence/ambiguity */
306 EXTERN int qsupport_optn;    /* compute quartet support for the splits */
307 EXTERN int dotreetest_optn;  /* compute tree statistics (ELW, KH, SH) */
308 EXTERN int dotreelh_optn;    /* compute lh/branch lengths (QP,consensus) */
309 EXTERN int setprefix_optn;   /* use other FILEPREFIX that INFILENAME */
310 EXTERN int consensus_optn;   /* do consensus tree instead of user trees */
311 EXTERN int sym_optn;         /* symmetrize doublet frequencies */
312 EXTERN int ytaxcounter;      /* counter for establishing y-coordinates of all taxa */
313 EXTERN int numutrees;        /* number of users trees in input tree file */
314 EXTERN ivector clusterA, clusterB, clusterC, clusterD;  /* clusters for LM analysis */
315 
316 EXTERN ivector ycortax;      /* y-coordinates of all taxa */
317 
318 EXTERN uli badqs;            /* number of bad quartets */
319 EXTERN uli unresqs;          /* number of unresolved quartets, formerly badqs */
320 EXTERN uli partresqs;        /* number of partly resolved quartets */
321 EXTERN uli fullresqs;        /* number of fully resolved quartets */
322 EXTERN uli missingqs;        /* number of fully resolved quartets */
323 EXTERN ulivector badtaxon;   /* involment of each taxon in a bad quartet */
324 EXTERN ulimatrix qinfomatr;  /* sums of quartet topologies per taxon [0]=sum */
325 EXTERN uli Currtrial;        /* counter for puzzling steps */
326 
327 EXTERN uli Numquartets;      /* number of quartets */
328 EXTERN uli Numtrial;         /* number of puzzling steps */
329 EXTERN uli lmqts;            /* quartets investigated in LM analysis (0 = ALL) */
330 
331 EXTERN int auto_datatype;       /* guess datatype ? */
332 EXTERN int guessdata_optn;      /* guessed datatype */
333 
334 EXTERN int auto_aamodel;        /* guess amino acid modell ? */
335 EXTERN int guessauto_aamodel;   /* guessed amino acid modell ? */
336 EXTERN int guessDayhf_optn;     /* guessed Dayhoff model option */
337 EXTERN int guessJtt_optn;       /* guessed JTT model option */
338 EXTERN int guessblosum62_optn;  /* guessed BLOSUM 62 model option */
339 EXTERN int guessmtrev_optn;     /* guessed mtREV model option */
340 EXTERN int guesscprev_optn;     /* guessed cpREV model option */
341 EXTERN int guessvtmv_optn;      /* guessed VT model option */
342 EXTERN int guesswag_optn;       /* guessed WAG model option */
343 
344 /* missing data: for handling subsets */
345 EXTERN int     readsubset_optn;           /* guessed WAG model option */
346 EXTERN int     Maxsubset;                 /* number of subsets */
347 EXTERN imatrix ss_setovlgraph;            /* size of overlap >= 3 between 2 subsets */
348 EXTERN imatrix ss_setoverlaps;            /* size of overlap between 2 subsets */
349 EXTERN imatrix ss_setovllist;             /* list with ovlerlapping subsets */
350 EXTERN ivector ss_setovllistsize;         /* size of list with ovlerlapping subsets */
351 EXTERN imatrix ss_matrix;                 /* boolean list: taxon in set? */
352 EXTERN imatrix ss_list;                   /* list of taxa in set */
353 EXTERN ivector ss_listsize;                /* size of list with taxa */
354 
355 /* counter variables needed in likelihood mapping analysis */
356 EXTERN uli ar1, ar2, ar3;
357 EXTERN uli reg1, reg2, reg3, reg4, reg5, reg6, reg7;
358 EXTERN uli reg1l, reg1r, reg2u, reg2d, reg3u, reg3d,
359  reg4u, reg4d, reg5l, reg5r, reg6u, reg6d;
360 EXTERN unsigned char *quartetinfo; /* place where quartets are stored */
361 EXTERN dvector qweight; /* for use in QP and LM analysis */
362 EXTERN dvector sqdiff;
363 EXTERN ivector qworder;
364 EXTERN ivector sqorder;
365 
366 EXTERN int randseed;
367 EXTERN int psteptreestrlen;
368 
369 #if 0
370 	typedef struct treelistitemtypedummy {
371 		struct treelistitemtypedummy *pred;
372 		struct treelistitemtypedummy *succ;
373 		struct treelistitemtypedummy *sortnext;
374 		struct treelistitemtypedummy *sortlast;
375 		char  *tree;
376 		int    count;
377 		int    id;
378 		int    idx;
379 	} treelistitemtype;
380 
381 	EXTERN treelistitemtype *psteptreelist;
382 	EXTERN treelistitemtype *psteptreesortlist;
383 	EXTERN int               psteptreenum;
384 	EXTERN int               psteptreesum;
385 #endif
386 
387 
388 /* prototypes */
389 void makeF84model(void);
390 void compnumqts(void);
391 void setoptions(void);
392 int  openfiletoread(FILE **, char[], char[]);
393 int  openfiletowrite(FILE **, char[], char[]);
394 int  openfiletoappend(FILE **, char[], char[]);
395 void closefile(FILE *);
396 void symdoublets(void);
397 void computeexpectations(void);
398 void putdistance(FILE *);
399 void findidenticals(FILE *);
400 double averagedist(int maxspc, dmatrix  distanmat, double *meandist, double *mindist, double *maxdist, double *stddevdist, double *vardist);
401 void initps(FILE *);
402 void plotlmpoint(FILE *, double, double);
403 void finishps(FILE *);
404 void makelmpoint(FILE *, double, double, double);
405 void timestamp(FILE *);
406 void writeoutputfile(FILE *, int);
407 
408 /* definitions for writing output */
409 #define WRITEALL    0
410 #define WRITEPARAMS 1
411 #define WRITEREST   2
412 
413 void writetimesstat(FILE *ofp);
414 void writecutree(FILE *, int);
415 void starttimer(void);
416 void checktimer(uli);
417 
418 /* void estimateparametersnotree(void); * moved to mlparam.c */
419 /* void estimateparameterstree(void);   * moved to mlparam.c */
420 
421 int main(int, char *[]);
422 int ulicmp(const void *, const void *);
423 int intcmp(const void *, const void *);
424 
425 void readid(FILE *, int);
426 char readnextcharacter(FILE *, int, int);
427 
428 /* skip rest of the line */
429 void skiprestofline(FILE *ifp,    /* input file stream */
430                     int   notu,   /* taxon number      - for error msg. */
431                     int   nsite); /* sequence position - for error msg. */
432 
433 /* skip control characters and blanks */
434 void skipcntrl(FILE *ifp,    /* input file stream */
435                int   notu,   /* taxon number      - for error msg. */
436                int   nsite); /* sequence position - for error msg. */
437 
438 /* read sequences of one data set */
439 void getseqs(FILE    *ifp,      /* in:  input file stream */
440              int      Maxspc,   /* in:  number of taxa */
441              int      Maxseqc,  /* in:  number of sites */
442              cmatrix *seqch,    /* out: alignment matrix */
443              cmatrix  identif); /* io:  taxon names (10 char w/o stop) */
444 
445 /* initialize identifer arrays */
446 void initid(int      t,        /* in:  number of taxa */
447             cmatrix *identif,  /* out: name array w/o end of string */
448             cmatrix *namestr); /* out: name array with end of string */
449 
450 /* copy undelimited identifer array to '\0' delimited identifer array */
451 void identif2namestr(int     num,      /* number of taxa         */
452                      cmatrix Identif,  /* non-delimited names    */
453                      cmatrix Namestr); /* proper delimited names */
454 
455 void fputid10(FILE *, int);
456 int fputid(FILE *, int);
457 
458 /* read first line of sequence data set */
459 void getsizesites(FILE *ifp,      /* in: input file stream */
460                   int  *Maxspc,   /* out: number of taxa */
461                   int  *Maxseqc); /* out: number of sites */
462 
463 /* read alignment from file */
464 void readsequencefile(FILE    *seqfp,     /* in:  sequence input file stream */
465                       int     *Maxspc,    /* out: number of taxa             */
466                       int     *Maxseqc,   /* out: number of sites            */
467                       cmatrix *identif,   /* out: name array w/o end of str  */
468                       cmatrix *namestr,   /* out: name array with end of str */
469                       cmatrix *Seqchars); /* out: alignment matrix           */
470 
471 /* read subsets from file */
472 void readsubsetfile(FILE    *seqfp,              /* in:  sequence input file stream */
473                     int      Maxspc,             /* in:  number of taxa             */
474                     cmatrix  namestr,            /* in:  names taxa (seq file)      */
475                     int     *Maxsubset,          /* out: number of subsets          */
476                     imatrix *ss_setovlgraph,     /* out: size of overlap >= 3 between 2 subsets */
477                     imatrix *ss_setoverlaps,     /* out: size of overlap between 2 subsets */
478                     imatrix *ss_setovllist,      /* out: list with ovlerlapping subsets */
479                     ivector *ss_setovllistsize,  /* out: size of list with ovlerlapping subsets */
480                     imatrix *ss_matrix,          /* out: boolean list: taxon in set? */
481                     imatrix *ss_list,            /* out: list of taxa in set */
482                     ivector *ss_listsize);        /* out: size of list with taxa */
483 
484 /* permute taxon order */
485 void permutetaxa_ss(int      Maxspc,        /* in:  number of taxa           */
486                     ivector  permutation,   /* permuted taxon order          */
487                     cmatrix  namestr,       /* in:  names taxa (seq file)    */
488                     int      Maxsubset,     /* out: number of subsets        */
489                     imatrix  ss_setovlgraph,/* out: size of overlap >= 3 between 2 subsets */
490                     imatrix  ss_setoverlaps,/* out: size of overlap between 2 subsets */
491                     imatrix  ss_setovllist, /* out: list with overlapping subsets */
492                     ivector  ss_setovllistsize,/* out: size of list with ovlerlapping subsets */
493                     imatrix  ss_matrix,     /* out: bool list: taxon in set? */
494                     imatrix  ss_list,       /* out: list of taxa in set */
495                     ivector  ss_listsize);  /* out: size of list with taxa */
496 
497 /* print subsets */
498 void fprintfss(FILE    *ofp,              /* in:  output file stream         */
499              int      Maxspc,             /* in:  number of taxa             */
500              int      Maxsubset,          /* out: number of subsets          */
501              imatrix  ss_setovlgraph,     /* out: size of overlap >= 3 between 2 subsets */
502              imatrix  ss_setoverlaps,     /* out: size of overlap between 2 subsets */
503              imatrix  ss_setovllist,      /* out: list with ovlerlapping subsets */
504              ivector  ss_setovllistsize,  /* out: size of list with ovlerlapping subsets */
505              imatrix  ss_matrix,          /* out: boolean list: taxon in set? */
506              imatrix  ss_list,            /* out: list of taxa in set */
507              ivector  ss_listsize);        /* out: size of list with taxa */
508 
509 /* check connectedness of subsets */
510 void checkss(FILE    *ofp,              /* in:  output file stream         */
511              int      Maxspc,             /* in:  number of taxa             */
512              int      Maxsubset,          /* out: number of subsets          */
513              imatrix  ss_setovlgraph,     /* out: size of overlap >= 3 between 2 subsets */
514              imatrix  ss_setoverlaps,     /* out: size of overlap between 2 subsets */
515              imatrix  ss_setovllist,      /* out: list with ovlerlapping subsets */
516              ivector  ss_setovllistsize,  /* out: size of list with ovlerlapping subsets */
517              imatrix  ss_matrix,          /* out: boolean list: taxon in set? */
518              imatrix  ss_list,            /* out: list of taxa in set */
519              ivector  ss_listsize);        /* out: size of list with taxa */
520 
521 /* guess data type: NUCLEOTIDE:0, AMINOACID:1, BINARY:2 */
522 int guessdatatype(cmatrix Seqchars,   /* alignment matrix (Maxspc x Maxseqc) */
523                   int     Maxspc,     /* number of taxa */
524                   int     Maxseqc);   /* number of sites */
525 
526 void translatedataset(int maxspc, int maxseqc, int *maxsite, cmatrix seqchars, cmatrix *seqchar, ivector *seqgapchar, ivector *seqotherchar);
527 void estimatebasefreqs(void);
528 void guessmodel(void);
529 
530 #if 0
531 	int *initctree();
532 	void copytree_trueID(int          *ctree,      /* out: copy for effective sorting */
533               	int          *trueID,     /* in:  permutation vector         */
534               	ONEEDGE      *edgeset,    /* in: intermediate tree topology  */
535               	int          *edgeofleaf, /*     dito.                       */
536               	int           nextleaf);  /* in: next free leaf (bound)      */
537 
538 	void freectree(int **snodes);
539 	void printctree(int *ctree);
540 	char *sprintfctree(int *ctree, int strlen);
541 	void fprintffullpstree(FILE *outf, char *treestr);
542 	int printfsortctree(int *ctree);
543 	int sortctree(int *ctree);
544 	int ct_1stedge(int node);
545 	int ct_2ndedge(int node);
546 	int ct_3rdedge(int node);
547 
548 	void printfpstrees(treelistitemtype *list);
549 	void printfsortedpstrees(treelistitemtype *list);
550 	void fprintfsortedpstrees(FILE *output, treelistitemtype *list, int itemnum, int itemsum, int comment, float cutoff);
551 
552 	void sortbynum(treelistitemtype *list, treelistitemtype **sortlist);
553 	treelistitemtype *addtree2list(char             **tree,
554                                int                numtrees,
555                                treelistitemtype **list,
556                                int               *numitems,
557                                int               *numsum);
558 	void freetreelist(treelistitemtype **list,
559                   	int               *numitems,
560                   	int               *numsum);
561 #endif
562 
563 #if 0
564 	void resetedgeinfo(void);
565 	void incrementedgeinfo(int, int);
566 	void minimumedgeinfo(void);
567 #endif
568 
569 
570 #if 0
571 	void initconsensus(void);
572 
573 	/* recursive function to get bipartitions */
574 	void makepart_trueID(int           i,          /*  */
575               	int           curribrnch, /* in: current branch in traversal */
576               	ONEEDGE      *edge,       /* in: tree topology    */
577               	int          *edgeofleaf, /* in: ext. edge list   */
578               	int          *trueID,     /* in: permutation list */
579               	cmatrix       biparts,    /* out: split strings, edge by edge */
580               	int           Maxspc);	/* in: number of species in tree */
581 
582 
583 	/* compute bipartitions of current puzzling step tree */
584 	void computebiparts_trueID(ONEEDGE      *edge,       /* in: tree topology          */
585                     	int          *edgeofleaf, /* in: ext. edge list         */
586                     	int          *trueID,     /* in: permutation list       */
587                     	cmatrix       biparts,    /* out: splits                */
588                     	int           outgroup,   /* in: outgroup of tree       */
589                     	int           Maxspc);    /* in: number of taxa in tree */
590 
591 	void printsplit(FILE *, uli);
592 
593 	void makenewsplitentries(cmatrix  bip,             /* in: split string vector */
594                          	int      numspl,          /* in: no. of new splits   */
595                          	uli    **in_splitpatterns,/* io: known compr splits  */
596                          	int    **in_splitsizes,   /* io: kn. split sizes: '.'*/
597                          	uli    **in_splitfreqs,   /* io: kn. split frequences*/
598                          	uli     *in_numbiparts,   /* io: no. of splits so far*/
599 	                 	uli     *in_maxbiparts,   /* io: reserved memory     */
600                          	int      Maxspc);         /* in: no. of species      */
601 
602 	/* copy bipartition n of all different splitpatterns to consbiparts[k] */
603 	void copysplit(uli n, uli *splitpatterns, int k, cmatrix consbipartsvec);
604 
605 	void makeconsensus(uli, int);
606 
607 	/* write node (writeconsensustree) */
608 	void writenode(FILE          *treefile,    /* in: output stream */
609                	int            node,        /* current node */
610                	int            qsupp_optn,  /* 'print quartet support' flag */
611                	qsupportarr_t *qsupparr,    /* quartet support values */
612                	int           *column);     /* current line position */
613 
614 	/* write consensus tree */
615 	void writeconsensustree(FILE          *treefile,   /* in: output stream */
616                         	int            qsupp_optn, /* 'print quartsupp' flag */
617                         	qsupportarr_t *qsupparr);  /* quartet support values */
618 
619 	void writeconsensustree(FILE *, int, qsupportarr_t *);
620 	void nodecoordinates(int);
621 	void drawnode(int, int);
622 	void plotconsensustree(FILE *);
623 #endif
624 
625 unsigned char *callocquartets(int);
626 void freequartets(void);
627 unsigned char readquartet(int, int, int, int);
628 void writequartet(int, int, int, int, unsigned char);
629 void sort3doubles(dvector, ivector);
630 void computeallquartets(void);
631 
632 /* check the branching structure between the leaves (not the taxa!)
633    A, B, C, and I (A, B, C, I don't need to be ordered). As a result,
634    the two leaves that are closer related to each other than to leaf I
635    are found in chooseX and chooseY. If the branching structure is
636    not uniquely defined, chooseX and chooseY are chosen randomly
637    from the possible taxa */
638 void checkquartet(int  A,          /* quartet taxon ID                 */
639                   int  B,          /* dito.                            */
640                   int  C,          /* dito.                            */
641                   int  I,          /* dito., to be inserted            */
642                   int *chooseX,    /* (chooseX,chooseY | x,I)          */
643                   int *chooseY);   /* chooseX+Y are non-neighbors of I */
644 
645 void checkquartet_trueID(int  A,   /* quartet leaf idx in permutation array */
646                   int  B,          /* dito.                                 */
647                   int  C,          /* dito.                                 */
648                   int  I,          /* dito., to be inserted                 */
649                   int *trueID,     /* permutation array                     */
650                   int *chooseX,    /* (chooseX,chooseY | x,I)               */
651                   int *chooseY);   /* chooseX+Y are non-neighbors of I      */
652 
653 void num2quart(uli qnum, int *a, int *b, int *c, int *d);
654 uli numquarts(int maxspc);
655 uli quart2num (int a, int b, int c, int d);
656 
657 void writetpqfheader(int nspec, FILE *ofp, int flag);
658 
659 
660 /* extracted from main (xxx) */
661 void compute_quartlklhds(int a, int b, int c, int d, double *d1, double *d2, double *d3, int approx);
662 
663 
664 /* definitions for timing */
665 
666 #define OVERALL   0
667 #define GENERAL   1
668 #define OPTIONS   2
669 #define PARAMEST  3
670 #define QUARTETS  4
671 #define PUZZLING  5
672 #define TREEEVAL  6
673 
674 typedef struct {
675 	int      currentjob;
676 	clock_t  tempcpu;
677 	clock_t  tempfullcpu;
678 	clock_t  tempcpustart;
679 	time_t   temptime;
680 	time_t   tempfulltime;
681 	time_t   temptimestart;
682 
683         clock_t  maxcpu;
684 	clock_t  mincpu;
685 	time_t   maxtime;
686 	time_t   mintime;
687 
688 	double   maxcpublock;
689 	double   mincpublock;
690 	double   mincputick;
691 	double   mincputicktime;
692 	double   maxtimeblock;
693 	double   mintimeblock;
694 
695 	double   generalcpu;
696 	double   optionscpu;
697 	double   paramestcpu;
698 	double   quartcpu;
699 	double   quartblockcpu;
700 	double   quartmaxcpu;
701 	double   quartmincpu;
702 	double   puzzcpu;
703 	double   puzzblockcpu;
704 	double   puzzmaxcpu;
705 	double   puzzmincpu;
706 	double   treecpu;
707 	double   treeblockcpu;
708 	double   treemaxcpu;
709 	double   treemincpu;
710 	double   cpu;
711 	double   fullcpu;
712 
713 	double   generaltime;
714 	double   optionstime;
715 	double   paramesttime;
716 	double   quarttime;
717 	double   quartblocktime;
718 	double   quartmaxtime;
719 	double   quartmintime;
720 	double   puzztime;
721 	double   puzzblocktime;
722 	double   puzzmaxtime;
723 	double   puzzmintime;
724 	double   treetime;
725 	double   treeblocktime;
726 	double   treemaxtime;
727 	double   treemintime;
728 	double   time;
729 	double   fulltime;
730 } timearray_t;
731 
732 EXTERN double cputime, walltime;
733 EXTERN double fullcpu, fulltime;
734 EXTERN double fullcputime, fullwalltime;
735 EXTERN double altcputime, altwalltime;
736 EXTERN clock_t cputimestart,  cputimestop, cputimedummy;
737 EXTERN time_t  walltimestart, walltimestop, walltimedummy;
738 EXTERN clock_t Startcpu;     /* start cpu time */
739 EXTERN clock_t Stopcpu;      /* stop cpu time */
740 EXTERN time_t MLstepStarttime;     /* start time */
741 EXTERN time_t MLstepStoptime;      /* stop time */
742 EXTERN time_t PStepStarttime;     /* start time */
743 EXTERN time_t PStepStoptime;      /* stop time */
744 EXTERN time_t PEstStarttime;     /* start time */
745 EXTERN time_t PEstStoptime;      /* stop time */
746 EXTERN time_t Starttime;     /* start time */
747 EXTERN time_t Stoptime;      /* stop time */
748 EXTERN time_t time0;         /* timer variable */
749 EXTERN time_t time1;         /* yet another timer */
750 EXTERN time_t time2;         /* yet another timer */
751 EXTERN timearray_t tarr;
752 
753 void resetqblocktime(timearray_t *ta);
754 void resetpblocktime(timearray_t *ta);
755 void inittimearr(timearray_t *ta);
756 void addtimes(int jobtype, timearray_t *ta);
757 #ifdef TIMEDEBUG
758   void printtimearr(timearray_t *ta);
759 #endif /* TIMEDEBUG */
760 
761 /* compute Bayesian weights from log-lkls d1, d2, d3 */
762 unsigned char loglkl2weight(int    a,
763                             int    b,
764                             int    c,
765                             int    i,
766                             double d1,
767                             double d2,
768                             double d3,
769                             int    usebestq);
770 
771 #if 0
772 #ifdef PARALLEL
773 #  include "ppuzzle.h"
774 #endif
775 #endif
776 #endif /* _PUZZLE_ */
777 
778