1 /*
2 * puzzle1.c
3 *
4 *
5 * Part of TREE-PUZZLE 5.2 (July 2004)
6 *
7 * (c) 2003-2004 by Heiko A. Schmidt, Korbinian Strimmer, and Arndt von Haeseler
8 * (c) 1999-2003 by Heiko A. Schmidt, Korbinian Strimmer,
9 * M. Vingron, and Arndt von Haeseler
10 * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler
11 *
12 * All parts of the source except where indicated are distributed under
13 * the GNU public licence. See http://www.opensource.org for details.
14 *
15 * ($Id$)
16 *
17 */
18 #define USE_GTR 1
19
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23
24 #if !PARALLEL
25 # ifdef USE_CONSOLE_H
26 # include <console.h>
27 # endif
28 #endif
29
30 #define EXTERN
31
32 #include "puzzle.h"
33 #include "gamma.h"
34 #include "treetest.h"
35
36 #if PARALLEL
37 # include "ppuzzle.h"
38 #endif
39
40 /*epe*/
41 #ifdef VT
42 # include <VT.h>
43 #endif
44
45
46
47 /******* debugging stuff - HAS ;-) ********/
48
49 int PPP1=0;
50 /* fprintf(stderr, "PPP1: %d (%s:%d)\n", PPP1++, __FILE__, __LINE__); */
51
52
fprintftimespan(FILE * ofp,time_t start,time_t stop,time_t allstart,char * comment)53 void fprintftimespan(FILE *ofp, time_t start, time_t stop, time_t allstart, char *comment)
54 {
55 double timespan;
56 timespan = difftime(stop, start);
57 fprintf(ofp, "TTT: %-25s took %.0f seconds (= %.1f minutes = %.1f hours)\n",
58 comment, timespan, timespan/60., timespan/3600.);
59 timespan = difftime(stop, allstart);
60 fprintf(ofp, "TTT: from beginning: %.0f seconds (= %.1f minutes = %.1f hours)\n",
61 timespan, timespan/60., timespan/3600.);
62 fflush(ofp);
63 }
64
65 /******* end of debugging stuff - HAS ;-) ********/
66
67
68
num2quart(uli qnum,int * a,int * b,int * c,int * d)69 void num2quart(uli qnum, int *a, int *b, int *c, int *d)
70 {
71 double temp;
72 uli aa, bb, cc, dd;
73 uli lowval=0, highval=0;
74
75 aa=0; bb=1; cc=2; dd=3;
76
77 temp = (double)(24 * qnum);
78 temp = sqrt(temp);
79 temp = sqrt(temp);
80 /* temp = pow(temp, (double)(1/4)); */
81 dd = (uli) floor(temp) + 1;
82 if (dd < 3) dd = 3;
83 lowval = (uli) dd*(dd-1)*(dd-2)*(dd-3)/24;
84 highval = (uli) (dd+1)*dd*(dd-1)*(dd-2)/24;
85 if (lowval >= qnum)
86 while ((lowval > qnum)) {
87 dd -= 1; lowval = (uli) dd*(dd-1)*(dd-2)*(dd-3)/24;
88 }
89 else {
90 while (highval <= qnum) {
91 dd += 1; highval = (uli) (dd+1)*dd*(dd-1)*(dd-2)/24;
92 }
93 lowval = (uli) dd*(dd-1)*(dd-2)*(dd-3)/24;
94 }
95 qnum -= lowval;
96 if (qnum > 0) {
97 temp = (double)(6 * qnum);
98 temp = pow(temp, (double)(1/3));
99 cc = (uli) floor(temp);
100 if (cc < 2) cc= 2;
101 lowval = (uli) cc*(cc-1)*(cc-2)/6;
102 highval = (uli) (cc+1)*cc*(cc-1)/6;
103 if (lowval >= qnum)
104 while ((lowval > qnum)) {
105 cc -= 1; lowval = (uli) cc*(cc-1)*(cc-2)/6;
106 }
107 else {
108 while (highval <= qnum) {
109 cc += 1; highval = (uli) (cc+1)*cc*(cc-1)/6;
110 }
111 lowval = (uli) cc*(cc-1)*(cc-2)/6;
112 }
113 qnum -= lowval;
114 if (qnum > 0) {
115 temp = (double)(2 * qnum);
116 temp = sqrt(temp);
117 bb = (uli) floor(temp);
118 if (bb < 1) bb= 1;
119 lowval = (uli) bb*(bb-1)/2;
120 highval = (uli) (bb+1)*bb/2;
121 if (lowval >= qnum)
122 while ((lowval > qnum)) {
123 bb -= 1; lowval = (uli) bb*(bb-1)/2;
124 }
125 else {
126 while (highval <= qnum) {
127 bb += 1; highval = (uli) (bb+1)*bb/2;
128 }
129 lowval = (uli) bb*(bb-1)/2;
130 }
131 qnum -= lowval;
132 if (qnum > 0) {
133 aa = (uli) qnum;
134 if (aa < 0) aa= 0;
135 }
136 }
137 }
138 *d = (int)dd;
139 *c = (int)cc;
140 *b = (int)bb;
141 *a = (int)aa;
142 } /* num2quart */
143
144 /******************/
145
numquarts(int maxspc)146 uli numquarts(int maxspc)
147 {
148 uli tmp;
149 int a, b, c, d;
150
151 if (maxspc < 4)
152 return (uli)0;
153 else {
154 maxspc--;
155 a = maxspc-3;
156 b = maxspc-2;
157 c = maxspc-1;
158 d = maxspc;
159
160 tmp = (uli) 1 + a +
161 (uli) b * (b-1) / 2 +
162 (uli) c * (c-1) * (c-2) / 6 +
163 (uli) d * (d-1) * (d-2) * (d-3) / 24;
164 return (tmp);
165 }
166 } /* numquarts */
167
168 /******************/
169
quart2num(int a,int b,int c,int d)170 uli quart2num (int a, int b, int c, int d)
171 {
172 uli tmp;
173 if ((a>b) || (b>c) || (c>d)) {
174 fprintf(stderr, "Error PP5 not (%d <= %d <= %d <= %d) !!!\n", a, b, c,
175 d);
176 # if PARALLEL
177 PP_Finalize();
178 # endif
179 exit (1);
180 }
181 tmp = (uli) a +
182 (uli) b * (b-1) / 2 +
183 (uli) c * (c-1) * (c-2) / 6 +
184 (uli) d * (d-1) * (d-2) * (d-3) / 24;
185 return (tmp);
186 } /* quart2num */
187
188 /******************/
189
190
191
192 /* flag=0 old allquart binary */
193 /* flag=1 allquart binary */
194 /* flag=2 allquart ACSII */
195 /* flag=3 quartlh binary */
196 /* flag=4 quartlh ASCII */
197
writetpqfheader(int nspec,FILE * ofp,int flag)198 void writetpqfheader(int nspec,
199 FILE *ofp,
200 int flag)
201 { int currspec;
202
203 if (flag == 0) {
204 unsigned long nquart;
205 unsigned long blocklen;
206
207 nquart = numquarts(nspec);
208 /* compute number of bytes */
209 if (nquart % 2 == 0) { /* even number */
210 blocklen = (nquart)/2;
211 } else { /* odd number */
212 blocklen = (nquart + 1)/2;
213 }
214 /* fprintf(STDOUT, "Writing quartet file: %s\n", filename); */
215 fprintf(ofp, "TREE-PUZZLE\n%s\n\n", VERSION);
216 fprintf(ofp, "species: %d\n", nspec);
217 fprintf(ofp, "quartets: %lu\n", nquart);
218 fprintf(ofp, "bytes: %lu\n\n", blocklen);
219
220
221 /* fwrite(&(quartetinfo[0]), sizeof(char), blocklen, ofp); */
222 }
223
224 if (flag == 1) fprintf(ofp, "##TPQF-BB (TREE-PUZZLE %s)\n%d\n", VERSION, nspec);
225 if (flag == 2) fprintf(ofp, "##TPQF-BA (TREE-PUZZLE %s)\n%d\n", VERSION, nspec);
226 if (flag == 3) fprintf(ofp, "##TPQF-LB (TREE-PUZZLE %s)\n%d\n", VERSION, nspec);
227 if (flag == 4) fprintf(ofp, "##TPQF-LA (TREE-PUZZLE %s)\n%d\n", VERSION, nspec);
228
229 for (currspec=0; currspec<nspec; currspec++) {
230 fputid(ofp, currspec);
231 fprintf(ofp, "\n");
232 } /* for each species */
233 fprintf(ofp, "\n");
234
235 } /* writetpqfheader */
236
237
238
writeallquarts(int nspec,char * filename,unsigned char * quartetinfo)239 void writeallquarts(int nspec,
240 char *filename,
241 unsigned char *quartetinfo)
242 { unsigned long nquart;
243 unsigned long blocklen;
244 FILE *ofp;
245
246 nquart = numquarts(nspec);
247
248 /* compute number of bytes */
249 if (nquart % 2 == 0) { /* even number */
250 blocklen = (nquart)/2;
251 } else { /* odd number */
252 blocklen = (nquart + 1)/2;
253 }
254
255 fprintf(STDOUT, "Writing quartet file: %s\n", filename);
256
257 openfiletowrite(&ofp, filename, "all quartets");
258
259 writetpqfheader(nspec, ofp, 0);
260
261 fwrite(&(quartetinfo[0]), sizeof(char), blocklen, ofp);
262 fclose(ofp);
263
264 } /* writeallquart */
265
266
267
readallquarts(int nspec,char * filename,unsigned char * quartetinfo)268 void readallquarts(int nspec,
269 char *filename,
270 unsigned char *quartetinfo)
271 { unsigned long nquart;
272 unsigned long blocklen;
273 int currspec;
274 unsigned long dummynquart;
275 unsigned long dummyblocklen;
276 int dummynspec;
277 char dummyversion[128];
278 char dummyname[128];
279 FILE *ifp;
280
281 nquart = numquarts(nspec);
282
283 /* compute number of bytes */
284 if (nquart % 2 == 0) { /* even number */
285 blocklen = (nquart)/2;
286 } else { /* odd number */
287 blocklen = (nquart + 1)/2;
288 }
289
290 /* &(quartetinfo[0] */
291
292 openfiletoread(&ifp, filename, "all quartets");
293
294 fscanf(ifp, "TREE-PUZZLE\n");
295 fscanf(ifp, "%s\n\n", dummyversion);
296
297 fscanf(ifp, "species: %d\n", &dummynspec);
298 fscanf(ifp, "quartets: %lu\n", &dummynquart);
299 fscanf(ifp, "bytes: %lu\n\n", &dummyblocklen);
300
301 if ((nspec != dummynspec) ||
302 (nquart != dummynquart) ||
303 (blocklen != dummyblocklen)) {
304 fprintf(STDOUT, "ERROR: Parameters in quartet file %s do not match!!!\n",
305 filename);
306 # if PARALLEL
307 PP_SendDone();
308 MPI_Finalize();
309 # endif /* PARALLEL */
310 exit(1);
311 }
312
313 fprintf(STDOUT, "Reading quartet file: %s\n", filename);
314 fprintf(STDOUT, " generated by: TREE-PUZZLE %s\n", dummyversion);
315 fprintf(STDOUT, " current: TREE-PUZZLE %s\n", VERSION);
316
317 fprintf(STDOUT, " %d species, %lu quartets, %lu bytes\n",
318 dummynspec, dummynquart, dummyblocklen);
319
320 for (currspec=0; currspec<nspec; currspec++) {
321
322 fscanf(ifp, "%s\n", dummyname);
323 if (0 != strcmp(dummyname, Namestr[currspec])) {
324 fprintf(STDOUT, "\n\n\nUnable to proceed (species in quartet file does not match: '%s' != '%s')\n\n", dummyname, Namestr[currspec]);
325 # if PARALLEL
326 PP_Finalize();
327 # endif
328 exit (1);
329 }
330 fprintf(STDOUT, " %3d. %s\n", currspec+1, dummyname);
331
332 } /* for each species */
333 fprintf(STDOUT, "\n");
334
335 fread(&(quartetinfo[0]), sizeof(char), blocklen, ifp);
336 fclose(ifp);
337
338 } /* writeallquart */
339
340
341
342
343 /******************************************************************************/
344 /* options - file I/O - output */
345 /******************************************************************************/
346
347 /* compute TN parameters according to F84 Ts/Tv ratio */
makeF84model()348 void makeF84model()
349 {
350 double rho, piA, piC, piG, piT, piR, piY, ts, yr;
351
352 piA = Freqtpm[0];
353 piC = Freqtpm[1];
354 piG = Freqtpm[2];
355 piT = Freqtpm[3];
356 piR = piA + piG;
357 piY = piC + piT;
358 if (piC*piT*piR + piA*piG*piY == 0.0) {
359 fprintf(STDOUT, "\n\n\nF84 model not possible ");
360 fprintf(STDOUT, "(bad combination of base frequencies)\n");
361 tstvf84 = 0.0;
362 return;
363 }
364 rho = (piR*piY*(piR*piY*tstvf84 - (piA*piG + piC*piT)))/
365 (piC*piT*piR + piA*piG*piY);
366
367 if(piR == 0.0 || piY == 0.0 || (piR + rho) == 0.0) {
368 fprintf(STDOUT, "\n\n\nF84 model not possible ");
369 fprintf(STDOUT, "(bad combination of base frequencies)\n");
370 tstvf84 = 0.0;
371 return;
372 }
373 ts = 0.5 + 0.25*rho*(1.0/piR + 1.0/piY);
374 yr = (piY + rho)/piY * piR/(piR + rho);
375 if (ts < MINTS || ts > MAXTS) {
376 fprintf(STDOUT, "\n\n\nF84 model not possible ");
377 fprintf(STDOUT, "(bad Ts/Tv parameter)\n");
378 tstvf84 = 0.0;
379 return;
380 }
381 if (yr < MINYR || yr > MAXYR) {
382 fprintf(STDOUT, "\n\n\nF84 model not possible ");
383 fprintf(STDOUT, "(bad Y/R transition parameter)\n");
384 tstvf84 = 0.0;
385 return;
386 }
387 TSparam = ts;
388 YRparam = yr;
389 optim_optn = FALSE;
390 } /* makeF84model */
391
392 /* compute number of quartets used in LM analysis */
compnumqts()393 void compnumqts()
394 {
395 if (lmqts == 0) {
396 if (numclust == 4)
397 Numquartets = (uli) clustA*clustB*clustC*clustD;
398 if (numclust == 3)
399 Numquartets = (uli) clustA*clustB*clustC*(clustC-1)/2;
400 if (numclust == 2)
401 Numquartets = (uli) clustA*(clustA-1)/2 * clustB*(clustB-1)/2;
402 if (numclust == 1)
403 Numquartets = (uli) Maxspc*(Maxspc-1)*(Maxspc-2)*(Maxspc-3)/24;
404 } else {
405 Numquartets = lmqts;
406 }
407 } /* compnumqts */
408
409 /* set options interactively */
setoptions()410 void setoptions()
411 {
412 int i, valid;
413 double sumfreq;
414 char ch;
415
416 /* defaults */
417 rhetmode = UNIFORMRATE; /* assume rate homogeneity */
418 numcats = 1;
419 Geta = 0.05;
420 grate_optim = FALSE;
421 fracinv = 0.0;
422 fracinv_optim = FALSE;
423
424 compclock = FALSE; /* compute clocklike branch lengths */
425 locroot = -1; /* search for optimal place of root */
426 qcalg_optn = FALSE; /* don't use sampling of quartets */
427 approxp_optn = TRUE; /* approximate parameter estimates */
428 listqptrees = PSTOUT_NONE; /* list puzzling step trees */
429
430 /* approximate QP quartets? */
431 if (Maxspc <= 6) approxqp = FALSE;
432 else approxqp = TRUE;
433
434 codon_optn = 0; /* use all positions in a codon */
435
436 /* number of puzzling steps */
437 if (Maxspc <= 25) Numtrial = 1000;
438 else if (Maxspc <= 50) Numtrial = 10000;
439 else if (Maxspc <= 75) Numtrial = 25000;
440 else Numtrial = 50000;
441
442 utree_optn = TRUE; /* use first user tree for estimation */
443 outgroup = 0; /* use first taxon as outgroup */
444 sym_optn = FALSE; /* symmetrize doublet frequencies */
445 tstvf84 = 0.0; /* disable F84 model */
446 show_optn = FALSE; /* show unresolved quartets */
447 typ_optn = TREERECON_OPTN; /* tree reconstruction */
448 numclust = 1; /* one clusters in LM analysis */
449 lmqts = 0; /* all quartets in LM analysis */
450 compnumqts();
451 if (Numquartets > 10000) {
452 lmqts = 10000; /* 10000 quartets in LM analysis */
453 compnumqts();
454 }
455
456 /* correct default analysis types if too few/many sequences*/
457 if ((seqnumcheck!=SEQNUM_OK) && (puzzlemode==QUARTPUZ))
458 puzzlemode = (puzzlemode + 1) % NUMPUZZLEMODES;
459 /* correct default analysis types if too fewy sequences*/
460 if ((seqnumcheck==SEQNUM_TOOFEW) && (typ_optn==LIKMAPING_OPTN))
461 typ_optn = (typ_optn + 1) % NUMTYPES;
462
463 do {
464 fprintf(STDOUT, "\n\n\nGENERAL OPTIONS");
465 if (seqnumcheck==SEQNUM_OK) fprintf(STDOUT, "\n");
466 if (seqnumcheck==SEQNUM_TOOFEW) fprintf(STDOUT, " (no quartet methods available - too few sequences)\n");
467 if (seqnumcheck==SEQNUM_TOOMANY) fprintf(STDOUT, " (no quartet puzzling available - too many sequences)\n");
468 fprintf(STDOUT, " b Type of analysis? ");
469 if (typ_optn == TREERECON_OPTN) {
470 if (seqnumcheck==SEQNUM_TOOFEW)
471 fprintf(STDOUT, "Tree reconstruction (only possible)\n");
472 else fprintf(STDOUT, "Tree reconstruction\n");
473 }
474 /* not possible for |seqs| < 4 */
475 if (typ_optn == LIKMAPING_OPTN) fprintf(STDOUT, "Likelihood mapping\n");
476 if (typ_optn == TREERECON_OPTN) {
477 fprintf(STDOUT, " k Tree search procedure? ");
478 /* not possible for |seqs| < 4 and for |seqs| > 257 */
479 if (puzzlemode == QUARTPUZ) fprintf(STDOUT, "Quartet puzzling\n");
480 if (puzzlemode == USERTREE) fprintf(STDOUT, "Evaluate user defined trees\n");
481 if (puzzlemode == CONSENSUS) fprintf(STDOUT, "Consensus of user defined trees\n");
482 if (puzzlemode == PAIRDIST) fprintf(STDOUT, "Pairwise distances only (no tree)\n");
483 if (puzzlemode == QUARTPUZ) {
484 fprintf(STDOUT, " v Approximate quartet likelihood? %s\n",
485 (approxqp ? "Yes" : "No"));
486 fprintf(STDOUT, " u List unresolved quartets? %s\n",
487 (show_optn ? "Yes" : "No"));
488 fprintf(STDOUT, " n Number of puzzling steps? %lu\n",
489 Numtrial);
490 fprintf(STDOUT, " j List puzzling step trees? ");
491 switch (listqptrees) {
492 case PSTOUT_NONE: fprintf(STDOUT, "No\n"); break;
493 case PSTOUT_ORDER: fprintf(STDOUT, "Unique topologies\n"); break;
494 case PSTOUT_LISTORDER: fprintf(STDOUT, "Unique topologies & Chronological list\n"); break;
495 case PSTOUT_LIST: fprintf(STDOUT, "Chronological list only\n"); break;
496 }
497
498 fprintf(STDOUT, " o Display as outgroup? ");
499 fputid(STDOUT, outgroup);
500 fprintf(STDOUT, " (%d)\n", outgroup + 1);
501 }
502 if (puzzlemode == QUARTPUZ || puzzlemode == USERTREE || puzzlemode == CONSENSUS) {
503 fprintf(STDOUT, " z Compute clocklike branch lengths? ");
504 if (compclock) fprintf(STDOUT, "Yes\n");
505 else fprintf(STDOUT, "No\n");
506 }
507 if (compclock)
508 if (puzzlemode == QUARTPUZ || puzzlemode == USERTREE || puzzlemode == CONSENSUS) {
509 fprintf(STDOUT, " l Location of root? ");
510 if (locroot < 0) fprintf(STDOUT, "Best place (automatic search)\n");
511 else if (locroot < Maxspc) {
512 fprintf(STDOUT, "Branch %d (", locroot + 1);
513 fputid(STDOUT, locroot);
514 fprintf(STDOUT, ")\n");
515 } else fprintf(STDOUT, "Branch %d (internal branch)\n", locroot + 1);
516 }
517 }
518 if (typ_optn == LIKMAPING_OPTN) {
519 fprintf(STDOUT, " g Group sequences in clusters? ");
520 if (numclust == 1) fprintf(STDOUT, "No\n");
521 else fprintf(STDOUT, "Yes (%d clusters as specified)\n", numclust);
522 fprintf(STDOUT, " n Number of quartets? ");
523 if (lmqts == 0) fprintf(STDOUT, "%lu (all possible)\n", Numquartets);
524 else fprintf(STDOUT, "%lu (random choice)\n", lmqts);
525 }
526 fprintf(STDOUT, " e Parameter estimates? ");
527 if (approxp_optn) fprintf(STDOUT, "Approximate (faster)\n");
528 else fprintf(STDOUT, "Exact (slow)\n");
529 if (!((puzzlemode == USERTREE || puzzlemode == CONSENSUS) && typ_optn == TREERECON_OPTN)) {
530 fprintf(STDOUT, " x Parameter estimation uses? ");
531 if (qcalg_optn) fprintf(STDOUT, "Quartet sampling + NJ tree\n");
532 else fprintf(STDOUT, "Neighbor-joining tree\n");
533
534 } else {
535 fprintf(STDOUT, " x Parameter estimation uses? ");
536 if (utree_optn)
537 fprintf(STDOUT, "1st input tree\n");
538 else if (qcalg_optn) fprintf(STDOUT, "Quartet sampling + NJ tree\n");
539 else fprintf(STDOUT, "Neighbor-joining tree\n");
540 }
541 fprintf(STDOUT, "SUBSTITUTION PROCESS\n");
542 fprintf(STDOUT, " d Type of sequence input data? ");
543 if (auto_datatype == AUTO_GUESS) fprintf(STDOUT, "Auto: ");
544 if (data_optn == NUCLEOTIDE) fprintf(STDOUT, "Nucleotides\n");
545 if (data_optn == AMINOACID) fprintf(STDOUT, "Amino acids\n");
546 if (data_optn == BINARY) fprintf(STDOUT, "Binary states\n");
547 if (data_optn == NUCLEOTIDE && (Maxseqc % 3) == 0 && !SH_optn) {
548 fprintf(STDOUT, " h Codon positions selected? ");
549 if (codon_optn == 0) fprintf(STDOUT, "Use all positions\n");
550 if (codon_optn == 1) fprintf(STDOUT, "Use only 1st positions\n");
551 if (codon_optn == 2) fprintf(STDOUT, "Use only 2nd positions\n");
552 if (codon_optn == 3) fprintf(STDOUT, "Use only 3rd positions\n");
553 if (codon_optn == 4) fprintf(STDOUT, "Use 1st and 2nd positions\n");
554 }
555 fprintf(STDOUT, " m Model of substitution? ");
556 if (data_optn == NUCLEOTIDE) { /* nucleotides */
557 if (nuc_optn) {
558 if(HKY_optn)
559 fprintf(STDOUT, "HKY (Hasegawa et al. 1985)\n");
560 if(TN_optn) {
561 fprintf(STDOUT, "TN (Tamura-Nei 1993)\n");
562 fprintf(STDOUT, " p Constrain TN model to F84 model? ");
563 if (tstvf84 == 0.0)
564 fprintf(STDOUT, "No\n");
565 else fprintf(STDOUT, "Yes (Ts/Tv ratio = %.2f)\n", tstvf84);
566 }
567 if(GTR_optn)
568 fprintf(STDOUT, "GTR (e.g. Lanave et al. 1980)\n");
569 if((HKY_optn) || (TN_optn)) {
570 fprintf(STDOUT, " t Transition/transversion parameter? ");
571 if (optim_optn)
572 fprintf(STDOUT, "Estimate from data set\n");
573 else
574 fprintf(STDOUT, "%.2f\n", TSparam);
575 }
576 if (TN_optn) {
577 fprintf(STDOUT, " r Y/R transition parameter? ");
578 if (optim_optn)
579 fprintf(STDOUT, "Estimate from data set\n");
580 else
581 fprintf(STDOUT, "%.2f\n", YRparam);
582 }
583 if (GTR_optn) {
584 fprintf(STDOUT, " 1 A-C rate? ");
585 fprintf(STDOUT, "%-23.2f", GTR_ACrate);
586 fprintf(STDOUT, " 2 A-G rate? ");
587 fprintf(STDOUT, "%-23.2f\n", GTR_AGrate);
588 fprintf(STDOUT, " 3 A-T rate? ");
589 fprintf(STDOUT, "%-23.2f", GTR_ATrate);
590 fprintf(STDOUT, " 4 C-G rate? ");
591 fprintf(STDOUT, "%-23.2f\n", GTR_CGrate);
592 fprintf(STDOUT, " 5 C-T rate? ");
593 fprintf(STDOUT, "%-23.2f", GTR_CTrate);
594 fprintf(STDOUT, " 6 G-T rate? ");
595 fprintf(STDOUT, "%-23.2f\n", GTR_GTrate);
596 #if 0
597 fprintf(STDOUT, " 1 A <-> C mutation rate? ");
598 fprintf(STDOUT, "%.2f\n", GTR_ACrate);
599 fprintf(STDOUT, " 2 A <-> G mutation rate? ");
600 fprintf(STDOUT, "%.2f\n", GTR_AGrate);
601 fprintf(STDOUT, " 3 A <-> T mutation rate? ");
602 fprintf(STDOUT, "%.2f\n", GTR_ATrate);
603 fprintf(STDOUT, " 4 C <-> G mutation rate? ");
604 fprintf(STDOUT, "%.2f\n", GTR_CGrate);
605 fprintf(STDOUT, " 5 C <-> T mutation rate? ");
606 fprintf(STDOUT, "%.2f\n", GTR_CTrate);
607 fprintf(STDOUT, " 6 G <-> T mutation rate? ");
608 fprintf(STDOUT, "%.2f\n", GTR_GTrate);
609 #endif
610 }
611 }
612 if (SH_optn) {
613 fprintf(STDOUT, "SH (Schoeniger-von Haeseler 1994)\n");
614 fprintf(STDOUT, " t Transition/transversion parameter? ");
615 if (optim_optn)
616 fprintf(STDOUT, "Estimate from data set\n");
617 else
618 fprintf(STDOUT, "%.2f\n", TSparam);
619 }
620 }
621 if (data_optn == NUCLEOTIDE && SH_optn) {
622 fprintf(STDOUT, " h Doublets defined by? ");
623 if (SHcodon)
624 fprintf(STDOUT, "1st and 2nd codon positions\n");
625 else
626 fprintf(STDOUT, "1st+2nd, 3rd+4th, etc. site\n");
627 }
628 if (data_optn == AMINOACID) { /* amino acids */
629 switch (auto_aamodel) {
630 case AUTO_GUESS:
631 fprintf(STDOUT, "Auto: ");
632 break;
633 case AUTO_DEFAULT:
634 fprintf(STDOUT, "Def.: ");
635 break;
636 }
637 if (Dayhf_optn) fprintf(STDOUT, "Dayhoff (Dayhoff et al. 1978)\n");
638 if (Jtt_optn) fprintf(STDOUT, "JTT (Jones et al. 1992)\n");
639 if (mtrev_optn) fprintf(STDOUT, "mtREV24 (Adachi-Hasegawa 1996)\n");
640 if (cprev_optn) fprintf(STDOUT, "cpREV45 (Adachi et al. 2000)\n");
641 if (blosum62_optn) fprintf(STDOUT, "BLOSUM62 (Henikoff-Henikoff 92)\n");
642 if (vtmv_optn) fprintf(STDOUT, "VT (Mueller-Vingron 2000)\n");
643 if (wag_optn) fprintf(STDOUT, "WAG (Whelan-Goldman 2000)\n");
644 }
645 if (data_optn == BINARY) { /* binary states */
646 fprintf(STDOUT, "Two-state model (Felsenstein 1981)\n");
647 }
648 if (data_optn == AMINOACID)
649 fprintf(STDOUT, " f Amino acid frequencies? ");
650 else if (data_optn == NUCLEOTIDE && SH_optn)
651 fprintf(STDOUT, " f Doublet frequencies? ");
652 else if (data_optn == NUCLEOTIDE && nuc_optn)
653 fprintf(STDOUT, " f Nucleotide frequencies? ");
654 else if (data_optn == BINARY)
655 fprintf(STDOUT, " f Binary state frequencies? ");
656 fprintf(STDOUT, "%s\n", (Frequ_optn ? "Estimate from data set" :
657 "Use specified values"));
658 if (data_optn == NUCLEOTIDE && SH_optn)
659 fprintf(STDOUT, " s Symmetrize doublet frequencies? %s\n",
660 (sym_optn ? "Yes" : "No"));
661
662 fprintf(STDOUT, "RATE HETEROGENEITY\n");
663 fprintf(STDOUT, " w Model of rate heterogeneity? ");
664 if (rhetmode == UNIFORMRATE) fprintf(STDOUT, "Uniform rate\n");
665 if (rhetmode == GAMMARATE ) fprintf(STDOUT, "Gamma distributed rates\n");
666 if (rhetmode == TWORATE ) fprintf(STDOUT, "Two rates (1 invariable + 1 variable)\n");
667 if (rhetmode == MIXEDRATE ) fprintf(STDOUT, "Mixed (1 invariable + %d Gamma rates)\n", numcats);
668
669 if (rhetmode == TWORATE || rhetmode == MIXEDRATE) {
670 fprintf(STDOUT, " i Fraction of invariable sites? ");
671 if (fracinv_optim) fprintf(STDOUT, "Estimate from data set");
672 else fprintf(STDOUT, "%.2f", fracinv);
673 if (fracinv == 0.0 && !fracinv_optim) fprintf(STDOUT, " (all sites variable)");
674 fprintf(STDOUT, "\n");
675 }
676 if (rhetmode == GAMMARATE || rhetmode == MIXEDRATE) {
677 fprintf(STDOUT, " a Gamma distribution parameter alpha? ");
678 if (grate_optim)
679 fprintf(STDOUT, "Estimate from data set\n");
680 else if (Geta > 0.5)
681 fprintf(STDOUT, "%.2f (strong rate heterogeneity)\n", (1.0-Geta)/Geta);
682 else fprintf(STDOUT, "%.2f (weak rate heterogeneity)\n", (1.0-Geta)/Geta);
683 fprintf(STDOUT, " c Number of Gamma rate categories? %d\n", numcats);
684 }
685
686 fprintf(STDOUT, "\nQuit [q], confirm [y], or change [menu] settings: ");
687 fflush(STDOUT);
688
689 /* read one char */
690 ch = getchar();
691 if (ch != '\n') {
692 do ;
693 while (getchar() != '\n');
694 }
695 ch = (char) tolower((int) ch);
696
697 /* letters in use: a b c d e f g h i j k l m n o p q r s t u v w y x z */
698 /* letters not in use: */
699
700 switch (ch) {
701
702 case '\n': break;
703
704 case 'z': if (typ_optn == TREERECON_OPTN && (puzzlemode == QUARTPUZ || puzzlemode == USERTREE || puzzlemode == CONSENSUS)) {
705 compclock = compclock + 1;
706 if (compclock == 2) compclock = 0;
707 } else {
708 fprintf(STDOUT, "\n\n\nThis is not a possible option!\n");
709 }
710 break;
711
712 case 'l': if (compclock && typ_optn == TREERECON_OPTN && (puzzlemode == QUARTPUZ || puzzlemode == USERTREE || puzzlemode == CONSENSUS)) {
713 fprintf(STDOUT, "\n\n\nEnter an invalid branch number to search ");
714 fprintf(STDOUT, "for the best location!\n");
715 fprintf(STDOUT, "\nPlace root at branch (1-%d, 0=automatic): ",
716 2*Maxspc-3);
717 fflush(STDOUT);
718 scanf("%d", &locroot);
719 do ;
720 while (getchar() != '\n');
721 if (locroot < 1 || locroot > 2*Maxspc-3) locroot = 0;
722 locroot = locroot - 1;
723 } else {
724 fprintf(STDOUT, "\n\n\nThis is not a possible option!\n");
725 }
726 break;
727
728 case 'e': if ((rhetmode == TWORATE || rhetmode == MIXEDRATE) && fracinv_optim) {
729 fprintf(STDOUT, "\n\n\nInvariable sites estimation needs to be exact!\n");
730 } else {
731 approxp_optn = approxp_optn + 1;
732 if (approxp_optn == 2) approxp_optn = 0;
733 }
734 break;
735
736 case 'w': rhetmode = rhetmode + 1;
737 if (rhetmode == 4) rhetmode = UNIFORMRATE;
738 if (rhetmode == UNIFORMRATE) { /* uniform rate */
739 numcats = 1;
740 Geta = 0.05;
741 grate_optim = FALSE;
742 fracinv = 0.0;
743 fracinv_optim = FALSE;
744 }
745 if (rhetmode == GAMMARATE ) { /* Gamma distributed rates */
746 numcats = 8;
747 Geta = 0.05;
748 grate_optim = TRUE;
749 fracinv = 0.0;
750 fracinv_optim = FALSE;
751 }
752 if (rhetmode == TWORATE ) { /* two rates (1 invariable + 1 variable) */
753 approxp_optn = FALSE;
754 numcats = 1;
755 Geta = 0.05;
756 grate_optim = FALSE;
757 fracinv = 0.0;
758 fracinv_optim = TRUE;
759 }
760 if (rhetmode == MIXEDRATE ) { /* mixed (1 invariable + Gamma rates) */
761 approxp_optn = FALSE;
762 numcats = 8;
763 Geta = 0.05;
764 grate_optim = TRUE;
765 fracinv = 0.0;
766 fracinv_optim = TRUE;
767 }
768 break;
769
770 case 'i': if (rhetmode == TWORATE || rhetmode == MIXEDRATE) {
771 fprintf(STDOUT, "\n\n\nEnter an invalid value for ");
772 fprintf(STDOUT, "estimation from data set!\n");
773 fprintf(STDOUT, "\nFraction of invariable sites among all sites (%.2f-%.2f): ",
774 MINFI, MAXFI);
775 fflush(STDOUT);
776 scanf("%lf", &fracinv);
777 do ;
778 while (getchar() != '\n');
779 if (fracinv < MINFI || fracinv > MAXFI) {
780 fracinv_optim = TRUE;
781 fracinv = 0.0;
782 } else {
783 fracinv_optim = FALSE;
784 }
785 } else {
786 fprintf(STDOUT, "\n\n\nThis is not a possible option!\n");
787 }
788 break;
789
790 case 'a': if (rhetmode == GAMMARATE || rhetmode == MIXEDRATE) {
791 fprintf(STDOUT, "\n\n\nEnter an invalid value for estimation from data set!\n");
792 fprintf(STDOUT, "\nGamma distribution parameter alpha (%.2f-%.2f): ",
793 (1.0-MAXGE)/MAXGE, (1.0-MINGE)/MINGE);
794 fflush(STDOUT);
795 scanf("%lf", &Geta);
796 do ;
797 while (getchar() != '\n');
798 if (Geta < (1.0-MAXGE)/MAXGE || Geta > (1.0-MINGE)/MINGE) {
799 grate_optim = TRUE;
800 Geta = 0.05;
801 } else {
802 grate_optim = FALSE;
803 Geta = 1.0/(1.0 + Geta);
804 }
805 } else
806 fprintf(STDOUT, "\n\n\nThis is not a possible option!\n");
807 break;
808
809 case 'c': if (rhetmode == GAMMARATE || rhetmode == MIXEDRATE) {
810 fprintf(STDOUT, "\n\n\nNumber of Gamma rate categories (%d-%d): ",
811 MINCAT, MAXCAT);
812 fflush(STDOUT);
813 scanf("%d", &numcats);
814 do ;
815 while (getchar() != '\n');
816 if (numcats < MINCAT || numcats > MAXCAT) {
817 fprintf(STDOUT, "\n\n\nThis number of categories is not available!\n");
818 numcats = 4;
819 }
820 } else {
821 fprintf(STDOUT, "\n\n\nThis is not a possible option!\n");
822 }
823 break;
824
825 case 'h': if (data_optn == NUCLEOTIDE && (Maxseqc % 3) == 0 && !SH_optn) {
826 codon_optn = codon_optn + 1;
827 if (codon_optn == 5) codon_optn = 0;
828 translatedataset(Maxspc, Maxseqc, &Maxsite, Seqchars, &Seqchar, &Seqgapchar, &Seqotherchar);
829
830 /* reestimate nucleotide frequencies only
831 if user did not specify other values */
832 if (Frequ_optn) estimatebasefreqs();
833
834 } else if (data_optn == NUCLEOTIDE && SH_optn) {
835 if (Maxseqc % 2 != 0 && Maxseqc % 3 == 0) {
836 SHcodon = TRUE;
837 fprintf(STDOUT, "\n\n\nThis is the only possible option for the data set!\n");
838 }
839 if (Maxseqc % 3 != 0 && Maxseqc % 2 == 0) {
840 SHcodon = FALSE;
841 fprintf(STDOUT, "\n\n\nThis is the only possible option for the data set!\n");
842 }
843 if (Maxseqc % 2 == 0 && Maxseqc % 3 == 0) {
844 if (SHcodon)
845 SHcodon = FALSE;
846 else
847 SHcodon = TRUE;
848 translatedataset(Maxspc, Maxseqc, &Maxsite, Seqchars, &Seqchar, &Seqgapchar, &Seqotherchar);
849 /* reestimate nucleotide frequencies only
850 if user did not specify other values */
851 if (Frequ_optn) estimatebasefreqs();
852 }
853 } else {
854 fprintf(STDOUT, "\n\n\nThis is not a possible option!\n");
855 }
856 break;
857
858 case 'x': if (typ_optn == TREERECON_OPTN && (puzzlemode == USERTREE || puzzlemode == CONSENSUS)) {
859 /* use of usertree for parameter estimation set? */
860 /* utree_optn==TRUE && qcalg_optn==? - 1st user tree (for substitution process and rate variation) */
861 /* utree_optn==FALSE && qcalg_optn==TRUE - quartet sampling (for substitution process) + NJ tree (for rate variation) */
862 /* utree_optn==FALSE && qcalg_optn==FALSE - neighbor-joining tree (for substitution process and rate variation) */
863
864 if (utree_optn) {
865 /* menu point 1 (1st utree) -> 2 (NJ) */
866 utree_optn = FALSE; /* no 1st usertree */
867 qcalg_optn = FALSE; /* no quartets */
868 } else {
869 /* menu point 2 (NJ) -> 3 (NJ+Q) */
870 qcalg_optn = qcalg_optn + 1;
871
872 /* seq-num < 4: only 1' (NJ) possible */
873 if ((seqnumcheck==SEQNUM_TOOFEW) && (qcalg_optn==1))
874 qcalg_optn = qcalg_optn + 1;
875
876 if (qcalg_optn == 2) {
877 /* menu point 3 (NJ+Q) -> 1 (1st utree) */
878 qcalg_optn = 0;
879 utree_optn = TRUE;
880 }
881 }
882 } else {
883 /* menu point 1' (NJ) -> 2' (NJ+Q) */
884 qcalg_optn = qcalg_optn + 1;
885
886 /* menu point 2' (NJ+Q) -> 1' (NJ) */
887 if (qcalg_optn == 2) qcalg_optn = 0;
888
889 /* seq-num < 4: only 1' (NJ) possible */
890 if ((seqnumcheck==SEQNUM_TOOFEW) && (qcalg_optn==1)) qcalg_optn=0;
891 }
892 break;
893
894 case 'k': if (typ_optn == TREERECON_OPTN) {
895 puzzlemode = (puzzlemode + 1) % NUMPUZZLEMODES;
896 if ((seqnumcheck!=SEQNUM_OK) && (puzzlemode==QUARTPUZ))
897 puzzlemode = (puzzlemode + 1) % NUMPUZZLEMODES;
898 } else {
899 fprintf(STDOUT, "\n\n\nThis is not a possible option!\n");
900 }
901 break;
902
903 case 'b': typ_optn = (typ_optn + 1) % NUMTYPES;
904 if ((seqnumcheck==SEQNUM_TOOFEW) && (typ_optn==LIKMAPING_OPTN))
905 typ_optn = (typ_optn + 1) % NUMTYPES;
906 break;
907
908 case 'g': if (typ_optn == LIKMAPING_OPTN) {
909 clustA = clustB = clustC = clustD = 0;
910 if (numclust != 1) {
911 numclust = 1;
912 } else {
913 fprintf(STDOUT, "\n\n\nNumber of clusters (2-4): ");
914 fflush(STDOUT);
915 scanf("%d", &numclust);
916 do ;
917 while (getchar() != '\n');
918 if (numclust < 2 || numclust > 4) {
919 numclust = 1;
920 fprintf(STDOUT, "\n\n\nOnly 2, 3, or 4 ");
921 fprintf(STDOUT, "clusters possible\n");
922 } else {
923 fprintf(STDOUT, "\nDistribute all sequences over the ");
924 if (numclust == 2) {
925 fprintf(STDOUT, "two clusters a and b (At least two\n");
926 fprintf(STDOUT, "sequences per cluster are necessary), ");
927 }
928 if (numclust == 3) {
929 fprintf(STDOUT, "three clusters a, b, and c\n");
930 fprintf(STDOUT, "(At least one sequence in cluster a and b, and at least two\n");
931 fprintf(STDOUT, "sequences in c are necessary), ");
932 }
933 if (numclust == 4) {
934 fprintf(STDOUT, "four clusters a, b, c, and d\n");
935 fprintf(STDOUT, "(At least one sequence per cluster is necessary),\n");
936 }
937 fprintf(STDOUT, "type x to exclude a sequence:\n\n");
938
939 for (i = 0; i < Maxspc; i++) {
940 valid = FALSE;
941 do {
942 fputid10(STDOUT, i);
943 fprintf(STDOUT, ": ");
944 fflush(STDOUT);
945 /* read one char */
946 ch = getchar();
947 if (ch != '\n') {
948 do ;
949 while (getchar() != '\n');
950 }
951 ch = (char) tolower((int) ch);
952 if (ch == 'a' || ch == 'b' || ch == 'x')
953 valid = TRUE;
954 if (numclust == 3 || numclust == 4)
955 if (ch == 'c') valid = TRUE;
956 if (numclust == 4)
957 if (ch == 'd') valid = TRUE;
958 } while (!valid);
959 if (ch == 'a') {
960 clusterA[clustA] = i;
961 clustA++;
962 }
963 if (ch == 'b') {
964 clusterB[clustB] = i;
965 clustB++;
966 }
967 if (ch == 'c') {
968 clusterC[clustC] = i;
969 clustC++;
970 }
971 if (ch == 'd') {
972 clusterD[clustD] = i;
973 clustD++;
974 }
975 }
976 /* check clusters */
977 valid = TRUE;
978 if (numclust == 4) {
979 if (clustA == 0) {
980 valid = FALSE;
981 numclust = 1;
982 fprintf(STDOUT, "\n\n\nNo sequence in cluster a\n");
983 }
984 if (clustB == 0) {
985 valid = FALSE;
986 numclust = 1;
987 fprintf(STDOUT, "\n\n\nNo sequence in cluster b\n");
988 }
989 if (clustC == 0) {
990 valid = FALSE;
991 numclust = 1;
992 fprintf(STDOUT, "\n\n\nNo sequence in cluster c\n");
993 }
994 if (clustD == 0) {
995 valid = FALSE;
996 numclust = 1;
997 fprintf(STDOUT, "\n\n\nNo sequence in cluster d\n");
998 }
999 }
1000 if (numclust == 3) {
1001 if (clustA == 0) {
1002 valid = FALSE;
1003 numclust = 1;
1004 fprintf(STDOUT, "\n\n\nNo sequence in cluster a\n");
1005 }
1006 if (clustB == 0) {
1007 valid = FALSE;
1008 numclust = 1;
1009 fprintf(STDOUT, "\n\n\nNo sequence in cluster b\n");
1010 }
1011 if (clustC < 2) {
1012 valid = FALSE;
1013 numclust = 1;
1014 if (clustC == 0)
1015 fprintf(STDOUT, "\n\n\nNo sequence in cluster c\n");
1016 else
1017 fprintf(STDOUT, "\n\n\nOnly one sequence in cluster c\n");
1018 }
1019 }
1020 if (numclust == 2) {
1021 if (clustA < 2) {
1022 valid = FALSE;
1023 numclust = 1;
1024 if (clustA == 0)
1025 fprintf(STDOUT, "\n\n\nNo sequence in cluster a\n");
1026 else
1027 fprintf(STDOUT, "\n\n\nOnly one sequence in cluster a\n");
1028 }
1029 if (clustB < 2) {
1030 valid = FALSE;
1031 numclust = 1;
1032 if (clustB == 0)
1033 fprintf(STDOUT, "\n\n\nNo sequence in cluster b\n");
1034 else
1035 fprintf(STDOUT, "\n\n\nOnly one sequence in cluster b\n");
1036 }
1037 }
1038 if (valid) {
1039 fprintf(STDOUT, "\nNumber of sequences in each cluster:\n\n");
1040 fprintf(STDOUT, "Cluster a: %d\n", clustA);
1041 fprintf(STDOUT, "Cluster b: %d\n", clustB);
1042 if (numclust > 2)
1043 fprintf(STDOUT, "Cluster c: %d\n", clustC);
1044 if (numclust == 4)
1045 fprintf(STDOUT, "Cluster d: %d\n", clustD);
1046 fprintf(STDOUT, "\nExcluded sequences: ");
1047 if (numclust == 2) fprintf(STDOUT, "%d\n",
1048 Maxspc-clustA-clustB);
1049 if (numclust == 3) fprintf(STDOUT, "%d\n",
1050 Maxspc-clustA-clustB-clustC);
1051 if (numclust == 4) fprintf(STDOUT, "%d\n",
1052 Maxspc-clustA-clustB-clustC-clustD);
1053
1054 }
1055 }
1056 }
1057 /* number of resulting quartets */
1058 compnumqts();
1059
1060 } else {
1061 fprintf(STDOUT, "\n\n\nThis is not a possible option!\n");
1062 }
1063 break;
1064
1065 case 'd': if (auto_datatype == AUTO_GUESS) {
1066 auto_datatype = AUTO_OFF;
1067 guessdata_optn = data_optn;
1068 data_optn = 0;
1069 } else {
1070 data_optn = data_optn + 1;
1071 if (data_optn == 3) {
1072 auto_datatype = AUTO_GUESS;
1073 data_optn = guessdata_optn;
1074 }
1075 }
1076 /* translate characters into format used by ML engine */
1077 translatedataset(Maxspc, Maxseqc, &Maxsite, Seqchars, &Seqchar, &Seqgapchar, &Seqotherchar);
1078 estimatebasefreqs();
1079 break;
1080
1081 case 'u': if (puzzlemode == QUARTPUZ && typ_optn == TREERECON_OPTN)
1082 show_optn = 1 - show_optn;
1083 else
1084 fprintf(STDOUT, "\n\n\nThis is not a possible option!\n");
1085 break;
1086
1087 case 'j': if (puzzlemode == QUARTPUZ && typ_optn == TREERECON_OPTN)
1088 listqptrees = (listqptrees + 1) % 4;
1089 else
1090 fprintf(STDOUT, "\n\n\nThis is not a possible option!\n");
1091 break;
1092
1093 case 'v': if (puzzlemode == QUARTPUZ && typ_optn == TREERECON_OPTN)
1094 approxqp = 1 - approxqp;
1095 else
1096 fprintf(STDOUT, "\n\n\nThis is not a possible option!\n");
1097 break;
1098
1099 case 'f': if (Frequ_optn) {
1100 tstvf84 = 0.0;
1101 Frequ_optn = FALSE;
1102 sumfreq = 0.0;
1103 if (data_optn == AMINOACID)
1104 fprintf(STDOUT, "\n\n\nAmino acid");
1105 else if (data_optn == NUCLEOTIDE && SH_optn)
1106 fprintf(STDOUT, "\n\n\nDoublet");
1107 else if (data_optn == NUCLEOTIDE && nuc_optn)
1108 fprintf(STDOUT, "\n\n\nNucleotide");
1109 else if (data_optn == BINARY)
1110 fprintf(STDOUT, "\n\n\nBinary state");
1111 fprintf(STDOUT, " frequencies (in %%):\n\n");
1112 for (i = 0; i < gettpmradix() - 1; i++) {
1113 fprintf(STDOUT, "pi(%s) = ", int2code(i));
1114 fflush(STDOUT);
1115 scanf("%lf", &(Freqtpm[i]));
1116 do ;
1117 while (getchar() != '\n');
1118 Freqtpm[i] = Freqtpm[i]/100.0;
1119 if (Freqtpm[i] < 0.0) {
1120 fprintf(STDOUT, "\n\n\nNegative frequency not possible\n");
1121 estimatebasefreqs();
1122 break;
1123 }
1124 sumfreq = sumfreq + Freqtpm[i];
1125 if (sumfreq > 1.0) {
1126 fprintf(STDOUT, "\n\n\nThe sum of ");
1127 fprintf(STDOUT, "all frequencies exceeds");
1128 fprintf(STDOUT, " 100%%\n");
1129 estimatebasefreqs();
1130 break;
1131 }
1132 if (i == gettpmradix() - 2)
1133 Freqtpm[i+1] = 1.0 - sumfreq;
1134 }
1135 } else estimatebasefreqs();
1136 break;
1137
1138 case 's': if (data_optn == NUCLEOTIDE && SH_optn) {
1139 sym_optn = 1 - sym_optn;
1140 } else {
1141 fprintf(STDOUT, "\n\n\nThis is not a possible option!\n");
1142 }
1143 break;
1144
1145 case 'n': if (puzzlemode == QUARTPUZ && typ_optn == TREERECON_OPTN)
1146 {
1147 fprintf(STDOUT, "\n\n\nNumber of puzzling steps: ");
1148 fflush(STDOUT);
1149 scanf("%lu", &Numtrial);
1150 do ;
1151 while (getchar() != '\n');
1152 if (Numtrial < 1) {
1153 fprintf(STDOUT, "\n\n\nThe number of puzzling");
1154 fprintf(STDOUT, " steps can't be smaller than one\n");
1155 Numtrial = 1000;
1156 }
1157 }
1158 else if (typ_optn == LIKMAPING_OPTN)
1159 {
1160 fprintf(STDOUT, "\n\nEnter zero to use all possible");
1161 fprintf(STDOUT, " quartets in the analysis!\n");
1162 fprintf(STDOUT, "\nNumber of random quartets: ");
1163 fflush(STDOUT);
1164 scanf("%lu", &lmqts);
1165 do ;
1166 while (getchar() != '\n');
1167
1168 /* compute number of quartets used */
1169 compnumqts();
1170 }
1171 else
1172 {
1173 fprintf(STDOUT, "\n\n\nThis is not a possible option!\n");
1174 }
1175 break;
1176
1177 case 'o': if (puzzlemode == QUARTPUZ && typ_optn == TREERECON_OPTN) {
1178 fprintf(STDOUT, "\n\n\nSequence to be displayed as outgroup (1-%d, or 0 for last): ",
1179 Maxspc);
1180 fflush(STDOUT);
1181 scanf("%d", &outgroup);
1182 do ;
1183 while (getchar() != '\n');
1184 if (outgroup < 0 || outgroup > Maxspc) {
1185 fprintf(STDOUT, "\n\n\nSequences are numbered ");
1186 fprintf(STDOUT, "from 1 to %d\n",
1187 Maxspc);
1188 outgroup = 1;
1189 }
1190 if (outgroup == 0) outgroup = Maxspc;
1191 outgroup = outgroup - 1;
1192 } else {
1193 fprintf(STDOUT, "\n\n\nThis is not a possible option!\n");
1194 }
1195 break;
1196
1197 case 'm': if (data_optn == NUCLEOTIDE) { /* nucleotide data */
1198 #ifndef USE_GTR
1199 if(HKY_optn && nuc_optn) {
1200 /* HKY -> TN */
1201 tstvf84 = 0.0;
1202 TSparam = 2.0;
1203 YRparam = 0.9;
1204 HKY_optn = FALSE;
1205 TN_optn = TRUE;
1206 GTR_optn = FALSE;
1207 optim_optn = TRUE;
1208 nuc_optn = TRUE;
1209 SH_optn = FALSE;
1210 break;
1211 }
1212 #else
1213 if(HKY_optn && nuc_optn) {
1214 /* HKY -> TN */
1215 tstvf84 = 0.0;
1216 TSparam = 2.0;
1217 YRparam = 0.9;
1218 HKY_optn = FALSE;
1219 TN_optn = TRUE;
1220 GTR_optn = FALSE;
1221 optim_optn = TRUE;
1222 nuc_optn = TRUE;
1223 SH_optn = FALSE;
1224 break;
1225 }
1226 if(TN_optn && nuc_optn) {
1227 /* TN -> GTR */
1228 tstvf84 = 0.0;
1229 TSparam = 2.0;
1230 YRparam = 0.9;
1231 GTR_ACrate = 1.0;
1232 GTR_AGrate = 1.0;
1233 GTR_ATrate = 1.0;
1234 GTR_CGrate = 1.0;
1235 GTR_CTrate = 1.0;
1236 GTR_GTrate = 1.0;
1237 HKY_optn = FALSE;
1238 TN_optn = FALSE;
1239 GTR_optn = TRUE;
1240 optim_optn = FALSE;
1241 nuc_optn = TRUE;
1242 SH_optn = FALSE;
1243 break;
1244 }
1245 #if 0
1246 if(GTR_optn && nuc_optn) {
1247 /* GTR -> TN */
1248 tstvf84 = 0.0;
1249 TSparam = 2.0;
1250 YRparam = 0.9;
1251 HKY_optn = FALSE;
1252 TN_optn = TRUE;
1253 GTR_optn = FALSE;
1254 optim_optn = TRUE;
1255 nuc_optn = TRUE;
1256 SH_optn = FALSE;
1257 break;
1258 }
1259 #endif
1260 #endif
1261 if(GTR_optn && nuc_optn) {
1262 /* if(TN_optn && nuc_optn) { */
1263 if (Maxseqc % 2 == 0 || Maxseqc % 3 == 0) {
1264 /* number of chars needs to be a multiple 2 or 3 */
1265 /* TN -> SH */
1266 if (Maxseqc % 2 != 0 && Maxseqc % 3 == 0)
1267 SHcodon = TRUE;
1268 else
1269 SHcodon = FALSE;
1270 tstvf84 = 0.0;
1271 TSparam = 2.0;
1272 YRparam = 1.0;
1273 HKY_optn = TRUE;
1274 TN_optn = FALSE;
1275 GTR_optn = FALSE;
1276 optim_optn = TRUE;
1277 nuc_optn = FALSE;
1278 SH_optn = TRUE;
1279 /* translate characters into format */
1280 /* used by ML engine */
1281 translatedataset(Maxspc, Maxseqc, &Maxsite, Seqchars, &Seqchar, &Seqgapchar, &Seqotherchar);
1282 estimatebasefreqs();
1283 } else {
1284 fprintf(STDOUT, "\n\n\nSH model not ");
1285 fprintf(STDOUT, "available for the data set!\n");
1286 /* TN -> HKY */
1287 tstvf84 = 0.0;
1288 TSparam = 2.0;
1289 YRparam = 1.0;
1290 HKY_optn = TRUE;
1291 TN_optn = FALSE;
1292 GTR_optn = FALSE;
1293 optim_optn = TRUE;
1294 nuc_optn = TRUE;
1295 SH_optn = FALSE;
1296 }
1297 break;
1298 }
1299 if(SH_optn) {
1300 /* SH -> HKY */
1301 tstvf84 = 0.0;
1302 TSparam = 2.0;
1303 YRparam = 1.0;
1304 HKY_optn = TRUE;
1305 TN_optn = FALSE;
1306 GTR_optn = FALSE;
1307 optim_optn = TRUE;
1308 nuc_optn = TRUE;
1309 SH_optn = FALSE;
1310 /* translate characters into format */
1311 /* used by ML engine */
1312 translatedataset(Maxspc, Maxseqc, &Maxsite, Seqchars, &Seqchar, &Seqgapchar, &Seqotherchar);
1313 estimatebasefreqs();
1314 break;
1315 }
1316 break;
1317 }
1318 if (data_optn == AMINOACID) { /* amino acid data */
1319 if (auto_aamodel) {
1320 /* AUTO -> Dayhoff */
1321 Dayhf_optn = TRUE;
1322 Jtt_optn = FALSE;
1323 mtrev_optn = FALSE;
1324 cprev_optn = FALSE;
1325 blosum62_optn = FALSE;
1326 vtmv_optn = FALSE;
1327 wag_optn = FALSE;
1328 auto_aamodel = AUTO_OFF;
1329 break;
1330 }
1331 if (Dayhf_optn) {
1332 /* Dayhoff -> JTT */
1333 Dayhf_optn = FALSE;
1334 Jtt_optn = TRUE;
1335 mtrev_optn = FALSE;
1336 cprev_optn = FALSE;
1337 blosum62_optn = FALSE;
1338 vtmv_optn = FALSE;
1339 wag_optn = FALSE;
1340 auto_aamodel = AUTO_OFF;
1341 break;
1342 }
1343 if (Jtt_optn) {
1344 /* JTT -> mtREV */
1345 Dayhf_optn = FALSE;
1346 Jtt_optn = FALSE;
1347 mtrev_optn = TRUE;
1348 cprev_optn = FALSE;
1349 blosum62_optn = FALSE;
1350 vtmv_optn = FALSE;
1351 wag_optn = FALSE;
1352 auto_aamodel = AUTO_OFF;
1353 break;
1354 }
1355 #ifdef CPREV
1356 if (mtrev_optn) {
1357 /* mtREV -> cpREV */
1358 Dayhf_optn = FALSE;
1359 Jtt_optn = FALSE;
1360 mtrev_optn = FALSE;
1361 cprev_optn = TRUE;
1362 blosum62_optn = FALSE;
1363 vtmv_optn = FALSE;
1364 wag_optn = FALSE;
1365 auto_aamodel = AUTO_OFF;
1366 break;
1367 }
1368 #else /* ! CPREV */
1369 if (mtrev_optn) {
1370 /* mtREV -> BLOSUM 62 */
1371 Dayhf_optn = FALSE;
1372 Jtt_optn = FALSE;
1373 mtrev_optn = FALSE;
1374 cprev_optn = FALSE;
1375 blosum62_optn = TRUE;
1376 vtmv_optn = FALSE;
1377 wag_optn = FALSE;
1378 auto_aamodel = AUTO_OFF;
1379 break;
1380 }
1381 #endif /* ! CPREV */
1382
1383 #ifdef CPREV
1384 if (cprev_optn) {
1385 /* cpREV -> BLOSUM 62 */
1386 Dayhf_optn = FALSE;
1387 Jtt_optn = FALSE;
1388 mtrev_optn = FALSE;
1389 cprev_optn = FALSE;
1390 blosum62_optn = TRUE;
1391 vtmv_optn = FALSE;
1392 wag_optn = FALSE;
1393 auto_aamodel = AUTO_OFF;
1394 break;
1395 }
1396 #endif
1397 if (blosum62_optn) {
1398 /* BLOSUM 62 -> VT model */
1399 Dayhf_optn = FALSE;
1400 Jtt_optn = FALSE;
1401 mtrev_optn = FALSE;
1402 cprev_optn = FALSE;
1403 blosum62_optn = FALSE;
1404 vtmv_optn = TRUE;
1405 wag_optn = FALSE;
1406 auto_aamodel = AUTO_OFF;
1407 break;
1408 }
1409 if (vtmv_optn) {
1410 /* VT model -> WAG model */
1411 Dayhf_optn = FALSE;
1412 Jtt_optn = FALSE;
1413 mtrev_optn = FALSE;
1414 cprev_optn = FALSE;
1415 blosum62_optn = FALSE;
1416 vtmv_optn = FALSE;
1417 wag_optn = TRUE;
1418 auto_aamodel = AUTO_OFF;
1419 break;
1420 }
1421 if (wag_optn) {
1422 /* WAG model -> AUTO */
1423 Dayhf_optn = guessDayhf_optn;
1424 Jtt_optn = guessJtt_optn;
1425 mtrev_optn = guessmtrev_optn;
1426 cprev_optn = guesscprev_optn;
1427 blosum62_optn = guessblosum62_optn;
1428 vtmv_optn = guessvtmv_optn;
1429 wag_optn = guesswag_optn;
1430 auto_aamodel = guessauto_aamodel;
1431 break;
1432 }
1433 break;
1434 }
1435 if (data_optn == BINARY) {
1436 fprintf(STDOUT, "\n\n\nNo other model available!\n");
1437 }
1438 break;
1439
1440 case 't': if (data_optn != NUCLEOTIDE) {
1441 fprintf(STDOUT, "\n\n\nThis is not a possible option!\n");
1442 } else {
1443 tstvf84 = 0.0;
1444 fprintf(STDOUT, "\n\n\nEnter an invalid value for ");
1445 fprintf(STDOUT, "estimation from data set!\n");
1446 fprintf(STDOUT, "\nTransition/transversion parameter (%.2f-%.2f): ",
1447 MINTS, MAXTS);
1448 fflush(STDOUT);
1449 scanf("%lf", &TSparam);
1450 do ;
1451 while (getchar() != '\n');
1452 if (TSparam < MINTS || TSparam > MAXTS) {
1453 optim_optn = TRUE;
1454 TSparam = 2.0;
1455 } else {
1456 optim_optn = FALSE;
1457 }
1458 }
1459 break;
1460
1461 case 'q': fprintf(STDOUT, "\n\n\n");
1462 # if PARALLEL
1463 PP_SendDone();
1464 MPI_Finalize();
1465 # endif /* PARALLEL */
1466 exit(0);
1467
1468 break;
1469
1470 case 'r': if (!(TN_optn && nuc_optn)){
1471 fprintf(STDOUT, "\n\n\nThis is not a possible option!\n");
1472 } else {
1473 tstvf84 = 0.0;
1474 fprintf(STDOUT, "\n\n\nEnter an invalid value ");
1475 fprintf(STDOUT, "for estimation from data set!\n");
1476 fprintf(STDOUT, "\nY/R transition parameter (%.2f-%.2f): ", MINYR, MAXYR);
1477 fflush(STDOUT);
1478 scanf("%lf", &YRparam);
1479 do ;
1480 while (getchar() != '\n');
1481 if (YRparam < MINYR || YRparam > MAXYR) {
1482 optim_optn = TRUE;
1483 YRparam = 0.9;
1484 } else if (YRparam == 1.0) {
1485 TN_optn = FALSE;
1486 HKY_optn = TRUE;
1487 if (optim_optn) TSparam = 2.0;
1488 } else {
1489 optim_optn = FALSE;
1490 }
1491 }
1492 break;
1493
1494 case 'p': if (!(TN_optn && nuc_optn)){
1495 fprintf(STDOUT, "\n\n\nThis is not a possible option!\n");
1496 } else {
1497 fprintf(STDOUT, "\n\n\nThe F84 model (Felsenstein 1984) is a restricted");
1498 fprintf(STDOUT, " TN model, and the one\nF84 parameter uniquely");
1499 fprintf(STDOUT, " determines the two corresponding TN parameters!\n\n");
1500 fprintf(STDOUT, "F84 expected transition/transversion ratio: ");
1501 fflush(STDOUT);
1502 scanf("%lf", &tstvf84);
1503 do ;
1504 while (getchar() != '\n');
1505 if (tstvf84 <= 0.0) tstvf84 = 0.0;
1506 else makeF84model();
1507 }
1508 break;
1509
1510 case '1': if (!(GTR_optn && nuc_optn)){
1511 fprintf(STDOUT, "\n\n\nThis is not a possible option!\n");
1512 } else {
1513 fprintf(STDOUT, "\nA-C substitution rate: ");
1514 fflush(STDOUT);
1515 scanf("%lf", >R_ACrate);
1516 do ; while (getchar() != '\n');
1517 }
1518 break;
1519 case '2': if (!(GTR_optn && nuc_optn)){
1520 fprintf(STDOUT, "\n\n\nThis is not a possible option!\n");
1521 } else {
1522 fprintf(STDOUT, "\nA-G substitution rate: ");
1523 fflush(STDOUT);
1524 scanf("%lf", >R_AGrate);
1525 do ; while (getchar() != '\n');
1526 }
1527 break;
1528 case '3': if (!(GTR_optn && nuc_optn)){
1529 fprintf(STDOUT, "\n\n\nThis is not a possible option!\n");
1530 } else {
1531 fprintf(STDOUT, "\nA-T substitution rate: ");
1532 fflush(STDOUT);
1533 scanf("%lf", >R_ATrate);
1534 do ; while (getchar() != '\n');
1535 }
1536 break;
1537 case '4': if (!(GTR_optn && nuc_optn)){
1538 fprintf(STDOUT, "\n\n\nThis is not a possible option!\n");
1539 } else {
1540 fprintf(STDOUT, "\nC-G substitution rate: ");
1541 fflush(STDOUT);
1542 scanf("%lf", >R_CGrate);
1543 do ; while (getchar() != '\n');
1544 }
1545 break;
1546 case '5': if (!(GTR_optn && nuc_optn)){
1547 fprintf(STDOUT, "\n\n\nThis is not a possible option!\n");
1548 } else {
1549 fprintf(STDOUT, "\nC-T substitution rate: ");
1550 fflush(STDOUT);
1551 scanf("%lf", >R_CTrate);
1552 do ; while (getchar() != '\n');
1553 }
1554 break;
1555 case '6': if (!(GTR_optn && nuc_optn)){
1556 fprintf(STDOUT, "\n\n\nThis is not a possible option!\n");
1557 } else {
1558 fprintf(STDOUT, "\nG-T substitution rate: ");
1559 fflush(STDOUT);
1560 scanf("%lf", >R_GTrate);
1561 do ; while (getchar() != '\n');
1562 }
1563 break;
1564
1565 case 'y': fprintf(STDOUT, "\n");
1566 break;
1567
1568 default: fprintf(STDOUT, "\n\n\nThis is not a possible option!\n");
1569 break;
1570 }
1571 } while (ch != 'y');
1572
1573 {
1574 int gaperror = 0;
1575 int notu;
1576 for (notu = 0; notu < Maxspc; notu++) {
1577 #ifndef USE_WINDOWS
1578 if ((Seqgapchar[notu] + Seqotherchar[notu]) == Maxsite) {
1579 #else
1580 if ((Seqgapchar[notu] + Seqotherchar[notu]) == alimaxsite) {
1581 #endif
1582 if (gaperror == 0) {
1583 fprintf(STDOUT, "\n\n\n");
1584 fprintf(STDOUT, "\n\n\nUnable to proceed (only ambiguous characters ('-', '?', 'X', ...) \nin sequence(s) '");
1585 } else {
1586 fprintf(STDOUT, "', '");
1587 }
1588 fputid(STDOUT, notu);
1589 gaperror++;
1590 }
1591
1592 if (gaperror > 0) {
1593 fprintf(STDOUT, "')\n\n\n");
1594 # if PARALLEL
1595 PP_Finalize();
1596 # endif
1597 exit(1);
1598 }
1599 } /* for */
1600 }
1601 } /* setoptions */
1602
1603 /* open file for reading */
1604 int openfiletoread(FILE **fp, char name[], char descr[])
1605 {
1606 int count = 0;
1607 int reset = 0;
1608 cvector str;
1609
1610 if ((*fp = fopen(name, "r")) == NULL) {
1611 fprintf(STDOUT, "\n\n\nPlease enter a file name for the %s: ", descr);
1612 fflush(STDOUT);
1613 str = mygets();
1614 while ((*fp = fopen(str, "r")) == NULL)
1615 {
1616 count++;
1617 if (count > 10)
1618 {
1619 fprintf(STDOUT, "\n\n\nToo many trials - quitting ...\n");
1620 # if PARALLEL
1621 PP_Finalize();
1622 # endif
1623 exit(1);
1624 }
1625 fprintf(STDOUT, "File '%s' not found, ", str);
1626 fprintf(STDOUT, "please enter alternative name: ");
1627 free_cvector(str);
1628 fflush(STDOUT);
1629 str = mygets();
1630 reset = 1;
1631 }
1632 fprintf(STDOUT, "\n");
1633 strcpy(name, str);
1634 free_cvector(str);
1635 }
1636 return(reset);
1637 } /* openfiletoread */
1638
1639
1640 /* open file for writing */
1641 int openfiletowrite(FILE **fp, char name[], char descr[])
1642 {
1643 int count = 0;
1644 int reset = 0;
1645 cvector str;
1646
1647 if ((*fp = fopen(name, "w")) == NULL) {
1648 fprintf(STDOUT, "\n\n\nPlease enter a file name for the %s: ", descr);
1649 fflush(STDOUT);
1650 str = mygets();
1651 while ((*fp = fopen(str, "w")) == NULL)
1652 {
1653 count++;
1654 if (count > 10)
1655 {
1656 fprintf(STDOUT, "\n\n\nToo many trials - quitting ...\n");
1657 # if PARALLEL
1658 PP_Finalize();
1659 # endif
1660 exit(1);
1661 }
1662 fprintf(STDOUT, "File '%s' not created, ", str);
1663 fprintf(STDOUT, "please enter other name: ");
1664 free_cvector(str);
1665 fflush(STDOUT);
1666 str = mygets();
1667 reset = 1;
1668 }
1669 fprintf(STDOUT, "\n");
1670 strcpy(name, str);
1671 free_cvector(str);
1672 }
1673 return(reset);
1674 } /* openfiletowrite */
1675
1676
1677 /* open file for appending */
1678 int openfiletoappend(FILE **fp, char name[], char descr[])
1679 {
1680 int count = 0;
1681 int reset = 0;
1682 cvector str;
1683
1684 if ((*fp = fopen(name, "a")) == NULL) {
1685 fprintf(STDOUT, "\n\n\nPlease enter a file name for the %s: ", descr);
1686 fflush(STDOUT);
1687 str = mygets();
1688 while ((*fp = fopen(str, "a")) == NULL)
1689 {
1690 count++;
1691 if (count > 10)
1692 {
1693 fprintf(STDOUT, "\n\n\nToo many trials - quitting ...\n");
1694 # if PARALLEL
1695 PP_Finalize();
1696 # endif
1697 exit(1);
1698 }
1699 fprintf(STDOUT, "File '%s' not created, ", str);
1700 fprintf(STDOUT, "please enter other name: ");
1701 free_cvector(str);
1702 fflush(STDOUT);
1703 str = mygets();
1704 reset = 1;
1705 }
1706 fprintf(STDOUT, "\n");
1707 strcpy(name, str);
1708 free_cvector(str);
1709 }
1710 return(reset);
1711 } /* openfiletoappend */
1712
1713
1714 /* close file */
1715 void closefile(FILE *fp)
1716 {
1717 fclose(fp);
1718 } /* closefile */
1719
1720 /* symmetrize doublet frequencies */
1721 void symdoublets()
1722 {
1723 int i, imean;
1724 double mean;
1725
1726 if (data_optn == NUCLEOTIDE && SH_optn && sym_optn) {
1727 /* ML frequencies */
1728 mean = (Freqtpm[1] + Freqtpm[4])/2.0; /* AC CA */
1729 Freqtpm[1] = mean;
1730 Freqtpm[4] = mean;
1731 mean = (Freqtpm[2] + Freqtpm[8])/2.0; /* AG GA */
1732 Freqtpm[2] = mean;
1733 Freqtpm[8] = mean;
1734 mean = (Freqtpm[3] + Freqtpm[12])/2.0; /* AT TA */
1735 Freqtpm[3] = mean;
1736 Freqtpm[12] = mean;
1737 mean = (Freqtpm[6] + Freqtpm[9])/2.0; /* CG GC */
1738 Freqtpm[6] = mean;
1739 Freqtpm[9] = mean;
1740 mean = (Freqtpm[7] + Freqtpm[13])/2.0; /* CT TC */
1741 Freqtpm[7] = mean;
1742 Freqtpm[13] = mean;
1743 mean = (Freqtpm[11] + Freqtpm[14])/2.0; /* GT TG */
1744 Freqtpm[11] = mean;
1745 Freqtpm[14] = mean;
1746
1747 /* base composition of each taxon */
1748 for (i = 0; i < Maxspc; i++) {
1749 imean = (Basecomp[i][1] + Basecomp[i][4])/2; /* AC CA */
1750 Basecomp[i][1] = imean;
1751 Basecomp[i][4] = imean;
1752 imean = (Basecomp[i][2] + Basecomp[i][8])/2; /* AG GA */
1753 Basecomp[i][2] = imean;
1754 Basecomp[i][8] = imean;
1755 imean = (Basecomp[i][3] + Basecomp[i][12])/2; /* AT TA */
1756 Basecomp[i][3] = imean;
1757 Basecomp[i][12] = imean;
1758 imean = (Basecomp[i][6] + Basecomp[i][9])/2; /* CG GC */
1759 Basecomp[i][6] = imean;
1760 Basecomp[i][9] = imean;
1761 imean = (Basecomp[i][7] + Basecomp[i][13])/2; /* CT TC */
1762 Basecomp[i][7] = imean;
1763 Basecomp[i][13] = imean;
1764 imean = (Basecomp[i][11] + Basecomp[i][14])/2; /* GT TG */
1765 Basecomp[i][11] = imean;
1766 Basecomp[i][14] = imean;
1767 }
1768 }
1769 } /* symdoublets */
1770
1771 /* show Ts/Tv ratio and Ts Y/R ratio */
1772 void computeexpectations()
1773 {
1774 double AlphaYBeta, AlphaRBeta, piR, piY, num, denom, pyr, pur;
1775
1776 if (nuc_optn == TRUE) { /* 4x4 nucs */
1777 piR = Freqtpm[0] + Freqtpm[2];
1778 piY = Freqtpm[1] + Freqtpm[3];
1779 AlphaRBeta = 4.0*TSparam / (1 + YRparam);
1780 AlphaYBeta = AlphaRBeta * YRparam;
1781 tstvratio = (AlphaRBeta*Freqtpm[0]*Freqtpm[2] +
1782 AlphaYBeta*Freqtpm[1]*Freqtpm[3])/(piR * piY);
1783 yrtsratio = (AlphaYBeta*Freqtpm[1]*Freqtpm[3]) /
1784 (AlphaRBeta*Freqtpm[0]*Freqtpm[2]);
1785 } else { /* 16x16 nucs */
1786 pyr = Freqtpm[1]*Freqtpm[3] + Freqtpm[5]*Freqtpm[7] +
1787 Freqtpm[9]*Freqtpm[11] + Freqtpm[4]*Freqtpm[12] +
1788 Freqtpm[5]*Freqtpm[13] + Freqtpm[6]*Freqtpm[14] +
1789 Freqtpm[7]*Freqtpm[15] + Freqtpm[13]*Freqtpm[15];
1790 pur = Freqtpm[0]*Freqtpm[2] + Freqtpm[4]*Freqtpm[6] +
1791 Freqtpm[0]*Freqtpm[8] + Freqtpm[1]*Freqtpm[9] +
1792 Freqtpm[2]*Freqtpm[10] + Freqtpm[8]*Freqtpm[10] +
1793 Freqtpm[3]*Freqtpm[11] + Freqtpm[12]*Freqtpm[14];
1794 num = pyr + pur;
1795 denom = Freqtpm[0]*Freqtpm[1] + Freqtpm[1]*Freqtpm[2] +
1796 Freqtpm[0]*Freqtpm[3] + Freqtpm[2]*Freqtpm[3] +
1797 Freqtpm[0]*Freqtpm[4] + Freqtpm[1]*Freqtpm[5] +
1798 Freqtpm[4]*Freqtpm[5] + Freqtpm[2]*Freqtpm[6] +
1799 Freqtpm[5]*Freqtpm[6] + Freqtpm[3]*Freqtpm[7] +
1800 Freqtpm[4]*Freqtpm[7] + Freqtpm[6]*Freqtpm[7] +
1801 Freqtpm[4]*Freqtpm[8] + Freqtpm[5]*Freqtpm[9] +
1802 Freqtpm[8]*Freqtpm[9] + Freqtpm[6]*Freqtpm[10] +
1803 Freqtpm[9]*Freqtpm[10] + Freqtpm[7]*Freqtpm[11] +
1804 Freqtpm[8]*Freqtpm[11] + Freqtpm[10]*Freqtpm[11] +
1805 Freqtpm[0]*Freqtpm[12] + Freqtpm[8]*Freqtpm[12] +
1806 Freqtpm[1]*Freqtpm[13] + Freqtpm[9]*Freqtpm[13] +
1807 Freqtpm[12]*Freqtpm[13] + Freqtpm[2]*Freqtpm[14] +
1808 Freqtpm[10]*Freqtpm[14] + Freqtpm[13]*Freqtpm[14] +
1809 Freqtpm[3]*Freqtpm[15] + Freqtpm[11]*Freqtpm[15] +
1810 Freqtpm[12]*Freqtpm[15] + Freqtpm[14]*Freqtpm[15];
1811 tstvratio = 2.0*TSparam * num/denom;
1812 yrtsratio = pyr/pur;
1813 }
1814 } /* computeexpectations */
1815
1816 /* write ML distance matrix to file */
1817 void putdistance(FILE *fp)
1818 {
1819 int i, j;
1820
1821 fprintf(fp, " %d\n", Maxspc);
1822 for (i = 0; i < Maxspc; i++) {
1823 fputid10(fp, i);
1824 for (j = 0; j < Maxspc; j++) {
1825 fprintf(fp, " %.5f", Distanmat[i][j]/100.0);
1826 /* seven in one row */
1827 if ((j + 1) % 7 == 0 && j+1 != Maxspc)
1828 fprintf(fp, "\n ");
1829 }
1830 fprintf(fp, "\n");
1831 }
1832 } /* putdistance */
1833
1834
1835 /* find identical sequences */
1836 void findidenticals(FILE *fp)
1837 {
1838 int i, j, noids;
1839 cvector useqs;
1840
1841 useqs = new_cvector(Maxspc);
1842
1843 for (i = 0; i < Maxspc; i++)
1844 useqs[i] = 0;
1845
1846 noids = TRUE;
1847 for (i = 0; i < Maxspc && noids; i++)
1848 for (j = i + 1; j < Maxspc && noids; j++)
1849 if (Distanmat[i][j] == 0.0) noids = FALSE;
1850
1851 if (noids)
1852 fprintf(fp, " All sequences are unique.\n");
1853 else {
1854 for (i = 0; i < Maxspc; i++) {
1855 noids = TRUE;
1856 for (j = i + 1; j < Maxspc && noids; j++)
1857 if (Distanmat[i][j] == 0.0) noids = FALSE;
1858
1859 if (!noids && useqs[i] == 0) {
1860 fputid(fp, i);
1861 useqs[i] = 1;
1862 for (j = i + 1; j < Maxspc; j++)
1863 if (Distanmat[i][j] == 0.0) {
1864 fprintf(fp, ", ");
1865 fputid(fp, j);
1866 useqs[j] = 1;
1867 }
1868 fprintf(fp, ".\n");
1869 }
1870 }
1871 }
1872 free_cvector(useqs);
1873 } /* findidenticals */
1874
1875 /* compute average distance */
1876 double averagedist(int maxspc, dmatrix distanmat, double *meandist, double *mindist, double *maxdist, double *stddevdist, double *vardist)
1877 {
1878 int i, j;
1879 double sum;
1880 double avg;
1881 double min;
1882 double max;
1883 double var;
1884 double stddev;
1885 double temp;
1886 double numofdists;
1887
1888 numofdists = ((double) (maxspc * (maxspc - 1)) / 2.0);
1889 sum = 0.0;
1890 for (i = 0; i < maxspc; i++)
1891 for (j = i + 1; j < maxspc; j++)
1892 sum = sum + distanmat[i][j];
1893
1894 /* avg = sum / (double) maxspc / ((double) maxspc - 1.0) * 2.0; */
1895 /* avg = sum / ((double) (maxspc * (maxspc - 1)) / 2.0); */
1896 avg = sum / numofdists;
1897
1898
1899 /*
1900 fprintf(stderr, "\nXXXX: old %f, new %f\n",
1901 1 / (double) maxspc / ((double) maxspc - 1.0) * 2.0,
1902 1 / ((double) (maxspc * (maxspc - 1)) / 2.0));
1903 */
1904
1905 sum = 0.0;
1906 min = distanmat[0][1];
1907 max = distanmat[0][1];
1908 for (i = 0; i < maxspc; i++)
1909 for (j = i + 1; j < maxspc; j++) {
1910 temp = avg/100 - distanmat[i][j]/100;
1911 sum += temp * temp;
1912 if (min > distanmat[i][j]) min = distanmat[i][j];
1913 if (max < distanmat[i][j]) max = distanmat[i][j];
1914 }
1915 var = sum / (numofdists - 1);
1916 stddev = sqrt(var);
1917
1918 #if 0
1919 fprintf(stderr, "\nXXXX: mean=%f, min=%f max=%f variance=%f, std-deviation=%f\n",
1920 avg/100, min/100, max/100, var, stddev);
1921 #endif
1922 *meandist = avg/100;
1923 *mindist = min/100;
1924 *maxdist = max/100;
1925 *vardist = var;
1926 *stddevdist = stddev;
1927 return avg;
1928 } /* averagedist */
1929
1930 /* first lines of EPSF likelihood mapping file */
1931 void initps(FILE *ofp)
1932 {
1933 fprintf(ofp, "%%!PS-Adobe-3.0 EPSF-3.0\n");
1934 fprintf(ofp, "%%%%BoundingBox: 60 210 550 650\n");
1935 fprintf(ofp, "%%%%Pages: 1\n");
1936 # ifndef ALPHA
1937 fprintf(ofp, "%%%%Creator: %s (version %s)\n", PACKAGE, VERSION);
1938 # else
1939 fprintf(ofp, "%%%%Creator: %s (version %s%s)\n", PACKAGE, VERSION, ALPHA);
1940 # endif
1941 fprintf(ofp, "%%%%Title: Likelihood Mapping Analysis\n");
1942 fprintf(ofp, "%%%%CreationDate: %s", asctime(localtime(&Starttime)) );
1943 fprintf(ofp, "%%%%DocumentFonts: Helvetica\n");
1944 fprintf(ofp, "%%%%DocumentNeededFonts: Helvetica\n");
1945 fprintf(ofp, "%%%%EndComments\n");
1946 fprintf(ofp, "%% use inch as unit\n");
1947 fprintf(ofp, "/inch {72 mul} def\n");
1948 fprintf(ofp, "%% triangle side length (3 inch)\n");
1949 fprintf(ofp, "/tl {3 inch mul} def\n");
1950 fprintf(ofp, "%% plot one dot (x-y coordinates on stack)\n");
1951 fprintf(ofp, "/dot {\n");
1952 fprintf(ofp, "newpath\n");
1953 fprintf(ofp, "0.002 tl 0 360 arc %% radius is 0.002 of the triangle length\n");
1954 fprintf(ofp, "closepath\n");
1955 fprintf(ofp, "fill\n");
1956 fprintf(ofp, "} def\n");
1957
1958 /* PS definition of a flush right print */
1959 fprintf(ofp, "\n%% flush right show\n");
1960 fprintf(ofp, "/centershow {\n");
1961 fprintf(ofp, " dup stringwidth pop %% get length of string\n");
1962 fprintf(ofp, " neg 0 rmoveto %% move width to left\n");
1963 fprintf(ofp, " show\n");
1964 fprintf(ofp, "} def\n");
1965 fprintf(ofp, "\n%% centered show\n");
1966
1967 /* PS definition of a centered print */
1968 fprintf(ofp, "/centershow {\n");
1969 fprintf(ofp, " dup stringwidth pop %% get length of string\n");
1970 fprintf(ofp, " -2 div %% devide length by -2\n");
1971 fprintf(ofp, " 0 rmoveto %% move half width to left\n");
1972 fprintf(ofp, " show\n");
1973 fprintf(ofp, "} def\n");
1974
1975
1976 fprintf(ofp, "%% preamble\n");
1977 fprintf(ofp, "/Helvetica findfont\n");
1978 fprintf(ofp, "12 scalefont\n");
1979 fprintf(ofp, "setfont\n");
1980 fprintf(ofp, "%% 0/0 for triangle of triangles\n");
1981 fprintf(ofp, "0.9 inch 3 inch translate\n");
1982 fprintf(ofp, "%% first triangle (the one with dots)\n");
1983 fprintf(ofp, "0.6 tl 1.2 tl 0.8660254038 mul translate\n");
1984 fprintf(ofp, "newpath\n");
1985 fprintf(ofp, " 0.0 tl 0.0 tl moveto\n");
1986 fprintf(ofp, " 1.0 tl 0.0 tl lineto\n");
1987 fprintf(ofp, " 0.5 tl 0.8660254038 tl lineto\n");
1988 fprintf(ofp, "closepath\n");
1989 fprintf(ofp, "stroke\n");
1990
1991 if (numclust == 4) { /* four cluster analysis */
1992 fprintf(ofp, "%% label corners\n");
1993 fprintf(ofp, "0.5 tl 0.9 tl moveto\n"); /* old: 0.375 0.9 */
1994 fprintf(ofp, "((a,b)-(c,d)) centershow %% CHANGE HERE IF NECESSARY\n");
1995 fprintf(ofp, "-0.045 tl -0.08 tl moveto\n"); /* old: -0.16 -0.08 */
1996 fprintf(ofp, "((a,d)-(b,c)) centershow %% CHANGE HERE IF NECESSARY\n");
1997 fprintf(ofp, "1.045 tl -0.08 tl moveto\n"); /* old: -0.92 -0.08 */
1998 fprintf(ofp, "((a,c)-(b,d)) centershow %% CHANGE HERE IF NECESSARY\n");
1999 }
2000 if (numclust == 3) { /* three cluster analysis */
2001 fprintf(ofp, "%% label corners\n");
2002 fprintf(ofp, "0.5 tl 0.9 tl moveto\n"); /* old: 0.375 0.9 */
2003 fprintf(ofp, "((a,b)-(c,c)) centershow %% CHANGE HERE IF NECESSARY\n");
2004 fprintf(ofp, "-0.045 tl -0.08 tl moveto\n"); /* old: -0.16 -0.08 */
2005 fprintf(ofp, "((a,c)-(b,c)) centershow %% CHANGE HERE IF NECESSARY\n");
2006 fprintf(ofp, "1.045 tl -0.08 tl moveto\n"); /* old: -0.92 -0.08 */
2007 fprintf(ofp, "((a,c)-(b,c)) centershow %% CHANGE HERE IF NECESSARY\n");
2008 }
2009 if (numclust == 2) { /* two cluster analysis */
2010 fprintf(ofp, "%% label corners\n");
2011 fprintf(ofp, "0.5 tl 0.9 tl moveto\n"); /* old: 0.375 0.9 */
2012 fprintf(ofp, "((a,a)-(b,b)) centershow %% CHANGE HERE IF NECESSARY\n");
2013 fprintf(ofp, "-0.045 tl -0.08 tl moveto\n"); /* old: -0.16 -0.08 */
2014 fprintf(ofp, "((a,b)-(a,b)) centershow %% CHANGE HERE IF NECESSARY\n");
2015 fprintf(ofp, "1.045 tl -0.08 tl moveto\n"); /* old: -0.92 -0.08 */
2016 fprintf(ofp, "((a,b)-(a,b)) centershow %% CHANGE HERE IF NECESSARY\n");
2017 }
2018
2019 } /* initps */
2020
2021 /* plot one point of likelihood mapping analysis */
2022 void plotlmpoint(FILE *ofp, double w1, double w2)
2023 {
2024 fprintf(ofp,"%.10f tl %.10f tl dot\n",
2025 0.5*w1 + w2, w1*0.8660254038);
2026 } /* plotlmpoint */
2027
2028 /* last lines of EPSF likelihood mapping file */
2029 void finishps(FILE *ofp)
2030 {
2031 fprintf(ofp, "stroke\n");
2032 fprintf(ofp, "%% second triangle (the one with 3 basins)\n");
2033 fprintf(ofp, "/secondtriangle {\n");
2034 fprintf(ofp, "newpath\n");
2035 fprintf(ofp, " 0.0 tl 0.0 tl moveto\n");
2036 fprintf(ofp, " 1.0 tl 0.0 tl lineto\n");
2037 fprintf(ofp, " 0.5 tl 0.8660254038 tl lineto\n");
2038 fprintf(ofp, "closepath\n");
2039 fprintf(ofp, "stroke\n");
2040 fprintf(ofp, "newpath\n");
2041 fprintf(ofp, " 0.50 tl 0.2886751346 tl moveto\n");
2042 fprintf(ofp, " 0.50 tl 0.0000000000 tl lineto\n");
2043 fprintf(ofp, "stroke\n");
2044 fprintf(ofp, "newpath\n");
2045 fprintf(ofp, " 0.50 tl 0.2886751346 tl moveto\n");
2046 fprintf(ofp, " 0.25 tl 0.4330127019 tl lineto\n");
2047 fprintf(ofp, "stroke\n");
2048 fprintf(ofp, "newpath\n");
2049 fprintf(ofp, " 0.50 tl 0.2886751346 tl moveto\n");
2050 fprintf(ofp, " 0.75 tl 0.4330127019 tl lineto\n");
2051 fprintf(ofp, "stroke\n");
2052 fprintf(ofp, "0.44 tl 0.5 tl moveto %% up\n");
2053 fprintf(ofp, "(%.1f%%) show\n", (double) ar1*100.0/Numquartets);
2054 fprintf(ofp, "0.25 tl 0.15 tl moveto %% down left\n");
2055 fprintf(ofp, "(%.1f%%) show\n", (double) ar3*100.0/Numquartets);
2056 fprintf(ofp, "0.63 tl 0.15 tl moveto %% down right\n");
2057 fprintf(ofp, "(%.1f%%) show\n", (double) ar2*100.0/Numquartets);
2058 fprintf(ofp, "} def\n");
2059 fprintf(ofp, "%% third triangle (the one with 7 basins)\n");
2060 fprintf(ofp, "/thirdtriangle {\n");
2061 fprintf(ofp, "newpath\n");
2062 fprintf(ofp, " 0.0 tl 0.0 tl moveto\n");
2063 fprintf(ofp, " 1.0 tl 0.0 tl lineto\n");
2064 fprintf(ofp, " 0.5 tl 0.8660254038 tl lineto\n");
2065 fprintf(ofp, "closepath\n");
2066 fprintf(ofp, "stroke\n");
2067 fprintf(ofp, "newpath\n");
2068 fprintf(ofp, " 0.25 tl 0.1443375673 tl moveto\n");
2069 fprintf(ofp, " 0.75 tl 0.1443375673 tl lineto\n");
2070 fprintf(ofp, " 0.50 tl 0.5773502692 tl lineto\n");
2071 fprintf(ofp, "closepath\n");
2072 fprintf(ofp, "stroke\n");
2073 fprintf(ofp, "newpath\n");
2074 fprintf(ofp, " 0.125 tl 0.2165063509 tl moveto\n");
2075 fprintf(ofp, " 0.250 tl 0.1443375673 tl lineto\n");
2076 fprintf(ofp, "stroke\n");
2077 fprintf(ofp, "newpath\n");
2078 fprintf(ofp, " 0.375 tl 0.6495190528 tl moveto\n");
2079 fprintf(ofp, " 0.500 tl 0.5773502692 tl lineto\n");
2080 fprintf(ofp, "stroke\n");
2081 fprintf(ofp, "newpath\n");
2082 fprintf(ofp, " 0.625 tl 0.6495190528 tl moveto\n");
2083 fprintf(ofp, " 0.500 tl 0.5773502692 tl lineto\n");
2084 fprintf(ofp, "stroke\n");
2085 fprintf(ofp, "newpath\n");
2086 fprintf(ofp, " 0.875 tl 0.2165063509 tl moveto\n");
2087 fprintf(ofp, " 0.750 tl 0.1443375673 tl lineto\n");
2088 fprintf(ofp, "stroke\n");
2089 fprintf(ofp, "newpath\n");
2090 fprintf(ofp, " 0.750 tl 0.00 tl moveto\n");
2091 fprintf(ofp, " 0.750 tl 0.1443375673 tl lineto\n");
2092 fprintf(ofp, "stroke\n");
2093 fprintf(ofp, "newpath\n");
2094 fprintf(ofp, " 0.250 tl 0.00 tl moveto\n");
2095 fprintf(ofp, " 0.250 tl 0.1443375673 tl lineto\n");
2096 fprintf(ofp, "stroke\n");
2097 fprintf(ofp, "0.42 tl 0.66 tl moveto %% up\n");
2098 fprintf(ofp, "(%.1f%%) show\n", (double) reg1*100.0/Numquartets);
2099 fprintf(ofp, "0.07 tl 0.05 tl moveto %% down left\n");
2100 fprintf(ofp, "(%.1f%%) show\n", (double) reg3*100.0/Numquartets);
2101 fprintf(ofp, "0.77 tl 0.05 tl moveto %% down right\n");
2102 fprintf(ofp, "(%.1f%%) show\n", (double) reg2*100.0/Numquartets);
2103 fprintf(ofp, "0.43 tl 0.05 tl moveto %% down side\n");
2104 fprintf(ofp, "(%.1f%%) show\n", (double) reg5*100.0/Numquartets);
2105 fprintf(ofp, "0.43 tl 0.28 tl moveto %% center\n");
2106 fprintf(ofp, "(%.1f%%) show\n", (double) reg7*100.0/Numquartets);
2107 fprintf(ofp, "gsave\n");
2108 fprintf(ofp, "-60 rotate\n");
2109 fprintf(ofp, "-0.07 tl 0.77 tl moveto %% right side\n");
2110 fprintf(ofp, "(%.1f%%) show\n", (double) reg4*100.0/Numquartets);
2111 fprintf(ofp, "grestore\n");
2112 fprintf(ofp, "gsave\n");
2113 fprintf(ofp, "60 rotate\n");
2114 fprintf(ofp, "0.4 tl -0.09 tl moveto %% left side\n");
2115 fprintf(ofp, "(%.1f%%) show\n", (double) reg6*100.0/Numquartets);
2116 fprintf(ofp, "grestore\n");
2117 fprintf(ofp, "} def\n");
2118 fprintf(ofp, "%% print the other two triangles\n");
2119 fprintf(ofp, "-0.6 tl -1.2 tl 0.8660254038 mul translate\n");
2120 fprintf(ofp, "secondtriangle\n");
2121 fprintf(ofp, "1.2 tl 0 translate\n");
2122 fprintf(ofp, "thirdtriangle\n");
2123 fprintf(ofp, "showpage\n");
2124 fprintf(ofp, "%%%%EOF\n");
2125 } /* finishps */
2126
2127 /* computes LM point from the three log-likelihood values,
2128 plots the point, and does some statistics */
2129 void makelmpoint(FILE *fp, double b1, double b2, double b3)
2130 {
2131 double w1, w2, w3, temp;
2132 unsigned char qpbranching;
2133 double temp1, temp2, temp3, onethird;
2134 double templog;
2135 unsigned char discreteweight[3], treebits[3];
2136
2137 onethird = 1.0/3.0;
2138 treebits[0] = (unsigned char) 1;
2139 treebits[1] = (unsigned char) 2;
2140 treebits[2] = (unsigned char) 4;
2141
2142 /* sort in descending order */
2143 qweight[0] = b1;
2144 qweight[1] = b2;
2145 qweight[2] = b3;
2146 sort3doubles(qweight, qworder);
2147
2148 /* compute Bayesian weights */
2149 templog = qweight[qworder[1]]-qweight[qworder[0]];
2150 if(templog < -TP_MAX_EXP_DIFF) /* possible, since 1.0+exp(>36) == 1.0 */
2151 qweight[qworder[1]] = 0.0;
2152 else
2153 qweight[qworder[1]] = exp(templog);
2154
2155 templog = qweight[qworder[2]]-qweight[qworder[0]];
2156 if(templog < -TP_MAX_EXP_DIFF) /* possible, since 1.0+exp(>36) == 1.0 */
2157 qweight[qworder[2]] = 0.0;
2158 else
2159 qweight[qworder[2]] = exp(templog);
2160
2161 qweight[qworder[0]] = 1.0;
2162
2163 temp = qweight[0] + qweight[1] + qweight[2];
2164 qweight[0] = qweight[0]/temp;
2165 qweight[1] = qweight[1]/temp;
2166 qweight[2] = qweight[2]/temp;
2167
2168 /* plot one point in likelihood mapping triangle */
2169 w1 = qweight[0];
2170 w2 = qweight[1];
2171 w3 = qweight[2];
2172 plotlmpoint(fp, w1, w2);
2173
2174 /* check areas 1,2,3 */
2175 if (treebits[qworder[0]] == 1) ar1++;
2176 else if (treebits[qworder[0]] == 2) ar2++;
2177 else ar3++;
2178
2179 /* check out regions 1,2,3,4,5,6,7 */
2180
2181 /* 100 distribution */
2182 temp1 = 1.0 - qweight[qworder[0]];
2183 sqdiff[0] = temp1*temp1 +
2184 qweight[qworder[1]]*qweight[qworder[1]] +
2185 qweight[qworder[2]]*qweight[qworder[2]];
2186 discreteweight[0] = treebits[qworder[0]];
2187
2188 /* 110 distribution */
2189 temp1 = 0.5 - qweight[qworder[0]];
2190 temp2 = 0.5 - qweight[qworder[1]];
2191 sqdiff[1] = temp1*temp1 + temp2*temp2 +
2192 qweight[qworder[2]]*qweight[qworder[2]];
2193 discreteweight[1] = treebits[qworder[0]] + treebits[qworder[1]];
2194
2195 /* 111 distribution */
2196 temp1 = onethird - qweight[qworder[0]];
2197 temp2 = onethird - qweight[qworder[1]];
2198 temp3 = onethird - qweight[qworder[2]];
2199 sqdiff[2] = temp1 * temp1 + temp2 * temp2 + temp3 * temp3;
2200 discreteweight[2] = (unsigned char) 7;
2201
2202 /* sort in descending order */
2203 sort3doubles(sqdiff, sqorder);
2204
2205 qpbranching = (unsigned char) discreteweight[sqorder[2]];
2206
2207 if (qpbranching == 1) {
2208 reg1++;
2209 if (w2 < w3) reg1l++;
2210 else reg1r++;
2211 }
2212 if (qpbranching == 2) {
2213 reg2++;
2214 if (w1 < w3) reg2d++;
2215 else reg2u++;
2216 }
2217 if (qpbranching == 4) {
2218 reg3++;
2219 if (w1 < w2) reg3d++;
2220 else reg3u++;
2221 }
2222 if (qpbranching == 3) {
2223 reg4++;
2224 if (w1 < w2) reg4d++;
2225 else reg4u++;
2226 }
2227 if (qpbranching == 6) {
2228 reg5++;
2229 if (w2 < w3) reg5l++;
2230 else reg5r++;
2231 }
2232 if (qpbranching == 5) {
2233 reg6++;
2234 if (w1 < w3) reg6d++;
2235 else reg6u++;
2236 }
2237 if (qpbranching == 7) reg7++;
2238 } /* makelmpoint */
2239
2240 #if 0
2241 /* TODO: move to treetest.c */
2242
2243 #define MINKHDIFF 0.01
2244
2245 /* TODO: move to treetest.c */
2246
2247 /*****************************************************************************/
2248 /* ELW/SR Test (Expected Likelihood Weights, Strimmer & Rambaut, 2002) */
2249 /*****************************************************************************/
2250 void elw_test(ivector Alias,
2251 dmatrix allsites,
2252 int numutrees,
2253 int numsites,
2254 double siglevel,
2255 int numboots,
2256 ivector *elw_test_passed,
2257 dvector *elw_Support)
2258 {
2259
2260 /*
2261 * Determine posterior probabilties and support values
2262 * for each hypothesis and store results in public arrays
2263 * posterior, support etc which will automatically be
2264 * created by this procedure.
2265 *
2266 * allsites log-likelihoods of each pattern
2267 * Alias map of patterns to sites in sequence
2268 * numboots number of bootstraps
2269 */
2270
2271 dvector deltaL; /* delta of likelihoods (double[numH]) */
2272 dvector elw_support; /* (double[numH]) */
2273 dvector posterior; /* (double[numH]) */
2274 ivector likelihoodOrder; /* (int[numH]) */
2275 ivector supportOrder; /* (int[numH]) */
2276 dvector rs; /* (int[numH]) */
2277
2278 double sum1;
2279 double sum;
2280 int numH; /* number of hypotheses */
2281 int numSites; /* number of sites */
2282 int s, p;
2283 int i, j, k;
2284 /* int j; */
2285 double maxL;
2286 double maxLogL;
2287 ivector sr_test_res; /* [numutrees]; */
2288 int elw_numboots = 1000;
2289
2290
2291 /* number of hypothesis */
2292 numH = numutrees;
2293
2294 /* allocate public arrays */
2295 deltaL = new_dvector(numH);
2296 elw_support = new_dvector(numH);
2297 posterior = new_dvector(numH);
2298 likelihoodOrder = new_ivector(numH);
2299 supportOrder = new_ivector(numH);
2300 sr_test_res = new_ivector(numutrees);
2301
2302 *elw_test_passed = sr_test_res;
2303 *elw_Support = elw_support;
2304
2305 /* number of sites */
2306 numSites = numsites;
2307
2308
2309 /******************************************/
2310 /* Compute log-likelihoods, their order, */
2311 /* their deltas and their posteriors */
2312 /******************************************/
2313
2314 /* initialize delta vector for each tree */
2315 for (j = 0; j < numSites; j++) {
2316 if (Alias == NULL) {
2317 p = j;
2318 } else {
2319 p = Alias[j];
2320 }
2321
2322 for (k = 0; k < numH; k++) {
2323 deltaL[k] -= allsites[k][p];
2324 }
2325 }
2326
2327 /* sort likelihoods -> sorted index list likelihoodOrder */
2328 /* insertion sort */
2329 {
2330 int tmp;
2331 int idx;
2332 for (k=0; k < numH; k++) likelihoodOrder[k] = k;
2333 for (j=0; j < numH-1; j++) {
2334 idx=j;
2335 for (k=j+1; k < numH; k++) {
2336 if (deltaL[likelihoodOrder[k]] < deltaL[likelihoodOrder[idx]]) idx=k;
2337 }
2338 tmp = likelihoodOrder[j];
2339 likelihoodOrder[j] = likelihoodOrder[idx];
2340 likelihoodOrder[idx] = tmp;
2341 }
2342 }
2343
2344
2345 /* Compute deltas */
2346 maxL= -deltaL[likelihoodOrder[0]];
2347 for (j = 0; j < numH; j++) {
2348 deltaL[j] = -(deltaL[j]+maxL);
2349 }
2350
2351 /* compute posterior probabilities */
2352 sum1 = 0.0;
2353 for (j = 0; j < numH; j++) {
2354 posterior[j] = exp(deltaL[j]);
2355 sum1 += posterior[j];
2356 }
2357 for (j = 0; j < numH; j++) {
2358 posterior[j] = posterior[j]/sum1;
2359 }
2360
2361 /* reverse sign of delta L */
2362 for (j = 0; j < numH; j++) {
2363 deltaL[j] = -deltaL[j];
2364 }
2365 deltaL[likelihoodOrder[0]] = 0.0;
2366
2367
2368 /* Bootstrap/Resample data */
2369 /***************************/
2370
2371 /* temporary memory */
2372 rs = new_dvector(numH);
2373
2374 /* MersenneTwisterFast mt = new MersenneTwisterFast(); */
2375 for (i = 0; i < elw_numboots; i++) { /* num bootstraps */
2376 for (k = 0; k < numH; k++) {
2377 rs[k] = 0;
2378 }
2379
2380 for (j = 0; j < numSites; j++) { /* bootstrapping sites for ...*/
2381 /* int s = mt.nextInt(numSites); */
2382 s = randominteger(numSites);
2383
2384 if (Alias == NULL) {
2385 p = s;
2386 } else {
2387 p = Alias[s];
2388 }
2389
2390 for (k = 0; k < numH; k++) { /* ...each utree <k> */
2391 /* rs[k] += pLogL[k][p]; */
2392 rs[k] += allsites[k][p];
2393 }
2394 }
2395
2396 /* find ml hypothesis */
2397 /* maxLogL = findMax(rs); */
2398 {
2399 int best = 0; /* can be removed (HAS) */
2400 int idx;
2401 double max = rs[0];
2402 for (idx = 1; idx < numH; idx++) {
2403 if (rs[idx] > max) {
2404 best = idx; /* can be removed (HAS) */
2405 max = rs[idx];
2406 }
2407 }
2408 maxLogL = max;
2409 }
2410
2411 /* compute log-likelihood difference */
2412 for (k = 0; k < numH; k++) {
2413 rs[k] = rs[k] - maxLogL;
2414 }
2415
2416 /* compute posteriors and sum over resampled data set */
2417 sum = 0.0;
2418 for (k = 0; k < numH; k++) {
2419 rs[k] = exp(rs[k]);
2420 sum += rs[k];
2421 }
2422 for (k = 0; k < numH; k++) {
2423 elw_support[k] += rs[k]/sum;
2424 }
2425 }
2426
2427 /* compute support values */
2428 for (j = 0; j < numH; j++) {
2429 elw_support[j] = elw_support[j]/elw_numboots;
2430 }
2431
2432 /* determine order of elw_support (smallest->largest) */
2433 /* HeapSort.sort(elw_support, supportOrder); */
2434 {
2435 int tmp;
2436 int idx;
2437 for (k=0; k < numH; k++) supportOrder[k] = k;
2438 for (j=0; j < numH-1; j++) {
2439 idx=j;
2440 for (k=j+1; k < numH; k++) {
2441 if (elw_support[supportOrder[k]] > elw_support[supportOrder[idx]]) idx=k;
2442 }
2443 tmp = supportOrder[j];
2444 supportOrder[j] = supportOrder[idx];
2445 supportOrder[idx] = tmp;
2446 }
2447 }
2448
2449 sum = 0.0;
2450 for (k=0; k<numH; k++) {
2451 if(sum <= .95) {
2452 sum += elw_support[supportOrder[k]];
2453 sr_test_res[supportOrder[k]] = 1; /* within confidence set */
2454 } else {
2455 /* sum += elw_support[supportOrder[k]]; */ /* shouldn't the sum increase after exclusion ??? (HAS) */
2456 sum += elw_support[supportOrder[k]]; /* included to code */
2457 sr_test_res[supportOrder[k]] = 0; /* excluded from confidence set */
2458 }
2459 }
2460
2461 } /* elw_test */
2462
2463
2464
2465
2466 /* TODO: move to treetest.c */
2467
2468 /*****************************************************************************/
2469 /* SH Test (Shimodaira & Hasegawa, 1999) */
2470 /*****************************************************************************/
2471 void sh_test(ivector Alias, /* site pattern translat array */
2472 dmatrix allsites, /* tree site log-likelihoods */
2473 int numutrees, /* number of trees */
2474 int numsites, /* number of sites */
2475 double siglevel, /* significance level to test */
2476 int numboots, /* number of bootstraps to draw */
2477 ivector *sh_test_passed, /* has tree passed tests */
2478 dvector *sh_Pval) /* and p-value */
2479 {
2480
2481 /*
2482 * Shimodaira-Hasegawa-Test (1999) to
2483 * compare a set of evolutionary hypotheses.
2484 * Compare all given hypotheses to the best (ML) hypothesis
2485 * and store results in public arrays delta, pval
2486 * (which will automatically be created by this procedure).
2487 *
2488 * allsites log-likelihoods of each pattern
2489 * Alias map of patterns to sites in sequence
2490 * numboots number of bootstraps
2491 */
2492
2493 int bestH; /* number of maximum likelihood hypothesis */
2494 dvector sh_delta; /* log-LH difference to ml hypothesis */
2495 dvector sh_pval; /* corresponding p-value */
2496 dvector sh_logL;
2497 int sh_numboots = 1000;
2498 double maxLogL;
2499 double m;
2500 dmatrix bs; /* = new int[numH,sh_numboots]; */
2501 int best;
2502 double colmax;
2503 int count;
2504 double sh_prob = .05;
2505 ivector sh_test_res; /* [numutrees]; */
2506 int numH;
2507 int numSites;
2508 int i, j, k;
2509 int p, s;
2510
2511
2512 /* number of hypothesis */
2513 numH = numutrees;
2514
2515 /* allocate memory for results */
2516 sh_delta = new_dvector(numH);
2517 sh_pval = new_dvector(numH);
2518 sh_test_res = new_ivector(numutrees);
2519
2520 *sh_Pval = sh_pval;
2521 *sh_test_passed = sh_test_res;
2522
2523 /* number of sites */
2524 numSites = numsites;
2525
2526 /* log likelihood of each hypothesis */
2527 sh_logL = new_dvector(numH);
2528 for (i = 0; i < numSites; i++) {
2529 if (Alias == NULL) {
2530 p = i;
2531 } else {
2532 p = Alias[i];
2533 }
2534
2535 for (j = 0; j < numH; j++) {
2536 /* sh_logL[j] += pLogL[j][p]; */
2537 sh_logL[j] += allsites[j][p];
2538 }
2539 }
2540
2541 /* find maximum-likelihood hypothesis */
2542 bestH = 0;
2543 maxLogL = sh_logL[0];
2544 for (i = 1; i < numH; i++) {
2545 if (sh_logL[i] > maxLogL) {
2546 bestH = i;
2547 maxLogL = sh_logL[i];
2548 }
2549 }
2550
2551 /* compute log-likelihood differences to best hypothesis */
2552 for (i = 0; i < numH; i++) {
2553 sh_delta[i] = sh_logL[bestH]-sh_logL[i];
2554 }
2555
2556 /* allocate temporary memory for resampling procedure */
2557 bs = new_dmatrix(numH,sh_numboots);
2558
2559 /* Resample data */
2560 /* MersenneTwisterFast mt = new MersenneTwisterFast(); */
2561 for (i = 0; i < sh_numboots; i++) {
2562 for (j = 0; j < numSites; j++) {
2563 /* int s = mt.nextInt(numSites); */
2564 s = randominteger(numSites);
2565
2566 if (Alias == NULL) {
2567 p = s;
2568 } else {
2569 p = Alias[s];
2570 }
2571
2572 for (k = 0; k < numH; k++) {
2573 /* rs[k][i] += pLogL[k][p]; */
2574 bs[k][i] += allsites[k][p];
2575 }
2576 }
2577 }
2578
2579 /* center resampled log-likelihoods */
2580 for (i = 0; i < numH; i++) {
2581
2582 /* double m = DiscreteStatistics.mean(rs[i]); */
2583 m = 0.0;
2584 for (j = 0; j < sh_numboots; j++) {
2585 m += bs[i][j];
2586 }
2587 m /= sh_numboots;
2588
2589
2590 for (j = 0; j < sh_numboots; j++) {
2591 bs[i][j] = bs[i][j] - m;
2592 }
2593 }
2594
2595 /* compute resampled log-likelihood differences */
2596 for (i = 0; i < sh_numboots; i++) {
2597 /* double max = findMaxInColumn(rs, i); */
2598 {
2599 best = 0;
2600 colmax = bs[0][i];
2601 for (j = 1; j < numH; j++) {
2602 if (bs[j][i] > colmax) {
2603 best = j;
2604 colmax = bs[j][i];
2605 }
2606 }
2607 }
2608
2609 for (j = 0; j < numH; j++) {
2610 bs[j][i] = colmax - bs[j][i];
2611 }
2612 }
2613
2614 /* compute p-values for each hypothesis */
2615 for (i = 0; i < numH; i++) {
2616 count = 0;
2617 for (j = 0; j < sh_numboots; j++) {
2618 if (bs[i][j] >= sh_delta[i]) {
2619 count++;
2620 }
2621 }
2622
2623 sh_pval[i] = (double) count/(double) sh_numboots;
2624 if (sh_pval[i] >= sh_prob) {
2625 sh_test_res[i] = 1;
2626 } else {
2627 sh_test_res[i] = 0;
2628 }
2629 }
2630
2631 /* free memory */
2632 free_dmatrix(bs);
2633 free_dvector(sh_logL);
2634
2635 } /* sh_test */
2636
2637
2638
2639
2640 /* TODO: move to treetest.c */
2641
2642 /* print tree statistics */
2643 void printtreestats(FILE *ofp)
2644 {
2645 int i, j; /* counter variables */
2646 int besttree; /* best tree */
2647
2648 /* for the KH Test (Kishino & Hasegawa, 1989) */
2649 double bestlkl; /* best likelihood */
2650 double difflkl; /* best likelihood difference */
2651 double difflklps; /* best likelihood difference per site */
2652 double temp;
2653 double sum;
2654
2655 /* variables for the SH Test (Shimodaira & Hasegawa, 1999) */
2656 dvector sh_pval; /* corresponding p-value */
2657 double sh_prob = .05; /* significance level */
2658 int sh_numboots = 1000; /* number of bootstrap samples */
2659 ivector sh_test_res; /* result vector [numutrees]; */
2660
2661 /* variables for the ELW/SR Test (Expected Likelihood Weights) */
2662 /* (Strimmer & Rambaut, 2002) */
2663
2664 dvector elw_support; /* = new double[numH; */
2665 ivector elw_test_res; /* result vector [numutrees]; */
2666 int elw_numboots = 1000; /* number of bootstrap samples */
2667 double elw_prob = .05; /* significance level */
2668
2669 /* variables for the one-sided KH Test using SH */
2670 dvector kh1_pval_tmp; /* temp p-value arry for pairwise SH */
2671 ivector kh1_test_res_tmp; /* temp result arry for pairwise SH */
2672 dvector kh1_pval; /* p-values */
2673 ivector kh1_test_res; /* result vector [numutrees]; */
2674 dvector kh1_allsites_tmp[2]; /* temp arry for pairwise SH test */
2675 int kh1_numboots = 1000; /* number of bootstrap samples */
2676 double kh1_prob = .05; /* significance level */
2677
2678 /* find best tree */
2679 besttree = 0;
2680 bestlkl = ulkl[0];
2681 for (i = 1; i < numutrees; i++) {
2682 if (ulkl[i] > bestlkl) {
2683 besttree = i;
2684 bestlkl = ulkl[i];
2685 }
2686 }
2687
2688 /* one sided KH = pairwise SH test between tree and besttree */
2689 fprintf(STDOUT, "Performing single sided KH test.\n");
2690 fflush(STDOUT);
2691 kh1_pval = new_dvector(numutrees);
2692 kh1_test_res = new_ivector(numutrees);
2693
2694 kh1_allsites_tmp[0] = allsites[besttree]; /* set best tree */
2695 for (i = 0; i < numutrees; i++) {
2696 if (i == besttree) { /* if best tree -> no test */
2697 kh1_pval[i] = 1.0;
2698 kh1_test_res[i] = 1;
2699 } else { /* other wise test pairwise SH */
2700 kh1_allsites_tmp[1] = allsites[i]; /* set site log-lh */
2701 sh_test(Alias, kh1_allsites_tmp, 2, Maxsite, kh1_prob, kh1_numboots, &kh1_test_res_tmp, &kh1_pval_tmp); /* pairwise SH */
2702 kh1_pval[i] = kh1_pval_tmp[1]; /* store p-value */
2703 kh1_test_res[i] = kh1_test_res_tmp[1]; /* save result */
2704 }
2705 }
2706 free_ivector(kh1_test_res_tmp); /* moved out of loop */
2707 free_dvector(kh1_pval_tmp);
2708
2709 /* ELW */
2710 fprintf(STDOUT, "Performing ELW test.\n");
2711 fflush(STDOUT);
2712 elw_test(Alias, allsites, numutrees, Maxsite, elw_prob, elw_numboots, &elw_test_res, &elw_support);
2713
2714 /* SH */
2715 fprintf(STDOUT, "Performing SH test.\n");
2716 fflush(STDOUT);
2717 sh_test(Alias, allsites, numutrees, Maxsite, sh_prob, sh_numboots, &sh_test_res, &sh_pval);
2718
2719 /*****************************************************************************/
2720 /* two-sided KH Test (Kishino & Hasegawa, 1989) */
2721 /* and output */
2722 /*****************************************************************************/
2723
2724 fprintf(ofp, "\n\nCOMPARISON OF USER TREES (NO CLOCK)\n\n");
2725 # ifdef KHTWOSIDED
2726 fprintf(ofp, "Tree log L difference S.E. Sig. worse p-1sKH p-SH c-ELW \n");
2727 fprintf(ofp, "-------------------------------------------------------------------------------\n");
2728 # else
2729 fprintf(ofp, "Tree log L difference S.E. p-1sKH p-SH c-ELW 2sKH\n");
2730 fprintf(ofp, "-------------------------------------------------------------------------------\n");
2731 # endif
2732 for (i = 0; i < numutrees; i++) {
2733 difflkl = ulkl[besttree]-ulkl[i];
2734 fprintf(ofp, "%2d %10.4f %8.4f ", i+1, ulkl[i], difflkl);
2735 /* fprintf(ofp, "%2d %10.2f %8.2f ", i+1, ulkl[i], difflkl); */
2736 if (i == besttree) {
2737 # ifdef KHTWOSIDED
2738 fprintf(ofp, " <-------------- best ");
2739 # else
2740 fprintf(ofp, " <---- best ");
2741 /* fprintf(ofp, " <---- best "); */
2742 # endif
2743
2744 if (kh1_test_res[i] == 1)
2745 fprintf(ofp, " %6.4f +", kh1_pval[i]);
2746 else
2747 fprintf(ofp, " %6.4f -", kh1_pval[i]);
2748
2749 if (sh_test_res[i] == 1)
2750 fprintf(ofp, " %6.4f +", sh_pval[i]);
2751 else
2752 fprintf(ofp, " %6.4f -", sh_pval[i]);
2753
2754 if (elw_test_res[i] == 1)
2755 fprintf(ofp, " %6.4f +", elw_support[i]);
2756 else
2757 fprintf(ofp, " %6.4f -", elw_support[i]);
2758
2759 # ifdef KHTWOSIDED
2760 # else
2761 fprintf(ofp, " best");
2762 # endif
2763 } else {
2764 /* compute variance of Log L differences over sites */
2765 #ifndef USE_WINDOWS
2766 difflklps = difflkl/(double)Maxsite;
2767 #else
2768 difflklps = difflkl/(double)alimaxsite;
2769 #endif
2770 /* compute std. error of sample mean with curvature method */
2771 sum = 0.0;
2772 for (j = 0; j < Numptrn; j++) {
2773 temp = allsites[besttree][j] - allsites[i][j] - difflklps;
2774 sum += temp*temp*Weight[j];
2775 }
2776 #ifndef USE_WINDOWS
2777 sum = sqrt( fabs(sum/(Maxsite-1.0)*Maxsite) );
2778 #else
2779 sum = sqrt(fabs(sum/(alimaxsite-1.0)*alimaxsite));
2780 #endif
2781 # ifdef KHTWOSIDED
2782 fprintf(ofp, "%11.2f ", sum);
2783 if (difflkl > 1.96*sum)
2784 fprintf(ofp, "yes ");
2785 else
2786 fprintf(ofp, "no ");
2787 # else
2788 fprintf(ofp, "%11.4f ", sum);
2789 /* fprintf(ofp, "%11.2f ", sum); */
2790 # endif
2791
2792 if (kh1_test_res[i] == 1)
2793 fprintf(ofp, " %6.4f +", kh1_pval[i]);
2794 else
2795 fprintf(ofp, " %6.4f -", kh1_pval[i]);
2796 if (sh_test_res[i] == 1)
2797 fprintf(ofp, " %6.4f +", sh_pval[i]);
2798 else
2799 fprintf(ofp, " %6.4f -", sh_pval[i]);
2800
2801 if (elw_test_res[i] == 1)
2802 fprintf(ofp, " %6.4f +", elw_support[i]);
2803 else
2804 fprintf(ofp, " %6.4f -", elw_support[i]);
2805
2806 # ifdef KHTWOSIDED
2807 # else
2808 if (difflkl > 1.96*sum)
2809 fprintf(ofp, " - (diff=%.4f 1.96*sum=%.4e)", difflkl, 1.96*sum);
2810 else
2811 fprintf(ofp, " + (diff=%.4f 1.96*sum=%.4e)", difflkl, 1.96*sum);
2812 #if 0
2813 if (difflkl > 1.96*sum)
2814 fprintf(ofp, " -");
2815 else
2816 fprintf(ofp, " +");
2817 #endif
2818 # endif
2819 }
2820 fprintf(ofp, "\n");
2821 }
2822
2823 fprintf(ofp, "\nThe columns show the results and p-values of the following tests:\n");
2824 fprintf(ofp, "1sKH - one sided KH test based on pairwise SH tests (Shimodaira-Hasegawa\n");
2825 fprintf(ofp, " 2000, Goldman et al., 2001, Kishino-Hasegawa 1989)\n");
2826 fprintf(ofp, "SH - Shimodaira-Hasegawa test (2000)\n");
2827 fprintf(ofp, "ELW - Expected Likelihood Weight (Strimmer-Rambaut 2002)\n");
2828 fprintf(ofp, "2sKH - two sided Kishino-Hasegawa test (1989)\n");
2829 fprintf(ofp, "\n");
2830 fprintf(ofp, "Plus signs denote the confidence sets. Minus signs denote significant\n");
2831 fprintf(ofp, "exclusion. All tests used 5%% significance level. 1sKH, SH, and ELW\n");
2832 fprintf(ofp, "performed 1000 resamplings using the RELL method.\n");
2833
2834 if (compclock) {
2835
2836 /* find best tree */
2837 besttree = 0;
2838 bestlkl = ulklc[0];
2839 for (i = 1; i < numutrees; i++)
2840 if (ulklc[i] > bestlkl) {
2841 besttree = i;
2842 bestlkl = ulklc[i];
2843 }
2844
2845 /* one sided KH */
2846 fprintf(STDOUT, "Performing single sided KH test (clock).\n");
2847 fflush(STDOUT);
2848 kh1_pval = new_dvector(numutrees);
2849 kh1_test_res = new_ivector(numutrees);
2850
2851 kh1_allsites_tmp[0] = allsitesc[besttree];
2852 for (i = 0; i < numutrees; i++) {
2853 if (i != besttree) {
2854 kh1_allsites_tmp[1] = allsitesc[i];
2855 sh_test(Alias, kh1_allsites_tmp, 2, Maxsite, kh1_prob, kh1_numboots, &kh1_test_res_tmp, &kh1_pval_tmp);
2856 kh1_pval[i] = kh1_pval_tmp[1];
2857 kh1_test_res[i] = kh1_test_res_tmp[1];
2858 free_ivector(kh1_test_res_tmp);
2859 free_dvector(kh1_pval_tmp);
2860 } else {
2861 kh1_pval[i] = 1.0;
2862 kh1_test_res[i] = 1;
2863 }
2864 }
2865
2866 /* ELW */
2867 fprintf(STDOUT, "Performing ELW test (clock).\n");
2868 fflush(STDOUT);
2869 elw_test(Alias, allsitesc, numutrees, Maxsite, elw_prob, elw_numboots, &elw_test_res, &elw_support);
2870
2871 /* SH */
2872 fprintf(STDOUT, "Performing SH test (clock).\n");
2873 fflush(STDOUT);
2874 sh_test(Alias, allsitesc, numutrees, Maxsite, sh_prob, sh_numboots, &sh_test_res, &sh_pval);
2875
2876
2877 fprintf(ofp, "\n\nCOMPARISON OF USER TREES (WITH CLOCK)\n\n");
2878 # ifdef KHTWOSIDED
2879 fprintf(ofp, "Tree log L difference S.E. Sig. worse p-1sKH p-SH c-ELW \n");
2880 fprintf(ofp, "-------------------------------------------------------------------------------\n");
2881 # else
2882 fprintf(ofp, "Tree log L difference S.E. p-1sKH p-SH c-ELW 2sKH\n");
2883 fprintf(ofp, "-------------------------------------------------------------------------------\n");
2884 # endif
2885 for (i = 0; i < numutrees; i++) {
2886 difflkl = ulklc[besttree]-ulklc[i];
2887 fprintf(ofp, "%2d %10.2f %8.2f ", i+1, ulklc[i], difflkl);
2888 if (i == besttree) {
2889 # ifdef KHTWOSIDED
2890 fprintf(ofp, " <-------------- best ");
2891 # else
2892 fprintf(ofp, " <---- best ");
2893 # endif
2894
2895 if (kh1_test_res[i] == 1)
2896 fprintf(ofp, " %6.4f +", kh1_pval[i]);
2897 else
2898 fprintf(ofp, " %6.4f -", kh1_pval[i]);
2899 if (sh_test_res[i] == 1)
2900 fprintf(ofp, " %6.4f +", sh_pval[i]);
2901 else
2902 fprintf(ofp, " %6.4f -", sh_pval[i]);
2903
2904 if (elw_test_res[i] == 1)
2905 fprintf(ofp, " %6.4f +", elw_support[i]);
2906 else
2907 fprintf(ofp, " %6.4f -", elw_support[i]);
2908
2909 # ifdef KHTWOSIDED
2910 # else
2911 fprintf(ofp, " best");
2912 # endif
2913 } else {
2914 /* compute variance of Log L differences over sites */
2915 #ifndef USE_WINDOWS
2916 difflklps = difflkl/(double)Maxsite;
2917 #else
2918 difflklps = difflkl/(double)alimaxsite;
2919 #endif
2920 sum = 0.0;
2921 for (j = 0; j < Numptrn; j++) {
2922 temp = allsitesc[besttree][j] - allsitesc[i][j] - difflklps;
2923 sum += temp*temp*Weight[j];
2924 }
2925 #ifndef USE_WINDOWS
2926 sum = sqrt(fabs(sum/(Maxsite-1.0)*Maxsite));
2927 #else
2928 sum = sqrt(fabs(sum/(alimaxsite-1.0)*alimaxsite));
2929 #endif
2930 # ifdef KHTWOSIDED
2931 fprintf(ofp, "%11.2f ", sum);
2932 if (difflkl > 1.96*sum)
2933 fprintf(ofp, "yes ");
2934 else
2935 fprintf(ofp, "no ");
2936 # else
2937 fprintf(ofp, "%11.2f ", sum);
2938 # endif
2939
2940 if (kh1_test_res[i] == 1)
2941 fprintf(ofp, " %6.4f +", kh1_pval[i]);
2942 else
2943 fprintf(ofp, " %6.4f -", kh1_pval[i]);
2944 if (sh_test_res[i] == 1)
2945 fprintf(ofp, " %6.4f +", sh_pval[i]);
2946 else
2947 fprintf(ofp, " %6.4f -", sh_pval[i]);
2948
2949 if (elw_test_res[i] == 1)
2950 fprintf(ofp, " %6.4f +", elw_support[i]);
2951 else
2952 fprintf(ofp, " %6.4f -", elw_support[i]);
2953
2954 # ifdef KHTWOSIDED
2955 # else
2956 if (difflkl > 1.96*sum)
2957 fprintf(ofp, " -");
2958 else
2959 fprintf(ofp, " +");
2960 # endif
2961 }
2962 fprintf(ofp, "\n");
2963 }
2964 fprintf(ofp, "\nThe columns show the results and p-values of the following tests:\n");
2965 fprintf(ofp, "1sKH - one sided KH test based on pairwise SH tests (Shimodaira-Hasegawa\n");
2966 fprintf(ofp, " 2000, Goldman et al., 2001, Kishino-Hasegawa 1989)\n");
2967 fprintf(ofp, "SH - Shimodaira-Hasegawa test (2000)\n");
2968 fprintf(ofp, "ELW - Expected Likelihood Weight (Strimmer-Rambaut 2002)\n");
2969 fprintf(ofp, "2sKH - two sided Kishino-Hasegawa test (1989)\n");
2970 fprintf(ofp, "\n");
2971 fprintf(ofp, "Plus signs denote the confidence sets. Minus signs denote significant\n");
2972 fprintf(ofp, "exclusion. All tests used 5%% significance level. 1sKH, SH, and ELW\n");
2973 fprintf(ofp, "performed 1000 resamplings using the RELL method.\n");
2974 #if 0
2975 fprintf(ofp, "\nColmn 5 gives the results of the (old) two-sided Kishino-Hasegawa test \n");
2976 fprintf(ofp, "following Kishino and Hasegawa (1989). It tests whether a likelihood is \n");
2977 fprintf(ofp, "significantly worse than the best one, marked with 'best'. This test should \n");
2978 fprintf(ofp, "only be used for data not having been determined with the data tested on.\n");
2979 fprintf(ofp, "1sKH is the one-sided KH test (Goldman et al., 2001). It is applicable\n");
2980 fprintf(ofp, "to test whether likelihoods are worse than the lielihood of the ML tree.\n");
2981 fprintf(ofp, "SH tests for the best trees (Shimodaira and Hasegawa, 2000).\n");
2982 fprintf(ofp, "Note that KH, 1sKH, and SH assume to have the 'true' topologies among the tested.\n");
2983 fprintf(ofp, "ELW (Expected likelihood weights) seems to work even without this restriction\n");
2984 fprintf(ofp, "(Strimmer and Rambaut, 2002). Still plausible trees should be among the tested.\n");
2985 fprintf(ofp, "For 1sKH, SH, and ELW plus signs '+' mark the topologies belonging to the\n");
2986 fprintf(ofp, "confidence sets the numbers give the p-values (1sKH and SH) or the confidence\n");
2987 fprintf(ofp, "weight (ELW).\n");
2988 fprintf(ofp, "All tests used 5%% significance level. 1sKH, SH, and ELW used 1000 resamplings.\n");
2989 #endif
2990 }
2991 } /* printtreestats */
2992
2993 #endif
2994
2995
2996 /********************************************************/
2997
2998 /* time stamp */
2999 void timestamp(FILE* ofp)
3000 {
3001 double timespan;
3002 double cpuspan;
3003 timespan = difftime(Stoptime, Starttime);
3004 cpuspan = ((double) (Stopcpu - Startcpu) / CLOCKS_PER_SEC);
3005 fprintf(ofp, "\n\nTIME STAMP\n\n");
3006 fprintf(ofp, "Date and time: %s", asctime(localtime(&Starttime)) );
3007 fprintf(ofp, "Runtime (excl. input) : %.0f seconds (= %.1f minutes = %.1f hours)\n",
3008 timespan, timespan/60., timespan/3600.);
3009 fprintf(ofp, "Runtime (incl. input) : %.0f seconds (= %.1f minutes = %.1f hours)\n",
3010 fulltime, fulltime/60., fulltime/3600.);
3011 #ifdef TIMEDEBUG
3012 fprintf(ofp, "CPU time (incl. input): %.0f seconds (= %.1f minutes = %.1f hours)\n\n",
3013 fullcpu, fullcpu/60., fullcpu/3600.);
3014 #endif /* TIMEDEBUG */
3015
3016 } /* timestamp */
3017
3018 /* SONJA #include "sonja.c" */
3019 /***************************************************************************/
3020 /*** ***/
3021 /*** Sonja's distance problem ;-) ***/
3022 /*** ***/
3023 /***************************************************************************/
3024 #ifdef SONJA
3025 /* routines to compute the distances in the tree */
3026 /* # include "sonja.c" */
3027
3028 /* collect distances from the subtree rooted by np */
3029 void subtree2dist(Node *np, /* subtree root */
3030 ivector nodefound, /* leaves in this subtree */
3031 dvector nodedist, /* distances leaf <-> np */
3032 dmatrix distmat, /* dist matrix between leaves */
3033 imatrix distdone) /* done flags */
3034 {
3035 int n, m;
3036 ivector foundbelow;
3037 Node *tp;
3038
3039 /* external node = leaf */
3040 if (np->isop == NULL) {
3041 # ifdef DEBUGTREEDIST
3042 fprintf(stderr, "e%d: len=%.6f\n", np->number, np->length);
3043 # endif
3044 /* nodedist[np->number] = np->length; */
3045 nodefound[np->number] = 1;
3046 return;
3047 } else { /* if not external node */
3048 # ifdef DEBUGTREEDIST
3049 fprintf(stderr, "i%d: len=%.6f\n", np->number, np->length);
3050 # endif
3051 foundbelow = new_ivector(Maxspc);
3052
3053 /* for every branch at the inner node */
3054 tp = np->isop;
3055 while (tp != np) {
3056 for (n = 0; n < Maxspc; n++)
3057 foundbelow[n] = 0;
3058
3059 subtree2dist(tp->kinp, foundbelow, nodedist, distmat, distdone);
3060
3061 for (n = 0; n < Maxspc; n++) {
3062 if (foundbelow[n] == 1) {
3063 # ifdef DEBUGTREEDIST
3064 fprintf(stderr, " +++i%d: nd%d=%.5f+%.5f=%.5f\n", np->number, n, nodedist[n], tp->length, nodedist[n]+tp->length);
3065 # endif
3066 nodefound[n] = 1;
3067 nodedist[n] += tp->length;
3068 }
3069 }
3070 tp = tp->isop;
3071 } /* while tp != np */
3072
3073 for (n = 0; n < Maxspc; n++) {
3074 for (m = n+1; m < Maxspc; m++) {
3075 if ((distdone[n][m]!=1) && (nodefound[n]==1) && (nodefound[m]==1)) {
3076 distmat[n][m] = nodedist[n] + nodedist[m];
3077 distmat[m][n] = distmat[n][m];
3078 distdone[n][m] = 1;
3079 distdone[m][n] = 1;
3080 } /* if */
3081 } /* for m */
3082 } /* for n */
3083
3084 free (foundbelow);
3085
3086 } /* if not external node */
3087 }
3088
3089 /***************************************************************************/
3090
3091
3092
3093 /* collect all distances from the current tree */
3094 void tree2dist(dmatrix distmat)
3095 {
3096 ivector nodefound;
3097 imatrix distdone;
3098 dvector nodedist;
3099 int n, m;
3100
3101 nodefound = new_ivector(Maxspc);
3102 distdone = new_imatrix(Maxspc,Maxspc);
3103 nodedist = new_dvector(Maxspc);
3104
3105 for (n = 0; n < Maxspc; n++) {
3106 nodedist[n] = 0.0;
3107 nodefound[n] = 0;
3108 for (m = n+1; m < Maxspc; m++) {
3109 if (n==m) {
3110 distmat[n][m] = 0.0;
3111 distdone[n][m] = 1;
3112 } else {
3113 distmat[n][m] = 0.0;
3114 distmat[m][n] = 0.0;
3115 distdone[n][m] = 0;
3116 distdone[m][n] = 0;
3117 }
3118 } /* for m */
3119 } /* for n */
3120
3121 /* we are NOT on a leaf, because root -> collect */
3122 subtree2dist(Ctree->ebrnchp[outgroup]->kinp, nodefound, nodedist, distmat, distdone);
3123
3124 /* compute distances to the root */
3125 m = outgroup;
3126 for (n = 0; n < Maxspc; n++) {
3127 if ((distdone[n][m]!=1) && (nodefound[n]==1)) {
3128 distmat[n][m] = nodedist[n] + Ctree->ebrnchp[outgroup]->length;
3129 distmat[m][n] = distmat[n][m];
3130 distdone[n][m] = 1;
3131 distdone[m][n] = 1;
3132 } /* if */
3133 } /* for n */
3134
3135 free(nodefound);
3136 free(distdone);
3137 free(nodedist);
3138 }
3139
3140
3141
3142 #endif /* SONJA */
3143 /***************************************************************************/
3144 /* end of SONJA */
3145 /***************************************************************************/
3146
3147 /* write output file */
3148 void writeoutputfile(FILE *ofp, int part)
3149 {
3150 int i, fail, df;
3151 uli li;
3152 double pval, delta;
3153
3154 if ((part == WRITEPARAMS) || (part == WRITEALL)) {
3155 # ifndef ALPHA
3156 fprintf(ofp, "TREE-PUZZLE %s\n\n", VERSION);
3157 # else
3158 fprintf(ofp, "TREE-PUZZLE %s%s\n\n", VERSION, ALPHA);
3159 # endif
3160
3161 fprintf(ofp, "Input file name: %s\n",INFILE);
3162 if (puzzlemode == USERTREE || puzzlemode == CONSENSUS) fprintf(ofp, "User tree file name: %s\n",INTREE);
3163
3164
3165 fprintf(ofp, "Type of analysis: ");
3166 switch(typ_optn) {
3167 case TREERECON_OPTN: switch(puzzlemode) {
3168 case CONSENSUS: fprintf(ofp, "consensus construction\n");
3169 break;
3170 case USERTREE: fprintf(ofp, "user tree evaluation\n");
3171 break;
3172 default: fprintf(ofp, "tree reconstruction\n");
3173 break;
3174 }
3175 break;
3176 case LIKMAPING_OPTN: fprintf(ofp, "likelihood mapping\n");
3177 break;
3178 }
3179
3180 fprintf(ofp, "Parameter estimation: ");
3181 if (approxp_optn) fprintf(ofp, "approximate (faster)\n");
3182 else fprintf(ofp, "accurate (slow)\n");
3183 if (!((puzzlemode == USERTREE || puzzlemode == CONSENSUS) && typ_optn == TREERECON_OPTN)) {
3184 fprintf(ofp, "Parameter estimation uses: ");
3185 if (qcalg_optn)
3186 fprintf(ofp, "quartet sampling (for substitution process) + NJ tree (for rate variation)\n");
3187 else
3188 fprintf(ofp, "neighbor-joining tree (for substitution process and rate variation)\n");
3189 } else {
3190 fprintf(ofp, "Parameter estimation uses: ");
3191 if (utree_optn)
3192 fprintf(ofp, "1st user tree (for substitution process and rate variation)\n");
3193 else if (qcalg_optn)
3194 fprintf(ofp, "quartet sampling (for substitution process) + NJ tree (for rate variation)\n");
3195 else
3196 fprintf(ofp, "neighbor-joining tree (for substitution process and rate variation)\n");
3197 }
3198 fprintf(ofp, "\nStandard errors (S.E.) are obtained by the curvature method.\n");
3199 fprintf(ofp, "The upper and lower bounds of an approximate 95%% confidence interval\n");
3200 fprintf(ofp, "for parameter or branch length x are x-1.96*S.E. and x+1.96*S.E.\n");
3201 fprintf(ofp, "\n\n");
3202
3203 /***************************************************************************/
3204 /* SEQUENCE ALIGNMENT */
3205 /***************************************************************************/
3206
3207 fprintf(ofp, "SEQUENCE ALIGNMENT\n\n");
3208 #ifndef USE_WINDOWS
3209 fprintf(ofp, "Input data: %d sequences with %d ", Maxspc, Maxsite);
3210 #else
3211 fprintf(ofp, "Input data: %d sequences with %d ", Maxspc, alimaxsite);
3212 #endif
3213 if (data_optn == AMINOACID)
3214 fprintf(ofp, "amino acid");
3215 else if (data_optn == NUCLEOTIDE && SH_optn)
3216 #ifndef USE_WINDOWS
3217 fprintf(ofp, "doublet (%d nucleotide)", Maxsite*2);
3218 #else
3219 fprintf(ofp, "doublet (%d nucleotide)", alimaxsite*2);
3220 #endif
3221 else if (data_optn == NUCLEOTIDE && nuc_optn)
3222 fprintf(ofp, "nucleotide");
3223 else if (data_optn == BINARY)
3224 fprintf(ofp, "binary state");
3225 fprintf(ofp, " sites");
3226 if (data_optn == NUCLEOTIDE && (Maxseqc % 3) == 0 && !SH_optn) {
3227 if (codon_optn == 1) fprintf(ofp, " (1st codon positions)");
3228 if (codon_optn == 2) fprintf(ofp, " (2nd codon positions)");
3229 if (codon_optn == 3) fprintf(ofp, " (3rd codon positions)");
3230 if (codon_optn == 4) fprintf(ofp, " (1st and 2nd codon positions)");
3231 }
3232 if (data_optn == NUCLEOTIDE && SH_optn) {
3233 if (SHcodon)
3234 fprintf(ofp, " (1st and 2nd codon positions)");
3235 else
3236 fprintf(ofp, " (1st+2nd, 3rd+4th, etc. site)");
3237 }
3238 fprintf(ofp, "\n");
3239 fprintf(ofp, "Number of constant sites: %d (= %.1f%% of all sites)\n",
3240 Numconst, 100.0*fracconst);
3241 fprintf(ofp, "Number of site patterns: %d\n",
3242 Numptrn);
3243 fprintf(ofp, "Number of constant site patterns: %d (= %.1f%% of all site patterns)\n\n\n",
3244 Numconstpat, 100.0*fracconstpat);
3245
3246 /***************************************************************************/
3247 /* SUBSTITUTION PROCESS */
3248 /***************************************************************************/
3249
3250 fprintf(ofp, "SUBSTITUTION PROCESS\n\n");
3251 fprintf(ofp, "Model of substitution: ");
3252 if (data_optn == NUCLEOTIDE) { /* nucleotides */
3253 if (nuc_optn) {
3254 if(HKY_optn) fprintf(ofp, "HKY (Hasegawa et al. 1985)\n");
3255 if(TN_optn) fprintf(ofp, "TN (Tamura-Nei 1993)\n");
3256 if(GTR_optn) fprintf(ofp, "GTR (e.g. Lanave et al. 1980)\n");
3257
3258 if((HKY_optn) || (TN_optn)) {
3259 fprintf(ofp, "Transition/transversion parameter");
3260 if (optim_optn)
3261 fprintf(ofp, " (estimated from data set)");
3262 fprintf(ofp, ": %.2f", TSparam);
3263 if (optim_optn)
3264 fprintf(ofp, " (S.E. %.2f)", tserr);
3265 fprintf(ofp, "\n");
3266
3267 if (optim_optn && TSparam > MAXTS - 1.0)
3268 fprintf(ofp, "WARNING --- parameter estimate close to internal upper bound!\n");
3269 if (optim_optn && TSparam < MINTS + 0.1)
3270 fprintf(ofp, "WARNING --- parameter estimate close to internal lower bound!\n");
3271 }
3272
3273 if (TN_optn) {
3274 fprintf(ofp, "Y/R transition parameter");
3275 if (optim_optn)
3276 fprintf(ofp, " (estimated from data set)");
3277 fprintf(ofp, ": %.2f", YRparam);
3278 if (optim_optn)
3279 fprintf(ofp, " (S.E. %.2f)", yrerr);
3280 fprintf(ofp, "\n");
3281
3282 if (optim_optn && YRparam > MAXYR - 0.5)
3283 fprintf(ofp, "WARNING --- parameter estimate close to internal upper bound!\n");
3284 if (optim_optn && YRparam < MINYR + 0.1)
3285 fprintf(ofp, "WARNING --- parameter estimate close to internal lower bound!\n");
3286 }
3287
3288 if (GTR_optn || print_GTR_optn) {
3289 if((HKY_optn) || (TN_optn)) fprintf(ofp, "\n");
3290 fprintf(ofp, "Rate matrix R (parameters %s):\n\n", GTR_optn ? "set by user" : "restricted to selected model");
3291 fprintf(ofp, " A-C rate: ");
3292 fprintf(ofp, "%.5f\n", GTR_ACrate);
3293 fprintf(ofp, " A-G rate: ");
3294 fprintf(ofp, "%.5f\n", GTR_AGrate);
3295 fprintf(ofp, " A-T rate: ");
3296 fprintf(ofp, "%.5f\n", GTR_ATrate);
3297 fprintf(ofp, " C-G rate: ");
3298 fprintf(ofp, "%.5f\n", GTR_CGrate);
3299 fprintf(ofp, " C-T rate: ");
3300 fprintf(ofp, "%.5f\n", GTR_CTrate);
3301 fprintf(ofp, " G-T rate: ");
3302 fprintf(ofp, "%.5f\n", GTR_GTrate);
3303 fprintf(ofp, "\n");
3304 }
3305 }
3306 if (SH_optn) {
3307 fprintf(ofp, "SH (Schoeniger-von Haeseler 1994)\n");
3308 fprintf(ofp, "Transition/transversion parameter");
3309 if (optim_optn) fprintf(ofp, " (estimated from data set)");
3310 fprintf(ofp, ": %.2f\n", TSparam);
3311 if (optim_optn)
3312 fprintf(ofp, " (S.E. %.2f)", tserr);
3313 fprintf(ofp, "\n");
3314
3315 if (optim_optn && TSparam > MAXTS - 1.0)
3316 fprintf(ofp, "WARNING --- parameter estimate close to internal upper bound!\n");
3317 if (optim_optn && TSparam < MINTS + 0.1)
3318 fprintf(ofp, "WARNING --- parameter estimate close to internal lower bound!\n");
3319
3320 }
3321 }
3322 if (data_optn == AMINOACID) { /* amino acids */
3323 if (Dayhf_optn) fprintf(ofp, "Dayhoff (Dayhoff et al. 1978)\n");
3324 if (Jtt_optn) fprintf(ofp, "JTT (Jones et al. 1992)\n");
3325 if (mtrev_optn) fprintf(ofp, "mtREV24 (Adachi-Hasegawa 1996)\n");
3326 if (cprev_optn) fprintf(ofp, "cpREV45 (Adachi et al. 2000)\n");
3327 if (blosum62_optn) fprintf(ofp, "BLOSUM 62 (Henikoff-Henikoff 1992)\n");
3328 if (vtmv_optn) fprintf(ofp, "VT (Mueller-Vingron 2000)\n");
3329 if (wag_optn) fprintf(ofp, "WAG (Whelan-Goldman 2000)\n");
3330 }
3331 if (data_optn == BINARY) { /* binary states */
3332 fprintf(ofp, "Two-state model (Felsenstein 1981)\n");
3333 }
3334 if (data_optn == AMINOACID)
3335 fprintf(ofp, "Amino acid ");
3336 else if (data_optn == NUCLEOTIDE && SH_optn)
3337 fprintf(ofp, "Doublet ");
3338 else if (data_optn == NUCLEOTIDE && nuc_optn)
3339 fprintf(ofp, "Nucleotide ");
3340 else if (data_optn == BINARY)
3341 fprintf(ofp, "Binary state ");
3342 fprintf(ofp, "frequencies (");
3343 if (Frequ_optn) fprintf(ofp, "estimated from data set");
3344 else fprintf(ofp, "user specified");
3345 if (data_optn == NUCLEOTIDE && SH_optn && sym_optn)
3346 fprintf(ofp, " and symmetrized");
3347 fprintf(ofp, "):\n\n");
3348 for (i = 0; i < gettpmradix(); i++)
3349 fprintf(ofp, " pi(%s) = %5.1f%%\n",
3350 int2code(i), Freqtpm[i]*100);
3351 if (data_optn == NUCLEOTIDE) {
3352 if (!GTR_optn) {
3353 fprintf(ofp, "\nExpected transition/transversion ratio: %.2f",
3354 tstvratio);
3355 if (tstvf84 == 0.0) fprintf(ofp, "\n");
3356 else fprintf(ofp, " (= F84 parameter)\n");
3357 fprintf(ofp, "Expected pyrimidine transition/purine transition");
3358 fprintf(ofp, " ratio: %.2f\n", yrtsratio);
3359 if (tstvf84 != 0.0 && TN_optn)
3360 fprintf(ofp,
3361 "This TN model is equivalent to a F84 model (Felsenstein 1984).\n");
3362 }
3363 }
3364
3365
3366 /***************************************************************************/
3367 /* SEQUENCE COMPOSITION (SEQUENCES IN INPUT ORDER) */
3368 /***************************************************************************/
3369
3370 fprintf(ofp, "\n\nAMBIGUOUS CHARACTERS IN THE SEQUENCE (SEQUENCES IN INPUT ORDER)\n\n");
3371
3372
3373 {
3374 double fract;
3375 int sumgapchar=0;
3376 int sumotherchar=0;
3377
3378 fprintf(ofp, " gaps wildcards sum %% sequence\n");
3379 for (i = 0; i < Maxspc; i++) {
3380 fprintf(ofp, " ");
3381 fputid10(ofp, i);
3382 fprintf(ofp, " ");
3383 #ifndef USE_WINDOWS
3384 fract = (Seqgapchar[i] + Seqotherchar[i]) / (double) Maxsite;
3385 #else
3386 fract = (Seqgapchar[i] + Seqotherchar[i]) / (double) alimaxsite;
3387 #endif
3388 sumgapchar+=Seqgapchar[i];
3389 sumotherchar+=Seqotherchar[i];
3390 fprintf(ofp, "%6d %6d %6d %6.2f%% %c\n",
3391 Seqgapchar[i], Seqotherchar[i],
3392 (Seqgapchar[i] + Seqotherchar[i]),
3393 100.0 * fract, ((fract > 0.5) ? '!' : ' '));
3394 }
3395 fprintf(ofp, " -------------------------------------------------------\n");
3396 fprintf(ofp, " Sum ");
3397 #ifndef USE_WINDOWS
3398 fract = (sumgapchar + sumotherchar) / (double) (Maxsite * Maxspc);
3399 #else
3400 fract = (sumgapchar + sumotherchar) / (double) (alimaxsite * Maxspc);
3401 #endif
3402 fprintf(ofp, "%6d %6d %6d %6.2f%% %c\n",
3403 sumgapchar, sumotherchar,
3404 (sumgapchar + sumotherchar),
3405 100.0 * fract, ((fract > 0.5) ? '!' : ' '));
3406 fprintf(ofp, "\n\n");
3407 fprintf(ofp, "The table above shows the amount of gaps ('-') and other 'wildcard'\n");
3408 #ifndef USE_WINDOWS
3409 fprintf(ofp, "characters ('X', '?', etc.) and their percentage of the %d columns\n", Maxsite);
3410 #else
3411 fprintf(ofp, "characters ('X', '?', etc.) and their percentage of the %d columns\n", alimaxsite);
3412 #endif
3413 fprintf(ofp, "in the alignment.\n");
3414 fprintf(ofp, "Sequences with more than 50%% ambiguous characters are marked with a '!' and \n");
3415 fprintf(ofp, "should be checked, whether they have sufficient overlap to other sequences.\n");
3416 fprintf(ofp, "Sequences with 100%% ambiguous characters do not hold any phylogenetic\n");
3417 fprintf(ofp, "information and had to be discarded from the analysis.\n\n");
3418 }
3419
3420
3421 /***************************************************************************/
3422 /* SEQUENCE COMPOSITION (SEQUENCES IN INPUT ORDER) */
3423 /***************************************************************************/
3424
3425 fprintf(ofp, "\n\nSEQUENCE COMPOSITION (SEQUENCES IN INPUT ORDER)\n\n");
3426
3427 fail = FALSE;
3428 fprintf(ofp, " 5%% chi-square test p-value\n");
3429 for (i = 0; i < Maxspc; i++) {
3430 fprintf(ofp, " ");
3431 fputid10(ofp, i);
3432 pval = homogentest(i);
3433 if ( pval < 0.05 ) fprintf(ofp, " failed ");
3434 else fprintf(ofp, " passed ");
3435 if (chi2fail) fail = TRUE;
3436 fprintf(ofp, " %6.2f%% ", pval*100.0);
3437 fprintf(ofp, "\n");
3438 }
3439 fprintf(ofp, "\n");
3440 fprintf(ofp, "The chi-square tests compares the ");
3441 if (data_optn == AMINOACID)
3442 fprintf(ofp, "amino acid");
3443 else if (data_optn == NUCLEOTIDE && SH_optn)
3444 fprintf(ofp, "doublet");
3445 else if (data_optn == NUCLEOTIDE && nuc_optn)
3446 fprintf(ofp, "nucleotide");
3447 else if (data_optn == BINARY)
3448 fprintf(ofp, "binary state");
3449 fprintf(ofp," composition of each sequence\n");
3450 fprintf(ofp, "to the frequency distribution assumed in the maximum likelihood model.\n");
3451 if (fail) {
3452 fprintf(ofp, "\nWARNING: Result of chi-square test may not be valid");
3453 fprintf(ofp, " because of small\nmaximum likelihood frequencies and");
3454 fprintf(ofp, " short sequence length!\n");
3455 }
3456
3457 /***************************************************************************/
3458 /* IDENTICAL SEQUENCE */
3459 /***************************************************************************/
3460
3461 fprintf(ofp, "\n\nIDENTICAL SEQUENCES\n\n");
3462 fprintf(ofp, "The sequences in each of the following groups are all identical. To speed\n");
3463 fprintf(ofp, "up computation please remove all but one of each group from the data set.\n\n");
3464 findidenticals(ofp);
3465 fprintf(ofp, "\n\nMAXIMUM LIKELIHOOD DISTANCES\n\n");
3466 fprintf(ofp, "Maximum likelihood distances are computed using the ");
3467 fprintf(ofp, "selected model of\nsubstitution and rate heterogeneity.\n\n");
3468 putdistance(ofp);
3469 {
3470 double avgdist, meandist, mindist, maxdist, stddevdist, vardist;
3471 avgdist = averagedist(Maxspc, Distanmat, &meandist, &mindist, &maxdist, &stddevdist, &vardist);
3472 fprintf(ofp, "\nAverage distance (over all possible pairs of sequences): %.5f\n",
3473 meandist);
3474 fprintf(ofp, " minimum : %.5f, maximum : %.5f\n", mindist, maxdist);
3475 fprintf(ofp, " variance : %.5f, std.dev. : %.5f\n", vardist, stddevdist);
3476 }
3477
3478
3479 /***************************************************************************/
3480 /* RATE HETEROGENEITY (PART 1) */
3481 /***************************************************************************/
3482
3483 fprintf(ofp, "\n\nRATE HETEROGENEITY\n\n");
3484 fprintf(ofp, "Model of rate heterogeneity: ");
3485 if (rhetmode == UNIFORMRATE) fprintf(ofp, "uniform rate\n");
3486 if (rhetmode == GAMMARATE ) fprintf(ofp, "Gamma distributed rates\n");
3487 if (rhetmode == TWORATE ) fprintf(ofp, "two rates (1 invariable + 1 variable)\n");
3488 if (rhetmode == MIXEDRATE ) fprintf(ofp, "mixed (1 invariable + %d Gamma rates)\n", numcats);
3489 if (rhetmode == TWORATE || rhetmode == MIXEDRATE) {
3490 fprintf(ofp, "Fraction of invariable sites");
3491 if (fracinv_optim) fprintf(ofp, " (estimated from data set)");
3492 fprintf(ofp, ": %.2f", fracinv);
3493 if (fracinv_optim) fprintf(ofp, " (S.E. %.2f)", fierr);
3494 fprintf(ofp, "\n");
3495
3496 if (fracinv_optim && fracinv > MAXFI - 0.05)
3497 fprintf(ofp, "WARNING --- parameter estimate close to internal upper bound!\n");
3498
3499 #ifndef USE_WINDOWS
3500 fprintf(ofp, "Number of invariable sites: %.0f\n", floor(fracinv*Maxsite));
3501 #else
3502 fprintf(ofp, "Number of invariable sites: %.0f\n", floor(fracinv*alimaxsite));
3503 #endif
3504 }
3505 if (rhetmode == GAMMARATE || rhetmode == MIXEDRATE) {
3506 fprintf(ofp, "Gamma distribution parameter alpha");
3507 if (grate_optim) fprintf(ofp, " (estimated from data set)");
3508 fprintf(ofp, ": %.2f", (1.0-Geta)/Geta);
3509 if (grate_optim) fprintf(ofp, " (S.E. %.2f)",
3510 geerr/(Geta*Geta)); /* first order approximation */
3511 fprintf(ofp, "\n");
3512
3513 if (grate_optim && Geta > MAXGE - 0.02)
3514 fprintf(ofp, "WARNING --- parameter estimate close to internal upper bound!\n");
3515 if (grate_optim && Geta < MINGE + 0.01)
3516 fprintf(ofp, "WARNING --- parameter estimate close to internal lower bound!\n");
3517
3518 fprintf(ofp, "Number of Gamma rate categories: %d\n", numcats);
3519 }
3520 if (rhetmode == MIXEDRATE) {
3521 fprintf(ofp, "Total rate heterogeneity (invariable sites + Gamma model): ");
3522 fprintf(ofp, "%.2f", fracinv + Geta - fracinv*Geta);
3523 if (grate_optim && fracinv_optim)
3524 fprintf(ofp, " (S.E. %.2f)", geerr + fierr); /* first order approximation */
3525 else if (grate_optim && !fracinv_optim)
3526 fprintf(ofp, " (S.E. %.2f)", geerr);
3527 else if (!grate_optim && fracinv_optim)
3528 fprintf(ofp, " (S.E. %.2f)", fierr);
3529 fprintf(ofp, "\n");
3530 }
3531 if (rhetmode != UNIFORMRATE) {
3532 fprintf(ofp, "\nRates and their respective probabilities used in the likelihood function:\n");
3533 fprintf(ofp, "\n Category Relative rate Probability\n");
3534 if (rhetmode == TWORATE || rhetmode == MIXEDRATE)
3535 fprintf(ofp, " 0 0.0000 %.4f\n", fracinv);
3536 for (i = 0; i < numcats; i++)
3537 fprintf(ofp, " %d %.4f %.4f\n",
3538 i+1, Rates[i], (1.0-fracinv)/(double) numcats);
3539 }
3540 if (rhetmode == GAMMARATE || rhetmode == MIXEDRATE) {
3541 fprintf(ofp, "\nCategories 1-%d approximate a continous ", numcats);
3542 fprintf(ofp, "Gamma-distribution with expectation 1\n");
3543 fprintf(ofp, "and variance ");
3544 if (Geta == 1.0) fprintf(ofp, "infinity");
3545 else fprintf(ofp, "%.2f", Geta/(1.0-Geta));
3546 fprintf(ofp, ".\n");
3547 }
3548
3549
3550 } /* if WRITEPARAMS) || WRITEALL */
3551
3552 if ((part == WRITEREST) || (part == WRITEALL)) {
3553
3554
3555 /***************************************************************************/
3556 /* RATE HETEROGENEITY (PART 2) */
3557 /***************************************************************************/
3558
3559 #if 0
3560 if (typ_optn == TREERECON_OPTN && (puzzlemode == QUARTPUZ || puzzlemode == USERTREE || puzzlemode == CONSENSUS) && !skipmlbranch_optn) {
3561 #endif
3562
3563 #if 0
3564 if(typ_optn == TREERECON_OPTN) fprintf(stderr, "typ_optn == TREERECON_OPTN\n");
3565 if(puzzlemode == QUARTPUZ) fprintf(stderr, "puzzlemode == QUARTPUZZ\n");
3566 if(puzzlemode == USERTREE) fprintf(stderr, "puzzlemode == USERTREE\n");
3567 if(puzzlemode == CONSENSUS) fprintf(stderr, "puzzlemode == CONSENSUS\n");
3568 if(dotreelh_optn) fprintf(stderr, "dotreelh_optn\n");
3569 #endif
3570
3571 if (typ_optn == TREERECON_OPTN
3572 && (puzzlemode == QUARTPUZ
3573 || puzzlemode == USERTREE
3574 || puzzlemode == CONSENSUS)
3575 && dotreelh_optn) { /* if likelihood has been computed */
3576 if (rhetmode != UNIFORMRATE) {
3577 fprintf(ofp, "\nCombination of categories that contributes");
3578 fprintf(ofp, " the most to the likelihood\n");
3579 fprintf(ofp, "(computation done without clock assumption assuming ");
3580 if (puzzlemode == QUARTPUZ) fprintf(ofp, "quartet-puzzling tree");
3581 if (puzzlemode == USERTREE || puzzlemode == CONSENSUS) {
3582 if (utree_optn) fprintf(ofp, "1st user tree");
3583 else fprintf(ofp, "NJ tree");
3584 }
3585 fprintf(ofp, "):\n\n");
3586 if (bestratefound==0) {
3587 fprintf(STDOUT, "ERROR: no rate combination computed before output!!!\n");
3588 findbestratecombination();
3589 }
3590 printbestratecombination(ofp);
3591 } /* rhetmode != UNIFORMRATE */
3592
3593 if (savesiterate_optn) {
3594 FILE *ratefp;
3595
3596 openfiletowrite(&ratefp, SITERATE, "site rates");
3597 printbestratecombinationtofile(ratefp, rhetmode);
3598 fclose(ratefp);
3599 } /* if savesiterate_optn */
3600 } /* if likelihood has been computed */
3601
3602 /***************************************************************************/
3603 /* BAD QUARTET STATISTICS (SEQUENCES IN INPUT ORDER) */
3604 /***************************************************************************/
3605
3606 if (puzzlemode == QUARTPUZ &&typ_optn == TREERECON_OPTN) {
3607 fprintf(ofp, "\n\nQUARTET STATISTICS (SEQUENCES IN INPUT ORDER)\n\n");
3608
3609
3610 fprintf(ofp, "\n");
3611 fprintf(ofp, " name | resolved | partly resolved | unresolved | sum\n");
3612 fprintf(ofp, " --------------------------------------------------------------------------\n");
3613 for (i = 0; i < Maxspc; i++) {
3614 /* fprintf(ofp, " %3d. %-10s %6.2f%% [%7ld] %6.2f%% [%7ld] %6.2f%% [%7ld] %7ld\n", */
3615 /* fprintf(ofp, " %3d. %-10s (%6.2f%%) %7ld (%6.2f%%) %7ld (%6.2f%%) %7ld %7ld\n", */
3616 fprintf(ofp, " %-10s %7ld [%6.2f%%] %7ld [%6.2f%%] %7ld [%6.2f%%] %7ld\n",
3617 Namestr[i],
3618 (qinfomatr[1][i]+qinfomatr[2][i]+qinfomatr[4][i]),
3619 (100.0 * (qinfomatr[1][i]+qinfomatr[2][i]+qinfomatr[4][i])/qinfomatr[8][i]),
3620 (qinfomatr[3][i]+qinfomatr[5][i]+qinfomatr[6][i]),
3621 (100.0 * (qinfomatr[3][i]+qinfomatr[5][i]+qinfomatr[6][i])/qinfomatr[8][i]),
3622 qinfomatr[7][i],
3623 (100.0 * (qinfomatr[7][i])/qinfomatr[8][i]),
3624 qinfomatr[8][i]);
3625 /* MISSING QUARTETS (HAS ;-) */
3626 }
3627 fprintf(ofp, " --------------------------------------------------------------------------\n");
3628 fprintf(ofp, " #quartets :");
3629 {
3630 uli allqs;
3631 allqs = fullresqs + partresqs + unresqs + missingqs;
3632 fprintf(ofp, "%7ld [%6.2f%%] ", fullresqs, 100.0 * fullresqs / allqs);
3633 fprintf(ofp, "%7ld [%6.2f%%] ", partresqs, 100.0 * partresqs / allqs);
3634 fprintf(ofp, "%7ld [%6.2f%%] ", unresqs, 100.0 * unresqs / allqs);
3635 /* MISSING QUARTETS (HAS ;-) */
3636 /* fprintf(ofp, "%7ld [%6.2f%%] ", missingqs, 100.0 * missingqs / allqs); */
3637 fprintf(ofp, "%7ld", allqs);
3638 }
3639 fprintf(ofp, "\n\n");
3640 fprintf(ofp, "The table shows the occurrences of fully resolved, partially, and\n");
3641 fprintf(ofp, "completely unresolved quartets for each sequence and their percentage\n");
3642 fprintf(ofp, "relative to the number of times the sequence occurs in the list of \n");
3643 fprintf(ofp, "quartets (i.e. %ld quartets out of %ld in total).\n", (4 * Numquartets)/Maxspc, Numquartets);
3644 fprintf(ofp, "In fully resolved quartet one single topology is supported, while for\n");
3645 fprintf(ofp, "partially resolved quartets two and for completely unresolved quartets\n");
3646 fprintf(ofp, "none of the topologies (AB||CD, AC||BD, AD||BC) are favoured.\n");
3647 fprintf(ofp, "Note: Because 4 sequences are involved in one quartet numbers add up\n");
3648 fprintf(ofp, "to a four-fold of the existing quartets.\n");
3649 fprintf(ofp, "\n");
3650 fprintf(ofp, "Hint: The overall numbers in the last row give information about the\n");
3651 fprintf(ofp, "phylogenetic content of the dataset. The higher the percentage of partially\n");
3652 fprintf(ofp, "and unresolved quartets, the lower the content of phylogenetic information.\n");
3653 fprintf(ofp, "This can be visualized in more detail by likelihood mapping analysis.\n");
3654 fprintf(ofp, "\n");
3655
3656
3657
3658 }
3659
3660 if (typ_optn == TREERECON_OPTN) {
3661
3662
3663 /***************************************************************************/
3664 /* TREE SEARCH */
3665 /***************************************************************************/
3666
3667 fprintf(ofp, "\n\nTREE SEARCH\n\n");
3668 if (puzzlemode == QUARTPUZ) {
3669 fprintf(ofp, "Quartet puzzling is used to choose from the possible tree topologies\n");
3670 fprintf(ofp, "and to simultaneously infer support values for internal branches.\n\n");
3671 fprintf(ofp, "Number of puzzling steps: %lu\n", Numtrial);
3672 fprintf(ofp, "Analysed quartets: %lu\n", Numquartets);
3673 fprintf(ofp, "Fully resolved quartets: %lu (= %.1f%%)\n",
3674 fullresqs, (double) fullresqs / (double) Numquartets * 100.0);
3675 fprintf(ofp, "Partly resolved quartets: %lu (= %.1f%%)\n",
3676 partresqs, (double) partresqs / (double) Numquartets * 100.0);
3677 fprintf(ofp, "Unresolved quartets: %lu (= %.1f%%)\n",
3678 unresqs, (double) unresqs / (double) Numquartets * 100.0);
3679 /* MISSING QUARTETS (HAS ;-) */
3680 /*
3681 fprintf(ofp, "Missing quartets: %lu (= %.1f%%)\n",
3682 missingqs, (double) missingqs / (double) Numquartets * 100.0);
3683 */
3684 fprintf(ofp, "\nQuartet trees are based on %s maximum likelihood values\n",
3685 (approxqp ? "approximate" : "exact"));
3686 fprintf(ofp, "using the selected model of substitution and rate heterogeneity.\n");
3687 }
3688 if (puzzlemode == USERTREE || puzzlemode == CONSENSUS) {
3689 fprintf(ofp, "%d tree topologies were specified by the user.\n", numutrees);
3690 }
3691 if (puzzlemode == PAIRDIST) {
3692 fprintf(ofp, "No tree search performed (maximum likelihood distances only).\n");
3693 }
3694
3695
3696 /***************************************************************************/
3697 /* QUARTET PUZZLING TREE / CONSENSUS TREE */
3698 /***************************************************************************/
3699
3700 if ((puzzlemode == QUARTPUZ) || (puzzlemode == CONSENSUS)) {
3701 if (puzzlemode == QUARTPUZ) {
3702 fprintf(ofp, "\n\nQUARTET PUZZLING TREE\n\n");
3703 fprintf(ofp, "Support for the internal branches of the unrooted quartet puzzling\n");
3704 fprintf(ofp, "tree topology is shown in percent.\n");
3705 if (consincluded == (Maxspc - 3))
3706 fprintf(ofp,"\nThis quartet puzzling tree is completely resolved.\n");
3707 else
3708 fprintf(ofp,"\nThis quartet puzzling tree is not completely resolved!\n");
3709 } else {
3710 fprintf(ofp, "\n\nCONSENSUS TREE\n\n");
3711 fprintf(ofp, "Support for the internal branches of the unrooted consensus tree\n");
3712 fprintf(ofp, "topology is shown in percent.\n");
3713 if (consincluded == (Maxspc - 3))
3714 fprintf(ofp,"\nThis consensus tree is completely resolved.\n");
3715 else
3716 fprintf(ofp,"\nThis consensus tree is not completely resolved!\n");
3717 }
3718 fprintf(ofp, "\n\n");
3719 plotconsensustree(ofp);
3720 if (puzzlemode == QUARTPUZ) fprintf(ofp, "\n\nQuartet puzzling tree (in CLUSTAL W notation):\n\n");
3721 else fprintf(ofp, "\n\nConsensus tree (in CLUSTAL W notation):\n\n");
3722 writeconsensustree(ofp, FALSE, qsupportarr);
3723 if (qsupport_optn)
3724 writeconsensustree(ofp, TRUE, qsupportarr);
3725 if (puzzlemode == QUARTPUZ) {
3726 fprintf(ofp, "\n\nBIPARTITIONS\n\n");
3727 fprintf(ofp, "The following bipartitions occured at least once");
3728 fprintf(ofp, " in all intermediate\ntrees that have been generated ");
3729 fprintf(ofp, "in the %lu puzzling steps.\n", Numtrial);
3730 fprintf(ofp, "Bipartitions included in the quartet puzzling tree:\n");
3731 } else {
3732 fprintf(ofp, "\n\nBIPARTITIONS\n\n");
3733 fprintf(ofp, "The following bipartitions occured at least once");
3734 fprintf(ofp, " in the specified set\n of %d usertrees tree topologies.\n", numutrees);
3735 fprintf(ofp, "Bipartitions included in the consensus tree:\n");
3736 }
3737 fprintf(ofp,
3738 "(bipartition with sequences in input order : number of times seen)\n\n");
3739 for (li = 0; li < (uli)consfifty; li++) {
3740 fprintf(ofp, " ");
3741 fprintfsplit(ofp, splitfreqs[2*li+1], splitpatterns, splitlength, Maxspc);
3742 fprintf(ofp, " : %lu", splitfreqs[2*li]);
3743
3744 /* MISSING QUARTETS (HAS ;-) */
3745 /* todo (HAS) */
3746 if (qsupport_optn) {
3747 fprintf(ofp, " f+=%.0f|", (100.0*qsupportarr[li].fullres_pro/qsupportarr[li].qsum));
3748 fprintf(ofp, "p+=%.0f|", (100.0*qsupportarr[li].partres_pro/qsupportarr[li].qsum));
3749 fprintf(ofp, "f-=%.0f|", (100.0*qsupportarr[li].fullres_con/qsupportarr[li].qsum));
3750 fprintf(ofp, "p-=%.0f|", (100.0*qsupportarr[li].partres_con/qsupportarr[li].qsum));
3751 fprintf(ofp, "ur=%.0f", (100.0*qsupportarr[li].unres/qsupportarr[li].qsum));
3752 if (qsupportarr[li].missing > 0) {
3753 fprintf(ofp, "|md=%.0f", (100.0*qsupportarr[li].missing/qsupportarr[li].qsum));
3754 }
3755 fprintf(ofp, "/%ld\n", qsupportarr[li].qsum);
3756 } else {
3757 fprintf(ofp, "\n");
3758 }
3759 #if 0
3760 #endif
3761 }
3762 if (consincluded == 0) fprintf(ofp, " None (no bipartition included)\n");
3763 if (conssub50_optn) {
3764 if (! puzzlemode == CONSENSUS)
3765 fprintf(ofp, "\nCongruent bipartitions occurred in 50%% or less, included in \nthe quartet puzzling tree:\n");
3766 else
3767 fprintf(ofp, "\nCongruent bipartitions occurred in 50%% or less, included in \nthe consensus tree:\n");
3768 } else {
3769 if (! puzzlemode == CONSENSUS)
3770 fprintf(ofp, "\nCongruent bipartitions occurred in 50%% or less, not included in \nthe quartet puzzling tree:\n");
3771 else
3772 fprintf(ofp, "\nCongruent bipartitions occurred in 50%% or less, not included in \nthe consensus tree:\n");
3773 }
3774 fprintf(ofp,
3775 "(bipartition with sequences in input order : number of times seen)\n\n");
3776
3777 if ((uli)consfifty == numbiparts) {
3778 fprintf(ofp, " None (all bipartitions are included)\n");
3779 } else {
3780 if (consfifty < conscongruent) {
3781 for (li = (uli)consfifty; (li < (uli)numbiparts) && (li < (uli)conscongruent); li++) {
3782 fprintf(ofp, " ");
3783 fprintfsplit(ofp, splitfreqs[2*li+1], splitpatterns, splitlength, Maxspc);
3784 fprintf(ofp, " : %lu", splitfreqs[2*li]);
3785 /* MISSING QUARTETS (HAS ;-) */
3786 /* todo (HAS) */
3787 if (qsupport_optn) {
3788 fprintf(ofp, "f+=%.0f|", (100.0*qsupportarr[li].fullres_pro/qsupportarr[li].qsum));
3789 fprintf(ofp, "p+=%.0f|", (100.0*qsupportarr[li].partres_pro/qsupportarr[li].qsum));
3790 fprintf(ofp, "f-=%.0f|", (100.0*qsupportarr[li].fullres_con/qsupportarr[li].qsum));
3791 fprintf(ofp, "p-=%.0f|", (100.0*qsupportarr[li].partres_con/qsupportarr[li].qsum));
3792 fprintf(ofp, "ur=%.0f", (100.0*qsupportarr[li].unres/qsupportarr[li].qsum));
3793 if (qsupportarr[li].missing > 0) {
3794 fprintf(ofp, "|md=%.0f", (100.0*qsupportarr[li].missing/qsupportarr[li].qsum));
3795 }
3796 fprintf(ofp, "/%ld\n", qsupportarr[li].qsum);
3797 } else {
3798 fprintf(ofp, "\n");
3799 }
3800 #if 0
3801 #endif
3802 }
3803 } else {
3804 fprintf(ofp, " None (No congruent split not included)\n");
3805 }
3806
3807 if (! puzzlemode == CONSENSUS)
3808 fprintf(ofp, "\nIncongruent bipartitions not included in the quartet puzzling tree:\n");
3809 else
3810 fprintf(ofp, "\nIncongruent bipartitions not included in the consensus tree:\n");
3811 fprintf(ofp,
3812 "(bipartition with sequences in input order : number of times seen)\n\n");
3813
3814 /* print next 20 bipartions not included */
3815 for (li = conscongruent; (li < numbiparts) && (li < conscongruent + 20UL); li++) {
3816 fprintf(ofp, " ");
3817 fprintfsplit(ofp, splitfreqs[2*li+1], splitpatterns, splitlength, Maxspc);
3818 fprintf(ofp, " : %lu\n", splitfreqs[2*li]);
3819 }
3820 if ((li == conscongruent + 20UL) && (li != numbiparts))
3821 fprintf(ofp, "\n(%lu other less frequent bipartitions not shown)\n",
3822 numbiparts - conscongruent - 20UL);
3823 }
3824 if (! puzzlemode == CONSENSUS) {
3825 fprintfsortedpstrees(ofp, psteptreelist, psteptreenum, psteptreesum, 0, 5.0);
3826 }
3827 }
3828
3829
3830 /***************************************************************************/
3831 /* MAXIMUM LIKELIHOOD BRANCH LENGTHS ON QUARTET PUZZLING TREE (NO CLOCK) */
3832 /***************************************************************************/
3833
3834 #if 0
3835 if (((puzzlemode == QUARTPUZ) || (puzzlemode == CONSENSUS)) && !skipmlbranch_optn) {
3836 #endif
3837 if (((puzzlemode == QUARTPUZ) || (puzzlemode == CONSENSUS)) && dotreelh_optn) {
3838
3839 if (! puzzlemode == CONSENSUS) {
3840 fprintf(ofp, "\n\nMAXIMUM LIKELIHOOD BRANCH LENGTHS ON QUARTET");
3841 fprintf(ofp, " PUZZLING TREE (NO CLOCK)\n\nBranch lengths are computed using");
3842 fprintf(ofp, " the selected model of\nsubstitution and rate heterogeneity.\n\n\n");
3843 } else {
3844 fprintf(ofp, "\n\nMAXIMUM LIKELIHOOD BRANCH LENGTHS ON CONSENSUS");
3845 fprintf(ofp, " TREE (NO CLOCK)\n\nBranch lengths are computed using");
3846 fprintf(ofp, " the selected model of\nsubstitution and rate heterogeneity.\n\n\n");
3847 }
3848 clockmode = 0; /* nonclocklike branch lengths */
3849 prtopology(ofp);
3850 fprintf(ofp, "\n");
3851 resulttree(ofp, FALSE); /* branch lengths cannot be set by user in this case */
3852 if (! puzzlemode == CONSENSUS) {
3853 fprintf(ofp, "\n\nQuartet puzzling tree with maximum likelihood branch lengths");
3854 fprintf(ofp, "\n(in CLUSTAL W notation):\n\n");
3855 } else {
3856 fprintf(ofp, "\n\nConsensus tree with maximum likelihood branch lengths");
3857 fprintf(ofp, "\n(in CLUSTAL W notation):\n\n");
3858 }
3859 fputphylogeny(ofp);
3860
3861 /* SONJA */
3862 #ifdef SONJA
3863 /* computes the distances in the tree and writes them to file/stderr */
3864 tree2dist(Distanmat);
3865 putdistance(ofp);
3866 putdistance(stderr);
3867 #endif
3868
3869 /*****************************************************************************/
3870 /* MAXIMUM LIKELIHOOD BRANCH LENGTHS ON QUARTET PUZZLING TREE (WITH CLOCK) */
3871 /*****************************************************************************/
3872
3873 if (compclock) {
3874 if (! puzzlemode == CONSENSUS) {
3875 fprintf(ofp, "\n\nMAXIMUM LIKELIHOOD BRANCH LENGTHS OF QUARTET");
3876 fprintf(ofp, " PUZZLING TREE (WITH CLOCK)\n\nBranch lengths are computed using");
3877 } else {
3878 fprintf(ofp, "\n\nMAXIMUM LIKELIHOOD BRANCH LENGTHS OF CONSENSUS");
3879 fprintf(ofp, " TREE (WITH CLOCK)\n\nBranch lengths are computed using");
3880 }
3881 fprintf(ofp, " the selected model of\nsubstitution and rate heterogeneity.\n");
3882 fprintf(ofp, "\nRoot located at branch: %d ", locroot+1);
3883 if (locroot<Maxspc) {
3884 fprintf(ofp, "\""); fputid(ofp,locroot); fprintf(ofp, "\" ");
3885 } else fprintf(ofp, "\"internal branch\" ");
3886
3887 if (rootsearch == ROOT_USER) fprintf(ofp, "(user specified)\n\n\n");
3888 if (rootsearch == ROOT_AUTO) {
3889 fprintf(ofp, "(automatic search)");
3890 if (numbestroot > 1) fprintf(ofp, "- WARNING: %d best locations found! -", numbestroot);
3891 fprintf(ofp, "\n\n");
3892 fprintf(ofp, "If the automatic search misplaces the root please rerun the analysis\n");
3893 fprintf(ofp, "(rename \"outtree\" to \"intree\") and select location of root manually!");
3894 fprintf(ofp, "\n\n\n");
3895 }
3896 if (rootsearch == ROOT_DISPLAYED) fprintf(ofp, "(displayed outgroup)\n\n\n");
3897 clockmode = 1; /* clocklike branch lengths */
3898 prtopology(ofp);
3899 fprintf(ofp, "\n");
3900 fprintf(ofp, "\nTree drawn as unrooted tree for better ");
3901 fprintf(ofp, "comparison with non-clock tree!\n");
3902 resulttree(ofp, usebranch_optn);
3903 fprintf(ofp, "\n");
3904 resultheights(ofp);
3905 if (! puzzlemode == CONSENSUS) {
3906 fprintf(ofp, "\n\nRooted quartet puzzling tree with clocklike");
3907 } else {
3908 fprintf(ofp, "\n\nRooted consensus tree with clocklike");
3909 }
3910 fprintf(ofp, " maximum likelihood branch lengths\n");
3911 fprintf(ofp, "(in CLUSTAL W notation):\n\n");
3912 fputrooted(ofp, locroot);
3913 }
3914
3915
3916 /***************************************************************************/
3917 /* MOLECULAR CLOCK LIKELIHOOD RATIO TEST */
3918 /***************************************************************************/
3919
3920 if (compclock) {
3921 fprintf(ofp, "\n\nMOLECULAR CLOCK LIKELIHOOD RATIO TEST\n\n");
3922 fprintf(ofp, "log L without clock: %.2f (independent branch parameters: %d)\n",
3923 Ctree->lklhd, Numspc + Numibrnch);
3924 fprintf(ofp, "log L with clock: %.2f (independent branch parameters: %d)\n\n",
3925 Ctree->lklhdc, Numhts + 1);
3926 delta = fabs(2.0*((Ctree->lklhd) - (Ctree->lklhdc)));
3927 fprintf(ofp, "Likelihood ratio test statistic delta: %.2f\n", delta);
3928 df = Numspc + Numibrnch - Numhts - 1;
3929 fprintf(ofp, "Degress of freedom of chi-square distribution: %d\n", df);
3930
3931 pval = IncompleteGammaQ(df*0.5, delta*0.5);
3932
3933 fprintf(ofp, "Critical significance level: %.2f%%\n\n", pval*100.0);
3934 if (pval >= 0.05) {
3935 fprintf(ofp, "The simpler (clocklike) tree can not be rejected on a significance\n");
3936 fprintf(ofp, "level of 5%%. The log-likelihood of the more complex (no clock) tree\n");
3937 fprintf(ofp, "is not significantly increased.\n");
3938 } else {
3939 fprintf(ofp, "The simpler (clocklike) tree is rejected on a significance level\n");
3940 fprintf(ofp, "of 5%%. The log-likelihood of the more complex (no clock) tree is\n");
3941 fprintf(ofp, "significantly increased.\n");
3942 }
3943 fprintf(ofp, "\nPlease take care that the correct root is used!\n");
3944 }
3945
3946 }
3947 }
3948
3949
3950 /***************************************************************************/
3951 /* LIKELIHOOD MAPPING ANALYSIS */
3952 /***************************************************************************/
3953
3954 if (typ_optn == LIKMAPING_OPTN) {
3955
3956 fprintf(ofp, "\n\nLIKELIHOOD MAPPING ANALYSIS\n\n");
3957 fprintf(ofp, "Number of quartets: %lu", Numquartets);
3958 if (lmqts == 0) fprintf(ofp, " (all possible)\n");
3959 else fprintf(ofp, " (random choice)\n");
3960 fprintf(ofp, "\nQuartet trees are based on approximate maximum likelihood values\n");
3961 fprintf(ofp, "using the selected model of substitution and rate heterogeneity.\n\n\n");
3962 if (numclust == 1) {
3963 fprintf(ofp, "Sequences are not grouped in clusters.\n");
3964 } else {
3965 fprintf(ofp, "Sequences are grouped in %d clusters.\n", numclust);
3966 fprintf(ofp, "\nCluster a: %d sequences\n\n", clustA);
3967 for (i = 0; i < clustA; i++) {
3968 fprintf(ofp, " ");
3969 fputid(ofp, clusterA[i]);
3970 fprintf(ofp, "\n");
3971 }
3972 fprintf(ofp, "\nCluster b: %d sequences\n\n", clustB);
3973 for (i = 0; i < clustB; i++) {
3974 fprintf(ofp, " ");
3975 fputid(ofp, clusterB[i]);
3976 fprintf(ofp, "\n");
3977 }
3978 if (numclust > 2) {
3979 fprintf(ofp, "\nCluster c: %d sequences\n\n", clustC);
3980 for (i = 0; i < clustC; i++) {
3981 fprintf(ofp, " ");
3982 fputid(ofp, clusterC[i]);
3983 fprintf(ofp, "\n");
3984 }
3985 }
3986 if (numclust == 4) {
3987 fprintf(ofp, "\nCluster d: %d sequences\n\n", clustD);
3988 for (i = 0; i < clustD; i++) {
3989 fprintf(ofp, " ");
3990 fputid(ofp, clusterD[i]);
3991 fprintf(ofp, "\n");
3992 }
3993 }
3994 fprintf(ofp, "\nQuartets of sequences used in the likelihood");
3995 fprintf(ofp, " mapping analysis are generated\n");
3996 if (numclust == 2)
3997 fprintf(ofp, "by drawing two sequences from cluster a and two from cluster b.");
3998 if (numclust == 3)
3999 fprintf(ofp, "by drawing one sequence from clusters a and b and two from cluster c.");
4000 if (numclust == 4)
4001 fprintf(ofp, "by drawing one sequence from each of the clusters a, b, c, and d.");
4002 }
4003
4004
4005 /***************************************************************************/
4006 /* LIKELIHOOD MAPPING STATISTICS */
4007 /***************************************************************************/
4008
4009 fprintf(ofp, "\n\nLIKELIHOOD MAPPING STATISTICS\n\n");
4010 fprintf(ofp, "Occupancies of the three areas 1, 2, 3:\n\n");
4011 if (numclust == 4)
4012 fprintf(ofp, " (a,b)-(c,d)\n");
4013 if (numclust == 3)
4014 fprintf(ofp, " (a,b)-(c,c)\n");
4015 if (numclust == 2)
4016 fprintf(ofp, " (a,a)-(b,b)\n");
4017 fprintf(ofp, " /\\\n");
4018 fprintf(ofp, " / \\\n");
4019 fprintf(ofp, " / \\\n");
4020 fprintf(ofp, " / 1 \\\n");
4021 fprintf(ofp, " / \\ / \\\n");
4022 fprintf(ofp, " / \\ / \\\n");
4023 fprintf(ofp, " / \\/ \\\n");
4024 fprintf(ofp, " / 3 : 2 \\\n");
4025 fprintf(ofp, " / : \\\n");
4026 fprintf(ofp, " /__________________\\\n");
4027 if (numclust == 4)
4028 fprintf(ofp, " (a,d)-(b,c) (a,c)-(b,d)\n");
4029 if (numclust == 3)
4030 fprintf(ofp, " (a,c)-(b,c) (a,c)-(b,c)\n");
4031 if (numclust == 2)
4032 fprintf(ofp, " (a,b)-(a,b) (a,b)-(a,b)\n");
4033 fprintf(ofp, "\n");
4034 fprintf(ofp, "Number of quartets in region 1: %lu (= %.1f%%)\n",
4035 ar1, (double) ar1*100.0/Numquartets);
4036 fprintf(ofp, "Number of quartets in region 2: %lu (= %.1f%%)\n",
4037 ar2, (double) ar2*100.0/Numquartets);
4038 fprintf(ofp, "Number of quartets in region 3: %lu (= %.1f%%)\n\n",
4039 ar3, (double) ar3*100.0/Numquartets);
4040 fprintf(ofp, "Occupancies of the seven areas 1, 2, 3, 4, 5, 6, 7:\n\n");
4041 if (numclust == 4)
4042 fprintf(ofp, " (a,b)-(c,d)\n");
4043 if (numclust == 3)
4044 fprintf(ofp, " (a,b)-(c,c)\n");
4045 if (numclust == 2)
4046 fprintf(ofp, " (a,a)-(b,b)\n");
4047 fprintf(ofp, " /\\\n");
4048 fprintf(ofp, " / \\\n");
4049 fprintf(ofp, " / 1 \\\n");
4050 fprintf(ofp, " / \\ / \\\n");
4051 fprintf(ofp, " / /\\ \\\n");
4052 fprintf(ofp, " / 6 / \\ 4 \\\n");
4053 fprintf(ofp, " / / 7 \\ \\\n");
4054 fprintf(ofp, " / \\ /______\\ / \\\n");
4055 fprintf(ofp, " / 3 : 5 : 2 \\\n");
4056 fprintf(ofp, " /__________________\\\n");
4057 if (numclust == 4)
4058 fprintf(ofp, " (a,d)-(b,c) (a,c)-(b,d)\n");
4059 if (numclust == 3)
4060 fprintf(ofp, " (a,c)-(b,c) (a,c)-(b,c)\n");
4061 if (numclust == 2)
4062 fprintf(ofp, " (a,b)-(a,b) (a,b)-(a,b)\n");
4063 fprintf(ofp, "\n");
4064 fprintf(ofp, "Number of quartets in region 1: %lu (= %.1f%%) left: %lu right: %lu\n",
4065 reg1, (double) reg1*100.0/Numquartets, reg1l, reg1r);
4066 fprintf(ofp, "Number of quartets in region 2: %lu (= %.1f%%) bottom: %lu top: %lu\n",
4067 reg2, (double) reg2*100.0/Numquartets, reg2d, reg2u);
4068 fprintf(ofp, "Number of quartets in region 3: %lu (= %.1f%%) bottom: %lu top: %lu\n",
4069 reg3, (double) reg3*100.0/Numquartets, reg3d, reg3u);
4070 fprintf(ofp, "Number of quartets in region 4: %lu (= %.1f%%) bottom: %lu top: %lu\n",
4071 reg4, (double) reg4*100.0/Numquartets, reg4d, reg4u);
4072 fprintf(ofp, "Number of quartets in region 5: %lu (= %.1f%%) left: %lu right: %lu\n",
4073 reg5, (double) reg5*100.0/Numquartets, reg5l, reg5r);
4074 fprintf(ofp, "Number of quartets in region 6: %lu (= %.1f%%) bottom: %lu top: %lu\n",
4075 reg6, (double) reg6*100.0/Numquartets, reg6d, reg6u);
4076 fprintf(ofp, "Number of quartets in region 7: %lu (= %.1f%%)\n",
4077 reg7, (double) reg7*100.0/Numquartets);
4078 }
4079
4080 } /* if WRITEREST) || WRITEALL */
4081 } /* writeoutputfile */
4082
4083
4084 #if PARALLEL
4085 void writetimesstat(FILE *ofp)
4086 {
4087 int n;
4088 double cpusum = 0.0;
4089 double wallmax = 0.0;
4090 cputimes[0] = ((double)(cputimestop - cputimestart) / CLOCKS_PER_SEC);
4091 walltimes[0] = difftime(walltimestop, walltimestart);
4092 fullcpu = tarr.fullcpu;
4093 fulltime = tarr.fulltime;
4094 fullcputimes[0] = tarr.fullcpu;
4095 fullwalltimes[0] = tarr.fulltime;
4096 altcputimes[0] = tarr.cpu;
4097 altwalltimes[0] = tarr.time;
4098 fprintf(ofp, "\n\n\nPARALLEL LOAD STATISTICS\n\n");
4099
4100 fprintf(ofp, "The analysis was performed with %d parallel processes (1 master and \n", PP_NumProcs);
4101 fprintf(ofp, "%d worker processes).\n\n", PP_NumProcs-1);
4102 fprintf(ofp, "The following table the distribution of computation to the processes.\n");
4103 fprintf(ofp, "The first column gives the process number, where 0 is the master process.\n");
4104 fprintf(ofp, "The second and third column show the number of quartets computed (3 topologies \n");
4105 fprintf(ofp, "each) and the the number of scheduling blocks the came in. The last two columns \n");
4106 fprintf(ofp, "state the number of puzzling steps done by a process and number of scheduling \n");
4107 fprintf(ofp, "blocks.\n\n");
4108 fprintf(ofp, "process #quartets #chunks #puzzlings #chunks \n");
4109 fprintf(ofp, "-----------------------------------------------\n");
4110 for (n=0; n<PP_NumProcs; n++) {
4111 fprintf(ofp, "%6d %9d %7d %10d %7d \n", n,
4112 quartsent[n], quartsentn[n],
4113 splitsent[n], splitsentn[n]);
4114 } /* for */
4115 fprintf(ofp, "-----------------------------------------------\n");
4116 fprintf(ofp, " Sums: %9d %7d %10d %7d \n",
4117 PP_quartrecved, PP_quartrecvedn,
4118 PP_splitrecved, PP_splitrecvedn);
4119
4120 #ifdef TIMEDEBUG
4121 fprintf(ofp, "\n\nBelow the distribution of computing times (CPU and wallclock) per host is shown.\n");
4122 fprintf(ofp, "The times are shown in seconds, minutes, and hours. At the bottom of the table the\n");
4123 fprintf(ofp, "sum of CPU times and the maximum wallclock time is shown.\n\n");
4124 fprintf(ofp, "process CPU-time[s] [min] [hours] | wallclock[s] [min] [hours] \n");
4125 fprintf(ofp, "----------------------------------------------------------------------------\n");
4126 for (n=0; n<PP_NumProcs; n++) {
4127
4128 # ifdef TIMEDEBUG
4129 fprintf(ofp, "%6d %11.1f %9.1f %9.1f | %11.1f %9.1f %9.1f\n", n,
4130 cputimes[n], cputimes[n] /60, cputimes[n] /3600,
4131 walltimes[n], walltimes[n]/60, walltimes[n]/3600);
4132 # endif /* TIMEDEBUG */
4133
4134 fprintf(ofp, "%6d %11.1f %9.1f %9.1f | %11.1f %9.1f %9.1f\n", n,
4135 fullcputimes[n], fullcputimes[n] /60, fullcputimes[n] /3600,
4136 fullwalltimes[n], fullwalltimes[n]/60, fullwalltimes[n]/3600);
4137
4138 # ifdef TIMEDEBUG
4139 fprintf(ofp, "%6d %11.1f %9.1f %9.1f | %11.1f %9.1f %9.1f alt\n", n,
4140 altcputimes[n], altcputimes[n] /60, altcputimes[n] /3600,
4141 altwalltimes[n], altwalltimes[n]/60, altwalltimes[n]/3600);
4142 # endif /* TIMEDEBUG */
4143
4144 if (fullwalltimes[n] > wallmax) wallmax=fullwalltimes[n];
4145 cpusum += fullcputimes[n];
4146 } /* for */
4147 fprintf(ofp, "----------------------------------------------------------------------------\n");
4148 fprintf(ofp, "Sum/Max: %11.1f %9.1f %9.1f | %11.1f %9.1f %9.1f \n",
4149 cpusum, cpusum/60, cpusum/3600, wallmax, wallmax/60, wallmax/3600);
4150 #else /* TIMEDEBUG */
4151 fprintf(ofp, "\n\nBelow the distribution of computing times (wallclock) per host is shown.\n");
4152 fprintf(ofp, "The times are shown in seconds, minutes, and hours. At the bottom of the table the\n");
4153 fprintf(ofp, "the maximum wallclock times is shown.\n\n");
4154 fprintf(ofp, "process wallclock[s] [min] [hours] \n");
4155 fprintf(ofp, "----------------------------------------------------------------------------\n");
4156 for (n=0; n<PP_NumProcs; n++) {
4157
4158 # ifdef TIMEDEBUG
4159 fprintf(ofp, "%6d %11.1f %9.1f %9.1f\n", n,
4160 walltimes[n], walltimes[n]/60, walltimes[n]/3600);
4161 # endif /* TIMEDEBUG */
4162
4163 fprintf(ofp, "%6d %11.1f %9.1f %9.1f\n", n,
4164 fullwalltimes[n], fullwalltimes[n]/60, fullwalltimes[n]/3600);
4165
4166 # ifdef TIMEDEBUG
4167 fprintf(ofp, "%6d %11.1f %9.1f %9.1f alt\n", n,
4168 altwalltimes[n], altwalltimes[n]/60, altwalltimes[n]/3600);
4169 # endif /* TIMEDEBUG */
4170
4171 if (fullwalltimes[n] > wallmax) wallmax=fullwalltimes[n];
4172 cpusum += fullcputimes[n];
4173 } /* for */
4174 fprintf(ofp, "----------------------------------------------------------------------------\n");
4175 fprintf(ofp, "Sum/Max: %11.1f %9.1f %9.1f \n",
4176 wallmax, wallmax/60, wallmax/3600);
4177 #endif /* TIMEDEBUG */
4178
4179 fullcpu = cpusum;
4180 fulltime = wallmax;
4181
4182 } /* writetimesstat */
4183 #endif
4184
4185
4186 /* write current user tree to file */
4187 void writecutree(FILE *ofp, int num)
4188 {
4189 int df;
4190 double pval, delta;
4191
4192
4193 if (typ_optn == TREERECON_OPTN) {
4194
4195
4196 /***************************************************************************/
4197 /* MAXIMUM LIKELIHOOD BRANCH LENGTHS OF USER DEFINED TREE #xxx (NO CLOCK) */
4198 /***************************************************************************/
4199
4200 if (puzzlemode == USERTREE) {
4201 fprintf(ofp, "\n\nMAXIMUM LIKELIHOOD BRANCH LENGTHS OF USER");
4202 fprintf(ofp, " DEFINED TREE # %d (NO CLOCK)\n\nBranch lengths are computed using", num);
4203 fprintf(ofp, " the selected model of\nsubstitution and rate heterogeneity.\n\n\n");
4204 clockmode = 0; /* nonclocklike branch lengths */
4205 prtopology(ofp);
4206 fprintf(ofp, "\n");
4207 resulttree(ofp, usebranch_optn);
4208 fprintf(ofp, "\n\nUnrooted user defined tree with maximum likelihood branch lengths");
4209 fprintf(ofp, "\n(in CLUSTAL W notation):\n\n");
4210 fputphylogeny(ofp);
4211 fflush(ofp);
4212
4213 /*****************************************************************************/
4214 /* MAXIMUM LIKELIHOOD BRANCH LENGTHS OF USER DEFINED TREE #xxx (WITH CLOCK) */
4215 /*****************************************************************************/
4216
4217 if (compclock) {
4218 fprintf(ofp, "\n\nMAXIMUM LIKELIHOOD BRANCH LENGTHS OF USER");
4219 fprintf(ofp, " DEFINED TREE # %d (WITH CLOCK)\n\nBranch lengths are computed using", num);
4220 fprintf(ofp, " the selected model of\nsubstitution and rate heterogeneity.\n");
4221 fprintf(ofp, "\nRoot located at branch: %d ", locroot+1);
4222 if (locroot<Maxspc) {
4223 fprintf(ofp, "\""); fputid(ofp,locroot); fprintf(ofp, "\" ");
4224 } else fprintf(ofp, "\"internal branch\" ");
4225 if (rootsearch == ROOT_USER) fprintf(ofp, "(user specified)\n\n\n");
4226 if (rootsearch == ROOT_AUTO) {
4227 fprintf(ofp, "(automatic search)");
4228 if (numbestroot > 1) fprintf(ofp, "- WARNING: %d best locations found! -", numbestroot);
4229 fprintf(ofp, "\n\n");
4230 fprintf(ofp, "If the automatic search misplaces the root please rerun the analysis\n");
4231 fprintf(ofp, "and select location of root manually!");
4232 fprintf(ofp, "\n\n\n");
4233
4234 }
4235 if (rootsearch == ROOT_DISPLAYED) fprintf(ofp, "(displayed outgroup)\n\n\n");
4236 clockmode = 1; /* clocklike branch lengths */
4237 prtopology(ofp);
4238 fprintf(ofp, "\n");
4239 resulttree(ofp, usebranch_optn);
4240 fprintf(ofp, "\n");
4241 resultheights(ofp);
4242 fprintf(ofp, "\n\nRooted user defined tree with clocklike ");
4243 fprintf(ofp, "maximum likelihood branch lengths\n");
4244 fprintf(ofp, "(in CLUSTAL W notation):\n\n");
4245 fputrooted(ofp, locroot);
4246 fflush(ofp);
4247 }
4248
4249
4250 /*****************************************************************************/
4251 /* MOLECULAR CLOCK LIKELIHOOD RATIO TEST FOR USER TREE #xxx */
4252 /*****************************************************************************/
4253
4254 if (compclock) {
4255 fprintf(ofp, "\n\nMOLECULAR CLOCK LIKELIHOOD RATIO TEST FOR USER TREE # %d\n\n", num);
4256 fprintf(ofp, "log L without clock: %.2f (independent branch parameters: %d)\n",
4257 Ctree->lklhd, Numspc + Numibrnch);
4258 fprintf(ofp, "log L with clock: %.2f (independent branch parameters: %d)\n\n",
4259 Ctree->lklhdc, Numhts + 1);
4260 delta = fabs(2.0*((Ctree->lklhd) - (Ctree->lklhdc)));
4261 fprintf(ofp, "Likelihood ratio test statistic delta: %.2f\n", delta);
4262 df = Numspc + Numibrnch - Numhts - 1;
4263 fprintf(ofp, "Degrees of freedom of chi-square distribution: %d\n", df);
4264
4265 pval = IncompleteGammaQ (df*0.5, delta*0.5);
4266
4267 fprintf(ofp, "Critical significance level: %.2f%%\n\n", pval*100.0);
4268 if (pval >= 0.05) {
4269 fprintf(ofp, "The simpler (clocklike) tree can not be rejected on a significance\n");
4270 fprintf(ofp, "level of 5%%. The log-likelihood of the more complex (no clock) tree\n");
4271 fprintf(ofp, "is not significantly increased.\n");
4272 } else {
4273 fprintf(ofp, "The simpler (clocklike) tree is rejected on a significance level\n");
4274 fprintf(ofp, "of 5%%. The log-likelihood of the more complex (no clock) tree is\n");
4275 fprintf(ofp, "significantly increased.\n");
4276 }
4277 fprintf(ofp, "\nPlease take care that the correct root is used!\n");
4278 }
4279 }
4280 }
4281 } /* writecutree */
4282
4283
4284 /******************************************************************************/
4285 /* timer routines */
4286 /******************************************************************************/
4287
4288 void resetqblocktime(timearray_t *ta)
4289 {
4290 ta->quartcpu += ta->quartblockcpu;
4291 ta->quartblockcpu = 0.0;
4292 ta->quarttime += ta->quartblocktime;
4293 ta->quartblocktime = 0.0;
4294 } /* resetqblocktime */
4295
4296
4297 void resetpblocktime(timearray_t *ta)
4298 {
4299 ta->puzzcpu += ta->puzzblockcpu;
4300 ta->puzzblockcpu = 0.0;
4301 ta->puzztime += ta->puzzblocktime;
4302 ta->puzzblocktime = 0.0;
4303 } /* resetpblocktime */
4304
4305
4306 #ifdef TIMEDEBUG
4307 void printtimearr(timearray_t *ta)
4308 {
4309 # if ! PARALLEL
4310 int PP_Myid;
4311 PP_Myid = -1;
4312 # endif
4313 printf("(%2d) MMCPU: %11ld / %11ld \n", PP_Myid, ta->maxcpu, ta->mincpu);
4314 printf("(%2d) CTick: %11.6f [tks] / %11.6f [s] \n", PP_Myid, ta->mincputick, ta->mincputicktime);
4315
4316 printf("(%2d) MMTIM: %11ld / %11ld \n", PP_Myid, ta->maxtime, ta->mintime);
4317
4318 printf("(%2d) Mxblk: %11.6e / %11.6e \n", PP_Myid, ta->maxcpublock, ta->maxtimeblock);
4319 printf("(%2d) Mnblk: %11.6e / %11.6e \n", PP_Myid, ta->mincpublock, ta->mintimeblock);
4320
4321 printf("(%2d) Gnrl: %11.6e / %11.6e \n", PP_Myid, ta->generalcpu, ta->generaltime);
4322 printf("(%2d) Optn: %11.6e / %11.6e \n", PP_Myid, ta->optionscpu, ta->optionstime);
4323 printf("(%2d) Estm: %11.6e / %11.6e \n", PP_Myid, ta->paramestcpu, ta->paramesttime);
4324 printf("(%2d) Qurt: %11.6e / %11.6e \n", PP_Myid, ta->quartcpu, ta->quarttime);
4325 printf("(%2d) QBlk: %11.6e / %11.6e \n", PP_Myid, ta->quartblockcpu, ta->quartblocktime);
4326 printf("(%2d) QMax: %11.6e / %11.6e \n", PP_Myid, ta->quartmaxcpu, ta->quartmaxtime);
4327 printf("(%2d) QMin: %11.6e / %11.6e \n", PP_Myid, ta->quartmincpu, ta->quartmintime);
4328
4329 printf("(%2d) Puzz: %11.6e / %11.6e \n", PP_Myid, ta->puzzcpu, ta->puzztime);
4330 printf("(%2d) PBlk: %11.6e / %11.6e \n", PP_Myid, ta->puzzblockcpu, ta->puzzblocktime);
4331 printf("(%2d) PMax: %11.6e / %11.6e \n", PP_Myid, ta->puzzmaxcpu, ta->puzzmaxtime);
4332 printf("(%2d) PMin: %11.6e / %11.6e \n", PP_Myid, ta->puzzmincpu, ta->puzzmintime);
4333
4334 printf("(%2d) Tree: %11.6e / %11.6e \n", PP_Myid, ta->treecpu, ta->treetime);
4335 printf("(%2d) TBlk: %11.6e / %11.6e \n", PP_Myid, ta->treeblockcpu, ta->treeblocktime);
4336 printf("(%2d) TMax: %11.6e / %11.6e \n", PP_Myid, ta->treemaxcpu, ta->treemaxtime);
4337 printf("(%2d) TMin: %11.6e / %11.6e \n", PP_Myid, ta->treemincpu, ta->treemintime);
4338
4339 printf("(%2d) C/T : %11.6e / %11.6e \n", PP_Myid,
4340 (ta->generalcpu + ta->optionscpu + ta->paramestcpu + ta->quartblockcpu + ta->puzzblockcpu + ta->treeblockcpu),
4341 (ta->generaltime + ta->optionstime + ta->paramesttime + ta->quartblocktime + ta->puzzblocktime + ta->treeblocktime));
4342 printf("(%2d) CPU: %11.6e / Time: %11.6e \n", PP_Myid, ta->cpu, ta->time);
4343 printf("(%2d) aCPU: %11.6e / aTime: %11.6e \n", PP_Myid, ta->fullcpu, ta->fulltime);
4344
4345 } /* printtimearr */
4346 #endif /* TIMEDEBUG */
4347
4348 char *jtype [7];
4349
4350 void inittimearr(timearray_t *ta)
4351 {
4352 clock_t c0, c1, c2;
4353
4354 jtype[OVERALL] = "OVERALL";
4355 jtype[GENERAL] = "GENERAL";
4356 jtype[OPTIONS] = "OPTIONS";
4357 jtype[PARAMEST] = "PARAMeter ESTimation";
4358 jtype[QUARTETS] = "QUARTETS";
4359 jtype[PUZZLING] = "PUZZLING steps";
4360 jtype[TREEEVAL] = "TREE EVALuation";
4361 ta->currentjob = GENERAL;
4362
4363 c1 = clock();
4364 c2 = clock();
4365 while (c1 == c2)
4366 c2 = clock();
4367 ta->mincputick = (double)(c2 - c1);
4368 ta->mincputicktime = ((double)(c2 - c1))/CLOCKS_PER_SEC;
4369
4370 ta->tempcpu = clock();
4371 ta->tempcpustart = ta->tempcpu;
4372 ta->tempfullcpu = ta->tempcpu;
4373 time(&(ta->temptime));
4374 ta->temptimestart = ta->temptime;
4375 ta->tempfulltime = ta->temptime;
4376
4377 c0=0; c1=0; c2=(clock_t)((2 * c1) + 1);;
4378 while (c1 < c2) {
4379 c0 = c1;
4380 c1 = c2;
4381 c2 = (clock_t)((2 * c1) + 1);
4382 }
4383 if (c1 == c2) ta->maxcpu=c0;
4384 if (c1 > c2) ta->maxcpu=c1;
4385
4386 c0=0; c1=0; c2=(clock_t)((2 * c1) - 1);
4387 while (c1 > c2) {
4388 c0 = c1;
4389 c1 = c2;
4390 c2 = (clock_t)((2 * c1) - 1);
4391 }
4392 if (c1 == c2) ta->mincpu=c0;
4393 if (c1 < c2) ta->mincpu=c1;
4394
4395
4396
4397 ta->maxtime = 0;
4398 ta->mintime = 0;
4399
4400 ta->maxcpublock = 0;
4401 ta->mincpublock = DBL_MAX;
4402 ta->maxtimeblock = 0;
4403 ta->mintimeblock = DBL_MAX;
4404
4405 ta->cpu = 0.0;
4406 ta->time = 0.0;
4407
4408 ta->fullcpu = 0.0;
4409 ta->fulltime = 0.0;
4410
4411 ta->generalcpu = 0.0;
4412 ta->optionscpu = 0.0;
4413 ta->paramestcpu = 0.0;
4414 ta->quartcpu = 0.0;
4415 ta->quartblockcpu = 0.0;
4416 ta->quartmaxcpu = 0.0;
4417 ta->quartmincpu = ((double) ta->maxcpu)/CLOCKS_PER_SEC;
4418 ta->puzzcpu = 0.0;
4419 ta->puzzblockcpu = 0.0;
4420 ta->puzzmaxcpu = 0.0;
4421 ta->puzzmincpu = ((double) ta->maxcpu)/CLOCKS_PER_SEC;
4422 ta->treecpu = 0.0;
4423 ta->treeblockcpu = 0.0;
4424 ta->treemaxcpu = 0.0;
4425 ta->treemincpu = ((double) ta->maxcpu)/CLOCKS_PER_SEC;
4426
4427 ta->generaltime = 0.0;
4428 ta->optionstime = 0.0;
4429 ta->paramesttime = 0.0;
4430 ta->quarttime = 0.0;
4431 ta->quartblocktime = 0.0;
4432 ta->quartmaxtime = 0.0;
4433 ta->quartmintime = DBL_MAX;
4434 ta->puzztime = 0.0;
4435 ta->puzzblocktime = 0.0;
4436 ta->puzzmaxtime = 0.0;
4437 ta->puzzmintime = DBL_MAX;
4438 ta->treetime = 0.0;
4439 ta->treeblocktime = 0.0;
4440 ta->treemaxtime = 0.0;
4441 ta->treemintime = DBL_MAX;
4442 } /* inittimearr */
4443
4444
4445 /***************/
4446
4447 void addup(int jobtype, clock_t c1, clock_t c2, time_t t1, time_t t2, timearray_t *ta)
4448 {
4449 double c,
4450 t;
4451
4452 if (t2 != t1) t = difftime(t2, t1);
4453 else t = 0.0;
4454
4455 if (c2 < c1)
4456 c = ((double)(c2 - ta->mincpu))/CLOCKS_PER_SEC +
4457 ((double)(ta->maxcpu - c1))/CLOCKS_PER_SEC;
4458 else
4459 c = ((double)(c2 - c1))/CLOCKS_PER_SEC;
4460
4461 if (jobtype != OVERALL) {
4462
4463 if (ta->mincpublock > c) ta->mincpublock = c;
4464 if (ta->maxcpublock < c) ta->maxcpublock = c;
4465 if (ta->mintimeblock > t) ta->mintimeblock = t;
4466 if (ta->maxtimeblock < t) ta->maxtimeblock = t;
4467
4468 switch (jobtype) {
4469 case GENERAL: ta->generalcpu += c;
4470 ta->generaltime += t;
4471 break;
4472 case OPTIONS: ta->optionscpu += c;
4473 ta->optionstime += t;
4474 break;
4475 case PARAMEST: ta->paramestcpu += c;
4476 ta->paramesttime += t;
4477 break;
4478 case QUARTETS: ta->quartblockcpu += c;
4479 ta->quartblocktime += t;
4480 if (ta->quartmincpu > c) ta->quartmincpu = c;
4481 if (ta->quartmaxcpu < c) ta->quartmaxcpu = c;
4482 if (ta->quartmintime > t) ta->quartmintime = t;
4483 if (ta->quartmaxtime < t) ta->quartmaxtime = t;
4484 break;
4485 case PUZZLING: ta->puzzblockcpu += c;
4486 ta->puzzblocktime += t;
4487 if (ta->puzzmincpu > c) ta->puzzmincpu = c;
4488 if (ta->puzzmaxcpu < c) ta->puzzmaxcpu = c;
4489 if (ta->puzzmintime > t) ta->puzzmintime = t;
4490 if (ta->puzzmaxtime < t) ta->puzzmaxtime = t;
4491 break;
4492 case TREEEVAL: ta->treeblockcpu += c;
4493 ta->treeblocktime += t;
4494 if (ta->treemincpu > c) ta->treemincpu = c;
4495 if (ta->treemaxcpu < c) ta->treemaxcpu = c;
4496 if (ta->treemintime > t) ta->treemintime = t;
4497 if (ta->treemaxtime < t) ta->treemaxtime = t;
4498 break;
4499 }
4500 ta->cpu += c;
4501 ta->time += t;
4502
4503 } else {
4504 ta->fullcpu += c;
4505 ta->fulltime += t;
4506 }
4507
4508 # ifdef TIMEDEBUG
4509 {
4510 # if ! PARALLEL
4511 int PP_Myid = -1;
4512 # endif /* !PARALLEL */
4513 printf("(%2d) CPU: +%10.6f / Time: +%10.6f (%s)\n", PP_Myid, c, t, jtype[jobtype]);
4514 printf("(%2d) CPU: %11.6f / Time: %11.6f (%s)\n", PP_Myid, ta->cpu, ta->time, jtype[jobtype]);
4515 printf("(%2d) CPU: %11.6f / Time: %11.6f (%s)\n", PP_Myid, ta->fullcpu, ta->fulltime, jtype[jobtype]);
4516 }
4517 # endif /* TIMEDEBUG */
4518 } /* addup */
4519
4520
4521 /***************/
4522
4523
4524 void addtimes(int jobtype, timearray_t *ta)
4525 {
4526 clock_t tempc;
4527 time_t tempt;
4528
4529 time(&tempt);
4530 tempc = clock();
4531
4532 if ((tempc < ta->tempfullcpu) || (jobtype == OVERALL)) { /* CPU counter overflow for overall time */
4533 addup(OVERALL, ta->tempfullcpu, tempc, ta->tempfulltime, tempt, ta);
4534 ta->tempfullcpu = tempc;
4535 ta->tempfulltime = tempt;
4536 if (jobtype == OVERALL) {
4537 addup(ta->currentjob, ta->tempcpustart, tempc, ta->temptimestart, tempt, ta);
4538 ta->tempcpustart = ta->tempcpu;
4539 ta->tempcpu = tempc;
4540 ta->temptimestart = ta->temptime;
4541 ta->temptime = tempt;
4542 }
4543 }
4544
4545 if((jobtype != ta->currentjob) && (jobtype != OVERALL)) { /* change of job type */
4546 addup(ta->currentjob, ta->tempcpustart, ta->tempcpu, ta->temptimestart, ta->temptime, ta);
4547 ta->tempcpustart = ta->tempcpu;
4548 ta->tempcpu = tempc;
4549 ta->temptimestart = ta->temptime;
4550 ta->temptime = tempt;
4551 ta->currentjob = jobtype;
4552 }
4553
4554 if (tempc < ta->tempcpustart) { /* CPU counter overflow */
4555 addup(jobtype, ta->tempcpustart, tempc, ta->temptimestart, tempt, ta);
4556 ta->tempcpustart = ta->tempcpu;
4557 ta->tempcpu = tempc;
4558 ta->temptimestart = ta->temptime;
4559 ta->temptime = tempt;
4560 }
4561
4562 } /* addtimes */
4563
4564
4565
4566 /******************************************************************************/
4567 /* moved to mlparam.c */
4568 /******************************************************************************/
4569
4570 #if 0
4571 /* estimate parameters of substitution process and rate heterogeneity - no tree
4572 n-taxon tree is not needed because of quartet method or NJ tree topology */
4573 void estimateparametersnotree() /* moved to mlparam.c */
4574
4575 /* estimate parameters of substitution process and rate heterogeneity - tree
4576 same as above but here the n-taxon tree is already in memory */
4577 void estimateparameterstree() /* moved to mlparam.c */
4578 #endif
4579
4580
4581 /******************************************************************************/
4582 /* exported from main */
4583 /******************************************************************************/
4584
4585 void compute_quartlklhds(int a, int b, int c, int d, double *d1, double *d2, double *d3, int approx)
4586 {
4587 if (approx == APPROX) {
4588
4589 *d1 = quartet_alklhd(a,b, c,d); /* (a,b)-(c,d) */
4590 *d2 = quartet_alklhd(a,c, b,d); /* (a,c)-(b,d) */
4591 *d3 = quartet_alklhd(a,d, b,c); /* (a,d)-(b,c) */
4592
4593 } else /* approx == EXACT */ {
4594
4595 *d1 = quartet_lklhd(a,b, c,d); /* (a,b)-(c,d) */
4596 *d2 = quartet_lklhd(a,c, b,d); /* (a,c)-(b,d) */
4597 *d3 = quartet_lklhd(a,d, b,c); /* (a,d)-(b,c) */
4598
4599 }
4600 } /* compute_quartlklhds */
4601
4602 /***************************************************************/
4603
4604 /* Reconstruct a tree with QP */
4605 /* (parameter estimation already done) */
4606
4607 void recon_tree()
4608 {
4609 int i;
4610 unsigned char tmpweight;
4611
4612 /***********************************/
4613 /*** allocation + initialization ***/
4614 /***********************************/
4615
4616 /* allocate and initialize qinfomatr */
4617 qinfomatr = new_ulimatrix(9,Maxspc);
4618 for (i = 0; i < Maxspc; i++) {
4619 qinfomatr[0][i] = 0; /* missing quartets */
4620 qinfomatr[1][i] = 0; /* fully resolved quartet 1 for taxon i */
4621 qinfomatr[2][i] = 0; /* fully resolved quartet 2 for taxon i */
4622 qinfomatr[3][i] = 0; /* fully resolved quartet 3 for taxon i */
4623 qinfomatr[4][i] = 0; /* partly resolved quartet 1 for taxon i */
4624 qinfomatr[5][i] = 0; /* partly resolved quartet 2 for taxon i */
4625 qinfomatr[6][i] = 0; /* partly resolved quartet 3 for taxon i */
4626 qinfomatr[7][i] = 0; /* unresolved quartet for taxon i */
4627 qinfomatr[8][i] = 0; /* sum of quartets for taxon i */
4628 }
4629
4630 /* allocate memory for taxon list of bad quartets */
4631 badtaxon = new_ulivector(Maxspc);
4632 for (i = 0; i < Maxspc; i++) badtaxon[i] = 0;
4633
4634 /* allocate memory for quartets */
4635 quartetinfo = callocquartets(Maxspc);
4636
4637 /* prepare for consensus tree analysis */
4638 initconsensus();
4639
4640 time(&MLstepStarttime);
4641 # ifdef DEBUG_PRINTTIMES
4642 fprintftimespan(stdout, PEstStoptime, MLstepStarttime, Starttime, "PEst -> MLstep");
4643 # endif /* DEBUG_PRINTTIMES */
4644
4645
4646 /***************/
4647 /*** ML step ***/
4648 /***************/
4649
4650 if (!(readquart_optn) || (readquart_optn && savequart_optn)) {
4651 /* compute quartets */
4652 fprintf(STDOUT, "Computing quartet maximum likelihood trees\n");
4653 fflush(STDOUT);
4654 computeallquartets();
4655 }
4656
4657 time(&MLstepStoptime);
4658 # ifdef DEBUG_PRINTTIMES
4659 fprintftimespan(stdout, MLstepStarttime, MLstepStoptime, Starttime, "MLstep");
4660 # endif /* DEBUG_PRINTTIMES */
4661
4662
4663 /** output/input if necessary **/
4664
4665 if (savequart_optn)
4666 writeallquarts(Maxspc, ALLQUART, quartetinfo);
4667 if (readquart_optn) {
4668 readallquarts (Maxspc, ALLQUART, quartetinfo);
4669 if (show_optn) { /* list all unresolved quartets */
4670 openfiletowrite(&unresfp, UNRESOLVED, "unresolved quartet trees");
4671 fprintf(unresfp, "List of all completely unresolved quartets:\n\n");
4672 }
4673 if (readsubset_optn) { /* missing data -> read subsets */
4674 FILE *ssfp;
4675 openfiletoread(&ssfp, SUBSET, "taxon subsets");
4676 fprintf(STDOUT, "Reading taxon subsets: %s\n", SUBSET);
4677
4678 readsubsetfile(ssfp, Maxspc, Namestr,
4679 &Maxsubset, &ss_setovlgraph,
4680 &ss_setoverlaps, &ss_setovllist,
4681 &ss_setovllistsize, &ss_matrix,
4682 &ss_list, &ss_listsize);
4683 #if 0
4684 fprintf(stderr,"Maxspc=%d Maxsubset=%d\n", Maxspc, Maxsubset);
4685 fprintfss(stdout, Maxspc, Maxsubset, ss_setovlgraph,
4686 ss_setoverlaps, ss_setovllist,
4687 ss_setovllistsize, ss_matrix, ss_list,
4688 ss_listsize);
4689 #endif
4690 checkss(stdout, Maxspc, Maxsubset, ss_setovlgraph,
4691 ss_setoverlaps, ss_setovllist,
4692 ss_setovllistsize, ss_matrix, ss_list,
4693 ss_listsize);
4694 }
4695 } /* readquart_optn */
4696
4697 # if !PARALLEL /* do it every time in case of a parallel run - less comunication */
4698 if (readquart_optn)
4699 # endif /* PARALLEL */
4700 {
4701 int xx1, xx2, xx3, xx4, count;
4702 /* initialize qinfomatr */
4703 for (i = 0; i < Maxspc; i++) {
4704 qinfomatr[0][i] = 0; /* missing quartets */
4705 qinfomatr[1][i] = 0; /* fully resolved quartet 1 for taxon i */
4706 qinfomatr[2][i] = 0; /* fully resolved quartet 2 for taxon i */
4707 qinfomatr[3][i] = 0; /* fully resolved quartet 3 for taxon i */
4708 qinfomatr[4][i] = 0; /* partly resolved quartet 1 for taxon i */
4709 qinfomatr[5][i] = 0; /* partly resolved quartet 2 for taxon i */
4710 qinfomatr[6][i] = 0; /* partly resolved quartet 3 for taxon i */
4711 qinfomatr[7][i] = 0; /* unresolved quartet for taxon i */
4712 qinfomatr[8][i] = 0; /* sum of quartets for taxon i */
4713 }
4714
4715 /* initialize bad quartet memory */
4716 for (count = 0; count < Maxspc; count++) badtaxon[count] = 0;
4717 badqs = 0;
4718 fullresqs = 0;
4719 partresqs = 0;
4720 unresqs = 0;
4721 missingqs = 0;
4722
4723 for (xx4 = 3; xx4 < Maxspc; xx4++)
4724 for (xx3 = 2; xx3 < xx4; xx3++)
4725 for (xx2 = 1; xx2 < xx3; xx2++)
4726 for (xx1 = 0; xx1 < xx2; xx1++) {
4727 tmpweight = readquartet(xx1, xx2, xx3, xx4);
4728 /* compute sums of topologies per taxon, step by step */
4729 ++(qinfomatr[8][xx1]);
4730 ++(qinfomatr[8][xx2]);
4731 ++(qinfomatr[8][xx3]);
4732 ++(qinfomatr[8][xx4]);
4733 ++(qinfomatr[tmpweight][xx1]);
4734 ++(qinfomatr[tmpweight][xx2]);
4735 ++(qinfomatr[tmpweight][xx3]);
4736 ++(qinfomatr[tmpweight][xx4]);
4737
4738 if ((tmpweight <= 2) || (tmpweight == 4)) {
4739 fullresqs++;
4740 } else {
4741 if (tmpweight == 7) {
4742 unresqs++;
4743 } else {
4744 if (tmpweight == 0) {
4745 missingqs++;
4746 } else {
4747 partresqs++;
4748 }
4749 }
4750 }
4751
4752 if (tmpweight == 7) { /* to be deleted */
4753 badqs++;
4754 badtaxon[xx1]++;
4755 badtaxon[xx2]++;
4756 badtaxon[xx3]++;
4757 badtaxon[xx4]++;
4758 if (show_optn) {
4759 fputid10(unresfp, xx1);
4760 fprintf(unresfp, " ");
4761 fputid10(unresfp, xx2);
4762 fprintf(unresfp, " ");
4763 fputid10(unresfp, xx3);
4764 fprintf(unresfp, " ");
4765 fputid (unresfp, xx4);
4766 fprintf(unresfp, "\n");
4767 } /* if show_optn */
4768 } /* if 7 */ /* to be deleted */
4769 } /* end for xx4; for xx3; for xx2; for xx1 */
4770 if (show_optn) /* list all unresolved quartets */
4771 fclose(unresfp);
4772 } /* readquart_optn */
4773
4774 # if PARALLEL
4775 PP_SendAllQuarts(numquarts(Maxspc), quartetinfo);
4776 # endif /* PARALLEL */
4777
4778
4779 /*********************/
4780 /*** puzzling step ***/
4781 /*********************/
4782
4783 fprintf(STDOUT, "Computing quartet puzzling trees\n");
4784 fflush(STDOUT);
4785
4786 /* start timer - percentage of completed trees */
4787 time(&time0);
4788 time1 = time0;
4789 mflag = 0;
4790
4791 time(&PStepStarttime);
4792 # ifdef DEBUG_PRINTTIMES
4793 fprintftimespan(stdout, MLstepStoptime, PStepStarttime, Starttime, "MLstep -> Pstep");
4794 # endif /* DEBUG_PRINTTIMES */
4795
4796 /* open file for chronological list of puzzling step trees */
4797 if((listqptrees == PSTOUT_LIST) || (listqptrees == PSTOUT_LISTORDER))
4798 openfiletowrite(&qptlist, OUTPTLIST, "puzzling step trees (chonological)");
4799
4800 # if PARALLEL
4801 PP_SendDoPermutBlock(Numtrial);
4802 # else /* ! PARALLEL */
4803 addtimes(GENERAL, &tarr);
4804
4805 /* loop exported to allpstep */
4806 allpstep(Numtrial, quartetinfo, Maxspc, fixedorder_optn);
4807 # endif /* else ! PARALLEL */
4808
4809 time(&PStepStoptime);
4810 # ifdef DEBUG_PRINTTIMES
4811 fprintftimespan(stdout, PStepStarttime, PStepStoptime, Starttime, "Pstep");
4812 # endif /* DEBUG_PRINTTIMES */
4813
4814 /* close file for list of puzzling step trees */
4815 if((listqptrees == PSTOUT_LIST) || (listqptrees == PSTOUT_LISTORDER))
4816 closefile(qptlist);
4817
4818 if (mflag == 1) fprintf(STDOUT, "\n");
4819
4820 /* garbage collection */
4821 free(splitcomptemp);
4822
4823 # if ! PARALLEL
4824 free_cmatrix(biparts);
4825 # endif /* ! PARALLEL */
4826
4827
4828 /**********************/
4829 /*** consensus step ***/
4830 /**********************/
4831
4832 /* compute majority rule consensus tree */
4833 makeconsensus(Numtrial, qsupport_optn);
4834
4835 /* write consensus tree to tmp file */
4836 #if 0
4837 if (skipmlbranch_optn && (puzzlemode == QUARTPUZ))
4838 #endif
4839 if (!dotreelh_optn && (puzzlemode == QUARTPUZ)) {
4840 openfiletowrite(&tmpfp, TREEFILE, "output tree(s)");
4841 writeconsensustree(tmpfp, FALSE, qsupportarr);
4842 closefile(tmpfp);
4843 } else {
4844 tmpfp = tmpfile();
4845 writeconsensustree(tmpfp, FALSE, qsupportarr);
4846 }
4847 } /* recon_tree */
4848
4849 /***************************************************************/
4850
4851 void map_lklhd()
4852 {
4853 int i, a, a1, a2, b, b1, b2, c, c1, c2, d;
4854 uli nq;
4855 double logs[3], d1, d2, d3, temp;
4856 ivector qts, mlorder, gettwo;
4857 /* reset variables */
4858 ar1 = ar2 = ar3 = 0;
4859 reg1 = reg2 = reg3 = reg4 = reg5 = reg6 = reg7 = 0;
4860 reg1l = reg1r = reg2u = reg2d = reg3u = reg3d = reg4u =
4861 reg4d = reg5l = reg5r = reg6u = reg6d = 0;
4862
4863 /* place for random quartet */
4864 qts = new_ivector(4);
4865
4866 /* initialize output file */
4867 openfiletowrite(&trifp, TRIANGLE, "Postscript output");
4868 initps(trifp);
4869 fprintf(STDOUT, "Performing likelihood mapping analysis\n");
4870 fflush(STDOUT);
4871
4872 nq = 0;
4873 mflag = 0;
4874
4875 addtimes(GENERAL, &tarr);
4876 if (lmqts == 0) { /* all possible quartets */
4877
4878 if (numclust == 4) { /* four-cluster analysis */
4879
4880 for (a = 0; a < clustA; a++)
4881 for (b = 0; b < clustB; b++)
4882 for (c = 0; c < clustC; c++)
4883 for (d = 0; d < clustD; d++) {
4884
4885 nq++;
4886
4887 /* maximum likelihood values */
4888 /* approximate ML is sufficient */
4889 compute_quartlklhds(clusterA[a],clusterB[b],clusterC[c],clusterD[d],&d1,&d2,&d3, APPROX);
4890
4891 /* draw point for LM analysis */
4892 makelmpoint(trifp, d1, d2, d3);
4893 addtimes(QUARTETS, &tarr);
4894
4895 }
4896 }
4897
4898 if (numclust == 3) { /* three-cluster analysis */
4899
4900 gettwo = new_ivector(2);
4901
4902 for (a = 0; a < clustA; a++)
4903 for (b = 0; b < clustB; b++)
4904 for (c1 = 0; c1 < clustC-1; c1++)
4905 for (c2 = c1+1; c2 < clustC; c2++) {
4906
4907 nq++;
4908
4909 /* maximum likelihood values */
4910 /* approximate ML is sufficient */
4911 compute_quartlklhds(clusterA[a],clusterB[b],clusterC[c1],clusterC[c2],&d1,&d2,&d3, APPROX);
4912
4913 /* randomize order of d2 and d3 */
4914 if (randominteger(2) == 1) {
4915 temp = d3;
4916 d3 = d2;
4917 d2 = temp;
4918 }
4919
4920 /* draw point for LM analysis */
4921 makelmpoint(trifp, d1, d2, d3);
4922 addtimes(QUARTETS, &tarr);
4923
4924 }
4925 free_ivector(gettwo);
4926 }
4927
4928 if (numclust == 2) { /* two-cluster analysis */
4929
4930 gettwo = new_ivector(2);
4931
4932 for (a1 = 0; a1 < clustA-1; a1++)
4933 for (a2 = a1+1; a2 < clustA; a2++)
4934 for (b1 = 0; b1 < clustB-1; b1++)
4935 for (b2 = b1+1; b2 < clustB; b2++) {
4936
4937 nq++;
4938
4939 /* maximum likelihood values */
4940 /* approximate ML is sufficient */
4941 compute_quartlklhds(clusterA[a1],clusterA[a2],clusterB[b1],clusterB[b2],&d1,&d2,&d3, APPROX);
4942
4943 /* randomize order of d2 and d3 */
4944 if (randominteger(2) == 1) {
4945 temp = d3;
4946 d3 = d2;
4947 d2 = temp;
4948 }
4949
4950 /* draw point for LM analysis */
4951 makelmpoint(trifp, d1, d2, d3);
4952 addtimes(QUARTETS, &tarr);
4953
4954 }
4955
4956 free_ivector(gettwo);
4957 }
4958
4959 if (numclust == 1) { /* normal likelihood mapping (one cluster) */
4960
4961 mlorder = new_ivector(3);
4962
4963 for (i = 3; i < Maxspc; i++)
4964 for (c = 2; c < i; c++)
4965 for (b = 1; b < c; b++)
4966 for (a = 0; a < b; a++) {
4967
4968 nq++;
4969
4970 /* maximum likelihood values */
4971 /* approximate ML is sufficient */
4972 compute_quartlklhds(a,b,c,i,&logs[0],&logs[1],&logs[2], APPROX);
4973
4974 /* randomize order */
4975 chooser(3,3,mlorder);
4976 d1 = logs[mlorder[0]];
4977 d2 = logs[mlorder[1]];
4978 d3 = logs[mlorder[2]];
4979
4980 /* draw point for LM analysis */
4981 makelmpoint(trifp, d1, d2, d3);
4982 addtimes(QUARTETS, &tarr);
4983
4984 }
4985 free_ivector(mlorder);
4986 }
4987
4988 } else { /* randomly selected quartets */
4989
4990 if (numclust == 4) { /* four-cluster analysis */
4991
4992 for (lmqts = 0; lmqts < Numquartets; lmqts++) {
4993
4994 nq++;
4995
4996 /* choose random quartet */
4997 qts[0] = clusterA[ randominteger(clustA) ];
4998 qts[1] = clusterB[ randominteger(clustB) ];
4999 qts[2] = clusterC[ randominteger(clustC) ];
5000 qts[3] = clusterD[ randominteger(clustD) ];
5001
5002 /* maximum likelihood values */
5003 /* approximate ML is sufficient */
5004 compute_quartlklhds(qts[0],qts[1],qts[2],qts[3],&d1,&d2,&d3, APPROX);
5005
5006 /* draw point for LM analysis */
5007 makelmpoint(trifp, d1, d2, d3);
5008 addtimes(QUARTETS, &tarr);
5009
5010 }
5011 }
5012
5013 if (numclust == 3) { /* three-cluster analysis */
5014
5015 gettwo = new_ivector(2);
5016
5017 for (lmqts = 0; lmqts < Numquartets; lmqts++) {
5018
5019 nq++;
5020
5021 /* choose random quartet */
5022 qts[0] = clusterA[ randominteger(clustA) ];
5023 qts[1] = clusterB[ randominteger(clustB) ];
5024 chooser(clustC, 2, gettwo);
5025 qts[2] = clusterC[gettwo[0]];
5026 qts[3] = clusterC[gettwo[1]];
5027
5028 /* maximum likelihood values */
5029 /* approximate ML is sufficient */
5030 compute_quartlklhds(qts[0],qts[1],qts[2],qts[3],&d1,&d2,&d3, APPROX);
5031
5032 /* order of d2 and d3 is already randomized! */
5033
5034 /* draw point for LM analysis */
5035 makelmpoint(trifp, d1, d2, d3);
5036 addtimes(QUARTETS, &tarr);
5037
5038 }
5039
5040 free_ivector(gettwo);
5041 }
5042
5043 if (numclust == 2) { /* two-cluster analysis */
5044
5045 gettwo = new_ivector(2);
5046
5047 for (lmqts = 0; lmqts < Numquartets; lmqts++) {
5048
5049 nq++;
5050
5051 /* choose random quartet */
5052 chooser(clustA, 2, gettwo);
5053 qts[0] = clusterA[gettwo[0]];
5054 qts[1] = clusterA[gettwo[1]];
5055 chooser(clustB, 2, gettwo);
5056 qts[2] = clusterB[gettwo[0]];
5057 qts[3] = clusterB[gettwo[1]];
5058
5059 /* maximum likelihood values */
5060 /* approximate ML is sufficient */
5061 compute_quartlklhds(qts[0],qts[1],qts[2],qts[3],&d1,&d2,&d3, APPROX);
5062
5063 /* order of d2 and d3 is already randomized! */
5064
5065 /* draw point for LM analysis */
5066 makelmpoint(trifp, d1, d2, d3);
5067 addtimes(QUARTETS, &tarr);
5068
5069 }
5070 free_ivector(gettwo);
5071 }
5072
5073 if (numclust == 1) { /* normal likelihood mapping (one cluster) */
5074
5075 for (lmqts = 0; lmqts < Numquartets; lmqts++) {
5076
5077 nq++;
5078
5079 /* choose random quartet */
5080 chooser(Maxspc, 4, qts);
5081
5082 /* maximum likelihood values */
5083 /* approximate ML is sufficient */
5084 compute_quartlklhds(qts[0],qts[1],qts[2],qts[3],&d1,&d2,&d3, APPROX);
5085
5086 /* order of d1, d2, and d3 is already randomized! */
5087
5088 /* draw point for LM analysis */
5089 makelmpoint(trifp, d1, d2, d3);
5090 addtimes(QUARTETS, &tarr);
5091
5092 }
5093 }
5094 }
5095
5096 finishps(trifp);
5097 closefile(trifp);
5098 free_ivector(qts);
5099
5100 } /* map_lklhd */
5101
5102 /***************************************************************/
5103
5104 void setdefaults() {
5105
5106 strcpy(INFILE, INFILEDEFAULT);
5107 strcpy(OUTFILE, OUTFILEDEFAULT);
5108 strcpy(TREEFILE, TREEFILEDEFAULT);
5109 strcpy(INTREE, INTREEDEFAULT);
5110 strcpy(DISTANCES, DISTANCESDEFAULT);
5111 strcpy(TRIANGLE, TRIANGLEDEFAULT);
5112 strcpy(UNRESOLVED, UNRESOLVEDDEFAULT);
5113 strcpy(ALLQUART, ALLQUARTDEFAULT);
5114 strcpy(ALLQUARTLH, ALLQUARTLHDEFAULT);
5115 strcpy(SITELH, SITELHDEFAULT);
5116 strcpy(SITELHB, SITELHBDEFAULT);
5117 strcpy(SITERATE, SITERATEDEFAULT);
5118 strcpy(SITERATEB, SITERATEBDEFAULT);
5119 strcpy(OUTPARAM, OUTPARAMDEFAULT);
5120 strcpy(SUBSET, SUBSETDEFAULT);
5121 strcpy(OUTPTLIST, OUTPTLISTDEFAULT);
5122 strcpy(OUTPTORDER, OUTPTORDERDEFAULT);
5123
5124 setprefix_optn = SETPREFIX_NONE; /* 0: not set */
5125 /* 1: INFILE */
5126 /* 2: INTREE */
5127 /* 3: reset by openfileto... */
5128 /* 4: -prefix= */
5129
5130 usebestq_optn = FALSE;
5131 usebranch_optn = FALSE;
5132 printrmatr_optn = FALSE;
5133 savequartlh_optn = FALSE;
5134 savequart_optn = FALSE;
5135 readquart_optn = FALSE;
5136 fixedorder_optn = FALSE;
5137 #if 0
5138 skipmlbranch_optn = FALSE;
5139 #endif
5140 conssub50_optn = FALSE;
5141 qsupport_optn = FALSE;
5142 dotreetest_optn = TRUE;
5143 dotreelh_optn = TRUE;
5144 savesitelhb_optn = FALSE;
5145 savesitelh_optn = FALSE;
5146 savesiterateb_optn = FALSE;
5147 savesiterate_optn = FALSE;
5148 consensus_optn = FALSE;
5149 readsubset_optn = FALSE;
5150
5151 #if 0
5152 saveparam_optn = FALSE;
5153 #endif
5154
5155
5156 randseed = -1; /* to set random random seed */
5157
5158 } /* setdefaults */
5159
5160 /***************************************************************/
5161
5162 void printversion(char *fname)
5163 {
5164 # if ! PARALLEL
5165 fprintf(stderr, "puzzle (%s) %s\n", PACKAGE, VERSION);
5166 #else
5167 fprintf(stderr, "ppuzzle (%s) %s\n", PACKAGE, VERSION);
5168 PP_SendDone();
5169 MPI_Finalize();
5170 # endif
5171 exit (0);
5172 } /* printversion */
5173 /***************************************************************/
5174
5175 void printusage(char *fname)
5176 {
5177 /* |<--- 80 characters --->| */
5178 fprintf(stderr, "\n\nUsage: %s [-h] [options] [ Infilename [ UserTreeFilename ] ]\n\n", fname);
5179 fprintf(stderr, " -h - print usage\n");
5180 fprintf(stderr, " -prefix=XXX - use 'XXX' as filename prefix instead of 'Infilename'\n");
5181 fprintf(stderr, " of 'Infilename' or 'UserTreeFilename'\n");
5182 fprintf(stderr, " -consmrel - perform consensus step below 50%% to first incongruence,\n");
5183 fprintf(stderr, " that is a 'relative mayority consensus' is built.\n");
5184 fprintf(stderr, " -randseed<#> \n");
5185 fprintf(stderr, " -randseed=<#> - use <#> as random number seed, for debug purposes only\n");
5186 fprintf(stderr, " Infilename - Filename of multiple sequence alignment in PHYLIP format\n");
5187 fprintf(stderr, " UserTreeFilename - Filename of the treefile in PHYLIP format\n");
5188 # if PARALLEL
5189 PP_SendDone();
5190 MPI_Finalize();
5191 # endif
5192 exit (1);
5193 } /* printusage */
5194
5195 /***************************************************************/
5196
5197 void printusagehhh(char *fname)
5198 {
5199 fprintf(stderr, "\n\nUsage: %s [options] [ Infilename [ UserTreeFilename ] ]\n\n", fname);
5200 /* |<--- 80 characters --->| */
5201 fprintf(stderr, " -h - print usage\n");
5202 fprintf(stderr, " -prefix=XXX - use 'XXX' as filename prefix instead of 'Infilename'\n");
5203 fprintf(stderr, " of 'Infilename' or 'UserTreeFilename'\n");
5204 fprintf(stderr, " -consmrel\n");
5205 fprintf(stderr, " -sub50 - perform consensus step below 50%% to first incongruence\n");
5206 fprintf(stderr, " -randseed<#> \n");
5207 fprintf(stderr, " -randseed=<#> - use <#> as random number seed, for debug purposes only\n");
5208 fprintf(stderr, " -wqf - write quartet file to PREFIX.allquart\n");
5209 fprintf(stderr, " -rqf - read quartet file from PREFIX.allquart\n");
5210 fprintf(stderr, " -wqlb - write quart llhs to PREFIX.allquartlh (bin) [non-parallel]\n");
5211 fprintf(stderr, " -wqla - write quart llhs to PREFIX.allquartlh (ASCII)[non-parallel]\n");
5212 fprintf(stderr, " -wsl - write site llhs to PREFIX.sitelh (PHILIP-like)\n");
5213 fprintf(stderr, " -wsr - write site rates to PREFIX.siterate (PHYLIP-like)\n");
5214 fprintf(stderr, " -bestq - use best quart, no Bayesian weights\n");
5215 fprintf(stderr, " -printrmat - print rate matrix to screen\n");
5216 fprintf(stderr, " -consensus - construct a consensus tree from user trees [non-parallel]\n");
5217 fprintf(stderr, " -usebranch - use branch length from usertree file\n");
5218 fprintf(stderr, " -achim - reads quartet set and sets puzzling step orders to 1,2,...n\n");
5219 fprintf(stderr, " -rssm - read subset matrix from PREFIX.subsetmatr for missing data\n");
5220 fprintf(stderr, " -notreetest - skip test for usertree topologies (KH, SH, ELW)\n");
5221 fprintf(stderr, " -notreelh - skip likelihood/branch length (QP or consensus)\n");
5222 fprintf(stderr, " Infilename - Filename of multiple sequence alignment in PHYLIP format\n");
5223 fprintf(stderr, " UserTreeFilename - Filename of the treefile in PHYLIP format\n");
5224 # ifdef USE_ADJUSTABLE_EPS
5225 fprintf(stderr, " -epsrate=# - stop threshold for rate heterogeneity estimation (%.2e)\n", EPSILON_RATEPARAM_DEFAULT);
5226 fprintf(stderr, " -epssubst=# - stop threshold for subst. parameter estimation (%.2e)\n", EPSILON_SUBSTPARAM_DEFAULT);
5227 fprintf(stderr, " -epsbranch=# - stop threshold for branch length estimation (%.2e)\n", EPSILON_BRANCH_DEFAULT);
5228 fprintf(stderr, " -epsheight=# - stop threshold for tree height estimation (%.2e)\n", EPSILON_HEIGHTS_DEFAULT);
5229 # endif
5230 fprintf(stderr, "\n");
5231 #ifdef HHH
5232 /* |<--- 80 characters --->| */
5233 fprintf(stderr, " not yet implemented in this version:\n");
5234 fprintf(stderr, " -wslb - write site lhs to PREFIX.sitelhb (binary) [not yet]\n");
5235 fprintf(stderr, " -wsrb - write site rates to PREFIX.siterateb (binary) [not yet]\n");
5236 fprintf(stderr, " -wparam - write params to PREFIX.param [not yet]\n");
5237 fprintf(stderr, " -qsupport - prints quartet support for incorporated splits [not yet]\n");
5238 fprintf(stderr, " -consth=<#> - consensus threshold, e.g. 50=M_50 consensus [not yet]\n");
5239 fprintf(stderr, " -wqnex - write quartets in nexus format [not yet]\n");
5240 fprintf(stderr, " -wsnex - write splits in nexus format [not yet]\n");
5241 fprintf(stderr, " -wsth=<#> - split threshold, e.g. 50 - splits of the M_50 [not yet]\n");
5242 fprintf(stderr, " -wsnex=<#> - write splits with > <#>%% occurence - nexus format[not yet]\n");
5243 fprintf(stderr, " -witnex - write intermediate trees in nexus format [not yet]\n");
5244 fprintf(stderr, " -quartml - use ML values to evaluate quartets [not yet]\n");
5245 fprintf(stderr, " -quartmlapprox - use approximate ML values to evaluate quartets [default]\n");
5246 fprintf(stderr, " -quartnj - use NJ to evaluate quartets [not yet]\n");
5247 fprintf(stderr, " -quartme - use ME to evaluate quartets [not yet]\n");
5248 fprintf(stderr, " -quartpars - use parsimony to evaluate quartets [not yet]\n");
5249 fprintf(stderr, "\n");
5250 #endif /* HHH */
5251
5252 # if PARALLEL
5253 PP_SendDone();
5254 MPI_Finalize();
5255 # endif
5256 exit (2);
5257 } /* printusagehhh */
5258
5259 /***************************************************************/
5260
5261 /* infiletype 1=infile, 2=intree */
5262 void setfilenames(int setprefix_optn, int infiletype, char fname[]){
5263
5264 # if 0
5265 fprintf(stderr, "XXXX: %d, %d, %s\n", setprefix_optn, infiletype, fname);
5266 # endif
5267
5268
5269 /* set filenames, if set by -prefix=, INFILE, or INTREE */
5270 if (setprefix_optn < SETPREFIX_PREFIX) {
5271 switch(setprefix_optn) {
5272 case SETPREFIX_INTREE:
5273 sprintf(FILEPREFIX, "%s", INTREE);
5274 break;
5275 case SETPREFIX_INFILE:
5276 sprintf(FILEPREFIX, "%s", INFILE);
5277 break;
5278 default:
5279 break;
5280 }
5281 }
5282
5283 sprintf(OUTFILE ,"%s.%s", FILEPREFIX, OUTFILEEXT);
5284 sprintf(TREEFILE ,"%s.%s", FILEPREFIX, TREEFILEEXT);
5285 sprintf(DISTANCES ,"%s.%s", FILEPREFIX, DISTANCESEXT);
5286 sprintf(TRIANGLE ,"%s.%s", FILEPREFIX, TRIANGLEEXT);
5287 sprintf(UNRESOLVED ,"%s.%s", FILEPREFIX, UNRESOLVEDEXT);
5288 sprintf(ALLQUART ,"%s.%s", FILEPREFIX, ALLQUARTEXT);
5289 sprintf(ALLQUARTLH ,"%s.%s", FILEPREFIX, ALLQUARTLHEXT);
5290 sprintf(SITELH ,"%s.%s", FILEPREFIX, SITELHEXT);
5291 sprintf(SITELHB ,"%s.%s", FILEPREFIX, SITELHBEXT);
5292 sprintf(SITERATE ,"%s.%s", FILEPREFIX, SITERATEEXT);
5293 sprintf(SITERATEB ,"%s.%s", FILEPREFIX, SITERATEBEXT);
5294 sprintf(OUTPARAM ,"%s.%s", FILEPREFIX, OUTPARAMEXT);
5295 sprintf(SUBSET ,"%s.%s", FILEPREFIX, SUBSETEXT);
5296 sprintf(OUTPTLIST ,"%s.%s", FILEPREFIX, OUTPTLISTEXT);
5297 sprintf(OUTPTORDER ,"%s.%s", FILEPREFIX, OUTPTORDEREXT);
5298
5299 } /* scancmdline */
5300
5301
5302 /***************************************************************/
5303
5304
5305 void scancmdline(int *argc, char **argv[])
5306 {
5307 static short infileset = 0;
5308 static short intreefileset = 0;
5309 short flagused;
5310 int n;
5311 int count, dummyint;
5312 # ifdef USE_ADJUSTABLE_EPS
5313 double dummydbl;
5314 # endif
5315
5316 for (n = 1; n < *argc; n++) {
5317 # ifdef VERBOSE1
5318 printf("argv[%d] = %s\n", n, (*argv)[n]);
5319 # endif
5320
5321 flagused = FALSE;
5322
5323 /* use given prefix rather than INFILENAME for output filename */
5324 dummyint = 0;
5325 count = sscanf((*argv)[n],"-prefix=%s%n", FILEPREFIX, &dummyint);
5326 if (dummyint>8) {
5327 printf("PREFIX set to = \"%s\"\n", FILEPREFIX);
5328 setprefix_optn = SETPREFIX_PREFIX;
5329 flagused = TRUE;
5330 }
5331
5332 /* write quartet likelihoods (binary) - only in sequential version */
5333 dummyint = 0;
5334 count = sscanf((*argv)[n], "-wqlb%n", &dummyint);
5335 if (dummyint == 5) {
5336 savequartlh_optn = TRUE;
5337 saveqlhbin_optn = TRUE;
5338 flagused = TRUE;
5339 }
5340
5341 /* write quartet likelihoods (ASCII) - only in sequential version */
5342 dummyint = 0;
5343 count = sscanf((*argv)[n], "-wqla%n", &dummyint);
5344 if (dummyint == 5) {
5345 savequartlh_optn = TRUE;
5346 saveqlhbin_optn = FALSE;
5347 flagused = TRUE;
5348 }
5349
5350 /* write quartet topologies */
5351 dummyint = 0;
5352 count = sscanf((*argv)[n], "-wqf%n", &dummyint);
5353 if (dummyint == 4) {
5354 savequart_optn = TRUE;
5355 flagused = TRUE;
5356 }
5357
5358 /* read quartet topologies -> no ML step */
5359 dummyint = 0;
5360 count = sscanf((*argv)[n],"-rqf%n", &dummyint);
5361 if (dummyint == 4) {
5362 readquart_optn = TRUE;
5363 flagused = TRUE;
5364 }
5365
5366 /* use quartet topology with best likelihood -> no posterior probs */
5367 dummyint = 0;
5368 count = sscanf((*argv)[n],"-bestq%n", &dummyint);
5369 if (dummyint == 6) {
5370 usebestq_optn = TRUE;
5371 flagused = TRUE;
5372 }
5373
5374 /* print substitution matrix Q to the screen */
5375 dummyint = 0;
5376 count = sscanf((*argv)[n],"-printrmat%n", &dummyint);
5377 if (dummyint == 10) {
5378 printrmatr_optn = TRUE;
5379 flagused = TRUE;
5380 }
5381
5382 /* print extended help */
5383 dummyint = 0;
5384 count = sscanf((*argv)[n],"-hhh%n", &dummyint);
5385 if (dummyint==4) {
5386 printusagehhh((*argv)[0]);
5387 flagused = TRUE;
5388 }
5389
5390 /* resolve congruent splits < 50% (M_rel) in consensus */
5391 dummyint = 0;
5392 count = sscanf((*argv)[n],"-sub50%n", &dummyint);
5393 if (dummyint == 6) {
5394 conssub50_optn = TRUE;
5395 flagused = TRUE;
5396 }
5397
5398 /* resolve congruent splits < 50% (M_rel) in consensus */
5399 dummyint = 0;
5400 count = sscanf((*argv)[n],"-consmrel%n", &dummyint);
5401 if (dummyint == 9) {
5402 conssub50_optn = TRUE;
5403 flagused = TRUE;
5404 }
5405
5406 /* include: fixed order addition (1,2,3,...,n) */
5407 /* no ML branch lengths for consensus */
5408 /* read quartets from file */
5409 dummyint = 0;
5410 count = sscanf((*argv)[n],"-achim%n", &dummyint);
5411 if (dummyint == 6) {
5412 fixedorder_optn = TRUE;
5413 #if 0
5414 skipmlbranch_optn = TRUE;
5415 #endif
5416 dotreelh_optn = FALSE;
5417 readquart_optn = TRUE;
5418 flagused = TRUE;
5419 }
5420
5421 # ifdef HHH
5422 /* save site likelihoods (binary) file */
5423 dummyint = 0;
5424 count = sscanf((*argv)[n], "-wslb%n", &dummyint);
5425 if (dummyint == 5) {
5426 savesitelhb_optn = TRUE;
5427 fprintf(stderr, "cmdline option '-wslb' is not yet implemented!!!\n");
5428 flagused = TRUE;
5429 }
5430 # endif /* HHH */
5431
5432 /* save site likelihoods to PHYLIP like file */
5433 dummyint = 0;
5434 count = sscanf((*argv)[n], "-wsl%n", &dummyint);
5435 if (dummyint == 4) {
5436 savesitelh_optn = TRUE;
5437 flagused = TRUE;
5438 }
5439
5440 # ifdef HHH
5441 /* save site rates (binary) file */
5442 dummyint = 0;
5443 count = sscanf((*argv)[n], "-wsrb%n", &dummyint);
5444 if (dummyint == 5) {
5445 savesiterateb_optn = TRUE;
5446 fprintf(stderr, "cmdline option '-wsrb' is not yet implemented!!!\n");
5447 flagused = TRUE;
5448 }
5449 # endif /* HHH */
5450
5451 /* save site rates to PHYLIP like file */
5452 dummyint = 0;
5453 count = sscanf((*argv)[n], "-wsr%n", &dummyint);
5454 if (dummyint == 4) {
5455 savesiterate_optn = TRUE;
5456 flagused = TRUE;
5457 }
5458
5459 /* print version */
5460 dummyint = 0;
5461 count = sscanf((*argv)[n],"-V%n", &dummyint);
5462 if (dummyint==2) {
5463 printversion((*argv)[0]);
5464 flagused = TRUE;
5465 }
5466
5467 /* print version */
5468 dummyint = 0;
5469 count = sscanf((*argv)[n],"-version%n", &dummyint);
5470 if (dummyint==8) {
5471 printversion((*argv)[0]);
5472 flagused = TRUE;
5473 }
5474
5475 /* print version */
5476 dummyint = 0;
5477 count = sscanf((*argv)[n],"--version%n", &dummyint);
5478 if (dummyint>=4) {
5479 printversion((*argv)[0]);
5480 flagused = TRUE;
5481 }
5482
5483 /* print help message */
5484 dummyint = 0;
5485 count = sscanf((*argv)[n],"-h%n", &dummyint);
5486 if (dummyint==2) {
5487 printusage((*argv)[0]);
5488 flagused = TRUE;
5489 }
5490
5491 /* set random number seed to fixed value */
5492 count = sscanf((*argv)[n],"-randseed%d", &dummyint);
5493 if (count == 1) {
5494 randseed = dummyint;
5495 flagused = TRUE;
5496 }
5497
5498 /* set random number seed to fixed value */
5499 count = sscanf((*argv)[n],"-randseed=%d", &dummyint);
5500 if (count == 1) {
5501 randseed = dummyint;
5502 flagused = TRUE;
5503 }
5504
5505 # ifdef USE_ADJUSTABLE_EPS
5506 /* set stop threshold for branch length estimation */
5507 dummydbl=EPSILON_BRANCH_DEFAULT;
5508 count = sscanf((*argv)[n],"-epsbranch%lf", &dummydbl);
5509 if (count == 1) {
5510 EPSILON_BRANCH = dummydbl;
5511 fprintf(stderr, "branch length estimation threshold set to %e! (default=%e)\n",
5512 EPSILON_BRANCH, EPSILON_BRANCH_DEFAULT);
5513 flagused = TRUE;
5514 }
5515
5516 /* set stop threshold for tree heights estimation */
5517 dummydbl=EPSILON_HEIGHTS_DEFAULT;
5518 count = sscanf((*argv)[n],"-epsheight%lf", &dummydbl);
5519 if (count == 1) {
5520 EPSILON_HEIGHTS = dummydbl;
5521 fprintf(stderr, "tree heights estimation threshold set to %e! (default=%e)\n",
5522 EPSILON_HEIGHTS, EPSILON_HEIGHTS_DEFAULT);
5523 flagused = TRUE;
5524 }
5525
5526 /* set stop threshold for rate heterogeneity estimation */
5527 dummydbl=EPSILON_RATEPARAM_DEFAULT;
5528 count = sscanf((*argv)[n],"-epsrate%lf", &dummydbl);
5529 if (count == 1) {
5530 EPSILON_RATEPARAM = dummydbl;
5531 fprintf(stderr, "rate heterogeneity estimation threshold set to %e! (default=%.2e)\n",
5532 EPSILON_RATEPARAM, EPSILON_RATEPARAM_DEFAULT);
5533 flagused = TRUE;
5534 }
5535
5536 /* set stop threshold for substitution model parameter estimation */
5537 dummydbl=EPSILON_SUBSTPARAM_DEFAULT;
5538 count = sscanf((*argv)[n],"-epssubst%lf", &dummydbl);
5539 if (count == 1) {
5540 EPSILON_SUBSTPARAM = dummydbl;
5541 fprintf(stderr, "substitution model parameter estimation threshold set to %e! (default=%e)\n",
5542 EPSILON_SUBSTPARAM, EPSILON_SUBSTPARAM_DEFAULT);
5543 flagused = TRUE;
5544 }
5545 # endif
5546
5547 /* compute consensus from usertrees (also 'k' in menu) */
5548 dummyint = 0;
5549 count = sscanf((*argv)[n],"-consensus%n", &dummyint);
5550 if (dummyint == 10) {
5551 consensus_optn = TRUE;
5552 flagused = TRUE;
5553 }
5554
5555 /* read branch lengths from usertree file */
5556 dummyint = 0;
5557 count = sscanf((*argv)[n],"-usebranch%n", &dummyint);
5558 if (dummyint == 10) {
5559 usebranch_optn = TRUE;
5560 flagused = TRUE;
5561 }
5562
5563 /* read subset matrix for missing data analysis */
5564 dummyint = 0;
5565 count = sscanf((*argv)[n], "-rssm%n", &dummyint);
5566 if (dummyint == 5) {
5567 readsubset_optn = TRUE;
5568 readquart_optn = TRUE;
5569 flagused = TRUE;
5570 }
5571
5572 /* write quartet support for splits */
5573 dummyint = 0;
5574 count = sscanf((*argv)[n], "-qsupport%n", &dummyint);
5575 if (dummyint == 9) {
5576 qsupport_optn = TRUE;
5577 flagused = TRUE;
5578 }
5579
5580 /* skip tree tests (KH,SH,ELW) */
5581 dummyint = 0;
5582 count = sscanf((*argv)[n], "-notreetest%n", &dummyint);
5583 if (dummyint == 11) {
5584 dotreetest_optn = FALSE;
5585 flagused = TRUE;
5586 }
5587
5588 /* skip lh/branch length computation (QP/consensus)*/
5589 dummyint = 0;
5590 count = sscanf((*argv)[n], "-notreelh%n", &dummyint);
5591 if (dummyint == 9) {
5592 dotreelh_optn = FALSE;
5593 flagused = TRUE;
5594 }
5595
5596 #if 0
5597 /* use window of alignment */
5598 /* todo (HAS) */
5599 count = sscanf((*argv)[n],"-ws%d", &dummyint);
5600 if (count == 1) windowsize = dummyint;
5601
5602 /* write parameter file (for resart) */
5603 dummyint = 0;
5604 count = sscanf((*argv)[n], "-wparam%n", &dummyint);
5605 if (dummyint == 7) {
5606 saveparam_optn = TRUE;
5607 flagused = TRUE;
5608 }
5609
5610
5611 #endif
5612
5613 /* get alignment filename */
5614 if ((*argv)[n][0] != '-') {
5615 if (infileset == 0) {
5616 if (strlen((*argv)[n]) >= FILENAMELENGTH) {
5617 fprintf(STDOUT, "Unable to proceed (filename too long)\n");
5618 fprintf(STDOUT, "You may want to recompile with FILENAMELENGTH>%d\n\n\n", FILENAMELENGTH);
5619 # if PARALLEL
5620 PP_Finalize();
5621 # endif
5622 exit(1);
5623 }
5624 strcpy(INFILE, (*argv)[n]);
5625 infileset++;
5626 if (setprefix_optn < SETPREFIX_PREFIX) {
5627 strcpy(FILEPREFIX, INFILE);
5628 /* printf("PREFIX set to = \"%s\"\n", FILEPREFIX); */
5629 setprefix_optn = SETPREFIX_INFILE;
5630 }
5631 fprintf(STDOUT, "Input file: %s\n", INFILE);
5632 flagused = TRUE;
5633 } else {
5634 /* get usertree filename */
5635 if (intreefileset == 0) {
5636 if (strlen((*argv)[n]) >= FILENAMELENGTH) {
5637 fprintf(STDOUT, "Unable to proceed (filename too long)\n");
5638 fprintf(STDOUT, "You may want to recompile with FILENAMELENGTH>%d\n\n\n", FILENAMELENGTH);
5639 # if PARALLEL
5640 PP_Finalize();
5641 # endif
5642 exit(1);
5643 }
5644 strcpy(INTREE, (*argv)[n]);
5645 intreefileset++;
5646 if (setprefix_optn < SETPREFIX_PREFIX) {
5647 strcpy(FILEPREFIX, INTREE);
5648 /* printf("PREFIX set to = \"%s\"\n", FILEPREFIX); */
5649 setprefix_optn = SETPREFIX_INTREE;
5650 }
5651 fprintf(STDOUT, "Usertree file: %s\n", INTREE);
5652 flagused = TRUE;
5653 }
5654 }
5655
5656 }
5657 if (flagused == FALSE) {
5658 fprintf(stderr, "WARNING: commandline parameter %d not recognized (\"%s\")\n", n, (*argv)[n]);
5659 }
5660 flagused = FALSE;
5661 }
5662
5663 /* setprefix_optn = SETPREFIX_INFILE; */
5664 if (setprefix_optn > 0) {
5665 setfilenames(setprefix_optn, RESET_NONE, INFILE);
5666 }
5667
5668 } /* scancmdline */
5669
5670
5671 /***************************************************************/
5672
5673 void inputandinit(int *argc, char **argv[]) {
5674
5675 int ci;
5676 int taxon;
5677 int reset;
5678 #ifdef EPE_DEBUG
5679 int i; /*epe*/
5680 #endif /* EPE_DEBUG */
5681
5682 # ifndef ALPHA
5683 fprintf(STDOUT, "\n\n\nWELCOME TO TREE-PUZZLE %s!\n\n\n\n", VERSION);
5684 # else
5685 fprintf(STDOUT, "\n\n\nWELCOME TO TREE-PUZZLE %s%s!\n\n\n\n", VERSION, ALPHA);
5686 # endif
5687
5688
5689 /* vectors used in QP and LM analysis */
5690 qweight = new_dvector(3);
5691 sqdiff = new_dvector(3);
5692 qworder = new_ivector(3);
5693 sqorder = new_ivector(3);
5694
5695 /**************************************/
5696 /*** pre-calculations of parameters ***/
5697 /**************************************/
5698
5699 /* Initialization and parsing of Commandline */
5700 setdefaults();
5701 scancmdline(argc, argv);
5702
5703 /* initialize random numbers generator */
5704 if (randseed >= 0)
5705 fprintf(stderr, "WARNING: random seed set to %d for debugging!\n", randseed);
5706 randseed = initrandom(randseed);
5707 fprintf(STDOUT, "RANDOM SEED: %d\n", randseed);
5708
5709 psteptreelist = NULL;
5710 psteptreesum = 0;
5711 bestratefound = 0;
5712
5713 /* get sequences */
5714 reset = openfiletoread(&seqfp, INFILE, "sequence data");
5715 #if 0
5716 fprintf(stderr, "INFILE=%s\n", INFILE);
5717 #endif
5718 if (reset && (setprefix_optn < SETPREFIX_INTREE)) {
5719 setprefix_optn = SETPREFIX_INFILE;
5720 setfilenames(setprefix_optn, RESET_INFILE, INFILE);
5721 }
5722 /* XXX INFILE */
5723 readsequencefile(seqfp, &Maxspc, &Maxseqc, &Identif, &Namestr, &Seqchars);
5724 fprintf(STDOUT, "\nInput data set (%s) contains %d sequences of length %d\n", INFILE, Maxspc, Maxseqc);
5725
5726 Maxbrnch = 2*Maxspc - 3;
5727
5728 /* output taxa to STDOUT */
5729 for (taxon = 0; taxon < Maxspc; taxon++) {
5730 fprintf(stdout, "%4d. ", taxon+1);
5731 fputid(stdout, taxon);
5732 fprintf(stdout, "\n");
5733 } /* for taxon (species) */
5734
5735 closefile(seqfp);
5736 data_optn = guessdatatype(Seqchars, Maxspc, Maxseqc);
5737
5738 /* translate characters into format used by ML engine */
5739 nuc_optn = TRUE;
5740 SH_optn = FALSE;
5741 Seqchar = NULL;
5742 translatedataset(Maxspc, Maxseqc, &Maxsite, Seqchars, &Seqchar, &Seqgapchar, &Seqotherchar);
5743
5744 /* estimate base frequencies from data set */
5745 Freqtpm = NULL;
5746 Basecomp = NULL;
5747 estimatebasefreqs();
5748
5749 /* guess model of substitution */
5750 guessmodel();
5751
5752 /* initialize guess variables */
5753 auto_datatype = AUTO_GUESS;
5754 if (data_optn == AMINOACID) auto_aamodel = AUTO_GUESS;
5755 else auto_aamodel = AUTO_DEFAULT;
5756 /* save guessed amino acid options */
5757 guessDayhf_optn = Dayhf_optn;
5758 guessJtt_optn = Jtt_optn;
5759 guessmtrev_optn = mtrev_optn;
5760 guesscprev_optn = cprev_optn;
5761 guessblosum62_optn = blosum62_optn;
5762 guessvtmv_optn = vtmv_optn;
5763 guesswag_optn = wag_optn;
5764 guessauto_aamodel = auto_aamodel;
5765
5766
5767 /* check for user specified tree */
5768 if ((utfp = fopen(INTREE, "r")) != NULL) {
5769 fclose(utfp);
5770 if (! consensus_optn) puzzlemode = USERTREE;
5771 else puzzlemode = CONSENSUS;
5772 } else {
5773 puzzlemode = QUARTPUZ;
5774 }
5775
5776 /* reserve memory for cluster LM analysis */
5777 clusterA = new_ivector(Maxspc);
5778 clusterB = new_ivector(Maxspc);
5779 clusterC = new_ivector(Maxspc);
5780 clusterD = new_ivector(Maxspc);
5781
5782
5783 /**********************************/
5784 /*** menu setting of parameters ***/
5785 /**********************************/
5786
5787 /* set options interactively */
5788 setoptions();
5789
5790 /* open usertree file right after start */
5791 if (typ_optn == TREERECON_OPTN && (puzzlemode == USERTREE || puzzlemode == CONSENSUS)) {
5792 openfiletoread(&utfp, INTREE, "user trees");
5793 if (reset && (setprefix_optn < SETPREFIX_PREFIX)) {
5794 setprefix_optn = SETPREFIX_INTREE;
5795 setfilenames(setprefix_optn, RESET_INTREE, INTREE);
5796 }
5797 /* XXX INFILE */
5798 }
5799
5800 /* start main timer */
5801 time(&Starttime);
5802 Startcpu=clock();
5803 addtimes(OPTIONS, &tarr);
5804
5805 /* symmetrize doublet frequencies if specified */
5806 symdoublets();
5807
5808 /* initialise ML */
5809 mlstart();
5810
5811 /* determine how many usertrees */
5812 if (typ_optn == TREERECON_OPTN && (puzzlemode == USERTREE || puzzlemode == CONSENSUS)) {
5813 numutrees = 0;
5814 do {
5815 ci = fgetc(utfp);
5816 if ((char) ci == ';') numutrees++;
5817 } while (ci != EOF);
5818 rewind(utfp);
5819 if (numutrees < 1) {
5820 fprintf(STDOUT, "Unable to proceed (no tree in input tree file)\n\n\n");
5821 # if PARALLEL
5822 PP_Finalize();
5823 # endif
5824 exit(1);
5825 }
5826 }
5827
5828 /*epe PP_SendSizes ()/SendData() relocated*/
5829 # if PARALLEL
5830 # ifndef USE_WINDOWS
5831 PP_SendSizes(Maxspc, Maxsite,
5832 numcats, Numptrn, tpmradix,
5833 outgroup, fracconst, randseed,
5834 fixedorder_optn, consensus_optn);
5835 # else
5836 PP_SendSizes(Maxspc, Maxsite,
5837 alimaxsite, alistart, aliend,
5838 numcats, Numptrn, tpmradix,
5839 outgroup, fracconst, randseed,
5840 fixedorder_optn, consensus_optn);
5841 # endif
5842 PP_SendData(Seqpat, /* cmatrix */
5843 Alias, Weight, constpat, /* ivector */
5844 Rates, Eval, Freqtpm, /* dvector */
5845 Evec, Ievc, iexp,
5846 /* Distanmat,*/ /* dmatrix */
5847 ltprobr); /* dcube */
5848 PP_NoUpdate();
5849 # endif /* PARALLEL */
5850
5851 addtimes(GENERAL, &tarr);
5852 time(&PEstStarttime);
5853 # ifdef PRINTTIME_DEBUG
5854 fprintftimespan(stdout, Starttime, PEstStarttime, Starttime, "Start -> PEst");
5855 # endif /* PRINTTIME_DEBUG */
5856
5857 /*moved from mlstart by epe*/
5858 /* computing ML distances */
5859 computedistan();
5860
5861 #ifdef EPE_DEBUG
5862 /*epe*/ for (i=1; i<Maxspc/2; i++) fprintf(STDOUT, "%.12f ", Distanmat[0][i]/100.0);
5863 fprintf(STDOUT, "\n");
5864 #endif /* EPE_DEBUG */
5865
5866 /* check fraction of invariable sites */
5867 if ((rhetmode == TWORATE || rhetmode == MIXEDRATE) && !fracinv_optim)
5868 /* fraction of invariable site was specified manually */
5869 if (fracinv > MAXFI)
5870 fracinv = MAXFI;
5871
5872 addtimes(GENERAL, &tarr);
5873 time(&PEstStarttime);
5874 # ifdef DEBUG_PRINTTIMES
5875 fprintftimespan(stdout, Starttime, PEstStarttime, Starttime, "Start -> PEst");
5876 # endif /* DEBUG_PRINTTIMES */
5877
5878
5879 /***************************/
5880 /*** estimate parameters ***/
5881 /***************************/
5882
5883 if (!(typ_optn == TREERECON_OPTN && (puzzlemode == USERTREE || puzzlemode == CONSENSUS))) {
5884 /* no tree present */
5885 estimateparametersnotree();
5886 } else {
5887 if (utree_optn) {
5888 /* use 1st user tree */
5889 readusertree(utfp, usebranch_optn);
5890 rewind(utfp);
5891 estimateparameterstree();
5892 } else {
5893 /* don't use first user tree */
5894 estimateparametersnotree();
5895 }
5896 }
5897 time(&PEstStoptime);
5898 # ifdef PRINTTIME_DEBUG
5899 fprintftimespan(stdout, PEstStarttime, PEstStoptime, Starttime, "PEst");
5900 # endif /* PRINTTIME_DEBUG */
5901 addtimes(PARAMEST, &tarr);
5902
5903 /* compute expected Ts/Tv ratio */
5904 if (data_optn == NUCLEOTIDE) computeexpectations();
5905
5906 /*epe*/
5907 # if PARALLEL
5908 PP_Final_Update();
5909 # endif
5910
5911 /* free_ivector(Seqgapchar); */
5912 /* free_ivector(Seqotherchar); */
5913 } /* inputandinit */
5914
5915
5916
5917 /***************************************************************/
5918
5919 void evaluatetree(FILE *intreefp, FILE *outtreefp, int pmode, int utreenum, int maxutree, int *oldlocroot)
5920 /* FILE *intreefp, - file to read tree(s) from */
5921 /* FILE *outtreefp, - file to write result to */
5922 /* int pmode, - analysis type: QUARTPUZ/USERTREE */
5923 /* int utreenum, - current tree number */
5924 /* int maxutree, - amount of trees to analyse */
5925 /* int *oldlocroot - root location for clock analysis */
5926 {
5927
5928 switch (pmode) {
5929 case QUARTPUZ: /* read QP tree */
5930 readusertree(intreefp, usebranch_optn);
5931 fprintf(STDOUT, "Computing maximum likelihood branch lengths (without clock)\n");
5932 fflush(STDOUT);
5933 /* pre-set branchlen = FALSE; parallel estim = FALSE */
5934 usertree_lklhd(FALSE, FALSE);
5935 /* for savesitelh_optn */
5936 allsitelkl(Ctree->condlkl, allsites[0]);
5937 if (rhetmode != UNIFORMRATE) {
5938 findbestratecombination();
5939 }
5940 break;
5941 case USERTREE: /* read user tree */
5942 readusertree(intreefp, usebranch_optn);
5943 fprintf(STDOUT, "Computing maximum likelihood branch lengths (without clock) for tree # %d\n", utreenum+1);
5944 fflush(STDOUT);
5945
5946 /* pre-set branchlen = TRUE; parallel estim = FALSE */
5947 if (usebranch_optn) usertree_lklhd(TRUE, FALSE);
5948 /* pre-set branchlen = FALSE; parallel estim = FALSE */
5949 else usertree_lklhd(FALSE, FALSE);
5950
5951
5952 if (maxutree > 1) {
5953 ulkl[utreenum] = Ctree->lklhd;
5954 allsitelkl(Ctree->condlkl, allsites[utreenum]);
5955 } else { /* if maxutree == 1 */
5956 /* for savesitelh_optn */
5957 allsitelkl(Ctree->condlkl, allsites[0]);
5958 }
5959 if ((utreenum==0) && (rhetmode != UNIFORMRATE)) {
5960 findbestratecombination();
5961 }
5962 break;
5963 } /* switch (pmode) */
5964
5965
5966 if (compclock) { /* clocklike branch length */
5967 switch (pmode) {
5968 case QUARTPUZ:
5969 fprintf(STDOUT, "Computing maximum likelihood branch lengths (with clock)\n");
5970 fflush(STDOUT);
5971 break;
5972 case USERTREE:
5973 fprintf(STDOUT, "Computing maximum likelihood branch lengths (with clock) for tree # %d\n", utreenum+1);
5974 fflush(STDOUT);
5975 break;
5976 }
5977
5978 /* find best place for root */
5979
5980 /* default user-set root */
5981 rootsearch = ROOT_USER;
5982
5983 /* save or reset the default (user set or -1) */
5984 if ((pmode == USERTREE) && (utreenum > 0)) { /* user trees >1 */
5985 locroot = *oldlocroot;
5986 } else { /* 1st user tree or reconstructed tree */
5987 *oldlocroot = locroot;
5988 }
5989
5990 if (locroot < 0) { /* not set (-1) -> automatic */
5991 locroot = findrootedge();
5992 rootsearch = ROOT_AUTO;
5993 }
5994
5995 /* if user-specified edge for root does not exist use */
5996 /* displayed outgroup */
5997 if (!checkedge(locroot)) {
5998 locroot = outgroup;
5999 rootsearch = ROOT_DISPLAYED;
6000 }
6001
6002 /* compute likelihood */
6003 clock_lklhd(locroot);
6004 if (maxutree > 1) {
6005 ulklc[utreenum] = Ctree->lklhdc;
6006 allsitelkl(Ctree->condlkl, allsitesc[utreenum]);
6007 } else { /* if maxutree == 1 */
6008 /* for savesitelh_optn */
6009 allsitelkl(Ctree->condlkl, allsitesc[0]);
6010 }
6011
6012 }
6013
6014 /* write ML branch length tree to outree file */
6015 clockmode = 0; /* nonclocklike branch lengths */
6016 fprintf(outtreefp, "[ lh=%.6f ]", Ctree->lklhd);
6017 fputphylogeny(outtreefp);
6018
6019 /* clocklike branch lengths */
6020 if (compclock) {
6021 clockmode = 1;
6022 fprintf(outtreefp, "[ lh=%.6f ]", Ctree->lklhdc);
6023 fputrooted(outtreefp, locroot);
6024 }
6025 fflush(outtreefp);
6026 } /* evaluatetree */
6027
6028 /***************************************************************/
6029
6030 void memcleanup() {
6031 if (puzzlemode == QUARTPUZ && typ_optn == TREERECON_OPTN) {
6032 free(splitfreqs);
6033 free(splitpatterns);
6034 free(splitsizes);
6035 free_ivector(consconfid);
6036 free_ivector(conssizes);
6037 free_cmatrix(consbiparts);
6038 free_ulivector(badtaxon);
6039 free_ulimatrix(qinfomatr);
6040
6041 if(readsubset_optn) {
6042 free_imatrix(ss_setovlgraph);
6043 free_imatrix(ss_setoverlaps);
6044 free_imatrix(ss_setovllist);
6045 free_ivector(ss_setovllistsize);
6046 free_imatrix(ss_matrix);
6047 free_imatrix(ss_list);
6048 free_ivector(ss_listsize);
6049 }
6050 }
6051 free_cmatrix(Identif);
6052 free_cmatrix(Namestr);
6053 free_dvector(Freqtpm);
6054 free_imatrix(Basecomp);
6055 free_ivector(clusterA);
6056 free_ivector(clusterB);
6057 free_ivector(clusterC);
6058 free_ivector(clusterD);
6059 free_dvector(qweight);
6060 free_dvector(sqdiff);
6061 free_ivector(qworder);
6062 free_ivector(sqorder);
6063 freetreelist(&psteptreelist, &psteptreenum, &psteptreesum);
6064 freequartets();
6065
6066 } /* memcleanup */
6067
6068 /***************************************************************/
6069 /*** ***/
6070 /*** routines for consensus tree computation ***/
6071 /*** ***/
6072 /***************************************************************/
6073 /* produce consensus from usertrees (onlyconsensus.c) */
6074 /***************************************************************/
6075
6076
6077 /* print i leading spaces to stream fp */
6078 void indent(FILE *fp, int i)
6079 {
6080 int n;
6081 for (n=0; n < i; n++)
6082 fputc(' ', fp);
6083 } /* indent */
6084
6085 /******************/
6086
6087 /* print speciesnames in leafset (size = setsize) to stream fp */
6088 void printset(FILE *fp, int *leafset, int setsize)
6089 {
6090 int i;
6091 fprintf(fp, "Size=%d: ", setsize);
6092
6093 for (i = 0; i < (setsize) ; i++) {
6094 fprintf(fp, "%d (", leafset[i]);
6095 fputid10(fp, leafset[i]);
6096 fprintf(fp, "),");
6097 }
6098 fprintf(fp, "\n");
6099 fflush(fp);
6100 } /* printset */
6101
6102 /***************************************************************************/
6103
6104 /* copy subtree recursively */
6105 void subtree2bipart(int depth, /* depth from root (for debug) */
6106 cmatrix bip, /* list of bipartitions */
6107 int *numsp, /* number of bipartitions in it */
6108 int *leafset, /* set of leaves in this subtree */
6109 int *setsize, /* size of the set of leaves */
6110 Node *np) /* edge in puzzling step tree */
6111 {
6112 int x;
6113 int *leafsetr; /* set of leaves in this subtree */
6114 int setsizer; /* size of the set of leaves */
6115 Node *tp;
6116
6117 /* test whether we are on an external node */
6118 if (np->isop == NULL) {
6119 leafset[(*setsize)] = np->number;
6120 (*setsize)++;
6121 return;
6122 }
6123
6124 /* leafsetr = new_ivector(Maxspc); */
6125 setsizer = 0;
6126
6127 /* we are NOT on a leaf */
6128 tp = np->isop;
6129 while (tp != np) {
6130 leafsetr = new_ivector(Maxspc);
6131 setsizer = 0;
6132 for (x = 0; x < setsizer ; x++) {
6133 leafsetr[x]=-1;
6134 }
6135
6136 subtree2bipart(depth + 1, bip, numsp, leafsetr, &setsizer, tp->kinp);
6137
6138 for (x = 0; x < setsizer ; x++) {
6139 leafset[(*setsize) + x] = leafsetr[x];
6140 }
6141 (*setsize) += setsizer;
6142
6143 tp = tp->isop;
6144 free_ivector(leafsetr);
6145 }
6146 if (depth > 1){
6147 for (x = 0; x < (*setsize) ; x++) {
6148 bip[*numsp][leafset[x]] = '.';
6149 }
6150
6151 # ifdef VERBOSE1
6152 indent(stderr, depth*2);
6153 for (x = 0; x < Maxspc ; x++) {
6154 fprintf(stderr, "%c", bip[*numsp][x]);
6155 }
6156 fprintf(stderr, "\n");
6157 # endif /* VERBOSE */
6158
6159 (*numsp)++;
6160 }
6161
6162 } /* subtree2bipart */
6163
6164
6165 /***************************************************************************/
6166
6167
6168 /* copy treestructure to sorting structure */
6169 void tree2bipart(cmatrix bip, int *numsp)
6170 {
6171 int *leafset; /* set of leaves in this subtree */
6172 int setsize; /* size of the set of leaves */
6173 int depth;
6174
6175 leafset = new_ivector(Maxspc);
6176
6177 depth = 0;
6178 setsize = 0;
6179
6180 /* we are NOT on a leaf */
6181 subtree2bipart(depth + 1, bip, numsp, leafset, &setsize, Ctree->ebrnchp[outgroup]->kinp);
6182 free_ivector(leafset);
6183
6184 } /* tree2bipart */
6185
6186
6187 /***************************************************************************/
6188 /***************************************************************************/
6189
6190
6191
6192 void usertreeconsensus(FILE* utfp, FILE* outfp, int *oldlocrootptr)
6193 {
6194 int ci;
6195 int i;
6196 int n1;
6197 int n2;
6198 int numsp;
6199 FILE *treefp;
6200
6201 fprintf(STDOUT, "Performing consensus construction\n");
6202 fflush(STDOUT);
6203 numutrees = 0;
6204 do {
6205 ci = fgetc(utfp);
6206 if ((char) ci == ';') numutrees++;
6207 } while (ci != EOF);
6208 rewind(utfp);
6209 if (numutrees < 1) {
6210 fprintf(STDOUT, "Unable to proceed (no tree in input tree file)\n\n\n");
6211 exit(1);
6212 }
6213
6214 /* prepare for consensus tree analysis */
6215 initconsensus();
6216
6217
6218 for (i = 0; i < numutrees; i++) {
6219 /* fprintf(stdout, "%d ", i); */
6220
6221 readusertree(utfp, usebranch_optn);
6222
6223 for (n1 = 0; n1 < Maxspc - 3; n1++)
6224 for (n2 = 0; n2 < Maxspc; n2++)
6225 biparts[n1][n2] = '*';
6226 numsp = 0;
6227 tree2bipart(biparts, &numsp);
6228
6229 makenewsplitentries(biparts, numsp, &splitpatterns, &splitsizes, &splitfreqs, &numbiparts, &maxbiparts, Maxspc);
6230
6231
6232 }
6233
6234 /* compute majority rule consensus tree */
6235 makeconsensus(numutrees, FALSE);
6236
6237 /* write consensus tree */
6238 openfiletowrite(&treefp, TREEFILE, "output tree(s)");
6239
6240 tmpfp = tmpfile();
6241 writeconsensustree(tmpfp, FALSE, qsupportarr);
6242
6243 #if 0
6244 if (! skipmlbranch_optn) {
6245 #endif
6246 if (dotreelh_optn) {
6247 rewind(tmpfp);
6248 openfiletowrite(&treefp, TREEFILE, "output tree(s)");
6249 evaluatetree(tmpfp, treefp, QUARTPUZ, 0, 1, oldlocrootptr);
6250 } else {
6251 writeconsensustree(treefp, FALSE, qsupportarr);
6252 }
6253
6254 closefile(utfp);
6255 closefile(treefp);
6256 closefile(tmpfp);
6257
6258 /* exit(0); */
6259
6260 } /* usertreeconsensus */
6261
6262 /* end onlyconsensus.c */
6263
6264
6265
6266 /******************************************************************************/
6267 /* main part */
6268 /******************************************************************************/
6269
6270 int main(int argc, char *argv[])
6271 {
6272 int i, oldlocroot=0;
6273
6274 # if !PARALLEL
6275 # ifdef USE_CONSOLE
6276 argc = ccommand(&argv);
6277 # endif
6278 # endif
6279
6280 /* start main timer */
6281 time(&walltimestart);
6282 cputimestart = clock();
6283 inittimearr(&tarr);
6284
6285 # if PARALLEL
6286 PP_Init(&argc, &argv);
6287 if (PP_IamSlave) {
6288 slave_main(argc, argv);
6289 } else {
6290 # endif /* PARALLEL */
6291
6292 inputandinit(&argc, &argv);
6293
6294 fprintf(STDOUT, "Writing parameters to file %s\n", OUTFILE);
6295 openfiletowrite(&ofp, OUTFILE, "general output");
6296 /* openfiletowrite(&ofp, "xxxx", "general output"); */
6297 writeoutputfile(ofp,WRITEPARAMS);
6298 fclose(ofp);
6299
6300
6301 /* write distance matrix */
6302 fprintf(STDOUT, "Writing pairwise distances to file %s\n", DISTANCES);
6303 openfiletowrite(&dfp, DISTANCES, "pairwise distances");
6304 putdistance(dfp);
6305 closefile(dfp);
6306
6307
6308 /*epe PP_SendSizes()/SendData() relocated*/
6309 #if 0
6310
6311 # if PARALLEL
6312 #ifndef USE_WINDOWS
6313 PP_SendSizes(Maxspc, Maxsite, numcats, Numptrn, tpmradix, outgroup, fracconst, randseed, fixedorder_optn, consensus_optn);
6314 #else
6315 PP_SendSizes(Maxspc, Maxsite, alimaxsite, alistart, aliend, numcats, Numptrn, tpmradix, outgroup, fracconst, randseed, fixedorder_optn, consensus_optn);
6316 #endif
6317 PP_SendData(Seqpat, /* cmatrix */
6318 Alias, Weight, constpat, /* ivector */
6319 Rates, Eval, Freqtpm, /* dvector */
6320 Evec, Ievc, iexp, Distanmat, /* dmatrix */
6321 ltprobr); /* dcube */
6322 # endif /* PARALLEL */
6323 #endif
6324
6325 psteptreestrlen = (Maxspc * (int)(1 + log10((double)Maxspc))) +
6326 (Maxspc * 3);
6327
6328 switch (typ_optn) {
6329 case TREERECON_OPTN: /* tree reconstruction */
6330
6331 if (puzzlemode == QUARTPUZ) { /* quartet puzzling */
6332 recon_tree();
6333 } /* quartet puzzling */
6334 break;
6335
6336 case LIKMAPING_OPTN: /* likelihood mapping */
6337
6338 map_lklhd();
6339 break;
6340 } /* switch typ_optn */
6341
6342
6343 free_cmatrix(Seqchar);
6344 free_cmatrix(Seqchars);
6345
6346 /* reserve memory for tree statistics */
6347 if (typ_optn == TREERECON_OPTN) {
6348 /* CONSENSUS -> QUARTPUZ, allocates unnecessary memory (HAS) */
6349 if ((puzzlemode == USERTREE) && numutrees > 1) {
6350 ulkl = new_dvector(numutrees);
6351 allsites = new_dmatrix(numutrees,Numptrn);
6352 if (compclock) {
6353 ulklc = new_dvector(numutrees);
6354 allsitesc = new_dmatrix(numutrees,Numptrn);
6355 }
6356 } else { /* QUARTPUZ, CONSENSUS */
6357 allsites = new_dmatrix(1,Numptrn);
6358 if (compclock) {
6359 allsitesc = new_dmatrix(1,Numptrn);
6360 }
6361 }
6362 }
6363
6364 /* write puzzling step tree list */
6365 if ((listqptrees == PSTOUT_ORDER) || (listqptrees == PSTOUT_LISTORDER)) {
6366 openfiletowrite(&qptorder, OUTPTORDER, "puzzling step trees (unique)");
6367
6368 fprintfsortedpstrees(qptorder, psteptreelist, psteptreenum, psteptreesum, 1, 0.0);
6369 closefile(qptorder);
6370 }
6371
6372 /* compute ML branch lengths for QP tree and for 1st user tree */
6373 switch(typ_optn) {
6374 case TREERECON_OPTN:
6375 addtimes(GENERAL, &tarr);
6376
6377 switch (puzzlemode) {
6378 case QUARTPUZ: /* read QP tree */
6379 #if 0
6380 if (! skipmlbranch_optn) {
6381 #endif
6382 if (dotreelh_optn) {
6383 rewind(tmpfp);
6384 openfiletowrite(&tfp, TREEFILE, "output tree(s)");
6385 evaluatetree(tmpfp, tfp, puzzlemode, 0, 1, &oldlocroot);
6386 addtimes(TREEEVAL, &tarr);
6387 closefile(tmpfp);
6388 closefile(tfp);
6389 }
6390
6391 openfiletoappend(&ofp, OUTFILE, "general output");
6392 writeoutputfile(ofp,WRITEREST);
6393 break;
6394 case USERTREE: /* read user tree */
6395 openfiletoappend(&ofp, OUTFILE, "general output");
6396
6397 openfiletowrite(&tfp, TREEFILE, "output tree(s)");
6398 for (i = 0; i < numutrees; i++) {
6399 evaluatetree(utfp, tfp, puzzlemode, i, numutrees, &oldlocroot);
6400 if (i==0) writeoutputfile(ofp,WRITEREST);
6401 writecutree(ofp, i+1);
6402 addtimes(TREEEVAL, &tarr);
6403 }
6404 closefile(tfp);
6405 closefile(utfp);
6406 break;
6407 case CONSENSUS: /* USERCONSENSUS */
6408 openfiletoappend(&ofp, OUTFILE, "general output");
6409 usertreeconsensus(utfp, ofp, &oldlocroot); /* ends with exit */
6410 openfiletoappend(&ofp, OUTFILE, "general output");
6411 writeoutputfile(ofp,WRITEREST);
6412 break;
6413 default: /* e.g. distances only */
6414 openfiletoappend(&ofp, OUTFILE, "general output");
6415 writeoutputfile(ofp,WRITEREST);
6416 break;
6417 } /* switch puzzlemode */
6418 break;
6419 default:
6420 openfiletoappend(&ofp, OUTFILE, "general output");
6421 writeoutputfile(ofp,WRITEREST);
6422 break;
6423 } /* switch typ_optn */
6424
6425
6426 /* print tree statistics */
6427 if ((typ_optn == TREERECON_OPTN) && (puzzlemode == USERTREE) && (numutrees > 1) && dotreetest_optn)
6428 printtreestats(ofp, ulkl, ulklc, numutrees,
6429 allsites, allsitesc, Alias, Maxsite,
6430 Numptrn, Weight, compclock);
6431
6432
6433 if ((savesitelh_optn || savesitelhb_optn) && (typ_optn == TREERECON_OPTN)) {
6434 if (savesitelh_optn) {
6435 FILE *outf;
6436 openfiletowrite(&outf, SITELH, "write site likelihood");
6437 if (puzzlemode == QUARTPUZ)
6438 writesitelklmatrixphy(outf, 1, Maxsite, allsites, allsitesc, compclock);
6439 else /* puzzlemode == CONSENSUS ??? */
6440 writesitelklmatrixphy(outf, numutrees, Maxsite, allsites, allsitesc, compclock);
6441 fclose(outf);
6442 }
6443 }
6444
6445 /* free memory for tree statistics */
6446 if (typ_optn == TREERECON_OPTN) {
6447 if ((puzzlemode == USERTREE || puzzlemode == CONSENSUS) && numutrees > 1) {
6448 free_dvector(ulkl);
6449 free_dmatrix(allsites);
6450 if (compclock) {
6451 free_dvector(ulklc);
6452 free_dmatrix(allsitesc);
6453 }
6454 } else {
6455 free_dmatrix(allsites);
6456 if (compclock)
6457 free_dmatrix(allsitesc);
6458 }
6459 }
6460
6461 # if PARALLEL
6462 PP_SendDone();
6463 # endif /* PARALLEL */
6464
6465 /* write CPU/Wallclock times and parallel statistics */
6466 time(&walltimestop);
6467 cputimestop = clock();
6468 addtimes(OVERALL, &tarr);
6469 # ifdef TIMEDEBUG
6470 printtimearr(&tarr);
6471 # endif /* TIMEDEBUG */
6472 fullcpu = tarr.fullcpu;
6473 fulltime = tarr.fulltime;
6474
6475 # if PARALLEL
6476 writetimesstat(ofp);
6477 # endif /* PARALLEL */
6478
6479 /* stop timer */
6480 time(&Stoptime);
6481 Stopcpu=clock();
6482 timestamp(ofp);
6483 closefile(ofp);
6484
6485
6486 /* printbestratecombination(stderr); */
6487 mlfinish();
6488
6489 fprintf(STDOUT, "\nAll results written to disk:\n");
6490 fprintf(STDOUT, " Puzzle report file: %s\n", OUTFILE);
6491 fprintf(STDOUT, " Likelihood distances: %s\n", DISTANCES);
6492
6493 if (typ_optn == TREERECON_OPTN && puzzlemode != PAIRDIST)
6494 fprintf(STDOUT, " Phylip tree file: %s\n", TREEFILE);
6495 if (typ_optn == TREERECON_OPTN && puzzlemode == QUARTPUZ) {
6496 if ((listqptrees == PSTOUT_ORDER) ||(listqptrees == PSTOUT_LISTORDER))
6497 fprintf(STDOUT, " Unique puzzling step trees: %s\n", OUTPTORDER);
6498 if ((listqptrees == PSTOUT_LIST) ||(listqptrees == PSTOUT_LISTORDER))
6499 fprintf(STDOUT, " Puzzling step tree list: %s\n", OUTPTLIST);
6500 }
6501 if (dotreelh_optn && typ_optn == TREERECON_OPTN && puzzlemode != PAIRDIST) {
6502 if (savesitelh_optn)
6503 fprintf(STDOUT, " Site likelihoods (PHYLIP): %s\n", SITELH);
6504 if (savesitelhb_optn)
6505 fprintf(STDOUT, " Site likelihoods (binary): %s\n", SITELHB);
6506 if (savesiterate_optn)
6507 fprintf(STDOUT, " Site rates (PHYLIP): %s\n", SITERATE);
6508 if (savesiterateb_optn)
6509 fprintf(STDOUT, " Site rates (binary): %s\n", SITERATEB);
6510 }
6511 if (show_optn && typ_optn == TREERECON_OPTN && puzzlemode == QUARTPUZ)
6512 fprintf(STDOUT, " Unresolved quartets: %s\n", UNRESOLVED);
6513 if (typ_optn == LIKMAPING_OPTN)
6514 fprintf(STDOUT, " Likelihood mapping diagram: %s\n", TRIANGLE);
6515 fprintf(STDOUT, "\n");
6516
6517 # ifdef DEBUG_PRINTTIMES
6518 fprintftimespan(stdout, PStepStoptime, Stoptime, Starttime, "Pstep -> end");
6519 # endif /* DEBUG_PRINTTIMES */
6520
6521 /* runtime message */
6522 fprintf(STDOUT,
6523 "The parameter estimation took %.2f seconds (= %.2f minutes = %.2f hours)\n",
6524 difftime(PEstStoptime, PEstStarttime), difftime(PEstStoptime, PEstStarttime)/60.,
6525 difftime(PEstStoptime, PEstStarttime)/3600.);
6526 fprintf(STDOUT,
6527 "The ML step took %.2f seconds (= %.2f minutes = %.2f hours)\n",
6528 difftime(MLstepStoptime, MLstepStarttime), difftime(MLstepStoptime, MLstepStarttime)/60.,
6529 difftime(MLstepStoptime, MLstepStarttime)/3600.);
6530 fprintf(STDOUT,
6531 "The puzzling step took %.2f seconds (= %.2f minutes = %.2f hours)\n",
6532 difftime(PStepStoptime, PStepStarttime), difftime(PStepStoptime, PStepStarttime)/60.,
6533 difftime(PStepStoptime, PStepStarttime)/3600.);
6534 fprintf(STDOUT,
6535 "The computation took %.2f seconds (= %.2f minutes = %.2f hours)\n",
6536 difftime(Stoptime, Starttime), difftime(Stoptime, Starttime)/60.,
6537 difftime(Stoptime, Starttime)/3600.);
6538 fprintf(STDOUT,
6539 " including input %.2f seconds (= %.2f minutes = %.2f hours)\n",
6540 fulltime, fulltime/60., fulltime/3600.);
6541 #ifdef TIMEDEBUG
6542 fprintf(STDOUT,
6543 "and %.0f seconds CPU time (= %.2f minutes = %.2f hours)\n\n",
6544 fullcpu, fullcpu/60., fullcpu/3600.);
6545 #endif /* TIMEDEBUG */
6546
6547 /* free memory */
6548 memcleanup();
6549
6550 # if PARALLEL
6551 } /* !IamSlave */
6552 PP_Finalize();
6553 # endif /* PARALLEL */
6554
6555 return 0;
6556 } /* main */
6557
6558
6559 /* compare function for uli - sort largest numbers first */
6560 int ulicmp(const void *ap, const void *bp)
6561 {
6562 uli a, b;
6563
6564 a = *((uli *) ap);
6565 b = *((uli *) bp);
6566
6567 if (a > b) return -1;
6568 else if (a < b) return 1;
6569 else return 0;
6570 } /* ulicmp */
6571
6572 /* compare function for int - sort smallest numbers first */
6573 int intcmp(const void *ap, const void *bp)
6574 {
6575 int a, b;
6576
6577 a = *((int *) ap);
6578 b = *((int *) bp);
6579
6580 if (a < b) return -1;
6581 else if (a > b) return 1;
6582 else return 0;
6583 } /* intcmp */
6584