1 /*
2 * puzzle2.c
3 *
4 *
5 * Part of TREE-PUZZLE 5.2 (July 2004)
6 *
7 * (c) 2003-2004 by Heiko A. Schmidt, Korbinian Strimmer, and Arndt von Haeseler
8 * (c) 1999-2003 by Heiko A. Schmidt, Korbinian Strimmer,
9 * M. Vingron, and Arndt von Haeseler
10 * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler
11 *
12 * All parts of the source except where indicated are distributed under
13 * the GNU public licence. See http://www.opensource.org for details.
14 *
15 * ($Id$)
16 *
17 */
18
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22
23
24 #define EXTERN extern
25
26 #include "puzzle.h"
27 #include <string.h>
28
29 #if PARALLEL
30 # include "ppuzzle.h"
31 # include "sched.h"
32 #endif /* PARALLEL */
33
34
35 /******************************************************************************/
36 /* sequences */
37 /******************************************************************************/
38
39 /* read ten characters of current line as identifier */
readid(FILE * infp,int t)40 void readid(FILE *infp, int t)
41 {
42 int i, j, flag, ci;
43
44 for (i = 0; i < 10; i++) {
45 ci = fgetc(infp);
46 if (ci == EOF || !isprint(ci)) {
47 fprintf(STDOUT, "\n\n\nUnable to proceed (no name for sequence %d)\n\n\n", t+1);
48 # if PARALLEL
49 PP_Finalize();
50 # endif
51 exit(1);
52 }
53 Identif[t][i] = (char) ci;
54 }
55 /* convert leading blanks in taxon name to underscores */
56 flag = FALSE;
57 for (i = 9; i > -1; i--) {
58 if (flag == FALSE) {
59 if (Identif[t][i] != ' ') flag = TRUE;
60 } else {
61 if (Identif[t][i] == ' ') Identif[t][i] = '_';
62 }
63 }
64 /* check whether this name is already used */
65 for (i = 0; i < t; i++) { /* compare with all other taxa */
66 flag = TRUE; /* assume identity */
67 for (j = 0; (j < 10) && (flag == TRUE); j++)
68 if (Identif[t][j] != Identif[i][j])
69 flag = FALSE;
70 if (flag) {
71 fprintf(STDOUT, "\n\n\nUnable to proceed (multiple occurence of sequence name '");
72 fputid(STDOUT, t);
73 fprintf(STDOUT, "')\n\n\n");
74 # if PARALLEL
75 PP_Finalize();
76 # endif
77 exit(1);
78 }
79 }
80 } /* readid */
81
82 /******************/
83
84
85 /* read next allowed character */
readnextcharacter(FILE * ifp,int notu,int nsite)86 char readnextcharacter(FILE *ifp, int notu, int nsite)
87 {
88 char c;
89
90 /* ignore blanks and control characters except newline (UNIX,DOS) or CR (Mac,DOS) */
91 do {
92 if (fscanf(ifp, "%c", &c) != 1) {
93 fprintf(STDOUT, "\n\n\nUnable to proceed (missing character at position %d in sequence '", nsite + 1);
94 fputid(STDOUT, notu);
95 fprintf(STDOUT, "')\n\n\n");
96 # if PARALLEL
97 PP_Finalize();
98 # endif
99 exit(1);
100 }
101 } while (c == ' ' || (iscntrl((int) c) && (c != '\n') && (c != '\r')));
102 return c;
103 } /* readnextcharacter */
104
105 /******************/
106
107
108 /* skip rest of the line */
skiprestofline(FILE * ifp,int notu,int nsite)109 void skiprestofline(FILE *ifp, /* input file stream */
110 int notu, /* taxon number - for error msg. */
111 int nsite) /* sequence position - for error msg. */
112 {
113 int ci;
114
115 /* read chars until the first newline or CR */
116 do{
117 ci = fgetc(ifp);
118 if (ci == EOF) {
119 fprintf(STDOUT, "Unable to proceed (missing newline at position %d in sequence '", nsite + 1);
120 fputid(STDOUT, notu);
121 fprintf(STDOUT, "')\n\n\n");
122 # if PARALLEL
123 PP_Finalize();
124 # endif
125 exit(1);
126 }
127 } while (((char) ci != '\n') && ((char) ci != '\r'));
128 } /* skiprestofline */
129
130 /******************/
131
132
133 /* skip control characters and blanks */
skipcntrl(FILE * ifp,int notu,int nsite)134 void skipcntrl(FILE *ifp, /* input file stream */
135 int notu, /* taxon number - for error msg. */
136 int nsite) /* sequence position - for error msg. */
137 {
138 int ci;
139
140 /* read over all control characters and blanks */
141 do {
142 ci = fgetc(ifp);
143 if (ci == EOF) {
144 fprintf(STDOUT, "\n\n\nUnable to proceed (missing character at position %d in sequence '", nsite + 1);
145 fputid(STDOUT, notu);
146 fprintf(STDOUT, "')\n\n\n");
147 # if PARALLEL
148 PP_Finalize();
149 # endif
150 exit(1);
151 }
152 } while (iscntrl(ci) || (char) ci == ' ');
153 /* go one character back */
154 if (ungetc(ci, ifp) == EOF) {
155 fprintf(STDOUT, "\n\n\nUnable to proceed (positioning error at position %d in sequence '", nsite + 1);
156 fputid(STDOUT, notu);
157 fprintf(STDOUT, "')\n\n\n");
158 # if PARALLEL
159 PP_Finalize();
160 # endif
161 exit(1);
162 }
163 } /* skipcntrl */
164
165 /******************/
166
167
168 /* read sequences of one data set */
getseqs(FILE * ifp,int Maxspc,int Maxseqc,cmatrix * seqch,cmatrix identif)169 void getseqs(FILE *ifp, /* in: input file stream */
170 int Maxspc, /* in: number of taxa */
171 int Maxseqc, /* in: number of sites */
172 cmatrix *seqch, /* out: alignment matrix */
173 cmatrix identif) /* io: taxon names (10 char w/o stop) */
174 {
175 int notu, nsite, endofline, linelength, i;
176 char c;
177 cmatrix seqchars;
178
179 seqchars = new_cmatrix(Maxspc, Maxseqc);
180 /* read all characters */
181 nsite = 0; /* next site to be read */
182 while (nsite < Maxseqc) {
183 /* read first taxon */
184 notu = 0;
185
186 /* only Maxspc, Maxseqc read so far: */
187 /* go to next true line */
188 skiprestofline(ifp, notu, nsite);
189
190 skipcntrl(ifp, notu, nsite);
191
192 if (nsite == 0) readid(ifp, notu);
193 endofline = FALSE;
194 linelength = 0;
195 do {
196 c = readnextcharacter(ifp, notu, nsite + linelength);
197 if ((c == '\n') || (c == '\r')) endofline = TRUE;
198 else if (c == '.') {
199 fprintf(STDOUT, "\n\n\nUnable to proceed (invalid character '.' at position ");
200 fprintf(STDOUT, "%d in first sequence)\n\n\n", nsite + linelength + 1);
201 # if PARALLEL
202 PP_Finalize();
203 # endif
204 exit(1);
205 } else if (nsite + linelength < Maxseqc) {
206 /* change to upper case */
207 seqchars[notu][nsite + linelength] = (char) toupper((int) c);
208 linelength++;
209 } else {
210 endofline = TRUE;
211 skiprestofline(ifp, notu, nsite + linelength);
212 }
213 } while (!endofline);
214 if (linelength == 0) {
215 fprintf(STDOUT, "\n\n\nUnable to proceed (line with length 0 at position %d in sequence '", nsite + 1);
216 fputid(STDOUT, notu);
217 fprintf(STDOUT, "')\n\n\n");
218 # if PARALLEL
219 PP_Finalize();
220 # endif
221 exit(1);
222 }
223 /* read other taxa */
224 for (notu = 1; notu < Maxspc; notu++) {
225 /* go to next true line */
226 if (notu != 1) skiprestofline(ifp, notu, nsite);
227 skipcntrl(ifp, notu, nsite);
228 if (nsite == 0) readid(ifp, notu);
229 for (i = nsite; i < nsite + linelength; i++) {
230 c = readnextcharacter(ifp, notu, i);
231 if ((c == '\n') || (c == '\r')) { /* too short */
232 fprintf(STDOUT, "\n\n\nUnable to proceed (line to short at position %d in sequence '", i + 1);
233 fputid(STDOUT, notu);
234 fprintf(STDOUT, "')\n\n\n");
235 # if PARALLEL
236 PP_Finalize();
237 # endif
238 exit(1);
239 } else if (c == '.') {
240 seqchars[notu][i] = seqchars[0][i];
241 } else {
242 /* change to upper case */
243 seqchars[notu][i] = (char) toupper((int) c);
244 }
245 }
246 }
247 nsite = nsite + linelength;
248 }
249 *seqch =seqchars;
250
251 } /* getseqs */
252
253 /******************/
254
255
256 /* initialize identifer arrays */
initid(int t,cmatrix * identif,cmatrix * namestr)257 void initid(int t, /* in: number of taxa */
258 cmatrix *identif, /* out: name array w/o end of string */
259 cmatrix *namestr) /* out: name array with end of string */
260 {
261 int i, j;
262
263 *identif = new_cmatrix(t, 10);
264 *namestr = new_cmatrix(t, 11);
265 for (i = 0; i < t; i++) {
266 (*namestr)[i][0] = '\0';
267 (*namestr)[i][9] = '\0';
268 for (j = 0; j < 10; j++) {
269 (*identif)[i][j] = ' ';
270 } /* for j */
271 } /* for i */
272 } /* initid */
273
274 /******************/
275
276
277 /******************/
278
279
280 /* copy undelimited identifer array to '\0' delimited identifer array */
identif2namestr(int num,cmatrix Identif,cmatrix Namestr)281 void identif2namestr(int num, /* number of taxa */
282 cmatrix Identif, /* non-delimited names */
283 cmatrix Namestr) /* proper delimited names */
284 {
285 int i, j;
286 int laggingspc;
287
288 for (i = 0; i < num; i++) {
289 laggingspc = 1;
290 for (j = 9; j >= 0; j--) {
291 Namestr[i][j] = Identif[i][j];
292 if (laggingspc == 1) {
293 if (Identif[i][j] != ' ') {
294 Namestr[i][j+1] = '\0';
295 laggingspc = 0;
296 }
297 }
298
299 } /* for j (letter 10-1) */
300 } /* for i (species) */
301 } /* identif2namestr */
302
303 /******************/
304
305
306 /* print identifier of specified taxon in full 10 char length */
fputid10(FILE * ofp,int t)307 void fputid10(FILE *ofp, int t)
308 {
309 int i;
310
311 for (i = 0; i < 10; i++) fputc(Identif[t][i], ofp);
312 } /* fputid10 */
313
314 /******************/
315
316
317 /* print identifier of specified taxon up to first space */
fputid(FILE * ofp,int t)318 int fputid(FILE *ofp, int t)
319 {
320 int i;
321
322 i = 0;
323 while (i < 10 && Identif[t][i] != ' ') {
324 fputc(Identif[t][i], ofp);
325 i++;
326 }
327 return i;
328 } /* fputid */
329
330 /******************/
331
332
333 /* read first line of sequence data set */
getsizesites(FILE * ifp,int * Maxspc,int * Maxseqc)334 void getsizesites(FILE *ifp, /* in: input file stream */
335 int *Maxspc, /* out: number of taxa */
336 int *Maxseqc) /* out: number of sites */
337 {
338 if (fscanf(ifp, "%d", Maxspc) != 1) {
339 fprintf(STDOUT, "\n\n\nUnable to proceed (missing number of sequences)\n\n\n");
340 # if PARALLEL
341 PP_Finalize();
342 # endif
343 exit(1);
344 }
345 if (fscanf(ifp, "%d", Maxseqc) != 1) {
346 fprintf(STDOUT, "\n\n\nUnable to proceed (missing number of sites)\n\n\n");
347 # if PARALLEL
348 PP_Finalize();
349 # endif
350 exit(1);
351 }
352
353 if (*Maxspc < 3) {
354 fprintf(STDOUT, "\n\n\nUnable to proceed (less than 3 sequences - no tree possible)\n\n\n");
355 # if PARALLEL
356 PP_Finalize();
357 # endif
358 exit(1);
359 }
360 seqnumcheck = SEQNUM_OK;
361 if (*Maxspc < 4) {
362 fprintf(STDOUT, "\n\n\nLess than 4 sequences: No quartet methods possible!!!\n\n\n");
363 fprintf(STDOUT, "Parameter estimation and tree testing is still possible.\n\n\n");
364 seqnumcheck = SEQNUM_TOOFEW;
365 }
366 if (*Maxspc > 257) {
367 fprintf(STDOUT, "\n\n\nMore than 257 sequences: No quartet puzzling available!!!\n");
368 fprintf(STDOUT, "Parameter estimation, likelihood mapping, and tree testing is still possible.\n\n\n");
369 seqnumcheck = SEQNUM_TOOMANY;
370 }
371 /* correct default analysis types */
372 if ((seqnumcheck!=SEQNUM_OK) && (puzzlemode==QUARTPUZ))
373 puzzlemode = (puzzlemode + 1) % NUMPUZZLEMODES;
374 if ((seqnumcheck==SEQNUM_TOOFEW) && (typ_optn==LIKMAPING_OPTN))
375 typ_optn = (typ_optn + 1) % NUMTYPES;
376 #if 0
377 if (*Maxspc < 4) {
378 fprintf(STDOUT, "\n\n\nUnable to proceed (less than 4 sequences)\n\n\n");
379 # if PARALLEL
380 PP_Finalize();
381 # endif
382 exit(1);
383 }
384 if (*Maxspc > 257) {
385 fprintf(STDOUT, "\n\n\nUnable to proceed (more than 257 sequences)\n\n\n");
386 # if PARALLEL
387 PP_Finalize();
388 # endif
389 exit(1);
390 }
391 # endif
392 if (*Maxseqc < 1) {
393 fprintf(STDOUT, "\n\n\nUnable to proceed (no sequence sites)\n\n\n");
394 # if PARALLEL
395 PP_Finalize();
396 # endif
397 exit(1);
398 }
399 } /* getsizesites */
400
401 /******************/
402
403
404 /* read alignment from file */
readsequencefile(FILE * seqfp,int * Maxspc,int * Maxseqc,cmatrix * identif,cmatrix * namestr,cmatrix * Seqchars)405 void readsequencefile(FILE *seqfp, /* in: sequence input file stream */
406 int *Maxspc, /* out: number of taxa */
407 int *Maxseqc, /* out: number of sites */
408 cmatrix *identif, /* out: name array w/o end of str */
409 cmatrix *namestr, /* out: name array with end of str */
410 cmatrix *Seqchars) /* out: alignment matrix */
411 {
412 getsizesites(seqfp, Maxspc, Maxseqc);
413 initid(*Maxspc, identif, namestr);
414 getseqs(seqfp, *Maxspc, *Maxseqc, Seqchars, *identif);
415 identif2namestr(*Maxspc, *identif, *namestr);
416 } /* readsequencefile */
417
418 /******************/
419
420
421 /* read subsets from file */
readsubsetfile(FILE * seqfp,int Maxspc,cmatrix namestr,int * Maxsubset,imatrix * ss_setovlgraph,imatrix * ss_setoverlaps,imatrix * ss_setovllist,ivector * ss_setovllistsize,imatrix * ss_matrix,imatrix * ss_list,ivector * ss_listsize)422 void readsubsetfile(FILE *seqfp, /* in: sequence input file stream */
423 int Maxspc, /* in: number of taxa */
424 cmatrix namestr, /* in: names taxa (seq file) */
425 int *Maxsubset, /* out: number of subsets */
426 imatrix *ss_setovlgraph, /* out: size of overlap >= 3 between 2 subsets */
427 imatrix *ss_setoverlaps, /* out: size of overlap between 2 subsets */
428 imatrix *ss_setovllist, /* out: list with ovlerlapping subsets */
429 ivector *ss_setovllistsize, /* out: size of list with ovlerlapping subsets */
430 imatrix *ss_matrix, /* out: boolean list: taxon in set? */
431 imatrix *ss_list, /* out: list of taxa in set */
432 ivector *ss_listsize) /* out: size of list with taxa */
433 {
434 int ss_maxspc;
435 int n;
436 int set1, set2;
437 int tmp;
438
439 cmatrix ss_identarr;
440 cmatrix ss_namearr;
441 cmatrix ss_strmatr;
442
443 /* read the subset file sizes */
444 getsizesites(seqfp, &ss_maxspc, Maxsubset);
445
446 /* check sizes */
447 if (ss_maxspc != Maxspc) {
448 fprintf(STDOUT, "ERROR: Number of taxa in subsetfile does not match!!! (%d != %d)\n", ss_maxspc, Maxspc);
449 # if PARALLEL
450 PP_SendDone();
451 MPI_Finalize();
452 # endif /* PARALLEL */
453 exit(1);
454 }
455
456 /* initialize name arrays for checking */
457 initid(ss_maxspc, &ss_identarr, &ss_namearr);
458
459 /* read the data */
460 getseqs(seqfp, ss_maxspc, *Maxsubset, &ss_strmatr, ss_identarr);
461
462 /* check the file names */
463 identif2namestr(ss_maxspc, ss_identarr, ss_namearr);
464 for(n = 0; n<Maxspc; n++) {
465 if (ss_maxspc != Maxspc) {
466 fprintf(STDOUT, "ERROR: Taxon names or order in subset file do not match!!!\n (%d \"%s\" != \"%s\")\n", n+1, namestr[n], ss_namearr[n]);
467 # if PARALLEL
468 PP_SendDone();
469 MPI_Finalize();
470 # endif /* PARALLEL */
471 exit(1);
472 }
473
474 }
475
476 free_cmatrix(ss_identarr);
477 free_cmatrix(ss_namearr);
478
479 *ss_setovlgraph = new_imatrix(*Maxsubset, *Maxsubset);
480 *ss_setoverlaps = new_imatrix(*Maxsubset, *Maxsubset);
481
482 *ss_setovllist = new_imatrix(*Maxsubset, *Maxsubset);
483 *ss_setovllistsize = new_ivector(*Maxsubset);
484
485 *ss_matrix = new_imatrix(*Maxsubset, Maxspc);
486
487 *ss_list = new_imatrix(*Maxsubset, Maxspc);
488 *ss_listsize = new_ivector(*Maxsubset);
489
490 for (n=0; n<Maxspc; n++) {
491 for (set1=0; set1<*Maxsubset; set1++) {
492 switch (ss_strmatr[n][set1]) {
493 case 'X':
494 case '1':
495 (*ss_matrix)[set1][n] = 1;
496 (*ss_list)[set1][((*ss_listsize)[set1])++] = n;
497 break;
498 case '-':
499 case '0':
500 (*ss_matrix)[set1][n] = 0;
501 break;
502 default:
503 fprintf(STDOUT, "ERROR: Unknown character in subset file!!! (\"%c\", taxon %d, site %d)\n", ss_strmatr[n][set1], n+1, set1+1);
504 # if PARALLEL
505 PP_SendDone();
506 MPI_Finalize();
507 # endif /* PARALLEL */
508 exit(1);
509 break;
510 }
511 }
512
513 for (set1=1; set1<*Maxsubset; set1++) {
514 for (set2=0; set2<set1; set2++) {
515 if (((*ss_matrix)[set1][n]) && ((*ss_matrix)[set2][n]))
516 /* number of sequences in overlap */
517 ((*ss_setoverlaps)[set1][set2])++;
518
519 }
520 }
521 } /* for Maxspc */
522
523 for (set1=1; set1<*Maxsubset; set1++) {
524 for (set2=0; set2<set1; set2++) {
525 /* number of sequences in overlap */
526 tmp = (*ss_setoverlaps)[set1][set2];
527 (*ss_setoverlaps)[set2][set1] = tmp;
528 if (tmp >= 3) {
529 /* number of sequences (>=3) in overlap */
530 (*ss_setovlgraph)[set1][set2] = tmp;
531 (*ss_setovlgraph)[set2][set1] = tmp;
532 /* list of overlapping sets */
533 (*ss_setovllist)[set1][((*ss_setovllistsize)[set1])++] = set2;
534 (*ss_setovllist)[set2][((*ss_setovllistsize)[set2])++] = set1;
535 }
536
537 }
538 }
539 } /* readsubsetfile */
540
541 /******************/
542
543
544 /* permute taxon order */
permutetaxa_ss(int Maxspc,ivector permutation,cmatrix namestr,int Maxsubset,imatrix ss_setovlgraph,imatrix ss_setoverlaps,imatrix ss_setovllist,ivector ss_setovllistsize,imatrix ss_matrix,imatrix ss_list,ivector ss_listsize)545 void permutetaxa_ss(int Maxspc, /* in: number of taxa */
546 ivector permutation, /* permuted taxon order */
547 cmatrix namestr, /* in: names taxa (seq file) */
548 int Maxsubset, /* out: number of subsets */
549 imatrix ss_setovlgraph, /* out: size of overlap >= 3 between 2 subsets */
550 imatrix ss_setoverlaps, /* out: size of overlap between 2 subsets */
551 imatrix ss_setovllist, /* out: list with ovlerlapping subsets */
552 ivector ss_setovllistsize, /* out: size of list with ovlerlapping subsets */
553 imatrix ss_matrix, /* out: boolean list: taxon in set? */
554 imatrix ss_list, /* out: list of taxa in set */
555 ivector ss_listsize) /* out: size of list with taxa */
556 {
557 ivector candidatesetlist;
558 int candidatesetsize = 0;
559 ivector candidatetaxalist;
560 int candidatetaxasize = 0;
561 ivector taxonstatus; /* 0 - not done; 1 - candidate; 2 - inserted */
562 ivector setstatus; /* 0 - not used; 1 - front; 2 - used*/
563 int permpos;
564 int k;
565 int tmprand;
566 int currset;
567 int currtaxon;
568 candidatesetlist = new_ivector(Maxsubset);
569 candidatetaxalist = new_ivector(Maxspc);
570 taxonstatus = new_ivector(Maxspc);
571 setstatus = new_ivector(Maxsubset);
572
573 /* first subset */
574 currset = randominteger(Maxsubset);
575 setstatus[currset] = 2; /* inserted */
576
577 /* init subset front nodes */
578 /* insert sets with ovl(currset) >= 3 into candidate set list */
579 for (k=0; k<ss_setovllistsize[currset]; k++) {
580 candidatesetlist[candidatesetsize] = ss_setovllist[currset][k];
581 setstatus[candidatesetlist[candidatesetsize++]] = 1;
582 }
583 /* insert taxa from currset into candidate taxon list */
584 for (k=0; k<ss_listsize[currset]; k++) {
585 candidatetaxalist[candidatetaxasize] = ss_list[currset][k];
586 taxonstatus[candidatetaxalist[candidatetaxasize++]] = 1;
587 }
588
589 /* start collectin permutation */
590 for (permpos=0; permpos<Maxspc; permpos++) {
591 /* if no taxa left, choose next set */
592 while (candidatetaxasize == 0) {
593 /* next set */
594 tmprand = randominteger(candidatesetsize);
595 currset = candidatesetlist[tmprand];
596 /* move last set entry to gap */
597 candidatesetlist[tmprand] = candidatesetlist[--candidatesetsize]; /* XXXX */
598 /* candidatesetlist[tmprand] = candidatesetlist[candidatesetsize--]; */
599 setstatus[currset] = 2; /* inserted */
600 /* add new sets (ovl>=3) to set candidate list */
601 for (k=0; k<ss_setovllistsize[currset]; k++) {
602 if (setstatus[ss_setovllist[currset][k]] == 0) {
603 candidatesetlist[candidatesetsize] = ss_setovllist[currset][k];
604 setstatus[candidatesetlist[candidatesetsize++]] = 1;
605 }
606 }
607 /* add new taxa to taxon candidate list */
608 for (k=0; k<ss_listsize[currset]; k++) {
609 if (taxonstatus[ss_list[currset][k]] == 0) {
610 candidatetaxalist[candidatetaxasize] = ss_list[currset][k];
611 taxonstatus[candidatetaxalist[candidatetaxasize++]] = 1;
612 }
613 }
614
615 } /* if candidatetaxasize == 0 */
616
617 tmprand = randominteger(candidatetaxasize);
618 currtaxon = candidatetaxalist[tmprand];
619
620 candidatetaxalist[tmprand] = candidatetaxalist[--candidatetaxasize];
621 permutation[permpos] = currtaxon;
622 taxonstatus[currtaxon] = 2;
623
624 } /* one random taxon after the other */
625
626 free_ivector(candidatesetlist);
627 free_ivector(candidatetaxalist);
628 free_ivector(taxonstatus);
629 free_ivector(setstatus);
630 } /* permutetaxa_ss */
631
632 /******************/
633
634
635 /* print subsets */
fprintfss(FILE * ofp,int Maxspc,int Maxsubset,imatrix ss_setovlgraph,imatrix ss_setoverlaps,imatrix ss_setovllist,ivector ss_setovllistsize,imatrix ss_matrix,imatrix ss_list,ivector ss_listsize)636 void fprintfss(FILE *ofp, /* in: output file stream */
637 int Maxspc, /* in: number of taxa */
638 int Maxsubset, /* out: number of subsets */
639 imatrix ss_setovlgraph, /* out: size of overlap >= 3 between 2 subsets */
640 imatrix ss_setoverlaps, /* out: size of overlap between 2 subsets */
641 imatrix ss_setovllist, /* out: list with ovlerlapping subsets */
642 ivector ss_setovllistsize, /* out: size of list with ovlerlapping subsets */
643 imatrix ss_matrix, /* out: boolean list: taxon in set? */
644 imatrix ss_list, /* out: list of taxa in set */
645 ivector ss_listsize) /* out: size of list with taxa */
646 {
647 int s, s1, s2, t;
648
649
650 for (s=0; s<Maxsubset; s++) {
651 fprintf(ofp, " set %d (%d taxa)\n", s, ss_listsize[s]);
652 fprintf(ofp, " taxa: ");
653 for (t=0; t<ss_listsize[s]; t++) {
654 fprintf(ofp, "%3d", ss_list[s][t]);
655 if (t+1 < ss_listsize[s]) fprintf(ofp, ",");
656 if (((t+1) % 15) == 0)
657 fprintf(ofp, "\n ");
658 }
659 fprintf(ofp, "\n overlap with:");
660 for (t=0; t<ss_setovllistsize[s]; t++) {
661 fprintf(ofp, "%3d", ss_setovllist[s][t]);
662 if (t+1 < ss_setovllistsize[s]) fprintf(ofp, ",");
663 if (((t+1) % 15) == 0)
664 fprintf(ofp, "\n ");
665 }
666 fprintf(ofp, "\n\n");
667 }
668 fprintf(ofp, " Overlap Graph:\n");
669 for (s1=0; s1<Maxsubset; s1++) {
670 fprintf(ofp, " %3d :\t", s1);
671 for (s2=0; s2<Maxsubset; s2++) {
672 if (s1==s2)
673 fprintf(ofp, "(%3d) ", ss_listsize[s1]);
674 else
675 fprintf(ofp, " %3d ", ss_setoverlaps[s1][s2]);
676 }
677 fprintf(ofp, "\n");
678 }
679 fprintf(ofp, "\n\n");
680 fflush(ofp);
681 } /* fprintfss */
682
683 /******************/
684 /* check connectedness of subsets */
checkss(FILE * ofp,int Maxspc,int Maxsubset,imatrix ss_setovlgraph,imatrix ss_setoverlaps,imatrix ss_setovllist,ivector ss_setovllistsize,imatrix ss_matrix,imatrix ss_list,ivector ss_listsize)685 void checkss(FILE *ofp, /* in: output file stream */
686 int Maxspc, /* in: number of taxa */
687 int Maxsubset, /* out: number of subsets */
688 imatrix ss_setovlgraph, /* out: size of overlap >= 3 between 2 subsets */
689 imatrix ss_setoverlaps, /* out: size of overlap between 2 subsets */
690 imatrix ss_setovllist, /* out: list with ovlerlapping subsets */
691 ivector ss_setovllistsize, /* out: size of list with ovlerlapping subsets */
692 imatrix ss_matrix, /* out: boolean list: taxon in set? */
693 imatrix ss_list, /* out: list of taxa in set */
694 ivector ss_listsize) /* out: size of list with taxa */
695 {
696 int s1, s2;
697 int numdone;
698 int *setarr;
699 setarr = new_ivector(Maxsubset);
700
701 s1 = 0;
702 numdone=0;
703 do {
704 setarr[s1] = 2; /* 1=seen, 2=visited */
705 for (s2=0; s2<Maxsubset; s2++) {
706 if ((setarr[s2]==0) && (ss_setoverlaps[s1][s2] >= 3))
707 setarr[s2]=1;
708 }
709
710 numdone++;
711 s1=0;
712 while ((s1 < Maxsubset) && (setarr[s1]!=1)) s1++;
713
714 } while (s1 != Maxsubset);
715
716 if (Maxsubset != numdone) {
717 fprintf(stderr, "Unable to proceed. More than one connected component! (%d != %d)\n", Maxsubset, numdone);
718 fprintf(stderr, "visited: \n ");
719 for (s2=0; s2<Maxsubset; s2++)
720 if (setarr[s2]==2) fprintf(stderr, "%d, ", s2);
721 fprintf(stderr, "\nseen: \n ");
722 for (s2=0; s2<Maxsubset; s2++)
723 if (setarr[s2]==1) fprintf(stderr, "%d, ", s2);
724 fprintf(stderr, "\nlost: \n ");
725 for (s2=0; s2<Maxsubset; s2++)
726 if (setarr[s2]==0) fprintf(stderr, "%d, ", s2);
727 exit(1);
728 }
729 } /* checkss */
730
731 /******************/
732
733
734 /* guess data type: NUCLEOTIDE:0, AMINOACID:1, BINARY:2 */
guessdatatype(cmatrix Seqchars,int Maxspc,int Maxseqc)735 int guessdatatype(cmatrix Seqchars, /* alignment matrix (Maxspc x Maxseqc) */
736 int Maxspc, /* number of taxa */
737 int Maxseqc) /* number of sites */
738 {
739 uli numnucs, numchars, numbins;
740 int notu, nsite;
741 char c;
742
743 /* count A, C, G, T, U, N */
744 numnucs = 0;
745 numchars = 0;
746 numbins = 0;
747 for (notu = 0; notu < Maxspc; notu++)
748 for (nsite = 0; nsite < Maxseqc; nsite++) {
749 c = Seqchars[notu][nsite];
750 if (c == 'A' || c == 'C' || c == 'G' ||
751 c == 'T' || c == 'U' || c == 'N') numnucs++;
752 if (c != '-' && c != '?') numchars++;
753 if (c == '0' || c == '1') numbins++;
754 }
755 if (numchars == 0) numchars = 1;
756 /* more than 85 % frequency means nucleotide data */
757 if ((double) numnucs / (double) numchars > 0.85)
758 return NUCLEOTIDE; /* 0 */
759 else if ((double) numbins / (double) numchars > 0.2)
760 return BINARY; /* 2 */
761 else
762 return AMINOACID; /* 1 */
763 } /* guessdatatype */
764
765 /******************/
766
767
768 /* translate characters into format used by ML engine */
translatedataset(int maxspc,int maxseqc,int * maxsite,cmatrix seqchars,cmatrix * seqchar,ivector * seqgapchar,ivector * seqotherchar)769 void translatedataset(int maxspc, int maxseqc, int *maxsite, cmatrix seqchars, cmatrix *seqchar, ivector *seqgapchar, ivector *seqotherchar)
770 {
771 int notu, sn, co;
772 char c;
773 cvector code;
774
775 if (*seqgapchar != NULL) free_ivector(*seqgapchar);
776 if (*seqotherchar != NULL) free_ivector(*seqotherchar);
777 Seqgapchar = new_ivector(Maxspc);
778 Seqotherchar = new_ivector(Maxspc);
779
780 /* Seqgapchar = (int *) calloc(maxspc, sizeof(int)); */
781 /* Seqotherchar = (int *) calloc(maxspc, sizeof(int)); */
782
783
784 /* determine maxsite - number of ML sites per taxon */
785 if (data_optn == NUCLEOTIDE && SH_optn) {
786 if (SHcodon)
787 *maxsite = maxseqc / 3;
788 else
789 *maxsite = maxseqc / 2; /* assume doublets */
790
791 } else
792 *maxsite = maxseqc;
793 if (data_optn == NUCLEOTIDE && (*maxsite % 3) == 0 && !SH_optn) {
794 if (codon_optn == 1 || codon_optn == 2 || codon_optn == 3)
795 *maxsite = *maxsite / 3; /* only one of the three codon positions */
796 if (codon_optn == 4)
797 *maxsite = 2*(*maxsite / 3); /* 1st + 2nd codon positions */
798 }
799
800 /* reserve memory */
801 if (*seqchar != NULL) free_cmatrix(*seqchar);
802 *seqchar = new_cmatrix(maxspc, *maxsite);
803
804 /* code length */
805 if (data_optn == NUCLEOTIDE && SH_optn)
806 code = new_cvector(2);
807 else
808 code = new_cvector(1);
809
810 /* decode characters */
811 if (data_optn == NUCLEOTIDE && SH_optn) { /* SH doublets */
812
813 for (notu = 0; notu < maxspc; notu++) {
814 for (sn = 0; sn < *maxsite; sn++) {
815 for (co = 0; co < 2; co++) {
816 if (SHcodon)
817 c = seqchars[notu][sn*3 + co];
818 else
819 c = seqchars[notu][sn*2 + co];
820 code[co] = c;
821 }
822 (*seqchar)[notu][sn] = code2int(code, &((*seqgapchar)[notu]), &((*seqotherchar)[notu]));
823 }
824 }
825
826 } else if (!(data_optn == NUCLEOTIDE && (maxseqc % 3) == 0)) { /* use all */
827
828 for (notu = 0; notu < maxspc; notu++) {
829 for (sn = 0; sn < *maxsite; sn++) {
830 code[0] = seqchars[notu][sn];
831 (*seqchar)[notu][sn] = code2int(code, &((*seqgapchar)[notu]), &((*seqotherchar)[notu]));
832 }
833 }
834
835 } else { /* codons */
836
837 for (notu = 0; notu < maxspc; notu++) {
838 for (sn = 0; sn < *maxsite; sn++) {
839 if (codon_optn == 1 || codon_optn == 2 || codon_optn == 3)
840 code[0] = seqchars[notu][sn*3+codon_optn-1];
841 else if (codon_optn == 4) {
842 if ((sn % 2) == 0)
843 code[0] = seqchars[notu][(sn/2)*3];
844 else
845 code[0] = seqchars[notu][((sn-1)/2)*3+1];
846 } else
847 code[0] = seqchars[notu][sn];
848 (*seqchar)[notu][sn] = code2int(code, &((*seqgapchar)[notu]), &((*seqotherchar)[notu]));
849 }
850 }
851
852 }
853
854 free_cvector(code);
855
856 } /* translatedataset */
857
858 /******************/
859
860
861
862 /* estimate mean base frequencies from translated data set */
estimatebasefreqs()863 void estimatebasefreqs()
864 {
865 int tpmradix, i, j;
866 uli all, *gene;
867
868 tpmradix = gettpmradix();
869
870 if (Freqtpm != NULL) free_dvector(Freqtpm);
871 Freqtpm = new_dvector(tpmradix);
872
873 if (Basecomp != NULL) free_imatrix(Basecomp);
874 Basecomp = new_imatrix(Maxspc, tpmradix);
875
876 gene = (uli *) calloc((size_t) (tpmradix + 1), sizeof(uli));
877 if (gene == NULL) maerror("gene in estimatebasefreqs");
878
879 for (i = 0; i < tpmradix + 1; i++) gene[i] = 0;
880 for (i = 0; i < Maxspc; i++)
881 for (j = 0; j < tpmradix; j++) Basecomp[i][j] = 0;
882 for (i = 0; i < Maxspc; i++)
883 for (j = 0; j < Maxsite; j++) {
884 gene[(int) Seqchar[i][j]]++;
885 if (Seqchar[i][j] != tpmradix) Basecomp[i][(int) Seqchar[i][j]]++;
886 }
887
888 all = Maxspc * Maxsite - gene[tpmradix];
889 if (all != 0) { /* normal case */
890 for (i = 0; i < tpmradix; i++)
891 Freqtpm[i] = (double) gene[i] / (double) all;
892 } else { /* pathological case with no unique character in data set */
893 for (i = 0; i < tpmradix; i++)
894 Freqtpm[i] = 1.0 / (double) tpmradix;
895 }
896
897 free(gene);
898
899 Frequ_optn = TRUE;
900 } /* estimatebasefreqs */
901
902 /******************/
903
904
905 /* guess model of substitution */
guessmodel()906 void guessmodel()
907 {
908 double c1, c2, c3, c4, c5, c6;
909 dvector f;
910 dmatrix a;
911 int i;
912
913 Dayhf_optn = FALSE;
914 Jtt_optn = TRUE;
915 mtrev_optn = FALSE;
916 cprev_optn = FALSE;
917 blosum62_optn = FALSE;
918 vtmv_optn = FALSE;
919 wag_optn = FALSE;
920 TSparam = 2.0;
921 YRparam = 1.0;
922 optim_optn = TRUE;
923 HKY_optn = TRUE;
924 print_GTR_optn = TRUE;
925 GTR_optn = FALSE;
926 TN_optn = FALSE;
927
928 if (data_optn == AMINOACID) { /* amino acids */
929
930 /* chi2 fit to amino acid frequencies */
931
932 f = new_dvector(20);
933 a = new_dmatrix(20,20);
934 /* chi2 distance Dayhoff */
935 dyhfdata(a, f);
936 c1 = 0;
937 for (i = 0; i < 20; i++)
938 c1 = c1 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]);
939 /* chi2 distance JTT */
940 jttdata(a, f);
941 c2 = 0;
942 for (i = 0; i < 20; i++)
943 c2 = c2 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]);
944 /* chi2 distance mtREV */
945 mtrevdata(a, f);
946 c3 = 0;
947 for (i = 0; i < 20; i++)
948 c3 = c3 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]);
949 /* chi2 distance VT */
950 vtmvdata(a, f);
951 c4 = 0;
952 for (i = 0; i < 20; i++)
953 c4 = c4 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]);
954 /* chi2 distance WAG */
955 wagdata(a, f);
956 c5 = 0;
957 for (i = 0; i < 20; i++)
958 c5 = c5 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]);
959 /* chi2 distance cpREV */
960 cprev45data(a, f);
961 c6 = 0;
962 for (i = 0; i < 20; i++)
963 c6 = c6 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]);
964
965 free_dvector(f);
966 free_dmatrix(a);
967
968 #ifndef CPREV
969 if ((c1 < c2) && (c1 < c3) && (c1 < c4) && (c1 < c5)) {
970 /* c1 -> Dayhoff */
971 Dayhf_optn = TRUE;
972 Jtt_optn = FALSE;
973 mtrev_optn = FALSE;
974 cprev_optn = FALSE;
975 vtmv_optn = FALSE;
976 wag_optn = FALSE;
977 fprintf(STDOUT, "(consists very likely of amino acids encoded on nuclear DNA)\n");
978 } else {
979 if ((c2 < c3) && (c2 < c4) && (c2 < c5)) {
980 /* c2 -> JTT */
981 Dayhf_optn = FALSE;
982 Jtt_optn = TRUE;
983 mtrev_optn = FALSE;
984 cprev_optn = FALSE;
985 vtmv_optn = FALSE;
986 wag_optn = FALSE;
987 fprintf(STDOUT, "(consists very likely of amino acids encoded on nuclear DNA)\n");
988 } else {
989 if ((c3 < c4) && (c3 < c5)) {
990 /* c3 -> mtREV */
991 Dayhf_optn = FALSE;
992 Jtt_optn = FALSE;
993 mtrev_optn = TRUE;
994 cprev_optn = FALSE;
995 vtmv_optn = FALSE;
996 wag_optn = FALSE;
997 fprintf(STDOUT, "(consists very likely of amino acids encoded on mtDNA)\n");
998 } else {
999 if ((c4 < c5)) {
1000 /* c4 -> VT */
1001 Dayhf_optn = FALSE;
1002 Jtt_optn = FALSE;
1003 mtrev_optn = FALSE;
1004 cprev_optn = FALSE;
1005 vtmv_optn = TRUE;
1006 wag_optn = FALSE;
1007 fprintf(STDOUT, "(consists very likely of amino acids encoded on nuclear DNA)\n");
1008 } else {
1009 /* c5 -> WAG */
1010 Dayhf_optn = FALSE;
1011 Jtt_optn = FALSE;
1012 mtrev_optn = FALSE;
1013 cprev_optn = FALSE;
1014 vtmv_optn = FALSE;
1015 wag_optn = TRUE;
1016 fprintf(STDOUT, "(consists very likely of amino acids encoded on nuclear DNA)\n");
1017 } /* if c4 else c5 */
1018 } /* if c3 else c4 */
1019 } /* if c2 */
1020 } /* if c1 */
1021
1022 #else /* CPREV */
1023
1024 if ((c1 < c2) && (c1 < c3) && (c1 < c4) && (c1 < c5) && (c1 < c6)) {
1025 /* c1 -> Dayhoff */
1026 Dayhf_optn = TRUE;
1027 Jtt_optn = FALSE;
1028 mtrev_optn = FALSE;
1029 cprev_optn = FALSE;
1030 vtmv_optn = FALSE;
1031 wag_optn = FALSE;
1032 fprintf(STDOUT, "(consists very likely of amino acids encoded on nuclear DNA)\n");
1033 } else {
1034 if ((c2 < c3) && (c2 < c4) && (c2 < c5) && (c2 < c6)) {
1035 /* c2 -> JTT */
1036 Dayhf_optn = FALSE;
1037 Jtt_optn = TRUE;
1038 mtrev_optn = FALSE;
1039 cprev_optn = FALSE;
1040 vtmv_optn = FALSE;
1041 wag_optn = FALSE;
1042 fprintf(STDOUT, "(consists very likely of amino acids encoded on nuclear DNA)\n");
1043 } else {
1044 if ((c3 < c4) && (c3 < c5) && (c3 < c6)) {
1045 /* c3 -> mtREV */
1046 Dayhf_optn = FALSE;
1047 Jtt_optn = FALSE;
1048 mtrev_optn = TRUE;
1049 cprev_optn = FALSE;
1050 vtmv_optn = FALSE;
1051 wag_optn = FALSE;
1052 fprintf(STDOUT, "(consists very likely of amino acids encoded on mtDNA)\n");
1053 } else {
1054 if ((c4 < c5) && (c4 < c6)) {
1055 /* c4 -> VT */
1056 Dayhf_optn = FALSE;
1057 Jtt_optn = FALSE;
1058 mtrev_optn = FALSE;
1059 cprev_optn = FALSE;
1060 vtmv_optn = TRUE;
1061 wag_optn = FALSE;
1062 fprintf(STDOUT, "(consists very likely of amino acids encoded on nuclear DNA)\n");
1063 } else {
1064 if (c5 < c6) {
1065 /* c5 -> WAG */
1066 Dayhf_optn = FALSE;
1067 Jtt_optn = FALSE;
1068 mtrev_optn = FALSE;
1069 cprev_optn = FALSE;
1070 vtmv_optn = FALSE;
1071 wag_optn = TRUE;
1072 fprintf(STDOUT, "(consists very likely of amino acids encoded on nuclear DNA)\n");
1073 } else {
1074 /* if (c6) */
1075 /* c6 -> cpREV */
1076 Dayhf_optn = FALSE;
1077 Jtt_optn = FALSE;
1078 mtrev_optn = FALSE;
1079 cprev_optn = TRUE;
1080 vtmv_optn = FALSE;
1081 wag_optn = FALSE;
1082 fprintf(STDOUT, "(consists very likely of amino acids encoded on cpDNA)\n");
1083 } /* if c5 else c6 */
1084 } /* if c4 else c5 */
1085 } /* if c3 else c4 */
1086 } /* if c2 */
1087 } /* if c1 */
1088 #endif /* CPREV */
1089
1090 } else if (data_optn == NUCLEOTIDE) {
1091 fprintf(STDOUT, "(consists very likely of nucleotides)\n");
1092 } else {
1093 fprintf(STDOUT, "(consists very likely of binary state data)\n");
1094 }
1095 } /* guessmodel */
1096
1097
1098 /******************************************************************************/
1099
1100 /* parts outsourced to treesort.c/treesort.h */
1101 #if 0 /* TREESORT_H */
1102 #endif /* TREESORT_H */
1103
1104 /* parts outsourced to consensus.c/consensus.h */
1105 #if 0 /* CONSENSUS_H */
1106 #endif /* CONSENSUS_H */
1107
1108
1109 /******************************************************************************/
1110 /* storing and evaluating quartet branching information */
1111 /******************************************************************************/
1112
1113 /* general remarks:
1114
1115 for a quartet with the taxa a, b, c, d there are
1116 three possible binary trees:
1117
1118 1) (a,b)-(c,d)
1119 2) (a,c)-(b,d)
1120 3) (a,d)-(b,c)
1121
1122 For every quartet information about its branching structure is
1123 stored. With the functions readquartet and writequartet
1124 this information can be accessed. For every quartet (a,b,c,d)
1125 with a < b < c < d (taxa) the branching information is encoded
1126 using 4 bits:
1127
1128 value 8 4 2 1
1129 +-------------+-------------+-------------+-------------+
1130 | not used | tree 3 | tree 2 | tree 1 |
1131 +-------------+-------------+-------------+-------------+
1132
1133 If the branching structure of the taxa corresponds to one of the
1134 three trees the corresponding bit is set. If the branching structure
1135 is unclear because two of the three trees have the same maximum
1136 likelihood value the corresponding two bits are set. If the branching
1137 structure is completely unknown all the bits are set (the highest
1138 bit is always cleared because it is not used).
1139
1140 */
1141
1142 /* allocate memory for quartets */
callocquartets(int taxa)1143 unsigned char *callocquartets(int taxa)
1144 {
1145 uli nc, numch;
1146 unsigned char *qinfo;
1147
1148 /* compute number of quartets */
1149 Numquartets = (uli) taxa*(taxa-1)*(taxa-2)*(taxa-3)/24;
1150 if (Numquartets % 2 == 0) { /* even number */
1151 numch = Numquartets/2;
1152 } else { /* odd number */
1153 numch = (Numquartets + 1)/2;
1154 }
1155 /* allocate memory */
1156 qinfo = (unsigned char *) calloc((size_t) numch, sizeof(unsigned char));
1157 if (qinfo == NULL) maerror("quartetinfo in callocquartets");
1158 for (nc = 0; nc < numch; nc++) qinfo[nc] = 0;
1159 return(qinfo);
1160 } /* callocquartets */
1161
1162 /* free quartet memory */
freequartets()1163 void freequartets()
1164 {
1165 free(quartetinfo);
1166 } /* freequartets */
1167
1168 /**************/
1169
1170 /* read quartet info - a < b < c < d */
readquartet(int a,int b,int c,int d)1171 unsigned char readquartet(int a, int b, int c, int d)
1172 {
1173 uli qnum;
1174
1175 if (! ((a < b) && (b < c) && (c < d))) {
1176 fprintf(stderr, "\n\n\nHALT: PLEASE REPORT ERROR HS2 TO DEVELOPERS! (%d,%d,%d,%d)\n", a,b,c,d);
1177 # if PARALLEL
1178 PP_Finalize();
1179 # endif
1180 exit (999);
1181
1182 }
1183 qnum = (uli) a
1184 + (uli) b*(b-1)/2
1185 + (uli) c*(c-1)*(c-2)/6
1186 + (uli) d*(d-1)*(d-2)*(d-3)/24;
1187 if (qnum % 2 == 0) { /* even number */
1188 /* bits 0 to 3 */
1189 return (quartetinfo[qnum/2] & (unsigned char) 15);
1190 } else { /* odd number */
1191 /* bits 4 to 7 */
1192 return ((quartetinfo[(qnum-1)/2] & (unsigned char) 240)>>4);
1193 }
1194 } /* readquartet */
1195
1196 /**************/
1197
1198 /* write quartet info - a < b < c < d, 0 <= info <= 15 */
writequartet(int a,int b,int c,int d,unsigned char info)1199 void writequartet(int a, int b, int c, int d, unsigned char info)
1200 {
1201 uli qnum;
1202
1203 qnum = (uli) a
1204 + (uli) b*(b-1)/2
1205 + (uli) c*(c-1)*(c-2)/6
1206 + (uli) d*(d-1)*(d-2)*(d-3)/24;
1207 if (qnum % 2 == 0) { /* even number */
1208 /* bits 0 to 3 */
1209 quartetinfo[qnum/2] =
1210 ((quartetinfo[qnum/2] & (unsigned char) 240) |
1211 (info & (unsigned char) 15));
1212 } else { /* odd number */
1213 /* bits 4 to 7 */
1214 quartetinfo[(qnum-1)/2] =
1215 ((quartetinfo[(qnum-1)/2] & (unsigned char) 15) |
1216 ((info & (unsigned char) 15)<<4));
1217 }
1218 } /* writequartet */
1219
1220 /**************/
1221 /* prototypes */
1222 int openfiletowrite(FILE **, char[], char[]);
1223 void closefile(FILE *);
1224
1225 /* sorts three doubles in descending order */
sort3doubles(dvector num,ivector order)1226 void sort3doubles(dvector num, ivector order)
1227 {
1228 if (num[0] > num[1]) {
1229 if(num[2] > num[0]) {
1230 order[0] = 2;
1231 order[1] = 0;
1232 order[2] = 1;
1233 } else if (num[2] < num[1]) {
1234 order[0] = 0;
1235 order[1] = 1;
1236 order[2] = 2;
1237 } else {
1238 order[0] = 0;
1239 order[1] = 2;
1240 order[2] = 1;
1241 }
1242 } else {
1243 if(num[2] > num[1]) {
1244 order[0] = 2;
1245 order[1] = 1;
1246 order[2] = 0;
1247 } else if (num[2] < num[0]) {
1248 order[0] = 1;
1249 order[1] = 0;
1250 order[2] = 2;
1251 } else {
1252 order[0] = 1;
1253 order[1] = 2;
1254 order[2] = 0;
1255 }
1256 }
1257 } /* sort3doubles */
1258
1259 /*************************/
1260
1261 /* compute Bayesian weights from log-lkls d1, d2, d3 */
loglkl2weight(int a,int b,int c,int i,double d1,double d2,double d3,int usebestq)1262 unsigned char loglkl2weight(int a,
1263 int b,
1264 int c,
1265 int i,
1266 double d1,
1267 double d2,
1268 double d3,
1269 int usebestq)
1270 {
1271 double onethird;
1272 unsigned char treebits[3];
1273 double templog;
1274 unsigned char tmpweight;
1275 double temp;
1276 double temp1, temp2, temp3;
1277 unsigned char discreteweight[3];
1278
1279 double tttqweight[3];
1280 double tttsqdiff[3];
1281 int tttsqorder[3];
1282 int tttqworder[3];
1283
1284 tttqweight[0] = d1;
1285 tttqweight[1] = d2;
1286 tttqweight[2] = d3;
1287
1288 onethird = 1.0/3.0;
1289 treebits[0] = (unsigned char) 1;
1290 treebits[1] = (unsigned char) 2;
1291 treebits[2] = (unsigned char) 4;
1292
1293 /* sort in descending order */
1294 sort3doubles(tttqweight, tttqworder);
1295
1296 if (usebestq) {
1297 tttsqorder[2] = 2;
1298 discreteweight[tttsqorder[2]] = treebits[tttqworder[0]];
1299 if (tttqweight[tttqworder[0]] == tttqweight[tttqworder[1]]) {
1300 discreteweight[tttsqorder[2]] = discreteweight[tttsqorder[2]] || treebits[tttqworder[1]];
1301 if (tttqweight[tttqworder[1]] == tttqweight[tttqworder[2]]) {
1302 discreteweight[tttsqorder[2]] = discreteweight[tttsqorder[2]] || treebits[tttqworder[2]];
1303 discreteweight[tttsqorder[2]] = 7;
1304 }
1305 }
1306 } else {
1307
1308 /* compute Bayesian weights */
1309 templog = tttqweight[tttqworder[1]]-tttqweight[tttqworder[0]];
1310 if(templog < -TP_MAX_EXP_DIFF) /* possible, since 1.0+exp(>36) == 1.0 */
1311 tttqweight[tttqworder[1]] = 0.0;
1312 else
1313 tttqweight[tttqworder[1]] = exp(templog);
1314
1315 templog = tttqweight[tttqworder[2]]-tttqweight[tttqworder[0]];
1316 if(templog < -TP_MAX_EXP_DIFF) /* possible, since 1.0+exp(>36) == 1.0 */
1317 tttqweight[tttqworder[2]] = 0.0;
1318 else
1319 tttqweight[tttqworder[2]] = exp(templog);
1320
1321 tttqweight[tttqworder[0]] = 1.0;
1322
1323 temp = tttqweight[0] + tttqweight[1] + tttqweight[2];
1324
1325 tttqweight[0] = tttqweight[0]/temp;
1326 tttqweight[1] = tttqweight[1]/temp;
1327 tttqweight[2] = tttqweight[2]/temp;
1328
1329 /* square deviations */
1330 temp1 = 1.0 - tttqweight[tttqworder[0]];
1331 tttsqdiff[0] = temp1 * temp1 +
1332 tttqweight[tttqworder[1]] * tttqweight[tttqworder[1]] +
1333 tttqweight[tttqworder[2]] * tttqweight[tttqworder[2]];
1334 discreteweight[0] = treebits[tttqworder[0]];
1335
1336 temp1 = 0.5 - tttqweight[tttqworder[0]];
1337 temp2 = 0.5 - tttqweight[tttqworder[1]];
1338 tttsqdiff[1] = temp1 * temp1 + temp2 * temp2 +
1339 tttqweight[tttqworder[2]] * tttqweight[tttqworder[2]];
1340 discreteweight[1] = treebits[tttqworder[0]] + treebits[tttqworder[1]];
1341
1342 temp1 = onethird - tttqweight[tttqworder[0]];
1343 temp2 = onethird - tttqweight[tttqworder[1]];
1344 temp3 = onethird - tttqweight[tttqworder[2]];
1345 tttsqdiff[2] = temp1 * temp1 + temp2 * temp2 + temp3 * temp3;
1346 discreteweight[2] = (unsigned char) 7;
1347
1348 /* sort in descending order */
1349 sort3doubles(tttsqdiff, tttsqorder);
1350 }
1351
1352 tmpweight = discreteweight[tttsqorder[2]];
1353 return(tmpweight);
1354 } /* loglkl2weight */
1355
1356
1357 /*************************/
1358
1359 /* checks out all possible quartets */
computeallquartets()1360 void computeallquartets()
1361 {
1362 double onethird;
1363 uli nq;
1364 unsigned char treebits[3];
1365 # if ! PARALLEL
1366 unsigned char tmpweight;
1367 FILE *lhfp;
1368 int a, b, c, i;
1369 # endif
1370
1371 onethird = 1.0/3.0;
1372 treebits[0] = (unsigned char) 1;
1373 treebits[1] = (unsigned char) 2;
1374 treebits[2] = (unsigned char) 4;
1375
1376 if (show_optn) { /* list all unresolved quartets */
1377 openfiletowrite(&unresfp, UNRESOLVED, "unresolved quartet trees");
1378 fprintf(unresfp, "List of all completely unresolved quartets:\n\n");
1379 }
1380
1381 nq = 0;
1382 badqs = 0;
1383
1384 /* start timer - percentage of completed quartets */
1385 time(&time0);
1386 time1 = time0;
1387 mflag = 0;
1388
1389 # if PARALLEL
1390 {
1391 schedtype sched;
1392 int flag;
1393 MPI_Status stat;
1394
1395 int dest = 1;
1396 uli qaddr =0;
1397 uli qamount=0;
1398 int qblocksent = 0;
1399 int apr;
1400 uli Sumquartets;
1401 uli sq, noq;
1402
1403 uli tmpfullresqs;
1404 uli tmppartresqs;
1405 uli tmpunresqs;
1406 uli tmpmissingqs;
1407
1408 Sumquartets = numquarts(Maxspc);
1409 initsched(&sched, Sumquartets, PP_NumProcs-1, 4);
1410 qamount=SCHEDALG_ML_STEP(&sched);
1411
1412 while (qamount > 0) {
1413 if (PP_emptyslave()) {
1414 PP_RecvQuartBlock(0, &sq, &noq, quartetinfo, &tmpfullresqs, &tmppartresqs, &tmpunresqs, &tmpmissingqs, &usebestq_optn, &apr);
1415 qblocksent -= noq;
1416 }
1417 dest = PP_getslave();
1418 PP_SendDoQuartBlock(dest, qaddr, qamount, usebestq_optn, (approxqp ? APPROX : EXACT));
1419 qblocksent += qamount;
1420 qaddr += qamount;
1421 qamount=SCHEDALG_ML_STEP(&sched);
1422
1423 MPI_Iprobe(MPI_ANY_SOURCE, PP_QUARTBLOCKSPECS, PP_Comm, &flag, &stat);
1424 while (flag) {
1425 PP_RecvQuartBlock(0, &sq, &noq, quartetinfo, &tmpfullresqs, &tmppartresqs, &tmpunresqs, &tmpmissingqs, &usebestq_optn, &apr);
1426 qblocksent -= noq;
1427 MPI_Iprobe(MPI_ANY_SOURCE, PP_QUARTBLOCKSPECS, PP_Comm, &flag, &stat);
1428 }
1429 checktime(&time0, &time1, &time2, qaddr - qblocksent, Sumquartets, &mflag);
1430 }
1431 while (qblocksent > 0) {
1432 PP_RecvQuartBlock(0, &sq, &noq, quartetinfo, &tmpfullresqs, &tmppartresqs, &tmpunresqs, &tmpmissingqs, &usebestq_optn, &apr);
1433 qblocksent -= noq;
1434 checktime(&time0, &time1, &time2, qaddr - qblocksent, Sumquartets, &mflag);
1435 }
1436 }
1437 # else /* ! PARALLEL */
1438
1439 addtimes(GENERAL, &tarr);
1440 if (savequartlh_optn) {
1441 openfiletowrite(&lhfp, ALLQUARTLH, "all quartet likelihoods");
1442 if (saveqlhbin_optn) writetpqfheader(Maxspc, lhfp, 3);
1443 else writetpqfheader(Maxspc, lhfp, 4);
1444 }
1445
1446 for (i = 3; i < Maxspc; i++)
1447 for (c = 2; c < i; c++)
1448 for (b = 1; b < c; b++)
1449 for (a = 0; a < b; a++) {
1450 nq++;
1451
1452 /* generate message every >TIMECHECK_INTERVAL seconds */
1453 /* check timer */
1454 checktime(&time0, &time1, &time2, nq, Numquartets, &mflag);
1455
1456 /* maximum likelihood values */
1457
1458 /* exact or approximate maximum likelihood values */
1459 compute_quartlklhds(a,b,c,i,&qweight[0],&qweight[1],&qweight[2], (approxqp ? APPROX : EXACT));
1460
1461 if (savequartlh_optn) {
1462 if (saveqlhbin_optn)
1463 fwrite(qweight, sizeof(double), 3, lhfp);
1464 else
1465 fprintf(lhfp, "(%d,%d,%d,%d)\t%f\t%f\t%f\n", a, b, c, i,
1466 qweight[0], qweight[1], qweight[2]);
1467 }
1468
1469 tmpweight = loglkl2weight(a, b, c, i, qweight[0], qweight[1], qweight[2], usebestq_optn);
1470 /* tmpweight = loglkl2weight(a, b, c, i, d1, d2, d3, usebestq_optn); */
1471
1472 /* determine best discrete weight */
1473 /* writequartet(a, b, c, i, discreteweight[sqorder[2]]); */
1474 writequartet(a, b, c, i, tmpweight);
1475
1476
1477 /* compute sums of topologies per taxon, step by step */
1478 ++(qinfomatr[8][a]);
1479 ++(qinfomatr[8][b]);
1480 ++(qinfomatr[8][c]);
1481 ++(qinfomatr[8][i]);
1482 ++(qinfomatr[tmpweight][a]);
1483 ++(qinfomatr[tmpweight][b]);
1484 ++(qinfomatr[tmpweight][c]);
1485 ++(qinfomatr[tmpweight][i]);
1486
1487 if ((tmpweight <= 2) || (tmpweight == 4)) {
1488 fullresqs++;
1489 } else {
1490 if (tmpweight == 7) {
1491 unresqs++;
1492 } else {
1493 if (tmpweight == 0) {
1494 missingqs++;
1495 } else {
1496 partresqs++;
1497 }
1498 }
1499 }
1500
1501 /* counting completely unresolved quartets */
1502 if (tmpweight == 7) {
1503 badqs++;
1504 badtaxon[a]++;
1505 badtaxon[b]++;
1506 badtaxon[c]++;
1507 badtaxon[i]++;
1508 if (show_optn) {
1509 fputid10(unresfp, a);
1510 fprintf(unresfp, " ");
1511 fputid10(unresfp, b);
1512 fprintf(unresfp, " ");
1513 fputid10(unresfp, c);
1514 fprintf(unresfp, " ");
1515 fputid(unresfp, i);
1516 fprintf(unresfp, "\n");
1517 }
1518 }
1519 addtimes(QUARTETS, &tarr);
1520 }
1521 if (savequartlh_optn) {
1522 closefile(lhfp);
1523 }
1524 if (show_optn)
1525 closefile(unresfp);
1526 if (mflag == 1)
1527 fprintf(STDOUT, "\n");
1528 # endif /* PARALLEL */
1529
1530 } /* computeallquartets */
1531
1532 /* trueID (HAS) */
1533 /* check the branching structure between the leaves (not the taxa!)
1534 A, B, C, and I (A, B, C, I don't need to be ordered). As a result,
1535 the two leaves that are closer related to each other than to leaf I
1536 are found in chooseX and chooseY. If the branching structure is
1537 not uniquely defined, chooseX and chooseY are chosen randomly
1538 from the possible taxa */
checkquartet_trueID(int A,int B,int C,int I,int * trueID,int * chooseX,int * chooseY)1539 void checkquartet_trueID(int A, /* quartet leaf idx in permutation array */
1540 int B, /* dito. */
1541 int C, /* dito. */
1542 int I, /* dito., to be inserted */
1543 int *trueID, /* permutation array */
1544 int *chooseX, /* (chooseX,chooseY | x,I) */
1545 int *chooseY) /* chooseX+Y are non-neighbors of I */
1546 {
1547 int i, j; /* counter */
1548 int a, b; /* temp variable for sorting */
1549 int taxon[5]; /* sorting array for taxa from trueID */
1550 int leaf[5]; /* sorting array for indices in trueID */
1551 int ipos; /* position of leaf I in sorted array */
1552 unsigned char qresult; /* quartet topology */
1553 int notunique = FALSE; /* non-unique topology */
1554
1555 /* The relationship between leaves and taxa is defined by trueID */
1556 taxon[1] = trueID[A]; /* taxon number */
1557 leaf[1] = A; /* leaf index in permutation */
1558 taxon[2] = trueID[B];
1559 leaf[2] = B;
1560 taxon[3] = trueID[C];
1561 leaf[3] = C;
1562 taxon[4] = trueID[I];
1563 leaf[4] = I;
1564
1565 /* sort for taxa */
1566 /* Source: Numerical Recipes (PIKSR2.C) */
1567 for (j = 2; j <= 4; j++) {
1568 a = taxon[j];
1569 b = leaf[j];
1570 i = j-1;
1571 while (i > 0 && taxon[i] > a) {
1572 taxon[i+1] = taxon[i];
1573 leaf[i+1] = leaf[i];
1574 i--;
1575 }
1576 taxon[i+1] = a;
1577 leaf[i+1] = b;
1578 }
1579
1580 /* where is leaf I ? */
1581 ipos = 1;
1582 while (leaf[ipos] != I) ipos++;
1583
1584 /* look at sequence quartet */
1585 qresult = readquartet(taxon[1], taxon[2], taxon[3], taxon[4]);
1586
1587 /* chooseX and chooseY */
1588 /* qresult is not unique, a covered unique qresult is chosen */
1589 /* and the while loop is circled again (optimizable?) (HAS) */
1590 do {
1591 switch (qresult) {
1592
1593 /* one single branching structure */
1594
1595 /* 001 */
1596 case 1: if (ipos == 1 || ipos == 2) {
1597 *chooseX = leaf[3];
1598 *chooseY = leaf[4];
1599 } else {
1600 *chooseX = leaf[1];
1601 *chooseY = leaf[2];
1602 }
1603 notunique = FALSE;
1604 break;
1605
1606 /* 010 */
1607 case 2: if (ipos == 1 || ipos == 3) {
1608 *chooseX = leaf[2];
1609 *chooseY = leaf[4];
1610 } else {
1611 *chooseX = leaf[1];
1612 *chooseY = leaf[3];
1613 }
1614 notunique = FALSE;
1615 break;
1616
1617 /* 100 */
1618 case 4: if (ipos == 1 || ipos == 4) {
1619 *chooseX = leaf[2];
1620 *chooseY = leaf[3];
1621 } else {
1622 *chooseX = leaf[1];
1623 *chooseY = leaf[4];
1624 }
1625 notunique = FALSE;
1626 break;
1627
1628 /* two possible branching structures */
1629
1630 /* 011 */
1631 case 3: if (randominteger(2)) qresult = 1;
1632 else qresult = 2;
1633 notunique = TRUE;
1634 break;
1635
1636 /* 101 */
1637 case 5: if (randominteger(2)) qresult = 1;
1638 else qresult = 4;
1639 notunique = TRUE;
1640 break;
1641
1642 /* 110 */
1643 case 6: if (randominteger(2)) qresult = 2;
1644 else qresult = 4;
1645 notunique = TRUE;
1646 break;
1647
1648 /* three possible branching structures */
1649
1650 /* 111 */
1651 case 7: qresult = (1 << randominteger(3)); /* 1, 2, or 4 */
1652 notunique = TRUE;
1653 break;
1654
1655 /* 000 */ /* empty quartet: missing data -> OK, otherwise ERROR */
1656 case 0: if (readsubset_optn) {
1657 *chooseX = -1;
1658 *chooseY = -1;
1659 } else {
1660 # if PARALLEL
1661 fprintf(STDOUT, "\n\n\n(%2d)HALT: PLEASE REPORT ERROR K1-PARALLEL TO DEVELOPERS : empty (%d,%d,%d,%d) = %ld\n\n\n",
1662 PP_Myid, taxon[1], taxon[2], taxon[3], taxon[4],
1663 quart2num(taxon[1], taxon[2], taxon[3], taxon[4]));
1664 PP_Finalize();
1665 exit (999);
1666 # else
1667 fprintf(STDOUT, "\n\n\nHALT: PLEASE REPORT ERROR K1 TO DEVELOPERS: empty (%d,%d,%d,%d) = %ld\n\n\n",
1668 taxon[1], taxon[2], taxon[3], taxon[4],
1669 quart2num(taxon[1], taxon[2], taxon[3], taxon[4]));
1670 exit (999);
1671 # endif
1672 }
1673 notunique = FALSE;
1674 break;
1675
1676 default: /* Program error [checkquartet] */
1677 # if PARALLEL
1678 fprintf(STDOUT, "\n\n\n(%2d)HALT: PLEASE REPORT ERROR K2-PARALLEL TO DEVELOPERS (%d,%d,%d,%d) = %ld (%d)\n\n\n",
1679 PP_Myid, taxon[1], taxon[2], taxon[3], taxon[4],
1680 quart2num(taxon[1], taxon[2], taxon[3], taxon[4]), qresult);
1681 PP_Finalize();
1682 exit (999);
1683 # else
1684 fprintf(STDOUT, "\n\n\nHALT: PLEASE REPORT ERROR K2 TO DEVELOPERS (%d,%d,%d,%d) = %ld (%d)\n\n\n",
1685 taxon[1], taxon[2], taxon[3], taxon[4],
1686 quart2num(taxon[1], taxon[2], taxon[3], taxon[4]), qresult);
1687 exit (999);
1688 # endif
1689
1690 } /* switch (qresult) */
1691 } while (notunique);
1692
1693 return;
1694 } /* checkquartet_trueID */
1695
1696 /* un-trueID-ed (HAS) */
1697 /* check the branching structure between the leaves (not the taxa!)
1698 A, B, C, and I (A, B, C, I don't need to be ordered). As a result,
1699 the two leaves that are closer related to each other than to leaf I
1700 are found in chooseX and chooseY. If the branching structure is
1701 not uniquely defined, chooseX and chooseY are chosen randomly
1702 from the possible taxa */
checkquartet(int A,int B,int C,int I,int * chooseX,int * chooseY)1703 void checkquartet(int A, /* quartet taxon ID */
1704 int B, /* dito. */
1705 int C, /* dito. */
1706 int I, /* dito., to be inserted */
1707 int *chooseX, /* (chooseX,chooseY | x,I) */
1708 int *chooseY) /* chooseX+Y are non-neighbors of I */
1709 {
1710 int i, j; /* counter */
1711 int a; /* temp variable for sorting */
1712 int leaf[5]; /* sorting array */
1713 int ipos; /* position of leaf I in sorted array */
1714 unsigned char qresult; /* quartet topology */
1715 int notunique = FALSE; /* non-unique topology */
1716
1717 /* The relationship between leaves and taxa is defined by trueID */
1718 leaf [1] = A; /* taxon ID of leaf 1 */
1719 leaf [2] = B; /* taxon ID of leaf 2 */
1720 leaf [3] = C; /* taxon ID of leaf 3 */
1721 leaf [4] = I; /* taxon ID of leaf 4 */
1722
1723 /* sort for taxa */
1724 /* Source: Numerical Recipes (PIKSR2.C) */
1725 for (j = 2; j <= 4; j++) {
1726 a = leaf[j];
1727 i = j-1;
1728 while (i > 0 && leaf[i] > a) {
1729 leaf[i+1] = leaf[i];
1730 i--;
1731 }
1732 leaf[i+1] = a;
1733 }
1734
1735 /* where is leaf I ? */
1736 ipos = 1;
1737 while (leaf[ipos] != I) ipos++;
1738
1739 /* look at sequence quartet */
1740 qresult = readquartet(leaf[1], leaf[2], leaf[3], leaf[4]);
1741
1742 /* chooseX and chooseY */
1743 /* qresult is not unique, a covered unique qresult is chosen */
1744 /* and the while loop is circled again (optimizable?) (HAS) */
1745 do {
1746 switch (qresult) {
1747
1748 /* one single branching structure */
1749
1750 /* 001 */
1751 case 1: if (ipos == 1 || ipos == 2) {
1752 *chooseX = leaf[3];
1753 *chooseY = leaf[4];
1754 } else {
1755 *chooseX = leaf[1];
1756 *chooseY = leaf[2];
1757 }
1758 notunique = FALSE;
1759 break;
1760
1761 /* 010 */
1762 case 2: if (ipos == 1 || ipos == 3) {
1763 *chooseX = leaf[2];
1764 *chooseY = leaf[4];
1765 } else {
1766 *chooseX = leaf[1];
1767 *chooseY = leaf[3];
1768 }
1769 notunique = FALSE;
1770 break;
1771
1772 /* 100 */
1773 case 4: if (ipos == 1 || ipos == 4) {
1774 *chooseX = leaf[2];
1775 *chooseY = leaf[3];
1776 } else {
1777 *chooseX = leaf[1];
1778 *chooseY = leaf[4];
1779 }
1780 notunique = FALSE;
1781 break;
1782
1783 /* two possible branching structures */
1784
1785 /* 011 */
1786 case 3: if (randominteger(2)) qresult = 1;
1787 else qresult = 2;
1788 notunique = TRUE;
1789 break;
1790
1791 /* 101 */
1792 case 5: if (randominteger(2)) qresult = 1;
1793 else qresult = 4;
1794 notunique = TRUE;
1795 break;
1796
1797 /* 110 */
1798 case 6: if (randominteger(2)) qresult = 2;
1799 else qresult = 4;
1800 notunique = TRUE;
1801 break;
1802
1803 /* three possible branching structures */
1804
1805 /* 111 */
1806 case 7: qresult = (1 << randominteger(3)); /* 1, 2, or 4 */
1807 notunique = TRUE;
1808 break;
1809
1810 /* 000 */ /* empty quartet: missing data -> OK, otherwise ERROR */
1811 case 0: if (readsubset_optn) {
1812 *chooseX = -1;
1813 *chooseY = -1;
1814 } else {
1815 # if PARALLEL
1816 fprintf(STDOUT, "\n\n\n(%2d)HALT: PLEASE REPORT ERROR K1-PARALLEL TO DEVELOPERS : empty (%d,%d,%d,%d) = %ld\n\n\n",
1817 PP_Myid, leaf[1], leaf[2], leaf[3], leaf[4],
1818 quart2num(leaf[1], leaf[2], leaf[3], leaf[4]));
1819 PP_Finalize();
1820 exit (999);
1821 # else
1822 fprintf(STDOUT, "\n\n\nHALT: PLEASE REPORT ERROR K1 TO DEVELOPERS: empty (%d,%d,%d,%d) = %ld\n\n\n",
1823 leaf[1], leaf[2], leaf[3], leaf[4],
1824 quart2num(leaf[1], leaf[2], leaf[3], leaf[4]));
1825 exit (999);
1826 # endif
1827 }
1828 notunique = FALSE;
1829 break;
1830
1831 default: /* Program error [checkquartet] */
1832 # if PARALLEL
1833 fprintf(STDOUT, "\n\n\n(%2d)HALT: PLEASE REPORT ERROR K2-PARALLEL TO DEVELOPERS (%d,%d,%d,%d) = %ld (%d)\n\n\n",
1834 PP_Myid, leaf[1], leaf[2], leaf[3], leaf[4],
1835 quart2num(leaf[1], leaf[2], leaf[3], leaf[4]), qresult);
1836 PP_Finalize();
1837 exit (999);
1838 # else
1839 fprintf(STDOUT, "\n\n\nHALT: PLEASE REPORT ERROR K2 TO DEVELOPERS (%d,%d,%d,%d) = %ld (%d)\n\n\n",
1840 leaf[1], leaf[2], leaf[3], leaf[4],
1841 quart2num(leaf[1], leaf[2], leaf[3], leaf[4]), qresult);
1842 exit (999);
1843 # endif
1844
1845 } /* switch (qresult) */
1846 } while (notunique);
1847
1848 return;
1849 } /* checkquartet */
1850
1851