1 /*
2  * puzzle2.c
3  *
4  *
5  * Part of TREE-PUZZLE 5.2 (July 2004)
6  *
7  * (c) 2003-2004 by Heiko A. Schmidt, Korbinian Strimmer, and Arndt von Haeseler
8  * (c) 1999-2003 by Heiko A. Schmidt, Korbinian Strimmer,
9  *                  M. Vingron, and Arndt von Haeseler
10  * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler
11  *
12  * All parts of the source except where indicated are distributed under
13  * the GNU public licence.  See http://www.opensource.org for details.
14  *
15  * ($Id$)
16  *
17  */
18 
19 #ifdef HAVE_CONFIG_H
20 #  include <config.h>
21 #endif
22 
23 
24 #define EXTERN extern
25 
26 #include "puzzle.h"
27 #include <string.h>
28 
29 #if PARALLEL
30 #	include "ppuzzle.h"
31 #	include "sched.h"
32 #endif /* PARALLEL */
33 
34 
35 /******************************************************************************/
36 /* sequences                                                                  */
37 /******************************************************************************/
38 
39 /* read ten characters of current line as identifier */
readid(FILE * infp,int t)40 void readid(FILE *infp, int t)
41 {
42 	int i, j, flag, ci;
43 
44 	for (i = 0; i < 10; i++) {
45 		ci = fgetc(infp);
46 		if (ci == EOF || !isprint(ci)) {
47 			fprintf(STDOUT, "\n\n\nUnable to proceed (no name for sequence %d)\n\n\n", t+1);
48 #			if PARALLEL
49 				PP_Finalize();
50 #			endif
51 			exit(1);
52 		}
53 		Identif[t][i] = (char) ci;
54 	}
55 	/* convert leading blanks in taxon name to underscores */
56 	flag = FALSE;
57 	for (i = 9; i > -1; i--) {
58 		if (flag == FALSE) {
59 			if (Identif[t][i] != ' ') flag = TRUE;
60 		} else {
61 			if (Identif[t][i] == ' ') Identif[t][i] = '_';
62 		}
63 	}
64 	/* check whether this name is already used */
65 	for (i = 0; i < t; i++) { /* compare with all other taxa */
66 		flag = TRUE; /* assume identity */
67 		for (j = 0; (j < 10) && (flag == TRUE); j++)
68 			if (Identif[t][j] != Identif[i][j])
69 				flag = FALSE;
70 		if (flag) {
71 			fprintf(STDOUT, "\n\n\nUnable to proceed (multiple occurence of sequence name '");
72 			fputid(STDOUT, t);
73 			fprintf(STDOUT, "')\n\n\n");
74 #			if PARALLEL
75 				PP_Finalize();
76 #			endif
77 			exit(1);
78 		}
79 	}
80 } /* readid */
81 
82 /******************/
83 
84 
85 /* read next allowed character */
readnextcharacter(FILE * ifp,int notu,int nsite)86 char readnextcharacter(FILE *ifp, int notu, int nsite)
87 {
88 	char c;
89 
90 	/* ignore blanks and control characters except newline (UNIX,DOS) or CR (Mac,DOS) */
91 	do {
92 		if (fscanf(ifp, "%c", &c) != 1) {
93 			fprintf(STDOUT, "\n\n\nUnable to proceed (missing character at position %d in sequence '", nsite + 1);
94 			fputid(STDOUT, notu);
95 			fprintf(STDOUT, "')\n\n\n");
96 #			if PARALLEL
97 				PP_Finalize();
98 #			endif
99 			exit(1);
100 		}
101 	} while (c == ' ' || (iscntrl((int) c) && (c != '\n') && (c != '\r')));
102 	return c;
103 } /* readnextcharacter */
104 
105 /******************/
106 
107 
108 /* skip rest of the line */
skiprestofline(FILE * ifp,int notu,int nsite)109 void skiprestofline(FILE *ifp,    /* input file stream */
110                     int   notu,   /* taxon number      - for error msg. */
111                     int   nsite)  /* sequence position - for error msg. */
112 {
113 	int ci;
114 
115 	/* read chars until the first newline or CR */
116 	do{
117 		ci = fgetc(ifp);
118 		if (ci == EOF) {
119 			fprintf(STDOUT, "Unable to proceed (missing newline at position %d in sequence '", nsite + 1);
120 			fputid(STDOUT, notu);
121 			fprintf(STDOUT, "')\n\n\n");
122 #			if PARALLEL
123 				PP_Finalize();
124 #			endif
125 			exit(1);
126 		}
127 	} while (((char) ci != '\n') && ((char) ci != '\r'));
128 } /* skiprestofline */
129 
130 /******************/
131 
132 
133 /* skip control characters and blanks */
skipcntrl(FILE * ifp,int notu,int nsite)134 void skipcntrl(FILE *ifp,    /* input file stream */
135                int   notu,   /* taxon number      - for error msg. */
136                int   nsite)  /* sequence position - for error msg. */
137 {
138 	int ci;
139 
140 	/* read over all control characters and blanks */
141 	do {
142 		ci = fgetc(ifp);
143 		if (ci == EOF) {
144 			fprintf(STDOUT, "\n\n\nUnable to proceed (missing character at position %d in sequence '", nsite + 1);
145 			fputid(STDOUT, notu);
146 			fprintf(STDOUT, "')\n\n\n");
147 #			if PARALLEL
148 				PP_Finalize();
149 #			endif
150 			exit(1);
151 		}
152 	} while (iscntrl(ci) || (char) ci == ' ');
153 	/* go one character back */
154 	if (ungetc(ci, ifp) == EOF) {
155 		fprintf(STDOUT, "\n\n\nUnable to proceed (positioning error at position %d in sequence '", nsite + 1);
156 		fputid(STDOUT, notu);
157 		fprintf(STDOUT, "')\n\n\n");
158 #		if PARALLEL
159 			PP_Finalize();
160 #		endif
161 		exit(1);
162 	}
163 } /* skipcntrl */
164 
165 /******************/
166 
167 
168 /* read sequences of one data set */
getseqs(FILE * ifp,int Maxspc,int Maxseqc,cmatrix * seqch,cmatrix identif)169 void getseqs(FILE    *ifp,      /* in:  input file stream */
170              int      Maxspc,   /* in:  number of taxa */
171              int      Maxseqc,  /* in:  number of sites */
172              cmatrix *seqch,    /* out: alignment matrix */
173              cmatrix  identif)  /* io:  taxon names (10 char w/o stop) */
174 {
175 	int notu, nsite, endofline, linelength, i;
176 	char c;
177 	cmatrix seqchars;
178 
179 	seqchars = new_cmatrix(Maxspc, Maxseqc);
180 	/* read all characters */
181 	nsite = 0; /* next site to be read */
182 	while (nsite < Maxseqc) {
183 		/* read first taxon */
184 		notu = 0;
185 
186 		/* only Maxspc, Maxseqc read so far: */
187 		/* go to next true line */
188 		skiprestofline(ifp, notu, nsite);
189 
190 		skipcntrl(ifp, notu, nsite);
191 
192 		if (nsite == 0) readid(ifp, notu);
193 		endofline = FALSE;
194 		linelength = 0;
195 		do {
196 			c = readnextcharacter(ifp, notu, nsite + linelength);
197 			if ((c == '\n') || (c == '\r')) endofline = TRUE;
198 			else if (c == '.') {
199 				fprintf(STDOUT, "\n\n\nUnable to proceed (invalid character '.' at position ");
200 				fprintf(STDOUT, "%d in first sequence)\n\n\n", nsite + linelength + 1);
201 #				if PARALLEL
202 					PP_Finalize();
203 #				endif
204 				exit(1);
205 			     } else if (nsite + linelength < Maxseqc) {
206 				       /* change to upper case */
207 				       seqchars[notu][nsite + linelength] = (char) toupper((int) c);
208 				       linelength++;
209 			            } else {
210 				       endofline = TRUE;
211 				       skiprestofline(ifp, notu, nsite + linelength);
212 			            }
213 		} while (!endofline);
214 		if (linelength == 0) {
215 			fprintf(STDOUT, "\n\n\nUnable to proceed (line with length 0 at position %d in sequence '", nsite + 1);
216 			fputid(STDOUT, notu);
217 			fprintf(STDOUT, "')\n\n\n");
218 #			if PARALLEL
219 				PP_Finalize();
220 #			endif
221 			exit(1);
222 		}
223 		/* read other taxa */
224 		for (notu = 1; notu < Maxspc; notu++) {
225 			/* go to next true line */
226 			if (notu != 1) skiprestofline(ifp, notu, nsite);
227 			skipcntrl(ifp, notu, nsite);
228 			if (nsite == 0) readid(ifp, notu);
229 			for (i = nsite; i < nsite + linelength; i++) {
230 				c = readnextcharacter(ifp, notu, i);
231 				if ((c == '\n') || (c == '\r')) { /* too short */
232 					fprintf(STDOUT, "\n\n\nUnable to proceed (line to short at position %d in sequence '", i + 1);
233 					fputid(STDOUT, notu);
234 					fprintf(STDOUT, "')\n\n\n");
235 #					if PARALLEL
236 						PP_Finalize();
237 #					endif
238 					exit(1);
239 				} else if (c == '.') {
240 					seqchars[notu][i] = seqchars[0][i];
241 				} else {
242 					/* change to upper case */
243 					seqchars[notu][i] = (char) toupper((int) c);
244 				}
245 			}
246 		}
247 		nsite = nsite + linelength;
248 	}
249 	*seqch =seqchars;
250 
251 } /* getseqs */
252 
253 /******************/
254 
255 
256 /* initialize identifer arrays */
initid(int t,cmatrix * identif,cmatrix * namestr)257 void initid(int      t,        /* in:  number of taxa */
258             cmatrix *identif,  /* out: name array w/o end of string */
259             cmatrix *namestr)  /* out: name array with end of string */
260 {
261 	int i, j;
262 
263 	*identif = new_cmatrix(t, 10);
264 	*namestr = new_cmatrix(t, 11);
265 	for (i = 0; i < t; i++) {
266 		(*namestr)[i][0] = '\0';
267 		(*namestr)[i][9] = '\0';
268 		for (j = 0; j < 10; j++) {
269 			(*identif)[i][j] = ' ';
270 		} /* for j */
271 	} /* for i */
272 } /* initid */
273 
274 /******************/
275 
276 
277 /******************/
278 
279 
280 /* copy undelimited identifer array to '\0' delimited identifer array */
identif2namestr(int num,cmatrix Identif,cmatrix Namestr)281 void identif2namestr(int     num,      /* number of taxa         */
282                      cmatrix Identif,  /* non-delimited names    */
283                      cmatrix Namestr)  /* proper delimited names */
284 {
285 	int i, j;
286 	int laggingspc;
287 
288 	for (i = 0; i < num; i++) {
289 		laggingspc = 1;
290 		for (j = 9; j >= 0; j--) {
291 			Namestr[i][j] = Identif[i][j];
292 			if (laggingspc == 1) {
293 				if (Identif[i][j] != ' ') {
294 					Namestr[i][j+1] = '\0';
295 					laggingspc = 0;
296 				}
297 			}
298 
299 		} /* for j (letter 10-1) */
300 	} /* for i (species) */
301 } /* identif2namestr */
302 
303 /******************/
304 
305 
306 /* print identifier of specified taxon in full 10 char length */
fputid10(FILE * ofp,int t)307 void fputid10(FILE *ofp, int t)
308 {
309 	int i;
310 
311 	for (i = 0; i < 10; i++) fputc(Identif[t][i], ofp);
312 } /* fputid10 */
313 
314 /******************/
315 
316 
317 /* print identifier of specified taxon up to first space */
fputid(FILE * ofp,int t)318 int fputid(FILE *ofp, int t)
319 {
320 	int i;
321 
322 	i = 0;
323 	while (i < 10 && Identif[t][i] != ' ') {
324 		fputc(Identif[t][i], ofp);
325 		i++;
326 	}
327 	return i;
328 } /* fputid */
329 
330 /******************/
331 
332 
333 /* read first line of sequence data set */
getsizesites(FILE * ifp,int * Maxspc,int * Maxseqc)334 void getsizesites(FILE *ifp,      /* in: input file stream */
335                   int  *Maxspc,   /* out: number of taxa */
336                   int  *Maxseqc)  /* out: number of sites */
337 {
338 	if (fscanf(ifp, "%d", Maxspc) != 1) {
339 			fprintf(STDOUT, "\n\n\nUnable to proceed (missing number of sequences)\n\n\n");
340 #			if PARALLEL
341 				PP_Finalize();
342 #			endif
343 			exit(1);
344 	}
345 	if (fscanf(ifp, "%d", Maxseqc) != 1) {
346 			fprintf(STDOUT, "\n\n\nUnable to proceed (missing number of sites)\n\n\n");
347 #			if PARALLEL
348 				PP_Finalize();
349 #			endif
350 			exit(1);
351 	}
352 
353 	if (*Maxspc < 3) {
354 		fprintf(STDOUT, "\n\n\nUnable to proceed (less than 3 sequences - no tree possible)\n\n\n");
355 #		if PARALLEL
356 			PP_Finalize();
357 #		endif
358 		exit(1);
359 	}
360 	seqnumcheck = SEQNUM_OK;
361 	if (*Maxspc < 4) {
362 		fprintf(STDOUT, "\n\n\nLess than 4 sequences: No quartet methods possible!!!\n\n\n");
363 		fprintf(STDOUT, "Parameter estimation and tree testing is still possible.\n\n\n");
364 		seqnumcheck = SEQNUM_TOOFEW;
365 	}
366 	if (*Maxspc > 257) {
367 		fprintf(STDOUT, "\n\n\nMore than 257 sequences: No quartet puzzling available!!!\n");
368 		fprintf(STDOUT, "Parameter estimation, likelihood mapping, and tree testing is still possible.\n\n\n");
369 		seqnumcheck = SEQNUM_TOOMANY;
370 	}
371 	/* correct default analysis types */
372 	if ((seqnumcheck!=SEQNUM_OK) && (puzzlemode==QUARTPUZ))
373 		puzzlemode = (puzzlemode + 1) % NUMPUZZLEMODES;
374 	if ((seqnumcheck==SEQNUM_TOOFEW) && (typ_optn==LIKMAPING_OPTN))
375 		typ_optn = (typ_optn + 1) % NUMTYPES;
376 #if 0
377 	if (*Maxspc < 4) {
378 		fprintf(STDOUT, "\n\n\nUnable to proceed (less than 4 sequences)\n\n\n");
379 #		if PARALLEL
380 			PP_Finalize();
381 #		endif
382 		exit(1);
383 	}
384 	if (*Maxspc > 257) {
385 		fprintf(STDOUT, "\n\n\nUnable to proceed (more than 257 sequences)\n\n\n");
386 #		if PARALLEL
387 			PP_Finalize();
388 #		endif
389 		exit(1);
390 	}
391 #		endif
392 	if (*Maxseqc < 1) {
393 		fprintf(STDOUT, "\n\n\nUnable to proceed (no sequence sites)\n\n\n");
394 #		if PARALLEL
395 			PP_Finalize();
396 #		endif
397 		exit(1);
398 	}
399 } /* getsizesites */
400 
401 /******************/
402 
403 
404 /* read alignment from file */
readsequencefile(FILE * seqfp,int * Maxspc,int * Maxseqc,cmatrix * identif,cmatrix * namestr,cmatrix * Seqchars)405 void readsequencefile(FILE    *seqfp,     /* in:  sequence input file stream */
406                       int     *Maxspc,    /* out: number of taxa             */
407                       int     *Maxseqc,   /* out: number of sites            */
408                       cmatrix *identif,   /* out: name array w/o end of str  */
409                       cmatrix *namestr,   /* out: name array with end of str */
410                       cmatrix *Seqchars)  /* out: alignment matrix           */
411 {
412 	getsizesites(seqfp, Maxspc, Maxseqc);
413 	initid(*Maxspc, identif, namestr);
414 	getseqs(seqfp, *Maxspc, *Maxseqc, Seqchars, *identif);
415 	identif2namestr(*Maxspc, *identif, *namestr);
416 } /* readsequencefile */
417 
418 /******************/
419 
420 
421 /* read subsets from file */
readsubsetfile(FILE * seqfp,int Maxspc,cmatrix namestr,int * Maxsubset,imatrix * ss_setovlgraph,imatrix * ss_setoverlaps,imatrix * ss_setovllist,ivector * ss_setovllistsize,imatrix * ss_matrix,imatrix * ss_list,ivector * ss_listsize)422 void readsubsetfile(FILE    *seqfp,              /* in:  sequence input file stream */
423                     int      Maxspc,             /* in:  number of taxa             */
424                     cmatrix  namestr,            /* in:  names taxa (seq file)      */
425                     int     *Maxsubset,          /* out: number of subsets          */
426                     imatrix *ss_setovlgraph,     /* out: size of overlap >= 3 between 2 subsets */
427                     imatrix *ss_setoverlaps,     /* out: size of overlap between 2 subsets */
428                     imatrix *ss_setovllist,      /* out: list with ovlerlapping subsets */
429                     ivector *ss_setovllistsize,  /* out: size of list with ovlerlapping subsets */
430                     imatrix *ss_matrix,          /* out: boolean list: taxon in set? */
431                     imatrix *ss_list,            /* out: list of taxa in set */
432                     ivector *ss_listsize)         /* out: size of list with taxa */
433 {
434 	int ss_maxspc;
435 	int n;
436 	int set1, set2;
437 	int tmp;
438 
439         cmatrix  ss_identarr;
440         cmatrix  ss_namearr;
441         cmatrix  ss_strmatr;
442 
443 	/* read the subset file sizes */
444 	getsizesites(seqfp, &ss_maxspc, Maxsubset);
445 
446 	/* check sizes */
447 	if (ss_maxspc != Maxspc) {
448 		fprintf(STDOUT, "ERROR: Number of taxa in subsetfile does not match!!! (%d != %d)\n", ss_maxspc, Maxspc);
449 #		if PARALLEL
450 			PP_SendDone();
451 			MPI_Finalize();
452 #		endif /* PARALLEL */
453 		exit(1);
454 	}
455 
456 	/* initialize name arrays for checking */
457 	initid(ss_maxspc, &ss_identarr, &ss_namearr);
458 
459 	/* read the data */
460 	getseqs(seqfp, ss_maxspc, *Maxsubset, &ss_strmatr, ss_identarr);
461 
462 	/* check the file names */
463 	identif2namestr(ss_maxspc, ss_identarr, ss_namearr);
464 	for(n = 0; n<Maxspc; n++) {
465 		if (ss_maxspc != Maxspc) {
466 			fprintf(STDOUT, "ERROR: Taxon names or order in subset file do not match!!!\n       (%d \"%s\" != \"%s\")\n", n+1, namestr[n], ss_namearr[n]);
467 #			if PARALLEL
468 				PP_SendDone();
469 				MPI_Finalize();
470 #			endif /* PARALLEL */
471 			exit(1);
472 		}
473 
474 	}
475 
476 	free_cmatrix(ss_identarr);
477 	free_cmatrix(ss_namearr);
478 
479 	*ss_setovlgraph = new_imatrix(*Maxsubset, *Maxsubset);
480 	*ss_setoverlaps = new_imatrix(*Maxsubset, *Maxsubset);
481 
482 	*ss_setovllist     = new_imatrix(*Maxsubset, *Maxsubset);
483 	*ss_setovllistsize = new_ivector(*Maxsubset);
484 
485 	*ss_matrix         = new_imatrix(*Maxsubset, Maxspc);
486 
487 	*ss_list           = new_imatrix(*Maxsubset, Maxspc);
488 	*ss_listsize       = new_ivector(*Maxsubset);
489 
490 	for (n=0; n<Maxspc; n++) {
491 		for (set1=0; set1<*Maxsubset; set1++) {
492 			switch (ss_strmatr[n][set1]) {
493 				case 'X':
494 				case '1':
495                     			(*ss_matrix)[set1][n] = 1;
496                     			(*ss_list)[set1][((*ss_listsize)[set1])++] = n;
497 					break;
498 				case '-':
499 				case '0':
500                     			(*ss_matrix)[set1][n] = 0;
501 					break;
502 				default:
503 					fprintf(STDOUT, "ERROR: Unknown character in subset file!!! (\"%c\", taxon %d, site %d)\n", ss_strmatr[n][set1], n+1, set1+1);
504 #					if PARALLEL
505 						PP_SendDone();
506 						MPI_Finalize();
507 #					endif /* PARALLEL */
508 					exit(1);
509 					break;
510 			}
511 		}
512 
513 		for (set1=1; set1<*Maxsubset; set1++) {
514 			for (set2=0; set2<set1; set2++) {
515 				if (((*ss_matrix)[set1][n]) && ((*ss_matrix)[set2][n]))
516 					/* number of sequences in overlap */
517                     			((*ss_setoverlaps)[set1][set2])++;
518 
519 			}
520 		}
521 	} /* for Maxspc */
522 
523 	for (set1=1; set1<*Maxsubset; set1++) {
524 		for (set2=0; set2<set1; set2++) {
525 			/* number of sequences in overlap */
526                    	tmp = (*ss_setoverlaps)[set1][set2];
527                    	(*ss_setoverlaps)[set2][set1] = tmp;
528 			if (tmp >= 3) {
529 				/* number of sequences (>=3) in overlap */
530                    		(*ss_setovlgraph)[set1][set2] = tmp;
531                    		(*ss_setovlgraph)[set2][set1] = tmp;
532 				/* list of overlapping sets */
533                     		(*ss_setovllist)[set1][((*ss_setovllistsize)[set1])++] = set2;
534                     		(*ss_setovllist)[set2][((*ss_setovllistsize)[set2])++] = set1;
535 			}
536 
537 		}
538 	}
539 } /* readsubsetfile */
540 
541 /******************/
542 
543 
544 /* permute taxon order */
permutetaxa_ss(int Maxspc,ivector permutation,cmatrix namestr,int Maxsubset,imatrix ss_setovlgraph,imatrix ss_setoverlaps,imatrix ss_setovllist,ivector ss_setovllistsize,imatrix ss_matrix,imatrix ss_list,ivector ss_listsize)545 void permutetaxa_ss(int      Maxspc,             /* in:  number of taxa             */
546                     ivector  permutation,        /* permuted taxon order            */
547                     cmatrix  namestr,            /* in:  names taxa (seq file)      */
548                     int      Maxsubset,          /* out: number of subsets          */
549                     imatrix  ss_setovlgraph,     /* out: size of overlap >= 3 between 2 subsets */
550                     imatrix  ss_setoverlaps,     /* out: size of overlap between 2 subsets */
551                     imatrix  ss_setovllist,      /* out: list with ovlerlapping subsets */
552                     ivector  ss_setovllistsize,  /* out: size of list with ovlerlapping subsets */
553                     imatrix  ss_matrix,          /* out: boolean list: taxon in set? */
554                     imatrix  ss_list,            /* out: list of taxa in set */
555                     ivector  ss_listsize)         /* out: size of list with taxa */
556 {
557 	ivector candidatesetlist;
558 	int     candidatesetsize = 0;
559 	ivector candidatetaxalist;
560 	int     candidatetaxasize = 0;
561 	ivector taxonstatus;			/* 0 - not done; 1 - candidate; 2 - inserted */
562 	ivector setstatus;			/* 0 - not used; 1 - front; 2 - used*/
563 	int     permpos;
564 	int     k;
565 	int     tmprand;
566 	int     currset;
567 	int     currtaxon;
568 	candidatesetlist  = new_ivector(Maxsubset);
569 	candidatetaxalist = new_ivector(Maxspc);
570 	taxonstatus       = new_ivector(Maxspc);
571 	setstatus         = new_ivector(Maxsubset);
572 
573 	/* first subset */
574 	currset = randominteger(Maxsubset);
575 	setstatus[currset] = 2; /* inserted */
576 
577 	/* init subset front nodes */
578 	/*   insert sets with ovl(currset) >= 3 into candidate set list */
579 	for (k=0; k<ss_setovllistsize[currset]; k++) {
580 		candidatesetlist[candidatesetsize] = ss_setovllist[currset][k];
581 		setstatus[candidatesetlist[candidatesetsize++]] = 1;
582 	}
583 	/*   insert taxa from currset into candidate taxon list */
584 	for (k=0; k<ss_listsize[currset]; k++) {
585 		candidatetaxalist[candidatetaxasize] = ss_list[currset][k];
586 		taxonstatus[candidatetaxalist[candidatetaxasize++]] = 1;
587 	}
588 
589 	/* start collectin permutation */
590 	for (permpos=0; permpos<Maxspc; permpos++) {
591 		/* if no taxa left, choose next set */
592 		while (candidatetaxasize == 0) {
593 			/* next set */
594 			tmprand = randominteger(candidatesetsize);
595 			currset = candidatesetlist[tmprand];
596 			/* move last set entry to gap */
597 			candidatesetlist[tmprand] = candidatesetlist[--candidatesetsize]; /* XXXX */
598 			/* candidatesetlist[tmprand] = candidatesetlist[candidatesetsize--]; */
599 			setstatus[currset] = 2; /* inserted */
600 			/* add new sets (ovl>=3) to set candidate list */
601 			for (k=0; k<ss_setovllistsize[currset]; k++) {
602 				if (setstatus[ss_setovllist[currset][k]] == 0) {
603 					candidatesetlist[candidatesetsize] = ss_setovllist[currset][k];
604 					setstatus[candidatesetlist[candidatesetsize++]] = 1;
605 				}
606 			}
607 			/* add new taxa to taxon candidate list */
608 			for (k=0; k<ss_listsize[currset]; k++) {
609 				if (taxonstatus[ss_list[currset][k]] == 0) {
610 					candidatetaxalist[candidatetaxasize] = ss_list[currset][k];
611 					taxonstatus[candidatetaxalist[candidatetaxasize++]] = 1;
612 				}
613 			}
614 
615 		} /* if candidatetaxasize == 0 */
616 
617 		tmprand = randominteger(candidatetaxasize);
618 		currtaxon = candidatetaxalist[tmprand];
619 
620 		candidatetaxalist[tmprand] = candidatetaxalist[--candidatetaxasize];
621 		permutation[permpos] = currtaxon;
622 		taxonstatus[currtaxon] = 2;
623 
624 	} /* one random taxon after the other */
625 
626 	free_ivector(candidatesetlist);
627 	free_ivector(candidatetaxalist);
628 	free_ivector(taxonstatus);
629 	free_ivector(setstatus);
630 } /* permutetaxa_ss */
631 
632 /******************/
633 
634 
635 /* print subsets */
fprintfss(FILE * ofp,int Maxspc,int Maxsubset,imatrix ss_setovlgraph,imatrix ss_setoverlaps,imatrix ss_setovllist,ivector ss_setovllistsize,imatrix ss_matrix,imatrix ss_list,ivector ss_listsize)636 void fprintfss(FILE    *ofp,              /* in:  output file stream         */
637              int      Maxspc,             /* in:  number of taxa             */
638              int      Maxsubset,          /* out: number of subsets          */
639              imatrix  ss_setovlgraph,     /* out: size of overlap >= 3 between 2 subsets */
640              imatrix  ss_setoverlaps,     /* out: size of overlap between 2 subsets */
641              imatrix  ss_setovllist,      /* out: list with ovlerlapping subsets */
642              ivector  ss_setovllistsize,  /* out: size of list with ovlerlapping subsets */
643              imatrix  ss_matrix,          /* out: boolean list: taxon in set? */
644              imatrix  ss_list,            /* out: list of taxa in set */
645              ivector  ss_listsize)         /* out: size of list with taxa */
646 {
647 	int     s, s1, s2, t;
648 
649 
650 	for (s=0; s<Maxsubset; s++) {
651 		fprintf(ofp, "   set %d (%d taxa)\n", s, ss_listsize[s]);
652 			fprintf(ofp, "      taxa:        ");
653 		for (t=0; t<ss_listsize[s]; t++) {
654 			fprintf(ofp, "%3d", ss_list[s][t]);
655 			if (t+1 < ss_listsize[s]) fprintf(ofp, ",");
656 			if (((t+1) % 15) == 0)
657 				fprintf(ofp, "\n                   ");
658 		}
659 		fprintf(ofp, "\n      overlap with:");
660 		for (t=0; t<ss_setovllistsize[s]; t++) {
661 			fprintf(ofp, "%3d", ss_setovllist[s][t]);
662 			if (t+1 < ss_setovllistsize[s]) fprintf(ofp, ",");
663 			if (((t+1) % 15) == 0)
664 				fprintf(ofp, "\n                   ");
665 		}
666 		fprintf(ofp, "\n\n");
667 	}
668 	fprintf(ofp, "   Overlap Graph:\n");
669 	for (s1=0; s1<Maxsubset; s1++) {
670 		fprintf(ofp, "   %3d :\t", s1);
671 		for (s2=0; s2<Maxsubset; s2++) {
672 			if (s1==s2)
673 				fprintf(ofp, "(%3d) ", ss_listsize[s1]);
674 			else
675 				fprintf(ofp, " %3d  ", ss_setoverlaps[s1][s2]);
676 		}
677 		fprintf(ofp, "\n");
678 	}
679 	fprintf(ofp, "\n\n");
680 	fflush(ofp);
681 } /* fprintfss */
682 
683 /******************/
684 /* check connectedness of subsets */
checkss(FILE * ofp,int Maxspc,int Maxsubset,imatrix ss_setovlgraph,imatrix ss_setoverlaps,imatrix ss_setovllist,ivector ss_setovllistsize,imatrix ss_matrix,imatrix ss_list,ivector ss_listsize)685 void checkss(FILE    *ofp,              /* in:  output file stream         */
686              int      Maxspc,             /* in:  number of taxa             */
687              int      Maxsubset,          /* out: number of subsets          */
688              imatrix  ss_setovlgraph,     /* out: size of overlap >= 3 between 2 subsets */
689              imatrix  ss_setoverlaps,     /* out: size of overlap between 2 subsets */
690              imatrix  ss_setovllist,      /* out: list with ovlerlapping subsets */
691              ivector  ss_setovllistsize,  /* out: size of list with ovlerlapping subsets */
692              imatrix  ss_matrix,          /* out: boolean list: taxon in set? */
693              imatrix  ss_list,            /* out: list of taxa in set */
694              ivector  ss_listsize)         /* out: size of list with taxa */
695 {
696 	int     s1, s2;
697 	int numdone;
698         int *setarr;
699         setarr = new_ivector(Maxsubset);
700 
701 	s1 = 0;
702 	numdone=0;
703 	do {
704 		setarr[s1] = 2; /* 1=seen, 2=visited */
705 		for (s2=0; s2<Maxsubset; s2++) {
706 			if ((setarr[s2]==0) && (ss_setoverlaps[s1][s2] >= 3))
707 		  	setarr[s2]=1;
708 		}
709 
710 		numdone++;
711 		s1=0;
712 		while ((s1 < Maxsubset) && (setarr[s1]!=1)) s1++;
713 
714 	} while (s1 != Maxsubset);
715 
716 	if (Maxsubset != numdone) {
717 		fprintf(stderr, "Unable to proceed. More than one connected component! (%d != %d)\n", Maxsubset, numdone);
718 		fprintf(stderr, "visited: \n  ");
719 		for (s2=0; s2<Maxsubset; s2++)
720 			if (setarr[s2]==2) fprintf(stderr, "%d, ", s2);
721 		fprintf(stderr, "\nseen: \n  ");
722 		for (s2=0; s2<Maxsubset; s2++)
723 			if (setarr[s2]==1) fprintf(stderr, "%d, ", s2);
724 		fprintf(stderr, "\nlost: \n  ");
725 		for (s2=0; s2<Maxsubset; s2++)
726 			if (setarr[s2]==0) fprintf(stderr, "%d, ", s2);
727 		exit(1);
728 	}
729 } /* checkss */
730 
731 /******************/
732 
733 
734 /* guess data type: NUCLEOTIDE:0, AMINOACID:1, BINARY:2 */
guessdatatype(cmatrix Seqchars,int Maxspc,int Maxseqc)735 int guessdatatype(cmatrix Seqchars,   /* alignment matrix (Maxspc x Maxseqc) */
736                   int     Maxspc,     /* number of taxa */
737                   int     Maxseqc)    /* number of sites */
738 {
739 	uli numnucs, numchars, numbins;
740 	int notu, nsite;
741 	char c;
742 
743 	/* count A, C, G, T, U, N */
744 	numnucs = 0;
745 	numchars = 0;
746 	numbins = 0;
747 	for (notu = 0; notu < Maxspc; notu++)
748 		for (nsite = 0; nsite < Maxseqc; nsite++) {
749 			c = Seqchars[notu][nsite];
750 			if (c == 'A' || c == 'C' || c == 'G' ||
751 			    c == 'T' || c == 'U' || c == 'N') numnucs++;
752 			if (c != '-' && c != '?') numchars++;
753 			if (c == '0' || c == '1') numbins++;
754 		}
755 	if (numchars == 0) numchars = 1;
756 	/* more than 85 % frequency means nucleotide data */
757 	if ((double) numnucs / (double) numchars > 0.85)
758 		return NUCLEOTIDE; /* 0 */
759 	else if ((double) numbins / (double) numchars > 0.2)
760 		return BINARY; /* 2 */
761 	else
762 		return AMINOACID; /* 1 */
763 } /* guessdatatype */
764 
765 /******************/
766 
767 
768 /* translate characters into format used by ML engine */
translatedataset(int maxspc,int maxseqc,int * maxsite,cmatrix seqchars,cmatrix * seqchar,ivector * seqgapchar,ivector * seqotherchar)769 void translatedataset(int maxspc, int maxseqc, int *maxsite, cmatrix seqchars, cmatrix *seqchar, ivector *seqgapchar, ivector *seqotherchar)
770 {
771 	int notu, sn, co;
772 	char c;
773 	cvector code;
774 
775 	if (*seqgapchar   != NULL) free_ivector(*seqgapchar);
776 	if (*seqotherchar != NULL) free_ivector(*seqotherchar);
777 	Seqgapchar   = new_ivector(Maxspc);
778 	Seqotherchar = new_ivector(Maxspc);
779 
780 	/* Seqgapchar   = (int *) calloc(maxspc, sizeof(int)); */
781 	/* Seqotherchar = (int *) calloc(maxspc, sizeof(int)); */
782 
783 
784 	/* determine maxsite - number of ML sites per taxon */
785 	if (data_optn == NUCLEOTIDE && SH_optn) {
786 		if (SHcodon)
787 			*maxsite = maxseqc / 3;
788 		else
789 			*maxsite = maxseqc / 2; /* assume doublets */
790 
791 	} else
792 		*maxsite = maxseqc;
793 	if (data_optn == NUCLEOTIDE && (*maxsite % 3) == 0  && !SH_optn) {
794 		if (codon_optn == 1 || codon_optn == 2 || codon_optn == 3)
795 			*maxsite = *maxsite / 3; /* only one of the three codon positions */
796 		if (codon_optn == 4)
797 			*maxsite = 2*(*maxsite / 3); /* 1st + 2nd codon positions */
798 	}
799 
800 	/* reserve memory */
801 	if (*seqchar != NULL) free_cmatrix(*seqchar);
802 	*seqchar = new_cmatrix(maxspc, *maxsite);
803 
804 	/* code length */
805 	if (data_optn == NUCLEOTIDE && SH_optn)
806 		code = new_cvector(2);
807 	else
808 		code = new_cvector(1);
809 
810 	/* decode characters */
811 	if (data_optn == NUCLEOTIDE && SH_optn) { /* SH doublets */
812 
813 		for (notu = 0; notu < maxspc; notu++) {
814 			for (sn = 0; sn < *maxsite; sn++) {
815 					for (co = 0; co < 2; co++) {
816 						if (SHcodon)
817 							c = seqchars[notu][sn*3 + co];
818 						else
819 							c = seqchars[notu][sn*2 + co];
820 						code[co] = c;
821 					}
822 				(*seqchar)[notu][sn] = code2int(code, &((*seqgapchar)[notu]), &((*seqotherchar)[notu]));
823 			}
824 		}
825 
826 	} else if (!(data_optn == NUCLEOTIDE && (maxseqc % 3) == 0)) { /* use all */
827 
828 		for (notu = 0; notu < maxspc; notu++) {
829 			for (sn = 0; sn < *maxsite; sn++) {
830 				code[0] = seqchars[notu][sn];
831 				(*seqchar)[notu][sn] = code2int(code, &((*seqgapchar)[notu]), &((*seqotherchar)[notu]));
832 			}
833 		}
834 
835 	} else { /* codons */
836 
837 		for (notu = 0; notu < maxspc; notu++) {
838 			for (sn = 0; sn < *maxsite; sn++) {
839 				if (codon_optn == 1 || codon_optn == 2 || codon_optn == 3)
840 					code[0] = seqchars[notu][sn*3+codon_optn-1];
841 				else if (codon_optn == 4) {
842 					if ((sn % 2) == 0)
843 						code[0] = seqchars[notu][(sn/2)*3];
844 					else
845 						code[0] = seqchars[notu][((sn-1)/2)*3+1];
846 				} else
847 					code[0] = seqchars[notu][sn];
848 				(*seqchar)[notu][sn] = code2int(code, &((*seqgapchar)[notu]), &((*seqotherchar)[notu]));
849 			}
850 		}
851 
852 	}
853 
854 	free_cvector(code);
855 
856 } /* translatedataset */
857 
858 /******************/
859 
860 
861 
862 /* estimate mean base frequencies from translated data set */
estimatebasefreqs()863 void estimatebasefreqs()
864 {
865 	int tpmradix, i, j;
866 	uli all, *gene;
867 
868 	tpmradix = gettpmradix();
869 
870 	if (Freqtpm != NULL) free_dvector(Freqtpm);
871 	Freqtpm = new_dvector(tpmradix);
872 
873 	if (Basecomp != NULL) free_imatrix(Basecomp);
874 	Basecomp = new_imatrix(Maxspc, tpmradix);
875 
876 	gene = (uli *) calloc((size_t) (tpmradix + 1), sizeof(uli));
877 	if (gene == NULL) maerror("gene in estimatebasefreqs");
878 
879 	for (i = 0; i < tpmradix + 1; i++) gene[i] = 0;
880 	for (i = 0; i < Maxspc; i++)
881 		for (j = 0; j < tpmradix; j++) Basecomp[i][j] = 0;
882 	for (i = 0; i < Maxspc; i++)
883 		for (j = 0; j < Maxsite; j++) {
884 			gene[(int) Seqchar[i][j]]++;
885 			if (Seqchar[i][j] != tpmradix) Basecomp[i][(int) Seqchar[i][j]]++;
886 			}
887 
888 	all = Maxspc * Maxsite - gene[tpmradix];
889 	if (all != 0) { /* normal case */
890 		for (i = 0; i < tpmradix; i++)
891 			Freqtpm[i] = (double) gene[i] / (double) all;
892 	} else { /* pathological case with no unique character in data set */
893 		for (i = 0; i < tpmradix; i++)
894 			Freqtpm[i] = 1.0 / (double) tpmradix;
895 	}
896 
897 	free(gene);
898 
899 	Frequ_optn = TRUE;
900 } /* estimatebasefreqs */
901 
902 /******************/
903 
904 
905 /* guess model of substitution */
guessmodel()906 void guessmodel()
907 {
908 	double c1, c2, c3, c4, c5, c6;
909 	dvector f;
910 	dmatrix a;
911 	int i;
912 
913 	Dayhf_optn = FALSE;
914 	Jtt_optn = TRUE;
915 	mtrev_optn = FALSE;
916 	cprev_optn = FALSE;
917 	blosum62_optn = FALSE;
918 	vtmv_optn = FALSE;
919 	wag_optn = FALSE;
920 	TSparam = 2.0;
921 	YRparam = 1.0;
922 	optim_optn = TRUE;
923 	HKY_optn = TRUE;
924 	print_GTR_optn = TRUE;
925 	GTR_optn = FALSE;
926 	TN_optn = FALSE;
927 
928 	if (data_optn == AMINOACID) { /* amino acids */
929 
930 		/* chi2 fit to amino acid frequencies */
931 
932 		f = new_dvector(20);
933 		a = new_dmatrix(20,20);
934 		/* chi2 distance Dayhoff */
935 		dyhfdata(a, f);
936 		c1 = 0;
937 		for (i = 0; i < 20; i++)
938 			c1 = c1 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]);
939 		/* chi2 distance JTT */
940 		jttdata(a, f);
941 		c2 = 0;
942 		for (i = 0; i < 20; i++)
943 			c2 = c2 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]);
944 		/* chi2 distance mtREV */
945 		mtrevdata(a, f);
946 		c3 = 0;
947 		for (i = 0; i < 20; i++)
948 			c3 = c3 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]);
949 		/* chi2 distance VT */
950 		vtmvdata(a, f);
951 		c4 = 0;
952 		for (i = 0; i < 20; i++)
953 			c4 = c4 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]);
954 		/* chi2 distance WAG */
955 		wagdata(a, f);
956 		c5 = 0;
957 		for (i = 0; i < 20; i++)
958 			c5 = c5 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]);
959 		/* chi2 distance cpREV */
960 		cprev45data(a, f);
961 		c6 = 0;
962 		for (i = 0; i < 20; i++)
963 			c6 = c6 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]);
964 
965 		free_dvector(f);
966 		free_dmatrix(a);
967 
968 #ifndef CPREV
969 		if ((c1 < c2) && (c1 < c3) && (c1 < c4) && (c1 < c5)) {
970 		        /* c1 -> Dayhoff */
971 			Dayhf_optn = TRUE;
972 			Jtt_optn = FALSE;
973 			mtrev_optn = FALSE;
974 			cprev_optn = FALSE;
975 			vtmv_optn = FALSE;
976 			wag_optn = FALSE;
977 			fprintf(STDOUT, "(consists very likely of amino acids encoded on nuclear DNA)\n");
978 		} else {
979 			if ((c2 < c3) && (c2 < c4) && (c2 < c5)) {
980 				/* c2 -> JTT */
981 				Dayhf_optn = FALSE;
982 				Jtt_optn = TRUE;
983 				mtrev_optn = FALSE;
984 				cprev_optn = FALSE;
985 				vtmv_optn = FALSE;
986 				wag_optn = FALSE;
987 				fprintf(STDOUT, "(consists very likely of amino acids encoded on nuclear DNA)\n");
988 			} else {
989 				if ((c3 < c4) && (c3 < c5)) {
990 					/* c3 -> mtREV */
991 					Dayhf_optn = FALSE;
992 					Jtt_optn = FALSE;
993 					mtrev_optn = TRUE;
994 					cprev_optn = FALSE;
995 					vtmv_optn = FALSE;
996 					wag_optn = FALSE;
997 					fprintf(STDOUT, "(consists very likely of amino acids encoded on mtDNA)\n");
998 				} else {
999 					if ((c4 < c5)) {
1000 						/* c4 -> VT */
1001 						Dayhf_optn = FALSE;
1002 						Jtt_optn = FALSE;
1003 						mtrev_optn = FALSE;
1004 						cprev_optn = FALSE;
1005 						vtmv_optn = TRUE;
1006 						wag_optn = FALSE;
1007 						fprintf(STDOUT, "(consists very likely of amino acids encoded on nuclear DNA)\n");
1008 					} else {
1009 							/* c5 -> WAG */
1010 							Dayhf_optn = FALSE;
1011 							Jtt_optn = FALSE;
1012 							mtrev_optn = FALSE;
1013 							cprev_optn = FALSE;
1014 							vtmv_optn = FALSE;
1015 							wag_optn = TRUE;
1016 							fprintf(STDOUT, "(consists very likely of amino acids encoded on nuclear DNA)\n");
1017 					} /* if c4 else c5 */
1018 				} /* if c3 else c4 */
1019 			} /* if c2 */
1020 		} /* if c1 */
1021 
1022 #else /* CPREV */
1023 
1024 		if ((c1 < c2) && (c1 < c3) && (c1 < c4) && (c1 < c5) && (c1 < c6)) {
1025 		        /* c1 -> Dayhoff */
1026 			Dayhf_optn = TRUE;
1027 			Jtt_optn = FALSE;
1028 			mtrev_optn = FALSE;
1029 			cprev_optn = FALSE;
1030 			vtmv_optn = FALSE;
1031 			wag_optn = FALSE;
1032 			fprintf(STDOUT, "(consists very likely of amino acids encoded on nuclear DNA)\n");
1033 		} else {
1034 			if ((c2 < c3) && (c2 < c4) && (c2 < c5) && (c2 < c6)) {
1035 				/* c2 -> JTT */
1036 				Dayhf_optn = FALSE;
1037 				Jtt_optn = TRUE;
1038 				mtrev_optn = FALSE;
1039 				cprev_optn = FALSE;
1040 				vtmv_optn = FALSE;
1041 				wag_optn = FALSE;
1042 				fprintf(STDOUT, "(consists very likely of amino acids encoded on nuclear DNA)\n");
1043 			} else {
1044 				if ((c3 < c4) && (c3 < c5) && (c3 < c6)) {
1045 					/* c3 -> mtREV */
1046 					Dayhf_optn = FALSE;
1047 					Jtt_optn = FALSE;
1048 					mtrev_optn = TRUE;
1049 					cprev_optn = FALSE;
1050 					vtmv_optn = FALSE;
1051 					wag_optn = FALSE;
1052 					fprintf(STDOUT, "(consists very likely of amino acids encoded on mtDNA)\n");
1053 				} else {
1054 					if ((c4 < c5) && (c4 < c6)) {
1055 						/* c4 -> VT */
1056 						Dayhf_optn = FALSE;
1057 						Jtt_optn = FALSE;
1058 						mtrev_optn = FALSE;
1059 						cprev_optn = FALSE;
1060 						vtmv_optn = TRUE;
1061 						wag_optn = FALSE;
1062 						fprintf(STDOUT, "(consists very likely of amino acids encoded on nuclear DNA)\n");
1063 					} else {
1064 						if (c5 < c6) {
1065 							/* c5 -> WAG */
1066 							Dayhf_optn = FALSE;
1067 							Jtt_optn = FALSE;
1068 							mtrev_optn = FALSE;
1069 							cprev_optn = FALSE;
1070 							vtmv_optn = FALSE;
1071 							wag_optn = TRUE;
1072 							fprintf(STDOUT, "(consists very likely of amino acids encoded on nuclear DNA)\n");
1073 						} else {
1074 							/* if (c6) */
1075 							/* c6 -> cpREV */
1076 							Dayhf_optn = FALSE;
1077 							Jtt_optn = FALSE;
1078 							mtrev_optn = FALSE;
1079 							cprev_optn = TRUE;
1080 							vtmv_optn = FALSE;
1081 							wag_optn = FALSE;
1082 							fprintf(STDOUT, "(consists very likely of amino acids encoded on cpDNA)\n");
1083 						} /* if c5 else c6 */
1084 					} /* if c4 else c5 */
1085 				} /* if c3 else c4 */
1086 			} /* if c2 */
1087 		} /* if c1 */
1088 #endif /* CPREV */
1089 
1090 	} else if (data_optn == NUCLEOTIDE) {
1091 		fprintf(STDOUT, "(consists very likely of nucleotides)\n");
1092 	} else {
1093 		fprintf(STDOUT, "(consists very likely of binary state data)\n");
1094 	}
1095 } /* guessmodel */
1096 
1097 
1098 /******************************************************************************/
1099 
1100 /* parts outsourced to treesort.c/treesort.h */
1101 #if 0 /* TREESORT_H */
1102 #endif /* TREESORT_H */
1103 
1104 /* parts outsourced to consensus.c/consensus.h */
1105 #if 0 /* CONSENSUS_H */
1106 #endif /* CONSENSUS_H */
1107 
1108 
1109 /******************************************************************************/
1110 /* storing and evaluating quartet branching information                       */
1111 /******************************************************************************/
1112 
1113 /* general remarks:
1114 
1115 	for a quartet with the taxa a, b, c, d there are
1116 	three possible binary trees:
1117 
1118 		1)  (a,b)-(c,d)
1119 		2)  (a,c)-(b,d)
1120 		3)  (a,d)-(b,c)
1121 
1122 	For every quartet information about its branching structure is
1123 	stored. With the functions  readquartet  and  writequartet
1124 	this information can be accessed. For every quartet (a,b,c,d)
1125 	with a < b < c < d (taxa) the branching information is encoded
1126 	using 4 bits:
1127 
1128 	value          8             4             2             1
1129 	        +-------------+-------------+-------------+-------------+
1130 	        |  not used   |   tree 3    |   tree 2    |   tree 1    |
1131 	        +-------------+-------------+-------------+-------------+
1132 
1133 	If the branching structure of the taxa corresponds to one of the
1134 	three trees the corresponding bit is set. If the branching structure
1135 	is unclear because two of the three trees have the same maximum
1136 	likelihood value the corresponding two bits are set. If the branching
1137 	structure is completely unknown all the bits are set (the highest
1138 	bit is always cleared because it is not used).
1139 
1140 */
1141 
1142 /* allocate memory for quartets */
callocquartets(int taxa)1143 unsigned char *callocquartets(int taxa)
1144 {
1145 	uli nc, numch;
1146 	unsigned char *qinfo;
1147 
1148 	/* compute number of quartets */
1149 	Numquartets = (uli) taxa*(taxa-1)*(taxa-2)*(taxa-3)/24;
1150 	if (Numquartets % 2 == 0) { /* even number */
1151 		numch = Numquartets/2;
1152 	} else { /* odd number */
1153 		numch = (Numquartets + 1)/2;
1154 	}
1155 	/* allocate memory */
1156 	qinfo = (unsigned char *) calloc((size_t) numch, sizeof(unsigned char));
1157 	if (qinfo == NULL) maerror("quartetinfo in callocquartets");
1158 	for (nc = 0; nc < numch; nc++) qinfo[nc] = 0;
1159 	return(qinfo);
1160 } /* callocquartets */
1161 
1162 /* free quartet memory */
freequartets()1163 void freequartets()
1164 {
1165 	free(quartetinfo);
1166 } /* freequartets */
1167 
1168 /**************/
1169 
1170 /* read quartet info - a < b < c < d */
readquartet(int a,int b,int c,int d)1171 unsigned char readquartet(int a, int b, int c, int d)
1172 {
1173 	uli qnum;
1174 
1175 	if (! ((a < b) && (b < c) && (c < d))) {
1176 		fprintf(stderr, "\n\n\nHALT: PLEASE REPORT ERROR HS2 TO DEVELOPERS! (%d,%d,%d,%d)\n", a,b,c,d);
1177 #		if PARALLEL
1178 			PP_Finalize();
1179 #		endif
1180 		exit (999);
1181 
1182 	}
1183 	qnum = (uli) a
1184 			+ (uli) b*(b-1)/2
1185 			+ (uli) c*(c-1)*(c-2)/6
1186 			+ (uli) d*(d-1)*(d-2)*(d-3)/24;
1187 	if (qnum % 2 == 0) { /* even number */
1188 		/* bits 0 to 3 */
1189 		return (quartetinfo[qnum/2] & (unsigned char) 15);
1190 	} else { /* odd number */
1191 		/* bits 4 to 7 */
1192 		return ((quartetinfo[(qnum-1)/2] & (unsigned char) 240)>>4);
1193 	}
1194 } /* readquartet */
1195 
1196 /**************/
1197 
1198 /* write quartet info - a < b < c < d, 0 <= info <= 15 */
writequartet(int a,int b,int c,int d,unsigned char info)1199 void writequartet(int a, int b, int c, int d, unsigned char info)
1200 {
1201 	uli qnum;
1202 
1203 	qnum = (uli) a
1204 			+ (uli) b*(b-1)/2
1205 			+ (uli) c*(c-1)*(c-2)/6
1206 			+ (uli) d*(d-1)*(d-2)*(d-3)/24;
1207 	if (qnum % 2 == 0) { /* even number */
1208 		/* bits 0 to 3 */
1209 		quartetinfo[qnum/2] =
1210 			((quartetinfo[qnum/2] & (unsigned char) 240) |
1211 			(info & (unsigned char) 15));
1212 	} else { /* odd number */
1213 		/* bits 4 to 7 */
1214 		quartetinfo[(qnum-1)/2] =
1215 			((quartetinfo[(qnum-1)/2] & (unsigned char) 15) |
1216 			((info & (unsigned char) 15)<<4));
1217 	}
1218 } /* writequartet */
1219 
1220 /**************/
1221 /* prototypes */
1222 int openfiletowrite(FILE **, char[], char[]);
1223 void closefile(FILE *);
1224 
1225 /* sorts three doubles in descending order */
sort3doubles(dvector num,ivector order)1226 void sort3doubles(dvector num, ivector order)
1227 {
1228 	if (num[0] > num[1]) {
1229 		if(num[2] > num[0]) {
1230 			order[0] = 2;
1231 			order[1] = 0;
1232 			order[2] = 1;
1233 		} else if (num[2] < num[1]) {
1234 			order[0] = 0;
1235 			order[1] = 1;
1236 			order[2] = 2;
1237 		} else {
1238 			order[0] = 0;
1239 			order[1] = 2;
1240 			order[2] = 1;
1241 		}
1242 	} else {
1243 		if(num[2] > num[1]) {
1244 			order[0] = 2;
1245 			order[1] = 1;
1246 			order[2] = 0;
1247 		} else if (num[2] < num[0]) {
1248 			order[0] = 1;
1249 			order[1] = 0;
1250 			order[2] = 2;
1251 		} else {
1252 			order[0] = 1;
1253 			order[1] = 2;
1254 			order[2] = 0;
1255 		}
1256 	}
1257 } /* sort3doubles */
1258 
1259 /*************************/
1260 
1261 /* compute Bayesian weights from log-lkls d1, d2, d3 */
loglkl2weight(int a,int b,int c,int i,double d1,double d2,double d3,int usebestq)1262 unsigned char loglkl2weight(int    a,
1263                             int    b,
1264                             int    c,
1265                             int    i,
1266                             double d1,
1267                             double d2,
1268                             double d3,
1269                             int    usebestq)
1270 {
1271 	double onethird;
1272 	unsigned char treebits[3];
1273 	double templog;
1274 	unsigned char tmpweight;
1275 	double temp;
1276 	double temp1, temp2, temp3;
1277 	unsigned char discreteweight[3];
1278 
1279 	double tttqweight[3];
1280 	double tttsqdiff[3];
1281 	int tttsqorder[3];
1282 	int tttqworder[3];
1283 
1284 	tttqweight[0] = d1;
1285 	tttqweight[1] = d2;
1286 	tttqweight[2] = d3;
1287 
1288 	onethird = 1.0/3.0;
1289 	treebits[0] = (unsigned char) 1;
1290 	treebits[1] = (unsigned char) 2;
1291 	treebits[2] = (unsigned char) 4;
1292 
1293 	/* sort in descending order */
1294 	sort3doubles(tttqweight, tttqworder);
1295 
1296 	if (usebestq) {
1297 		tttsqorder[2] = 2;
1298 		discreteweight[tttsqorder[2]] = treebits[tttqworder[0]];
1299 		if (tttqweight[tttqworder[0]] == tttqweight[tttqworder[1]]) {
1300 			discreteweight[tttsqorder[2]] = discreteweight[tttsqorder[2]] || treebits[tttqworder[1]];
1301 			if (tttqweight[tttqworder[1]] == tttqweight[tttqworder[2]]) {
1302 				discreteweight[tttsqorder[2]] = discreteweight[tttsqorder[2]] || treebits[tttqworder[2]];
1303 				discreteweight[tttsqorder[2]] = 7;
1304 			}
1305 		}
1306 	} else {
1307 
1308 		/* compute Bayesian weights */
1309 		templog = tttqweight[tttqworder[1]]-tttqweight[tttqworder[0]];
1310 		if(templog < -TP_MAX_EXP_DIFF)	/* possible, since 1.0+exp(>36) == 1.0 */
1311 			tttqweight[tttqworder[1]] = 0.0;
1312 		else
1313 			tttqweight[tttqworder[1]] = exp(templog);
1314 
1315 	 	templog = tttqweight[tttqworder[2]]-tttqweight[tttqworder[0]];
1316 		if(templog < -TP_MAX_EXP_DIFF)	/* possible, since 1.0+exp(>36) == 1.0 */
1317 			tttqweight[tttqworder[2]] = 0.0;
1318 		else
1319 			tttqweight[tttqworder[2]] = exp(templog);
1320 
1321 		tttqweight[tttqworder[0]] = 1.0;
1322 
1323 		temp = tttqweight[0] + tttqweight[1] + tttqweight[2];
1324 
1325 		tttqweight[0] = tttqweight[0]/temp;
1326 		tttqweight[1] = tttqweight[1]/temp;
1327 		tttqweight[2] = tttqweight[2]/temp;
1328 
1329 		/* square deviations */
1330 		temp1 = 1.0 - tttqweight[tttqworder[0]];
1331 		tttsqdiff[0] = temp1 * temp1 +
1332 		tttqweight[tttqworder[1]] * tttqweight[tttqworder[1]] +
1333 		tttqweight[tttqworder[2]] * tttqweight[tttqworder[2]];
1334 		discreteweight[0] = treebits[tttqworder[0]];
1335 
1336 		temp1 = 0.5 - tttqweight[tttqworder[0]];
1337 		temp2 = 0.5 - tttqweight[tttqworder[1]];
1338 		tttsqdiff[1] = temp1 * temp1 + temp2 * temp2 +
1339 		tttqweight[tttqworder[2]] * tttqweight[tttqworder[2]];
1340 		discreteweight[1] = treebits[tttqworder[0]] + treebits[tttqworder[1]];
1341 
1342 		temp1 = onethird - tttqweight[tttqworder[0]];
1343 		temp2 = onethird - tttqweight[tttqworder[1]];
1344 		temp3 = onethird - tttqweight[tttqworder[2]];
1345 		tttsqdiff[2] = temp1 * temp1 + temp2 * temp2 + temp3 * temp3;
1346 		discreteweight[2] = (unsigned char) 7;
1347 
1348 		/* sort in descending order */
1349 		sort3doubles(tttsqdiff, tttsqorder);
1350 	}
1351 
1352 	tmpweight = discreteweight[tttsqorder[2]];
1353 	return(tmpweight);
1354 } /* loglkl2weight */
1355 
1356 
1357 /*************************/
1358 
1359 /* checks out all possible quartets */
computeallquartets()1360 void computeallquartets()
1361 {
1362 	double onethird;
1363 	uli nq;
1364 	unsigned char treebits[3];
1365 #	if ! PARALLEL
1366 		unsigned char tmpweight;
1367 		FILE *lhfp;
1368 		int a, b, c, i;
1369 #	endif
1370 
1371 		onethird = 1.0/3.0;
1372 		treebits[0] = (unsigned char) 1;
1373 		treebits[1] = (unsigned char) 2;
1374 		treebits[2] = (unsigned char) 4;
1375 
1376 	if (show_optn) { /* list all unresolved quartets */
1377 		openfiletowrite(&unresfp, UNRESOLVED, "unresolved quartet trees");
1378 		fprintf(unresfp, "List of all completely unresolved quartets:\n\n");
1379 	}
1380 
1381 	nq = 0;
1382 	badqs = 0;
1383 
1384 	/* start timer - percentage of completed quartets */
1385 	time(&time0);
1386 	time1 = time0;
1387 	mflag = 0;
1388 
1389 #	if PARALLEL
1390 	{
1391 		schedtype sched;
1392 		int flag;
1393 		MPI_Status stat;
1394 
1395 		int dest = 1;
1396 		uli qaddr  =0;
1397 		uli qamount=0;
1398 		int qblocksent = 0;
1399 		int apr;
1400 		uli Sumquartets;
1401 		uli sq, noq;
1402 
1403 		uli tmpfullresqs;
1404 		uli tmppartresqs;
1405 		uli tmpunresqs;
1406 		uli tmpmissingqs;
1407 
1408 		Sumquartets = numquarts(Maxspc);
1409 		initsched(&sched, Sumquartets, PP_NumProcs-1, 4);
1410 		qamount=SCHEDALG_ML_STEP(&sched);
1411 
1412 		while (qamount > 0) {
1413 			if (PP_emptyslave()) {
1414 				PP_RecvQuartBlock(0, &sq, &noq, quartetinfo, &tmpfullresqs, &tmppartresqs, &tmpunresqs, &tmpmissingqs, &usebestq_optn, &apr);
1415 				qblocksent -= noq;
1416 			}
1417 			dest = PP_getslave();
1418 			PP_SendDoQuartBlock(dest, qaddr, qamount, usebestq_optn, (approxqp ? APPROX : EXACT));
1419 			qblocksent += qamount;
1420 			qaddr += qamount;
1421 			qamount=SCHEDALG_ML_STEP(&sched);
1422 
1423 			MPI_Iprobe(MPI_ANY_SOURCE, PP_QUARTBLOCKSPECS, PP_Comm, &flag, &stat);
1424 			while (flag) {
1425 				PP_RecvQuartBlock(0, &sq, &noq, quartetinfo, &tmpfullresqs, &tmppartresqs, &tmpunresqs, &tmpmissingqs, &usebestq_optn, &apr);
1426 				qblocksent -= noq;
1427 				MPI_Iprobe(MPI_ANY_SOURCE, PP_QUARTBLOCKSPECS, PP_Comm, &flag, &stat);
1428 			}
1429 			checktime(&time0, &time1, &time2, qaddr - qblocksent, Sumquartets, &mflag);
1430 		}
1431 		while (qblocksent > 0) {
1432 			PP_RecvQuartBlock(0, &sq, &noq, quartetinfo, &tmpfullresqs, &tmppartresqs, &tmpunresqs, &tmpmissingqs, &usebestq_optn, &apr);
1433 			qblocksent -= noq;
1434 			checktime(&time0, &time1, &time2, qaddr - qblocksent, Sumquartets, &mflag);
1435 		}
1436 	}
1437 #	else /* ! PARALLEL */
1438 
1439 		addtimes(GENERAL, &tarr);
1440 		if (savequartlh_optn) {
1441 			openfiletowrite(&lhfp, ALLQUARTLH, "all quartet likelihoods");
1442 			if (saveqlhbin_optn) writetpqfheader(Maxspc, lhfp, 3);
1443 			else                 writetpqfheader(Maxspc, lhfp, 4);
1444 		}
1445 
1446 		for (i = 3; i < Maxspc; i++)
1447 			for (c = 2; c < i; c++)
1448 				for (b = 1; b < c; b++)
1449 					for (a = 0; a < b; a++) {
1450 						nq++;
1451 
1452 						/* generate message every >TIMECHECK_INTERVAL seconds */
1453 						/* check timer */
1454 						checktime(&time0, &time1, &time2, nq, Numquartets, &mflag);
1455 
1456 						/* maximum likelihood values */
1457 
1458 						/* exact or approximate maximum likelihood values */
1459 						compute_quartlklhds(a,b,c,i,&qweight[0],&qweight[1],&qweight[2], (approxqp ? APPROX : EXACT));
1460 
1461 						if (savequartlh_optn) {
1462 							if (saveqlhbin_optn)
1463 								fwrite(qweight, sizeof(double), 3, lhfp);
1464 							else
1465 								fprintf(lhfp, "(%d,%d,%d,%d)\t%f\t%f\t%f\n", a, b, c, i,
1466 							qweight[0], qweight[1], qweight[2]);
1467 						}
1468 
1469 						tmpweight = loglkl2weight(a, b, c, i, qweight[0], qweight[1], qweight[2], usebestq_optn);
1470 /* tmpweight = loglkl2weight(a, b, c, i, d1, d2, d3, usebestq_optn); */
1471 
1472 						/* determine best discrete weight */
1473 						/* writequartet(a, b, c, i, discreteweight[sqorder[2]]); */
1474 						writequartet(a, b, c, i, tmpweight);
1475 
1476 
1477 						/* compute sums of topologies per taxon, step by step */
1478 						++(qinfomatr[8][a]);
1479 						++(qinfomatr[8][b]);
1480 						++(qinfomatr[8][c]);
1481 						++(qinfomatr[8][i]);
1482 						++(qinfomatr[tmpweight][a]);
1483 						++(qinfomatr[tmpweight][b]);
1484 						++(qinfomatr[tmpweight][c]);
1485 						++(qinfomatr[tmpweight][i]);
1486 
1487 						if ((tmpweight <= 2) || (tmpweight == 4)) {
1488 							fullresqs++;
1489 						} else {
1490 							if (tmpweight == 7) {
1491 								unresqs++;
1492 							} else {
1493 								if (tmpweight == 0) {
1494 									missingqs++;
1495 								} else {
1496 									partresqs++;
1497 								}
1498 							}
1499 						}
1500 
1501 						/* counting completely unresolved quartets */
1502 						if (tmpweight == 7) {
1503 							badqs++;
1504 							badtaxon[a]++;
1505 							badtaxon[b]++;
1506 							badtaxon[c]++;
1507 							badtaxon[i]++;
1508 							if (show_optn) {
1509 								fputid10(unresfp, a);
1510 								fprintf(unresfp, "  ");
1511 								fputid10(unresfp, b);
1512 								fprintf(unresfp, "  ");
1513 								fputid10(unresfp, c);
1514 								fprintf(unresfp, "  ");
1515 								fputid(unresfp, i);
1516 								fprintf(unresfp, "\n");
1517 							}
1518 						}
1519 						addtimes(QUARTETS, &tarr);
1520 		}
1521 		if (savequartlh_optn) {
1522 			closefile(lhfp);
1523 		}
1524 		if (show_optn)
1525 			closefile(unresfp);
1526 		if (mflag == 1)
1527 			fprintf(STDOUT, "\n");
1528 #	endif /* PARALLEL */
1529 
1530 } /* computeallquartets */
1531 
1532 /* trueID (HAS) */
1533 /* check the branching structure between the leaves (not the taxa!)
1534    A, B, C, and I (A, B, C, I don't need to be ordered). As a result,
1535    the two leaves that are closer related to each other than to leaf I
1536    are found in chooseX and chooseY. If the branching structure is
1537    not uniquely defined, chooseX and chooseY are chosen randomly
1538    from the possible taxa */
checkquartet_trueID(int A,int B,int C,int I,int * trueID,int * chooseX,int * chooseY)1539 void checkquartet_trueID(int  A,   /* quartet leaf idx in permutation array */
1540                   int  B,          /* dito.                                 */
1541                   int  C,          /* dito.                                 */
1542                   int  I,          /* dito., to be inserted                 */
1543                   int *trueID,     /* permutation array                     */
1544                   int *chooseX,    /* (chooseX,chooseY | x,I)               */
1545                   int *chooseY)    /* chooseX+Y are non-neighbors of I      */
1546 {
1547 	int i, j;              /* counter */
1548 	int a, b;              /* temp variable for sorting */
1549 	int taxon[5];          /* sorting array for taxa from trueID */
1550 	int leaf[5];           /* sorting array for indices in trueID */
1551 	int ipos;              /* position of leaf I in sorted array */
1552 	unsigned char qresult; /* quartet topology */
1553 	int notunique = FALSE; /* non-unique topology */
1554 
1555 	/* The relationship between leaves and taxa is defined by trueID */
1556 	taxon[1] = trueID[A]; /* taxon number */
1557 	leaf[1] = A;          /* leaf index in permutation */
1558 	taxon[2] = trueID[B];
1559 	leaf[2] = B;
1560 	taxon[3] = trueID[C];
1561 	leaf[3] = C;
1562 	taxon[4] = trueID[I];
1563 	leaf[4] = I;
1564 
1565 	/* sort for taxa */
1566 	/* Source: Numerical Recipes (PIKSR2.C) */
1567 	for (j = 2; j <= 4; j++) {
1568 		a = taxon[j];
1569 		b = leaf[j];
1570 		i = j-1;
1571 		while (i > 0 && taxon[i] > a) {
1572 			taxon[i+1] = taxon[i];
1573 			leaf[i+1] = leaf[i];
1574 			i--;
1575 		}
1576 		taxon[i+1] = a;
1577 		leaf[i+1] = b;
1578 	}
1579 
1580 	/* where is leaf I ? */
1581 	ipos = 1;
1582 	while (leaf[ipos] != I) ipos++;
1583 
1584 	/* look at sequence quartet */
1585 	qresult = readquartet(taxon[1], taxon[2], taxon[3], taxon[4]);
1586 
1587 	/* chooseX and chooseY */
1588  	/* qresult is not unique, a covered unique qresult is chosen */
1589 	/* and the while loop is circled again (optimizable?) (HAS)  */
1590 	do {
1591 		switch (qresult) {
1592 
1593 			/* one single branching structure */
1594 
1595 			/* 001 */
1596 			case 1:		if (ipos == 1 || ipos == 2) {
1597 						*chooseX = leaf[3];
1598 						*chooseY = leaf[4];
1599 					} else {
1600 						*chooseX = leaf[1];
1601 						*chooseY = leaf[2];
1602 					}
1603 					notunique = FALSE;
1604 					break;
1605 
1606 			/* 010 */
1607 			case 2:		if (ipos == 1 || ipos == 3) {
1608 						*chooseX = leaf[2];
1609 						*chooseY = leaf[4];
1610 					} else {
1611 						*chooseX = leaf[1];
1612 						*chooseY = leaf[3];
1613 					}
1614 					notunique = FALSE;
1615 					break;
1616 
1617 			/* 100 */
1618 			case 4:		if (ipos == 1 || ipos == 4) {
1619 						*chooseX = leaf[2];
1620 						*chooseY = leaf[3];
1621 					} else {
1622 						*chooseX = leaf[1];
1623 						*chooseY = leaf[4];
1624 					}
1625 					notunique = FALSE;
1626 					break;
1627 
1628 			/* two possible branching structures */
1629 
1630 			/* 011 */
1631 			case 3:		if (randominteger(2)) qresult = 1;
1632 					else qresult = 2;
1633 					notunique = TRUE;
1634 					break;
1635 
1636 			/* 101 */
1637 			case 5:		if (randominteger(2)) qresult = 1;
1638 					else qresult = 4;
1639 					notunique = TRUE;
1640 					break;
1641 
1642 			/* 110 */
1643 			case 6:		if (randominteger(2)) qresult = 2;
1644 					else qresult = 4;
1645 					notunique = TRUE;
1646 					break;
1647 
1648 			/* three possible branching structures */
1649 
1650 			/* 111 */
1651 			case 7:		qresult = (1 << randominteger(3)); /* 1, 2, or 4 */
1652 					notunique = TRUE;
1653 					break;
1654 
1655 			/* 000 */  /* empty quartet: missing data -> OK, otherwise ERROR */
1656 			case 0:		if (readsubset_optn) {
1657 						*chooseX = -1;
1658 						*chooseY = -1;
1659 					} else {
1660 #						if PARALLEL
1661 							fprintf(STDOUT, "\n\n\n(%2d)HALT: PLEASE REPORT ERROR K1-PARALLEL TO DEVELOPERS : empty (%d,%d,%d,%d) = %ld\n\n\n",
1662 								PP_Myid, taxon[1], taxon[2], taxon[3], taxon[4],
1663 								quart2num(taxon[1], taxon[2], taxon[3], taxon[4]));
1664 								PP_Finalize();
1665 								exit (999);
1666 #						else
1667 							fprintf(STDOUT, "\n\n\nHALT: PLEASE REPORT ERROR K1 TO DEVELOPERS: empty (%d,%d,%d,%d) = %ld\n\n\n",
1668 								taxon[1], taxon[2], taxon[3], taxon[4],
1669 								quart2num(taxon[1], taxon[2], taxon[3], taxon[4]));
1670 								exit (999);
1671 #						endif
1672 					}
1673 					notunique = FALSE;
1674 					break;
1675 
1676 			default:	/* Program error [checkquartet] */
1677 #					if PARALLEL
1678 						fprintf(STDOUT, "\n\n\n(%2d)HALT: PLEASE REPORT ERROR K2-PARALLEL TO DEVELOPERS (%d,%d,%d,%d) = %ld (%d)\n\n\n",
1679 							PP_Myid, taxon[1], taxon[2], taxon[3], taxon[4],
1680 							quart2num(taxon[1], taxon[2], taxon[3], taxon[4]), qresult);
1681 							PP_Finalize();
1682 							exit (999);
1683 #					else
1684 						fprintf(STDOUT, "\n\n\nHALT: PLEASE REPORT ERROR K2 TO DEVELOPERS (%d,%d,%d,%d) = %ld (%d)\n\n\n",
1685 							taxon[1], taxon[2], taxon[3], taxon[4],
1686 							quart2num(taxon[1], taxon[2], taxon[3], taxon[4]), qresult);
1687 							exit (999);
1688 #					endif
1689 
1690 		} /* switch (qresult) */
1691 	} while (notunique);
1692 
1693 	return;
1694 } /* checkquartet_trueID */
1695 
1696 /* un-trueID-ed (HAS) */
1697 /* check the branching structure between the leaves (not the taxa!)
1698    A, B, C, and I (A, B, C, I don't need to be ordered). As a result,
1699    the two leaves that are closer related to each other than to leaf I
1700    are found in chooseX and chooseY. If the branching structure is
1701    not uniquely defined, chooseX and chooseY are chosen randomly
1702    from the possible taxa */
checkquartet(int A,int B,int C,int I,int * chooseX,int * chooseY)1703 void checkquartet(int  A,          /* quartet taxon ID                 */
1704                   int  B,          /* dito.                            */
1705                   int  C,          /* dito.                            */
1706                   int  I,          /* dito., to be inserted            */
1707                   int *chooseX,    /* (chooseX,chooseY | x,I)          */
1708                   int *chooseY)    /* chooseX+Y are non-neighbors of I */
1709 {
1710 	int i, j;              /* counter */
1711 	int a;                 /* temp variable for sorting */
1712 	int leaf[5];           /* sorting array */
1713 	int ipos;              /* position of leaf I in sorted array */
1714 	unsigned char qresult; /* quartet topology */
1715 	int notunique = FALSE; /* non-unique topology */
1716 
1717 	/* The relationship between leaves and taxa is defined by trueID */
1718 	leaf [1] = A; /* taxon ID of leaf 1 */
1719 	leaf [2] = B; /* taxon ID of leaf 2 */
1720 	leaf [3] = C; /* taxon ID of leaf 3 */
1721 	leaf [4] = I; /* taxon ID of leaf 4 */
1722 
1723 	/* sort for taxa */
1724 	/* Source: Numerical Recipes (PIKSR2.C) */
1725 	for (j = 2; j <= 4; j++) {
1726 		a = leaf[j];
1727 		i = j-1;
1728 		while (i > 0 && leaf[i] > a) {
1729 			leaf[i+1] = leaf[i];
1730 			i--;
1731 		}
1732 		leaf[i+1] = a;
1733 	}
1734 
1735 	/* where is leaf I ? */
1736 	ipos = 1;
1737 	while (leaf[ipos] != I) ipos++;
1738 
1739 	/* look at sequence quartet */
1740 	qresult = readquartet(leaf[1], leaf[2], leaf[3], leaf[4]);
1741 
1742 	/* chooseX and chooseY */
1743  	/* qresult is not unique, a covered unique qresult is chosen */
1744 	/* and the while loop is circled again (optimizable?) (HAS)  */
1745 	do {
1746 		switch (qresult) {
1747 
1748 			/* one single branching structure */
1749 
1750 			/* 001 */
1751 			case 1:		if (ipos == 1 || ipos == 2) {
1752 						*chooseX = leaf[3];
1753 						*chooseY = leaf[4];
1754 					} else {
1755 						*chooseX = leaf[1];
1756 						*chooseY = leaf[2];
1757 					}
1758 					notunique = FALSE;
1759 					break;
1760 
1761 			/* 010 */
1762 			case 2:		if (ipos == 1 || ipos == 3) {
1763 						*chooseX = leaf[2];
1764 						*chooseY = leaf[4];
1765 					} else {
1766 						*chooseX = leaf[1];
1767 						*chooseY = leaf[3];
1768 					}
1769 					notunique = FALSE;
1770 					break;
1771 
1772 			/* 100 */
1773 			case 4:		if (ipos == 1 || ipos == 4) {
1774 						*chooseX = leaf[2];
1775 						*chooseY = leaf[3];
1776 					} else {
1777 						*chooseX = leaf[1];
1778 						*chooseY = leaf[4];
1779 					}
1780 					notunique = FALSE;
1781 					break;
1782 
1783 			/* two possible branching structures */
1784 
1785 			/* 011 */
1786 			case 3:		if (randominteger(2)) qresult = 1;
1787 					else qresult = 2;
1788 					notunique = TRUE;
1789 					break;
1790 
1791 			/* 101 */
1792 			case 5:		if (randominteger(2)) qresult = 1;
1793 					else qresult = 4;
1794 					notunique = TRUE;
1795 					break;
1796 
1797 			/* 110 */
1798 			case 6:		if (randominteger(2)) qresult = 2;
1799 					else qresult = 4;
1800 					notunique = TRUE;
1801 					break;
1802 
1803 			/* three possible branching structures */
1804 
1805 			/* 111 */
1806 			case 7:		qresult = (1 << randominteger(3)); /* 1, 2, or 4 */
1807 					notunique = TRUE;
1808 					break;
1809 
1810 			/* 000 */  /* empty quartet: missing data -> OK, otherwise ERROR */
1811 			case 0:		if (readsubset_optn) {
1812 						*chooseX = -1;
1813 						*chooseY = -1;
1814 					} else {
1815 #						if PARALLEL
1816 							fprintf(STDOUT, "\n\n\n(%2d)HALT: PLEASE REPORT ERROR K1-PARALLEL TO DEVELOPERS : empty (%d,%d,%d,%d) = %ld\n\n\n",
1817 								PP_Myid, leaf[1], leaf[2], leaf[3], leaf[4],
1818 								quart2num(leaf[1], leaf[2], leaf[3], leaf[4]));
1819 								PP_Finalize();
1820 								exit (999);
1821 #						else
1822 							fprintf(STDOUT, "\n\n\nHALT: PLEASE REPORT ERROR K1 TO DEVELOPERS: empty (%d,%d,%d,%d) = %ld\n\n\n",
1823 								leaf[1], leaf[2], leaf[3], leaf[4],
1824 								quart2num(leaf[1], leaf[2], leaf[3], leaf[4]));
1825 								exit (999);
1826 #						endif
1827 					}
1828 					notunique = FALSE;
1829 					break;
1830 
1831 			default:	/* Program error [checkquartet] */
1832 #					if PARALLEL
1833 						fprintf(STDOUT, "\n\n\n(%2d)HALT: PLEASE REPORT ERROR K2-PARALLEL TO DEVELOPERS (%d,%d,%d,%d) = %ld (%d)\n\n\n",
1834 							PP_Myid, leaf[1], leaf[2], leaf[3], leaf[4],
1835 							quart2num(leaf[1], leaf[2], leaf[3], leaf[4]), qresult);
1836 							PP_Finalize();
1837 							exit (999);
1838 #					else
1839 						fprintf(STDOUT, "\n\n\nHALT: PLEASE REPORT ERROR K2 TO DEVELOPERS (%d,%d,%d,%d) = %ld (%d)\n\n\n",
1840 							leaf[1], leaf[2], leaf[3], leaf[4],
1841 							quart2num(leaf[1], leaf[2], leaf[3], leaf[4]), qresult);
1842 							exit (999);
1843 #					endif
1844 
1845 		} /* switch (qresult) */
1846 	} while (notunique);
1847 
1848 	return;
1849 } /* checkquartet */
1850 
1851